diff --git a/CHANGELOG.md b/CHANGELOG.md index c8ccc8ae..cf2c2f84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ - Add `--quiet-empty` (`-E`) flag to suppress output when input is empty. Closes #1936, see #3563 (@NORMAL-EX) - Improve native man pages and command help syntax highlighting by stripping overstriking, see #3517 (@akirk) - Add `--fallback-syntax`/`--fallback-language` to apply syntax highlighting only when auto-detection fails, see #1341 (@Xavrir) +- Map `BUILD` case-sensitively to Python (Starlark) for Bazel, see #3576 (@vorburger) ## Bugfixes - Fix inconsistent `.deb` MUSL package names (aarch64-musl used `arm64` instead of `musl-linux-arm64`, and `musleabihf` target missed `bat-musl` prefix). Closes #3482, see #3642 (@mvanhorn) diff --git a/build/syntax_mapping.rs b/build/syntax_mapping.rs index 64be4bb9..c7133be9 100644 --- a/build/syntax_mapping.rs +++ b/build/syntax_mapping.rs @@ -47,11 +47,37 @@ impl ToTokens for MappingTarget { } } -#[derive(Clone, Debug, PartialEq, Eq, Hash, DeserializeFromStr)] +/// Whether a glob pattern should be matched case-sensitively or case-insensitively. +/// +/// Mirrors the runtime `Case` type in `src/syntax_mapping.rs`. +#[derive(Clone, Debug, PartialEq, Eq, Hash, Default)] +enum Case { + Sensitive, + #[default] + Insensitive, +} +impl ToTokens for Case { + fn to_tokens(&self, tokens: &mut TokenStream) { + let t = match self { + Self::Sensitive => quote! { Case::Sensitive }, + Self::Insensitive => quote! { Case::Insensitive }, + }; + tokens.append_all(t); + } +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize)] +#[serde(try_from = "RawMatcher")] /// A single matcher. /// /// Codegen converts this into a `Lazy<Option<GlobMatcher>>`. -struct Matcher(Vec<MatcherSegment>); +struct Matcher { + segments: Vec<MatcherSegment>, + /// Whether the glob pattern should be matched case-sensitively. + /// + /// Defaults to `Case::Insensitive` for backwards compatibility. + case: Case, +} /// Parse a matcher.
/// /// Note that this implementation is rather strict: it will greedily interpret @@ -116,18 +142,59 @@ impl FromStr for Matcher { bail!(r#"Invalid matcher: "{s}""#); } - Ok(Self(non_empty_segments)) + Ok(Self { + segments: non_empty_segments, + case: Case::Insensitive, + }) + } +} + +/// Helper type for deserializing a `Matcher` from either a plain string or a +/// `{ glob = "...", case_sensitive = true }` inline table. +#[derive(Deserialize)] +#[serde(untagged)] +enum RawMatcher { + Simple(String), + Full { + glob: String, + #[serde(default)] + case_sensitive: bool, + }, +} + +impl TryFrom<RawMatcher> for Matcher { + type Error = anyhow::Error; + + fn try_from(raw: RawMatcher) -> Result<Self, Self::Error> { + match raw { + RawMatcher::Simple(s) => Matcher::from_str(&s), + RawMatcher::Full { + glob, + case_sensitive, + } => { + let mut matcher = Matcher::from_str(&glob)?; + matcher.case = if case_sensitive { + Case::Sensitive + } else { + Case::Insensitive + }; + Ok(matcher) + } + } } } impl ToTokens for Matcher { fn to_tokens(&self, tokens: &mut TokenStream) { - let t = match self.0.as_slice() { + let case = &self.case; + let t = match self.segments.as_slice() { [] => unreachable!("0-length matcher should never be created"), [MatcherSegment::Text(text)] => { - quote! { Lazy::new(|| Some(build_matcher_fixed(#text))) } + quote! { Lazy::new(|| Some(build_matcher_fixed(#text, #case))) } } // parser logic ensures that this case can only happen when there are dynamic segments - segs @ [_, ..] => quote! { Lazy::new(|| build_matcher_dynamic(&[ #(#segs),* ])) }, + segs @ [_, ..] => { + quote! { Lazy::new(|| build_matcher_dynamic(&[ #(#segs),* ], #case)) } + } }; tokens.append_all(t); } @@ -175,6 +242,7 @@ impl MatcherSegment { /// A struct that models a single .toml file in /src/syntax_mapping/builtins/.
#[derive(Clone, Debug, Deserialize)] struct MappingDefModel { + #[serde(default)] mappings: IndexMap<MappingTarget, Vec<Matcher>>, } impl MappingDefModel { diff --git a/src/syntax_mapping.rs b/src/syntax_mapping.rs index 0cd2d655..584a5cb6 100644 --- a/src/syntax_mapping.rs +++ b/src/syntax_mapping.rs @@ -17,9 +17,16 @@ use ignored_suffixes::IgnoredSuffixes; mod builtin; pub mod ignored_suffixes; -fn make_glob_matcher(from: &str) -> Result<GlobMatcher> { +/// Whether a glob pattern should be matched case-sensitively or case-insensitively. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum Case { + Sensitive, + Insensitive, +} + +fn make_glob_matcher(from: &str, case: Case) -> Result<GlobMatcher> { let matcher = GlobBuilder::new(from) - .case_insensitive(true) + .case_insensitive(matches!(case, Case::Insensitive)) .literal_separator(true) .build()? .compile_matcher(); @@ -97,7 +104,14 @@ impl<'a> SyntaxMapping<'a> { } pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> { - let matcher = make_glob_matcher(from)?; + let matcher = make_glob_matcher(from, Case::Insensitive)?; + self.custom_mappings.push((matcher, to)); + Ok(()) + } + + /// Like [`Self::insert`], but the glob pattern is matched case-sensitively.
+ pub fn insert_case_sensitive(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> { + let matcher = make_glob_matcher(from, Case::Sensitive)?; self.custom_mappings.push((matcher, to)); Ok(()) } @@ -261,4 +275,41 @@ mod tests { Some(MappingTarget::MapTo("alpha")) ); } + + #[test] + fn case_sensitive_custom_mappings_work() { + let mut map = SyntaxMapping::new(); + map.insert_case_sensitive("MY_SPECIAL_FILE", MappingTarget::MapTo("Python")) + .expect("a literal glob pattern should compile"); + + // Exact case matches + assert_eq!( + map.get_syntax_for("/path/to/MY_SPECIAL_FILE"), + Some(MappingTarget::MapTo("Python")) + ); + // Different case should NOT match the case-sensitive rule + assert_eq!(map.get_syntax_for("/path/to/my_special_file"), None); + assert_eq!(map.get_syntax_for("/path/to/My_Special_File"), None); + } + + #[test] + fn builtin_mappings_build_is_case_sensitive() { + let map = SyntaxMapping::new(); + + // "BUILD" (uppercase) should map to Python via case-sensitive builtin + assert_eq!( + map.get_syntax_for("/path/to/BUILD"), + Some(MappingTarget::MapTo("Python")) + ); + // "build" (lowercase) should still map to MapToUnknown + assert_eq!( + map.get_syntax_for("/path/to/build"), + Some(MappingTarget::MapToUnknown) + ); + // Mixed case should NOT match the Python rule + assert_eq!( + map.get_syntax_for("/path/to/Build"), + Some(MappingTarget::MapToUnknown) + ); + } } diff --git a/src/syntax_mapping/builtin.rs b/src/syntax_mapping/builtin.rs index 1822be57..79d298c3 100644 --- a/src/syntax_mapping/builtin.rs +++ b/src/syntax_mapping/builtin.rs @@ -3,7 +3,7 @@ use std::env; use globset::GlobMatcher; use once_cell::sync::Lazy; -use crate::syntax_mapping::{make_glob_matcher, MappingTarget}; +use crate::syntax_mapping::{make_glob_matcher, Case, MappingTarget}; // Static syntax mappings generated from /src/syntax_mapping/builtins/ by the // build script (/build/syntax_mapping.rs). @@ -53,8 +53,8 @@ include!(concat!( /// A failure to compile is a fatal error.
/// /// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure. -fn build_matcher_fixed(from: &str) -> GlobMatcher { - make_glob_matcher(from).expect("A builtin fixed glob matcher failed to compile") +fn build_matcher_fixed(from: &str, case: Case) -> GlobMatcher { + make_glob_matcher(from, case).expect("A builtin fixed glob matcher failed to compile") } /// Join a list of matcher segments to create a glob string, replacing all @@ -64,7 +64,7 @@ fn build_matcher_fixed(from: &str) -> GlobMatcher { /// to compile. /// /// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure. -fn build_matcher_dynamic(segs: &[MatcherSegment]) -> Option<GlobMatcher> { +fn build_matcher_dynamic(segs: &[MatcherSegment], case: Case) -> Option<GlobMatcher> { // join segments let mut buf = String::new(); for seg in segs { @@ -77,7 +77,7 @@ fn build_matcher_dynamic(segs: &[MatcherSegment]) -> Option<GlobMatcher> { } } // compile glob matcher - let matcher = make_glob_matcher(&buf).ok()?; + let matcher = make_glob_matcher(&buf, case).ok()?; Some(matcher) } diff --git a/src/syntax_mapping/builtins/README.md b/src/syntax_mapping/builtins/README.md index 29cf43ee..220e45df 100644 --- a/src/syntax_mapping/builtins/README.md +++ b/src/syntax_mapping/builtins/README.md @@ -20,12 +20,10 @@ syntax mappings defined by all TOML files, and embed them into the binary. ## File syntax -Each TOML file should contain a single section named `mappings`, with each of -its keys being a language identifier (first column of `bat -L`; also referred to -as "target"). +Each TOML file should contain a single section named `mappings`, with each of its keys being a language +identifier (first column of `bat -L`; also referred to as "target"). -The value of each key should be an array of strings, with each item being a glob -matcher. We will call each of these items a "rule". +The value of each key should be an array of "rules". Each rule is an object with a `glob` string and an optional `case_sensitive` boolean (default: `false`).
As a shorthand, a rule can also be written as a plain glob string, which is matched case-insensitively (the default). For example, if `foo-application` uses both TOML and YAML configuration files, we could write something like this: @@ -98,6 +96,15 @@ like this: ] ``` +### Case sensitivity + +By default, all glob patterns are matched case-insensitively. To match a pattern case-sensitively, use the object form of the rule with the `case_sensitive` option: + +```toml +[mappings] +"Python" = [{ glob = "BUILD", case_sensitive = true }] +``` + ## Ordering At compile time, all TOML files applicable to the target are processed in diff --git a/src/syntax_mapping/builtins/common/50-bazel.toml b/src/syntax_mapping/builtins/common/50-bazel.toml new file mode 100644 index 00000000..2ced1399 --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-bazel.toml @@ -0,0 +1,2 @@ +[mappings] +"Python" = [{ glob = "BUILD", case_sensitive = true }]