1
0
mirror of https://github.com/sharkdp/bat synced 2026-06-09 10:03:18 +00:00

Add case-sensitive glob support to syntax mapping

to allow us to map `BUILD` case sensitively to Python for Skylark
This commit is contained in:
Keith Hall
2026-03-14 09:18:56 +02:00
parent 5a4a7de933
commit 56fe0fa226
5 changed files with 91 additions and 16 deletions
+35 -5
View File
@@ -51,7 +51,13 @@ impl ToTokens for MappingTarget {
/// A single matcher.
///
/// Codegen converts this into a `Lazy<Option<GlobMatcher>>`.
struct Matcher(Vec<MatcherSegment>);
struct Matcher {
/// The segments this matcher is made of.
///
/// Expected to be non-empty: codegen treats a 0-length matcher as unreachable.
segments: Vec<MatcherSegment>,
/// Whether the glob pattern should be matched case-insensitively.
///
/// Defaults to `true` (case-insensitive) for backwards compatibility; this is
/// the value assigned when a matcher is parsed via `FromStr`. The value is
/// forwarded verbatim to the generated `build_matcher_*` call.
case_insensitive: bool,
}
/// Parse a matcher.
///
/// Note that this implementation is rather strict: it will greedily interpret
@@ -116,18 +122,24 @@ impl FromStr for Matcher {
bail!(r#"Invalid matcher: "{s}""#);
}
Ok(Self(non_empty_segments))
Ok(Self {
segments: non_empty_segments,
case_insensitive: true,
})
}
}
/// Codegen for a matcher: emits a `Lazy::new(..)` expression that builds the
/// glob matcher on first use.
impl ToTokens for Matcher {
/// Appends the generated expression for this matcher to `tokens`.
///
/// A matcher made of exactly one text segment becomes a call to
/// `build_matcher_fixed`; every other non-empty matcher becomes a call to
/// `build_matcher_dynamic` over all of its segments.
fn to_tokens(&self, tokens: &mut TokenStream) {
let t = match self.0.as_slice() {
// Copy the flag into a local: `quote!` interpolates plain identifiers
// (`#case_insensitive`), not field accesses.
let case_insensitive = self.case_insensitive;
let t = match self.segments.as_slice() {
[] => unreachable!("0-length matcher should never be created"),
// A single literal text segment: the generated call wraps the matcher in `Some`.
[MatcherSegment::Text(text)] => {
quote! { Lazy::new(|| Some(build_matcher_fixed(#text))) }
quote! { Lazy::new(|| Some(build_matcher_fixed(#text, #case_insensitive))) }
}
// parser logic ensures that this case can only happen when there are dynamic segments
segs @ [_, ..] => quote! { Lazy::new(|| build_matcher_dynamic(&[ #(#segs),* ])) },
segs @ [_, ..] => {
quote! { Lazy::new(|| build_matcher_dynamic(&[ #(#segs),* ], #case_insensitive)) }
}
};
tokens.append_all(t);
}
}
@@ -175,7 +187,12 @@ impl MatcherSegment {
/// A struct that models a single .toml file in /src/syntax_mapping/builtins/.
///
/// Both sections are marked `#[serde(default)]`, so a file may omit either one;
/// an omitted section deserializes to an empty map.
#[derive(Clone, Debug, Deserialize)]
struct MappingDefModel {
/// Regular mappings. Matchers parsed from this section keep the parser's
/// default of case-insensitive matching.
#[serde(default)]
mappings: IndexMap<MappingTarget, Vec<Matcher>>,
/// Case-sensitive mappings. Unlike `mappings`, these glob patterns are
/// matched case-sensitively.
#[serde(default)]
case_sensitive_mappings: IndexMap<MappingTarget, Vec<Matcher>>,
}
impl MappingDefModel {
fn into_mapping_list(self) -> MappingList {
@@ -188,6 +205,19 @@ impl MappingDefModel {
.map(|matcher| (matcher, target.clone()))
.collect::<Vec<_>>()
})
.chain(
self.case_sensitive_mappings
.into_iter()
.flat_map(|(target, matchers)| {
matchers
.into_iter()
.map(|mut matcher| {
matcher.case_insensitive = false;
(matcher, target.clone())
})
.collect::<Vec<_>>()
}),
)
.collect();
MappingList(list)
}
+47 -3
View File
@@ -17,9 +17,9 @@ use ignored_suffixes::IgnoredSuffixes;
mod builtin;
pub mod ignored_suffixes;
fn make_glob_matcher(from: &str) -> Result<GlobMatcher> {
fn make_glob_matcher(from: &str, case_insensitive: bool) -> Result<GlobMatcher> {
let matcher = GlobBuilder::new(from)
.case_insensitive(true)
.case_insensitive(case_insensitive)
.literal_separator(true)
.build()?
.compile_matcher();
@@ -97,7 +97,14 @@ impl<'a> SyntaxMapping<'a> {
}
/// Adds a custom mapping from the glob pattern `from` to the target `to`.
///
/// The pattern is matched case-insensitively; see
/// [`Self::insert_case_sensitive`] for the exact-case variant.
///
/// # Errors
///
/// Returns the error from `make_glob_matcher` if the matcher cannot be built;
/// in that case nothing is added to the custom mappings.
pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> {
let matcher = make_glob_matcher(from)?;
// `true` selects case-insensitive matching.
let matcher = make_glob_matcher(from, true)?;
self.custom_mappings.push((matcher, to));
Ok(())
}
/// Adds a custom mapping whose glob pattern only matches with exact letter case.
///
/// This is the case-sensitive counterpart of [`Self::insert`]: the matcher is
/// built with case-insensitivity switched off and then appended to the custom
/// mappings.
///
/// # Errors
///
/// Returns the error from `make_glob_matcher` if the matcher cannot be built;
/// in that case nothing is added to the custom mappings.
pub fn insert_case_sensitive(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> {
    // `false` = do not ignore case when matching.
    make_glob_matcher(from, false).map(|glob| self.custom_mappings.push((glob, to)))
}
@@ -261,4 +268,41 @@ mod tests {
Some(MappingTarget::MapTo("alpha"))
);
}
#[test]
fn case_sensitive_custom_mappings_work() {
    let mut mapping = SyntaxMapping::new();
    mapping
        .insert_case_sensitive("MY_SPECIAL_FILE", MappingTarget::MapTo("Python"))
        .ok();

    // A file name spelled exactly like the pattern hits the rule.
    let exact = mapping.get_syntax_for("/path/to/MY_SPECIAL_FILE");
    assert_eq!(exact, Some(MappingTarget::MapTo("Python")));

    // Any other casing must fall through the case-sensitive rule.
    for differently_cased in ["/path/to/my_special_file", "/path/to/My_Special_File"] {
        assert_eq!(mapping.get_syntax_for(differently_cased), None);
    }
}
#[test]
fn builtin_mappings_build_is_case_sensitive() {
    let map = SyntaxMapping::new();

    // (path, expected target), checked in this order.
    let expectations = [
        // "BUILD" (uppercase) maps to Python via the case-sensitive builtin.
        ("/path/to/BUILD", MappingTarget::MapTo("Python")),
        // "build" (lowercase) still maps to MapToUnknown.
        ("/path/to/build", MappingTarget::MapToUnknown),
        // Mixed case must NOT hit the Python rule either.
        ("/path/to/Build", MappingTarget::MapToUnknown),
    ];

    for (path, expected) in expectations {
        assert_eq!(map.get_syntax_for(path), Some(expected));
    }
}
}
+5 -4
View File
@@ -53,8 +53,9 @@ include!(concat!(
/// A failure to compile is a fatal error.
///
/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure.
fn build_matcher_fixed(from: &str) -> GlobMatcher {
make_glob_matcher(from).expect("A builtin fixed glob matcher failed to compile")
/// Compiles the fixed glob string `from` into a matcher, honouring the
/// requested case sensitivity.
///
/// Panics if the glob does not compile.
fn build_matcher_fixed(from: &str, case_insensitive: bool) -> GlobMatcher {
    let compiled = make_glob_matcher(from, case_insensitive);
    compiled.expect("A builtin fixed glob matcher failed to compile")
}
/// Join a list of matcher segments to create a glob string, replacing all
@@ -64,7 +65,7 @@ fn build_matcher_fixed(from: &str) -> GlobMatcher {
/// to compile.
///
/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure.
fn build_matcher_dynamic(segs: &[MatcherSegment]) -> Option<GlobMatcher> {
fn build_matcher_dynamic(segs: &[MatcherSegment], case_insensitive: bool) -> Option<GlobMatcher> {
// join segments
let mut buf = String::new();
for seg in segs {
@@ -77,7 +78,7 @@ fn build_matcher_dynamic(segs: &[MatcherSegment]) -> Option<GlobMatcher> {
}
}
// compile glob matcher
let matcher = make_glob_matcher(&buf).ok()?;
let matcher = make_glob_matcher(&buf, case_insensitive).ok()?;
Some(matcher)
}
+3 -3
View File
@@ -20,9 +20,9 @@ syntax mappings defined by all TOML files, and embed them into the binary.
## File syntax
Each TOML file should contain a single section named `mappings`, with each of
its keys being a language identifier (first column of `bat -L`; also referred to
as "target").
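Each TOML file should contain a section named `mappings`, a section named
`case_sensitive_mappings`, or both (at most one of each). Rules under `mappings`
are matched case-insensitively; rules under `case_sensitive_mappings` are
matched case-sensitively. In either section, each key is a language identifier
(first column of `bat -L`; also referred to as "target").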
The value of each key should be an array of strings, with each item being a glob
matcher. We will call each of these items a "rule".
@@ -1,2 +1,2 @@
[mappings]
[case_sensitive_mappings]
"Python" = ["BUILD"]