mirror of
https://github.com/sharkdp/bat
synced 2026-06-09 10:03:18 +00:00
Add case-sensitive glob support to syntax mapping
to allow us to map `BUILD` case-sensitively to Python for Skylark
This commit is contained in:
+35
-5
@@ -51,7 +51,13 @@ impl ToTokens for MappingTarget {
|
||||
/// A single matcher.
|
||||
///
|
||||
/// Codegen converts this into a `Lazy<Option<GlobMatcher>>`.
|
||||
struct Matcher(Vec<MatcherSegment>);
|
||||
struct Matcher {
|
||||
/// The segments that make up this matcher. Codegen treats an empty list as
/// unreachable.
segments: Vec<MatcherSegment>,
|
||||
/// Whether the glob pattern should be matched case-insensitively.
|
||||
///
|
||||
/// Defaults to `true` (case-insensitive) for backwards compatibility.
/// Matchers read from `case_sensitive_mappings` have this set to `false`.
|
||||
case_insensitive: bool,
|
||||
}
|
||||
/// Parse a matcher.
|
||||
///
|
||||
/// Note that this implementation is rather strict: it will greedily interpret
|
||||
@@ -116,18 +122,24 @@ impl FromStr for Matcher {
|
||||
bail!(r#"Invalid matcher: "{s}""#);
|
||||
}
|
||||
|
||||
Ok(Self(non_empty_segments))
|
||||
Ok(Self {
|
||||
segments: non_empty_segments,
|
||||
case_insensitive: true,
|
||||
})
|
||||
}
|
||||
}
|
||||
impl ToTokens for Matcher {
|
||||
fn to_tokens(&self, tokens: &mut TokenStream) {
|
||||
let t = match self.0.as_slice() {
|
||||
let case_insensitive = self.case_insensitive;
|
||||
let t = match self.segments.as_slice() {
|
||||
[] => unreachable!("0-length matcher should never be created"),
|
||||
[MatcherSegment::Text(text)] => {
|
||||
quote! { Lazy::new(|| Some(build_matcher_fixed(#text))) }
|
||||
quote! { Lazy::new(|| Some(build_matcher_fixed(#text, #case_insensitive))) }
|
||||
}
|
||||
// parser logic ensures that this case can only happen when there are dynamic segments
|
||||
segs @ [_, ..] => quote! { Lazy::new(|| build_matcher_dynamic(&[ #(#segs),* ])) },
|
||||
segs @ [_, ..] => {
|
||||
quote! { Lazy::new(|| build_matcher_dynamic(&[ #(#segs),* ], #case_insensitive)) }
|
||||
}
|
||||
};
|
||||
tokens.append_all(t);
|
||||
}
|
||||
@@ -175,7 +187,12 @@ impl MatcherSegment {
|
||||
/// A struct that models a single .toml file in /src/syntax_mapping/builtins/.
///
/// Both sections are optional (`#[serde(default)]`), so a file may provide
/// either one or both.
|
||||
#[derive(Clone, Debug, Deserialize)]
|
||||
struct MappingDefModel {
|
||||
/// Mappings whose glob patterns keep the parsed default, i.e. they are
/// matched case-insensitively.
#[serde(default)]
|
||||
mappings: IndexMap<MappingTarget, Vec<Matcher>>,
|
||||
/// Case-sensitive mappings. Unlike `mappings`, these glob patterns are
|
||||
/// matched case-sensitively.
|
||||
#[serde(default)]
|
||||
case_sensitive_mappings: IndexMap<MappingTarget, Vec<Matcher>>,
|
||||
}
|
||||
impl MappingDefModel {
|
||||
fn into_mapping_list(self) -> MappingList {
|
||||
@@ -188,6 +205,19 @@ impl MappingDefModel {
|
||||
.map(|matcher| (matcher, target.clone()))
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.chain(
|
||||
self.case_sensitive_mappings
|
||||
.into_iter()
|
||||
.flat_map(|(target, matchers)| {
|
||||
matchers
|
||||
.into_iter()
|
||||
.map(|mut matcher| {
|
||||
matcher.case_insensitive = false;
|
||||
(matcher, target.clone())
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
}),
|
||||
)
|
||||
.collect();
|
||||
MappingList(list)
|
||||
}
|
||||
|
||||
+47
-3
@@ -17,9 +17,9 @@ use ignored_suffixes::IgnoredSuffixes;
|
||||
mod builtin;
|
||||
pub mod ignored_suffixes;
|
||||
|
||||
fn make_glob_matcher(from: &str) -> Result<GlobMatcher> {
|
||||
fn make_glob_matcher(from: &str, case_insensitive: bool) -> Result<GlobMatcher> {
|
||||
let matcher = GlobBuilder::new(from)
|
||||
.case_insensitive(true)
|
||||
.case_insensitive(case_insensitive)
|
||||
.literal_separator(true)
|
||||
.build()?
|
||||
.compile_matcher();
|
||||
@@ -97,7 +97,14 @@ impl<'a> SyntaxMapping<'a> {
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> {
|
||||
let matcher = make_glob_matcher(from)?;
|
||||
let matcher = make_glob_matcher(from, true)?;
|
||||
self.custom_mappings.push((matcher, to));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Like [`Self::insert`], but the glob pattern is matched case-sensitively.
///
/// Returns an error if `from` cannot be compiled into a glob matcher; otherwise
/// the matcher is appended to the custom mappings.
|
||||
pub fn insert_case_sensitive(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> {
|
||||
// `false` here means "do not ignore case" when building the glob matcher.
let matcher = make_glob_matcher(from, false)?;
|
||||
self.custom_mappings.push((matcher, to));
|
||||
Ok(())
|
||||
}
|
||||
@@ -261,4 +268,41 @@ mod tests {
|
||||
Some(MappingTarget::MapTo("alpha"))
|
||||
);
|
||||
}
|
||||
|
||||
// A custom mapping added through `insert_case_sensitive` must match only the
// exact-case file name.
#[test]
|
||||
fn case_sensitive_custom_mappings_work() {
|
||||
let mut map = SyntaxMapping::new();
|
||||
// The `Result` of the insertion is discarded with `.ok()`.
map.insert_case_sensitive("MY_SPECIAL_FILE", MappingTarget::MapTo("Python"))
|
||||
.ok();
|
||||
|
||||
// Exact case matches
|
||||
assert_eq!(
|
||||
map.get_syntax_for("/path/to/MY_SPECIAL_FILE"),
|
||||
Some(MappingTarget::MapTo("Python"))
|
||||
);
|
||||
// Different case should NOT match the case-sensitive rule
|
||||
assert_eq!(map.get_syntax_for("/path/to/my_special_file"), None);
|
||||
assert_eq!(map.get_syntax_for("/path/to/My_Special_File"), None);
|
||||
}
|
||||
|
||||
// Checks the builtin mappings (no custom rules are inserted): only the
// exact-case name `BUILD` resolves to Python.
// NOTE(review): the `MapToUnknown` expectations for "build"/"Build" presumably
// come from a separate builtin rule that is not visible here — confirm.
#[test]
|
||||
fn builtin_mappings_build_is_case_sensitive() {
|
||||
let map = SyntaxMapping::new();
|
||||
|
||||
// "BUILD" (uppercase) should map to Python via case-sensitive builtin
|
||||
assert_eq!(
|
||||
map.get_syntax_for("/path/to/BUILD"),
|
||||
Some(MappingTarget::MapTo("Python"))
|
||||
);
|
||||
// "build" (lowercase) should still map to MapToUnknown
|
||||
assert_eq!(
|
||||
map.get_syntax_for("/path/to/build"),
|
||||
Some(MappingTarget::MapToUnknown)
|
||||
);
|
||||
// Mixed case should NOT match the Python rule
|
||||
assert_eq!(
|
||||
map.get_syntax_for("/path/to/Build"),
|
||||
Some(MappingTarget::MapToUnknown)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -53,8 +53,9 @@ include!(concat!(
|
||||
/// A failure to compile is a fatal error.
|
||||
///
|
||||
/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure.
|
||||
fn build_matcher_fixed(from: &str) -> GlobMatcher {
|
||||
make_glob_matcher(from).expect("A builtin fixed glob matcher failed to compile")
|
||||
fn build_matcher_fixed(from: &str, case_insensitive: bool) -> GlobMatcher {
|
||||
make_glob_matcher(from, case_insensitive)
|
||||
.expect("A builtin fixed glob matcher failed to compile")
|
||||
}
|
||||
|
||||
/// Join a list of matcher segments to create a glob string, replacing all
|
||||
@@ -64,7 +65,7 @@ fn build_matcher_fixed(from: &str) -> GlobMatcher {
|
||||
/// to compile.
|
||||
///
|
||||
/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure.
|
||||
fn build_matcher_dynamic(segs: &[MatcherSegment]) -> Option<GlobMatcher> {
|
||||
fn build_matcher_dynamic(segs: &[MatcherSegment], case_insensitive: bool) -> Option<GlobMatcher> {
|
||||
// join segments
|
||||
let mut buf = String::new();
|
||||
for seg in segs {
|
||||
@@ -77,7 +78,7 @@ fn build_matcher_dynamic(segs: &[MatcherSegment]) -> Option<GlobMatcher> {
|
||||
}
|
||||
}
|
||||
// compile glob matcher
|
||||
let matcher = make_glob_matcher(&buf).ok()?;
|
||||
let matcher = make_glob_matcher(&buf, case_insensitive).ok()?;
|
||||
Some(matcher)
|
||||
}
|
||||
|
||||
|
||||
@@ -20,9 +20,9 @@ syntax mappings defined by all TOML files, and embed them into the binary.
|
||||
|
||||
## File syntax
|
||||
|
||||
Each TOML file should contain a single section named `mappings`, with each of
|
||||
its keys being a language identifier (first column of `bat -L`; also referred to
|
||||
as "target").
|
||||
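Each TOML file should contain a section named `mappings`, a section named
|
||||
`case_sensitive_mappings`, or both. Glob patterns under `mappings` are matched
|
||||
case-insensitively; those under `case_sensitive_mappings` are matched
|
||||
case-sensitively. In either section, each key is a language identifier (first
|
||||
column of `bat -L`; also referred to as "target").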
|
||||
|
||||
The value of each key should be an array of strings, with each item being a glob
|
||||
matcher. We will call each of these items a "rule".
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
[mappings]
|
||||
[case_sensitive_mappings]
|
||||
"Python" = ["BUILD"]
|
||||
|
||||
Reference in New Issue
Block a user