1
0
mirror of https://github.com/sharkdp/bat synced 2026-06-09 10:03:18 +00:00

Merge pull request #3576 from vorburger/patch-2

feat: Map BUILD to Python (Starlark) for Bazel (fixes #3575)
This commit is contained in:
Keith Hall
2026-03-22 20:14:05 +02:00
committed by GitHub
6 changed files with 148 additions and 19 deletions
+1
View File
@@ -13,6 +13,7 @@
- Add `--quiet-empty` (`-E`) flag to suppress output when input is empty. Closes #1936, see #3563 (@NORMAL-EX)
- Improve native man pages and command help syntax highlighting by stripping overstriking, see #3517 (@akirk)
- Add `--fallback-syntax`/`--fallback-language` to apply syntax highlighting only when auto-detection fails, see #1341 (@Xavrir)
- Map `BUILD` case-sensitively to Python (Starlark) for Bazel, see #3576 (@vorburger)
## Bugfixes
- Fix inconsistent `.deb` MUSL package names (aarch64-musl used `arm64` instead of `musl-linux-arm64`, and `musleabihf` target missed `bat-musl` prefix). Closes #3482, see #3642 (@mvanhorn)
+74 -6
View File
@@ -47,11 +47,37 @@ impl ToTokens for MappingTarget {
}
}
#[derive(Clone, Debug, PartialEq, Eq, Hash, DeserializeFromStr)]
/// Whether a glob pattern should be matched case-sensitively or case-insensitively.
///
/// Mirrors the runtime `Case` type in `src/syntax_mapping.rs`.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Default)]
enum Case {
/// Match the glob pattern with exact letter case.
Sensitive,
/// Match the glob pattern ignoring letter case. This is the `Default` variant.
#[default]
Insensitive,
}
impl ToTokens for Case {
    /// Emit the path of the matching variant (`Case::Sensitive` or
    /// `Case::Insensitive`) into the generated token stream.
    fn to_tokens(&self, tokens: &mut TokenStream) {
        tokens.append_all(match self {
            Self::Sensitive => quote! { Case::Sensitive },
            Self::Insensitive => quote! { Case::Insensitive },
        });
    }
}
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Deserialize)]
#[serde(try_from = "RawMatcher")]
/// A single matcher.
///
/// Codegen converts this into a `Lazy<Option<GlobMatcher>>`.
struct Matcher(Vec<MatcherSegment>);
struct Matcher {
segments: Vec<MatcherSegment>,
/// Whether the glob pattern should be matched case-sensitively.
///
/// Defaults to `Case::Insensitive` for backwards compatibility.
case: Case,
}
/// Parse a matcher.
///
/// Note that this implementation is rather strict: it will greedily interpret
@@ -116,18 +142,59 @@ impl FromStr for Matcher {
bail!(r#"Invalid matcher: "{s}""#);
}
Ok(Self(non_empty_segments))
Ok(Self {
segments: non_empty_segments,
case: Case::Insensitive,
})
}
}
/// Helper type for deserializing a `Matcher` from either a plain string or a
/// `{ glob = "...", case_sensitive = true }` struct.
#[derive(Deserialize)]
#[serde(untagged)]
enum RawMatcher {
/// Shorthand form: the rule is just the glob string.
Simple(String),
/// Object form: a glob string plus an explicit case-sensitivity flag.
Full {
/// The glob pattern; parsed with the same `Matcher::from_str` as the shorthand form.
glob: String,
/// Whether to match case-sensitively. `false` when the key is omitted.
#[serde(default)]
case_sensitive: bool,
},
}
impl TryFrom<RawMatcher> for Matcher {
    type Error = anyhow::Error;

    /// Build a `Matcher` from its raw deserialized form.
    ///
    /// The shorthand (plain string) form is parsed as-is. The object form is
    /// parsed the same way and then has its `case` overridden according to the
    /// `case_sensitive` flag.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `Matcher::from_str` for the glob.
    fn try_from(raw: RawMatcher) -> Result<Self, Self::Error> {
        let (glob, case_sensitive) = match raw {
            // Shorthand: nothing to override, return the parsed matcher directly.
            RawMatcher::Simple(pattern) => return Matcher::from_str(&pattern),
            RawMatcher::Full {
                glob,
                case_sensitive,
            } => (glob, case_sensitive),
        };

        let mut parsed = Matcher::from_str(&glob)?;
        parsed.case = if case_sensitive {
            Case::Sensitive
        } else {
            Case::Insensitive
        };
        Ok(parsed)
    }
}
impl ToTokens for Matcher {
fn to_tokens(&self, tokens: &mut TokenStream) {
let t = match self.0.as_slice() {
let case = &self.case;
let t = match self.segments.as_slice() {
[] => unreachable!("0-length matcher should never be created"),
[MatcherSegment::Text(text)] => {
quote! { Lazy::new(|| Some(build_matcher_fixed(#text))) }
quote! { Lazy::new(|| Some(build_matcher_fixed(#text, #case))) }
}
// parser logic ensures that this case can only happen when there are dynamic segments
segs @ [_, ..] => quote! { Lazy::new(|| build_matcher_dynamic(&[ #(#segs),* ])) },
segs @ [_, ..] => {
quote! { Lazy::new(|| build_matcher_dynamic(&[ #(#segs),* ], #case)) }
}
};
tokens.append_all(t);
}
@@ -175,6 +242,7 @@ impl MatcherSegment {
/// A struct that models a single .toml file in /src/syntax_mapping/builtins/.
#[derive(Clone, Debug, Deserialize)]
struct MappingDefModel {
/// Matchers grouped by mapping target. Because of `#[serde(default)]`, a
/// file without a `mappings` key deserializes to an empty map.
#[serde(default)]
mappings: IndexMap<MappingTarget, Vec<Matcher>>,
}
impl MappingDefModel {
+54 -3
View File
@@ -17,9 +17,16 @@ use ignored_suffixes::IgnoredSuffixes;
mod builtin;
pub mod ignored_suffixes;
fn make_glob_matcher(from: &str) -> Result<GlobMatcher> {
/// Whether a glob pattern should be matched case-sensitively or case-insensitively.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum Case {
/// Build the glob with case-insensitive matching turned off.
Sensitive,
/// Build the glob with case-insensitive matching turned on.
Insensitive,
}
fn make_glob_matcher(from: &str, case: Case) -> Result<GlobMatcher> {
let matcher = GlobBuilder::new(from)
.case_insensitive(true)
.case_insensitive(matches!(case, Case::Insensitive))
.literal_separator(true)
.build()?
.compile_matcher();
@@ -97,7 +104,14 @@ impl<'a> SyntaxMapping<'a> {
}
pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> {
let matcher = make_glob_matcher(from)?;
let matcher = make_glob_matcher(from, Case::Insensitive)?;
self.custom_mappings.push((matcher, to));
Ok(())
}
/// Register a custom mapping whose glob pattern is matched case-sensitively.
///
/// Otherwise behaves like [`Self::insert`]: the compiled matcher is appended
/// to the custom mappings together with its target.
///
/// # Errors
///
/// Returns an error if `from` cannot be compiled into a glob matcher.
pub fn insert_case_sensitive(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> {
    make_glob_matcher(from, Case::Sensitive)
        .map(|compiled| self.custom_mappings.push((compiled, to)))
}
@@ -261,4 +275,41 @@ mod tests {
Some(MappingTarget::MapTo("alpha"))
);
}
#[test]
fn case_sensitive_custom_mappings_work() {
    let mut map = SyntaxMapping::new();
    // The insertion result is deliberately discarded; the assertions below
    // would fail if the rule had not been registered.
    let _ = map.insert_case_sensitive("MY_SPECIAL_FILE", MappingTarget::MapTo("Python"));

    // A file name with exactly the same case hits the rule.
    let python = Some(MappingTarget::MapTo("Python"));
    assert_eq!(map.get_syntax_for("/path/to/MY_SPECIAL_FILE"), python);

    // Any other casing must fall through the case-sensitive rule.
    assert_eq!(map.get_syntax_for("/path/to/my_special_file"), None);
    assert_eq!(map.get_syntax_for("/path/to/My_Special_File"), None);
}
#[test]
fn builtin_mappings_build_is_case_sensitive() {
    let map = SyntaxMapping::new();
    let python = Some(MappingTarget::MapTo("Python"));
    let unknown = Some(MappingTarget::MapToUnknown);

    // Upper-case "BUILD" is picked up by the case-sensitive builtin rule.
    assert_eq!(map.get_syntax_for("/path/to/BUILD"), python);
    // Lower-case "build" keeps resolving to MapToUnknown.
    assert_eq!(map.get_syntax_for("/path/to/build"), unknown);
    // Mixed case must not be caught by the Python rule either.
    assert_eq!(map.get_syntax_for("/path/to/Build"), unknown);
}
}
+5 -5
View File
@@ -3,7 +3,7 @@ use std::env;
use globset::GlobMatcher;
use once_cell::sync::Lazy;
use crate::syntax_mapping::{make_glob_matcher, MappingTarget};
use crate::syntax_mapping::{make_glob_matcher, Case, MappingTarget};
// Static syntax mappings generated from /src/syntax_mapping/builtins/ by the
// build script (/build/syntax_mapping.rs).
@@ -53,8 +53,8 @@ include!(concat!(
/// A failure to compile is a fatal error.
///
/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure.
fn build_matcher_fixed(from: &str) -> GlobMatcher {
make_glob_matcher(from).expect("A builtin fixed glob matcher failed to compile")
fn build_matcher_fixed(from: &str, case: Case) -> GlobMatcher {
make_glob_matcher(from, case).expect("A builtin fixed glob matcher failed to compile")
}
/// Join a list of matcher segments to create a glob string, replacing all
@@ -64,7 +64,7 @@ fn build_matcher_fixed(from: &str) -> GlobMatcher {
/// to compile.
///
/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure.
fn build_matcher_dynamic(segs: &[MatcherSegment]) -> Option<GlobMatcher> {
fn build_matcher_dynamic(segs: &[MatcherSegment], case: Case) -> Option<GlobMatcher> {
// join segments
let mut buf = String::new();
for seg in segs {
@@ -77,7 +77,7 @@ fn build_matcher_dynamic(segs: &[MatcherSegment]) -> Option<GlobMatcher> {
}
}
// compile glob matcher
let matcher = make_glob_matcher(&buf).ok()?;
let matcher = make_glob_matcher(&buf, case).ok()?;
Some(matcher)
}
+12 -5
View File
@@ -20,12 +20,10 @@ syntax mappings defined by all TOML files, and embed them into the binary.
## File syntax
Each TOML file should contain a single section named `mappings`, with each of
its keys being a language identifier (first column of `bat -L`; also referred to
as "target").
Each TOML file should contain a single section named `mappings`, with each of its keys being a language
identifier (first column of `bat -L`; also referred to as "target").
The value of each key should be an array of strings, with each item being a glob
matcher. We will call each of these items a "rule".
The value of each key should be an array of "rules". A rule is either an object with a `glob` string and an optional `case_sensitive` boolean (defaulting to `false`), or, as shorthand, a plain glob string, which is matched case-insensitively.
For example, if `foo-application` uses both TOML and YAML configuration files,
we could write something like this:
@@ -98,6 +96,15 @@ like this:
]
```
### Case sensitivity
By default, all glob patterns are matched case-insensitively. To match a pattern case-sensitively, use the object form of the rule with the `case_sensitive` option:
```toml
[mappings]
"Python" = [{ glob = "BUILD", case_sensitive = true }]
```
## Ordering
At compile time, all TOML files applicable to the target are processed in
@@ -0,0 +1,2 @@
[mappings]
"Python" = [{ glob = "BUILD", case_sensitive = true }]