1
0
mirror of https://github.com/sharkdp/bat synced 2026-06-09 10:03:18 +00:00

Imprv: clean up matcher glob parsing logic

- Data flow is now strictly linear from `RawMatcher` to `Matcher`
  - I've also hoisted `RawMatcher` in front of `Matcher` to signal this
- Re-type `RawMatcher.case_sensitive` from `bool` to `Option<bool>`
  - This moves all parser logic away from `RawMatcher`, making it a more faithful representation of the data
- Favour a default const in `Matcher::try_from<RawMatcher>` over a `Default` impl on `Case`
  - Because the default choice of casing is a design decision of the logic, not an intrinsic property of the type
This commit is contained in:
cyqsimon
2026-03-25 14:49:50 +08:00
parent a500fb236a
commit 3e789f5241
+75 -78
View File
@@ -47,13 +47,24 @@ impl ToTokens for MappingTarget {
} }
} }
/// Helper type for deserializing a `Matcher` from either a plain string or a
/// `{ glob = "...", case_sensitive = true }` struct.
///
/// This is the raw, unvalidated form of the input: the glob is not parsed
/// here and no default casing is chosen. Both steps happen when the value is
/// converted via `Matcher::try_from`.
#[derive(Deserialize)]
#[serde(untagged)]
enum RawMatcher {
/// A bare glob pattern string, carrying no casing preference.
Simple(String),
/// A glob pattern with an optional explicit casing preference.
Full {
/// The glob pattern, still unparsed.
glob: String,
/// `Some(true)` requests case-sensitive matching and `Some(false)`
/// case-insensitive matching; `None` leaves the choice to the default
/// applied in `Matcher::try_from`.
case_sensitive: Option<bool>,
},
}
/// Whether a glob pattern should be matched case-sensitively or case-insensitively. /// Whether a glob pattern should be matched case-sensitively or case-insensitively.
/// ///
/// Mirrors the runtime `Case` type in `src/syntax_mapping.rs`. /// Mirrors the runtime `Case` type in `src/syntax_mapping.rs`.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Default)] #[derive(Clone, Debug, PartialEq, Eq, Hash)]
enum Case { enum Case {
Sensitive, Sensitive,
#[default]
Insensitive, Insensitive,
} }
impl ToTokens for Case { impl ToTokens for Case {
@@ -66,11 +77,11 @@ impl ToTokens for Case {
} }
} }
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Deserialize)]
#[serde(try_from = "RawMatcher")]
/// A single matcher. /// A single matcher.
/// ///
/// Codegen converts this into a `Lazy<Option<GlobMatcher>>`. /// Codegen converts this into a `Lazy<Option<GlobMatcher>>`.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize)]
#[serde(try_from = "RawMatcher")]
struct Matcher { struct Matcher {
segments: Vec<MatcherSegment>, segments: Vec<MatcherSegment>,
/// Whether the glob pattern should be matched case-sensitively. /// Whether the glob pattern should be matched case-sensitively.
@@ -78,7 +89,7 @@ struct Matcher {
/// Defaults to `Case::Insensitive` for backwards compatibility. /// Defaults to `Case::Insensitive` for backwards compatibility.
case: Case, case: Case,
} }
/// Parse a matcher. /// Parse the glob pattern of a matcher.
/// ///
/// Note that this implementation is rather strict: it will greedily interpret /// Note that this implementation is rather strict: it will greedily interpret
/// every valid environment variable replacement as such, then immediately /// every valid environment variable replacement as such, then immediately
@@ -92,93 +103,79 @@ struct Matcher {
/// ///
/// Revision history: /// Revision history:
/// - 2024-02-20: allow `{` and `}` (glob brace expansion) /// - 2024-02-20: allow `{` and `}` (glob brace expansion)
impl FromStr for Matcher { fn parse_glob(s: &str) -> Result<Vec<MatcherSegment>, anyhow::Error> {
type Err = anyhow::Error; use MatcherSegment as Seg;
fn from_str(s: &str) -> Result<Self, Self::Err> { static VAR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"\$\{([\w\d_]+)\}").unwrap());
use MatcherSegment as Seg;
static VAR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"\$\{([\w\d_]+)\}").unwrap());
let mut segments = vec![]; let mut segments = vec![];
let mut text_start = 0; let mut text_start = 0;
for capture in VAR_REGEX.captures_iter(s) { for capture in VAR_REGEX.captures_iter(s) {
let match_0 = capture.get(0).unwrap(); let match_0 = capture.get(0).unwrap();
// text before this var // text before this var
let text_end = match_0.start(); let text_end = match_0.start();
segments.push(Seg::Text(s[text_start..text_end].into())); segments.push(Seg::Text(s[text_start..text_end].into()));
text_start = match_0.end(); text_start = match_0.end();
// this var // this var
segments.push(Seg::Env(capture.get(1).unwrap().as_str().into())); segments.push(Seg::Env(capture.get(1).unwrap().as_str().into()));
}
// possible trailing text
segments.push(Seg::Text(s[text_start..].into()));
// cleanup empty text segments
let non_empty_segments = segments
.into_iter()
.filter(|seg| seg.text().map(|t| !t.is_empty()).unwrap_or(true))
.collect_vec();
// sanity check
if non_empty_segments
.windows(2)
.any(|segs| segs[0].is_text() && segs[1].is_text())
{
unreachable!("Parsed into consecutive text segments: {non_empty_segments:?}");
}
// guard empty case
if non_empty_segments.is_empty() {
bail!(r#"Parsed an empty matcher: "{s}""#);
}
// guard variable syntax leftover fragments
if non_empty_segments
.iter()
.filter_map(Seg::text)
.any(|t| t.contains('$'))
{
bail!(r#"Invalid matcher: "{s}""#);
}
Ok(Self {
segments: non_empty_segments,
case: Case::Insensitive,
})
} }
} // possible trailing text
segments.push(Seg::Text(s[text_start..].into()));
/// Helper type for deserializing a `Matcher` from either a plain string or a // cleanup empty text segments
/// `{ glob = "...", case_sensitive = true }` struct. let non_empty_segments = segments
#[derive(Deserialize)] .into_iter()
#[serde(untagged)] .filter(|seg| seg.text().map(|t| !t.is_empty()).unwrap_or(true))
enum RawMatcher { .collect_vec();
Simple(String),
Full {
glob: String,
#[serde(default)]
case_sensitive: bool,
},
}
// sanity check
if non_empty_segments
.windows(2)
.any(|segs| segs[0].is_text() && segs[1].is_text())
{
unreachable!("Parsed into consecutive text segments: {non_empty_segments:?}");
}
// guard empty case
if non_empty_segments.is_empty() {
bail!(r#"Parsed an empty matcher: "{s}""#);
}
// guard variable syntax leftover fragments
if non_empty_segments
.iter()
.filter_map(Seg::text)
.any(|t| t.contains('$'))
{
bail!(r#"Invalid matcher: "{s}""#);
}
Ok(non_empty_segments)
}
impl TryFrom<RawMatcher> for Matcher { impl TryFrom<RawMatcher> for Matcher {
type Error = anyhow::Error; type Error = anyhow::Error;
fn try_from(raw: RawMatcher) -> Result<Self, Self::Error> { fn try_from(raw: RawMatcher) -> Result<Self, Self::Error> {
match raw { const DEFAULT_CASE: Case = Case::Insensitive;
RawMatcher::Simple(s) => Matcher::from_str(&s), match &raw {
RawMatcher::Simple(s) => {
let segments = parse_glob(s)?;
Ok(Self {
segments,
case: DEFAULT_CASE,
})
}
RawMatcher::Full { RawMatcher::Full {
glob, glob,
case_sensitive, case_sensitive,
} => { } => {
let mut matcher = Matcher::from_str(&glob)?; let segments = parse_glob(glob)?;
matcher.case = if case_sensitive { let case = match case_sensitive {
Case::Sensitive None => DEFAULT_CASE,
} else { Some(false) => Case::Insensitive,
Case::Insensitive Some(true) => Case::Sensitive,
}; };
Ok(matcher) Ok(Self { segments, case })
} }
} }
} }