1
0
mirror of https://github.com/sharkdp/bat synced 2026-06-09 10:03:18 +00:00

Imprv: clean up matcher glob parsing logic

- Data flow is now strictly linear from `RawMatcher` to `Matcher`
  - I've also hoisted `RawMatcher` in front of `Matcher` to signal this
- Re-type `RawMatcher.case_sensitive` from `bool` to `Option<bool>`
  - This moves all parser logic away from `RawMatcher`, making it a more faithful representation of the data
- Favour a default const in `Matcher::try_from<RawMatcher>` over a `Default` impl on `Case`
  - Because the default choice of casing is a design decision of the logic, not an intrinsic property of the type
This commit is contained in:
cyqsimon
2026-03-25 14:49:50 +08:00
parent a500fb236a
commit 3e789f5241
+33 -36
View File
@@ -47,13 +47,24 @@ impl ToTokens for MappingTarget {
}
}
/// Helper type for deserializing a `Matcher` from either a plain string or a
/// `{ glob = "...", case_sensitive = true }` struct.
#[derive(Deserialize)]
#[serde(untagged)]
enum RawMatcher {
Simple(String),
Full {
glob: String,
case_sensitive: Option<bool>,
},
}
/// Whether a glob pattern should be matched case-sensitively or case-insensitively.
///
/// Mirrors the runtime `Case` type in `src/syntax_mapping.rs`.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Default)]
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
enum Case {
Sensitive,
#[default]
Insensitive,
}
impl ToTokens for Case {
@@ -66,11 +77,11 @@ impl ToTokens for Case {
}
}
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Deserialize)]
#[serde(try_from = "RawMatcher")]
/// A single matcher.
///
/// Codegen converts this into a `Lazy<Option<GlobMatcher>>`.
#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize)]
#[serde(try_from = "RawMatcher")]
struct Matcher {
segments: Vec<MatcherSegment>,
/// Whether the glob pattern should be matched case-sensitively.
@@ -78,7 +89,7 @@ struct Matcher {
/// Defaults to `Case::Insensitive` for backwards compatibility.
case: Case,
}
/// Parse a matcher.
/// Parse the glob pattern of a matcher.
///
/// Note that this implementation is rather strict: it will greedily interpret
/// every valid environment variable replacement as such, then immediately
@@ -92,9 +103,7 @@ struct Matcher {
///
/// Revision history:
/// - 2024-02-20: allow `{` and `}` (glob brace expansion)
impl FromStr for Matcher {
type Err = anyhow::Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
fn parse_glob(s: &str) -> Result<Vec<MatcherSegment>, anyhow::Error> {
use MatcherSegment as Seg;
static VAR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"\$\{([\w\d_]+)\}").unwrap());
@@ -142,43 +151,31 @@ impl FromStr for Matcher {
bail!(r#"Invalid matcher: "{s}""#);
}
Ok(Self {
segments: non_empty_segments,
case: Case::Insensitive,
})
Ok(non_empty_segments)
}
}
/// Helper type for deserializing a `Matcher` from either a plain string or a
/// `{ glob = "...", case_sensitive = true }` struct.
#[derive(Deserialize)]
#[serde(untagged)]
enum RawMatcher {
Simple(String),
Full {
glob: String,
#[serde(default)]
case_sensitive: bool,
},
}
impl TryFrom<RawMatcher> for Matcher {
type Error = anyhow::Error;
fn try_from(raw: RawMatcher) -> Result<Self, Self::Error> {
match raw {
RawMatcher::Simple(s) => Matcher::from_str(&s),
const DEFAULT_CASE: Case = Case::Insensitive;
match &raw {
RawMatcher::Simple(s) => {
let segments = parse_glob(s)?;
Ok(Self {
segments,
case: DEFAULT_CASE,
})
}
RawMatcher::Full {
glob,
case_sensitive,
} => {
let mut matcher = Matcher::from_str(&glob)?;
matcher.case = if case_sensitive {
Case::Sensitive
} else {
Case::Insensitive
let segments = parse_glob(glob)?;
let case = match case_sensitive {
None => DEFAULT_CASE,
Some(false) => Case::Insensitive,
Some(true) => Case::Sensitive,
};
Ok(matcher)
Ok(Self { segments, case })
}
}
}