mirror of
https://github.com/sharkdp/bat
synced 2026-06-09 10:03:18 +00:00
Imprv: cleanup matcher glob parsing logic
- Data flow is now strictly linear from `RawMatcher` to `Matcher` - I've also hoisted `RawMatcher` in front of `Matcher` to signal this - Re-type `RawMatcher.case_sensitive` from `bool` to `Option<bool>` - This moves all parser logic away from `RawMatcher`, making it a more faithful representation of the data - Favour default consts in `Matcher::try_from<RawMatcher>` to `Default` impl on `Case` - Because the default choice of casing is a design decision of the logic, not an intrinsic property of the type
This commit is contained in:
+75
-78
@@ -47,13 +47,24 @@ impl ToTokens for MappingTarget {
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper type for deserializing a `Matcher` from either a plain string or a
|
||||
/// `{ glob = "...", case_sensitive = true }` struct.
|
||||
#[derive(Deserialize)]
|
||||
#[serde(untagged)]
|
||||
enum RawMatcher {
|
||||
Simple(String),
|
||||
Full {
|
||||
glob: String,
|
||||
case_sensitive: Option<bool>,
|
||||
},
|
||||
}
|
||||
|
||||
/// Whether a glob pattern should be matched case-sensitively or case-insensitively.
|
||||
///
|
||||
/// Mirrors the runtime `Case` type in `src/syntax_mapping.rs`.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash, Default)]
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||
enum Case {
|
||||
Sensitive,
|
||||
#[default]
|
||||
Insensitive,
|
||||
}
|
||||
impl ToTokens for Case {
|
||||
@@ -66,11 +77,11 @@ impl ToTokens for Case {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Deserialize)]
|
||||
#[serde(try_from = "RawMatcher")]
|
||||
/// A single matcher.
|
||||
///
|
||||
/// Codegen converts this into a `Lazy<Option<GlobMatcher>>`.
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize)]
|
||||
#[serde(try_from = "RawMatcher")]
|
||||
struct Matcher {
|
||||
segments: Vec<MatcherSegment>,
|
||||
/// Whether the glob pattern should be matched case-sensitively.
|
||||
@@ -78,7 +89,7 @@ struct Matcher {
|
||||
/// Defaults to `Case::Insensitive` for backwards compatibility.
|
||||
case: Case,
|
||||
}
|
||||
/// Parse a matcher.
|
||||
/// Parse the glob pattern of a matcher.
|
||||
///
|
||||
/// Note that this implementation is rather strict: it will greedily interpret
|
||||
/// every valid environment variable replacement as such, then immediately
|
||||
@@ -92,93 +103,79 @@ struct Matcher {
|
||||
///
|
||||
/// Revision history:
|
||||
/// - 2024-02-20: allow `{` and `}` (glob brace expansion)
|
||||
impl FromStr for Matcher {
|
||||
type Err = anyhow::Error;
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
use MatcherSegment as Seg;
|
||||
static VAR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"\$\{([\w\d_]+)\}").unwrap());
|
||||
fn parse_glob(s: &str) -> Result<Vec<MatcherSegment>, anyhow::Error> {
|
||||
use MatcherSegment as Seg;
|
||||
static VAR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"\$\{([\w\d_]+)\}").unwrap());
|
||||
|
||||
let mut segments = vec![];
|
||||
let mut text_start = 0;
|
||||
for capture in VAR_REGEX.captures_iter(s) {
|
||||
let match_0 = capture.get(0).unwrap();
|
||||
let mut segments = vec![];
|
||||
let mut text_start = 0;
|
||||
for capture in VAR_REGEX.captures_iter(s) {
|
||||
let match_0 = capture.get(0).unwrap();
|
||||
|
||||
// text before this var
|
||||
let text_end = match_0.start();
|
||||
segments.push(Seg::Text(s[text_start..text_end].into()));
|
||||
text_start = match_0.end();
|
||||
// text before this var
|
||||
let text_end = match_0.start();
|
||||
segments.push(Seg::Text(s[text_start..text_end].into()));
|
||||
text_start = match_0.end();
|
||||
|
||||
// this var
|
||||
segments.push(Seg::Env(capture.get(1).unwrap().as_str().into()));
|
||||
}
|
||||
// possible trailing text
|
||||
segments.push(Seg::Text(s[text_start..].into()));
|
||||
|
||||
// cleanup empty text segments
|
||||
let non_empty_segments = segments
|
||||
.into_iter()
|
||||
.filter(|seg| seg.text().map(|t| !t.is_empty()).unwrap_or(true))
|
||||
.collect_vec();
|
||||
|
||||
// sanity check
|
||||
if non_empty_segments
|
||||
.windows(2)
|
||||
.any(|segs| segs[0].is_text() && segs[1].is_text())
|
||||
{
|
||||
unreachable!("Parsed into consecutive text segments: {non_empty_segments:?}");
|
||||
}
|
||||
|
||||
// guard empty case
|
||||
if non_empty_segments.is_empty() {
|
||||
bail!(r#"Parsed an empty matcher: "{s}""#);
|
||||
}
|
||||
|
||||
// guard variable syntax leftover fragments
|
||||
if non_empty_segments
|
||||
.iter()
|
||||
.filter_map(Seg::text)
|
||||
.any(|t| t.contains('$'))
|
||||
{
|
||||
bail!(r#"Invalid matcher: "{s}""#);
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
segments: non_empty_segments,
|
||||
case: Case::Insensitive,
|
||||
})
|
||||
// this var
|
||||
segments.push(Seg::Env(capture.get(1).unwrap().as_str().into()));
|
||||
}
|
||||
}
|
||||
// possible trailing text
|
||||
segments.push(Seg::Text(s[text_start..].into()));
|
||||
|
||||
/// Helper type for deserializing a `Matcher` from either a plain string or a
|
||||
/// `{ glob = "...", case_sensitive = true }` struct.
|
||||
#[derive(Deserialize)]
|
||||
#[serde(untagged)]
|
||||
enum RawMatcher {
|
||||
Simple(String),
|
||||
Full {
|
||||
glob: String,
|
||||
#[serde(default)]
|
||||
case_sensitive: bool,
|
||||
},
|
||||
}
|
||||
// cleanup empty text segments
|
||||
let non_empty_segments = segments
|
||||
.into_iter()
|
||||
.filter(|seg| seg.text().map(|t| !t.is_empty()).unwrap_or(true))
|
||||
.collect_vec();
|
||||
|
||||
// sanity check
|
||||
if non_empty_segments
|
||||
.windows(2)
|
||||
.any(|segs| segs[0].is_text() && segs[1].is_text())
|
||||
{
|
||||
unreachable!("Parsed into consecutive text segments: {non_empty_segments:?}");
|
||||
}
|
||||
|
||||
// guard empty case
|
||||
if non_empty_segments.is_empty() {
|
||||
bail!(r#"Parsed an empty matcher: "{s}""#);
|
||||
}
|
||||
|
||||
// guard variable syntax leftover fragments
|
||||
if non_empty_segments
|
||||
.iter()
|
||||
.filter_map(Seg::text)
|
||||
.any(|t| t.contains('$'))
|
||||
{
|
||||
bail!(r#"Invalid matcher: "{s}""#);
|
||||
}
|
||||
|
||||
Ok(non_empty_segments)
|
||||
}
|
||||
impl TryFrom<RawMatcher> for Matcher {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(raw: RawMatcher) -> Result<Self, Self::Error> {
|
||||
match raw {
|
||||
RawMatcher::Simple(s) => Matcher::from_str(&s),
|
||||
const DEFAULT_CASE: Case = Case::Insensitive;
|
||||
match &raw {
|
||||
RawMatcher::Simple(s) => {
|
||||
let segments = parse_glob(s)?;
|
||||
Ok(Self {
|
||||
segments,
|
||||
case: DEFAULT_CASE,
|
||||
})
|
||||
}
|
||||
RawMatcher::Full {
|
||||
glob,
|
||||
case_sensitive,
|
||||
} => {
|
||||
let mut matcher = Matcher::from_str(&glob)?;
|
||||
matcher.case = if case_sensitive {
|
||||
Case::Sensitive
|
||||
} else {
|
||||
Case::Insensitive
|
||||
let segments = parse_glob(glob)?;
|
||||
let case = match case_sensitive {
|
||||
None => DEFAULT_CASE,
|
||||
Some(false) => Case::Insensitive,
|
||||
Some(true) => Case::Sensitive,
|
||||
};
|
||||
Ok(matcher)
|
||||
Ok(Self { segments, case })
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user