mirror of
https://github.com/sharkdp/bat
synced 2026-06-09 10:03:18 +00:00
Imprv: clean up matcher glob parsing logic
- Data flow is now strictly linear from `RawMatcher` to `Matcher`
  - I've also hoisted `RawMatcher` in front of `Matcher` to signal this
- Re-type `RawMatcher.case_sensitive` from `bool` to `Option<bool>`
  - This moves all parser logic away from `RawMatcher`, making it a more faithful representation of the data
- Favour default consts in `Matcher::try_from<RawMatcher>` over a `Default` impl on `Case`
  - Because the default choice of casing is a design decision of the logic, not an intrinsic property of the type
This commit is contained in:
+75
-78
@@ -47,13 +47,24 @@ impl ToTokens for MappingTarget {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Helper type for deserializing a `Matcher` from either a plain string or a
|
||||||
|
/// `{ glob = "...", case_sensitive = true }` struct.
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
#[serde(untagged)]
|
||||||
|
enum RawMatcher {
|
||||||
|
Simple(String),
|
||||||
|
Full {
|
||||||
|
glob: String,
|
||||||
|
case_sensitive: Option<bool>,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
/// Whether a glob pattern should be matched case-sensitively or case-insensitively.
|
/// Whether a glob pattern should be matched case-sensitively or case-insensitively.
|
||||||
///
|
///
|
||||||
/// Mirrors the runtime `Case` type in `src/syntax_mapping.rs`.
|
/// Mirrors the runtime `Case` type in `src/syntax_mapping.rs`.
|
||||||
#[derive(Clone, Debug, PartialEq, Eq, Hash, Default)]
|
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||||
enum Case {
|
enum Case {
|
||||||
Sensitive,
|
Sensitive,
|
||||||
#[default]
|
|
||||||
Insensitive,
|
Insensitive,
|
||||||
}
|
}
|
||||||
impl ToTokens for Case {
|
impl ToTokens for Case {
|
||||||
@@ -66,11 +77,11 @@ impl ToTokens for Case {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Deserialize)]
|
|
||||||
#[serde(try_from = "RawMatcher")]
|
|
||||||
/// A single matcher.
|
/// A single matcher.
|
||||||
///
|
///
|
||||||
/// Codegen converts this into a `Lazy<Option<GlobMatcher>>`.
|
/// Codegen converts this into a `Lazy<Option<GlobMatcher>>`.
|
||||||
|
#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize)]
|
||||||
|
#[serde(try_from = "RawMatcher")]
|
||||||
struct Matcher {
|
struct Matcher {
|
||||||
segments: Vec<MatcherSegment>,
|
segments: Vec<MatcherSegment>,
|
||||||
/// Whether the glob pattern should be matched case-sensitively.
|
/// Whether the glob pattern should be matched case-sensitively.
|
||||||
@@ -78,7 +89,7 @@ struct Matcher {
|
|||||||
/// Defaults to `Case::Insensitive` for backwards compatibility.
|
/// Defaults to `Case::Insensitive` for backwards compatibility.
|
||||||
case: Case,
|
case: Case,
|
||||||
}
|
}
|
||||||
/// Parse a matcher.
|
/// Parse the glob pattern of a matcher.
|
||||||
///
|
///
|
||||||
/// Note that this implementation is rather strict: it will greedily interpret
|
/// Note that this implementation is rather strict: it will greedily interpret
|
||||||
/// every valid environment variable replacement as such, then immediately
|
/// every valid environment variable replacement as such, then immediately
|
||||||
@@ -92,93 +103,79 @@ struct Matcher {
|
|||||||
///
|
///
|
||||||
/// Revision history:
|
/// Revision history:
|
||||||
/// - 2024-02-20: allow `{` and `}` (glob brace expansion)
|
/// - 2024-02-20: allow `{` and `}` (glob brace expansion)
|
||||||
impl FromStr for Matcher {
|
fn parse_glob(s: &str) -> Result<Vec<MatcherSegment>, anyhow::Error> {
|
||||||
type Err = anyhow::Error;
|
use MatcherSegment as Seg;
|
||||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
static VAR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"\$\{([\w\d_]+)\}").unwrap());
|
||||||
use MatcherSegment as Seg;
|
|
||||||
static VAR_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"\$\{([\w\d_]+)\}").unwrap());
|
|
||||||
|
|
||||||
let mut segments = vec![];
|
let mut segments = vec![];
|
||||||
let mut text_start = 0;
|
let mut text_start = 0;
|
||||||
for capture in VAR_REGEX.captures_iter(s) {
|
for capture in VAR_REGEX.captures_iter(s) {
|
||||||
let match_0 = capture.get(0).unwrap();
|
let match_0 = capture.get(0).unwrap();
|
||||||
|
|
||||||
// text before this var
|
// text before this var
|
||||||
let text_end = match_0.start();
|
let text_end = match_0.start();
|
||||||
segments.push(Seg::Text(s[text_start..text_end].into()));
|
segments.push(Seg::Text(s[text_start..text_end].into()));
|
||||||
text_start = match_0.end();
|
text_start = match_0.end();
|
||||||
|
|
||||||
// this var
|
// this var
|
||||||
segments.push(Seg::Env(capture.get(1).unwrap().as_str().into()));
|
segments.push(Seg::Env(capture.get(1).unwrap().as_str().into()));
|
||||||
}
|
|
||||||
// possible trailing text
|
|
||||||
segments.push(Seg::Text(s[text_start..].into()));
|
|
||||||
|
|
||||||
// cleanup empty text segments
|
|
||||||
let non_empty_segments = segments
|
|
||||||
.into_iter()
|
|
||||||
.filter(|seg| seg.text().map(|t| !t.is_empty()).unwrap_or(true))
|
|
||||||
.collect_vec();
|
|
||||||
|
|
||||||
// sanity check
|
|
||||||
if non_empty_segments
|
|
||||||
.windows(2)
|
|
||||||
.any(|segs| segs[0].is_text() && segs[1].is_text())
|
|
||||||
{
|
|
||||||
unreachable!("Parsed into consecutive text segments: {non_empty_segments:?}");
|
|
||||||
}
|
|
||||||
|
|
||||||
// guard empty case
|
|
||||||
if non_empty_segments.is_empty() {
|
|
||||||
bail!(r#"Parsed an empty matcher: "{s}""#);
|
|
||||||
}
|
|
||||||
|
|
||||||
// guard variable syntax leftover fragments
|
|
||||||
if non_empty_segments
|
|
||||||
.iter()
|
|
||||||
.filter_map(Seg::text)
|
|
||||||
.any(|t| t.contains('$'))
|
|
||||||
{
|
|
||||||
bail!(r#"Invalid matcher: "{s}""#);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(Self {
|
|
||||||
segments: non_empty_segments,
|
|
||||||
case: Case::Insensitive,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
}
|
// possible trailing text
|
||||||
|
segments.push(Seg::Text(s[text_start..].into()));
|
||||||
|
|
||||||
/// Helper type for deserializing a `Matcher` from either a plain string or a
|
// cleanup empty text segments
|
||||||
/// `{ glob = "...", case_sensitive = true }` struct.
|
let non_empty_segments = segments
|
||||||
#[derive(Deserialize)]
|
.into_iter()
|
||||||
#[serde(untagged)]
|
.filter(|seg| seg.text().map(|t| !t.is_empty()).unwrap_or(true))
|
||||||
enum RawMatcher {
|
.collect_vec();
|
||||||
Simple(String),
|
|
||||||
Full {
|
|
||||||
glob: String,
|
|
||||||
#[serde(default)]
|
|
||||||
case_sensitive: bool,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// sanity check
|
||||||
|
if non_empty_segments
|
||||||
|
.windows(2)
|
||||||
|
.any(|segs| segs[0].is_text() && segs[1].is_text())
|
||||||
|
{
|
||||||
|
unreachable!("Parsed into consecutive text segments: {non_empty_segments:?}");
|
||||||
|
}
|
||||||
|
|
||||||
|
// guard empty case
|
||||||
|
if non_empty_segments.is_empty() {
|
||||||
|
bail!(r#"Parsed an empty matcher: "{s}""#);
|
||||||
|
}
|
||||||
|
|
||||||
|
// guard variable syntax leftover fragments
|
||||||
|
if non_empty_segments
|
||||||
|
.iter()
|
||||||
|
.filter_map(Seg::text)
|
||||||
|
.any(|t| t.contains('$'))
|
||||||
|
{
|
||||||
|
bail!(r#"Invalid matcher: "{s}""#);
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(non_empty_segments)
|
||||||
|
}
|
||||||
impl TryFrom<RawMatcher> for Matcher {
|
impl TryFrom<RawMatcher> for Matcher {
|
||||||
type Error = anyhow::Error;
|
type Error = anyhow::Error;
|
||||||
|
|
||||||
fn try_from(raw: RawMatcher) -> Result<Self, Self::Error> {
|
fn try_from(raw: RawMatcher) -> Result<Self, Self::Error> {
|
||||||
match raw {
|
const DEFAULT_CASE: Case = Case::Insensitive;
|
||||||
RawMatcher::Simple(s) => Matcher::from_str(&s),
|
match &raw {
|
||||||
|
RawMatcher::Simple(s) => {
|
||||||
|
let segments = parse_glob(s)?;
|
||||||
|
Ok(Self {
|
||||||
|
segments,
|
||||||
|
case: DEFAULT_CASE,
|
||||||
|
})
|
||||||
|
}
|
||||||
RawMatcher::Full {
|
RawMatcher::Full {
|
||||||
glob,
|
glob,
|
||||||
case_sensitive,
|
case_sensitive,
|
||||||
} => {
|
} => {
|
||||||
let mut matcher = Matcher::from_str(&glob)?;
|
let segments = parse_glob(glob)?;
|
||||||
matcher.case = if case_sensitive {
|
let case = match case_sensitive {
|
||||||
Case::Sensitive
|
None => DEFAULT_CASE,
|
||||||
} else {
|
Some(false) => Case::Insensitive,
|
||||||
Case::Insensitive
|
Some(true) => Case::Sensitive,
|
||||||
};
|
};
|
||||||
Ok(matcher)
|
Ok(Self { segments, case })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user