diff --git a/build/syntax_mapping.rs b/build/syntax_mapping.rs index c7133be9..b0db01eb 100644 --- a/build/syntax_mapping.rs +++ b/build/syntax_mapping.rs @@ -47,13 +47,24 @@ impl ToTokens for MappingTarget { } } +/// Helper type for deserializing a `Matcher` from either a plain string or a +/// `{ glob = "...", case_sensitive = true }` struct. +#[derive(Deserialize)] +#[serde(untagged)] +enum RawMatcher { + Simple(String), + Full { + glob: String, + case_sensitive: Option, + }, +} + /// Whether a glob pattern should be matched case-sensitively or case-insensitively. /// /// Mirrors the runtime `Case` type in `src/syntax_mapping.rs`. -#[derive(Clone, Debug, PartialEq, Eq, Hash, Default)] +#[derive(Clone, Debug, PartialEq, Eq, Hash)] enum Case { Sensitive, - #[default] Insensitive, } impl ToTokens for Case { @@ -66,11 +77,11 @@ impl ToTokens for Case { } } -#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, Deserialize)] -#[serde(try_from = "RawMatcher")] /// A single matcher. /// /// Codegen converts this into a `Lazy>`. +#[derive(Clone, Debug, PartialEq, Eq, Hash, Deserialize)] +#[serde(try_from = "RawMatcher")] struct Matcher { segments: Vec, /// Whether the glob pattern should be matched case-sensitively. @@ -78,7 +89,7 @@ struct Matcher { /// Defaults to `Case::Insensitive` for backwards compatibility. case: Case, } -/// Parse a matcher. +/// Parse the glob pattern of a matcher. /// /// Note that this implementation is rather strict: it will greedily interpret /// every valid environment variable replacement as such, then immediately @@ -92,93 +103,79 @@ struct Matcher { /// /// Revision history: /// - 2024-02-20: allow `{` and `}` (glob brace expansion) -impl FromStr for Matcher { - type Err = anyhow::Error; - fn from_str(s: &str) -> Result { - use MatcherSegment as Seg; - static VAR_REGEX: Lazy = Lazy::new(|| Regex::new(r"\$\{([\w\d_]+)\}").unwrap()); +fn parse_glob(s: &str) -> Result, anyhow::Error> { + use MatcherSegment as Seg; + static VAR_REGEX: Lazy = Lazy::new(|| Regex::new(r"\$\{([\w\d_]+)\}").unwrap()); - let mut segments = vec![]; - let mut text_start = 0; - for capture in VAR_REGEX.captures_iter(s) { - let match_0 = capture.get(0).unwrap(); + let mut segments = vec![]; + let mut text_start = 0; + for capture in VAR_REGEX.captures_iter(s) { + let match_0 = capture.get(0).unwrap(); - // text before this var - let text_end = match_0.start(); - segments.push(Seg::Text(s[text_start..text_end].into())); - text_start = match_0.end(); + // text before this var + let text_end = match_0.start(); + segments.push(Seg::Text(s[text_start..text_end].into())); + text_start = match_0.end(); - // this var - segments.push(Seg::Env(capture.get(1).unwrap().as_str().into())); - } - // possible trailing text - segments.push(Seg::Text(s[text_start..].into())); - - // cleanup empty text segments - let non_empty_segments = segments - .into_iter() - .filter(|seg| seg.text().map(|t| !t.is_empty()).unwrap_or(true)) - .collect_vec(); - - // sanity check - if non_empty_segments - .windows(2) - .any(|segs| segs[0].is_text() && segs[1].is_text()) - { - unreachable!("Parsed into consecutive text segments: {non_empty_segments:?}"); - } - - // guard empty case - if non_empty_segments.is_empty() { - bail!(r#"Parsed an empty matcher: "{s}""#); - } - - // guard variable syntax leftover fragments - if non_empty_segments - .iter() - .filter_map(Seg::text) - .any(|t| t.contains('$')) - { - bail!(r#"Invalid matcher: "{s}""#); - } - - Ok(Self { - segments: non_empty_segments, - case: Case::Insensitive, - }) + // this var + segments.push(Seg::Env(capture.get(1).unwrap().as_str().into())); } -} + // possible trailing text + segments.push(Seg::Text(s[text_start..].into())); -/// Helper type for deserializing a `Matcher` from either a plain string or a -/// `{ glob = "...", case_sensitive = true }` struct. -#[derive(Deserialize)] -#[serde(untagged)] -enum RawMatcher { - Simple(String), - Full { - glob: String, - #[serde(default)] - case_sensitive: bool, - }, -} + // cleanup empty text segments + let non_empty_segments = segments + .into_iter() + .filter(|seg| seg.text().map(|t| !t.is_empty()).unwrap_or(true)) + .collect_vec(); + // sanity check + if non_empty_segments + .windows(2) + .any(|segs| segs[0].is_text() && segs[1].is_text()) + { + unreachable!("Parsed into consecutive text segments: {non_empty_segments:?}"); + } + + // guard empty case + if non_empty_segments.is_empty() { + bail!(r#"Parsed an empty matcher: "{s}""#); + } + + // guard variable syntax leftover fragments + if non_empty_segments + .iter() + .filter_map(Seg::text) + .any(|t| t.contains('$')) + { + bail!(r#"Invalid matcher: "{s}""#); + } + + Ok(non_empty_segments) +} impl TryFrom for Matcher { type Error = anyhow::Error; - fn try_from(raw: RawMatcher) -> Result { - match raw { - RawMatcher::Simple(s) => Matcher::from_str(&s), + const DEFAULT_CASE: Case = Case::Insensitive; + match &raw { + RawMatcher::Simple(s) => { + let segments = parse_glob(s)?; + Ok(Self { + segments, + case: DEFAULT_CASE, + }) + } RawMatcher::Full { glob, case_sensitive, } => { - let mut matcher = Matcher::from_str(&glob)?; - matcher.case = if case_sensitive { - Case::Sensitive - } else { - Case::Insensitive + let segments = parse_glob(glob)?; + let case = match case_sensitive { + None => DEFAULT_CASE, + Some(false) => Case::Insensitive, + Some(true) => Case::Sensitive, }; - Ok(matcher) + Ok(Self { segments, case }) } } }