From 844bfded506e99c06237472bd83a8af5af433538 Mon Sep 17 00:00:00 2001 From: Rizky Mirzaviandy Priambodo <142987522+Xavrir@users.noreply.github.com> Date: Sun, 8 Mar 2026 10:18:29 +0700 Subject: [PATCH] Add --fallback-syntax for undetected files (#3617) * feat(cli): add fallback syntax option Expose a new fallback syntax CLI option so users can opt into syntax highlighting only when auto-detection fails. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus * feat(syntax): apply fallback only after detection fails Use the fallback syntax only when path and first-line detection fail, preserving existing behavior for detected files and explicit language selection. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus * test(cli): cover fallback syntax behavior Add integration coverage for fallback syntax usage, precedence with --language, and no-op behavior when syntax is already detected; update help snapshots for the new option. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus * docs(changelog): document fallback syntax option Record the new fallback syntax feature in the unreleased changelog section. 
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-opencode) Co-authored-by: Sisyphus --------- Co-authored-by: Sisyphus --- CHANGELOG.md | 1 + doc/long-help.txt | 7 +++ doc/short-help.txt | 2 + src/assets.rs | 25 +++++--- src/bin/bat/app.rs | 4 ++ src/bin/bat/clap_app.rs | 11 ++++ src/config.rs | 3 + src/printer.rs | 7 ++- tests/integration_tests.rs | 115 +++++++++++++++++++++++++++++++++++++ 9 files changed, 166 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e1f26ba4..293e55dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - Added an initial `flake.nix` for a ready made development environment; see #3578 (@vorburger) - Add `--quiet-empty` (`-E`) flag to suppress output when input is empty. Closes #1936, see #3563 (@NORMAL-EX) - Improve native man pages and command help syntax highlighting by stripping overstriking, see #3517 (@akirk) +- Add `--fallback-syntax`/`--fallback-language` to apply syntax highlighting only when auto-detection fails, see #1341 (@Xavrir) ## Bugfixes - Report error when pager is missing instead of silently falling back, see #3588 (@IMaloney) diff --git a/doc/long-help.txt b/doc/long-help.txt index 2c98ff25..82878acd 100644 --- a/doc/long-help.txt +++ b/doc/long-help.txt @@ -37,6 +37,13 @@ Options: name (like 'C++' or 'LaTeX') or possible file extension (like 'cpp', 'hpp' or 'md'). Use '--list-languages' to show all supported language names and file extensions. + --fallback-syntax + Set a fallback language for syntax highlighting when auto-detection fails. Unlike + '--language', this is only used when no syntax could be detected from filename, custom + syntax mappings, or first-line detection. 
+ + [aliases: --fallback-language] + -H, --highlight-line Highlight the specified line ranges with a different background color For example: '--highlight-line 40' highlights line 40 diff --git a/doc/short-help.txt b/doc/short-help.txt index b0c45314..e08bb604 100644 --- a/doc/short-help.txt +++ b/doc/short-help.txt @@ -17,6 +17,8 @@ Options: Show plain style (alias for '--style=plain'). -l, --language Set the language for syntax highlighting. + --fallback-syntax + Set a fallback language for undetected syntaxes. [aliases: --fallback-language] -H, --highlight-line Highlight lines N through M. --file-name diff --git a/src/assets.rs b/src/assets.rs index 80ae3e57..1315537f 100644 --- a/src/assets.rs +++ b/src/assets.rs @@ -210,6 +210,7 @@ impl HighlightingAssets { pub(crate) fn get_syntax( &self, language: Option<&str>, + fallback_syntax: Option<&str>, input: &mut OpenedInput, mapping: &SyntaxMapping, ) -> Result> { @@ -234,9 +235,16 @@ impl HighlightingAssets { match path_syntax { // If a path wasn't provided, or if path based syntax detection // above failed, we fall back to first-line syntax detection. - Err(Error::UndetectedSyntax(path)) => self - .get_first_line_syntax(&mut input.reader)? - .ok_or(Error::UndetectedSyntax(path)), + Err(Error::UndetectedSyntax(path)) => { + if let Some(syntax_in_set) = self.get_first_line_syntax(&mut input.reader)? { + Ok(syntax_in_set) + } else if let Some(language) = fallback_syntax { + self.find_syntax_by_token(language)? 
+ .ok_or_else(|| Error::UnknownSyntax(language.to_owned())) + } else { + Err(Error::UndetectedSyntax(path)) + } + } _ => path_syntax, } } @@ -416,11 +424,12 @@ mod tests { fn get_syntax_name( &self, language: Option<&str>, + fallback_syntax: Option<&str>, input: &mut OpenedInput, mapping: &SyntaxMapping, ) -> String { self.assets - .get_syntax(language, input, mapping) + .get_syntax(language, fallback_syntax, input, mapping) .map(|syntax_in_set| syntax_in_set.syntax.name.clone()) .unwrap_or_else(|_| "!no syntax!".to_owned()) } @@ -440,7 +449,7 @@ mod tests { let dummy_stdin: &[u8] = &[]; let mut opened_input = input.open(dummy_stdin, None).unwrap(); - self.get_syntax_name(None, &mut opened_input, &self.syntax_mapping) + self.get_syntax_name(None, None, &mut opened_input, &self.syntax_mapping) } fn syntax_for_file_with_content_os(&self, file_name: &OsStr, first_line: &str) -> String { @@ -450,7 +459,7 @@ mod tests { let dummy_stdin: &[u8] = &[]; let mut opened_input = input.open(dummy_stdin, None).unwrap(); - self.get_syntax_name(None, &mut opened_input, &self.syntax_mapping) + self.get_syntax_name(None, None, &mut opened_input, &self.syntax_mapping) } #[cfg(unix)] @@ -470,7 +479,7 @@ mod tests { let input = Input::stdin().with_name(Some(file_name)); let mut opened_input = input.open(content, None).unwrap(); - self.get_syntax_name(None, &mut opened_input, &self.syntax_mapping) + self.get_syntax_name(None, None, &mut opened_input, &self.syntax_mapping) } fn syntax_is_same_for_inputkinds(&self, file_name: &str, content: &str) -> bool { @@ -752,7 +761,7 @@ contexts: let mut opened_input = input.open(dummy_stdin, None).unwrap(); assert_eq!( - test.get_syntax_name(None, &mut opened_input, &test.syntax_mapping), + test.get_syntax_name(None, None, &mut opened_input, &test.syntax_mapping), "SSH Config" ); } diff --git a/src/bin/bat/app.rs b/src/bin/bat/app.rs index 73ad60fe..dddb5559 100644 --- a/src/bin/bat/app.rs +++ b/src/bin/bat/app.rs @@ -384,6 +384,10 @@ impl App { 
None } }), + fallback_syntax: self + .matches + .get_one::<String>("fallback-syntax") + .map(|s| s.as_str()), show_nonprintable: self.matches.get_flag("show-all"), nonprintable_notation: match self .matches diff --git a/src/bin/bat/clap_app.rs b/src/bin/bat/clap_app.rs index 5e2b927c..3636f081 100644 --- a/src/bin/bat/clap_app.rs +++ b/src/bin/bat/clap_app.rs @@ -120,6 +120,17 @@ pub fn build_app(interactive_output: bool) -> Command { language names and file extensions.", ), ) + .arg( + Arg::new("fallback-syntax") + .long("fallback-syntax") + .visible_alias("fallback-language") + .help("Set a fallback language for undetected syntaxes.") + .long_help( + "Set a fallback language for syntax highlighting when auto-detection fails. \ + Unlike '--language', this is only used when no syntax could be detected from \ + filename, custom syntax mappings, or first-line detection.", + ), + ) .arg( Arg::new("highlight-line") .long("highlight-line") diff --git a/src/config.rs b/src/config.rs index 8ea0e275..97720fb5 100644 --- a/src/config.rs +++ b/src/config.rs @@ -38,6 +38,9 @@ pub struct Config<'a> { /// The explicitly configured language, if any pub language: Option<&'a str>, + /// The fallback syntax used when auto-detection fails + pub fallback_syntax: Option<&'a str>, + /// Whether or not to show/replace non-printable characters like space, tab and newline. 
pub show_nonprintable: bool, diff --git a/src/printer.rs b/src/printer.rs index 119258bd..6a57fb62 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -268,7 +268,12 @@ impl<'a> InteractivePrinter<'a> { const PLAIN_TEXT_SYNTAX: &str = "Plain Text"; const MANPAGE_SYNTAX: &str = "Manpage"; const COMMAND_HELP_SYNTAX: &str = "Command Help"; - match assets.get_syntax(config.language, input, &config.syntax_mapping) { + match assets.get_syntax( + config.language, + config.fallback_syntax, + input, + &config.syntax_mapping, + ) { Ok(syntax_in_set) => ( syntax_in_set.syntax.name == PLAIN_TEXT_SYNTAX, syntax_in_set.syntax.name == MANPAGE_SYNTAX diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index ee727eb0..cfbad253 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -2470,6 +2470,121 @@ fn no_first_line_fallback_when_mapping_to_invalid_syntax() { .stderr(predicate::str::contains("unknown syntax: 'InvalidSyntax'")); } +#[test] +fn fallback_syntax_is_used_when_no_syntax_is_detected() { + let content = "# comment\nfoo=bar\n"; + + let fallback_output = bat() + .arg("--color=always") + .arg("--style=plain") + .arg("--file-name=unknown.fallbacksyntax") + .arg("--fallback-syntax=bash") + .write_stdin(content) + .assert() + .success() + .get_output() + .stdout + .clone(); + + let explicit_output = bat() + .arg("--color=always") + .arg("--style=plain") + .arg("--language=bash") + .arg("--file-name=unknown.fallbacksyntax") + .write_stdin(content) + .assert() + .success() + .get_output() + .stdout + .clone(); + + assert_eq!( + from_utf8(&fallback_output).expect("output is valid utf-8"), + from_utf8(&explicit_output).expect("output is valid utf-8") + ); +} + +#[test] +fn fallback_syntax_does_not_override_detected_syntax() { + let content = "fn main() { println!(\"hello\"); }\n"; + + let with_fallback = bat() + .arg("--color=always") + .arg("--style=plain") + .arg("--file-name=test.rs") + .arg("--fallback-syntax=json") + .write_stdin(content) 
+ .assert() + .success() + .get_output() + .stdout + .clone(); + + let without_fallback = bat() + .arg("--color=always") + .arg("--style=plain") + .arg("--file-name=test.rs") + .write_stdin(content) + .assert() + .success() + .get_output() + .stdout + .clone(); + + assert_eq!( + from_utf8(&with_fallback).expect("output is valid utf-8"), + from_utf8(&without_fallback).expect("output is valid utf-8") + ); +} + +#[test] +fn fallback_syntax_does_not_override_explicit_language() { + let content = "{\"a\": 1}\n"; + + let with_fallback = bat() + .arg("--color=always") + .arg("--style=plain") + .arg("--language=json") + .arg("--fallback-syntax=rust") + .arg("--file-name=unknown.fallbacksyntax") + .write_stdin(content) + .assert() + .success() + .get_output() + .stdout + .clone(); + + let without_fallback = bat() + .arg("--color=always") + .arg("--style=plain") + .arg("--language=json") + .arg("--file-name=unknown.fallbacksyntax") + .write_stdin(content) + .assert() + .success() + .get_output() + .stdout + .clone(); + + assert_eq!( + from_utf8(&with_fallback).expect("output is valid utf-8"), + from_utf8(&without_fallback).expect("output is valid utf-8") + ); +} + +#[test] +fn invalid_fallback_syntax_returns_error() { + bat() + .arg("--color=always") + .arg("--style=plain") + .arg("--file-name=unknown.fallbacksyntax") + .arg("--fallback-syntax=InvalidSyntax") + .write_stdin("foo\n") + .assert() + .failure() + .stderr(predicate::str::contains("unknown syntax: 'InvalidSyntax'")); +} + #[test] fn show_all_mode() { bat()