diff --git a/CHANGELOG.md b/CHANGELOG.md
index c67cf148..faf869a5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,7 @@
 - Map `BUILD` case sensitively to Python (Starlark) for Bazel, see #3576 (@vorburger)
 
 ## Bugfixes
+- Treat ZIP archives as binary content based on their magic header, see #0000 (@officialasishkumar)
 - Fix i686 `.deb` package using incorrect architecture name (`i686` instead of `i386`), preventing installation on Debian. Closes #3611, see #3650 (@Sim-hu)
 - Fix inconsistent `.deb` MUSL package names (aarch64-musl used `arm64` instead of `musl-linux-arm64`, and `musleabihf` target missed `bat-musl` prefix). Closes #3482, see #3642 (@mvanhorn)
 - Fix incorrect text width computation when using `--binary=as-text` with non-printable characters in caret notation, see #3640 and #3631 (@eyupcanakman)
diff --git a/src/input.rs b/src/input.rs
index 29846abe..30f13f98 100644
--- a/src/input.rs
+++ b/src/input.rs
@@ -261,11 +261,7 @@ impl<'a> InputReader<'a> {
         let mut first_line = vec![];
         reader.read_until(b'\n', &mut first_line).ok();
 
-        let content_type = if first_line.is_empty() {
-            None
-        } else {
-            Some(content_inspector::inspect(&first_line[..]))
-        };
+        let content_type = inspect_content_type(&first_line);
 
         if content_type == Some(ContentType::UTF_16LE) {
             read_utf16_line(&mut reader, &mut first_line, 0x00, 0x0A).ok();
@@ -319,6 +315,25 @@ impl<'a> InputReader<'a> {
     }
 }
 
+fn inspect_content_type(first_line: &[u8]) -> Option<ContentType> {
+    if first_line.is_empty() {
+        return None;
+    }
+
+    let content_type = content_inspector::inspect(first_line);
+    if content_type == ContentType::UTF_8 && has_zip_signature(first_line) {
+        Some(ContentType::BINARY)
+    } else {
+        Some(content_type)
+    }
+}
+
+fn has_zip_signature(bytes: &[u8]) -> bool {
+    [b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08"]
+        .into_iter()
+        .any(|signature| bytes.starts_with(signature))
+}
+
 fn read_utf16_line<R: BufRead>(
     reader: &mut R,
     buf: &mut Vec<u8>,
@@ -374,6 +389,22 @@ fn basic() {
     assert!(buffer.is_empty());
 }
 
+#[test]
+fn zip_magic_headers_are_treated_as_binary() {
+    for content in [b"PK\x03\x04hello", b"PK\x05\x06hello", b"PK\x07\x08hello"] {
+        let reader = InputReader::new(&content[..]);
+        assert_eq!(Some(ContentType::BINARY), reader.content_type);
+    }
+}
+
+#[test]
+fn non_zip_pk_prefix_is_not_treated_as_binary() {
+    assert_eq!(
+        Some(ContentType::UTF_8),
+        inspect_content_type(b"PK\x03\x03hello")
+    );
+}
+
 #[test]
 fn utf16le() {
     let content = b"\xFF\xFE\x73\x00\x0A\x00\x64\x00";
diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs
index 96da50d7..d7e44301 100644
--- a/tests/integration_tests.rs
+++ b/tests/integration_tests.rs
@@ -2093,6 +2093,24 @@ fn header_binary() {
         .stderr("");
 }
 
+#[test]
+fn header_zip_file_is_binary() {
+    let tmp_dir = tempdir().expect("can create temporary directory");
+    let tmp_path = tmp_dir.path().join("test.zip");
+    std::fs::write(&tmp_path, b"PK\x03\x04hello").expect("can write temporary file");
+
+    bat()
+        .arg(&tmp_path)
+        .arg("--decorations=always")
+        .arg("--style=header")
+        .arg("-r=0:0")
+        .arg("--file-name=test.zip")
+        .assert()
+        .success()
+        .stdout("File: test.zip   <BINARY>\n")
+        .stderr("");
+}
+
 #[test]
 fn header_full_binary() {
     bat()