This commit is contained in:
Haoren(Harrison) Hu 2025-07-05 15:10:51 +02:00 committed by GitHub
commit 39fcb27d57
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -22,9 +22,119 @@ use crate::util::{Dir, TestCommand};
// searched for in the first few KB of the file and in a match. Normally, NUL
// bytes are searched for everywhere.
//
// TODO: Add tests for binary file detection when using memory maps.
const HAY: &'static [u8] = include_bytes!("./data/sherlock-nul.txt");
// Tests for binary file detection when using memory maps.
// As noted in the original comments, with memory maps binary detection
// works differently - NUL bytes are only searched for in the first few KB
// of the file and in matches.
// Test that matches in a binary file with memory maps work as expected
// with implicit file search (via glob pattern)
rgtest!(mmap_match_implicit, |dir: Dir, mut cmd: TestCommand| {
dir.create_bytes("hay", HAY);
cmd.args(&["-n", "Project Gutenberg EBook", "-g", "hay"]);
// With mmap, we get a match and a warning about binary content
let expected = "\
hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
hay: WARNING: stopped searching binary file after match (found \"\\0\" byte around offset 77041)
";
eqnice!(expected, cmd.stdout());
});
// Test with an explicit file argument when using memory maps
rgtest!(mmap_match_explicit, |dir: Dir, mut cmd: TestCommand| {
dir.create_bytes("hay", HAY);
cmd.args(&["-n", "Project Gutenberg EBook", "hay"]);
let expected = "\
1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
";
eqnice!(expected, cmd.stdout());
});
// Test specifically with a pattern that matches near the NUL byte
// which should trigger binary detection with memory maps
rgtest!(mmap_match_near_nul, |dir: Dir, mut cmd: TestCommand| {
dir.create_bytes("hay", HAY);
// Pattern that matches around line 1898 where the NUL byte is
cmd.args(&["-n", "abcdef", "hay"]); // Note: Using direct file path instead of glob
let expected = "\
binary file matches (found \"\\0\" byte around offset 77041)
";
eqnice!(expected, cmd.stdout());
});
// Test with --count option to ensure full file scanning works with mmap
rgtest!(mmap_match_count, |dir: Dir, mut cmd: TestCommand| {
dir.create_bytes("hay", HAY);
cmd.args(&["-c", "Project Gutenberg EBook", "hay"]);
// With mmap, since we're counting all matches and might not
// encounter the NUL byte during initial detection, the count
// should still be reported
eqnice!("1\n", cmd.stdout());
});
// Test binary detection with mmap when pattern would match before and after NUL byte
rgtest!(mmap_match_multiple, |dir: Dir, mut cmd: TestCommand| {
dir.create_bytes("hay", HAY);
cmd.args(&["-n", "Project Gutenberg EBook|Heaven", "hay"]); // Use explicit file path
// With explicit file and memory maps, matches before and after NUL byte are shown
let expected = "\
1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
1871:\"No. Heaven knows what the objects of his studies are. But here we
";
eqnice!(expected, cmd.stdout());
});
// Test that --binary flag works properly with mmap
rgtest!(mmap_binary_flag, |dir: Dir, mut cmd: TestCommand| {
dir.create_bytes("hay", HAY);
cmd.args(&["-n", "--binary", "Heaven", "-g", "hay"]); // Use glob pattern
// With --binary flag and glob pattern, we get a binary file message
let expected = "\
hay: binary file matches (found \"\\0\" byte around offset 77041)
";
eqnice!(expected, cmd.stdout());
});
// Test that using -a/--text flag works as expected with mmap
rgtest!(mmap_text_flag, |dir: Dir, mut cmd: TestCommand| {
dir.create_bytes("hay", HAY);
cmd.args(&["-n", "--text", "Heaven", "-g", "hay"]);
// With --text flag, binary detection should be disabled
let expected = "\
hay:1871:\"No. Heaven knows what the objects of his studies are. But here we
";
eqnice!(expected, cmd.stdout());
});
// Test pattern that only matches after the NUL byte with memory maps
rgtest!(mmap_after_nul_match, |dir: Dir, mut cmd: TestCommand| {
dir.create_bytes("hay", HAY);
cmd.args(&["-n", "medical student", "hay"]); // Use explicit file path
// With explicit file and memory maps, all matches are shown
let expected = "\
176:\"A medical student, I suppose?\" said I.
409:\"A medical student, I suppose?\" said I.
642:\"A medical student, I suppose?\" said I.
875:\"A medical student, I suppose?\" said I.
1108:\"A medical student, I suppose?\" said I.
1341:\"A medical student, I suppose?\" said I.
1574:\"A medical student, I suppose?\" said I.
1807:\"A medical student, I suppose?\" said I.
1867:\"And yet you say he is not a medical student?\"
";
eqnice!(expected, cmd.stdout());
});
// This tests that ripgrep prints a warning message if it finds and prints a
// match in a binary file before detecting that it is a binary file. The point
// here is to notify that user that the search of the file is only partially