mirror of
https://github.com/uutils/coreutils.git
synced 2025-07-07 21:45:01 +00:00
Merge pull request #7484 from karlmcdowall/head_bug_bug_bug
head: fix bug with non-terminated files.
This commit is contained in:
commit
5936408922
2 changed files with 88 additions and 13 deletions
|
@ -3,9 +3,10 @@
|
|||
// For the full copyright and license information, please view the LICENSE
|
||||
// file that was distributed with this source code.
|
||||
|
||||
// spell-checker:ignore (vars) seekable
|
||||
// spell-checker:ignore (vars) seekable memrchr
|
||||
|
||||
use clap::{Arg, ArgAction, ArgMatches, Command};
|
||||
use memchr::memrchr_iter;
|
||||
use std::ffi::OsString;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufWriter, Read, Seek, SeekFrom, Write};
|
||||
|
@ -363,30 +364,50 @@ where
|
|||
|
||||
let mut buffer = [0u8; BUF_SIZE];
|
||||
|
||||
let mut i = 0u64;
|
||||
let mut lines = 0u64;
|
||||
let mut check_last_byte_first_loop = true;
|
||||
let mut bytes_remaining_to_search = file_size;
|
||||
|
||||
loop {
|
||||
// the casts here are ok, `buffer.len()` should never be above a few k
|
||||
let bytes_remaining_to_search = file_size - i;
|
||||
let bytes_to_read_this_loop = bytes_remaining_to_search.min(BUF_SIZE.try_into().unwrap());
|
||||
let bytes_to_read_this_loop =
|
||||
bytes_remaining_to_search.min(buffer.len().try_into().unwrap());
|
||||
let read_start_offset = bytes_remaining_to_search - bytes_to_read_this_loop;
|
||||
let buffer = &mut buffer[..bytes_to_read_this_loop.try_into().unwrap()];
|
||||
bytes_remaining_to_search -= bytes_to_read_this_loop;
|
||||
|
||||
input.seek(SeekFrom::Start(read_start_offset))?;
|
||||
input.read_exact(buffer)?;
|
||||
for byte in buffer.iter().rev() {
|
||||
if byte == &separator {
|
||||
lines += 1;
|
||||
}
|
||||
// if it were just `n`,
|
||||
|
||||
// Unfortunately need special handling for the case that the input file doesn't have
|
||||
// a terminating `separator` character.
|
||||
// If the input file doesn't end with a `separator` character, add an extra line to our
|
||||
// `line` counter. In the case that `n` is 0 we need to return here since we've
|
||||
// obviously found our 0th-line-from-the-end offset.
|
||||
if check_last_byte_first_loop {
|
||||
check_last_byte_first_loop = false;
|
||||
if let Some(last_byte_of_file) = buffer.last() {
|
||||
if last_byte_of_file != &separator {
|
||||
if n == 0 {
|
||||
input.rewind()?;
|
||||
return Ok(file_size);
|
||||
}
|
||||
assert_eq!(lines, 0);
|
||||
lines = 1;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
for separator_offset in memrchr_iter(separator, &buffer[..]) {
|
||||
lines += 1;
|
||||
if lines == n + 1 {
|
||||
input.rewind()?;
|
||||
return Ok(file_size - i);
|
||||
return Ok(read_start_offset
|
||||
+ TryInto::<u64>::try_into(separator_offset).unwrap()
|
||||
+ 1);
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
if file_size - i == 0 {
|
||||
if read_start_offset == 0 {
|
||||
input.rewind()?;
|
||||
return Ok(0);
|
||||
}
|
||||
|
@ -732,4 +753,23 @@ mod tests {
|
|||
0
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_nth_line_from_end_non_terminated() {
|
||||
// Validate the find_nth_line_from_end for files that are not terminated with a final
|
||||
// newline character.
|
||||
let input_file = "a\nb";
|
||||
let mut input = Cursor::new(input_file);
|
||||
assert_eq!(find_nth_line_from_end(&mut input, 0, b'\n').unwrap(), 3);
|
||||
assert_eq!(find_nth_line_from_end(&mut input, 1, b'\n').unwrap(), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_find_nth_line_from_end_empty() {
|
||||
// Validate the find_nth_line_from_end for files that are empty.
|
||||
let input_file = "";
|
||||
let mut input = Cursor::new(input_file);
|
||||
assert_eq!(find_nth_line_from_end(&mut input, 0, b'\n').unwrap(), 0);
|
||||
assert_eq!(find_nth_line_from_end(&mut input, 1, b'\n').unwrap(), 0);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -149,6 +149,15 @@ fn test_zero_terminated_syntax_2() {
|
|||
.stdout_is("x\0y");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_non_terminated_input() {
|
||||
new_ucmd!()
|
||||
.args(&["-n", "-1"])
|
||||
.pipe_in("x\ny")
|
||||
.succeeds()
|
||||
.stdout_is("x\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_zero_terminated_negative_lines() {
|
||||
new_ucmd!()
|
||||
|
@ -442,12 +451,19 @@ fn test_all_but_last_lines_large_file() {
|
|||
let scene = TestScenario::new(util_name!());
|
||||
let fixtures = &scene.fixtures;
|
||||
let seq_20000_file_name = "seq_20000";
|
||||
let seq_20000_truncated_file_name = "seq_20000_truncated";
|
||||
let seq_1000_file_name = "seq_1000";
|
||||
scene
|
||||
.cmd("seq")
|
||||
.arg("20000")
|
||||
.set_stdout(fixtures.make_file(seq_20000_file_name))
|
||||
.succeeds();
|
||||
// Create a file the same as seq_20000 except for the final terminating endline.
|
||||
scene
|
||||
.ucmd()
|
||||
.args(&["-c", "-1", seq_20000_file_name])
|
||||
.set_stdout(fixtures.make_file(seq_20000_truncated_file_name))
|
||||
.succeeds();
|
||||
scene
|
||||
.cmd("seq")
|
||||
.arg("1000")
|
||||
|
@ -459,7 +475,7 @@ fn test_all_but_last_lines_large_file() {
|
|||
.ucmd()
|
||||
.args(&["-n", "-19000", seq_20000_file_name])
|
||||
.succeeds()
|
||||
.stdout_only_fixture("seq_1000");
|
||||
.stdout_only_fixture(seq_1000_file_name);
|
||||
|
||||
scene
|
||||
.ucmd()
|
||||
|
@ -472,6 +488,25 @@ fn test_all_but_last_lines_large_file() {
|
|||
.args(&["-n", "-20001", seq_20000_file_name])
|
||||
.succeeds()
|
||||
.stdout_only_fixture("emptyfile.txt");
|
||||
|
||||
// Confirm correct behavior when the input file doesn't end with a newline.
|
||||
scene
|
||||
.ucmd()
|
||||
.args(&["-n", "-19000", seq_20000_truncated_file_name])
|
||||
.succeeds()
|
||||
.stdout_only_fixture(seq_1000_file_name);
|
||||
|
||||
scene
|
||||
.ucmd()
|
||||
.args(&["-n", "-20000", seq_20000_truncated_file_name])
|
||||
.succeeds()
|
||||
.stdout_only_fixture("emptyfile.txt");
|
||||
|
||||
scene
|
||||
.ucmd()
|
||||
.args(&["-n", "-20001", seq_20000_truncated_file_name])
|
||||
.succeeds()
|
||||
.stdout_only_fixture("emptyfile.txt");
|
||||
}
|
||||
|
||||
#[cfg(all(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue