checksum: Introduce a DigestOutput type...

... to prevent a preemptive computation of the hex encoding.
This commit is contained in:
Dorian Peron 2025-11-25 01:07:38 +01:00
parent b2feb825a7
commit 2a248de1fb
5 changed files with 104 additions and 79 deletions

View file

@ -123,7 +123,7 @@ default = []
# * non-default features
backup-control = []
colors = []
checksum = ["data-encoding", "quoting-style", "sum"]
checksum = ["quoting-style", "sum", "base64-simd"]
encoding = ["data-encoding", "data-encoding-macro", "z85", "base64-simd"]
entries = ["libc"]
extendedbigdecimal = ["bigdecimal", "num-traits"]
@ -171,6 +171,7 @@ sum = [
"blake3",
"sm3",
"crc-fast",
"data-encoding",
]
update-control = ["parser"]
utf8 = []

View file

@ -13,7 +13,8 @@ use std::path::Path;
use crate::checksum::{ChecksumError, SizedAlgoKind, digest_reader, escape_filename};
use crate::error::{FromIo, UResult, USimpleError};
use crate::line_ending::LineEnding;
use crate::{encoding, show, translate};
use crate::sum::DigestOutput;
use crate::{show, translate};
/// Use the same buffer size as GNU when reading a file to create a checksum
/// from it: 32 KiB.
@ -139,10 +140,11 @@ pub fn figure_out_output_format(
fn print_legacy_checksum(
options: &ChecksumComputeOptions,
filename: &OsStr,
sum: &str,
sum: &DigestOutput,
size: usize,
) -> UResult<()> {
debug_assert!(options.algo_kind.is_legacy());
debug_assert!(matches!(sum, DigestOutput::U16(_) | DigestOutput::Crc(_)));
let (escaped_filename, prefix) = if options.line_ending == LineEnding::Nul {
(filename.to_string_lossy().to_string(), "")
@ -150,28 +152,24 @@ fn print_legacy_checksum(
escape_filename(filename)
};
print!("{prefix}");
// Print the sum
match options.algo_kind {
SizedAlgoKind::Sysv => print!(
"{} {}",
sum.parse::<u16>().unwrap(),
match (options.algo_kind, sum) {
(SizedAlgoKind::Sysv, DigestOutput::U16(sum)) => print!(
"{prefix}{sum} {}",
size.div_ceil(options.algo_kind.bitlen()),
),
SizedAlgoKind::Bsd => {
(SizedAlgoKind::Bsd, DigestOutput::U16(sum)) => {
// The BSD checksum output is 5 digit integer
let bsd_width = 5;
print!(
"{:0bsd_width$} {:bsd_width$}",
sum.parse::<u16>().unwrap(),
"{prefix}{sum:0bsd_width$} {:bsd_width$}",
size.div_ceil(options.algo_kind.bitlen()),
);
}
SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => {
print!("{sum} {size}");
(SizedAlgoKind::Crc | SizedAlgoKind::Crc32b, DigestOutput::Crc(sum)) => {
print!("{prefix}{sum} {size}");
}
_ => unreachable!("Not a legacy algorithm"),
(algo, output) => unreachable!("Bug: Invalid legacy checksum ({algo:?}, {output:?})"),
}
// Print the filename after a space if not stdin
@ -284,49 +282,39 @@ where
let mut digest = options.algo_kind.create_digest();
let (sum_hex, sz) = digest_reader(
&mut digest,
&mut file,
options.binary,
options.algo_kind.bitlen(),
)
.map_err_context(|| translate!("cksum-error-failed-to-read-input"))?;
let (digest_output, sz) = digest_reader(&mut digest, &mut file, options.binary)
.map_err_context(|| translate!("cksum-error-failed-to-read-input"))?;
// Encodes the sum as Base64 if df is Base64, as hexadecimal otherwise.
let encode_sum = |sum: String, df: DigestFormat| {
let encode_sum = |sum: DigestOutput, df: DigestFormat| {
if df.is_base64() {
encoding::for_cksum::BASE64.encode(&hex::decode(sum).unwrap())
sum.to_base64()
} else {
sum
sum.to_hex()
}
};
match options.output_format {
OutputFormat::Raw => {
let bytes = match options.algo_kind {
SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => {
sum_hex.parse::<u32>().unwrap().to_be_bytes().to_vec()
}
SizedAlgoKind::Sysv | SizedAlgoKind::Bsd => {
sum_hex.parse::<u16>().unwrap().to_be_bytes().to_vec()
}
_ => hex::decode(sum_hex).unwrap(),
};
// Cannot handle multiple files anyway, output immediately.
io::stdout().write_all(&bytes)?;
digest_output.write_raw(io::stdout())?;
return Ok(());
}
OutputFormat::Legacy => {
print_legacy_checksum(&options, filename, &sum_hex, sz)?;
print_legacy_checksum(&options, filename, &digest_output, sz)?;
}
OutputFormat::Tagged(digest_format) => {
print_tagged_checksum(&options, filename, &encode_sum(sum_hex, digest_format))?;
print_tagged_checksum(
&options,
filename,
&encode_sum(digest_output, digest_format)?,
)?;
}
OutputFormat::Untagged(digest_format, reading_mode) => {
print_untagged_checksum(
&options,
filename,
&encode_sum(sum_hex, digest_format),
&encode_sum(digest_output, digest_format)?,
reading_mode,
)?;
}

View file

@ -15,8 +15,8 @@ use thiserror::Error;
use crate::error::{UError, UResult};
use crate::show_error;
use crate::sum::{
Blake2b, Blake3, Bsd, CRC32B, Crc, Digest, DigestWriter, Md5, Sha1, Sha3_224, Sha3_256,
Sha3_384, Sha3_512, Sha224, Sha256, Sha384, Sha512, Shake128, Shake256, Sm3, SysV,
Blake2b, Blake3, Bsd, CRC32B, Crc, Digest, DigestOutput, DigestWriter, Md5, Sha1, Sha3_224,
Sha3_256, Sha3_384, Sha3_512, Sha224, Sha256, Sha384, Sha512, Shake128, Shake256, Sm3, SysV,
};
pub mod compute;
@ -420,8 +420,7 @@ pub fn digest_reader<T: Read>(
digest: &mut Box<dyn Digest>,
reader: &mut T,
binary: bool,
output_bits: usize,
) -> io::Result<(String, usize)> {
) -> io::Result<(DigestOutput, usize)> {
digest.reset();
// Read bytes from `reader` and write those bytes to `digest`.
@ -440,14 +439,7 @@ pub fn digest_reader<T: Read>(
let output_size = std::io::copy(reader, &mut digest_writer)? as usize;
digest_writer.finalize();
if digest.output_bits() > 0 {
Ok((digest.result_str(), output_size))
} else {
// Assume it's SHAKE. result_str() doesn't work with shake (as of 8/30/2016)
let mut bytes = vec![0; output_bits.div_ceil(8)];
digest.hash_finalize(&mut bytes);
Ok((hex::encode(bytes), output_size))
}
Ok((digest.result(), output_size))
}
/// Calculates the length of the digest.

View file

@ -660,16 +660,11 @@ fn compute_and_check_digest_from_file(
// TODO: improve function signature to use ReadingMode instead of binary bool
// Set binary to false because --binary is not supported with --check
let (calculated_checksum, _) = digest_reader(
&mut digest,
&mut file_reader,
/* binary */ false,
algo.bitlen(),
)
.unwrap();
let (calculated_checksum, _) =
digest_reader(&mut digest, &mut file_reader, /* binary */ false).unwrap();
// Do the checksum validation
let checksum_correct = expected_checksum == calculated_checksum;
let checksum_correct = expected_checksum == calculated_checksum.to_hex()?;
print_file_report(
std::io::stdout(),
filename,

View file

@ -12,12 +12,52 @@
//! [`DigestWriter`] struct provides a wrapper around [`Digest`] that
//! implements the [`Write`] trait, for use in situations where calling
//! [`write`] would be useful.
use std::io::Write;
use hex::encode;
use std::io::{self, Write};
use data_encoding::BASE64;
#[cfg(windows)]
use memchr::memmem;
use crate::error::{UResult, USimpleError};
/// Represents the output of a checksum computation.
///
/// The value is kept in its native form (raw bytes or integer) so that the
/// textual encoding is only computed when a caller actually asks for it.
///
/// `PartialEq`/`Eq` are derived so that two outputs can be compared directly,
/// e.g. with `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DigestOutput {
    /// Varying-size output, stored as the raw digest bytes
    Vec(Vec<u8>),
    /// Legacy output for Crc and Crc32B modes
    Crc(u32),
    /// Legacy output for Sysv and BSD modes
    U16(u16),
}
impl DigestOutput {
    /// Writes the binary form of this output to `w`.
    ///
    /// Byte-vector outputs are written as they are; the integer (legacy)
    /// outputs are written as their big-endian byte representation.
    ///
    /// # Errors
    ///
    /// Returns whatever error the underlying `write_all` call reports.
    pub fn write_raw(&self, mut w: impl std::io::Write) -> io::Result<()> {
        match self {
            Self::Vec(bytes) => w.write_all(bytes),
            // Legacy integer sums are emitted most-significant byte first.
            Self::Crc(crc) => w.write_all(&crc.to_be_bytes()),
            Self::U16(sum) => w.write_all(&sum.to_be_bytes()),
        }
    }

    /// Returns the hexadecimal encoding of a byte-vector output.
    ///
    /// # Errors
    ///
    /// Returns an error with exit code 1 when called on a legacy
    /// (`Crc` / `U16`) output, which has no hexadecimal form here.
    pub fn to_hex(&self) -> UResult<String> {
        let Self::Vec(bytes) = self else {
            return Err(USimpleError::new(1, "Legacy output cannot be encoded"));
        };
        Ok(hex::encode(bytes))
    }

    /// Returns the Base64 encoding of a byte-vector output.
    ///
    /// # Errors
    ///
    /// Returns an error with exit code 1 when called on a legacy
    /// (`Crc` / `U16`) output, which has no Base64 form here.
    pub fn to_base64(&self) -> UResult<String> {
        let Self::Vec(bytes) = self else {
            return Err(USimpleError::new(1, "Legacy output cannot be encoded"));
        };
        Ok(BASE64.encode(bytes))
    }
}
pub trait Digest {
fn new() -> Self
where
@ -29,10 +69,11 @@ pub trait Digest {
fn output_bytes(&self) -> usize {
self.output_bits().div_ceil(8)
}
fn result_str(&mut self) -> String {
fn result(&mut self) -> DigestOutput {
let mut buf: Vec<u8> = vec![0; self.output_bytes()];
self.hash_finalize(&mut buf);
encode(buf)
DigestOutput::Vec(buf)
}
}
@ -167,10 +208,12 @@ impl Digest for Crc {
out.copy_from_slice(&self.digest.finalize().to_ne_bytes());
}
fn result_str(&mut self) -> String {
fn result(&mut self) -> DigestOutput {
let mut out: [u8; 8] = [0; 8];
self.hash_finalize(&mut out);
u64::from_ne_bytes(out).to_string()
let x = u64::from_ne_bytes(out);
DigestOutput::Crc((x & (u32::MAX as u64)) as u32)
}
fn reset(&mut self) {
@ -214,10 +257,10 @@ impl Digest for CRC32B {
32
}
fn result_str(&mut self) -> String {
fn result(&mut self) -> DigestOutput {
let mut out = [0; 4];
self.hash_finalize(&mut out);
format!("{}", u32::from_be_bytes(out))
DigestOutput::Crc(u32::from_be_bytes(out))
}
}
@ -240,10 +283,10 @@ impl Digest for Bsd {
out.copy_from_slice(&self.state.to_ne_bytes());
}
fn result_str(&mut self) -> String {
let mut _out: Vec<u8> = vec![0; 2];
fn result(&mut self) -> DigestOutput {
let mut _out = [0; 2];
self.hash_finalize(&mut _out);
format!("{}", self.state)
DigestOutput::U16(self.state)
}
fn reset(&mut self) {
@ -275,10 +318,10 @@ impl Digest for SysV {
out.copy_from_slice(&(self.state as u16).to_ne_bytes());
}
fn result_str(&mut self) -> String {
let mut _out: Vec<u8> = vec![0; 2];
fn result(&mut self) -> DigestOutput {
let mut _out = [0; 2];
self.hash_finalize(&mut _out);
format!("{}", self.state)
DigestOutput::U16((self.state & (u16::MAX as u32)) as u16)
}
fn reset(&mut self) {
@ -292,7 +335,7 @@ impl Digest for SysV {
// Implements the Digest trait for sha2 / sha3 algorithms with fixed output
macro_rules! impl_digest_common {
($algo_type: ty, $size: expr) => {
($algo_type: ty, $size: literal) => {
impl Digest for $algo_type {
fn new() -> Self {
Self(Default::default())
@ -319,7 +362,7 @@ macro_rules! impl_digest_common {
// Implements the Digest trait for sha2 / sha3 algorithms with variable output
macro_rules! impl_digest_shake {
($algo_type: ty) => {
($algo_type: ty, $output_bits: literal) => {
impl Digest for $algo_type {
fn new() -> Self {
Self(Default::default())
@ -338,7 +381,13 @@ macro_rules! impl_digest_shake {
}
fn output_bits(&self) -> usize {
0
$output_bits
}
/// Finalizes the hash into a buffer sized from `output_bits()` (rounded
/// up to whole bytes) and returns it as a byte-vector output.
fn result(&mut self) -> DigestOutput {
    let byte_len = self.output_bits().div_ceil(8);
    let mut buf = vec![0u8; byte_len];
    self.hash_finalize(&mut buf);
    DigestOutput::Vec(buf)
}
}
};
@ -368,8 +417,8 @@ impl_digest_common!(Sha3_512, 512);
pub struct Shake128(sha3::Shake128);
pub struct Shake256(sha3::Shake256);
impl_digest_shake!(Shake128);
impl_digest_shake!(Shake256);
impl_digest_shake!(Shake128, 256);
impl_digest_shake!(Shake256, 512);
/// A struct that writes to a digest.
///
@ -501,14 +550,14 @@ mod tests {
writer_crlf.write_all(b"\r").unwrap();
writer_crlf.write_all(b"\n").unwrap();
writer_crlf.finalize();
let result_crlf = digest.result_str();
let result_crlf = digest.result();
// We expect "\r\n" to be replaced with "\n" in text mode on Windows.
let mut digest = Box::new(Md5::new()) as Box<dyn Digest>;
let mut writer_lf = DigestWriter::new(&mut digest, false);
writer_lf.write_all(b"\n").unwrap();
writer_lf.finalize();
let result_lf = digest.result_str();
let result_lf = digest.result();
assert_eq!(result_crlf, result_lf);
}