mirror of
https://github.com/uutils/coreutils.git
synced 2025-12-23 08:47:37 +00:00
checksum: Introduce a DigestOutput type...
... to prevent a preemptive computation of the hex encoding.
This commit is contained in:
parent
b2feb825a7
commit
2a248de1fb
5 changed files with 104 additions and 79 deletions
|
|
@ -123,7 +123,7 @@ default = []
|
|||
# * non-default features
|
||||
backup-control = []
|
||||
colors = []
|
||||
checksum = ["data-encoding", "quoting-style", "sum"]
|
||||
checksum = ["quoting-style", "sum", "base64-simd"]
|
||||
encoding = ["data-encoding", "data-encoding-macro", "z85", "base64-simd"]
|
||||
entries = ["libc"]
|
||||
extendedbigdecimal = ["bigdecimal", "num-traits"]
|
||||
|
|
@ -171,6 +171,7 @@ sum = [
|
|||
"blake3",
|
||||
"sm3",
|
||||
"crc-fast",
|
||||
"data-encoding",
|
||||
]
|
||||
update-control = ["parser"]
|
||||
utf8 = []
|
||||
|
|
|
|||
|
|
@ -13,7 +13,8 @@ use std::path::Path;
|
|||
use crate::checksum::{ChecksumError, SizedAlgoKind, digest_reader, escape_filename};
|
||||
use crate::error::{FromIo, UResult, USimpleError};
|
||||
use crate::line_ending::LineEnding;
|
||||
use crate::{encoding, show, translate};
|
||||
use crate::sum::DigestOutput;
|
||||
use crate::{show, translate};
|
||||
|
||||
/// Use the same buffer size as GNU when reading a file to create a checksum
|
||||
/// from it: 32 KiB.
|
||||
|
|
@ -139,10 +140,11 @@ pub fn figure_out_output_format(
|
|||
fn print_legacy_checksum(
|
||||
options: &ChecksumComputeOptions,
|
||||
filename: &OsStr,
|
||||
sum: &str,
|
||||
sum: &DigestOutput,
|
||||
size: usize,
|
||||
) -> UResult<()> {
|
||||
debug_assert!(options.algo_kind.is_legacy());
|
||||
debug_assert!(matches!(sum, DigestOutput::U16(_) | DigestOutput::Crc(_)));
|
||||
|
||||
let (escaped_filename, prefix) = if options.line_ending == LineEnding::Nul {
|
||||
(filename.to_string_lossy().to_string(), "")
|
||||
|
|
@ -150,28 +152,24 @@ fn print_legacy_checksum(
|
|||
escape_filename(filename)
|
||||
};
|
||||
|
||||
print!("{prefix}");
|
||||
|
||||
// Print the sum
|
||||
match options.algo_kind {
|
||||
SizedAlgoKind::Sysv => print!(
|
||||
"{} {}",
|
||||
sum.parse::<u16>().unwrap(),
|
||||
match (options.algo_kind, sum) {
|
||||
(SizedAlgoKind::Sysv, DigestOutput::U16(sum)) => print!(
|
||||
"{prefix}{sum} {}",
|
||||
size.div_ceil(options.algo_kind.bitlen()),
|
||||
),
|
||||
SizedAlgoKind::Bsd => {
|
||||
(SizedAlgoKind::Bsd, DigestOutput::U16(sum)) => {
|
||||
// The BSD checksum output is 5 digit integer
|
||||
let bsd_width = 5;
|
||||
print!(
|
||||
"{:0bsd_width$} {:bsd_width$}",
|
||||
sum.parse::<u16>().unwrap(),
|
||||
"{prefix}{sum:0bsd_width$} {:bsd_width$}",
|
||||
size.div_ceil(options.algo_kind.bitlen()),
|
||||
);
|
||||
}
|
||||
SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => {
|
||||
print!("{sum} {size}");
|
||||
(SizedAlgoKind::Crc | SizedAlgoKind::Crc32b, DigestOutput::Crc(sum)) => {
|
||||
print!("{prefix}{sum} {size}");
|
||||
}
|
||||
_ => unreachable!("Not a legacy algorithm"),
|
||||
(algo, output) => unreachable!("Bug: Invalid legacy checksum ({algo:?}, {output:?})"),
|
||||
}
|
||||
|
||||
// Print the filename after a space if not stdin
|
||||
|
|
@ -284,49 +282,39 @@ where
|
|||
|
||||
let mut digest = options.algo_kind.create_digest();
|
||||
|
||||
let (sum_hex, sz) = digest_reader(
|
||||
&mut digest,
|
||||
&mut file,
|
||||
options.binary,
|
||||
options.algo_kind.bitlen(),
|
||||
)
|
||||
.map_err_context(|| translate!("cksum-error-failed-to-read-input"))?;
|
||||
let (digest_output, sz) = digest_reader(&mut digest, &mut file, options.binary)
|
||||
.map_err_context(|| translate!("cksum-error-failed-to-read-input"))?;
|
||||
|
||||
// Encodes the sum if df is Base64, leaves as-is otherwise.
|
||||
let encode_sum = |sum: String, df: DigestFormat| {
|
||||
let encode_sum = |sum: DigestOutput, df: DigestFormat| {
|
||||
if df.is_base64() {
|
||||
encoding::for_cksum::BASE64.encode(&hex::decode(sum).unwrap())
|
||||
sum.to_base64()
|
||||
} else {
|
||||
sum
|
||||
sum.to_hex()
|
||||
}
|
||||
};
|
||||
|
||||
match options.output_format {
|
||||
OutputFormat::Raw => {
|
||||
let bytes = match options.algo_kind {
|
||||
SizedAlgoKind::Crc | SizedAlgoKind::Crc32b => {
|
||||
sum_hex.parse::<u32>().unwrap().to_be_bytes().to_vec()
|
||||
}
|
||||
SizedAlgoKind::Sysv | SizedAlgoKind::Bsd => {
|
||||
sum_hex.parse::<u16>().unwrap().to_be_bytes().to_vec()
|
||||
}
|
||||
_ => hex::decode(sum_hex).unwrap(),
|
||||
};
|
||||
// Cannot handle multiple files anyway, output immediately.
|
||||
io::stdout().write_all(&bytes)?;
|
||||
digest_output.write_raw(io::stdout())?;
|
||||
return Ok(());
|
||||
}
|
||||
OutputFormat::Legacy => {
|
||||
print_legacy_checksum(&options, filename, &sum_hex, sz)?;
|
||||
print_legacy_checksum(&options, filename, &digest_output, sz)?;
|
||||
}
|
||||
OutputFormat::Tagged(digest_format) => {
|
||||
print_tagged_checksum(&options, filename, &encode_sum(sum_hex, digest_format))?;
|
||||
print_tagged_checksum(
|
||||
&options,
|
||||
filename,
|
||||
&encode_sum(digest_output, digest_format)?,
|
||||
)?;
|
||||
}
|
||||
OutputFormat::Untagged(digest_format, reading_mode) => {
|
||||
print_untagged_checksum(
|
||||
&options,
|
||||
filename,
|
||||
&encode_sum(sum_hex, digest_format),
|
||||
&encode_sum(digest_output, digest_format)?,
|
||||
reading_mode,
|
||||
)?;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,8 +15,8 @@ use thiserror::Error;
|
|||
use crate::error::{UError, UResult};
|
||||
use crate::show_error;
|
||||
use crate::sum::{
|
||||
Blake2b, Blake3, Bsd, CRC32B, Crc, Digest, DigestWriter, Md5, Sha1, Sha3_224, Sha3_256,
|
||||
Sha3_384, Sha3_512, Sha224, Sha256, Sha384, Sha512, Shake128, Shake256, Sm3, SysV,
|
||||
Blake2b, Blake3, Bsd, CRC32B, Crc, Digest, DigestOutput, DigestWriter, Md5, Sha1, Sha3_224,
|
||||
Sha3_256, Sha3_384, Sha3_512, Sha224, Sha256, Sha384, Sha512, Shake128, Shake256, Sm3, SysV,
|
||||
};
|
||||
|
||||
pub mod compute;
|
||||
|
|
@ -420,8 +420,7 @@ pub fn digest_reader<T: Read>(
|
|||
digest: &mut Box<dyn Digest>,
|
||||
reader: &mut T,
|
||||
binary: bool,
|
||||
output_bits: usize,
|
||||
) -> io::Result<(String, usize)> {
|
||||
) -> io::Result<(DigestOutput, usize)> {
|
||||
digest.reset();
|
||||
|
||||
// Read bytes from `reader` and write those bytes to `digest`.
|
||||
|
|
@ -440,14 +439,7 @@ pub fn digest_reader<T: Read>(
|
|||
let output_size = std::io::copy(reader, &mut digest_writer)? as usize;
|
||||
digest_writer.finalize();
|
||||
|
||||
if digest.output_bits() > 0 {
|
||||
Ok((digest.result_str(), output_size))
|
||||
} else {
|
||||
// Assume it's SHAKE. result_str() doesn't work with shake (as of 8/30/2016)
|
||||
let mut bytes = vec![0; output_bits.div_ceil(8)];
|
||||
digest.hash_finalize(&mut bytes);
|
||||
Ok((hex::encode(bytes), output_size))
|
||||
}
|
||||
Ok((digest.result(), output_size))
|
||||
}
|
||||
|
||||
/// Calculates the length of the digest.
|
||||
|
|
|
|||
|
|
@ -660,16 +660,11 @@ fn compute_and_check_digest_from_file(
|
|||
|
||||
// TODO: improve function signature to use ReadingMode instead of binary bool
|
||||
// Set binary to false because --binary is not supported with --check
|
||||
let (calculated_checksum, _) = digest_reader(
|
||||
&mut digest,
|
||||
&mut file_reader,
|
||||
/* binary */ false,
|
||||
algo.bitlen(),
|
||||
)
|
||||
.unwrap();
|
||||
let (calculated_checksum, _) =
|
||||
digest_reader(&mut digest, &mut file_reader, /* binary */ false).unwrap();
|
||||
|
||||
// Do the checksum validation
|
||||
let checksum_correct = expected_checksum == calculated_checksum;
|
||||
let checksum_correct = expected_checksum == calculated_checksum.to_hex()?;
|
||||
print_file_report(
|
||||
std::io::stdout(),
|
||||
filename,
|
||||
|
|
|
|||
|
|
@ -12,12 +12,52 @@
|
|||
//! [`DigestWriter`] struct provides a wrapper around [`Digest`] that
|
||||
//! implements the [`Write`] trait, for use in situations where calling
|
||||
//! [`write`] would be useful.
|
||||
use std::io::Write;
|
||||
|
||||
use hex::encode;
|
||||
use std::io::{self, Write};
|
||||
|
||||
use data_encoding::BASE64;
|
||||
|
||||
#[cfg(windows)]
|
||||
use memchr::memmem;
|
||||
|
||||
use crate::error::{UResult, USimpleError};
|
||||
|
||||
/// Represents the output of a checksum computation.
|
||||
#[derive(Debug)]
|
||||
pub enum DigestOutput {
|
||||
/// Varying-size output
|
||||
Vec(Vec<u8>),
|
||||
/// Legacy output for Crc and Crc32B modes
|
||||
Crc(u32),
|
||||
/// Legacy output for Sysv and BSD modes
|
||||
U16(u16),
|
||||
}
|
||||
|
||||
impl DigestOutput {
|
||||
pub fn write_raw(&self, mut w: impl std::io::Write) -> io::Result<()> {
|
||||
match self {
|
||||
Self::Vec(buf) => w.write_all(buf),
|
||||
// For legacy outputs, print them in big endian
|
||||
Self::Crc(n) => w.write_all(&n.to_be_bytes()),
|
||||
Self::U16(n) => w.write_all(&n.to_be_bytes()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_hex(&self) -> UResult<String> {
|
||||
match self {
|
||||
Self::Vec(buf) => Ok(hex::encode(buf)),
|
||||
_ => Err(USimpleError::new(1, "Legacy output cannot be encoded")),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_base64(&self) -> UResult<String> {
|
||||
match self {
|
||||
Self::Vec(buf) => Ok(BASE64.encode(buf)),
|
||||
_ => Err(USimpleError::new(1, "Legacy output cannot be encoded")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Digest {
|
||||
fn new() -> Self
|
||||
where
|
||||
|
|
@ -29,10 +69,11 @@ pub trait Digest {
|
|||
fn output_bytes(&self) -> usize {
|
||||
self.output_bits().div_ceil(8)
|
||||
}
|
||||
fn result_str(&mut self) -> String {
|
||||
|
||||
fn result(&mut self) -> DigestOutput {
|
||||
let mut buf: Vec<u8> = vec![0; self.output_bytes()];
|
||||
self.hash_finalize(&mut buf);
|
||||
encode(buf)
|
||||
DigestOutput::Vec(buf)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -167,10 +208,12 @@ impl Digest for Crc {
|
|||
out.copy_from_slice(&self.digest.finalize().to_ne_bytes());
|
||||
}
|
||||
|
||||
fn result_str(&mut self) -> String {
|
||||
fn result(&mut self) -> DigestOutput {
|
||||
let mut out: [u8; 8] = [0; 8];
|
||||
self.hash_finalize(&mut out);
|
||||
u64::from_ne_bytes(out).to_string()
|
||||
|
||||
let x = u64::from_ne_bytes(out);
|
||||
DigestOutput::Crc((x & (u32::MAX as u64)) as u32)
|
||||
}
|
||||
|
||||
fn reset(&mut self) {
|
||||
|
|
@ -214,10 +257,10 @@ impl Digest for CRC32B {
|
|||
32
|
||||
}
|
||||
|
||||
fn result_str(&mut self) -> String {
|
||||
fn result(&mut self) -> DigestOutput {
|
||||
let mut out = [0; 4];
|
||||
self.hash_finalize(&mut out);
|
||||
format!("{}", u32::from_be_bytes(out))
|
||||
DigestOutput::Crc(u32::from_be_bytes(out))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -240,10 +283,10 @@ impl Digest for Bsd {
|
|||
out.copy_from_slice(&self.state.to_ne_bytes());
|
||||
}
|
||||
|
||||
fn result_str(&mut self) -> String {
|
||||
let mut _out: Vec<u8> = vec![0; 2];
|
||||
fn result(&mut self) -> DigestOutput {
|
||||
let mut _out = [0; 2];
|
||||
self.hash_finalize(&mut _out);
|
||||
format!("{}", self.state)
|
||||
DigestOutput::U16(self.state)
|
||||
}
|
||||
|
||||
fn reset(&mut self) {
|
||||
|
|
@ -275,10 +318,10 @@ impl Digest for SysV {
|
|||
out.copy_from_slice(&(self.state as u16).to_ne_bytes());
|
||||
}
|
||||
|
||||
fn result_str(&mut self) -> String {
|
||||
let mut _out: Vec<u8> = vec![0; 2];
|
||||
fn result(&mut self) -> DigestOutput {
|
||||
let mut _out = [0; 2];
|
||||
self.hash_finalize(&mut _out);
|
||||
format!("{}", self.state)
|
||||
DigestOutput::U16((self.state & (u16::MAX as u32)) as u16)
|
||||
}
|
||||
|
||||
fn reset(&mut self) {
|
||||
|
|
@ -292,7 +335,7 @@ impl Digest for SysV {
|
|||
|
||||
// Implements the Digest trait for sha2 / sha3 algorithms with fixed output
|
||||
macro_rules! impl_digest_common {
|
||||
($algo_type: ty, $size: expr) => {
|
||||
($algo_type: ty, $size: literal) => {
|
||||
impl Digest for $algo_type {
|
||||
fn new() -> Self {
|
||||
Self(Default::default())
|
||||
|
|
@ -319,7 +362,7 @@ macro_rules! impl_digest_common {
|
|||
|
||||
// Implements the Digest trait for sha2 / sha3 algorithms with variable output
|
||||
macro_rules! impl_digest_shake {
|
||||
($algo_type: ty) => {
|
||||
($algo_type: ty, $output_bits: literal) => {
|
||||
impl Digest for $algo_type {
|
||||
fn new() -> Self {
|
||||
Self(Default::default())
|
||||
|
|
@ -338,7 +381,13 @@ macro_rules! impl_digest_shake {
|
|||
}
|
||||
|
||||
fn output_bits(&self) -> usize {
|
||||
0
|
||||
$output_bits
|
||||
}
|
||||
|
||||
fn result(&mut self) -> DigestOutput {
|
||||
let mut bytes = vec![0; self.output_bits().div_ceil(8)];
|
||||
self.hash_finalize(&mut bytes);
|
||||
DigestOutput::Vec(bytes)
|
||||
}
|
||||
}
|
||||
};
|
||||
|
|
@ -368,8 +417,8 @@ impl_digest_common!(Sha3_512, 512);
|
|||
|
||||
pub struct Shake128(sha3::Shake128);
|
||||
pub struct Shake256(sha3::Shake256);
|
||||
impl_digest_shake!(Shake128);
|
||||
impl_digest_shake!(Shake256);
|
||||
impl_digest_shake!(Shake128, 256);
|
||||
impl_digest_shake!(Shake256, 512);
|
||||
|
||||
/// A struct that writes to a digest.
|
||||
///
|
||||
|
|
@ -501,14 +550,14 @@ mod tests {
|
|||
writer_crlf.write_all(b"\r").unwrap();
|
||||
writer_crlf.write_all(b"\n").unwrap();
|
||||
writer_crlf.finalize();
|
||||
let result_crlf = digest.result_str();
|
||||
let result_crlf = digest.result();
|
||||
|
||||
// We expect "\r\n" to be replaced with "\n" in text mode on Windows.
|
||||
let mut digest = Box::new(Md5::new()) as Box<dyn Digest>;
|
||||
let mut writer_lf = DigestWriter::new(&mut digest, false);
|
||||
writer_lf.write_all(b"\n").unwrap();
|
||||
writer_lf.finalize();
|
||||
let result_lf = digest.result_str();
|
||||
let result_lf = digest.result();
|
||||
|
||||
assert_eq!(result_crlf, result_lf);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue