From 92bb655b0ce6f3a5ebc452c2d51cb12fbbf91b80 Mon Sep 17 00:00:00 2001
From: karanabe <152078880+karanabe@users.noreply.github.com>
Date: Sun, 9 Nov 2025 02:41:15 +0900
Subject: [PATCH] fix(base64): allow padded chunks mid-stream

---
 src/uu/base32/src/base_common.rs        | 12 ++--
 src/uucore/src/lib/features/encoding.rs | 88 +++++++++++++++++++++----
 tests/by-util/test_base64.rs            | 30 +++++++++
 3 files changed, 111 insertions(+), 19 deletions(-)
diff --git a/src/uu/base32/src/base_common.rs b/src/uu/base32/src/base_common.rs
index fe13e46cc..96d28e189 100644
--- a/src/uu/base32/src/base_common.rs
+++ b/src/uu/base32/src/base_common.rs
@@ -171,18 +171,16 @@ pub fn get_input(config: &Config) -> UResult<Box<dyn ReadSeek>> {
     }
 }
 
-/// Determines if the input buffer ends with padding ('=') after trimming trailing whitespace.
+/// Reads the whole input and reports whether it contains any padding byte ('=').
 fn read_and_has_padding<R: Read>(input: &mut R) -> UResult<(bool, Vec<u8>)> {
     let mut buf = Vec::new();
     input
         .read_to_end(&mut buf)
         .map_err(|err| USimpleError::new(1, format_read_error(err.kind())))?;
 
-    // Reverse iterator and skip trailing whitespace without extra collections
-    let has_padding = buf
-        .iter()
-        .rfind(|&&byte| !byte.is_ascii_whitespace())
-        .is_some_and(|&byte| byte == b'=');
+    // Treat the stream as padded if any '=' exists (GNU coreutils continues decoding
+    // even when padding bytes are followed by more data).
+    let has_padding = buf.contains(&b'=');
 
     Ok((has_padding, buf))
 }
@@ -665,6 +663,8 @@ mod tests {
             ("aGVsbG8sIHdvcmxkIQ== \n", true),
             ("aGVsbG8sIHdvcmxkIQ=", true),
             ("aGVsbG8sIHdvcmxkIQ= ", true),
+            ("MTIzNA==MTIzNA", true),
+            ("MTIzNA==\nMTIzNA", true),
             ("aGVsbG8sIHdvcmxkIQ \n", false),
             ("aGVsbG8sIHdvcmxkIQ", false),
         ];
diff --git a/src/uucore/src/lib/features/encoding.rs b/src/uucore/src/lib/features/encoding.rs
index 6a2dccd4f..6c6261c2c 100644
--- a/src/uucore/src/lib/features/encoding.rs
+++ b/src/uucore/src/lib/features/encoding.rs
@@ -22,6 +22,26 @@ pub struct Base64SimdWrapper {
 }
 
 impl Base64SimdWrapper {
+    fn decode_with_standard(input: &[u8], output: &mut Vec<u8>) -> Result<(), ()> {
+        match base64_simd::STANDARD.decode_to_vec(input) {
+            Ok(decoded_bytes) => {
+                output.extend_from_slice(&decoded_bytes);
+                Ok(())
+            }
+            Err(_) => Err(()),
+        }
+    }
+
+    fn decode_with_no_pad(input: &[u8], output: &mut Vec<u8>) -> Result<(), ()> {
+        match base64_simd::STANDARD_NO_PAD.decode_to_vec(input) {
+            Ok(decoded_bytes) => {
+                output.extend_from_slice(&decoded_bytes);
+                Ok(())
+            }
+            Err(_) => Err(()),
+        }
+    }
+
     pub fn new(
         use_padding: bool,
         valid_decoding_multiple: usize,
@@ -47,22 +67,64 @@ impl SupportsFastDecodeAndEncode for Base64SimdWrapper {
     }
 
     fn decode_into_vec(&self, input: &[u8], output: &mut Vec<u8>) -> UResult<()> {
-        let decoded = if self.use_padding {
-            base64_simd::STANDARD.decode_to_vec(input)
+        let original_len = output.len();
+
+        let decode_result = if self.use_padding {
+            // GNU coreutils keeps decoding even when '=' appears before the true end
+            // of the stream (e.g. concatenated padded chunks). Mirror that logic
+            // by cutting the input after each 4-byte group that contains '=',
+            // decoding every such segment with the padded decoder, then decoding
+            // the '='-free remainder with whichever decoder fits its length.
+            let mut start = 0usize;
+            while start < input.len() {
+                let remaining = &input[start..];
+
+                if remaining.is_empty() {
+                    break;
+                }
+
+                if let Some(eq_rel_idx) = remaining.iter().position(|&b| b == b'=') {
+                    let blocks = (eq_rel_idx / 4) + 1;
+                    let segment_len = blocks * 4;
+
+                    if segment_len > remaining.len() {
+                        return Err(USimpleError::new(1, "error: invalid input".to_owned()));
+                    }
+
+                    if Self::decode_with_standard(&remaining[..segment_len], output).is_err() {
+                        return Err(USimpleError::new(1, "error: invalid input".to_owned()));
+                    }
+
+                    start += segment_len;
+                } else {
+                    // With no '=' left, the tail is either a whole number of 4-byte
+                    // groups (len % 4 == 0) or purposely unpadded (GNU --ignore-garbage
+                    // or concatenated streams), so select the matching decoder.
+                    let decoder = if remaining.len() % 4 == 0 {
+                        Self::decode_with_standard
+                    } else {
+                        Self::decode_with_no_pad
+                    };
+
+                    if decoder(remaining, output).is_err() {
+                        return Err(USimpleError::new(1, "error: invalid input".to_owned()));
+                    }
+
+                    break;
+                }
+            }
+
+            Ok(())
         } else {
-            base64_simd::STANDARD_NO_PAD.decode_to_vec(input)
+            Self::decode_with_no_pad(input, output)
+                .map_err(|_| USimpleError::new(1, "error: invalid input".to_owned()))
         };
 
-        match decoded {
-            Ok(decoded_bytes) => {
-                output.extend_from_slice(&decoded_bytes);
-                Ok(())
-            }
-            Err(_) => {
-                // Restore original length on error
-                output.truncate(output.len());
-                Err(USimpleError::new(1, "error: invalid input".to_owned()))
-            }
+        if let Err(err) = decode_result {
+            output.truncate(original_len);
+            Err(err)
+        } else {
+            Ok(())
         }
     }
 
diff --git a/tests/by-util/test_base64.rs b/tests/by-util/test_base64.rs
index b46b8e0b1..f3657bb77 100644
--- a/tests/by-util/test_base64.rs
+++ b/tests/by-util/test_base64.rs
@@ -2,6 +2,9 @@
 //
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
+
+// spell-checker:ignore unpadded, QUJD
+
 #[cfg(target_os = "linux")]
 use uutests::at_and_ucmd;
 use uutests::new_ucmd;
@@ -108,6 +111,33 @@ fn test_decode_repeat_flags() {
         .stdout_only("hello, world!");
 }
 
+#[test]
+fn test_decode_padded_block_followed_by_unpadded_tail() {
+    new_ucmd!()
+        .arg("--decode")
+        .pipe_in("MTIzNA==MTIzNA")
+        .succeeds()
+        .stdout_only("12341234");
+}
+
+#[test]
+fn test_decode_padded_block_followed_by_aligned_tail() {
+    new_ucmd!()
+        .arg("--decode")
+        .pipe_in("MTIzNA==QUJD")
+        .succeeds()
+        .stdout_only("1234ABC");
+}
+
+#[test]
+fn test_decode_unpadded_stream_without_equals() {
+    new_ucmd!()
+        .arg("--decode")
+        .pipe_in("MTIzNA")
+        .succeeds()
+        .stdout_only("1234");
+}
+
 #[test]
 fn test_garbage() {
     let input = "aGVsbG8sIHdvcmxkIQ==\0"; // spell-checker:disable-line