Andrew Gallant 2025-02-18 22:53:27 -05:00
parent 8bea2f5533
commit 007a4bffe9
No known key found for this signature in database
GPG key ID: B2E3A4923F8B0D44
20 changed files with 3560 additions and 2914 deletions

@ -111,7 +111,7 @@ fn add_years_months_days(c: &mut Criterion) {
/// This is useful when you have a known time zone already and want to get
/// a specific instant for many distinct civil datetimes in that time zone.
fn to_timestamp_static(c: &mut Criterion) {
const NAME: &str = "civil_datetime/to_datetime_static";
const NAME: &str = "civil_datetime/to_timestamp_static";
const TZNAME: &str = "America/New_York";
const STAMP: i64 = 1719755160;
const DATETIME: civil::DateTime = civil::date(2024, 6, 30).at(9, 46, 0, 0);

@ -46,7 +46,7 @@ pub fn run(p: &mut Parser) -> anyhow::Result<()> {
args::configure(p, USAGE, &mut [&mut config])?;
let jiff = config.jiff();
let table_path = jiff.join("src/util/crc32/table.rs");
let table_path = jiff.join("src/shared/crc32/table.rs");
write_crc_tables(&table_path).with_context(|| {
format!("failed to write CRC32 data table to {}", table_path.display())
})?;

@ -731,6 +731,8 @@ mod error;
pub mod fmt;
#[cfg(feature = "std")]
mod now;
+#[doc(hidden)]
+pub mod shared;
mod signed_duration;
mod span;
mod timestamp;

@ -1,4 +1,4 @@
-use crate::util::crc32::table::{TABLE, TABLE16};
+use self::table::{TABLE, TABLE16};
mod table;

@ -1,4 +1,6 @@
-pub const TABLE: [u32; 256] = [
+// auto-generated by: jiff-cli generate crc32
+pub(super) const TABLE: [u32; 256] = [
0, 4067132163, 3778769143, 324072436, 3348797215, 904991772, 648144872,
3570033899, 2329499855, 2024987596, 1809983544, 2575936315, 1296289744,
3207089363, 2893594407, 1578318884, 274646895, 3795141740, 4049975192,
@ -44,7 +46,7 @@ pub const TABLE: [u32; 256] = [
1279665062, 1595330642, 2910671697,
];
-pub const TABLE16: [[u32; 256]; 16] = [
+pub(super) const TABLE16: [[u32; 256]; 16] = [
[
0, 4067132163, 3778769143, 324072436, 3348797215, 904991772,
648144872, 3570033899, 2329499855, 2024987596, 1809983544, 2575936315,

src/shared/mod.rs (new file, 166 lines)

@ -0,0 +1,166 @@
/*!
TODO
*/
use core::ops::Range;
pub type TzifStatic = Tzif<
&'static str,
&'static [TzifLocalTimeType],
&'static [TzifTransition],
>;
#[cfg(feature = "alloc")]
pub type TzifOwned = Tzif<
alloc::string::String,
alloc::vec::Vec<TzifLocalTimeType>,
alloc::vec::Vec<TzifTransition>,
>;
#[derive(Debug)]
pub struct Tzif<STRING, TYPES, TRANS> {
pub fixed: TzifFixed<STRING>,
pub types: TYPES,
pub transitions: TRANS,
}
#[derive(Debug)]
pub struct TzifFixed<STRING> {
pub name: Option<STRING>,
pub version: u8,
pub checksum: u32,
pub designations: STRING,
pub posix_tz: Option<PosixTimeZone<STRING>>,
}
// only-jiff-impl-start
impl TzifFixed<&'static str> {
pub const fn to_jiff(
&self,
types: &'static [crate::tz::tzif::LocalTimeType],
trans: &'static [crate::tz::tzif::Transition],
) -> crate::tz::tzif::TzifStatic {
crate::tz::tzif::TzifStatic::from_shared_const(self, types, trans)
}
}
// only-jiff-impl-end
#[derive(Debug)]
pub struct TzifLocalTimeType {
pub offset: i32,
pub is_dst: bool,
pub designation: Range<u8>,
pub indicator: TzifIndicator,
}
// only-jiff-impl-start
impl TzifLocalTimeType {
pub const fn to_jiff(&self) -> crate::tz::tzif::LocalTimeType {
crate::tz::tzif::LocalTimeType::from_shared(self)
}
}
// only-jiff-impl-end
#[derive(Debug)]
pub enum TzifIndicator {
LocalWall,
LocalStandard,
UTStandard,
}
#[derive(Debug)]
pub struct TzifTransition {
pub timestamp: i64,
pub type_index: u8,
}
// only-jiff-impl-start
impl TzifTransition {
pub const fn to_jiff(
&self,
prev_offset: i32,
this_offset: i32,
) -> crate::tz::tzif::Transition {
crate::tz::tzif::Transition::from_shared(
self,
prev_offset,
this_offset,
)
}
}
// only-jiff-impl-end
#[derive(Debug, Eq, PartialEq)]
pub struct PosixTimeZone<ABBREV> {
pub std_abbrev: ABBREV,
pub std_offset: i32,
pub dst: Option<PosixDst<ABBREV>>,
}
#[derive(Debug, Eq, PartialEq)]
pub struct PosixDst<ABBREV> {
pub abbrev: ABBREV,
pub offset: i32,
pub rule: Option<PosixRule>,
}
#[derive(Debug, Eq, PartialEq)]
pub struct PosixRule {
pub start: PosixDayTime,
pub end: PosixDayTime,
}
#[derive(Debug, Eq, PartialEq)]
pub struct PosixDayTime {
pub date: PosixDay,
pub time: i32,
}
#[derive(Debug, Eq, PartialEq)]
pub enum PosixDay {
/// Julian day in a year, not counting leap days.
///
/// Valid range is `1..=365`.
JulianOne(i16),
/// Julian day in a year, counting leap days.
///
/// Valid range is `0..=365`.
JulianZero(i16),
/// The nth weekday of a month.
WeekdayOfMonth {
/// The month.
///
/// Valid range is: `1..=12`.
month: i8,
/// The week.
///
/// Valid range is `1..=5`.
///
/// One interesting thing to note here (or my interpretation anyway) is
/// that a week of `4` means the "4th weekday in a month" whereas a week
/// of `5` means the "last weekday in a month, even if it's the 4th
/// weekday." (An illustrative mapping is sketched just after this enum.)
week: i8,
/// The weekday.
///
/// Valid range is `0..=6`, with `0` corresponding to Sunday.
weekday: i8,
},
}
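// For illustration (derived from the field docs above, not from the parser
// itself): the POSIX rule `M3.5.0`, i.e. "the last Sunday in March", would
// map to
//
//     PosixDay::WeekdayOfMonth { month: 3, week: 5, weekday: 0 }
//
// while `J60` ("March 1, since leap days are not counted") would map to
// `PosixDay::JulianOne(60)`.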
// only-jiff-impl-start
impl PosixTimeZone<&'static str> {
pub const fn to_jiff(&self) -> crate::tz::posix::ReasonablePosixTimeZone {
crate::tz::posix::ReasonablePosixTimeZone::from_shared_const(self)
}
}
// only-jiff-impl-end
// Does not require `alloc`, but is only used when `alloc` is enabled.
#[cfg(feature = "alloc")]
pub(crate) mod crc32;
#[cfg(feature = "alloc")]
pub(crate) mod posix;
#[cfg(feature = "alloc")]
pub(crate) mod tzif;
pub(crate) mod util;

src/shared/posix.rs (new file, 1939 lines)

File diff suppressed because it is too large.

src/shared/tzif.rs (new file, 789 lines)

@ -0,0 +1,789 @@
#![allow(warnings)]
use alloc::{string::String, vec};
use super::{
util::{Byte, Bytes},
PosixTimeZone, TzifFixed, TzifIndicator, TzifLocalTimeType, TzifOwned,
TzifTransition,
};
macro_rules! err {
($($tt:tt)*) => {{
self::Error(alloc::format!($($tt)*))
}}
}
// These are Jiff min and max timestamp (in seconds) values.
//
// The TZif parser will clamp timestamps to this range. It's
// not ideal, but Jiff can't handle values outside of this range
// and completely refusing to use TZif data with pathological
// timestamps in typically irrelevant transitions is bad juju.
//
// Ref: https://github.com/BurntSushi/jiff/issues/163
// Ref: https://github.com/BurntSushi/jiff/pull/164
const TIMESTAMP_MIN: i64 = -377705023201;
const TIMESTAMP_MAX: i64 = 253402207200;
// Similarly for offsets, although in this case, if we find
// an offset outside of this range, we do actually error. This
// is because it could result in truly incorrect datetimes for
// actual transitions.
//
// But our supported offset range is `-25:59:59..=+25:59:59`.
// There's no real time zone with offsets even close to those
// boundaries.
//
// If there is pathological data that we should ignore, then
// we should wait for a real bug report in order to determine
// the right way to ignore/clamp it.
const OFFSET_MIN: i32 = -93599;
const OFFSET_MAX: i32 = 93599;
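// As a quick sanity check of those bounds (hand-worked, for illustration):
// 25 hours, 59 minutes and 59 seconds is 25*3600 + 59*60 + 59 = 93599
// seconds, which is exactly `OFFSET_MAX` (and, negated, `OFFSET_MIN`).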
/// An error that can be returned when parsing.
#[derive(Debug)]
pub struct Error(String);
impl core::fmt::Display for Error {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
core::fmt::Display::fmt(&self.0, f)
}
}
impl TzifOwned {
/// Parses the given data as a TZif formatted file.
///
/// The name given is attached to the `Tzif` value returned, but is
/// otherwise not significant.
///
/// If the given data is not recognized to be valid TZif, then an error is
/// returned.
///
/// In general, callers may assume that it is safe to pass arbitrary or
/// even untrusted data to this function and count on it not panicking
/// or using resources that aren't limited to a small constant factor of
/// the size of the data itself. That is, callers can reliably limit the
/// resources used by limiting the size of the data given to this parse
/// function.
pub(crate) fn parse(
name: Option<String>,
bytes: &[u8],
) -> Result<TzifOwned, Error> {
let original = bytes;
let name = name.into();
let (header32, rest) = Header::parse(4, bytes)
.map_err(|e| err!("failed to parse 32-bit header: {e}"))?;
let (mut tzif, rest) = if header32.version == 0 {
TzifOwned::parse32(name, header32, rest)?
} else {
TzifOwned::parse64(name, header32, rest)?
};
// Compute the checksum using the entire contents of the TZif data.
let tzif_raw_len = (rest.as_ptr() as usize)
.checked_sub(original.as_ptr() as usize)
.unwrap();
let tzif_raw_bytes = &original[..tzif_raw_len];
tzif.fixed.checksum = super::crc32::sum(tzif_raw_bytes);
Ok(tzif)
}
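// A minimal usage sketch (hypothetical caller; `data` is assumed to hold the
// raw bytes of some TZif file, e.g. one read from
// `/usr/share/zoneinfo/America/New_York`):
//
//     let tzif = TzifOwned::parse(
//         Some(alloc::string::String::from("America/New_York")),
//         &data,
//     )?;
//     // `tzif.fixed.posix_tz` now holds the footer's TZ string, if any.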
fn parse32<'b>(
name: Option<String>,
header32: Header,
bytes: &'b [u8],
) -> Result<(TzifOwned, &'b [u8]), Error> {
let mut tzif = TzifOwned {
fixed: TzifFixed {
name,
version: header32.version,
// filled in later
checksum: 0,
designations: String::new(),
posix_tz: None,
},
types: vec![],
transitions: vec![],
};
let rest = tzif.parse_transitions(&header32, bytes)?;
let rest = tzif.parse_transition_types(&header32, rest)?;
let rest = tzif.parse_local_time_types(&header32, rest)?;
let rest = tzif.parse_time_zone_designations(&header32, rest)?;
let rest = tzif.parse_leap_seconds(&header32, rest)?;
let rest = tzif.parse_indicators(&header32, rest)?;
Ok((tzif, rest))
}
fn parse64<'b>(
name: Option<String>,
header32: Header,
bytes: &'b [u8],
) -> Result<(TzifOwned, &'b [u8]), Error> {
let (_, rest) = try_split_at(
"V1 TZif data block",
bytes,
header32.data_block_len()?,
)?;
let (header64, rest) = Header::parse(8, rest)
.map_err(|e| err!("failed to parse 64-bit header: {e}"))?;
let mut tzif = TzifOwned {
fixed: TzifFixed {
name,
version: header64.version,
// filled in later
checksum: 0,
designations: String::new(),
posix_tz: None,
},
types: vec![],
transitions: vec![],
};
let rest = tzif.parse_transitions(&header64, rest)?;
let rest = tzif.parse_transition_types(&header64, rest)?;
let rest = tzif.parse_local_time_types(&header64, rest)?;
let rest = tzif.parse_time_zone_designations(&header64, rest)?;
let rest = tzif.parse_leap_seconds(&header64, rest)?;
let rest = tzif.parse_indicators(&header64, rest)?;
let rest = tzif.parse_footer(&header64, rest)?;
// Note that we specifically and unfortunately do not "validate"
// the POSIX TZ string here. We *should* check that it is
// consistent with the last transition, since RFC 8536 says:
//
// "If the string is nonempty and one or more
// transitions appear in the version 2+ data, the string MUST be
// consistent with the last version 2+ transition."
//
// But in this context, we don't have any of the infrastructure
// to actually do TZ operations on a POSIX time zone. It requires
// civil datetimes and a bunch of other bullshit. This means that
// this verification step doesn't run when using the `jiff-tzdb-static`
// proc macro. However, we do still run it when parsing TZif data
// at runtime.
//
// We otherwise don't check that the TZif data is fully valid. It is
// possible for it to contain superfluous information. For example, a
// non-zero local time type that is never referenced by a transition.
Ok((tzif, rest))
}
fn parse_transitions<'b>(
&mut self,
header: &Header,
bytes: &'b [u8],
) -> Result<&'b [u8], Error> {
let (bytes, rest) = try_split_at(
"transition times data block",
bytes,
header.transition_times_len()?,
)?;
let mut it = bytes.chunks_exact(header.time_size);
// RFC 8536 says: "If there are no transitions, local time for all
// timestamps is specified by the TZ string in the footer if present
// and nonempty; otherwise, it is specified by time type 0."
//
// RFC 8536 also says: "Local time for timestamps before the first
// transition is specified by the first time type (time type
// 0)."
//
// So if there are no transitions, pushing this dummy one will result
// in the desired behavior even when it's the only transition.
// Similarly, since this is the minimum timestamp value, it will
// trigger for any times before the first transition found in the TZif
// data.
self.transitions
.push(TzifTransition { timestamp: TIMESTAMP_MIN, type_index: 0 });
while let Some(chunk) = it.next() {
let mut timestamp = if header.is_32bit() {
i64::from(from_be_bytes_i32(chunk))
} else {
from_be_bytes_i64(chunk)
};
if !(TIMESTAMP_MIN <= timestamp && timestamp <= TIMESTAMP_MAX) {
// We really shouldn't error here just because the Unix
// timestamp is outside what Jiff supports, since what Jiff
// supports is _somewhat_ arbitrary. But Jiff's supported
// range is good enough for all realistic purposes, so we
// just clamp an out-of-range Unix timestamp to the Jiff
// min or max value.
//
// This can't result in the sorting order being wrong, but
// it can result in a transition that is duplicative with
// the dummy transition we inserted above. This should be
// fine.
let clamped = timestamp.clamp(TIMESTAMP_MIN, TIMESTAMP_MAX);
// only-jiff-warn-start
warn!(
"found Unix timestamp {timestamp} that is outside \
Jiff's supported range, clamping to {clamped}",
);
// only-jiff-warn-end
timestamp = clamped;
}
self.transitions.push(TzifTransition {
timestamp,
// We can't fill in the type index yet. We fill this in
// later when we parse the transition types.
type_index: 0,
});
}
assert!(it.remainder().is_empty());
Ok(rest)
}
fn parse_transition_types<'b>(
&mut self,
header: &Header,
bytes: &'b [u8],
) -> Result<&'b [u8], Error> {
let (bytes, rest) = try_split_at(
"transition types data block",
bytes,
header.transition_types_len()?,
)?;
// We start our transition indices at 1 because we always insert a
// dummy first transition corresponding to `Timestamp::MIN`. Its type
// index is always 0, so there's no need to change it here.
for (transition_index, &type_index) in (1..).zip(bytes) {
if usize::from(type_index) >= header.tzh_typecnt {
return Err(err!(
"found transition type index {type_index},
but there are only {} local time types",
header.tzh_typecnt,
));
}
self.transitions[transition_index].type_index = type_index;
}
Ok(rest)
}
fn parse_local_time_types<'b>(
&mut self,
header: &Header,
bytes: &'b [u8],
) -> Result<&'b [u8], Error> {
let (bytes, rest) = try_split_at(
"local time types data block",
bytes,
header.local_time_types_len()?,
)?;
let mut it = bytes.chunks_exact(6);
while let Some(chunk) = it.next() {
let offset = from_be_bytes_i32(&chunk[..4]);
if !(OFFSET_MIN <= offset && offset <= OFFSET_MAX) {
return Err(err!(
"found local time type with out-of-bounds offset: {offset}"
));
}
let is_dst = chunk[4] == 1;
let designation = chunk[5]..chunk[5];
self.types.push(TzifLocalTimeType {
offset,
is_dst,
designation,
indicator: TzifIndicator::LocalWall,
});
}
assert!(it.remainder().is_empty());
Ok(rest)
}
fn parse_time_zone_designations<'b>(
&mut self,
header: &Header,
bytes: &'b [u8],
) -> Result<&'b [u8], Error> {
let (bytes, rest) = try_split_at(
"time zone designations data block",
bytes,
header.time_zone_designations_len()?,
)?;
self.fixed.designations =
String::from_utf8(bytes.to_vec()).map_err(|_| {
err!(
"time zone designations are not valid UTF-8: {:?}",
Bytes(bytes),
)
})?;
// Holy hell, this is brutal. The boundary conditions are crazy.
for (i, typ) in self.types.iter_mut().enumerate() {
let start = usize::from(typ.designation.start);
let Some(suffix) = self.fixed.designations.get(start..) else {
return Err(err!(
"local time type {i} has designation index of {start}, \
but cannot be more than {}",
self.fixed.designations.len(),
));
};
let Some(len) = suffix.find('\x00') else {
return Err(err!(
"local time type {i} has designation index of {start}, \
but could not find NUL terminator after it in \
designations: {:?}",
self.fixed.designations,
));
};
let Some(end) = start.checked_add(len) else {
return Err(err!(
"local time type {i} has designation index of {start}, \
but its length {len} is too big",
));
};
typ.designation.end = u8::try_from(end).map_err(|_| {
err!(
"local time type {i} has designation range of \
{start}..{end}, but end is too big",
)
})?;
}
Ok(rest)
}
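// A worked example of the fix-up loop above (illustrative values, not taken
// from real TZif data): with `designations = "LMT\0EST\0EDT\0"` and a local
// time type whose designation index starts at 4, the NUL is found 3 bytes
// later, so the designation range becomes `4..7`, i.e. "EST".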
/// This parses the leap second corrections in the TZif data.
///
/// Note that we only parse and verify them. We don't actually use them.
/// Jiff effectively ignores leap seconds.
fn parse_leap_seconds<'b>(
&mut self,
header: &Header,
bytes: &'b [u8],
) -> Result<&'b [u8], Error> {
let (bytes, rest) = try_split_at(
"leap seconds data block",
bytes,
header.leap_second_len()?,
)?;
let chunk_len = header
.time_size
.checked_add(4)
.expect("time_size plus 4 fits in usize");
let mut it = bytes.chunks_exact(chunk_len);
while let Some(chunk) = it.next() {
let (occur_bytes, _corr_bytes) = chunk.split_at(header.time_size);
let occur = if header.is_32bit() {
i64::from(from_be_bytes_i32(occur_bytes))
} else {
from_be_bytes_i64(occur_bytes)
};
if !(TIMESTAMP_MIN <= occur && occur <= TIMESTAMP_MAX) {
// only-jiff-warn-start
warn!(
"leap second occurrence {occur} is \
not in Jiff's supported range"
)
// only-jiff-warn-end
}
}
assert!(it.remainder().is_empty());
Ok(rest)
}
fn parse_indicators<'b>(
&mut self,
header: &Header,
bytes: &'b [u8],
) -> Result<&'b [u8], Error> {
let (std_wall_bytes, rest) = try_split_at(
"standard/wall indicators data block",
bytes,
header.standard_wall_len()?,
)?;
let (ut_local_bytes, rest) = try_split_at(
"UT/local indicators data block",
rest,
header.ut_local_len()?,
)?;
if std_wall_bytes.is_empty() && !ut_local_bytes.is_empty() {
// This is a weird case, but technically possible only if all
// UT/local indicators are 0. If any are 1, then it's an error,
// because it would require the corresponding std/wall indicator
// to be 1 too. Which it can't be, because there aren't any. So
// we just check that they're all zeros.
for (i, &byte) in ut_local_bytes.iter().enumerate() {
if byte != 0 {
return Err(err!(
"found UT/local indicator '{byte}' for local time \
type {i}, but it must be 0 since all std/wall \
indicators are 0",
));
}
}
} else if !std_wall_bytes.is_empty() && ut_local_bytes.is_empty() {
for (i, &byte) in std_wall_bytes.iter().enumerate() {
// Indexing is OK because Header guarantees that the number of
// indicators is 0 or equal to the number of types.
self.types[i].indicator = if byte == 0 {
TzifIndicator::LocalWall
} else if byte == 1 {
TzifIndicator::LocalStandard
} else {
return Err(err!(
"found invalid std/wall indicator '{byte}' for \
local time type {i}, it must be 0 or 1",
));
};
}
} else if !std_wall_bytes.is_empty() && !ut_local_bytes.is_empty() {
assert_eq!(std_wall_bytes.len(), ut_local_bytes.len());
let it = std_wall_bytes.iter().zip(ut_local_bytes);
for (i, (&stdwall, &utlocal)) in it.enumerate() {
// Indexing is OK because Header guarantees that the number of
// indicators is 0 or equal to the number of types.
self.types[i].indicator = match (stdwall, utlocal) {
(0, 0) => TzifIndicator::LocalWall,
(1, 0) => TzifIndicator::LocalStandard,
(1, 1) => TzifIndicator::UTStandard,
(0, 1) => {
return Err(err!(
"found illegal ut-wall combination for \
local time type {i}, only local-wall, \
local-standard and ut-standard are allowed",
))
}
_ => {
return Err(err!(
"found illegal std/wall or ut/local value for \
local time type {i}, each must be 0 or 1",
))
}
};
}
} else {
// If they're both empty then we don't need to do anything. Every
// local time type record already has the correct default for this
// case set.
debug_assert!(std_wall_bytes.is_empty());
debug_assert!(ut_local_bytes.is_empty());
}
Ok(rest)
}
fn parse_footer<'b>(
&mut self,
_header: &Header,
bytes: &'b [u8],
) -> Result<&'b [u8], Error> {
if bytes.is_empty() {
return Err(err!(
"invalid V2+ TZif footer, expected \\n, \
but found unexpected end of data",
));
}
if bytes[0] != b'\n' {
return Err(err!(
"invalid V2+ TZif footer, expected {:?}, but found {:?}",
Byte(b'\n'),
Byte(bytes[0]),
));
}
let bytes = &bytes[1..];
// Only scan up to 1KB for the `\n` terminator in case we somehow got
// passed a huge block of bytes.
let toscan = &bytes[..bytes.len().min(1024)];
let Some(nlat) = toscan.iter().position(|&b| b == b'\n') else {
return Err(err!(
"invalid V2 TZif footer, could not find {:?} \
terminator in: {:?}",
Byte(b'\n'),
Bytes(toscan),
));
};
let (bytes, rest) = bytes.split_at(nlat);
if !bytes.is_empty() {
let posix_tz =
PosixTimeZone::parse(bytes).map_err(|e| err!("{e}"))?;
// We could in theory limit TZ strings to their strict POSIX
// definition here for TZif V2, but I don't think there is any
// harm in allowing the extensions in V2 formatted TZif data. Note
// that the GNU tooling allows it via the `TZ` environment variable
// even though POSIX doesn't specify it. This all seems okay to me
// because the V3+ extension is a strict superset of functionality.
if let Some(ref dst) = posix_tz.dst {
if dst.rule.is_none() {
return Err(err!(
"TZ string `{}` in v3+ tzfile has DST \
but no transition rules",
Bytes(bytes),
));
}
}
self.fixed.posix_tz = Some(posix_tz);
}
Ok(&rest[1..])
}
}
/// The header for a TZif formatted file.
///
/// The V2+ TZif format has two headers: one for the V1 data, and a second
/// one following the V1 data block that describes another data block which
/// uses 64-bit timestamps. The two headers have the same format and both
/// use 32-bit big-endian encoded integers.
#[derive(Debug)]
struct Header {
/// The size of the timestamps encoded in the data block.
///
/// This is guaranteed to be either 4 (for V1) or 8 (for the 64-bit header
/// block in V2+).
time_size: usize,
/// The file format version.
///
/// Note that this is either a NUL byte (for version 1), or an ASCII byte
/// corresponding to the version number. That is, `0x32` for `2`, `0x33`
/// for `3` or `0x34` for `4`. Note also that just because zoneinfo might
/// have been recently generated does not mean it uses the latest format
/// version. It seems like newer versions are only compiled by `zic` when
/// they are needed. For example, `America/New_York` on my system (as of
/// `2024-03-25`) has version `0x32`, but `Asia/Jerusalem` has version
/// `0x33`.
version: u8,
/// Number of UT/local indicators stored in the file.
///
/// This is checked to be either equal to `0` or equal to `tzh_typecnt`.
tzh_ttisutcnt: usize,
/// The number of standard/wall indicators stored in the file.
///
/// This is checked to be either equal to `0` or equal to `tzh_typecnt`.
tzh_ttisstdcnt: usize,
/// The number of leap seconds for which data entries are stored in the
/// file.
tzh_leapcnt: usize,
/// The number of transition times for which data entries are stored in
/// the file.
tzh_timecnt: usize,
/// The number of local time types for which data entries are stored in the
/// file.
///
/// This is checked to be at least `1`.
tzh_typecnt: usize,
/// The number of bytes of time zone abbreviation strings stored in the
/// file.
///
/// This is checked to be at least `1`.
tzh_charcnt: usize,
}
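// For reference, a sketch of the fixed 44-byte header layout that
// `Header::parse` below walks through (field widths in bytes):
//
//     "TZif" magic (4) | version (1) | reserved (15)
//     | tzh_ttisutcnt (4) | tzh_ttisstdcnt (4) | tzh_leapcnt (4)
//     | tzh_timecnt (4) | tzh_typecnt (4) | tzh_charcnt (4)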
impl Header {
/// Parse the header record from the given bytes.
///
/// Upon success, return the header and all bytes after the header.
///
/// The given `time_size` must be 4 or 8, corresponding to either the
/// V1 header block or the V2+ header block, respectively.
fn parse(
time_size: usize,
bytes: &[u8],
) -> Result<(Header, &[u8]), Error> {
assert!(time_size == 4 || time_size == 8, "time size must be 4 or 8");
if bytes.len() < 44 {
return Err(err!("invalid header: too short"));
}
let (magic, rest) = bytes.split_at(4);
if magic != b"TZif" {
return Err(err!("invalid header: magic bytes mismatch"));
}
let (version, rest) = rest.split_at(1);
let (_reserved, rest) = rest.split_at(15);
let (tzh_ttisutcnt_bytes, rest) = rest.split_at(4);
let (tzh_ttisstdcnt_bytes, rest) = rest.split_at(4);
let (tzh_leapcnt_bytes, rest) = rest.split_at(4);
let (tzh_timecnt_bytes, rest) = rest.split_at(4);
let (tzh_typecnt_bytes, rest) = rest.split_at(4);
let (tzh_charcnt_bytes, rest) = rest.split_at(4);
let tzh_ttisutcnt = from_be_bytes_u32_to_usize(tzh_ttisutcnt_bytes)
.map_err(|e| err!("failed to parse tzh_ttisutcnt: {e}"))?;
let tzh_ttisstdcnt = from_be_bytes_u32_to_usize(tzh_ttisstdcnt_bytes)
.map_err(|e| err!("failed to parse tzh_ttisstdcnt: {e}"))?;
let tzh_leapcnt = from_be_bytes_u32_to_usize(tzh_leapcnt_bytes)
.map_err(|e| err!("failed to parse tzh_leapcnt: {e}"))?;
let tzh_timecnt = from_be_bytes_u32_to_usize(tzh_timecnt_bytes)
.map_err(|e| err!("failed to parse tzh_timecnt: {e}"))?;
let tzh_typecnt = from_be_bytes_u32_to_usize(tzh_typecnt_bytes)
.map_err(|e| err!("failed to parse tzh_typecnt: {e}"))?;
let tzh_charcnt = from_be_bytes_u32_to_usize(tzh_charcnt_bytes)
.map_err(|e| err!("failed to parse tzh_charcnt: {e}"))?;
if tzh_ttisutcnt != 0 && tzh_ttisutcnt != tzh_typecnt {
return Err(err!(
"expected tzh_ttisutcnt={tzh_ttisutcnt} to be zero \
or equal to tzh_typecnt={tzh_typecnt}",
));
}
if tzh_ttisstdcnt != 0 && tzh_ttisstdcnt != tzh_typecnt {
return Err(err!(
"expected tzh_ttisstdcnt={tzh_ttisstdcnt} to be zero \
or equal to tzh_typecnt={tzh_typecnt}",
));
}
if tzh_typecnt < 1 {
return Err(err!(
"expected tzh_typecnt={tzh_typecnt} to be at least 1",
));
}
if tzh_charcnt < 1 {
return Err(err!(
"expected tzh_charcnt={tzh_charcnt} to be at least 1",
));
}
let header = Header {
time_size,
version: version[0],
tzh_ttisutcnt,
tzh_ttisstdcnt,
tzh_leapcnt,
tzh_timecnt,
tzh_typecnt,
tzh_charcnt,
};
Ok((header, rest))
}
/// Returns true if this header is for a 32-bit data block.
///
/// When false, it is guaranteed that this header is for a 64-bit data
/// block.
fn is_32bit(&self) -> bool {
self.time_size == 4
}
/// Returns the size of the data block, in bytes, for this header.
///
/// This returns an error if the arithmetic required to compute the
/// length would overflow.
///
/// This is useful for, e.g., skipping over the 32-bit V1 data block in
/// V2+ TZif formatted files.
fn data_block_len(&self) -> Result<usize, Error> {
let a = self.transition_times_len()?;
let b = self.transition_types_len()?;
let c = self.local_time_types_len()?;
let d = self.time_zone_designations_len()?;
let e = self.leap_second_len()?;
let f = self.standard_wall_len()?;
let g = self.ut_local_len()?;
a.checked_add(b)
.and_then(|z| z.checked_add(c))
.and_then(|z| z.checked_add(d))
.and_then(|z| z.checked_add(e))
.and_then(|z| z.checked_add(f))
.and_then(|z| z.checked_add(g))
.ok_or_else(|| {
err!(
"length of data block in V{} tzfile is too big",
self.version
)
})
}
fn transition_times_len(&self) -> Result<usize, Error> {
self.tzh_timecnt.checked_mul(self.time_size).ok_or_else(|| {
err!("tzh_timecnt value {} is too big", self.tzh_timecnt)
})
}
fn transition_types_len(&self) -> Result<usize, Error> {
Ok(self.tzh_timecnt)
}
fn local_time_types_len(&self) -> Result<usize, Error> {
self.tzh_typecnt.checked_mul(6).ok_or_else(|| {
err!("tzh_typecnt value {} is too big", self.tzh_typecnt)
})
}
fn time_zone_designations_len(&self) -> Result<usize, Error> {
Ok(self.tzh_charcnt)
}
fn leap_second_len(&self) -> Result<usize, Error> {
let record_len = self
.time_size
.checked_add(4)
.expect("4-or-8 plus 4 always fits in usize");
self.tzh_leapcnt.checked_mul(record_len).ok_or_else(|| {
err!("tzh_leapcnt value {} is too big", self.tzh_leapcnt)
})
}
fn standard_wall_len(&self) -> Result<usize, Error> {
Ok(self.tzh_ttisstdcnt)
}
fn ut_local_len(&self) -> Result<usize, Error> {
Ok(self.tzh_ttisutcnt)
}
}
/// Splits the given slice of bytes at the index given.
///
/// If the index is out of range (greater than `bytes.len()`) then an error is
/// returned. The error message will include the `what` string given, which is
/// meant to describe the thing being split.
fn try_split_at<'b>(
what: &'static str,
bytes: &'b [u8],
at: usize,
) -> Result<(&'b [u8], &'b [u8]), Error> {
if at > bytes.len() {
Err(err!(
"expected at least {at} bytes for {what}, \
but found only {} bytes",
bytes.len(),
))
} else {
Ok(bytes.split_at(at))
}
}
/// Interprets the given slice as an unsigned 32-bit big endian integer,
/// attempts to convert it to a `usize` and returns it.
///
/// # Panics
///
/// When `bytes.len() != 4`.
///
/// # Errors
///
/// This errors if the `u32` parsed from the given bytes cannot fit in a
/// `usize`.
fn from_be_bytes_u32_to_usize(bytes: &[u8]) -> Result<usize, Error> {
let n = from_be_bytes_u32(bytes);
usize::try_from(n).map_err(|_| {
err!(
"failed to parse integer {n} (too big, max allowed is {}",
usize::MAX
)
})
}
/// Interprets the given slice as an unsigned 32-bit big endian integer and
/// returns it.
///
/// # Panics
///
/// When `bytes.len() != 4`.
fn from_be_bytes_u32(bytes: &[u8]) -> u32 {
u32::from_be_bytes(bytes.try_into().unwrap())
}
/// Interprets the given slice as a signed 32-bit big endian integer and
/// returns it.
///
/// # Panics
///
/// When `bytes.len() != 4`.
fn from_be_bytes_i32(bytes: &[u8]) -> i32 {
i32::from_be_bytes(bytes.try_into().unwrap())
}
/// Interprets the given slice as a signed 64-bit big endian integer and
/// returns it.
///
/// # Panics
///
/// When `bytes.len() != 8`.
fn from_be_bytes_i64(bytes: &[u8]) -> i64 {
i64::from_be_bytes(bytes.try_into().unwrap())
}

src/shared/util.rs (new file, 128 lines)

@ -0,0 +1,128 @@
/// Provides a convenient `Debug` implementation for a `u8`.
///
/// The `Debug` impl treats the byte as an ASCII character and emits a human
/// readable representation of it. If the byte isn't ASCII, then it's
/// emitted as a hex escape sequence.
#[derive(Clone, Copy)]
pub(crate) struct Byte(pub u8);
impl core::fmt::Display for Byte {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
if self.0 == b' ' {
return write!(f, " ");
}
// 10 bytes is enough for any output from ascii::escape_default.
let mut bytes = [0u8; 10];
let mut len = 0;
for (i, mut b) in core::ascii::escape_default(self.0).enumerate() {
// capitalize \xab to \xAB
if i >= 2 && b'a' <= b && b <= b'f' {
b -= 32;
}
bytes[len] = b;
len += 1;
}
write!(f, "{}", core::str::from_utf8(&bytes[..len]).unwrap())
}
}
impl core::fmt::Debug for Byte {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
write!(f, "\"")?;
core::fmt::Display::fmt(self, f)?;
write!(f, "\"")?;
Ok(())
}
}
/// Provides a convenient `Debug` implementation for `&[u8]`.
///
/// This generally works best when the bytes are presumed to be mostly
/// UTF-8, but will work for anything. For any bytes that aren't UTF-8,
/// they are emitted as hex escape sequences.
#[derive(Clone, Copy)]
pub(crate) struct Bytes<'a>(pub &'a [u8]);
impl<'a> core::fmt::Display for Bytes<'a> {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
// This is a sad re-implementation of a similar impl found in bstr.
let mut bytes = self.0;
while let Some(result) = utf8_decode(bytes) {
let ch = match result {
Ok(ch) => ch,
Err(byte) => {
write!(f, r"\x{:02x}", byte)?;
bytes = &bytes[1..];
continue;
}
};
bytes = &bytes[ch.len_utf8()..];
match ch {
'\0' => write!(f, "\\0")?,
'\x01'..='\x7f' => {
write!(f, "{}", (ch as u8).escape_ascii())?;
}
_ => write!(f, "{}", ch.escape_debug())?,
}
}
Ok(())
}
}
impl<'a> core::fmt::Debug for Bytes<'a> {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
write!(f, "\"")?;
core::fmt::Display::fmt(self, f)?;
write!(f, "\"")?;
Ok(())
}
}
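// A hand-worked illustration of the escaping above (not output captured from
// an actual run): the `Debug` impls wrap the `Display` output in quotes, so
// `format!("{:?}", Byte(b'a'))` yields `"a"`, `format!("{:?}", Byte(0xFF))`
// yields `"\xFF"` (hex escapes are upper-cased), and
// `format!("{:?}", Bytes(b"TZ\xFFif"))` yields `"TZ\xffif"` (the `Bytes` path
// leaves its hex escapes lower-case).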
/// Decodes the next UTF-8 encoded codepoint from the given byte slice.
///
/// If no valid encoding of a codepoint exists at the beginning of the
/// given byte slice, then the first byte is returned instead.
///
/// This returns `None` if and only if `bytes` is empty.
///
/// This never panics.
///
/// *WARNING*: This is not designed for performance. If you're looking for
/// a fast UTF-8 decoder, this is not it. If you feel like you need one in
/// this crate, then please file an issue and discuss your use case.
pub(crate) fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
/// Given a UTF-8 leading byte, this returns the total number of code
/// units in the following encoded codepoint.
///
/// If the given byte is not a valid UTF-8 leading byte, then this
/// returns `None`.
fn utf8_len(byte: u8) -> Option<usize> {
if byte <= 0x7F {
return Some(1);
} else if byte & 0b1100_0000 == 0b1000_0000 {
return None;
} else if byte <= 0b1101_1111 {
Some(2)
} else if byte <= 0b1110_1111 {
Some(3)
} else if byte <= 0b1111_0111 {
Some(4)
} else {
None
}
}
if bytes.is_empty() {
return None;
}
let len = match utf8_len(bytes[0]) {
None => return Some(Err(bytes[0])),
Some(len) if len > bytes.len() => return Some(Err(bytes[0])),
Some(1) => return Some(Ok(char::from(bytes[0]))),
Some(len) => len,
};
match core::str::from_utf8(&bytes[..len]) {
Ok(s) => Some(Ok(s.chars().next().unwrap())),
Err(_) => Some(Err(bytes[0])),
}
}
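// A few hand-worked cases for the contract documented above (illustrative):
//
//     utf8_decode(b"abc")          // Some(Ok('a'))   -- ASCII fast path
//     utf8_decode("é!".as_bytes()) // Some(Ok('é'))   -- valid two-byte sequence
//     utf8_decode(&[0xFF, b'a'])   // Some(Err(0xFF)) -- invalid leading byte
//     utf8_decode(&[])             // None            -- empty input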

@ -101,15 +101,13 @@ mod ambiguous;
mod concatenated;
mod db;
mod offset;
#[cfg(feature = "alloc")]
pub(crate) mod posix;
#[cfg(feature = "tz-system")]
mod system;
#[cfg(all(test, feature = "alloc"))]
mod testdata;
mod timezone;
#[cfg(feature = "alloc")]
-mod tzif;
+pub(crate) mod tzif;
// See module comment for WIP status. :-(
#[cfg(test)]
mod zic;

@ -232,7 +232,7 @@ impl Offset {
// warrant its existence. And I think I'd rather `Offset::hms` be const and
// exported instead of this monstrosity.
#[inline]
-const fn constant_seconds(seconds: i32) -> Offset {
+pub(crate) const fn constant_seconds(seconds: i32) -> Offset {
if !t::SpanZoneOffset::contains(seconds) {
panic!("invalid time zone offset seconds")
}

File diff suppressed because it is too large.

@ -1,6 +1,6 @@
use alloc::string::ToString;
-use crate::tz::tzif::Tzif;
+use crate::tz::tzif::TzifOwned;
/// A concatenated list of TZif data with a header and an index block.
///
@ -94,20 +94,20 @@ impl TzifTestFile {
/// Parse this test TZif data into a structured representation.
#[cfg(not(miri))]
-pub(crate) fn parse(self) -> Tzif {
+pub(crate) fn parse(self) -> TzifOwned {
let name = Some(self.name.to_string());
-Tzif::parse(name, self.data).unwrap_or_else(|err| {
+TzifOwned::parse(name, self.data).unwrap_or_else(|err| {
panic!("failed to parse TZif test file for {:?}: {err}", self.name)
})
}
/// Parse this test TZif data as if it were V1.
#[cfg(not(miri))]
-pub(crate) fn parse_v1(self) -> Tzif {
+pub(crate) fn parse_v1(self) -> TzifOwned {
let name = Some(self.name.to_string());
let mut data = self.data.to_vec();
data[4] = 0;
-Tzif::parse(name, &data).unwrap_or_else(|err| {
+TzifOwned::parse(name, &data).unwrap_or_else(|err| {
panic!(
"failed to parse V1 TZif test file for {:?}: {err}",
self.name

@ -1534,7 +1534,7 @@ impl core::fmt::Display for TimeZonePosix {
#[cfg(feature = "alloc")]
#[derive(Eq, PartialEq)]
struct TimeZoneTzif {
-tzif: crate::tz::tzif::Tzif,
+tzif: crate::tz::tzif::TzifOwned,
}
#[cfg(feature = "alloc")]

File diff suppressed because it is too large.

@ -46,11 +46,6 @@ impl<const N: usize> ArrayStr<N> {
Some(ArrayStr { bytes, len: len as u8 })
}
-/// Returns the capacity of this fixed string.
-pub(crate) const fn capacity() -> usize {
-N
-}
/// Append the bytes given to the end of this string.
///
/// If the capacity would be exceeded, then this is a no-op and `false`

src/util/constant.rs (new file, 13 lines)

@ -0,0 +1,13 @@
/// Unwrap an `Option<T>` in a `const` context.
///
/// If it fails, panics with the given message.
macro_rules! unwrap {
($val:expr, $msg:expr$(,)?) => {
match $val {
Some(val) => val,
None => panic!($msg),
}
};
}
pub(crate) use unwrap;
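// A minimal usage sketch (hypothetical constant, assuming the macro is in
// scope via `use crate::util::constant::unwrap`):
//
//     const SECONDS_PER_DAY: i64 =
//         unwrap!(i64::checked_mul(24, 60 * 60), "seconds per day overflowed");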

@ -4,91 +4,8 @@ Provides convenience routines for escaping raw bytes.
This was copied from `regex-automata` with a few light edits.
*/
use crate::util::utf8;
/// Provides a convenient `Debug` implementation for a `u8`.
///
/// The `Debug` impl treats the byte as an ASCII, and emits a human readable
/// representation of it. If the byte isn't ASCII, then it's emitted as a hex
/// escape sequence.
#[derive(Clone, Copy)]
pub struct Byte(pub u8);
impl core::fmt::Display for Byte {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
if self.0 == b' ' {
return write!(f, " ");
}
// 10 bytes is enough to cover any output from ascii::escape_default.
let mut bytes = [0u8; 10];
let mut len = 0;
for (i, mut b) in core::ascii::escape_default(self.0).enumerate() {
// capitalize \xab to \xAB
if i >= 2 && b'a' <= b && b <= b'f' {
b -= 32;
}
bytes[len] = b;
len += 1;
}
write!(f, "{}", core::str::from_utf8(&bytes[..len]).unwrap())
}
}
impl core::fmt::Debug for Byte {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
write!(f, "\"")?;
core::fmt::Display::fmt(self, f)?;
write!(f, "\"")?;
Ok(())
}
}
/// Provides a convenient `Debug` implementation for `&[u8]`.
///
/// This generally works best when the bytes are presumed to be mostly UTF-8,
/// but will work for anything. For any bytes that aren't UTF-8, they are
/// emitted as hex escape sequences.
#[derive(Clone, Copy)]
pub struct Bytes<'a>(pub &'a [u8]);
impl<'a> core::fmt::Display for Bytes<'a> {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
// This is a sad re-implementation of a similar impl found in bstr.
let mut bytes = self.0;
while let Some(result) = utf8::decode(bytes) {
let ch = match result {
Ok(ch) => ch,
Err(byte) => {
write!(f, r"\x{:02x}", byte)?;
bytes = &bytes[1..];
continue;
}
};
bytes = &bytes[ch.len_utf8()..];
match ch {
'\0' => write!(f, "\\0")?,
// ASCII control characters except \0, \n, \r, \t
'\x01'..='\x08'
| '\x0b'
| '\x0c'
| '\x0e'..='\x19'
| '\x7f' => {
write!(f, "\\x{:02x}", u32::from(ch))?;
}
'\n' | '\r' | '\t' | _ => {
write!(f, "{}", ch.escape_debug())?;
}
}
}
Ok(())
}
}
impl<'a> core::fmt::Debug for Bytes<'a> {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
write!(f, "\"")?;
core::fmt::Display::fmt(self, f)?;
write!(f, "\"")?;
Ok(())
}
}
// These were originally defined here, but they got moved to
// shared since they're needed there. We re-export them here
// because this is really where they should live, but they're
// in shared because `jiff-tzdb-static` needs them.
pub(crate) use crate::shared::util::{Byte, Bytes};

@ -7,7 +7,7 @@ pub(crate) mod borrow;
))]
pub(crate) mod cache;
pub(crate) mod common;
-pub(crate) mod crc32;
+pub(crate) mod constant;
pub(crate) mod escape;
#[cfg(feature = "std")]
pub(crate) mod fs;

@ -13,19 +13,7 @@ use core::cmp::Ordering;
/// fast UTF-8 decoder, this is not it. If you feel like you need one in this
/// crate, then please file an issue and discuss your use case.
pub(crate) fn decode(bytes: &[u8]) -> Option<Result<char, u8>> {
if bytes.is_empty() {
return None;
}
let len = match utf8_len(bytes[0]) {
None => return Some(Err(bytes[0])),
Some(len) if len > bytes.len() => return Some(Err(bytes[0])),
Some(1) => return Some(Ok(char::from(bytes[0]))),
Some(len) => len,
};
match core::str::from_utf8(&bytes[..len]) {
Ok(s) => Some(Ok(s.chars().next().unwrap())),
Err(_) => Some(Err(bytes[0])),
}
crate::shared::util::utf8_decode(bytes)
}
/// Like std's `eq_ignore_ascii_case`, but returns a full `Ordering`.
@ -58,24 +46,3 @@ pub(crate) fn cmp_ignore_ascii_case_bytes(s1: &[u8], s2: &[u8]) -> Ordering {
}
}
}
/// Given a UTF-8 leading byte, this returns the total number of code units
/// in the following encoded codepoint.
///
/// If the given byte is not a valid UTF-8 leading byte, then this returns
/// `None`.
fn utf8_len(byte: u8) -> Option<usize> {
if byte <= 0x7F {
return Some(1);
} else if byte & 0b1100_0000 == 0b1000_0000 {
return None;
} else if byte <= 0b1101_1111 {
Some(2)
} else if byte <= 0b1110_1111 {
Some(3)
} else if byte <= 0b1111_0111 {
Some(4)
} else {
None
}
}