mirror of https://github.com/BurntSushi/jiff.git (synced 2025-12-23 08:47:45 +00:00)
wip

parent 8bea2f5533, commit 007a4bffe9
20 changed files with 3560 additions and 2914 deletions
@@ -111,7 +111,7 @@ fn add_years_months_days(c: &mut Criterion) {
/// This is useful when you have a known time zone already and want to get
/// a specific instant for many distinct civil datetimes in that time zone.
fn to_timestamp_static(c: &mut Criterion) {
-const NAME: &str = "civil_datetime/to_datetime_static";
+const NAME: &str = "civil_datetime/to_timestamp_static";
const TZNAME: &str = "America/New_York";
const STAMP: i64 = 1719755160;
const DATETIME: civil::DateTime = civil::date(2024, 6, 30).at(9, 46, 0, 0);
@@ -46,7 +46,7 @@ pub fn run(p: &mut Parser) -> anyhow::Result<()> {
args::configure(p, USAGE, &mut [&mut config])?;

let jiff = config.jiff();
-let table_path = jiff.join("src/util/crc32/table.rs");
+let table_path = jiff.join("src/shared/crc32/table.rs");
write_crc_tables(&table_path).with_context(|| {
format!("failed to write CRC32 data table to {}", table_path.display())
})?;
@@ -731,6 +731,8 @@ mod error;
pub mod fmt;
#[cfg(feature = "std")]
mod now;
+#[doc(hidden)]
+pub mod shared;
mod signed_duration;
mod span;
mod timestamp;
@@ -1,4 +1,4 @@
-use crate::util::crc32::table::{TABLE, TABLE16};
+use self::table::{TABLE, TABLE16};

mod table;
@@ -1,4 +1,6 @@
-pub const TABLE: [u32; 256] = [
+// auto-generated by: jiff-cli generate crc32
+
+pub(super) const TABLE: [u32; 256] = [
0, 4067132163, 3778769143, 324072436, 3348797215, 904991772, 648144872,
3570033899, 2329499855, 2024987596, 1809983544, 2575936315, 1296289744,
3207089363, 2893594407, 1578318884, 274646895, 3795141740, 4049975192,
@@ -44,7 +46,7 @@ pub const TABLE: [u32; 256] = [
1279665062, 1595330642, 2910671697,
];

-pub const TABLE16: [[u32; 256]; 16] = [
+pub(super) const TABLE16: [[u32; 256]; 16] = [
[
0, 4067132163, 3778769143, 324072436, 3348797215, 904991772,
648144872, 3570033899, 2329499855, 2024987596, 1809983544, 2575936315,
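An editorial aside, not part of the diff: a 256-entry table like TABLE above is typically consumed by the textbook byte-at-a-time CRC update loop sketched below. This is a generic illustration; the diff does not show jiff's actual checksum routine (`super::crc32::sum`), and the initial value and final inversion here are common conventions assumed for the sketch, not taken from the commit.

    // Illustrative sketch only: generic reflected, table-driven CRC update.
    // The `!0` init and final inversion are assumptions, not jiff's code.
    fn crc_bytewise(table: &[u32; 256], data: &[u8]) -> u32 {
        let mut crc: u32 = !0;
        for &byte in data {
            // Combine the low byte of the running CRC with the input byte,
            // then look up the precomputed remainder for that byte.
            let idx = ((crc ^ u32::from(byte)) & 0xFF) as usize;
            crc = table[idx] ^ (crc >> 8);
        }
        !crc
    }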
src/shared/mod.rs (new file, 166 lines)
@@ -0,0 +1,166 @@
/*!
TODO
*/

use core::ops::Range;

pub type TzifStatic = Tzif<
&'static str,
&'static [TzifLocalTimeType],
&'static [TzifTransition],
>;

#[cfg(feature = "alloc")]
pub type TzifOwned = Tzif<
alloc::string::String,
alloc::vec::Vec<TzifLocalTimeType>,
alloc::vec::Vec<TzifTransition>,
>;

#[derive(Debug)]
pub struct Tzif<STRING, TYPES, TRANS> {
pub fixed: TzifFixed<STRING>,
pub types: TYPES,
pub transitions: TRANS,
}

#[derive(Debug)]
pub struct TzifFixed<STRING> {
pub name: Option<STRING>,
pub version: u8,
pub checksum: u32,
pub designations: STRING,
pub posix_tz: Option<PosixTimeZone<STRING>>,
}

// only-jiff-impl-start
impl TzifFixed<&'static str> {
pub const fn to_jiff(
&self,
types: &'static [crate::tz::tzif::LocalTimeType],
trans: &'static [crate::tz::tzif::Transition],
) -> crate::tz::tzif::TzifStatic {
crate::tz::tzif::TzifStatic::from_shared_const(self, types, trans)
}
}
// only-jiff-impl-end

#[derive(Debug)]
pub struct TzifLocalTimeType {
pub offset: i32,
pub is_dst: bool,
pub designation: Range<u8>,
pub indicator: TzifIndicator,
}

// only-jiff-impl-start
impl TzifLocalTimeType {
pub const fn to_jiff(&self) -> crate::tz::tzif::LocalTimeType {
crate::tz::tzif::LocalTimeType::from_shared(self)
}
}
// only-jiff-impl-end

#[derive(Debug)]
pub enum TzifIndicator {
LocalWall,
LocalStandard,
UTStandard,
}

#[derive(Debug)]
pub struct TzifTransition {
pub timestamp: i64,
pub type_index: u8,
}

// only-jiff-impl-start
impl TzifTransition {
pub const fn to_jiff(
&self,
prev_offset: i32,
this_offset: i32,
) -> crate::tz::tzif::Transition {
crate::tz::tzif::Transition::from_shared(
self,
prev_offset,
this_offset,
)
}
}
// only-jiff-impl-end

#[derive(Debug, Eq, PartialEq)]
pub struct PosixTimeZone<ABBREV> {
pub std_abbrev: ABBREV,
pub std_offset: i32,
pub dst: Option<PosixDst<ABBREV>>,
}

#[derive(Debug, Eq, PartialEq)]
pub struct PosixDst<ABBREV> {
pub abbrev: ABBREV,
pub offset: i32,
pub rule: Option<PosixRule>,
}

#[derive(Debug, Eq, PartialEq)]
pub struct PosixRule {
pub start: PosixDayTime,
pub end: PosixDayTime,
}

#[derive(Debug, Eq, PartialEq)]
pub struct PosixDayTime {
pub date: PosixDay,
pub time: i32,
}

#[derive(Debug, Eq, PartialEq)]
pub enum PosixDay {
/// Julian day in a year, no counting for leap days.
///
/// Valid range is `1..=365`.
JulianOne(i16),
/// Julian day in a year, counting for leap days.
///
/// Valid range is `0..=365`.
JulianZero(i16),
/// The nth weekday of a month.
WeekdayOfMonth {
/// The month.
///
/// Valid range is: `1..=12`.
month: i8,
/// The week.
///
/// Valid range is `1..=5`.
///
/// One interesting thing to note here (or my interpretation anyway)
/// is that a week of `4` means the "4th weekday in a month" whereas
/// a week of `5` means the "last weekday in a month, even if it's the
/// 4th weekday."
week: i8,
/// The weekday.
///
/// Valid range is `0..=6`, with `0` corresponding to Sunday.
weekday: i8,
},
}

// only-jiff-impl-start
impl PosixTimeZone<&'static str> {
pub const fn to_jiff(&self) -> crate::tz::posix::ReasonablePosixTimeZone {
crate::tz::posix::ReasonablePosixTimeZone::from_shared_const(self)
}
}
// only-jiff-impl-end

// Does not require `alloc`, but is only used when `alloc` is enabled.
#[cfg(feature = "alloc")]
pub(crate) mod crc32;
#[cfg(feature = "alloc")]
pub(crate) mod posix;
#[cfg(feature = "alloc")]
pub(crate) mod tzif;
pub(crate) mod util;
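An illustrative note on the `PosixDay::WeekdayOfMonth` encoding above (this example is not part of the diff, and the helper function is hypothetical): the current US DST rules, "second Sunday in March" and "first Sunday in November", would be written as shown below, while `week: 5` always selects the last matching weekday of the month.

    // Hypothetical values for illustration; field meanings follow the doc
    // comments on `PosixDay::WeekdayOfMonth` above.
    fn example_us_dst_rules() -> (PosixDay, PosixDay) {
        let dst_start = PosixDay::WeekdayOfMonth {
            month: 3,   // March
            week: 2,    // the second...
            weekday: 0, // ...Sunday
        };
        let dst_end = PosixDay::WeekdayOfMonth {
            month: 11,  // November
            week: 1,    // the first Sunday
            weekday: 0,
        };
        (dst_start, dst_end)
    }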
src/shared/posix.rs (new file, 1939 lines): diff suppressed because it is too large
src/shared/tzif.rs (new file, 789 lines)
@@ -0,0 +1,789 @@
|
|||
#![allow(warnings)]
|
||||
|
||||
use alloc::{string::String, vec};
|
||||
|
||||
use super::{
|
||||
util::{Byte, Bytes},
|
||||
PosixTimeZone, TzifFixed, TzifIndicator, TzifLocalTimeType, TzifOwned,
|
||||
TzifTransition,
|
||||
};
|
||||
|
||||
macro_rules! err {
|
||||
($($tt:tt)*) => {{
|
||||
self::Error(alloc::format!($($tt)*))
|
||||
}}
|
||||
}
|
||||
|
||||
// These are Jiff min and max timestamp (in seconds) values.
|
||||
//
|
||||
// The TZif parser will clamp timestamps to this range. It's
|
||||
// not ideal, but Jiff can't handle values outside of this range
|
||||
// and completely refusing to use TZif data with pathological
|
||||
// timestamps in typically irrelevant transitions is bad juju.
|
||||
//
|
||||
// Ref: https://github.com/BurntSushi/jiff/issues/163
|
||||
// Ref: https://github.com/BurntSushi/jiff/pull/164
|
||||
const TIMESTAMP_MIN: i64 = -377705023201;
|
||||
const TIMESTAMP_MAX: i64 = 253402207200;
|
||||
|
||||
// Similarly for offsets, although in this case, if we find
|
||||
// an offset outside of this range, we do actually error. This
|
||||
// is because it could result in true incorrect datetimes for
|
||||
// actual transitions.
|
||||
//
|
||||
// But our supported offset range is `-25:59:59..=+25:59:59`.
|
||||
// There's no real time zone with offsets even close to those
|
||||
// boundaries.
|
||||
//
|
||||
// If there is pathological data that we should ignore, then
|
||||
// we should wait for a real bug report in order to determine
|
||||
// the right way to ignore/clamp it.
|
||||
const OFFSET_MIN: i32 = -93599;
|
||||
const OFFSET_MAX: i32 = 93599;
|
||||
|
||||
/// An error that can be returned when parsing.
|
||||
#[derive(Debug)]
|
||||
pub struct Error(String);
|
||||
|
||||
impl core::fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
core::fmt::Display::fmt(&self.0, f)
|
||||
}
|
||||
}
|
||||
|
||||
impl TzifOwned {
|
||||
/// Parses the given data as a TZif formatted file.
|
||||
///
|
||||
/// The name given is attached to the `Tzif` value returned, but is
|
||||
/// otherwise not significant.
|
||||
///
|
||||
/// If the given data is not recognized to be valid TZif, then an error is
|
||||
/// returned.
|
||||
///
|
||||
/// In general, callers may assume that it is safe to pass arbitrary or
|
||||
/// even untrusted data to this function and count on it not panicking
|
||||
/// or using resources that aren't limited to a small constant factor of
|
||||
/// the size of the data itself. That is, callers can reliably limit the
|
||||
/// resources used by limiting the size of the data given to this parse
|
||||
/// function.
|
||||
pub(crate) fn parse(
|
||||
name: Option<String>,
|
||||
bytes: &[u8],
|
||||
) -> Result<TzifOwned, Error> {
|
||||
let original = bytes;
|
||||
let name = name.into();
|
||||
let (header32, rest) = Header::parse(4, bytes)
|
||||
.map_err(|e| err!("failed to parse 32-bit header: {e}"))?;
|
||||
let (mut tzif, rest) = if header32.version == 0 {
|
||||
TzifOwned::parse32(name, header32, rest)?
|
||||
} else {
|
||||
TzifOwned::parse64(name, header32, rest)?
|
||||
};
|
||||
// Compute the checksum using the entire contents of the TZif data.
|
||||
let tzif_raw_len = (rest.as_ptr() as usize)
|
||||
.checked_sub(original.as_ptr() as usize)
|
||||
.unwrap();
|
||||
let tzif_raw_bytes = &original[..tzif_raw_len];
|
||||
tzif.fixed.checksum = super::crc32::sum(tzif_raw_bytes);
|
||||
Ok(tzif)
|
||||
}
|
||||
|
||||
fn parse32<'b>(
|
||||
name: Option<String>,
|
||||
header32: Header,
|
||||
bytes: &'b [u8],
|
||||
) -> Result<(TzifOwned, &'b [u8]), Error> {
|
||||
let mut tzif = TzifOwned {
|
||||
fixed: TzifFixed {
|
||||
name,
|
||||
version: header32.version,
|
||||
// filled in later
|
||||
checksum: 0,
|
||||
designations: String::new(),
|
||||
posix_tz: None,
|
||||
},
|
||||
types: vec![],
|
||||
transitions: vec![],
|
||||
};
|
||||
let rest = tzif.parse_transitions(&header32, bytes)?;
|
||||
let rest = tzif.parse_transition_types(&header32, rest)?;
|
||||
let rest = tzif.parse_local_time_types(&header32, rest)?;
|
||||
let rest = tzif.parse_time_zone_designations(&header32, rest)?;
|
||||
let rest = tzif.parse_leap_seconds(&header32, rest)?;
|
||||
let rest = tzif.parse_indicators(&header32, rest)?;
|
||||
Ok((tzif, rest))
|
||||
}
|
||||
|
||||
fn parse64<'b>(
|
||||
name: Option<String>,
|
||||
header32: Header,
|
||||
bytes: &'b [u8],
|
||||
) -> Result<(TzifOwned, &'b [u8]), Error> {
|
||||
let (_, rest) = try_split_at(
|
||||
"V1 TZif data block",
|
||||
bytes,
|
||||
header32.data_block_len()?,
|
||||
)?;
|
||||
let (header64, rest) = Header::parse(8, rest)
|
||||
.map_err(|e| err!("failed to parse 64-bit header: {e}"))?;
|
||||
let mut tzif = TzifOwned {
|
||||
fixed: TzifFixed {
|
||||
name,
|
||||
version: header64.version,
|
||||
// filled in later
|
||||
checksum: 0,
|
||||
designations: String::new(),
|
||||
posix_tz: None,
|
||||
},
|
||||
types: vec![],
|
||||
transitions: vec![],
|
||||
};
|
||||
let rest = tzif.parse_transitions(&header64, rest)?;
|
||||
let rest = tzif.parse_transition_types(&header64, rest)?;
|
||||
let rest = tzif.parse_local_time_types(&header64, rest)?;
|
||||
let rest = tzif.parse_time_zone_designations(&header64, rest)?;
|
||||
let rest = tzif.parse_leap_seconds(&header64, rest)?;
|
||||
let rest = tzif.parse_indicators(&header64, rest)?;
|
||||
let rest = tzif.parse_footer(&header64, rest)?;
|
||||
// Note that we specifically and unfortunately do not "validate"
|
||||
// the POSIX TZ string here. We *should* check that it is
|
||||
// consistent with the last transition. Since:
|
||||
//
|
||||
// RFC 8536 says, "If the string is nonempty and one or more
|
||||
// transitions appear in the version 2+ data, the string MUST be
|
||||
// consistent with the last version 2+ transition."
|
||||
//
|
||||
// But in this context, we don't have any of the infrastructure
|
||||
// to actually do TZ operations on a POSIX time zone. It requires
|
||||
// civil datetimes and a bunch of other bullshit. This means that
|
||||
// this verification step doesn't run when using the `jiff-tzdb-static`
|
||||
// proc macro. However, we do still run it when parsing TZif data
|
||||
// at runtime.
|
||||
//
|
||||
// We otherwise don't check that the TZif data is fully valid. It is
|
||||
// possible for it to contain superfluous information. For example, a
|
||||
// non-zero local time type that is never referenced by a transition.
|
||||
Ok((tzif, rest))
|
||||
}
|
||||
|
||||
fn parse_transitions<'b>(
|
||||
&mut self,
|
||||
header: &Header,
|
||||
bytes: &'b [u8],
|
||||
) -> Result<&'b [u8], Error> {
|
||||
let (bytes, rest) = try_split_at(
|
||||
"transition times data block",
|
||||
bytes,
|
||||
header.transition_times_len()?,
|
||||
)?;
|
||||
let mut it = bytes.chunks_exact(header.time_size);
|
||||
// RFC 8536 says: "If there are no transitions, local time for all
|
||||
// timestamps is specified by the TZ string in the footer if present
|
||||
// and nonempty; otherwise, it is specified by time type 0."
|
||||
//
|
||||
// RFC 8536 also says: "Local time for timestamps before the first
|
||||
// transition is specified by the first time type (time type
|
||||
// 0)."
|
||||
//
|
||||
// So if there are no transitions, pushing this dummy one will result
|
||||
// in the desired behavior even when it's the only transition.
|
||||
// Similarly, since this is the minimum timestamp value, it will
|
||||
// trigger for any times before the first transition found in the TZif
|
||||
// data.
|
||||
self.transitions
|
||||
.push(TzifTransition { timestamp: TIMESTAMP_MIN, type_index: 0 });
|
||||
while let Some(chunk) = it.next() {
|
||||
let mut timestamp = if header.is_32bit() {
|
||||
i64::from(from_be_bytes_i32(chunk))
|
||||
} else {
|
||||
from_be_bytes_i64(chunk)
|
||||
};
|
||||
if !(TIMESTAMP_MIN <= timestamp && timestamp <= TIMESTAMP_MAX) {
|
||||
// We really shouldn't error here just because the Unix
|
||||
// timestamp is outside what Jiff supports. Since what Jiff
|
||||
// supports is _somewhat_ arbitrary. But Jiff's supported
|
||||
// range is good enough for all realistic purposes, so we
|
||||
// just clamp an out-of-range Unix timestamp to the Jiff
|
||||
// min or max value.
|
||||
//
|
||||
// This can't result in the sorting order being wrong, but
|
||||
// it can result in a transition that is duplicative with
|
||||
// the dummy transition we inserted above. This should be
|
||||
// fine.
|
||||
let clamped = timestamp.clamp(TIMESTAMP_MIN, TIMESTAMP_MAX);
|
||||
// only-jiff-warn-start
|
||||
warn!(
|
||||
"found Unix timestamp {timestamp} that is outside \
|
||||
Jiff's supported range, clamping to {clamped}",
|
||||
);
|
||||
// only-jiff-warn-end
|
||||
timestamp = clamped;
|
||||
}
|
||||
self.transitions.push(TzifTransition {
|
||||
timestamp,
|
||||
// We can't fill in the type index yet. We fill this in
|
||||
// later when we parse the transition types.
|
||||
type_index: 0,
|
||||
});
|
||||
}
|
||||
assert!(it.remainder().is_empty());
|
||||
Ok(rest)
|
||||
}
|
||||
|
||||
fn parse_transition_types<'b>(
|
||||
&mut self,
|
||||
header: &Header,
|
||||
bytes: &'b [u8],
|
||||
) -> Result<&'b [u8], Error> {
|
||||
let (bytes, rest) = try_split_at(
|
||||
"transition types data block",
|
||||
bytes,
|
||||
header.transition_types_len()?,
|
||||
)?;
|
||||
// We start our transition indices at 1 because we always insert a
|
||||
// dummy first transition corresponding to `Timestamp::MIN`. Its type
|
||||
// index is always 0, so there's no need to change it here.
|
||||
for (transition_index, &type_index) in (1..).zip(bytes) {
|
||||
if usize::from(type_index) >= header.tzh_typecnt {
|
||||
return Err(err!(
|
||||
"found transition type index {type_index},
|
||||
but there are only {} local time types",
|
||||
header.tzh_typecnt,
|
||||
));
|
||||
}
|
||||
self.transitions[transition_index].type_index = type_index;
|
||||
}
|
||||
Ok(rest)
|
||||
}
|
||||
|
||||
fn parse_local_time_types<'b>(
|
||||
&mut self,
|
||||
header: &Header,
|
||||
bytes: &'b [u8],
|
||||
) -> Result<&'b [u8], Error> {
|
||||
let (bytes, rest) = try_split_at(
|
||||
"local time types data block",
|
||||
bytes,
|
||||
header.local_time_types_len()?,
|
||||
)?;
|
||||
let mut it = bytes.chunks_exact(6);
|
||||
while let Some(chunk) = it.next() {
|
||||
let offset = from_be_bytes_i32(&chunk[..4]);
|
||||
if !(OFFSET_MIN <= offset && offset <= OFFSET_MAX) {
|
||||
return Err(err!(
|
||||
"found local time type with out-of-bounds offset: {offset}"
|
||||
));
|
||||
}
|
||||
let is_dst = chunk[4] == 1;
|
||||
let designation = chunk[5]..chunk[5];
|
||||
self.types.push(TzifLocalTimeType {
|
||||
offset,
|
||||
is_dst,
|
||||
designation,
|
||||
indicator: TzifIndicator::LocalWall,
|
||||
});
|
||||
}
|
||||
assert!(it.remainder().is_empty());
|
||||
Ok(rest)
|
||||
}
|
||||
|
||||
fn parse_time_zone_designations<'b>(
|
||||
&mut self,
|
||||
header: &Header,
|
||||
bytes: &'b [u8],
|
||||
) -> Result<&'b [u8], Error> {
|
||||
let (bytes, rest) = try_split_at(
|
||||
"time zone designations data block",
|
||||
bytes,
|
||||
header.time_zone_designations_len()?,
|
||||
)?;
|
||||
self.fixed.designations =
|
||||
String::from_utf8(bytes.to_vec()).map_err(|_| {
|
||||
err!(
|
||||
"time zone designations are not valid UTF-8: {:?}",
|
||||
Bytes(bytes),
|
||||
)
|
||||
})?;
|
||||
// Holy hell, this is brutal. The boundary conditions are crazy.
|
||||
for (i, typ) in self.types.iter_mut().enumerate() {
|
||||
let start = usize::from(typ.designation.start);
|
||||
let Some(suffix) = self.fixed.designations.get(start..) else {
|
||||
return Err(err!(
|
||||
"local time type {i} has designation index of {start}, \
|
||||
but cannot be more than {}",
|
||||
self.fixed.designations.len(),
|
||||
));
|
||||
};
|
||||
let Some(len) = suffix.find('\x00') else {
|
||||
return Err(err!(
|
||||
"local time type {i} has designation index of {start}, \
|
||||
but could not find NUL terminator after it in \
|
||||
designations: {:?}",
|
||||
self.fixed.designations,
|
||||
));
|
||||
};
|
||||
let Some(end) = start.checked_add(len) else {
|
||||
return Err(err!(
|
||||
"local time type {i} has designation index of {start}, \
|
||||
but its length {len} is too big",
|
||||
));
|
||||
};
|
||||
typ.designation.end = u8::try_from(end).map_err(|_| {
|
||||
err!(
|
||||
"local time type {i} has designation range of \
|
||||
{start}..{end}, but end is too big",
|
||||
)
|
||||
})?;
|
||||
}
|
||||
Ok(rest)
|
||||
}
|
||||
|
||||
/// This parses the leap second corrections in the TZif data.
|
||||
///
|
||||
/// Note that we only parse and verify them. We don't actually use them.
|
||||
/// Jiff effectively ignores leap seconds.
|
||||
fn parse_leap_seconds<'b>(
|
||||
&mut self,
|
||||
header: &Header,
|
||||
bytes: &'b [u8],
|
||||
) -> Result<&'b [u8], Error> {
|
||||
let (bytes, rest) = try_split_at(
|
||||
"leap seconds data block",
|
||||
bytes,
|
||||
header.leap_second_len()?,
|
||||
)?;
|
||||
let chunk_len = header
|
||||
.time_size
|
||||
.checked_add(4)
|
||||
.expect("time_size plus 4 fits in usize");
|
||||
let mut it = bytes.chunks_exact(chunk_len);
|
||||
while let Some(chunk) = it.next() {
|
||||
let (occur_bytes, _corr_bytes) = chunk.split_at(header.time_size);
|
||||
let occur = if header.is_32bit() {
|
||||
i64::from(from_be_bytes_i32(occur_bytes))
|
||||
} else {
|
||||
from_be_bytes_i64(occur_bytes)
|
||||
};
|
||||
if !(TIMESTAMP_MIN <= occur && occur <= TIMESTAMP_MAX) {
|
||||
// only-jiff-warn-start
|
||||
warn!(
|
||||
"leap second occurrence {occur} is \
|
||||
not in Jiff's supported range"
|
||||
)
|
||||
// only-jiff-warn-end
|
||||
}
|
||||
}
|
||||
assert!(it.remainder().is_empty());
|
||||
Ok(rest)
|
||||
}
|
||||
|
||||
fn parse_indicators<'b>(
|
||||
&mut self,
|
||||
header: &Header,
|
||||
bytes: &'b [u8],
|
||||
) -> Result<&'b [u8], Error> {
|
||||
let (std_wall_bytes, rest) = try_split_at(
|
||||
"standard/wall indicators data block",
|
||||
bytes,
|
||||
header.standard_wall_len()?,
|
||||
)?;
|
||||
let (ut_local_bytes, rest) = try_split_at(
|
||||
"UT/local indicators data block",
|
||||
rest,
|
||||
header.ut_local_len()?,
|
||||
)?;
|
||||
if std_wall_bytes.is_empty() && !ut_local_bytes.is_empty() {
|
||||
// This is a weird case, but technically possible only if all
|
||||
// UT/local indicators are 0. If any are 1, then it's an error,
|
||||
// because it would require the corresponding std/wall indicator
|
||||
// to be 1 too. Which it can't be, because there aren't any. So
|
||||
// we just check that they're all zeros.
|
||||
for (i, &byte) in ut_local_bytes.iter().enumerate() {
|
||||
if byte != 0 {
|
||||
return Err(err!(
|
||||
"found UT/local indicator '{byte}' for local time \
|
||||
type {i}, but it must be 0 since all std/wall \
|
||||
indicators are 0",
|
||||
));
|
||||
}
|
||||
}
|
||||
} else if !std_wall_bytes.is_empty() && ut_local_bytes.is_empty() {
|
||||
for (i, &byte) in std_wall_bytes.iter().enumerate() {
|
||||
// Indexing is OK because Header guarantees that the number of
|
||||
// indicators is 0 or equal to the number of types.
|
||||
self.types[i].indicator = if byte == 0 {
|
||||
TzifIndicator::LocalWall
|
||||
} else if byte == 1 {
|
||||
TzifIndicator::LocalStandard
|
||||
} else {
|
||||
return Err(err!(
|
||||
"found invalid std/wall indicator '{byte}' for \
|
||||
local time type {i}, it must be 0 or 1",
|
||||
));
|
||||
};
|
||||
}
|
||||
} else if !std_wall_bytes.is_empty() && !ut_local_bytes.is_empty() {
|
||||
assert_eq!(std_wall_bytes.len(), ut_local_bytes.len());
|
||||
let it = std_wall_bytes.iter().zip(ut_local_bytes);
|
||||
for (i, (&stdwall, &utlocal)) in it.enumerate() {
|
||||
// Indexing is OK because Header guarantees that the number of
|
||||
// indicators is 0 or equal to the number of types.
|
||||
self.types[i].indicator = match (stdwall, utlocal) {
|
||||
(0, 0) => TzifIndicator::LocalWall,
|
||||
(1, 0) => TzifIndicator::LocalStandard,
|
||||
(1, 1) => TzifIndicator::UTStandard,
|
||||
(0, 1) => {
|
||||
return Err(err!(
|
||||
"found illegal ut-wall combination for \
|
||||
local time type {i}, only local-wall, \
|
||||
local-standard and ut-standard are allowed",
|
||||
))
|
||||
}
|
||||
_ => {
|
||||
return Err(err!(
|
||||
"found illegal std/wall or ut/local value for \
|
||||
local time type {i}, each must be 0 or 1",
|
||||
))
|
||||
}
|
||||
};
|
||||
}
|
||||
} else {
|
||||
// If they're both empty then we don't need to do anything. Every
|
||||
// local time type record already has the correct default for this
|
||||
// case set.
|
||||
debug_assert!(std_wall_bytes.is_empty());
|
||||
debug_assert!(ut_local_bytes.is_empty());
|
||||
}
|
||||
Ok(rest)
|
||||
}
|
||||
|
||||
fn parse_footer<'b>(
|
||||
&mut self,
|
||||
_header: &Header,
|
||||
bytes: &'b [u8],
|
||||
) -> Result<&'b [u8], Error> {
|
||||
if bytes.is_empty() {
|
||||
return Err(err!(
|
||||
"invalid V2+ TZif footer, expected \\n, \
|
||||
but found unexpected end of data",
|
||||
));
|
||||
}
|
||||
if bytes[0] != b'\n' {
|
||||
return Err(err!(
|
||||
"invalid V2+ TZif footer, expected {:?}, but found {:?}",
|
||||
Byte(b'\n'),
|
||||
Byte(bytes[0]),
|
||||
));
|
||||
}
|
||||
let bytes = &bytes[1..];
|
||||
// Only scan up to 1KB for a NUL terminator in case we somehow got
|
||||
// passed a huge block of bytes.
|
||||
let toscan = &bytes[..bytes.len().min(1024)];
|
||||
let Some(nlat) = toscan.iter().position(|&b| b == b'\n') else {
|
||||
return Err(err!(
|
||||
"invalid V2 TZif footer, could not find {:?} \
|
||||
terminator in: {:?}",
|
||||
Byte(b'\n'),
|
||||
Bytes(toscan),
|
||||
));
|
||||
};
|
||||
let (bytes, rest) = bytes.split_at(nlat);
|
||||
if !bytes.is_empty() {
|
||||
let posix_tz =
|
||||
PosixTimeZone::parse(bytes).map_err(|e| err!("{e}"))?;
|
||||
// We could in theory limit TZ strings to their strict POSIX
|
||||
// definition here for TZif V2, but I don't think there is any
|
||||
// harm in allowing the extensions in V2 formatted TZif data. Note
|
||||
// that the GNU tooling allows it via the `TZ` environment variable
|
||||
// even though POSIX doesn't specify it. This all seems okay to me
|
||||
// because the V3+ extension is a strict superset of functionality.
|
||||
if let Some(ref dst) = posix_tz.dst {
|
||||
if dst.rule.is_none() {
|
||||
return Err(err!(
|
||||
"TZ string `{}` in v3+ tzfile has DST \
|
||||
but no transition rules",
|
||||
Bytes(bytes),
|
||||
));
|
||||
}
|
||||
}
|
||||
self.fixed.posix_tz = Some(posix_tz);
|
||||
}
|
||||
Ok(&rest[1..])
|
||||
}
|
||||
}
|
||||
|
||||
/// The header for a TZif formatted file.
|
||||
///
|
||||
/// The V2+ TZif format has two headers: one for V1 data, and then a second
|
||||
/// following the V1 data block that describes another data block which uses
|
||||
/// 64-bit timestamps. The two headers both have the same format and both
|
||||
/// use 32-bit big-endian encoded integers.
|
||||
#[derive(Debug)]
|
||||
struct Header {
|
||||
/// The size of the timestamps encoded in the data block.
|
||||
///
|
||||
/// This is guaranteed to be either 4 (for V1) or 8 (for the 64-bit header
|
||||
/// block in V2+).
|
||||
time_size: usize,
|
||||
/// The file format version.
|
||||
///
|
||||
/// Note that this is either a NUL byte (for version 1), or an ASCII byte
|
||||
/// corresponding to the version number. That is, `0x32` for `2`, `0x33`
|
||||
/// for `3` or `0x34` for `4`. Note also that just because zoneinfo might
|
||||
/// have been recently generated does not mean it uses the latest format
|
||||
/// version. It seems like newer versions are only compiled by `zic` when
|
||||
/// they are needed. For example, `America/New_York` on my system (as of
|
||||
/// `2024-03-25`) has version `0x32`, but `Asia/Jerusalem` has version
|
||||
/// `0x33`.
|
||||
version: u8,
|
||||
/// Number of UT/local indicators stored in the file.
|
||||
///
|
||||
/// This is checked to be either equal to `0` or equal to `tzh_typecnt`.
|
||||
tzh_ttisutcnt: usize,
|
||||
/// The number of standard/wall indicators stored in the file.
|
||||
///
|
||||
/// This is checked to be either equal to `0` or equal to `tzh_typecnt`.
|
||||
tzh_ttisstdcnt: usize,
|
||||
/// The number of leap seconds for which data entries are stored in the
|
||||
/// file.
|
||||
tzh_leapcnt: usize,
|
||||
/// The number of transition times for which data entries are stored in
|
||||
/// the file.
|
||||
tzh_timecnt: usize,
|
||||
/// The number of local time types for which data entries are stored in the
|
||||
/// file.
|
||||
///
|
||||
/// This is checked to be at least `1`.
|
||||
tzh_typecnt: usize,
|
||||
/// The number of bytes of time zone abbreviation strings stored in the
|
||||
/// file.
|
||||
///
|
||||
/// This is checked to be at least `1`.
|
||||
tzh_charcnt: usize,
|
||||
}
|
||||
|
||||
impl Header {
|
||||
/// Parse the header record from the given bytes.
|
||||
///
|
||||
/// Upon success, return the header and all bytes after the header.
|
||||
///
|
||||
/// The given `time_size` must be 4 or 8, corresponding to either the
|
||||
/// V1 header block or the V2+ header block, respectively.
|
||||
fn parse(
|
||||
time_size: usize,
|
||||
bytes: &[u8],
|
||||
) -> Result<(Header, &[u8]), Error> {
|
||||
assert!(time_size == 4 || time_size == 8, "time size must be 4 or 8");
|
||||
if bytes.len() < 44 {
|
||||
return Err(err!("invalid header: too short"));
|
||||
}
|
||||
let (magic, rest) = bytes.split_at(4);
|
||||
if magic != b"TZif" {
|
||||
return Err(err!("invalid header: magic bytes mismatch"));
|
||||
}
|
||||
let (version, rest) = rest.split_at(1);
|
||||
let (_reserved, rest) = rest.split_at(15);
|
||||
|
||||
let (tzh_ttisutcnt_bytes, rest) = rest.split_at(4);
|
||||
let (tzh_ttisstdcnt_bytes, rest) = rest.split_at(4);
|
||||
let (tzh_leapcnt_bytes, rest) = rest.split_at(4);
|
||||
let (tzh_timecnt_bytes, rest) = rest.split_at(4);
|
||||
let (tzh_typecnt_bytes, rest) = rest.split_at(4);
|
||||
let (tzh_charcnt_bytes, rest) = rest.split_at(4);
|
||||
|
||||
let tzh_ttisutcnt = from_be_bytes_u32_to_usize(tzh_ttisutcnt_bytes)
|
||||
.map_err(|e| err!("failed to parse tzh_ttisutcnt: {e}"))?;
|
||||
let tzh_ttisstdcnt = from_be_bytes_u32_to_usize(tzh_ttisstdcnt_bytes)
|
||||
.map_err(|e| err!("failed to parse tzh_ttisstdcnt: {e}"))?;
|
||||
let tzh_leapcnt = from_be_bytes_u32_to_usize(tzh_leapcnt_bytes)
|
||||
.map_err(|e| err!("failed to parse tzh_leapcnt: {e}"))?;
|
||||
let tzh_timecnt = from_be_bytes_u32_to_usize(tzh_timecnt_bytes)
|
||||
.map_err(|e| err!("failed to parse tzh_timecnt: {e}"))?;
|
||||
let tzh_typecnt = from_be_bytes_u32_to_usize(tzh_typecnt_bytes)
|
||||
.map_err(|e| err!("failed to parse tzh_typecnt: {e}"))?;
|
||||
let tzh_charcnt = from_be_bytes_u32_to_usize(tzh_charcnt_bytes)
|
||||
.map_err(|e| err!("failed to parse tzh_charcnt: {e}"))?;
|
||||
|
||||
if tzh_ttisutcnt != 0 && tzh_ttisutcnt != tzh_typecnt {
|
||||
return Err(err!(
|
||||
"expected tzh_ttisutcnt={tzh_ttisutcnt} to be zero \
|
||||
or equal to tzh_typecnt={tzh_typecnt}",
|
||||
));
|
||||
}
|
||||
if tzh_ttisstdcnt != 0 && tzh_ttisstdcnt != tzh_typecnt {
|
||||
return Err(err!(
|
||||
"expected tzh_ttisstdcnt={tzh_ttisstdcnt} to be zero \
|
||||
or equal to tzh_typecnt={tzh_typecnt}",
|
||||
));
|
||||
}
|
||||
if tzh_typecnt < 1 {
|
||||
return Err(err!(
|
||||
"expected tzh_typecnt={tzh_typecnt} to be at least 1",
|
||||
));
|
||||
}
|
||||
if tzh_charcnt < 1 {
|
||||
return Err(err!(
|
||||
"expected tzh_charcnt={tzh_charcnt} to be at least 1",
|
||||
));
|
||||
}
|
||||
|
||||
let header = Header {
|
||||
time_size,
|
||||
version: version[0],
|
||||
tzh_ttisutcnt,
|
||||
tzh_ttisstdcnt,
|
||||
tzh_leapcnt,
|
||||
tzh_timecnt,
|
||||
tzh_typecnt,
|
||||
tzh_charcnt,
|
||||
};
|
||||
Ok((header, rest))
|
||||
}
|
||||
|
||||
/// Returns true if this header is for a 32-bit data block.
|
||||
///
|
||||
/// When false, it is guaranteed that this header is for a 64-bit data
|
||||
/// block.
|
||||
fn is_32bit(&self) -> bool {
|
||||
self.time_size == 4
|
||||
}
|
||||
|
||||
/// Returns the size of the data block, in bytes, for this header.
|
||||
///
|
||||
/// This returns an error if the arithmetic required to compute the
|
||||
/// length would overflow.
|
||||
///
|
||||
/// This is useful for, e.g., skipping over the 32-bit V1 data block in
|
||||
/// V2+ TZif formatted files.
|
||||
fn data_block_len(&self) -> Result<usize, Error> {
|
||||
let a = self.transition_times_len()?;
|
||||
let b = self.transition_types_len()?;
|
||||
let c = self.local_time_types_len()?;
|
||||
let d = self.time_zone_designations_len()?;
|
||||
let e = self.leap_second_len()?;
|
||||
let f = self.standard_wall_len()?;
|
||||
let g = self.ut_local_len()?;
|
||||
a.checked_add(b)
|
||||
.and_then(|z| z.checked_add(c))
|
||||
.and_then(|z| z.checked_add(d))
|
||||
.and_then(|z| z.checked_add(e))
|
||||
.and_then(|z| z.checked_add(f))
|
||||
.and_then(|z| z.checked_add(g))
|
||||
.ok_or_else(|| {
|
||||
err!(
|
||||
"length of data block in V{} tzfile is too big",
|
||||
self.version
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
fn transition_times_len(&self) -> Result<usize, Error> {
|
||||
self.tzh_timecnt.checked_mul(self.time_size).ok_or_else(|| {
|
||||
err!("tzh_timecnt value {} is too big", self.tzh_timecnt)
|
||||
})
|
||||
}
|
||||
|
||||
fn transition_types_len(&self) -> Result<usize, Error> {
|
||||
Ok(self.tzh_timecnt)
|
||||
}
|
||||
|
||||
fn local_time_types_len(&self) -> Result<usize, Error> {
|
||||
self.tzh_typecnt.checked_mul(6).ok_or_else(|| {
|
||||
err!("tzh_typecnt value {} is too big", self.tzh_typecnt)
|
||||
})
|
||||
}
|
||||
|
||||
fn time_zone_designations_len(&self) -> Result<usize, Error> {
|
||||
Ok(self.tzh_charcnt)
|
||||
}
|
||||
|
||||
fn leap_second_len(&self) -> Result<usize, Error> {
|
||||
let record_len = self
|
||||
.time_size
|
||||
.checked_add(4)
|
||||
.expect("4-or-8 plus 4 always fits in usize");
|
||||
self.tzh_leapcnt.checked_mul(record_len).ok_or_else(|| {
|
||||
err!("tzh_leapcnt value {} is too big", self.tzh_leapcnt)
|
||||
})
|
||||
}
|
||||
|
||||
fn standard_wall_len(&self) -> Result<usize, Error> {
|
||||
Ok(self.tzh_ttisstdcnt)
|
||||
}
|
||||
|
||||
fn ut_local_len(&self) -> Result<usize, Error> {
|
||||
Ok(self.tzh_ttisutcnt)
|
||||
}
|
||||
}
|
||||
|
||||
/// Splits the given slice of bytes at the index given.
|
||||
///
|
||||
/// If the index is out of range (greater than `bytes.len()`) then an error is
|
||||
/// returned. The error message will include the `what` string given, which is
|
||||
/// meant to describe the thing being split.
|
||||
fn try_split_at<'b>(
|
||||
what: &'static str,
|
||||
bytes: &'b [u8],
|
||||
at: usize,
|
||||
) -> Result<(&'b [u8], &'b [u8]), Error> {
|
||||
if at > bytes.len() {
|
||||
Err(err!(
|
||||
"expected at least {at} bytes for {what}, \
|
||||
but found only {} bytes",
|
||||
bytes.len(),
|
||||
))
|
||||
} else {
|
||||
Ok(bytes.split_at(at))
|
||||
}
|
||||
}
|
||||
|
||||
/// Interprets the given slice as an unsigned 32-bit big endian integer,
|
||||
/// attempts to convert it to a `usize` and returns it.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `bytes.len() != 4`.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// This errors if the `u32` parsed from the given bytes cannot fit in a
|
||||
/// `usize`.
|
||||
fn from_be_bytes_u32_to_usize(bytes: &[u8]) -> Result<usize, Error> {
|
||||
let n = from_be_bytes_u32(bytes);
|
||||
usize::try_from(n).map_err(|_| {
|
||||
err!(
|
||||
"failed to parse integer {n} (too big, max allowed is {}",
|
||||
usize::MAX
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
/// Interprets the given slice as an unsigned 32-bit big endian integer and
|
||||
/// returns it.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `bytes.len() != 4`.
|
||||
fn from_be_bytes_u32(bytes: &[u8]) -> u32 {
|
||||
u32::from_be_bytes(bytes.try_into().unwrap())
|
||||
}
|
||||
|
||||
/// Interprets the given slice as a signed 32-bit big endian integer and
|
||||
/// returns it.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `bytes.len() != 4`.
|
||||
fn from_be_bytes_i32(bytes: &[u8]) -> i32 {
|
||||
i32::from_be_bytes(bytes.try_into().unwrap())
|
||||
}
|
||||
|
||||
/// Interprets the given slice as a signed 64-bit big endian integer and
|
||||
/// returns it.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `bytes.len() != 8`.
|
||||
fn from_be_bytes_i64(bytes: &[u8]) -> i64 {
|
||||
i64::from_be_bytes(bytes.try_into().unwrap())
|
||||
}
|
||||
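As a rough usage sketch (not part of the diff): `TzifOwned::parse` is declared `pub(crate)` above, so it can only be called from inside jiff. The zoneinfo path and the exact module paths below are assumptions for illustration, and the sketch assumes both `std` and `alloc` are available.

    // Crate-internal sketch; the TZif file path is system-dependent.
    fn parse_new_york() -> Result<crate::shared::TzifOwned, crate::shared::tzif::Error> {
        let bytes = std::fs::read("/usr/share/zoneinfo/America/New_York")
            .expect("expected a zoneinfo database to be installed");
        let tzif = crate::shared::TzifOwned::parse(
            Some(alloc::string::String::from("America/New_York")),
            &bytes,
        )?;
        // V2+ files normally carry a POSIX TZ string in their footer.
        assert!(tzif.fixed.posix_tz.is_some());
        Ok(tzif)
    }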
src/shared/util.rs (new file, 128 lines)
@@ -0,0 +1,128 @@
|
|||
/// Provides a convenient `Debug` implementation for a `u8`.
|
||||
///
|
||||
/// The `Debug` impl treats the byte as an ASCII, and emits a human
|
||||
/// readable representation of it. If the byte isn't ASCII, then it's
|
||||
/// emitted as a hex escape sequence.
|
||||
#[derive(Clone, Copy)]
|
||||
pub(crate) struct Byte(pub u8);
|
||||
|
||||
impl core::fmt::Display for Byte {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
if self.0 == b' ' {
|
||||
return write!(f, " ");
|
||||
}
|
||||
// 10 bytes is enough for any output from ascii::escape_default.
|
||||
let mut bytes = [0u8; 10];
|
||||
let mut len = 0;
|
||||
for (i, mut b) in core::ascii::escape_default(self.0).enumerate() {
|
||||
// capitalize \xab to \xAB
|
||||
if i >= 2 && b'a' <= b && b <= b'f' {
|
||||
b -= 32;
|
||||
}
|
||||
bytes[len] = b;
|
||||
len += 1;
|
||||
}
|
||||
write!(f, "{}", core::str::from_utf8(&bytes[..len]).unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
impl core::fmt::Debug for Byte {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
write!(f, "\"")?;
|
||||
core::fmt::Display::fmt(self, f)?;
|
||||
write!(f, "\"")?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Provides a convenient `Debug` implementation for `&[u8]`.
|
||||
///
|
||||
/// This generally works best when the bytes are presumed to be mostly
|
||||
/// UTF-8, but will work for anything. For any bytes that aren't UTF-8,
|
||||
/// they are emitted as hex escape sequences.
|
||||
#[derive(Clone, Copy)]
|
||||
pub(crate) struct Bytes<'a>(pub &'a [u8]);
|
||||
|
||||
impl<'a> core::fmt::Display for Bytes<'a> {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
// This is a sad re-implementation of a similar impl found in bstr.
|
||||
let mut bytes = self.0;
|
||||
while let Some(result) = utf8_decode(bytes) {
|
||||
let ch = match result {
|
||||
Ok(ch) => ch,
|
||||
Err(byte) => {
|
||||
write!(f, r"\x{:02x}", byte)?;
|
||||
bytes = &bytes[1..];
|
||||
continue;
|
||||
}
|
||||
};
|
||||
bytes = &bytes[ch.len_utf8()..];
|
||||
match ch {
|
||||
'\0' => write!(f, "\\0")?,
|
||||
'\x01'..='\x7f' => {
|
||||
write!(f, "{}", (ch as u8).escape_ascii())?;
|
||||
}
|
||||
_ => write!(f, "{}", ch.escape_debug())?,
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> core::fmt::Debug for Bytes<'a> {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
write!(f, "\"")?;
|
||||
core::fmt::Display::fmt(self, f)?;
|
||||
write!(f, "\"")?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Decodes the next UTF-8 encoded codepoint from the given byte slice.
|
||||
///
|
||||
/// If no valid encoding of a codepoint exists at the beginning of the
|
||||
/// given byte slice, then the first byte is returned instead.
|
||||
///
|
||||
/// This returns `None` if and only if `bytes` is empty.
|
||||
///
|
||||
/// This never panics.
|
||||
///
|
||||
/// *WARNING*: This is not designed for performance. If you're looking for
|
||||
/// a fast UTF-8 decoder, this is not it. If you feel like you need one in
|
||||
/// this crate, then please file an issue and discuss your use case.
|
||||
pub(crate) fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
|
||||
/// Given a UTF-8 leading byte, this returns the total number of code
|
||||
/// units in the following encoded codepoint.
|
||||
///
|
||||
/// If the given byte is not a valid UTF-8 leading byte, then this
|
||||
/// returns `None`.
|
||||
fn utf8_len(byte: u8) -> Option<usize> {
|
||||
if byte <= 0x7F {
|
||||
return Some(1);
|
||||
} else if byte & 0b1100_0000 == 0b1000_0000 {
|
||||
return None;
|
||||
} else if byte <= 0b1101_1111 {
|
||||
Some(2)
|
||||
} else if byte <= 0b1110_1111 {
|
||||
Some(3)
|
||||
} else if byte <= 0b1111_0111 {
|
||||
Some(4)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
if bytes.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let len = match utf8_len(bytes[0]) {
|
||||
None => return Some(Err(bytes[0])),
|
||||
Some(len) if len > bytes.len() => return Some(Err(bytes[0])),
|
||||
Some(1) => return Some(Ok(char::from(bytes[0]))),
|
||||
Some(len) => len,
|
||||
};
|
||||
match core::str::from_utf8(&bytes[..len]) {
|
||||
Ok(s) => Some(Ok(s.chars().next().unwrap())),
|
||||
Err(_) => Some(Err(bytes[0])),
|
||||
}
|
||||
}
|
||||
|
|
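A small crate-internal illustration of the helpers defined in src/shared/util.rs above (not part of the diff; module paths assumed): `utf8_decode` reports an invalid leading byte as `Err(byte)`, and `Bytes` hex-escapes such bytes when displayed.

    // Sketch only: shows the intended behavior of `utf8_decode` and `Bytes`.
    fn demo() {
        use crate::shared::util::{utf8_decode, Bytes};

        let raw: &[u8] = b"abc\xFFdef";
        match utf8_decode(raw) {
            Some(Ok(ch)) => println!("first codepoint: {ch}"), // prints 'a'
            Some(Err(byte)) => println!("invalid leading byte: {byte:#04x}"),
            None => println!("empty input"),
        }
        // The invalid byte is rendered as an escape sequence: abc\xffdef
        println!("{}", Bytes(raw));
    }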
@@ -101,15 +101,13 @@ mod ambiguous;
mod concatenated;
mod db;
mod offset;
#[cfg(feature = "alloc")]
pub(crate) mod posix;
#[cfg(feature = "tz-system")]
mod system;
#[cfg(all(test, feature = "alloc"))]
mod testdata;
mod timezone;
#[cfg(feature = "alloc")]
-mod tzif;
+pub(crate) mod tzif;
// See module comment for WIP status. :-(
#[cfg(test)]
mod zic;
@@ -232,7 +232,7 @@ impl Offset {
// warrant its existence. And I think I'd rather `Offset::hms` be const and
// exported instead of this monstrosity.
#[inline]
-const fn constant_seconds(seconds: i32) -> Offset {
+pub(crate) const fn constant_seconds(seconds: i32) -> Offset {
if !t::SpanZoneOffset::contains(seconds) {
panic!("invalid time zone offset seconds")
}
src/tz/posix.rs (2147 changed lines): diff suppressed because it is too large
@@ -1,6 +1,6 @@
use alloc::string::ToString;

-use crate::tz::tzif::Tzif;
+use crate::tz::tzif::TzifOwned;

/// A concatenated list of TZif data with a header and an index block.
///
@@ -94,20 +94,20 @@ impl TzifTestFile {

/// Parse this test TZif data into a structured representation.
#[cfg(not(miri))]
-pub(crate) fn parse(self) -> Tzif {
+pub(crate) fn parse(self) -> TzifOwned {
let name = Some(self.name.to_string());
-Tzif::parse(name, self.data).unwrap_or_else(|err| {
+TzifOwned::parse(name, self.data).unwrap_or_else(|err| {
panic!("failed to parse TZif test file for {:?}: {err}", self.name)
})
}

/// Parse this test TZif data as if it were V1.
#[cfg(not(miri))]
-pub(crate) fn parse_v1(self) -> Tzif {
+pub(crate) fn parse_v1(self) -> TzifOwned {
let name = Some(self.name.to_string());
let mut data = self.data.to_vec();
data[4] = 0;
-Tzif::parse(name, &data).unwrap_or_else(|err| {
+TzifOwned::parse(name, &data).unwrap_or_else(|err| {
panic!(
"failed to parse V1 TZif test file for {:?}: {err}",
self.name
@@ -1534,7 +1534,7 @@ impl core::fmt::Display for TimeZonePosix {
#[cfg(feature = "alloc")]
#[derive(Eq, PartialEq)]
struct TimeZoneTzif {
-tzif: crate::tz::tzif::Tzif,
+tzif: crate::tz::tzif::TzifOwned,
}

#[cfg(feature = "alloc")]
src/tz/tzif.rs (1125 changed lines): diff suppressed because it is too large
@@ -46,11 +46,6 @@ impl<const N: usize> ArrayStr<N> {
Some(ArrayStr { bytes, len: len as u8 })
}

-/// Returns the capacity of this fixed string.
-pub(crate) const fn capacity() -> usize {
-N
-}
-
/// Append the bytes given to the end of this string.
///
/// If the capacity would be exceeded, then this is a no-op and `false`
src/util/constant.rs (new file, 13 lines)
@@ -0,0 +1,13 @@
/// Unwrap an `Option<T>` in a `const` context.
///
/// If it fails, panics with the given message.
macro_rules! unwrap {
($val:expr, $msg:expr$(,)?) => {
match $val {
Some(val) => val,
None => panic!($msg),
}
};
}

pub(crate) use unwrap;
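A brief usage sketch for the `unwrap!` macro above (illustrative only; the constant and helper function are hypothetical, not from the commit): it lets a `const` initializer fail at compile time with a readable message when an `Option` turns out to be `None`.

    // Crate-internal sketch of `unwrap!` in a const context.
    use crate::util::constant::unwrap;

    // Hypothetical const fn for illustration.
    const fn checked_offset() -> Option<i32> {
        Some(3600)
    }

    // Evaluated at compile time; a `None` here becomes a compile error
    // carrying the given message.
    const DEFAULT_OFFSET_SECONDS: i32 =
        unwrap!(checked_offset(), "checked_offset() unexpectedly returned None");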
@@ -4,91 +4,8 @@ Provides convenience routines for escaping raw bytes.
|
|||
This was copied from `regex-automata` with a few light edits.
|
||||
*/
|
||||
|
||||
use crate::util::utf8;
|
||||
|
||||
/// Provides a convenient `Debug` implementation for a `u8`.
|
||||
///
|
||||
/// The `Debug` impl treats the byte as an ASCII, and emits a human readable
|
||||
/// representation of it. If the byte isn't ASCII, then it's emitted as a hex
|
||||
/// escape sequence.
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct Byte(pub u8);
|
||||
|
||||
impl core::fmt::Display for Byte {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
if self.0 == b' ' {
|
||||
return write!(f, " ");
|
||||
}
|
||||
// 10 bytes is enough to cover any output from ascii::escape_default.
|
||||
let mut bytes = [0u8; 10];
|
||||
let mut len = 0;
|
||||
for (i, mut b) in core::ascii::escape_default(self.0).enumerate() {
|
||||
// capitalize \xab to \xAB
|
||||
if i >= 2 && b'a' <= b && b <= b'f' {
|
||||
b -= 32;
|
||||
}
|
||||
bytes[len] = b;
|
||||
len += 1;
|
||||
}
|
||||
write!(f, "{}", core::str::from_utf8(&bytes[..len]).unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
impl core::fmt::Debug for Byte {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
write!(f, "\"")?;
|
||||
core::fmt::Display::fmt(self, f)?;
|
||||
write!(f, "\"")?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Provides a convenient `Debug` implementation for `&[u8]`.
|
||||
///
|
||||
/// This generally works best when the bytes are presumed to be mostly UTF-8,
|
||||
/// but will work for anything. For any bytes that aren't UTF-8, they are
|
||||
/// emitted as hex escape sequences.
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct Bytes<'a>(pub &'a [u8]);
|
||||
|
||||
impl<'a> core::fmt::Display for Bytes<'a> {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
// This is a sad re-implementation of a similar impl found in bstr.
|
||||
let mut bytes = self.0;
|
||||
while let Some(result) = utf8::decode(bytes) {
|
||||
let ch = match result {
|
||||
Ok(ch) => ch,
|
||||
Err(byte) => {
|
||||
write!(f, r"\x{:02x}", byte)?;
|
||||
bytes = &bytes[1..];
|
||||
continue;
|
||||
}
|
||||
};
|
||||
bytes = &bytes[ch.len_utf8()..];
|
||||
match ch {
|
||||
'\0' => write!(f, "\\0")?,
|
||||
// ASCII control characters except \0, \n, \r, \t
|
||||
'\x01'..='\x08'
|
||||
| '\x0b'
|
||||
| '\x0c'
|
||||
| '\x0e'..='\x19'
|
||||
| '\x7f' => {
|
||||
write!(f, "\\x{:02x}", u32::from(ch))?;
|
||||
}
|
||||
'\n' | '\r' | '\t' | _ => {
|
||||
write!(f, "{}", ch.escape_debug())?;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> core::fmt::Debug for Bytes<'a> {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
write!(f, "\"")?;
|
||||
core::fmt::Display::fmt(self, f)?;
|
||||
write!(f, "\"")?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
// These were originally defined here, but they got moved to
|
||||
// shared since they're needed there. We re-export them here
|
||||
// because this is really where they should live, but they're
|
||||
// in shared because `jiff-tzdb-static` needs it.
|
||||
pub(crate) use crate::shared::util::{Byte, Bytes};
|
||||
|
|
|
|||
|
|
@@ -7,7 +7,7 @@ pub(crate) mod borrow;
))]
pub(crate) mod cache;
pub(crate) mod common;
-pub(crate) mod crc32;
+pub(crate) mod constant;
pub(crate) mod escape;
#[cfg(feature = "std")]
pub(crate) mod fs;
@@ -13,19 +13,7 @@ use core::cmp::Ordering;
/// fast UTF-8 decoder, this is not it. If you feel like you need one in this
/// crate, then please file an issue and discuss your use case.
pub(crate) fn decode(bytes: &[u8]) -> Option<Result<char, u8>> {
-if bytes.is_empty() {
-return None;
-}
-let len = match utf8_len(bytes[0]) {
-None => return Some(Err(bytes[0])),
-Some(len) if len > bytes.len() => return Some(Err(bytes[0])),
-Some(1) => return Some(Ok(char::from(bytes[0]))),
-Some(len) => len,
-};
-match core::str::from_utf8(&bytes[..len]) {
-Ok(s) => Some(Ok(s.chars().next().unwrap())),
-Err(_) => Some(Err(bytes[0])),
-}
+crate::shared::util::utf8_decode(bytes)
}

/// Like std's `eq_ignore_ascii_case`, but returns a full `Ordering`.
@@ -58,24 +46,3 @@ pub(crate) fn cmp_ignore_ascii_case_bytes(s1: &[u8], s2: &[u8]) -> Ordering {
}
}
}
-
-/// Given a UTF-8 leading byte, this returns the total number of code units
-/// in the following encoded codepoint.
-///
-/// If the given byte is not a valid UTF-8 leading byte, then this returns
-/// `None`.
-fn utf8_len(byte: u8) -> Option<usize> {
-if byte <= 0x7F {
-return Some(1);
-} else if byte & 0b1100_0000 == 0b1000_0000 {
-return None;
-} else if byte <= 0b1101_1111 {
-Some(2)
-} else if byte <= 0b1110_1111 {
-Some(3)
-} else if byte <= 0b1111_0111 {
-Some(4)
-} else {
-None
-}
-}