From 007a4bffe964a9176b00a8e1ba0a1577d3ab3323 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Tue, 18 Feb 2025 22:53:27 -0500 Subject: [PATCH] wip --- bench/src/datetime.rs | 2 +- crates/jiff-cli/cmd/generate/crc32.rs | 2 +- src/lib.rs | 2 + src/{util => shared}/crc32/mod.rs | 2 +- src/{util => shared}/crc32/table.rs | 6 +- src/shared/mod.rs | 166 ++ src/shared/posix.rs | 1939 ++++++++++++++++++++++ src/shared/tzif.rs | 789 +++++++++ src/shared/util.rs | 128 ++ src/tz/mod.rs | 4 +- src/tz/offset.rs | 2 +- src/tz/posix.rs | 2147 +++---------------------- src/tz/testdata.rs | 10 +- src/tz/timezone.rs | 2 +- src/tz/tzif.rs | 1125 ++++--------- src/util/array_str.rs | 5 - src/util/constant.rs | 13 + src/util/escape.rs | 93 +- src/util/mod.rs | 2 +- src/util/utf8.rs | 35 +- 20 files changed, 3560 insertions(+), 2914 deletions(-) rename src/{util => shared}/crc32/mod.rs (97%) rename src/{util => shared}/crc32/table.rs (99%) create mode 100644 src/shared/mod.rs create mode 100644 src/shared/posix.rs create mode 100644 src/shared/tzif.rs create mode 100644 src/shared/util.rs create mode 100644 src/util/constant.rs diff --git a/bench/src/datetime.rs b/bench/src/datetime.rs index 6698974..c91aa5a 100644 --- a/bench/src/datetime.rs +++ b/bench/src/datetime.rs @@ -111,7 +111,7 @@ fn add_years_months_days(c: &mut Criterion) { /// This is useful when you have a known time zone already and want to get /// a specific instant for many distinct civil datetimes in that time zone. fn to_timestamp_static(c: &mut Criterion) { - const NAME: &str = "civil_datetime/to_datetime_static"; + const NAME: &str = "civil_datetime/to_timestamp_static"; const TZNAME: &str = "America/New_York"; const STAMP: i64 = 1719755160; const DATETIME: civil::DateTime = civil::date(2024, 6, 30).at(9, 46, 0, 0); diff --git a/crates/jiff-cli/cmd/generate/crc32.rs b/crates/jiff-cli/cmd/generate/crc32.rs index e914527..077904d 100644 --- a/crates/jiff-cli/cmd/generate/crc32.rs +++ b/crates/jiff-cli/cmd/generate/crc32.rs @@ -46,7 +46,7 @@ pub fn run(p: &mut Parser) -> anyhow::Result<()> { args::configure(p, USAGE, &mut [&mut config])?; let jiff = config.jiff(); - let table_path = jiff.join("src/util/crc32/table.rs"); + let table_path = jiff.join("src/shared/crc32/table.rs"); write_crc_tables(&table_path).with_context(|| { format!("failed to write CRC32 data table to {}", table_path.display()) })?; diff --git a/src/lib.rs b/src/lib.rs index d412f41..44af780 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -731,6 +731,8 @@ mod error; pub mod fmt; #[cfg(feature = "std")] mod now; +#[doc(hidden)] +pub mod shared; mod signed_duration; mod span; mod timestamp; diff --git a/src/util/crc32/mod.rs b/src/shared/crc32/mod.rs similarity index 97% rename from src/util/crc32/mod.rs rename to src/shared/crc32/mod.rs index ac6e2a0..f37b6be 100644 --- a/src/util/crc32/mod.rs +++ b/src/shared/crc32/mod.rs @@ -1,4 +1,4 @@ -use crate::util::crc32::table::{TABLE, TABLE16}; +use self::table::{TABLE, TABLE16}; mod table; diff --git a/src/util/crc32/table.rs b/src/shared/crc32/table.rs similarity index 99% rename from src/util/crc32/table.rs rename to src/shared/crc32/table.rs index e799856..b73de91 100644 --- a/src/util/crc32/table.rs +++ b/src/shared/crc32/table.rs @@ -1,4 +1,6 @@ -pub const TABLE: [u32; 256] = [ +// auto-generated by: jiff-cli generate crc32 + +pub(super) const TABLE: [u32; 256] = [ 0, 4067132163, 3778769143, 324072436, 3348797215, 904991772, 648144872, 3570033899, 2329499855, 2024987596, 1809983544, 2575936315, 1296289744, 3207089363, 
2893594407, 1578318884, 274646895, 3795141740, 4049975192, @@ -44,7 +46,7 @@ pub const TABLE: [u32; 256] = [ 1279665062, 1595330642, 2910671697, ]; -pub const TABLE16: [[u32; 256]; 16] = [ +pub(super) const TABLE16: [[u32; 256]; 16] = [ [ 0, 4067132163, 3778769143, 324072436, 3348797215, 904991772, 648144872, 3570033899, 2329499855, 2024987596, 1809983544, 2575936315, diff --git a/src/shared/mod.rs b/src/shared/mod.rs new file mode 100644 index 0000000..28e218b --- /dev/null +++ b/src/shared/mod.rs @@ -0,0 +1,166 @@ +/*! +TODO +*/ + +use core::ops::Range; + +pub type TzifStatic = Tzif< + &'static str, + &'static [TzifLocalTimeType], + &'static [TzifTransition], +>; + +#[cfg(feature = "alloc")] +pub type TzifOwned = Tzif< + alloc::string::String, + alloc::vec::Vec, + alloc::vec::Vec, +>; + +#[derive(Debug)] +pub struct Tzif { + pub fixed: TzifFixed, + pub types: TYPES, + pub transitions: TRANS, +} + +#[derive(Debug)] +pub struct TzifFixed { + pub name: Option, + pub version: u8, + pub checksum: u32, + pub designations: STRING, + pub posix_tz: Option>, +} + +// only-jiff-impl-start +impl TzifFixed<&'static str> { + pub const fn to_jiff( + &self, + types: &'static [crate::tz::tzif::LocalTimeType], + trans: &'static [crate::tz::tzif::Transition], + ) -> crate::tz::tzif::TzifStatic { + crate::tz::tzif::TzifStatic::from_shared_const(self, types, trans) + } +} +// only-jiff-impl-end + +#[derive(Debug)] +pub struct TzifLocalTimeType { + pub offset: i32, + pub is_dst: bool, + pub designation: Range, + pub indicator: TzifIndicator, +} + +// only-jiff-impl-start +impl TzifLocalTimeType { + pub const fn to_jiff(&self) -> crate::tz::tzif::LocalTimeType { + crate::tz::tzif::LocalTimeType::from_shared(self) + } +} +// only-jiff-impl-end + +#[derive(Debug)] +pub enum TzifIndicator { + LocalWall, + LocalStandard, + UTStandard, +} + +#[derive(Debug)] +pub struct TzifTransition { + pub timestamp: i64, + pub type_index: u8, +} + +// only-jiff-impl-start +impl TzifTransition { + pub const fn to_jiff( + &self, + prev_offset: i32, + this_offset: i32, + ) -> crate::tz::tzif::Transition { + crate::tz::tzif::Transition::from_shared( + self, + prev_offset, + this_offset, + ) + } +} +// only-jiff-impl-end + +#[derive(Debug, Eq, PartialEq)] +pub struct PosixTimeZone { + pub std_abbrev: ABBREV, + pub std_offset: i32, + pub dst: Option>, +} + +#[derive(Debug, Eq, PartialEq)] +pub struct PosixDst { + pub abbrev: ABBREV, + pub offset: i32, + pub rule: Option, +} + +#[derive(Debug, Eq, PartialEq)] +pub struct PosixRule { + pub start: PosixDayTime, + pub end: PosixDayTime, +} + +#[derive(Debug, Eq, PartialEq)] +pub struct PosixDayTime { + pub date: PosixDay, + pub time: i32, +} + +#[derive(Debug, Eq, PartialEq)] +pub enum PosixDay { + /// Julian day in a year, no counting for leap days. + /// + /// Valid range is `1..=365`. + JulianOne(i16), + /// Julian day in a year, counting for leap days. + /// + /// Valid range is `0..=365`. + JulianZero(i16), + /// The nth weekday of a month. + WeekdayOfMonth { + /// The month. + /// + /// Valid range is: `1..=12`. + month: i8, + /// The week. + /// + /// Valid range is `1..=5`. + /// + /// One interesting thing to note here (or my interpretation anyway), + /// is that a week of `4` means the "4th weekday in a month" where as + /// a week of `5` means the "last weekday in a month, even if it's the + /// 4th weekday." + week: i8, + /// The weekday. + /// + /// Valid range is `0..=6`, with `0` corresponding to Sunday. 
+ weekday: i8, + }, +} + +// only-jiff-impl-start +impl PosixTimeZone<&'static str> { + pub const fn to_jiff(&self) -> crate::tz::posix::ReasonablePosixTimeZone { + crate::tz::posix::ReasonablePosixTimeZone::from_shared_const(self) + } +} +// only-jiff-impl-end + +// Does not require `alloc`, but is only used when `alloc` is enabled. +#[cfg(feature = "alloc")] +pub(crate) mod crc32; +#[cfg(feature = "alloc")] +pub(crate) mod posix; +#[cfg(feature = "alloc")] +pub(crate) mod tzif; +pub(crate) mod util; diff --git a/src/shared/posix.rs b/src/shared/posix.rs new file mode 100644 index 0000000..9efe9a7 --- /dev/null +++ b/src/shared/posix.rs @@ -0,0 +1,1939 @@ +use alloc::string::String; + +use super::{ + util::{Byte, Bytes}, + PosixDay, PosixDayTime, PosixDst, PosixRule, PosixTimeZone, +}; + +macro_rules! err { + ($($tt:tt)*) => {{ + self::Error(alloc::format!($($tt)*)) + }} +} + +/// An error that can be returned when parsing. +#[derive(Debug)] +pub struct Error(String); + +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + core::fmt::Display::fmt(&self.0, f) + } +} + +#[cfg(feature = "alloc")] +impl PosixTimeZone { + /// Parse a POSIX `TZ` environment variable, assuming it's a rule and not + /// an implementation defined value, from the given bytes. + pub fn parse(bytes: &[u8]) -> Result, Error> { + // We enable the IANA v3+ extensions here. (Namely, that the time + // specification hour value has the range `-167..=167` instead of + // `0..=24`.) Requiring strict POSIX rules doesn't seem necessary + // since the extension is a strict superset. Plus, GNU tooling + // seems to accept the extension. + let parser = + Parser { ianav3plus: true, ..Parser::new(bytes.as_ref()) }; + parser.parse() + } + + /// Like parse, but parses a prefix of the input given and returns whatever + /// is remaining. + pub fn parse_prefix<'b>( + bytes: &'b [u8], + ) -> Result<(PosixTimeZone, &'b [u8]), Error> { + let parser = + Parser { ianav3plus: true, ..Parser::new(bytes.as_ref()) }; + parser.parse_prefix() + } +} + +/// A parser for POSIX time zones. +#[derive(Debug)] +struct Parser<'s> { + /// The `TZ` string that we're parsing. + tz: &'s [u8], + /// The parser's current position in `tz`. + pos: core::cell::Cell, + /// Whether to use IANA rules, i.e., when parsing a TZ string in a TZif + /// file of version 3 or greater. From `tzfile(5)`: + /// + /// > First, the hours part of its transition times may be signed and range + /// > from `-167` through `167` instead of the POSIX-required unsigned + /// > values from `0` through `24`. Second, DST is in effect all year if + /// > it starts January 1 at 00:00 and ends December 31 at 24:00 plus the + /// > difference between daylight saving and standard time. + /// + /// At time of writing, I don't think I understand the significance of + /// the second part above. (RFC 8536 elaborates that it is meant to be an + /// explicit clarification of something that POSIX itself implies.) But the + /// first part is clear: it permits the hours to be a bigger range. + ianav3plus: bool, +} + +impl<'s> Parser<'s> { + /// Create a new parser for extracting a POSIX time zone from the given + /// bytes. + fn new>(tz: &'s B) -> Parser<'s> { + Parser { + tz: tz.as_ref(), + pos: core::cell::Cell::new(0), + ianav3plus: false, + } + } + + /// Parses a POSIX time zone from the current position of the parser and + /// ensures that the entire TZ string corresponds to a single valid POSIX + /// time zone. 
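// As a rough sketch of how the data model in `shared/mod.rs` gets
// populated (the values below follow from the parsing routines in this
// file: offsets have their sign flipped, the DST offset defaults to one
// hour ahead of standard time and transition times default to 02:00),
// the common US-Eastern rule string maps like so:
//
//     let tz = PosixTimeZone::parse(b"EST5EDT,M3.2.0,M11.1.0").unwrap();
//     assert_eq!(tz.std_abbrev, "EST");
//     assert_eq!(tz.std_offset, -5 * 3600); // "5" means UTC-05
//     let dst = tz.dst.unwrap();
//     assert_eq!(dst.abbrev, "EDT");
//     assert_eq!(dst.offset, -4 * 3600); // default: std_offset + 3600
//     assert_eq!(
//         dst.rule.unwrap().start,
//         PosixDayTime {
//             date: PosixDay::WeekdayOfMonth { month: 3, week: 2, weekday: 0 },
//             time: 2 * 3600, // default when no "/time" suffix is given
//         },
//     );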
+ fn parse(&self) -> Result, Error> { + let (time_zone, remaining) = self.parse_prefix()?; + if !remaining.is_empty() { + return Err(err!( + "expected entire TZ string to be a valid POSIX \ + time zone, but found `{}` after what would otherwise \ + be a valid POSIX TZ string", + Bytes(remaining), + )); + } + Ok(time_zone) + } + + /// Parses a POSIX time zone from the current position of the parser and + /// returns the remaining input. + fn parse_prefix( + &self, + ) -> Result<(PosixTimeZone, &'s [u8]), Error> { + let time_zone = self.parse_posix_time_zone()?; + Ok((time_zone, self.remaining())) + } + + /// Parse a POSIX time zone from the current position of the parser. + /// + /// Upon success, the parser will be positioned immediately following the + /// TZ string. + fn parse_posix_time_zone(&self) -> Result, Error> { + let std_abbrev = self + .parse_abbreviation() + .map_err(|e| err!("failed to parse standard abbreviation: {e}"))?; + let std_offset = self + .parse_posix_offset() + .map_err(|e| err!("failed to parse standard offset: {e}"))?; + let mut dst = None; + if !self.is_done() + && (self.byte().is_ascii_alphabetic() || self.byte() == b'<') + { + dst = Some(self.parse_posix_dst(std_offset)?); + } + Ok(PosixTimeZone { std_abbrev, std_offset, dst }) + } + + /// Parse a DST zone with an optional explicit transition rule. + /// + /// This assumes the parser is positioned at the first byte of the DST + /// abbreviation. + /// + /// Upon success, the parser will be positioned immediately after the end + /// of the DST transition rule (which might just be the abbreviation, but + /// might also include explicit start/end datetime specifications). + fn parse_posix_dst( + &self, + std_offset: i32, + ) -> Result, Error> { + let abbrev = self + .parse_abbreviation() + .map_err(|e| err!("failed to parse DST abbreviation: {e}"))?; + // This is the default: one hour ahead of standard time. We may + // override this if the DST portion specifies an offset. (But it + // usually doesn't.) + let offset = std_offset + 3600; + let mut dst = PosixDst { abbrev, offset, rule: None }; + if self.is_done() { + return Ok(dst); + } + if self.byte() != b',' { + dst.offset = self + .parse_posix_offset() + .map_err(|e| err!("failed to parse DST offset: {e}"))?; + if self.is_done() { + return Ok(dst); + } + } + if self.byte() != b',' { + return Err(err!( + "after parsing DST offset in POSIX time zone string, \ + found `{}` but expected a ','", + Byte(self.byte()), + )); + } + if !self.bump() { + return Err(err!( + "after parsing DST offset in POSIX time zone string, \ + found end of string after a trailing ','", + )); + } + dst.rule = Some(self.parse_rule()?); + Ok(dst) + } + + /// Parse a time zone abbreviation. + /// + /// This assumes the parser is positioned at the first byte of + /// the abbreviation. This is either the first character in the + /// abbreviation, or the opening quote of a quoted abbreviation. + /// + /// Upon success, the parser will be positioned immediately following + /// the abbreviation name. + /// + /// The string returned is guaranteed to be no more than 30 bytes. + /// (This restriction is somewhat arbitrary, but it's so we can put + /// the abbreviation in a fixed capacity array.) 
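// Illustrative note: an unquoted abbreviation must be at least three
// ASCII letters (e.g. "EST"), while the quoted `<...>` form also permits
// digits and a sign, which is how purely numeric designations are
// written. A sketch, with the usual sign flip applied to the offset:
//
//     let tz = PosixTimeZone::parse(b"<+09>-9").unwrap();
//     assert_eq!(tz.std_abbrev, "+09");
//     assert_eq!(tz.std_offset, 9 * 3600); // "-9" denotes UTC+09
//     assert!(tz.dst.is_none());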
+ fn parse_abbreviation(&self) -> Result { + if self.byte() == b'<' { + if !self.bump() { + return Err(err!( + "found opening '<' quote for abbreviation in \ + POSIX time zone string, and expected a name \ + following it, but found the end of string instead" + )); + } + self.parse_quoted_abbreviation() + } else { + self.parse_unquoted_abbreviation() + } + } + + /// Parses an unquoted time zone abbreviation. + /// + /// This assumes the parser is position at the first byte in the + /// abbreviation. + /// + /// Upon success, the parser will be positioned immediately after the + /// last byte in the abbreviation. + /// + /// The string returned is guaranteed to be no more than 30 bytes. + /// (This restriction is somewhat arbitrary, but it's so we can put + /// the abbreviation in a fixed capacity array.) + fn parse_unquoted_abbreviation(&self) -> Result { + const MAX_LEN: usize = 30; + + let start = self.pos(); + for i in 0.. { + if !self.byte().is_ascii_alphabetic() { + break; + } + if i >= MAX_LEN { + return Err(err!( + "expected abbreviation with at most {MAX_LEN} bytes, \ + but found a longer abbreviation beginning with `{}`", + Bytes(&self.tz[start..i]), + )); + } + if !self.bump() { + break; + } + } + let end = self.pos(); + let abbrev = + core::str::from_utf8(&self.tz[start..end]).map_err(|_| { + // NOTE: I believe this error is technically impossible + // since the loop above restricts letters in an + // abbreviation to ASCII. So everything from `start` to + // `end` is ASCII and thus should be UTF-8. But it doesn't + // cost us anything to report an error here in case the + // code above evolves somehow. + err!( + "found abbreviation `{}`, but it is not valid UTF-8", + Bytes(&self.tz[start..end]), + ) + })?; + if abbrev.len() < 3 { + return Err(err!( + "expected abbreviation with 3 or more bytes, but found \ + abbreviation {:?} with {} bytes", + abbrev, + abbrev.len(), + )); + } + Ok(String::from(abbrev)) + } + + /// Parses a quoted time zone abbreviation. + /// + /// This assumes the parser is positioned immediately after the opening + /// `<` quote. That is, at the first byte in the abbreviation. + /// + /// Upon success, the parser will be positioned immediately after the + /// closing `>` quote. + /// + /// The string returned is guaranteed to be no more than 30 bytes. + /// (This restriction is somewhat arbitrary, but it's so we can put + /// the abbreviation in a fixed capacity array.) + fn parse_quoted_abbreviation(&self) -> Result { + const MAX_LEN: usize = 30; + + let start = self.pos(); + for i in 0.. { + if !self.byte().is_ascii_alphanumeric() + && self.byte() != b'+' + && self.byte() != b'-' + { + break; + } + if i >= MAX_LEN { + return Err(err!( + "expected abbreviation with at most {MAX_LEN} bytes, \ + but found a longer abbreviation beginning with `{}`", + Bytes(&self.tz[start..i]), + )); + } + if !self.bump() { + break; + } + } + let end = self.pos(); + let abbrev = + core::str::from_utf8(&self.tz[start..end]).map_err(|_| { + // NOTE: I believe this error is technically impossible + // since the loop above restricts letters in an + // abbreviation to ASCII. So everything from `start` to + // `end` is ASCII and thus should be UTF-8. But it doesn't + // cost us anything to report an error here in case the + // code above evolves somehow. 
+ err!( + "found abbreviation `{}`, but it is not valid UTF-8", + Bytes(&self.tz[start..end]), + ) + })?; + if self.is_done() { + return Err(err!( + "found non-empty quoted abbreviation {abbrev:?}, but \ + did not find expected end-of-quoted abbreviation \ + '>' character", + )); + } + if self.byte() != b'>' { + return Err(err!( + "found non-empty quoted abbreviation {abbrev:?}, but \ + found `{}` instead of end-of-quoted abbreviation '>' \ + character", + Byte(self.byte()), + )); + } + self.bump(); + if abbrev.len() < 3 { + return Err(err!( + "expected abbreviation with 3 or more bytes, but found \ + abbreviation {abbrev:?} with {} bytes", + abbrev.len(), + )); + } + Ok(String::from(abbrev)) + } + + /// Parse a POSIX time offset. + /// + /// This assumes the parser is positioned at the first byte of the + /// offset. This can either be a digit (for a positive offset) or the + /// sign of the offset (which must be either `-` or `+`). + /// + /// Upon success, the parser will be positioned immediately after the + /// end of the offset. + fn parse_posix_offset(&self) -> Result { + let sign = self + .parse_optional_sign() + .map_err(|e| { + err!( + "failed to parse sign for time offset \ + in POSIX time zone string: {e}", + ) + })? + .unwrap_or(1); + let hour = self.parse_hour_posix()?; + let (mut minute, mut second) = (0, 0); + if self.maybe_byte() == Some(b':') { + if !self.bump() { + return Err(err!( + "incomplete time in POSIX timezone (missing minutes)", + )); + } + minute = self.parse_minute()?; + if self.maybe_byte() == Some(b':') { + if !self.bump() { + return Err(err!( + "incomplete time in POSIX timezone (missing seconds)", + )); + } + second = self.parse_second()?; + } + } + let mut seconds = i32::from(hour) * 3600; + seconds += i32::from(minute) * 60; + seconds += i32::from(second); + // Yes, we flip the sign, because POSIX is backwards. + // For example, `EST5` corresponds to `-05:00`. + seconds *= i32::from(-sign); + // Must be true because the parsing routines for hours, minutes + // and seconds enforce they are in the ranges -24..=24, 0..=59 + // and 0..=59, respectively. + assert!( + -89999 <= seconds && seconds <= 89999, + "offset seconds {seconds} is out of range" + ); + Ok(seconds) + } + + /// Parses a POSIX DST transition rule. + /// + /// This assumes the parser is positioned at the first byte in the + /// rule. That is, it comes immediately after the DST abbreviation or + /// its optional offset. + /// + /// Upon success, the parser will be positioned immediately after the + /// DST transition rule. In typical cases, this corresponds to the end + /// of the TZ string. + fn parse_rule(&self) -> Result { + let start = self.parse_posix_datetime_spec().map_err(|e| { + err!("failed to parse start of DST transition rule: {e}") + })?; + if self.maybe_byte() != Some(b',') || !self.bump() { + return Err(err!( + "expected end of DST rule after parsing the start \ + of the DST rule" + )); + } + let end = self.parse_posix_datetime_spec().map_err(|e| { + err!("failed to parse end of DST transition rule: {e}") + })?; + Ok(PosixRule { start, end }) + } + + /// Parses a POSIX datetime specification. + /// + /// This assumes the parser is position at the first byte where a + /// datetime specification is expected to occur. + /// + /// Upon success, the parser will be positioned after the datetime + /// specification. This will either be immediately after the date, or + /// if it's present, the time part of the specification. 
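// For example (a sketch): in "M10.5.0/3" the date part "M10.5.0" names
// the last Sunday of October and the optional "/3" sets the transition
// time to 03:00; when the "/time" part is omitted, it defaults to 02:00:
//
//     assert_eq!(
//         Parser::new("M10.5.0/3").parse_posix_datetime_spec().unwrap(),
//         PosixDayTime {
//             date: PosixDay::WeekdayOfMonth { month: 10, week: 5, weekday: 0 },
//             time: 3 * 3600,
//         },
//     );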
+ fn parse_posix_datetime_spec(&self) -> Result { + let mut daytime = PosixDayTime { + date: self.parse_posix_date_spec()?, + time: 2 * 3600, // the default if the time is absent + }; + if self.maybe_byte() != Some(b'/') { + return Ok(daytime); + } + if !self.bump() { + return Err(err!( + "expected time specification after '/' following a date + specification in a POSIX time zone DST transition rule", + )); + } + daytime.time = self.parse_posix_time_spec()?; + Ok(daytime) + } + + /// Parses a POSIX date specification. + /// + /// This assumes the parser is positioned at the first byte of the date + /// specification. This can be `J` (for one based Julian day without + /// leap days), `M` (for "weekday of month") or a digit starting the + /// zero based Julian day with leap days. This routine will validate + /// that the position points to one of these possible values. That is, + /// the caller doesn't need to parse the `M` or the `J` or the leading + /// digit. The caller should just call this routine when it *expect* a + /// date specification to follow. + /// + /// Upon success, the parser will be positioned immediately after the + /// date specification. + fn parse_posix_date_spec(&self) -> Result { + match self.byte() { + b'J' => { + if !self.bump() { + return Err(err!( + "expected one-based Julian day after 'J' in date \ + specification of a POSIX time zone DST \ + transition rule, but got the end of the string \ + instead" + )); + } + Ok(PosixDay::JulianOne(self.parse_posix_julian_day_no_leap()?)) + } + b'0'..=b'9' => Ok(PosixDay::JulianZero( + self.parse_posix_julian_day_with_leap()?, + )), + b'M' => { + if !self.bump() { + return Err(err!( + "expected month-week-weekday after 'M' in date \ + specification of a POSIX time zone DST \ + transition rule, but got the end of the string \ + instead" + )); + } + let (month, week, weekday) = self.parse_weekday_of_month()?; + Ok(PosixDay::WeekdayOfMonth { month, week, weekday }) + } + _ => Err(err!( + "expected 'J', a digit or 'M' at the beginning of a date \ + specification of a POSIX time zone DST transition rule, \ + but got `{}` instead", + Byte(self.byte()), + )), + } + } + + /// Parses a POSIX Julian day that does not include leap days + /// (`1 <= n <= 365`). + /// + /// This assumes the parser is positioned just after the `J` and at the + /// first digit of the Julian day. Upon success, the parser will be + /// positioned immediately following the day number. + fn parse_posix_julian_day_no_leap(&self) -> Result { + let number = self + .parse_number_with_upto_n_digits(3) + .map_err(|e| err!("invalid one based Julian day: {e}"))?; + let number = i16::try_from(number).map_err(|_| { + err!( + "one based Julian day `{number}` in POSIX time zone \ + does not fit into 16-bit integer" + ) + })?; + if !(1 <= number && number <= 365) { + return Err(err!( + "parsed one based Julian day `{number}`, \ + but one based Julian day in POSIX time zone \ + must be in range 1..=365", + )); + } + Ok(number) + } + + /// Parses a POSIX Julian day that includes leap days (`0 <= n <= + /// 365`). + /// + /// This assumes the parser is positioned at the first digit of the + /// Julian day. Upon success, the parser will be positioned immediately + /// following the day number. 
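// Put differently (illustrative note): because the one-based form never
// counts a leap day, "J59" is always February 28 and "J60" is always
// March 1, whereas in the zero-based form day "59" is February 29 in a
// leap year and March 1 otherwise.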
+ fn parse_posix_julian_day_with_leap(&self) -> Result { + let number = self + .parse_number_with_upto_n_digits(3) + .map_err(|e| err!("invalid zero based Julian day: {e}"))?; + let number = i16::try_from(number).map_err(|_| { + err!( + "zero based Julian day `{number}` in POSIX time zone \ + does not fit into 16-bit integer" + ) + })?; + if !(0 <= number && number <= 365) { + return Err(err!( + "parsed zero based Julian day `{number}`, \ + but zero based Julian day in POSIX time zone \ + must be in range 0..=365", + )); + } + Ok(number) + } + + /// Parses a POSIX "weekday of month" specification. + /// + /// This assumes the parser is positioned just after the `M` byte and + /// at the first digit of the month. Upon success, the parser will be + /// positioned immediately following the "weekday of the month" that + /// was parsed. + /// + /// The tuple returned is month (1..=12), week (1..=5) and weekday + /// (0..=6 with 0=Sunday). + fn parse_weekday_of_month(&self) -> Result<(i8, i8, i8), Error> { + let month = self.parse_month()?; + if self.maybe_byte() != Some(b'.') { + return Err(err!( + "expected '.' after month `{month}` in \ + POSIX time zone rule" + )); + } + if !self.bump() { + return Err(err!( + "expected week after month `{month}` in \ + POSIX time zone rule" + )); + } + let week = self.parse_week()?; + if self.maybe_byte() != Some(b'.') { + return Err(err!( + "expected '.' after week `{week}` in POSIX time zone rule" + )); + } + if !self.bump() { + return Err(err!( + "expected day-of-week after week `{week}` in \ + POSIX time zone rule" + )); + } + let weekday = self.parse_weekday()?; + Ok((month, week, weekday)) + } + + /// This parses a POSIX time specification in the format + /// `[+/-]hh?[:mm[:ss]]`. + /// + /// This assumes the parser is positioned at the first `h` (or the + /// sign, if present). Upon success, the parser will be positioned + /// immediately following the end of the time specification. + fn parse_posix_time_spec(&self) -> Result { + let (sign, hour) = if self.ianav3plus { + let sign = self + .parse_optional_sign() + .map_err(|e| { + err!( + "failed to parse sign for transition time \ + in POSIX time zone string: {e}", + ) + })? + .unwrap_or(1); + let hour = self.parse_hour_ianav3plus()?; + (sign, hour) + } else { + (1, i16::from(self.parse_hour_posix()?)) + }; + let (mut minute, mut second) = (0, 0); + if self.maybe_byte() == Some(b':') { + if !self.bump() { + return Err(err!( + "incomplete transition time in \ + POSIX time zone string (missing minutes)", + )); + } + minute = self.parse_minute()?; + if self.maybe_byte() == Some(b':') { + if !self.bump() { + return Err(err!( + "incomplete transition time in \ + POSIX time zone string (missing seconds)", + )); + } + second = self.parse_second()?; + } + } + let mut seconds = i32::from(hour) * 3600; + seconds += i32::from(minute) * 60; + seconds += i32::from(second); + seconds *= i32::from(sign); + // Must be true because the parsing routines for hours, minutes + // and seconds enforce they are in the ranges -167..=167, 0..=59 + // and 0..=59, respectively. + assert!(-604799 <= seconds && seconds <= 604799); + Ok(seconds) + } + + /// Parses a month. + /// + /// This is expected to be positioned at the first digit. Upon success, + /// the parser will be positioned after the month (which may contain + /// two digits). 
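// A sketch of the IANA v3+ extension handled above: with `ianav3plus`
// enabled, a transition time may carry a sign and exceed 24 hours, so a
// rule like "M10.5.0/-1" is read (per tzfile(5)) as one hour before
// midnight on the day preceding the last Sunday of October:
//
//     let p = Parser { ianav3plus: true, ..Parser::new("-1") };
//     assert_eq!(p.parse_posix_time_spec().unwrap(), -3600);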
+ fn parse_month(&self) -> Result { + let number = self.parse_number_with_upto_n_digits(2)?; + let number = i8::try_from(number).map_err(|_| { + err!( + "month `{number}` in POSIX time zone \ + does not fit into 8-bit integer" + ) + })?; + if !(1 <= number && number <= 12) { + return Err(err!( + "parsed month `{number}`, but month in \ + POSIX time zone must be in range 1..=12", + )); + } + Ok(number) + } + + /// Parses a week-of-month number. + /// + /// This is expected to be positioned at the first digit. Upon success, + /// the parser will be positioned after the week digit. + fn parse_week(&self) -> Result { + let number = self.parse_number_with_exactly_n_digits(1)?; + let number = i8::try_from(number).map_err(|_| { + err!( + "week `{number}` in POSIX time zone \ + does not fit into 8-bit integer" + ) + })?; + if !(1 <= number && number <= 5) { + return Err(err!( + "parsed week `{number}`, but week in \ + POSIX time zone must be in range 1..=5" + )); + } + Ok(number) + } + + /// Parses a weekday number. + /// + /// This is expected to be positioned at the first digit. Upon success, + /// the parser will be positioned after the week digit. + /// + /// The weekday returned is guaranteed to be in the range `0..=6`, with + /// `0` corresponding to Sunday. + fn parse_weekday(&self) -> Result { + let number = self.parse_number_with_exactly_n_digits(1)?; + let number = i8::try_from(number).map_err(|_| { + err!( + "weekday `{number}` in POSIX time zone \ + does not fit into 8-bit integer" + ) + })?; + if !(0 <= number && number <= 6) { + return Err(err!( + "parsed weekday `{number}`, but weekday in \ + POSIX time zone must be in range `0..=6` \ + (with `0` corresponding to Sunday)", + )); + } + Ok(number) + } + + /// Parses an hour from a POSIX time specification with the IANA + /// v3+ extension. That is, the hour may be in the range `0..=167`. + /// (Callers should parse an optional sign preceding the hour digits + /// when IANA V3+ parsing is enabled.) + /// + /// The hour is allowed to be a single digit (unlike minutes or + /// seconds). + /// + /// This assumes the parser is positioned at the position where the + /// first hour digit should occur. Upon success, the parser will be + /// positioned immediately after the last hour digit. + fn parse_hour_ianav3plus(&self) -> Result { + // Callers should only be using this method when IANA v3+ parsing + // is enabled. + assert!(self.ianav3plus); + let number = self + .parse_number_with_upto_n_digits(3) + .map_err(|e| err!("invalid hour digits: {e}"))?; + let number = i16::try_from(number).map_err(|_| { + err!( + "hour `{number}` in POSIX time zone \ + does not fit into 16-bit integer" + ) + })?; + if !(0 <= number && number <= 167) { + // The error message says -167 but the check above uses 0. + // This is because the caller is responsible for parsing + // the sign. + return Err(err!( + "parsed hour `{number}`, but hour in IANA v3+ \ + POSIX time zone must be in range `-167..=167`", + )); + } + Ok(number) + } + + /// Parses an hour from a POSIX time specification, with the allowed + /// range being `0..=24`. + /// + /// The hour is allowed to be a single digit (unlike minutes or + /// seconds). + /// + /// This assumes the parser is positioned at the position where the + /// first hour digit should occur. Upon success, the parser will be + /// positioned immediately after the last hour digit. 
+ fn parse_hour_posix(&self) -> Result { + let number = self + .parse_number_with_upto_n_digits(2) + .map_err(|e| err!("invalid hour digits: {e}"))?; + let number = i8::try_from(number).map_err(|_| { + err!( + "hour `{number}` in POSIX time zone \ + does not fit into 8-bit integer" + ) + })?; + if !(0 <= number && number <= 24) { + return Err(err!( + "parsed hour `{number}`, but hour in \ + POSIX time zone must be in range `0..=24`", + )); + } + Ok(number) + } + + /// Parses a minute from a POSIX time specification. + /// + /// The minute must be exactly two digits. + /// + /// This assumes the parser is positioned at the position where the + /// first minute digit should occur. Upon success, the parser will be + /// positioned immediately after the second minute digit. + fn parse_minute(&self) -> Result { + let number = self + .parse_number_with_exactly_n_digits(2) + .map_err(|e| err!("invalid minute digits: {e}"))?; + let number = i8::try_from(number).map_err(|_| { + err!( + "minute `{number}` in POSIX time zone \ + does not fit into 8-bit integer" + ) + })?; + if !(0 <= number && number <= 59) { + return Err(err!( + "parsed minute `{number}`, but minute in \ + POSIX time zone must be in range `0..=59`", + )); + } + Ok(number) + } + + /// Parses a second from a POSIX time specification. + /// + /// The second must be exactly two digits. + /// + /// This assumes the parser is positioned at the position where the + /// first second digit should occur. Upon success, the parser will be + /// positioned immediately after the second second digit. + fn parse_second(&self) -> Result { + let number = self + .parse_number_with_exactly_n_digits(2) + .map_err(|e| err!("invalid second digits: {e}"))?; + let number = i8::try_from(number).map_err(|_| { + err!( + "second `{number}` in POSIX time zone \ + does not fit into 8-bit integer" + ) + })?; + if !(0 <= number && number <= 59) { + return Err(err!( + "parsed second `{number}`, but second in \ + POSIX time zone must be in range `0..=59`", + )); + } + Ok(number) + } + + /// Parses a signed 64-bit integer expressed in exactly `n` digits. + /// + /// If `n` digits could not be found (or if the `TZ` string ends before + /// `n` digits could be found), then this returns an error. + /// + /// This assumes that `n >= 1` and that the parser is positioned at the + /// first digit. Upon success, the parser is positioned immediately + /// after the `n`th digit. + fn parse_number_with_exactly_n_digits( + &self, + n: usize, + ) -> Result { + assert!(n >= 1, "numbers must have at least 1 digit"); + let start = self.pos(); + let mut number: i32 = 0; + for i in 0..n { + if self.is_done() { + return Err(err!("expected {n} digits, but found {i}")); + } + let byte = self.byte(); + let digit = match byte.checked_sub(b'0') { + None => { + return Err(err!( + "invalid digit, expected 0-9 but got {}", + Byte(byte), + )); + } + Some(digit) if digit > 9 => { + return Err(err!( + "invalid digit, expected 0-9 but got {}", + Byte(byte), + )) + } + Some(digit) => { + debug_assert!((0..=9).contains(&digit)); + i32::from(digit) + } + }; + number = number + .checked_mul(10) + .and_then(|n| n.checked_add(digit)) + .ok_or_else(|| { + err!( + "number `{}` too big to parse into 64-bit integer", + Bytes(&self.tz[start..i]), + ) + })?; + self.bump(); + } + Ok(number) + } + + /// Parses a signed 64-bit integer expressed with up to `n` digits and + /// at least 1 digit. + /// + /// This assumes that `n >= 1` and that the parser is positioned at the + /// first digit. 
Upon success, the parser is position immediately after + /// the last digit (which can be at most `n`). + fn parse_number_with_upto_n_digits(&self, n: usize) -> Result { + assert!(n >= 1, "numbers must have at least 1 digit"); + let start = self.pos(); + let mut number: i32 = 0; + for i in 0..n { + if self.is_done() || !self.byte().is_ascii_digit() { + if i == 0 { + return Err(err!("invalid number, no digits found")); + } + break; + } + let digit = i32::from(self.byte() - b'0'); + number = number + .checked_mul(10) + .and_then(|n| n.checked_add(digit)) + .ok_or_else(|| { + err!( + "number `{}` too big to parse into 64-bit integer", + Bytes(&self.tz[start..i]), + ) + })?; + self.bump(); + } + Ok(number) + } + + /// Parses an optional sign. + /// + /// This assumes the parser is positioned at the position where a + /// positive or negative sign is permitted. If one exists, then it + /// is consumed and returned. Moreover, if one exists, then this + /// guarantees that it is not the last byte in the input. That is, upon + /// success, it is valid to call `self.byte()`. + fn parse_optional_sign(&self) -> Result, Error> { + if self.is_done() { + return Ok(None); + } + Ok(match self.byte() { + b'-' => { + if !self.bump() { + return Err(err!( + "expected digit after '-' sign, \ + but got end of input", + )); + } + Some(-1) + } + b'+' => { + if !self.bump() { + return Err(err!( + "expected digit after '+' sign, \ + but got end of input", + )); + } + Some(1) + } + _ => None, + }) + } +} + +/// Helper routines for parsing a POSIX `TZ` string. +impl<'s> Parser<'s> { + /// Bump the parser to the next byte. + /// + /// If the end of the input has been reached, then `false` is returned. + fn bump(&self) -> bool { + if self.is_done() { + return false; + } + self.pos.set( + self.pos().checked_add(1).expect("pos cannot overflow usize"), + ); + !self.is_done() + } + + /// Returns true if the next call to `bump` would return false. + fn is_done(&self) -> bool { + self.pos() == self.tz.len() + } + + /// Return the byte at the current position of the parser. + /// + /// This panics if the parser is positioned at the end of the TZ + /// string. + fn byte(&self) -> u8 { + self.tz[self.pos()] + } + + /// Return the byte at the current position of the parser. If the TZ + /// string has been exhausted, then this returns `None`. + fn maybe_byte(&self) -> Option { + self.tz.get(self.pos()).copied() + } + + /// Return the current byte offset of the parser. + /// + /// The offset starts at `0` from the beginning of the TZ string. + fn pos(&self) -> usize { + self.pos.get() + } + + /// Returns the remaining bytes of the TZ string. + /// + /// This includes `self.byte()`. It may be empty. + fn remaining(&self) -> &'s [u8] { + &self.tz[self.pos()..] 
+ } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn parser(s: &str) -> Parser<'_> { + Parser::new(s.as_bytes()) + } + + #[test] + fn parse() { + let p = parser("NZST-12NZDT,J60,J300"); + assert_eq!( + p.parse().unwrap(), + PosixTimeZone { + std_abbrev: "NZST".into(), + std_offset: 12 * 60 * 60, + dst: Some(PosixDst { + abbrev: "NZDT".into(), + offset: 13 * 60 * 60, + rule: Some(PosixRule { + start: PosixDayTime { + date: PosixDay::JulianOne(60), + time: 2 * 60 * 60, + }, + end: PosixDayTime { + date: PosixDay::JulianOne(300), + time: 2 * 60 * 60, + }, + }), + }), + }, + ); + + let p = Parser::new("NZST-12NZDT,J60,J300WAT"); + assert!(p.parse().is_err()); + } + + #[test] + fn parse_posix_time_zone() { + let p = Parser::new("NZST-12NZDT,M9.5.0,M4.1.0/3"); + assert_eq!( + p.parse_posix_time_zone().unwrap(), + PosixTimeZone { + std_abbrev: "NZST".into(), + std_offset: 12 * 60 * 60, + dst: Some(PosixDst { + abbrev: "NZDT".into(), + offset: 13 * 60 * 60, + rule: Some(PosixRule { + start: PosixDayTime { + date: PosixDay::WeekdayOfMonth { + month: 9, + week: 5, + weekday: 0, + }, + time: 2 * 60 * 60, + }, + end: PosixDayTime { + date: PosixDay::WeekdayOfMonth { + month: 4, + week: 1, + weekday: 0, + }, + time: 3 * 60 * 60, + }, + }) + }), + }, + ); + + let p = Parser::new("NZST-12NZDT,M9.5.0,M4.1.0/3WAT"); + assert_eq!( + p.parse_posix_time_zone().unwrap(), + PosixTimeZone { + std_abbrev: "NZST".into(), + std_offset: 12 * 60 * 60, + dst: Some(PosixDst { + abbrev: "NZDT".into(), + offset: 13 * 60 * 60, + rule: Some(PosixRule { + start: PosixDayTime { + date: PosixDay::WeekdayOfMonth { + month: 9, + week: 5, + weekday: 0, + }, + time: 2 * 60 * 60, + }, + end: PosixDayTime { + date: PosixDay::WeekdayOfMonth { + month: 4, + week: 1, + weekday: 0, + }, + time: 3 * 60 * 60, + }, + }) + }), + }, + ); + + let p = Parser::new("NZST-12NZDT,J60,J300"); + assert_eq!( + p.parse_posix_time_zone().unwrap(), + PosixTimeZone { + std_abbrev: "NZST".into(), + std_offset: 12 * 60 * 60, + dst: Some(PosixDst { + abbrev: "NZDT".into(), + offset: 13 * 60 * 60, + rule: Some(PosixRule { + start: PosixDayTime { + date: PosixDay::JulianOne(60), + time: 2 * 60 * 60, + }, + end: PosixDayTime { + date: PosixDay::JulianOne(300), + time: 2 * 60 * 60, + }, + }), + }), + }, + ); + + let p = Parser::new("NZST-12NZDT,J60,J300WAT"); + assert_eq!( + p.parse_posix_time_zone().unwrap(), + PosixTimeZone { + std_abbrev: "NZST".into(), + std_offset: 12 * 60 * 60, + dst: Some(PosixDst { + abbrev: "NZDT".into(), + offset: 13 * 60 * 60, + rule: Some(PosixRule { + start: PosixDayTime { + date: PosixDay::JulianOne(60), + time: 2 * 60 * 60, + }, + end: PosixDayTime { + date: PosixDay::JulianOne(300), + time: 2 * 60 * 60, + }, + }), + }), + }, + ); + } + + #[test] + fn parse_posix_dst() { + let p = Parser::new("NZDT,M9.5.0,M4.1.0/3"); + assert_eq!( + p.parse_posix_dst(12 * 60 * 60).unwrap(), + PosixDst { + abbrev: "NZDT".into(), + offset: 13 * 60 * 60, + rule: Some(PosixRule { + start: PosixDayTime { + date: PosixDay::WeekdayOfMonth { + month: 9, + week: 5, + weekday: 0, + }, + time: 2 * 60 * 60, + }, + end: PosixDayTime { + date: PosixDay::WeekdayOfMonth { + month: 4, + week: 1, + weekday: 0, + }, + time: 3 * 60 * 60, + }, + }), + }, + ); + + let p = Parser::new("NZDT,J60,J300"); + assert_eq!( + p.parse_posix_dst(12 * 60 * 60).unwrap(), + PosixDst { + abbrev: "NZDT".into(), + offset: 13 * 60 * 60, + rule: Some(PosixRule { + start: PosixDayTime { + date: PosixDay::JulianOne(60), + time: 2 * 60 * 60, + }, + end: PosixDayTime { + date: 
PosixDay::JulianOne(300), + time: 2 * 60 * 60, + }, + }), + }, + ); + + let p = Parser::new("NZDT-7,J60,J300"); + assert_eq!( + p.parse_posix_dst(12 * 60 * 60).unwrap(), + PosixDst { + abbrev: "NZDT".into(), + offset: 7 * 60 * 60, + rule: Some(PosixRule { + start: PosixDayTime { + date: PosixDay::JulianOne(60), + time: 2 * 60 * 60, + }, + end: PosixDayTime { + date: PosixDay::JulianOne(300), + time: 2 * 60 * 60, + }, + }), + }, + ); + + let p = Parser::new("NZDT+7,J60,J300"); + assert_eq!( + p.parse_posix_dst(12 * 60 * 60).unwrap(), + PosixDst { + abbrev: "NZDT".into(), + offset: -7 * 60 * 60, + rule: Some(PosixRule { + start: PosixDayTime { + date: PosixDay::JulianOne(60), + time: 2 * 60 * 60, + }, + end: PosixDayTime { + date: PosixDay::JulianOne(300), + time: 2 * 60 * 60, + }, + }), + }, + ); + + let p = Parser::new("NZDT7,J60,J300"); + assert_eq!( + p.parse_posix_dst(12 * 60 * 60).unwrap(), + PosixDst { + abbrev: "NZDT".into(), + offset: -7 * 60 * 60, + rule: Some(PosixRule { + start: PosixDayTime { + date: PosixDay::JulianOne(60), + time: 2 * 60 * 60, + }, + end: PosixDayTime { + date: PosixDay::JulianOne(300), + time: 2 * 60 * 60, + }, + }), + }, + ); + + let p = Parser::new("NZDT7,"); + assert!(p.parse_posix_dst(12 * 60 * 60).is_err()); + + let p = Parser::new("NZDT7!"); + assert!(p.parse_posix_dst(12 * 60 * 60).is_err()); + } + + #[test] + fn parse_abbreviation() { + let p = Parser::new("ABC"); + assert_eq!(p.parse_abbreviation().unwrap(), "ABC"); + + let p = Parser::new(""); + assert_eq!(p.parse_abbreviation().unwrap(), "ABC"); + + let p = Parser::new("<+09>"); + assert_eq!(p.parse_abbreviation().unwrap(), "+09"); + + let p = Parser::new("+09"); + assert!(p.parse_abbreviation().is_err()); + } + + #[test] + fn parse_unquoted_abbreviation() { + let p = Parser::new("ABC"); + assert_eq!(p.parse_unquoted_abbreviation().unwrap(), "ABC"); + + let p = Parser::new("ABCXYZ"); + assert_eq!(p.parse_unquoted_abbreviation().unwrap(), "ABCXYZ"); + + let p = Parser::new("ABC123"); + assert_eq!(p.parse_unquoted_abbreviation().unwrap(), "ABC"); + + let tz = "a".repeat(30); + let p = Parser::new(&tz); + assert_eq!(p.parse_unquoted_abbreviation().unwrap(), &*tz); + + let p = Parser::new("a"); + assert!(p.parse_unquoted_abbreviation().is_err()); + + let p = Parser::new("ab"); + assert!(p.parse_unquoted_abbreviation().is_err()); + + let p = Parser::new("ab1"); + assert!(p.parse_unquoted_abbreviation().is_err()); + + let tz = "a".repeat(31); + let p = Parser::new(&tz); + assert!(p.parse_unquoted_abbreviation().is_err()); + + let p = Parser::new(b"ab\xFFcd"); + assert!(p.parse_unquoted_abbreviation().is_err()); + } + + #[test] + fn parse_quoted_abbreviation() { + // The inputs look a little funny here, but that's because + // 'parse_quoted_abbreviation' starts after the opening quote + // has been parsed. 
+ + let p = Parser::new("ABC>"); + assert_eq!(p.parse_quoted_abbreviation().unwrap(), "ABC"); + + let p = Parser::new("ABCXYZ>"); + assert_eq!(p.parse_quoted_abbreviation().unwrap(), "ABCXYZ"); + + let p = Parser::new("ABC>123"); + assert_eq!(p.parse_quoted_abbreviation().unwrap(), "ABC"); + + let p = Parser::new("ABC123>"); + assert_eq!(p.parse_quoted_abbreviation().unwrap(), "ABC123"); + + let p = Parser::new("ab1>"); + assert_eq!(p.parse_quoted_abbreviation().unwrap(), "ab1"); + + let p = Parser::new("+09>"); + assert_eq!(p.parse_quoted_abbreviation().unwrap(), "+09"); + + let p = Parser::new("-09>"); + assert_eq!(p.parse_quoted_abbreviation().unwrap(), "-09"); + + let tz = alloc::format!("{}>", "a".repeat(30)); + let p = Parser::new(&tz); + assert_eq!( + p.parse_quoted_abbreviation().unwrap(), + tz.trim_end_matches(">") + ); + + let p = Parser::new("a>"); + assert!(p.parse_quoted_abbreviation().is_err()); + + let p = Parser::new("ab>"); + assert!(p.parse_quoted_abbreviation().is_err()); + + let tz = alloc::format!("{}>", "a".repeat(31)); + let p = Parser::new(&tz); + assert!(p.parse_quoted_abbreviation().is_err()); + + let p = Parser::new(b"ab\xFFcd>"); + assert!(p.parse_quoted_abbreviation().is_err()); + + let p = Parser::new("ABC"); + assert!(p.parse_quoted_abbreviation().is_err()); + + let p = Parser::new("ABC!>"); + assert!(p.parse_quoted_abbreviation().is_err()); + } + + #[test] + fn parse_posix_offset() { + let p = Parser::new("5"); + assert_eq!(p.parse_posix_offset().unwrap(), -5 * 60 * 60); + + let p = Parser::new("+5"); + assert_eq!(p.parse_posix_offset().unwrap(), -5 * 60 * 60); + + let p = Parser::new("-5"); + assert_eq!(p.parse_posix_offset().unwrap(), 5 * 60 * 60); + + let p = Parser::new("-12:34:56"); + assert_eq!( + p.parse_posix_offset().unwrap(), + 12 * 60 * 60 + 34 * 60 + 56, + ); + + let p = Parser::new("a"); + assert!(p.parse_posix_offset().is_err()); + + let p = Parser::new("-"); + assert!(p.parse_posix_offset().is_err()); + + let p = Parser::new("+"); + assert!(p.parse_posix_offset().is_err()); + + let p = Parser::new("-a"); + assert!(p.parse_posix_offset().is_err()); + + let p = Parser::new("+a"); + assert!(p.parse_posix_offset().is_err()); + + let p = Parser::new("-25"); + assert!(p.parse_posix_offset().is_err()); + + let p = Parser::new("+25"); + assert!(p.parse_posix_offset().is_err()); + + // This checks that we don't accidentally permit IANA rules for + // offset parsing. Namely, the IANA tzfile v3+ extension only applies + // to transition times. But since POSIX says that the "time" for the + // offset and transition is the same format, it would be an easy + // implementation mistake to implement the more flexible rule for + // IANA and have it accidentally also apply to the offset. So we check + // that it doesn't here. 
+ let p = Parser { ianav3plus: true, ..Parser::new("25") }; + assert!(p.parse_posix_offset().is_err()); + let p = Parser { ianav3plus: true, ..Parser::new("+25") }; + assert!(p.parse_posix_offset().is_err()); + let p = Parser { ianav3plus: true, ..Parser::new("-25") }; + assert!(p.parse_posix_offset().is_err()); + } + + #[test] + fn parse_rule() { + let p = Parser::new("M9.5.0,M4.1.0/3"); + assert_eq!( + p.parse_rule().unwrap(), + PosixRule { + start: PosixDayTime { + date: PosixDay::WeekdayOfMonth { + month: 9, + week: 5, + weekday: 0, + }, + time: 2 * 60 * 60, + }, + end: PosixDayTime { + date: PosixDay::WeekdayOfMonth { + month: 4, + week: 1, + weekday: 0, + }, + time: 3 * 60 * 60, + }, + }, + ); + + let p = Parser::new("M9.5.0"); + assert!(p.parse_rule().is_err()); + + let p = Parser::new(",M9.5.0,M4.1.0/3"); + assert!(p.parse_rule().is_err()); + + let p = Parser::new("M9.5.0/"); + assert!(p.parse_rule().is_err()); + + let p = Parser::new("M9.5.0,M4.1.0/"); + assert!(p.parse_rule().is_err()); + } + + #[test] + fn parse_posix_datetime_spec() { + let p = Parser::new("J1"); + assert_eq!( + p.parse_posix_datetime_spec().unwrap(), + PosixDayTime { date: PosixDay::JulianOne(1), time: 2 * 60 * 60 }, + ); + + let p = Parser::new("J1/3"); + assert_eq!( + p.parse_posix_datetime_spec().unwrap(), + PosixDayTime { date: PosixDay::JulianOne(1), time: 3 * 60 * 60 }, + ); + + let p = Parser::new("M4.1.0/3"); + assert_eq!( + p.parse_posix_datetime_spec().unwrap(), + PosixDayTime { + date: PosixDay::WeekdayOfMonth { + month: 4, + week: 1, + weekday: 0, + }, + time: 3 * 60 * 60, + }, + ); + + let p = Parser::new("1/3:45:05"); + assert_eq!( + p.parse_posix_datetime_spec().unwrap(), + PosixDayTime { + date: PosixDay::JulianZero(1), + time: 3 * 60 * 60 + 45 * 60 + 5, + }, + ); + + let p = Parser::new("a"); + assert!(p.parse_posix_datetime_spec().is_err()); + + let p = Parser::new("J1/"); + assert!(p.parse_posix_datetime_spec().is_err()); + + let p = Parser::new("1/"); + assert!(p.parse_posix_datetime_spec().is_err()); + + let p = Parser::new("M4.1.0/"); + assert!(p.parse_posix_datetime_spec().is_err()); + } + + #[test] + fn parse_posix_date_spec() { + let p = Parser::new("J1"); + assert_eq!(p.parse_posix_date_spec().unwrap(), PosixDay::JulianOne(1)); + let p = Parser::new("J365"); + assert_eq!( + p.parse_posix_date_spec().unwrap(), + PosixDay::JulianOne(365) + ); + + let p = Parser::new("0"); + assert_eq!( + p.parse_posix_date_spec().unwrap(), + PosixDay::JulianZero(0) + ); + let p = Parser::new("1"); + assert_eq!( + p.parse_posix_date_spec().unwrap(), + PosixDay::JulianZero(1) + ); + let p = Parser::new("365"); + assert_eq!( + p.parse_posix_date_spec().unwrap(), + PosixDay::JulianZero(365) + ); + + let p = Parser::new("M9.5.0"); + assert_eq!( + p.parse_posix_date_spec().unwrap(), + PosixDay::WeekdayOfMonth { month: 9, week: 5, weekday: 0 }, + ); + let p = Parser::new("M9.5.6"); + assert_eq!( + p.parse_posix_date_spec().unwrap(), + PosixDay::WeekdayOfMonth { month: 9, week: 5, weekday: 6 }, + ); + let p = Parser::new("M09.5.6"); + assert_eq!( + p.parse_posix_date_spec().unwrap(), + PosixDay::WeekdayOfMonth { month: 9, week: 5, weekday: 6 }, + ); + let p = Parser::new("M12.1.1"); + assert_eq!( + p.parse_posix_date_spec().unwrap(), + PosixDay::WeekdayOfMonth { month: 12, week: 1, weekday: 1 }, + ); + + let p = Parser::new("a"); + assert!(p.parse_posix_date_spec().is_err()); + + let p = Parser::new("j"); + assert!(p.parse_posix_date_spec().is_err()); + + let p = Parser::new("m"); + 
assert!(p.parse_posix_date_spec().is_err()); + + let p = Parser::new("n"); + assert!(p.parse_posix_date_spec().is_err()); + + let p = Parser::new("J366"); + assert!(p.parse_posix_date_spec().is_err()); + + let p = Parser::new("366"); + assert!(p.parse_posix_date_spec().is_err()); + } + + #[test] + fn parse_posix_julian_day_no_leap() { + let p = Parser::new("1"); + assert_eq!(p.parse_posix_julian_day_no_leap().unwrap(), 1); + + let p = Parser::new("001"); + assert_eq!(p.parse_posix_julian_day_no_leap().unwrap(), 1); + + let p = Parser::new("365"); + assert_eq!(p.parse_posix_julian_day_no_leap().unwrap(), 365); + + let p = Parser::new("3655"); + assert_eq!(p.parse_posix_julian_day_no_leap().unwrap(), 365); + + let p = Parser::new("0"); + assert!(p.parse_posix_julian_day_no_leap().is_err()); + + let p = Parser::new("366"); + assert!(p.parse_posix_julian_day_no_leap().is_err()); + } + + #[test] + fn parse_posix_julian_day_with_leap() { + let p = Parser::new("0"); + assert_eq!(p.parse_posix_julian_day_with_leap().unwrap(), 0); + + let p = Parser::new("1"); + assert_eq!(p.parse_posix_julian_day_with_leap().unwrap(), 1); + + let p = Parser::new("001"); + assert_eq!(p.parse_posix_julian_day_with_leap().unwrap(), 1); + + let p = Parser::new("365"); + assert_eq!(p.parse_posix_julian_day_with_leap().unwrap(), 365); + + let p = Parser::new("3655"); + assert_eq!(p.parse_posix_julian_day_with_leap().unwrap(), 365); + + let p = Parser::new("366"); + assert!(p.parse_posix_julian_day_with_leap().is_err()); + } + + #[test] + fn parse_weekday_of_month() { + let p = Parser::new("9.5.0"); + assert_eq!(p.parse_weekday_of_month().unwrap(), (9, 5, 0)); + + let p = Parser::new("9.1.6"); + assert_eq!(p.parse_weekday_of_month().unwrap(), (9, 1, 6)); + + let p = Parser::new("09.1.6"); + assert_eq!(p.parse_weekday_of_month().unwrap(), (9, 1, 6)); + + let p = Parser::new("9"); + assert!(p.parse_weekday_of_month().is_err()); + + let p = Parser::new("9."); + assert!(p.parse_weekday_of_month().is_err()); + + let p = Parser::new("9.5"); + assert!(p.parse_weekday_of_month().is_err()); + + let p = Parser::new("9.5."); + assert!(p.parse_weekday_of_month().is_err()); + + let p = Parser::new("0.5.0"); + assert!(p.parse_weekday_of_month().is_err()); + + let p = Parser::new("13.5.0"); + assert!(p.parse_weekday_of_month().is_err()); + + let p = Parser::new("9.0.0"); + assert!(p.parse_weekday_of_month().is_err()); + + let p = Parser::new("9.6.0"); + assert!(p.parse_weekday_of_month().is_err()); + + let p = Parser::new("9.5.7"); + assert!(p.parse_weekday_of_month().is_err()); + } + + #[test] + fn parse_posix_time_spec() { + let p = Parser::new("5"); + assert_eq!(p.parse_posix_time_spec().unwrap(), 5 * 60 * 60); + + let p = Parser::new("22"); + assert_eq!(p.parse_posix_time_spec().unwrap(), 22 * 60 * 60); + + let p = Parser::new("02"); + assert_eq!(p.parse_posix_time_spec().unwrap(), 2 * 60 * 60); + + let p = Parser::new("5:45"); + assert_eq!(p.parse_posix_time_spec().unwrap(), 5 * 60 * 60 + 45 * 60); + + let p = Parser::new("5:45:12"); + assert_eq!( + p.parse_posix_time_spec().unwrap(), + 5 * 60 * 60 + 45 * 60 + 12 + ); + + let p = Parser::new("5:45:129"); + assert_eq!( + p.parse_posix_time_spec().unwrap(), + 5 * 60 * 60 + 45 * 60 + 12 + ); + + let p = Parser::new("5:45:12:"); + assert_eq!( + p.parse_posix_time_spec().unwrap(), + 5 * 60 * 60 + 45 * 60 + 12 + ); + + let p = Parser { ianav3plus: true, ..Parser::new("+5:45:12") }; + assert_eq!( + p.parse_posix_time_spec().unwrap(), + 5 * 60 * 60 + 45 * 60 + 12 + ); + + let p = Parser { 
ianav3plus: true, ..Parser::new("-5:45:12") }; + assert_eq!( + p.parse_posix_time_spec().unwrap(), + -(5 * 60 * 60 + 45 * 60 + 12) + ); + + let p = Parser { ianav3plus: true, ..Parser::new("-167:45:12") }; + assert_eq!( + p.parse_posix_time_spec().unwrap(), + -(167 * 60 * 60 + 45 * 60 + 12), + ); + + let p = Parser::new("25"); + assert!(p.parse_posix_time_spec().is_err()); + + let p = Parser::new("12:2"); + assert!(p.parse_posix_time_spec().is_err()); + + let p = Parser::new("12:"); + assert!(p.parse_posix_time_spec().is_err()); + + let p = Parser::new("12:23:5"); + assert!(p.parse_posix_time_spec().is_err()); + + let p = Parser::new("12:23:"); + assert!(p.parse_posix_time_spec().is_err()); + + let p = Parser { ianav3plus: true, ..Parser::new("168") }; + assert!(p.parse_posix_time_spec().is_err()); + + let p = Parser { ianav3plus: true, ..Parser::new("-168") }; + assert!(p.parse_posix_time_spec().is_err()); + + let p = Parser { ianav3plus: true, ..Parser::new("+168") }; + assert!(p.parse_posix_time_spec().is_err()); + } + + #[test] + fn parse_month() { + let p = Parser::new("1"); + assert_eq!(p.parse_month().unwrap(), 1); + + // Should this be allowed? POSIX spec is unclear. + // We allow it because our parse does stop at 2 + // digits, so this seems harmless. Namely, '001' + // results in an error. + let p = Parser::new("01"); + assert_eq!(p.parse_month().unwrap(), 1); + + let p = Parser::new("12"); + assert_eq!(p.parse_month().unwrap(), 12); + + let p = Parser::new("0"); + assert!(p.parse_month().is_err()); + + let p = Parser::new("00"); + assert!(p.parse_month().is_err()); + + let p = Parser::new("001"); + assert!(p.parse_month().is_err()); + + let p = Parser::new("13"); + assert!(p.parse_month().is_err()); + } + + #[test] + fn parse_week() { + let p = Parser::new("1"); + assert_eq!(p.parse_week().unwrap(), 1); + + let p = Parser::new("5"); + assert_eq!(p.parse_week().unwrap(), 5); + + let p = Parser::new("55"); + assert_eq!(p.parse_week().unwrap(), 5); + + let p = Parser::new("0"); + assert!(p.parse_week().is_err()); + + let p = Parser::new("6"); + assert!(p.parse_week().is_err()); + + let p = Parser::new("00"); + assert!(p.parse_week().is_err()); + + let p = Parser::new("01"); + assert!(p.parse_week().is_err()); + + let p = Parser::new("05"); + assert!(p.parse_week().is_err()); + } + + #[test] + fn parse_weekday() { + let p = Parser::new("0"); + assert_eq!(p.parse_weekday().unwrap(), 0); + + let p = Parser::new("1"); + assert_eq!(p.parse_weekday().unwrap(), 1); + + let p = Parser::new("6"); + assert_eq!(p.parse_weekday().unwrap(), 6); + + let p = Parser::new("00"); + assert_eq!(p.parse_weekday().unwrap(), 0); + + let p = Parser::new("06"); + assert_eq!(p.parse_weekday().unwrap(), 0); + + let p = Parser::new("60"); + assert_eq!(p.parse_weekday().unwrap(), 6); + + let p = Parser::new("7"); + assert!(p.parse_weekday().is_err()); + } + + #[test] + fn parse_hour_posix() { + let p = Parser::new("5"); + assert_eq!(p.parse_hour_posix().unwrap(), 5); + + let p = Parser::new("0"); + assert_eq!(p.parse_hour_posix().unwrap(), 0); + + let p = Parser::new("00"); + assert_eq!(p.parse_hour_posix().unwrap(), 0); + + let p = Parser::new("24"); + assert_eq!(p.parse_hour_posix().unwrap(), 24); + + let p = Parser::new("100"); + assert_eq!(p.parse_hour_posix().unwrap(), 10); + + let p = Parser::new("25"); + assert!(p.parse_hour_posix().is_err()); + + let p = Parser::new("99"); + assert!(p.parse_hour_posix().is_err()); + } + + #[test] + fn parse_hour_ianav3plus() { + let new = |input| Parser { ianav3plus: 
true, ..Parser::new(input) }; + + let p = new("5"); + assert_eq!(p.parse_hour_ianav3plus().unwrap(), 5); + + let p = new("0"); + assert_eq!(p.parse_hour_ianav3plus().unwrap(), 0); + + let p = new("00"); + assert_eq!(p.parse_hour_ianav3plus().unwrap(), 0); + + let p = new("000"); + assert_eq!(p.parse_hour_ianav3plus().unwrap(), 0); + + let p = new("24"); + assert_eq!(p.parse_hour_ianav3plus().unwrap(), 24); + + let p = new("100"); + assert_eq!(p.parse_hour_ianav3plus().unwrap(), 100); + + let p = new("1000"); + assert_eq!(p.parse_hour_ianav3plus().unwrap(), 100); + + let p = new("167"); + assert_eq!(p.parse_hour_ianav3plus().unwrap(), 167); + + let p = new("168"); + assert!(p.parse_hour_ianav3plus().is_err()); + + let p = new("999"); + assert!(p.parse_hour_ianav3plus().is_err()); + } + + #[test] + fn parse_minute() { + let p = Parser::new("00"); + assert_eq!(p.parse_minute().unwrap(), 0); + + let p = Parser::new("24"); + assert_eq!(p.parse_minute().unwrap(), 24); + + let p = Parser::new("59"); + assert_eq!(p.parse_minute().unwrap(), 59); + + let p = Parser::new("599"); + assert_eq!(p.parse_minute().unwrap(), 59); + + let p = Parser::new("0"); + assert!(p.parse_minute().is_err()); + + let p = Parser::new("1"); + assert!(p.parse_minute().is_err()); + + let p = Parser::new("9"); + assert!(p.parse_minute().is_err()); + + let p = Parser::new("60"); + assert!(p.parse_minute().is_err()); + } + + #[test] + fn parse_second() { + let p = Parser::new("00"); + assert_eq!(p.parse_second().unwrap(), 0); + + let p = Parser::new("24"); + assert_eq!(p.parse_second().unwrap(), 24); + + let p = Parser::new("59"); + assert_eq!(p.parse_second().unwrap(), 59); + + let p = Parser::new("599"); + assert_eq!(p.parse_second().unwrap(), 59); + + let p = Parser::new("0"); + assert!(p.parse_second().is_err()); + + let p = Parser::new("1"); + assert!(p.parse_second().is_err()); + + let p = Parser::new("9"); + assert!(p.parse_second().is_err()); + + let p = Parser::new("60"); + assert!(p.parse_second().is_err()); + } + + #[test] + fn parse_number_with_exactly_n_digits() { + let p = Parser::new("1"); + assert_eq!(p.parse_number_with_exactly_n_digits(1).unwrap(), 1); + + let p = Parser::new("12"); + assert_eq!(p.parse_number_with_exactly_n_digits(2).unwrap(), 12); + + let p = Parser::new("123"); + assert_eq!(p.parse_number_with_exactly_n_digits(2).unwrap(), 12); + + let p = Parser::new(""); + assert!(p.parse_number_with_exactly_n_digits(1).is_err()); + + let p = Parser::new("1"); + assert!(p.parse_number_with_exactly_n_digits(2).is_err()); + + let p = Parser::new("12"); + assert!(p.parse_number_with_exactly_n_digits(3).is_err()); + } + + #[test] + fn parse_number_with_upto_n_digits() { + let p = Parser::new("1"); + assert_eq!(p.parse_number_with_upto_n_digits(1).unwrap(), 1); + + let p = Parser::new("1"); + assert_eq!(p.parse_number_with_upto_n_digits(2).unwrap(), 1); + + let p = Parser::new("12"); + assert_eq!(p.parse_number_with_upto_n_digits(2).unwrap(), 12); + + let p = Parser::new("12"); + assert_eq!(p.parse_number_with_upto_n_digits(3).unwrap(), 12); + + let p = Parser::new("123"); + assert_eq!(p.parse_number_with_upto_n_digits(2).unwrap(), 12); + + let p = Parser::new(""); + assert!(p.parse_number_with_upto_n_digits(1).is_err()); + + let p = Parser::new("a"); + assert!(p.parse_number_with_upto_n_digits(1).is_err()); + } +} diff --git a/src/shared/tzif.rs b/src/shared/tzif.rs new file mode 100644 index 0000000..9fe1c2c --- /dev/null +++ b/src/shared/tzif.rs @@ -0,0 +1,789 @@ +#![allow(warnings)] + +use 
alloc::{string::String, vec}; + +use super::{ + util::{Byte, Bytes}, + PosixTimeZone, TzifFixed, TzifIndicator, TzifLocalTimeType, TzifOwned, + TzifTransition, +}; + +macro_rules! err { + ($($tt:tt)*) => {{ + self::Error(alloc::format!($($tt)*)) + }} +} + +// These are Jiff min and max timestamp (in seconds) values. +// +// The TZif parser will clamp timestamps to this range. It's +// not ideal, but Jiff can't handle values outside of this range +// and completely refusing to use TZif data with pathological +// timestamps in typically irrelevant transitions is bad juju. +// +// Ref: https://github.com/BurntSushi/jiff/issues/163 +// Ref: https://github.com/BurntSushi/jiff/pull/164 +const TIMESTAMP_MIN: i64 = -377705023201; +const TIMESTAMP_MAX: i64 = 253402207200; + +// Similarly for offsets, although in this case, if we find +// an offset outside of this range, we do actually error. This +// is because it could result in true incorrect datetimes for +// actual transitions. +// +// But our supported offset range is `-25:59:59..=+25:59:59`. +// There's no real time zone with offsets even close to those +// boundaries. +// +// If there is pathological data that we should ignore, then +// we should wait for a real bug report in order to determine +// the right way to ignore/clamp it. +const OFFSET_MIN: i32 = -93599; +const OFFSET_MAX: i32 = 93599; + +/// An error that can be returned when parsing. +#[derive(Debug)] +pub struct Error(String); + +impl core::fmt::Display for Error { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + core::fmt::Display::fmt(&self.0, f) + } +} + +impl TzifOwned { + /// Parses the given data as a TZif formatted file. + /// + /// The name given is attached to the `Tzif` value returned, but is + /// otherwise not significant. + /// + /// If the given data is not recognized to be valid TZif, then an error is + /// returned. + /// + /// In general, callers may assume that it is safe to pass arbitrary or + /// even untrusted data to this function and count on it not panicking + /// or using resources that aren't limited to a small constant factor of + /// the size of the data itself. That is, callers can reliably limit the + /// resources used by limiting the size of the data given to this parse + /// function. + pub(crate) fn parse( + name: Option, + bytes: &[u8], + ) -> Result { + let original = bytes; + let name = name.into(); + let (header32, rest) = Header::parse(4, bytes) + .map_err(|e| err!("failed to parse 32-bit header: {e}"))?; + let (mut tzif, rest) = if header32.version == 0 { + TzifOwned::parse32(name, header32, rest)? + } else { + TzifOwned::parse64(name, header32, rest)? + }; + // Compute the checksum using the entire contents of the TZif data. 
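+        //
+        // Note that `rest` points just past the last byte consumed by the
+        // 32-bit or 64-bit parse above, so the pointer arithmetic below
+        // yields the number of bytes actually parsed. Only that prefix of
+        // `original` is checksummed; trailing bytes that were never
+        // consumed do not affect the checksum.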
+ let tzif_raw_len = (rest.as_ptr() as usize) + .checked_sub(original.as_ptr() as usize) + .unwrap(); + let tzif_raw_bytes = &original[..tzif_raw_len]; + tzif.fixed.checksum = super::crc32::sum(tzif_raw_bytes); + Ok(tzif) + } + + fn parse32<'b>( + name: Option, + header32: Header, + bytes: &'b [u8], + ) -> Result<(TzifOwned, &'b [u8]), Error> { + let mut tzif = TzifOwned { + fixed: TzifFixed { + name, + version: header32.version, + // filled in later + checksum: 0, + designations: String::new(), + posix_tz: None, + }, + types: vec![], + transitions: vec![], + }; + let rest = tzif.parse_transitions(&header32, bytes)?; + let rest = tzif.parse_transition_types(&header32, rest)?; + let rest = tzif.parse_local_time_types(&header32, rest)?; + let rest = tzif.parse_time_zone_designations(&header32, rest)?; + let rest = tzif.parse_leap_seconds(&header32, rest)?; + let rest = tzif.parse_indicators(&header32, rest)?; + Ok((tzif, rest)) + } + + fn parse64<'b>( + name: Option, + header32: Header, + bytes: &'b [u8], + ) -> Result<(TzifOwned, &'b [u8]), Error> { + let (_, rest) = try_split_at( + "V1 TZif data block", + bytes, + header32.data_block_len()?, + )?; + let (header64, rest) = Header::parse(8, rest) + .map_err(|e| err!("failed to parse 64-bit header: {e}"))?; + let mut tzif = TzifOwned { + fixed: TzifFixed { + name, + version: header64.version, + // filled in later + checksum: 0, + designations: String::new(), + posix_tz: None, + }, + types: vec![], + transitions: vec![], + }; + let rest = tzif.parse_transitions(&header64, rest)?; + let rest = tzif.parse_transition_types(&header64, rest)?; + let rest = tzif.parse_local_time_types(&header64, rest)?; + let rest = tzif.parse_time_zone_designations(&header64, rest)?; + let rest = tzif.parse_leap_seconds(&header64, rest)?; + let rest = tzif.parse_indicators(&header64, rest)?; + let rest = tzif.parse_footer(&header64, rest)?; + // Note that we specifically and unfortunately do not "validate" + // the POSIX TZ string here. We *should* check that it is + // consistent with the last transition. Since: + // + // RFC 8536 says, "If the string is nonempty and one or more + // transitions appear in the version 2+ data, the string MUST be + // consistent with the last version 2+ transition." + // + // But in this context, we don't have any of the infrastructure + // to actually do TZ operations on a POSIX time zone. It requires + // civil datetimes and a bunch of other bullshit. This means that + // this verification step doesn't run when using the `jiff-tzdb-static` + // proc macro. However, we do still run it when parsing TZif data + // at runtime. + // + // We otherwise don't check that the TZif data is fully valid. It is + // possible for it to contain superfluous information. For example, a + // non-zero local time type that is never referenced by a transition. + Ok((tzif, rest)) + } + + fn parse_transitions<'b>( + &mut self, + header: &Header, + bytes: &'b [u8], + ) -> Result<&'b [u8], Error> { + let (bytes, rest) = try_split_at( + "transition times data block", + bytes, + header.transition_times_len()?, + )?; + let mut it = bytes.chunks_exact(header.time_size); + // RFC 8536 says: "If there are no transitions, local time for all + // timestamps is specified by the TZ string in the footer if present + // and nonempty; otherwise, it is specified by time type 0." + // + // RFC 8536 also says: "Local time for timestamps before the first + // transition is specified by the first time type (time type + // 0)." 
+ // + // So if there are no transitions, pushing this dummy one will result + // in the desired behavior even when it's the only transition. + // Similarly, since this is the minimum timestamp value, it will + // trigger for any times before the first transition found in the TZif + // data. + self.transitions + .push(TzifTransition { timestamp: TIMESTAMP_MIN, type_index: 0 }); + while let Some(chunk) = it.next() { + let mut timestamp = if header.is_32bit() { + i64::from(from_be_bytes_i32(chunk)) + } else { + from_be_bytes_i64(chunk) + }; + if !(TIMESTAMP_MIN <= timestamp && timestamp <= TIMESTAMP_MAX) { + // We really shouldn't error here just because the Unix + // timestamp is outside what Jiff supports. Since what Jiff + // supports is _somewhat_ arbitrary. But Jiff's supported + // range is good enough for all realistic purposes, so we + // just clamp an out-of-range Unix timestamp to the Jiff + // min or max value. + // + // This can't result in the sorting order being wrong, but + // it can result in a transition that is duplicative with + // the dummy transition we inserted above. This should be + // fine. + let clamped = timestamp.clamp(TIMESTAMP_MIN, TIMESTAMP_MAX); + // only-jiff-warn-start + warn!( + "found Unix timestamp {timestamp} that is outside \ + Jiff's supported range, clamping to {clamped}", + ); + // only-jiff-warn-end + timestamp = clamped; + } + self.transitions.push(TzifTransition { + timestamp, + // We can't fill in the type index yet. We fill this in + // later when we parse the transition types. + type_index: 0, + }); + } + assert!(it.remainder().is_empty()); + Ok(rest) + } + + fn parse_transition_types<'b>( + &mut self, + header: &Header, + bytes: &'b [u8], + ) -> Result<&'b [u8], Error> { + let (bytes, rest) = try_split_at( + "transition types data block", + bytes, + header.transition_types_len()?, + )?; + // We start our transition indices at 1 because we always insert a + // dummy first transition corresponding to `Timestamp::MIN`. Its type + // index is always 0, so there's no need to change it here. 
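+        //
+        // This also means that byte `i` of this data block supplies the
+        // type index for `self.transitions[i + 1]`, which is why the
+        // enumeration below starts at 1.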
+ for (transition_index, &type_index) in (1..).zip(bytes) { + if usize::from(type_index) >= header.tzh_typecnt { + return Err(err!( + "found transition type index {type_index}, + but there are only {} local time types", + header.tzh_typecnt, + )); + } + self.transitions[transition_index].type_index = type_index; + } + Ok(rest) + } + + fn parse_local_time_types<'b>( + &mut self, + header: &Header, + bytes: &'b [u8], + ) -> Result<&'b [u8], Error> { + let (bytes, rest) = try_split_at( + "local time types data block", + bytes, + header.local_time_types_len()?, + )?; + let mut it = bytes.chunks_exact(6); + while let Some(chunk) = it.next() { + let offset = from_be_bytes_i32(&chunk[..4]); + if !(OFFSET_MIN <= offset && offset <= OFFSET_MAX) { + return Err(err!( + "found local time type with out-of-bounds offset: {offset}" + )); + } + let is_dst = chunk[4] == 1; + let designation = chunk[5]..chunk[5]; + self.types.push(TzifLocalTimeType { + offset, + is_dst, + designation, + indicator: TzifIndicator::LocalWall, + }); + } + assert!(it.remainder().is_empty()); + Ok(rest) + } + + fn parse_time_zone_designations<'b>( + &mut self, + header: &Header, + bytes: &'b [u8], + ) -> Result<&'b [u8], Error> { + let (bytes, rest) = try_split_at( + "time zone designations data block", + bytes, + header.time_zone_designations_len()?, + )?; + self.fixed.designations = + String::from_utf8(bytes.to_vec()).map_err(|_| { + err!( + "time zone designations are not valid UTF-8: {:?}", + Bytes(bytes), + ) + })?; + // Holy hell, this is brutal. The boundary conditions are crazy. + for (i, typ) in self.types.iter_mut().enumerate() { + let start = usize::from(typ.designation.start); + let Some(suffix) = self.fixed.designations.get(start..) else { + return Err(err!( + "local time type {i} has designation index of {start}, \ + but cannot be more than {}", + self.fixed.designations.len(), + )); + }; + let Some(len) = suffix.find('\x00') else { + return Err(err!( + "local time type {i} has designation index of {start}, \ + but could not find NUL terminator after it in \ + designations: {:?}", + self.fixed.designations, + )); + }; + let Some(end) = start.checked_add(len) else { + return Err(err!( + "local time type {i} has designation index of {start}, \ + but its length {len} is too big", + )); + }; + typ.designation.end = u8::try_from(end).map_err(|_| { + err!( + "local time type {i} has designation range of \ + {start}..{end}, but end is too big", + ) + })?; + } + Ok(rest) + } + + /// This parses the leap second corrections in the TZif data. + /// + /// Note that we only parse and verify them. We don't actually use them. + /// Jiff effectively ignores leap seconds. 
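+    ///
+    /// Each leap second record consists of `time_size` bytes for the
+    /// occurrence timestamp followed by a 4-byte correction count, which
+    /// is why the chunks below are `time_size + 4` bytes long. The
+    /// correction itself is ignored; the occurrence is only checked (with
+    /// a warning) against Jiff's supported timestamp range.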
+ fn parse_leap_seconds<'b>( + &mut self, + header: &Header, + bytes: &'b [u8], + ) -> Result<&'b [u8], Error> { + let (bytes, rest) = try_split_at( + "leap seconds data block", + bytes, + header.leap_second_len()?, + )?; + let chunk_len = header + .time_size + .checked_add(4) + .expect("time_size plus 4 fits in usize"); + let mut it = bytes.chunks_exact(chunk_len); + while let Some(chunk) = it.next() { + let (occur_bytes, _corr_bytes) = chunk.split_at(header.time_size); + let occur = if header.is_32bit() { + i64::from(from_be_bytes_i32(occur_bytes)) + } else { + from_be_bytes_i64(occur_bytes) + }; + if !(TIMESTAMP_MIN <= occur && occur <= TIMESTAMP_MAX) { + // only-jiff-warn-start + warn!( + "leap second occurrence {occur} is \ + not in Jiff's supported range" + ) + // only-jiff-warn-end + } + } + assert!(it.remainder().is_empty()); + Ok(rest) + } + + fn parse_indicators<'b>( + &mut self, + header: &Header, + bytes: &'b [u8], + ) -> Result<&'b [u8], Error> { + let (std_wall_bytes, rest) = try_split_at( + "standard/wall indicators data block", + bytes, + header.standard_wall_len()?, + )?; + let (ut_local_bytes, rest) = try_split_at( + "UT/local indicators data block", + rest, + header.ut_local_len()?, + )?; + if std_wall_bytes.is_empty() && !ut_local_bytes.is_empty() { + // This is a weird case, but technically possible only if all + // UT/local indicators are 0. If any are 1, then it's an error, + // because it would require the corresponding std/wall indicator + // to be 1 too. Which it can't be, because there aren't any. So + // we just check that they're all zeros. + for (i, &byte) in ut_local_bytes.iter().enumerate() { + if byte != 0 { + return Err(err!( + "found UT/local indicator '{byte}' for local time \ + type {i}, but it must be 0 since all std/wall \ + indicators are 0", + )); + } + } + } else if !std_wall_bytes.is_empty() && ut_local_bytes.is_empty() { + for (i, &byte) in std_wall_bytes.iter().enumerate() { + // Indexing is OK because Header guarantees that the number of + // indicators is 0 or equal to the number of types. + self.types[i].indicator = if byte == 0 { + TzifIndicator::LocalWall + } else if byte == 1 { + TzifIndicator::LocalStandard + } else { + return Err(err!( + "found invalid std/wall indicator '{byte}' for \ + local time type {i}, it must be 0 or 1", + )); + }; + } + } else if !std_wall_bytes.is_empty() && !ut_local_bytes.is_empty() { + assert_eq!(std_wall_bytes.len(), ut_local_bytes.len()); + let it = std_wall_bytes.iter().zip(ut_local_bytes); + for (i, (&stdwall, &utlocal)) in it.enumerate() { + // Indexing is OK because Header guarantees that the number of + // indicators is 0 or equal to the number of types. + self.types[i].indicator = match (stdwall, utlocal) { + (0, 0) => TzifIndicator::LocalWall, + (1, 0) => TzifIndicator::LocalStandard, + (1, 1) => TzifIndicator::UTStandard, + (0, 1) => { + return Err(err!( + "found illegal ut-wall combination for \ + local time type {i}, only local-wall, \ + local-standard and ut-standard are allowed", + )) + } + _ => { + return Err(err!( + "found illegal std/wall or ut/local value for \ + local time type {i}, each must be 0 or 1", + )) + } + }; + } + } else { + // If they're both empty then we don't need to do anything. Every + // local time type record already has the correct default for this + // case set. 
+ debug_assert!(std_wall_bytes.is_empty()); + debug_assert!(ut_local_bytes.is_empty()); + } + Ok(rest) + } + + fn parse_footer<'b>( + &mut self, + _header: &Header, + bytes: &'b [u8], + ) -> Result<&'b [u8], Error> { + if bytes.is_empty() { + return Err(err!( + "invalid V2+ TZif footer, expected \\n, \ + but found unexpected end of data", + )); + } + if bytes[0] != b'\n' { + return Err(err!( + "invalid V2+ TZif footer, expected {:?}, but found {:?}", + Byte(b'\n'), + Byte(bytes[0]), + )); + } + let bytes = &bytes[1..]; + // Only scan up to 1KB for a NUL terminator in case we somehow got + // passed a huge block of bytes. + let toscan = &bytes[..bytes.len().min(1024)]; + let Some(nlat) = toscan.iter().position(|&b| b == b'\n') else { + return Err(err!( + "invalid V2 TZif footer, could not find {:?} \ + terminator in: {:?}", + Byte(b'\n'), + Bytes(toscan), + )); + }; + let (bytes, rest) = bytes.split_at(nlat); + if !bytes.is_empty() { + let posix_tz = + PosixTimeZone::parse(bytes).map_err(|e| err!("{e}"))?; + // We could in theory limit TZ strings to their strict POSIX + // definition here for TZif V2, but I don't think there is any + // harm in allowing the extensions in V2 formatted TZif data. Note + // that the GNU tooling allow it via the `TZ` environment variable + // even though POSIX doesn't specify it. This all seems okay to me + // because the V3+ extension is a strict superset of functionality. + if let Some(ref dst) = posix_tz.dst { + if dst.rule.is_none() { + return Err(err!( + "TZ string `{}` in v3+ tzfile has DST \ + but no transition rules", + Bytes(bytes), + )); + } + } + self.fixed.posix_tz = Some(posix_tz); + } + Ok(&rest[1..]) + } +} + +/// The header for a TZif formatted file. +/// +/// V2+ TZif format have two headers: one for V1 data, and then a second +/// following the V1 data block that describes another data block which uses +/// 64-bit timestamps. The two headers both have the same format and both +/// use 32-bit big-endian encoded integers. +#[derive(Debug)] +struct Header { + /// The size of the timestamps encoded in the data block. + /// + /// This is guaranteed to be either 4 (for V1) or 8 (for the 64-bit header + /// block in V2+). + time_size: usize, + /// The file format version. + /// + /// Note that this is either a NUL byte (for version 1), or an ASCII byte + /// corresponding to the version number. That is, `0x32` for `2`, `0x33` + /// for `3` or `0x34` for `4`. Note also that just because zoneinfo might + /// have been recently generated does not mean it uses the latest format + /// version. It seems like newer versions are only compiled by `zic` when + /// they are needed. For example, `America/New_York` on my system (as of + /// `2024-03-25`) has version `0x32`, but `Asia/Jerusalem` has version + /// `0x33`. + version: u8, + /// Number of UT/local indicators stored in the file. + /// + /// This is checked to be either equal to `0` or equal to `tzh_typecnt`. + tzh_ttisutcnt: usize, + /// The number of standard/wall indicators stored in the file. + /// + /// This is checked to be either equal to `0` or equal to `tzh_typecnt`. + tzh_ttisstdcnt: usize, + /// The number of leap seconds for which data entries are stored in the + /// file. + tzh_leapcnt: usize, + /// The number of transition times for which data entries are stored in + /// the file. + tzh_timecnt: usize, + /// The number of local time types for which data entries are stored in the + /// file. + /// + /// This is checked to be at least `1`. 
+ tzh_typecnt: usize, + /// The number of bytes of time zone abbreviation strings stored in the + /// file. + /// + /// This is checked to be at least `1`. + tzh_charcnt: usize, +} + +impl Header { + /// Parse the header record from the given bytes. + /// + /// Upon success, return the header and all bytes after the header. + /// + /// The given `time_size` must be 4 or 8, corresponding to either the + /// V1 header block or the V2+ header block, respectively. + fn parse( + time_size: usize, + bytes: &[u8], + ) -> Result<(Header, &[u8]), Error> { + assert!(time_size == 4 || time_size == 8, "time size must be 4 or 8"); + if bytes.len() < 44 { + return Err(err!("invalid header: too short")); + } + let (magic, rest) = bytes.split_at(4); + if magic != b"TZif" { + return Err(err!("invalid header: magic bytes mismatch")); + } + let (version, rest) = rest.split_at(1); + let (_reserved, rest) = rest.split_at(15); + + let (tzh_ttisutcnt_bytes, rest) = rest.split_at(4); + let (tzh_ttisstdcnt_bytes, rest) = rest.split_at(4); + let (tzh_leapcnt_bytes, rest) = rest.split_at(4); + let (tzh_timecnt_bytes, rest) = rest.split_at(4); + let (tzh_typecnt_bytes, rest) = rest.split_at(4); + let (tzh_charcnt_bytes, rest) = rest.split_at(4); + + let tzh_ttisutcnt = from_be_bytes_u32_to_usize(tzh_ttisutcnt_bytes) + .map_err(|e| err!("failed to parse tzh_ttisutcnt: {e}"))?; + let tzh_ttisstdcnt = from_be_bytes_u32_to_usize(tzh_ttisstdcnt_bytes) + .map_err(|e| err!("failed to parse tzh_ttisstdcnt: {e}"))?; + let tzh_leapcnt = from_be_bytes_u32_to_usize(tzh_leapcnt_bytes) + .map_err(|e| err!("failed to parse tzh_leapcnt: {e}"))?; + let tzh_timecnt = from_be_bytes_u32_to_usize(tzh_timecnt_bytes) + .map_err(|e| err!("failed to parse tzh_timecnt: {e}"))?; + let tzh_typecnt = from_be_bytes_u32_to_usize(tzh_typecnt_bytes) + .map_err(|e| err!("failed to parse tzh_typecnt: {e}"))?; + let tzh_charcnt = from_be_bytes_u32_to_usize(tzh_charcnt_bytes) + .map_err(|e| err!("failed to parse tzh_charcnt: {e}"))?; + + if tzh_ttisutcnt != 0 && tzh_ttisutcnt != tzh_typecnt { + return Err(err!( + "expected tzh_ttisutcnt={tzh_ttisutcnt} to be zero \ + or equal to tzh_typecnt={tzh_typecnt}", + )); + } + if tzh_ttisstdcnt != 0 && tzh_ttisstdcnt != tzh_typecnt { + return Err(err!( + "expected tzh_ttisstdcnt={tzh_ttisstdcnt} to be zero \ + or equal to tzh_typecnt={tzh_typecnt}", + )); + } + if tzh_typecnt < 1 { + return Err(err!( + "expected tzh_typecnt={tzh_typecnt} to be at least 1", + )); + } + if tzh_charcnt < 1 { + return Err(err!( + "expected tzh_charcnt={tzh_charcnt} to be at least 1", + )); + } + + let header = Header { + time_size, + version: version[0], + tzh_ttisutcnt, + tzh_ttisstdcnt, + tzh_leapcnt, + tzh_timecnt, + tzh_typecnt, + tzh_charcnt, + }; + Ok((header, rest)) + } + + /// Returns true if this header is for a 32-bit data block. + /// + /// When false, it is guaranteed that this header is for a 64-bit data + /// block. + fn is_32bit(&self) -> bool { + self.time_size == 4 + } + + /// Returns the size of the data block, in bytes, for this header. + /// + /// This returns an error if the arithmetic required to compute the + /// length would overflow. + /// + /// This is useful for, e.g., skipping over the 32-bit V1 data block in + /// V2+ TZif formatted files. 
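+    ///
+    /// Concretely, this is the sum of the individual block lengths
+    /// computed by the helpers below:
+    ///
+    /// ```text
+    /// tzh_timecnt * time_size           (transition times)
+    ///   + tzh_timecnt                   (transition types)
+    ///   + tzh_typecnt * 6               (local time type records)
+    ///   + tzh_charcnt                   (designations)
+    ///   + tzh_leapcnt * (time_size + 4) (leap second records)
+    ///   + tzh_ttisstdcnt                (standard/wall indicators)
+    ///   + tzh_ttisutcnt                 (UT/local indicators)
+    /// ```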
+ fn data_block_len(&self) -> Result { + let a = self.transition_times_len()?; + let b = self.transition_types_len()?; + let c = self.local_time_types_len()?; + let d = self.time_zone_designations_len()?; + let e = self.leap_second_len()?; + let f = self.standard_wall_len()?; + let g = self.ut_local_len()?; + a.checked_add(b) + .and_then(|z| z.checked_add(c)) + .and_then(|z| z.checked_add(d)) + .and_then(|z| z.checked_add(e)) + .and_then(|z| z.checked_add(f)) + .and_then(|z| z.checked_add(g)) + .ok_or_else(|| { + err!( + "length of data block in V{} tzfile is too big", + self.version + ) + }) + } + + fn transition_times_len(&self) -> Result { + self.tzh_timecnt.checked_mul(self.time_size).ok_or_else(|| { + err!("tzh_timecnt value {} is too big", self.tzh_timecnt) + }) + } + + fn transition_types_len(&self) -> Result { + Ok(self.tzh_timecnt) + } + + fn local_time_types_len(&self) -> Result { + self.tzh_typecnt.checked_mul(6).ok_or_else(|| { + err!("tzh_typecnt value {} is too big", self.tzh_typecnt) + }) + } + + fn time_zone_designations_len(&self) -> Result { + Ok(self.tzh_charcnt) + } + + fn leap_second_len(&self) -> Result { + let record_len = self + .time_size + .checked_add(4) + .expect("4-or-8 plus 4 always fits in usize"); + self.tzh_leapcnt.checked_mul(record_len).ok_or_else(|| { + err!("tzh_leapcnt value {} is too big", self.tzh_leapcnt) + }) + } + + fn standard_wall_len(&self) -> Result { + Ok(self.tzh_ttisstdcnt) + } + + fn ut_local_len(&self) -> Result { + Ok(self.tzh_ttisutcnt) + } +} + +/// Splits the given slice of bytes at the index given. +/// +/// If the index is out of range (greater than `bytes.len()`) then an error is +/// returned. The error message will include the `what` string given, which is +/// meant to describe the thing being split. +fn try_split_at<'b>( + what: &'static str, + bytes: &'b [u8], + at: usize, +) -> Result<(&'b [u8], &'b [u8]), Error> { + if at > bytes.len() { + Err(err!( + "expected at least {at} bytes for {what}, \ + but found only {} bytes", + bytes.len(), + )) + } else { + Ok(bytes.split_at(at)) + } +} + +/// Interprets the given slice as an unsigned 32-bit big endian integer, +/// attempts to convert it to a `usize` and returns it. +/// +/// # Panics +/// +/// When `bytes.len() != 4`. +/// +/// # Errors +/// +/// This errors if the `u32` parsed from the given bytes cannot fit in a +/// `usize`. +fn from_be_bytes_u32_to_usize(bytes: &[u8]) -> Result { + let n = from_be_bytes_u32(bytes); + usize::try_from(n).map_err(|_| { + err!( + "failed to parse integer {n} (too big, max allowed is {}", + usize::MAX + ) + }) +} + +/// Interprets the given slice as an unsigned 32-bit big endian integer and +/// returns it. +/// +/// # Panics +/// +/// When `bytes.len() != 4`. +fn from_be_bytes_u32(bytes: &[u8]) -> u32 { + u32::from_be_bytes(bytes.try_into().unwrap()) +} + +/// Interprets the given slice as a signed 32-bit big endian integer and +/// returns it. +/// +/// # Panics +/// +/// When `bytes.len() != 4`. +fn from_be_bytes_i32(bytes: &[u8]) -> i32 { + i32::from_be_bytes(bytes.try_into().unwrap()) +} + +/// Interprets the given slice as a signed 64-bit big endian integer and +/// returns it. +/// +/// # Panics +/// +/// When `bytes.len() != 8`. +fn from_be_bytes_i64(bytes: &[u8]) -> i64 { + i64::from_be_bytes(bytes.try_into().unwrap()) +} diff --git a/src/shared/util.rs b/src/shared/util.rs new file mode 100644 index 0000000..1dca3d0 --- /dev/null +++ b/src/shared/util.rs @@ -0,0 +1,128 @@ +/// Provides a convenient `Debug` implementation for a `u8`. 
+/// +/// The `Debug` impl treats the byte as an ASCII, and emits a human +/// readable representation of it. If the byte isn't ASCII, then it's +/// emitted as a hex escape sequence. +#[derive(Clone, Copy)] +pub(crate) struct Byte(pub u8); + +impl core::fmt::Display for Byte { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + if self.0 == b' ' { + return write!(f, " "); + } + // 10 bytes is enough for any output from ascii::escape_default. + let mut bytes = [0u8; 10]; + let mut len = 0; + for (i, mut b) in core::ascii::escape_default(self.0).enumerate() { + // capitalize \xab to \xAB + if i >= 2 && b'a' <= b && b <= b'f' { + b -= 32; + } + bytes[len] = b; + len += 1; + } + write!(f, "{}", core::str::from_utf8(&bytes[..len]).unwrap()) + } +} + +impl core::fmt::Debug for Byte { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "\"")?; + core::fmt::Display::fmt(self, f)?; + write!(f, "\"")?; + Ok(()) + } +} + +/// Provides a convenient `Debug` implementation for `&[u8]`. +/// +/// This generally works best when the bytes are presumed to be mostly +/// UTF-8, but will work for anything. For any bytes that aren't UTF-8, +/// they are emitted as hex escape sequences. +#[derive(Clone, Copy)] +pub(crate) struct Bytes<'a>(pub &'a [u8]); + +impl<'a> core::fmt::Display for Bytes<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + // This is a sad re-implementation of a similar impl found in bstr. + let mut bytes = self.0; + while let Some(result) = utf8_decode(bytes) { + let ch = match result { + Ok(ch) => ch, + Err(byte) => { + write!(f, r"\x{:02x}", byte)?; + bytes = &bytes[1..]; + continue; + } + }; + bytes = &bytes[ch.len_utf8()..]; + match ch { + '\0' => write!(f, "\\0")?, + '\x01'..='\x7f' => { + write!(f, "{}", (ch as u8).escape_ascii())?; + } + _ => write!(f, "{}", ch.escape_debug())?, + } + } + Ok(()) + } +} + +impl<'a> core::fmt::Debug for Bytes<'a> { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + write!(f, "\"")?; + core::fmt::Display::fmt(self, f)?; + write!(f, "\"")?; + Ok(()) + } +} + +/// Decodes the next UTF-8 encoded codepoint from the given byte slice. +/// +/// If no valid encoding of a codepoint exists at the beginning of the +/// given byte slice, then the first byte is returned instead. +/// +/// This returns `None` if and only if `bytes` is empty. +/// +/// This never panics. +/// +/// *WARNING*: This is not designed for performance. If you're looking for +/// a fast UTF-8 decoder, this is not it. If you feel like you need one in +/// this crate, then please file an issue and discuss your use case. +pub(crate) fn utf8_decode(bytes: &[u8]) -> Option> { + /// Given a UTF-8 leading byte, this returns the total number of code + /// units in the following encoded codepoint. + /// + /// If the given byte is not a valid UTF-8 leading byte, then this + /// returns `None`. 
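+    ///
+    /// Concretely: `0x00..=0x7F` yields `Some(1)`, `0xC0..=0xDF` yields
+    /// `Some(2)`, `0xE0..=0xEF` yields `Some(3)` and `0xF0..=0xF7` yields
+    /// `Some(4)`. Continuation bytes (`0x80..=0xBF`) and bytes above
+    /// `0xF7` yield `None`. Overlong or otherwise invalid sequences are
+    /// still rejected later, when the candidate slice is run through
+    /// `core::str::from_utf8`.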
+ fn utf8_len(byte: u8) -> Option { + if byte <= 0x7F { + return Some(1); + } else if byte & 0b1100_0000 == 0b1000_0000 { + return None; + } else if byte <= 0b1101_1111 { + Some(2) + } else if byte <= 0b1110_1111 { + Some(3) + } else if byte <= 0b1111_0111 { + Some(4) + } else { + None + } + } + + if bytes.is_empty() { + return None; + } + let len = match utf8_len(bytes[0]) { + None => return Some(Err(bytes[0])), + Some(len) if len > bytes.len() => return Some(Err(bytes[0])), + Some(1) => return Some(Ok(char::from(bytes[0]))), + Some(len) => len, + }; + match core::str::from_utf8(&bytes[..len]) { + Ok(s) => Some(Ok(s.chars().next().unwrap())), + Err(_) => Some(Err(bytes[0])), + } +} diff --git a/src/tz/mod.rs b/src/tz/mod.rs index 771d39f..f63411e 100644 --- a/src/tz/mod.rs +++ b/src/tz/mod.rs @@ -101,15 +101,13 @@ mod ambiguous; mod concatenated; mod db; mod offset; -#[cfg(feature = "alloc")] pub(crate) mod posix; #[cfg(feature = "tz-system")] mod system; #[cfg(all(test, feature = "alloc"))] mod testdata; mod timezone; -#[cfg(feature = "alloc")] -mod tzif; +pub(crate) mod tzif; // See module comment for WIP status. :-( #[cfg(test)] mod zic; diff --git a/src/tz/offset.rs b/src/tz/offset.rs index d381682..ec07e48 100644 --- a/src/tz/offset.rs +++ b/src/tz/offset.rs @@ -232,7 +232,7 @@ impl Offset { // warrant its existence. And I think I'd rather `Offset::hms` be const and // exported instead of this monstrosity. #[inline] - const fn constant_seconds(seconds: i32) -> Offset { + pub(crate) const fn constant_seconds(seconds: i32) -> Offset { if !t::SpanZoneOffset::contains(seconds) { panic!("invalid time zone offset seconds") } diff --git a/src/tz/posix.rs b/src/tz/posix.rs index 2d931f8..90053be 100644 --- a/src/tz/posix.rs +++ b/src/tz/posix.rs @@ -1,6 +1,13 @@ /*! Provides a parser for [POSIX's `TZ` environment variable][posix-env]. +NOTE: Sadly, at time of writing, the actual parser is in `src/shared/posix.rs`. +This is so it can be shared (via simple code copying) with proc macros like +the one found in `jiff-tzdb-static`. The parser populates a "lowest common +denominator" data type. In normal use in Jiff, this type is converted into +the types defined below. This module still does provide the various time zone +operations. Only the parsing is written elsewhere. + The `TZ` environment variable is most commonly used to set a time zone. For example, `TZ=America/New_York`. But it can also be used to tersely define DST transitions. Moreover, the format is not just used as an environment variable, @@ -47,12 +54,13 @@ other programs do in practice (for example, GNU date). This module works just fine in `no_std` mode. It also generally works fine without `alloc` too, modulo some APIs for parsing from an environment variable -(which need `std` anyway). The main problem is that the type defined here takes -up a lot of space (100+ bytes). A good chunk of that comes from representing -time zone abbreviations inline. In theory, only 6-10 bytes are needed for -simple cases like `TZ=EST5EDT,M3.2.0,M11.1.0`, but we make room for 30 byte -length abbreviations (times two). Plus, there's a much of room made for the -rule representation. +(which need `std` anyway) and the POSIX TZ parser using `String` to represent +abbreviations. (The latter could be fixed if necessary.) The main problem is +that the type defined here takes up a lot of space (100+ bytes). A good chunk +of that comes from representing time zone abbreviations inline. 
In theory, only +6-10 bytes are needed for simple cases like `TZ=EST5EDT,M3.2.0,M11.1.0`, but we +make room for 30 byte length abbreviations (times two). Plus, there's a much of +room made for the rule representation. When you then stuff this inside a `TimeZone` which cannot use heap allocation to force an indirection, you wind up with a very chunky `TimeZone`. And this in @@ -71,16 +79,21 @@ thinking about adding an `Unzoned` type that is just like `Zoned`, but requires the caller to pass in a `&TimeZone` for every API call. Less convenient for sure, but you get a more flexible type. +ADDENDUM: The above is still mostly true, but it looks like we are going to +allow static `TimeZone` values via a proc-macro. And this also requires +parsing POSIX time zones and building them at compile time. In order to make +this work well with `TimeZone` without indirection, we'll use pointer tagging. +This should help save `Zoned` in core-only environments. + [posix-env]: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08_03 [iana-env]: https://data.iana.org/time-zones/tzdb-2024a/theory.html#functions [musl-env]: https://wiki.musl-libc.org/environment-variables */ -use core::cell::Cell; - use crate::{ civil::{Date, DateTime, Time, Weekday}, error::{err, Error, ErrorContext}, + shared, timestamp::Timestamp, tz::{ timezone::TimeZoneAbbreviation, AmbiguousOffset, Dst, Offset, @@ -88,19 +101,14 @@ use crate::{ }, util::{ array_str::Abbreviation, - escape::{Byte, Bytes}, + escape::Bytes, parse, - rangeint::{ri16, ri32, ri8, RFrom, RInto}, - t::{self, Minute, Month, Second, Sign, SpanZoneOffset, Year, C}, + rangeint::{ri16, ri32, ri8, RInto}, + t::{self, Month, SpanZoneOffset, Year, C}, }, SignedDuration, }; -/// POSIX says the hour must be in the range `0..=24`, but that the default -/// hour for DST is one hour more than standard time. Therefore, the actual -/// allowed range is `0..=25`. (Although we require `0..=24` during parsing.) -type PosixHour = ri8<0, 25>; -type IanaHour = ri16<0, 167>; type PosixJulianDayNoLeap = ri16<1, 365>; type PosixJulianDayWithLeap = ri16<0, 365>; type PosixWeek = ri8<1, 5>; @@ -168,6 +176,13 @@ impl core::fmt::Display for PosixTzEnv { } } +// BREADCRUMBS: For a proc macro to work, we need to provide a way to +// introduce and eliminate a `ReasonablePosixTimeZone` in a `const` context. +// I really do not want to make nearly every type in this module completely +// `pub`, and would prefer to just expose a new type. But it's kind of a pain +// because of how nested this type is. Short of writing a parser that works +// in a `const` context, I don't really see a better way. + /// A "reasonable" POSIX time zone. /// /// This is the same as a regular POSIX time zone, but requires that if a DST @@ -193,7 +208,8 @@ impl core::fmt::Display for PosixTzEnv { /// [GNU C Library]: https://www.gnu.org/software/libc/manual/2.25/html_node/TZ-Variable.html /// [RFC 9636]: https://datatracker.ietf.org/doc/rfc9636/ #[derive(Clone, Debug, Eq, PartialEq)] -pub(crate) struct ReasonablePosixTimeZone { +#[doc(hidden)] // NOT part of Jiff's public API +pub struct ReasonablePosixTimeZone { std_abbrev: Abbreviation, std_offset: PosixOffset, dst: Option, @@ -201,6 +217,7 @@ pub(crate) struct ReasonablePosixTimeZone { impl ReasonablePosixTimeZone { /// Parse a IANA tzfile v3+ `TZ` string from the given bytes. 
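+    ///
+    /// For example, `EST5EDT,M3.2.0,M11.1.0` or, using quoted
+    /// abbreviations, `<-03>3`. A DST abbreviation without a transition
+    /// rule (e.g., `EST5EDT` on its own) is not "reasonable" and is
+    /// expected to be rejected here, even though it is valid POSIX.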
+ #[cfg(feature = "alloc")] pub(crate) fn parse( bytes: impl AsRef<[u8]>, ) -> Result { @@ -219,6 +236,7 @@ impl ReasonablePosixTimeZone { /// Like `parse`, but parses a POSIX TZ string from a prefix of the /// given input. And remaining input is returned. + #[cfg(feature = "alloc")] pub(crate) fn parse_prefix<'b, B: AsRef<[u8]> + ?Sized + 'b>( bytes: &'b B, ) -> Result<(ReasonablePosixTimeZone, &'b [u8]), Error> { @@ -236,6 +254,33 @@ impl ReasonablePosixTimeZone { Ok((reasonable, remaining)) } + /// Converts from the shared-but-internal API for use in proc macros. + /// + /// This works in a `const` context by requiring that the time zone + /// abbreviations are `static` strings. This is used when converting + /// code generated by a proc macro to this Jiff internal type. + pub(crate) const fn from_shared_const( + sh: &shared::PosixTimeZone<&'static str>, + ) -> ReasonablePosixTimeZone { + use crate::util::constant::unwrap; + + let std_abbrev = unwrap!( + Abbreviation::new(sh.std_abbrev), + "expected short enough std tz abbreviation" + ); + let std_offset = PosixOffset { + offset: unwrap!( + SpanZoneOffset::new_const(sh.std_offset), + "expected std offset in range", + ), + }; + let dst = match sh.dst { + None => None, + Some(ref dst) => Some(ReasonablePosixDst::from_shared_const(dst)), + }; + ReasonablePosixTimeZone { std_abbrev, std_offset, dst } + } + /// Returns the appropriate time zone offset to use for the given /// timestamp. /// @@ -575,6 +620,35 @@ struct ReasonablePosixDst { } impl ReasonablePosixDst { + /// Converts from the shared-but-internal API for use in proc macros. + /// + /// This works in a `const` context by requiring that the time zone + /// abbreviations are `static` strings. This is used when converting + /// code generated by a proc macro to this Jiff internal type. + const fn from_shared_const( + sh: &shared::PosixDst<&'static str>, + ) -> ReasonablePosixDst { + use crate::util::constant::unwrap; + + let abbrev = unwrap!( + Abbreviation::new(sh.abbrev), + "expected short enough dst tz abbreviation" + ); + let offset = PosixOffset { + offset: unwrap!( + SpanZoneOffset::new_const(sh.offset), + "expected dst offset in range", + ), + }; + let rule = match sh.rule { + None => { + panic!("expected reasonable POSIX time zone (DST has a rule)") + } + Some(ref rule) => Rule::from_shared(rule), + }; + ReasonablePosixDst { abbrev, offset, rule } + } + fn display( &self, std_offset: PosixOffset, @@ -616,25 +690,46 @@ pub(crate) struct PosixTimeZone { impl PosixTimeZone { /// Parse a POSIX `TZ` environment variable, assuming it's a rule and not /// an implementation defined value, from the given bytes. + #[cfg(feature = "alloc")] fn parse(bytes: impl AsRef<[u8]>) -> Result { - // We enable the IANA v3+ extensions here. (Namely, that the time - // specification hour value has the range `-167..=167` instead of - // `0..=24`.) Requiring strict POSIX rules doesn't seem necessary - // since the extension is a strict superset. Plus, GNU tooling - // seems to accept the extension. - let parser = - Parser { ianav3plus: true, ..Parser::new(bytes.as_ref()) }; - parser.parse() + let shared_tz = crate::shared::PosixTimeZone::parse(bytes.as_ref()) + .map_err(Error::adhoc)?; + let jiff_tz = PosixTimeZone::from_shared_owned(&shared_tz); + Ok(jiff_tz) } /// Like parse, but parses a prefix of the input given and returns whatever /// is remaining. 
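+    ///
+    /// For example, given `EST5EDT,M3.2.0,M11.1.0\nmore`, this should
+    /// return the parsed time zone along with the remaining `\nmore`
+    /// bytes, since parsing stops at the first byte that cannot be part
+    /// of a POSIX TZ string.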
+ #[cfg(feature = "alloc")] fn parse_prefix<'b, B: AsRef<[u8]> + ?Sized + 'b>( bytes: &'b B, ) -> Result<(PosixTimeZone, &'b [u8]), Error> { - let parser = - Parser { ianav3plus: true, ..Parser::new(bytes.as_ref()) }; - parser.parse_prefix() + let (shared_tz, remaining) = + crate::shared::PosixTimeZone::parse_prefix(bytes.as_ref()) + .map_err(Error::adhoc)?; + let jiff_tz = PosixTimeZone::from_shared_owned(&shared_tz); + Ok((jiff_tz, remaining)) + } + + /// Converts from the shared-but-internal API for use in proc macros. + /// + /// This is not `const` since it accepts an owned `String` as a time zone + /// abbreviation. This is used when parsing POSIX time zones at runtime. + #[cfg(feature = "alloc")] + pub(crate) fn from_shared_owned( + sh: &shared::PosixTimeZone, + ) -> PosixTimeZone { + let std_abbrev = Abbreviation::new(&sh.std_abbrev) + .expect("expected short enough std tz abbreviation"); + let std_offset = PosixOffset { + offset: SpanZoneOffset::new(sh.std_offset) + .expect("expected std offset in range"), + }; + let dst = match sh.dst { + None => None, + Some(ref dst) => Some(PosixDst::from_shared_owned(dst)), + }; + PosixTimeZone { std_abbrev, std_offset, dst } } /// Transforms this POSIX time zone into a "reasonable" time zone. @@ -702,6 +797,27 @@ struct PosixDst { } impl PosixDst { + /// Converts from the shared-but-internal API for use in proc macros. + /// + /// This is not `const` since it accepts an owned `String` as a time zone + /// abbreviation. This is used when parsing POSIX time zones at runtime. + #[cfg(feature = "alloc")] + fn from_shared_owned( + sh: &shared::PosixDst, + ) -> PosixDst { + let abbrev = Abbreviation::new(&sh.abbrev) + .expect("expected short enough dst tz abbreviation"); + let offset = PosixOffset { + offset: SpanZoneOffset::new(sh.offset) + .expect("expected dst offset in range"), + }; + let rule = match sh.rule { + None => None, + Some(ref rule) => Some(Rule::from_shared(rule)), + }; + PosixDst { abbrev, offset, rule } + } + fn display( &self, std_offset: PosixOffset, @@ -773,6 +889,15 @@ struct Rule { end: PosixDateTimeSpec, } +impl Rule { + /// Converts from the shared-but-internal API for use in proc macros. + const fn from_shared(sh: &shared::PosixRule) -> Rule { + let start = PosixDateTimeSpec::from_shared(&sh.start); + let end = PosixDateTimeSpec::from_shared(&sh.end); + Rule { start, end } + } +} + impl core::fmt::Display for Rule { fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { write!(f, "{},{}", self.start, self.end) @@ -788,6 +913,14 @@ struct PosixDateTimeSpec { } impl PosixDateTimeSpec { + /// Converts from the shared-but-internal API for use in proc macros. + const fn from_shared(sh: &shared::PosixDayTime) -> PosixDateTimeSpec { + PosixDateTimeSpec { + date: PosixDateSpec::from_shared(&sh.date), + time: PosixTimeSpec::from_shared(sh.time), + } + } + /// Turns this POSIX datetime spec into a civil datetime in the year given /// with the given offset. The datetimes returned are offset by the given /// offset. For wall clock time, an offset of `0` should be given. For @@ -868,6 +1001,53 @@ enum PosixDateSpec { } impl PosixDateSpec { + /// Converts from the shared-but-internal API for use in proc macros. 
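+    ///
+    /// For example, the rule component `M3.2.0` is represented in the
+    /// shared layer as `shared::PosixDay::WeekdayOfMonth { month: 3,
+    /// week: 2, weekday: 0 }`, which this converts to
+    /// `PosixDateSpec::WeekdayOfMonth` with `Weekday::Sunday`. Out of
+    /// range values panic, which in a `const` context (as used by the
+    /// proc macro) manifests as a compile-time error.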
+ const fn from_shared(sh: &shared::PosixDay) -> PosixDateSpec { + use crate::util::constant::unwrap; + + match *sh { + shared::PosixDay::JulianOne(doy) => { + let doy = unwrap!( + PosixJulianDayNoLeap::new_const(doy), + "expected 1-based Julian day in range" + ); + PosixDateSpec::JulianOne(doy) + } + shared::PosixDay::JulianZero(doy) => { + let doy = unwrap!( + PosixJulianDayWithLeap::new_const(doy), + "expected 0-based Julian day in range" + ); + PosixDateSpec::JulianZero(doy) + } + shared::PosixDay::WeekdayOfMonth { month, week, weekday } => { + let month = unwrap!( + Month::new_const(month), + "expected weekday-of-month month in range" + ); + let week = unwrap!( + PosixWeek::new_const(week), + "expected weekday-of-month week in range" + ); + let weekday = match weekday { + 0 => Weekday::Sunday, + 1 => Weekday::Monday, + 2 => Weekday::Tuesday, + 3 => Weekday::Wednesday, + 4 => Weekday::Thursday, + 5 => Weekday::Friday, + 6 => Weekday::Saturday, + _ => panic!("expected weekday-of-month weekday in range"), + }; + PosixDateSpec::WeekdayOfMonth(WeekdayOfMonth { + month, + week, + weekday, + }) + } + } + } + /// Convert this date specification to a civil date in the year given. /// /// If this date specification couldn't be turned into a date in the year @@ -997,6 +1177,15 @@ impl PosixTimeSpec { duration: PosixTimeSeconds::new_unchecked(2 * 60 * 60), }; + /// Converts from the shared-but-internal API for use in proc macros. + const fn from_shared(seconds: i32) -> PosixTimeSpec { + let duration = crate::util::constant::unwrap!( + PosixTimeSeconds::new_const(seconds), + "expected POSIX time spec seconds in range", + ); + PosixTimeSpec { duration } + } + fn to_duration(&self) -> SignedDuration { SignedDuration::from_secs(self.duration.get().into()) } @@ -1024,785 +1213,6 @@ impl core::fmt::Display for PosixTimeSpec { } } -#[derive(Debug)] -struct Parser<'s> { - /// The `TZ` string that we're parsing. - tz: &'s [u8], - /// The parser's current position in `tz`. - pos: Cell, - /// Whether to use IANA rules, i.e., when parsing a TZ string in a TZif - /// file of version 3 or greater. From `tzfile(5)`: - /// - /// > First, the hours part of its transition times may be signed and range - /// > from `-167` through `167` instead of the POSIX-required unsigned - /// > values from `0` through `24`. Second, DST is in effect all year if - /// > it starts January 1 at 00:00 and ends December 31 at 24:00 plus the - /// > difference between daylight saving and standard time. - /// - /// At time of writing, I don't think I understand the significance of - /// the second part above. (RFC 8536 elaborates that it is meant to be an - /// explicit clarification of something that POSIX itself implies.) But the - /// first part is clear: it permits the hours to be a bigger range. - ianav3plus: bool, -} - -impl<'s> Parser<'s> { - fn new>(tz: &'s B) -> Parser<'s> { - Parser { tz: tz.as_ref(), pos: Cell::new(0), ianav3plus: false } - } - - /// Parses a POSIX time zone from the current position of the parser and - /// ensures that the entire TZ string corresponds to a single valid POSIX - /// time zone. 
- fn parse(&self) -> Result { - let (time_zone, remaining) = self.parse_prefix()?; - if !remaining.is_empty() { - return Err(err!( - "expected entire TZ string to be a valid POSIX \ - time zone, but found '{}' after what would otherwise \ - be a valid POSIX TZ string", - Bytes(remaining), - )); - } - Ok(time_zone) - } - - /// Parses a POSIX time zone from the current position of the parser and - /// returns the remaining input. - fn parse_prefix(&self) -> Result<(PosixTimeZone, &'s [u8]), Error> { - let time_zone = self.parse_posix_time_zone()?; - Ok((time_zone, self.remaining())) - } - - /// Parse a POSIX time zone from the current position of the parser. - /// - /// Upon success, the parser will be positioned immediately following the - /// TZ string. - fn parse_posix_time_zone(&self) -> Result { - let std_abbrev = self - .parse_abbreviation() - .map_err(|e| e.context("failed to parse standard abbreviation"))?; - let std_offset = self - .parse_posix_offset() - .map_err(|e| e.context("failed to parse standard offset"))?; - let mut dst = None; - if !self.is_done() - && (self.byte().is_ascii_alphabetic() || self.byte() == b'<') - { - dst = Some(self.parse_posix_dst(std_offset)?); - } - Ok(PosixTimeZone { std_abbrev, std_offset, dst }) - } - - /// Parse a DST zone with an optional explicit transition rule. - /// - /// This assumes the parser is positioned at the first byte of the DST - /// abbreviation. - /// - /// Upon success, the parser will be positioned immediately after the end - /// of the DST transition rule (which might just be the abbreviation, but - /// might also include explicit start/end datetime specifications). - fn parse_posix_dst( - &self, - std_offset: PosixOffset, - ) -> Result { - let abbrev = self - .parse_abbreviation() - .map_err(|e| e.context("failed to parse DST abbreviation"))?; - // This is the default: one hour ahead of standard time. We may - // override this if the DST portion specifies an offset. (But it - // usually doesn't.) - let offset = - PosixOffset { offset: std_offset.offset + t::SECONDS_PER_HOUR }; - let mut dst = PosixDst { abbrev, offset, rule: None }; - if self.is_done() { - return Ok(dst); - } - if self.byte() != b',' { - dst.offset = self - .parse_posix_offset() - .map_err(|e| e.context("failed to parse DST offset"))?; - if self.is_done() { - return Ok(dst); - } - } - if self.byte() != b',' { - return Err(err!( - "after parsing DST offset in POSIX time zone string, \ - found '{}' but expected a ','", - Byte(self.byte()), - )); - } - if !self.bump() { - return Err(err!( - "after parsing DST offset in POSIX time zone string, \ - found end of string after a trailing ','", - )); - } - dst.rule = Some(self.parse_rule()?); - Ok(dst) - } - - /// Parse a time zone abbreviation. - /// - /// This assumes the parser is positioned at the first byte of the - /// abbreviation. This is either the first character in the abbreviation, - /// or the opening quote of a quoted abbreviation. - /// - /// Upon success, the parser will be positioned immediately following the - /// abbreviation name. - fn parse_abbreviation(&self) -> Result { - if self.byte() == b'<' { - if !self.bump() { - return Err(err!( - "found opening '<' quote for abbreviation in \ - POSIX time zone string, and expected a name \ - following it, but found the end of string instead" - )); - } - self.parse_quoted_abbreviation() - } else { - self.parse_unquoted_abbreviation() - } - } - - /// Parses an unquoted time zone abbreviation. 
- /// - /// This assumes the parser is position at the first byte in the - /// abbreviation. - /// - /// Upon success, the parser will be positioned immediately after the - /// last byte in the abbreviation. - fn parse_unquoted_abbreviation(&self) -> Result { - let start = self.pos(); - for i in 0.. { - if !self.byte().is_ascii_alphabetic() { - break; - } - if i >= Abbreviation::capacity() { - return Err(err!( - "expected abbreviation with at most {} bytes, \ - but found a longer abbreviation beginning with '{}'", - Abbreviation::capacity(), - Bytes(&self.tz[start..i]), - )); - } - if !self.bump() { - break; - } - } - let end = self.pos(); - let abbrev = - core::str::from_utf8(&self.tz[start..end]).map_err(|_| { - // NOTE: I believe this error is technically impossible since - // the loop above restricts letters in an abbreviation to - // ASCII. So everything from `start` to `end` is ASCII and - // thus should be UTF-8. But it doesn't cost us anything to - // report an error here in case the code above evolves somehow. - err!( - "found abbreviation '{}', but it is not valid UTF-8", - Bytes(&self.tz[start..end]), - ) - })?; - if abbrev.len() < 3 { - return Err(err!( - "expected abbreviation with 3 or more bytes, but found \ - abbreviation {:?} with {} bytes", - abbrev, - abbrev.len(), - )); - } - // OK because we verified above that the abbreviation - // does not exceed `Abbreviation::capacity`. - Ok(Abbreviation::new(abbrev).unwrap()) - } - - /// Parses a quoted time zone abbreviation. - /// - /// This assumes the parser is positioned immediately after the opening - /// `<` quote. That is, at the first byte in the abbreviation. - /// - /// Upon success, the parser will be positioned immediately after the - /// closing `>` quote. - fn parse_quoted_abbreviation(&self) -> Result { - let start = self.pos(); - for i in 0.. { - if !self.byte().is_ascii_alphanumeric() - && self.byte() != b'+' - && self.byte() != b'-' - { - break; - } - if i >= Abbreviation::capacity() { - return Err(err!( - "expected abbreviation with at most {} bytes, \ - but found a longer abbreviation beginning with '{}'", - Abbreviation::capacity(), - Bytes(&self.tz[start..i]), - )); - } - if !self.bump() { - break; - } - } - let end = self.pos(); - let abbrev = - core::str::from_utf8(&self.tz[start..end]).map_err(|_| { - // NOTE: I believe this error is technically impossible since - // the loop above restricts letters in an abbreviation to - // ASCII. So everything from `start` to `end` is ASCII and - // thus should be UTF-8. But it doesn't cost us anything to - // report an error here in case the code above evolves somehow. - err!( - "found abbreviation '{}', but it is not valid UTF-8", - Bytes(&self.tz[start..end]), - ) - })?; - if self.is_done() { - return Err(err!( - "found non-empty quoted abbreviation {abbrev:?}, but \ - did not find expected end-of-quoted abbreviation \ - '>' character", - )); - } - if self.byte() != b'>' { - return Err(err!( - "found non-empty quoted abbreviation {abbrev:?}, but \ - found '{}' instead of end-of-quoted abbreviation '>' \ - character", - Byte(self.byte()), - )); - } - self.bump(); - if abbrev.len() < 3 { - return Err(err!( - "expected abbreviation with 3 or more bytes, but found \ - abbreviation {abbrev:?} with {} bytes", - abbrev.len(), - )); - } - // OK because we verified above that the abbreviation - // does not exceed `Abbreviation::capacity()`. - Ok(Abbreviation::new(abbrev).unwrap()) - } - - /// Parse a POSIX time offset. 
- /// - /// This assumes the parser is positioned at the first byte of the offset. - /// This can either be a digit (for a positive offset) or the sign of the - /// offset (which must be either `-` or `+`). - /// - /// Upon success, the parser will be positioned immediately after the end - /// of the offset. - fn parse_posix_offset(&self) -> Result { - let sign = self - .parse_optional_sign() - .map_err(|e| { - e.context( - "failed to parse sign for time offset \ - in POSIX time zone string", - ) - })? - .unwrap_or(Sign::N::<1>()); - let hour = self.parse_hour_posix()?; - let (mut minute, mut second) = (Minute::N::<0>(), Second::N::<0>()); - if self.maybe_byte() == Some(b':') { - if !self.bump() { - return Err(err!( - "incomplete time in POSIX timezone (missing minutes)", - )); - } - minute = self.parse_minute()?; - if self.maybe_byte() == Some(b':') { - if !self.bump() { - return Err(err!( - "incomplete time in POSIX timezone (missing seconds)", - )); - } - second = self.parse_second()?; - } - } - let mut seconds = SpanZoneOffset::N::<0>(); - seconds += t::SpanZoneOffset::rfrom(hour) * t::SECONDS_PER_HOUR; - seconds += t::SpanZoneOffset::rfrom(minute) * t::SECONDS_PER_MINUTE; - seconds += t::SpanZoneOffset::rfrom(second); - // Yes, we flip the sign, because POSIX is backwards. - // For example, `EST5` corresponds to `-05:00`. - Ok(PosixOffset { offset: seconds * -sign }) - } - - /// Parses a POSIX DST transition rule. - /// - /// This assumes the parser is positioned at the first byte in the rule. - /// That is, it comes immediately after the DST abbreviation or its - /// optional offset. - /// - /// Upon success, the parser will be positioned immediately after the - /// DST transition rule. In typical cases, this corresponds to the end of - /// the TZ string. - fn parse_rule(&self) -> Result { - let start = self.parse_posix_datetime_spec().map_err(|e| { - e.context("failed to parse start of DST transition rule") - })?; - if self.maybe_byte() != Some(b',') || !self.bump() { - return Err(err!( - "expected end of DST rule after parsing the start \ - of the DST rule" - )); - } - let end = self.parse_posix_datetime_spec().map_err(|e| { - e.context("failed to parse end of DST transition rule") - })?; - Ok(Rule { start, end }) - } - - /// Parses a POSIX datetime specification. - /// - /// This assumes the parser is position at the first byte where a datetime - /// specification is expected to occur. - /// - /// Upon success, the parser will be positioned after the datetime - /// specification. This will either be immediately after the date, or if - /// it's present, the time part of the specification. - fn parse_posix_datetime_spec(&self) -> Result { - let date = self.parse_posix_date_spec()?; - let time = PosixTimeSpec::DEFAULT; - let mut spec = PosixDateTimeSpec { date, time }; - if self.maybe_byte() != Some(b'/') { - return Ok(spec); - } - if !self.bump() { - return Err(err!( - "expected time specification after '/' following a date - specification in a POSIX time zone DST transition rule", - )); - } - spec.time = self.parse_posix_time_spec()?; - Ok(spec) - } - - /// Parses a POSIX date specification. - /// - /// This assumes the parser is positioned at the first byte of the date - /// specification. This can be `J` (for one based Julian day without leap - /// days), `M` (for "weekday of month") or a digit starting the zero based - /// Julian day with leap days. This routine will validate that the position - /// points to one of these possible values. 
That is, the caller doesn't - /// need to parse the `M` or the `J` or the leading digit. The caller - /// should just call this routine when it *expect* a date specification to - /// follow. - /// - /// Upon success, the parser will be positioned immediately after the date - /// specification. - fn parse_posix_date_spec(&self) -> Result { - match self.byte() { - b'J' => { - if !self.bump() { - return Err(err!( - "expected one-based Julian day after 'J' in date \ - specification of a POSIX time zone DST transition \ - rule, but got the end of the string instead" - )); - } - Ok(PosixDateSpec::JulianOne( - self.parse_posix_julian_day_no_leap()?, - )) - } - b'0'..=b'9' => Ok(PosixDateSpec::JulianZero( - self.parse_posix_julian_day_with_leap()?, - )), - b'M' => { - if !self.bump() { - return Err(err!( - "expected month-week-weekday after 'M' in date \ - specification of a POSIX time zone DST transition \ - rule, but got the end of the string instead" - )); - } - Ok(PosixDateSpec::WeekdayOfMonth( - self.parse_weekday_of_month()?, - )) - } - _ => Err(err!( - "expected 'J', a digit or 'M' at the beginning of a date \ - specification of a POSIX time zone DST transition rule, \ - but got '{}' instead", - Byte(self.byte()), - )), - } - } - - /// Parses a POSIX Julian day that does not include leap days - /// (`1 <= n <= 365`). - /// - /// This assumes the parser is positioned just after the `J` and at the - /// first digit of the Julian day. Upon success, the parser will be - /// positioned immediately following the day number. - fn parse_posix_julian_day_no_leap( - &self, - ) -> Result { - let number = self - .parse_number_with_upto_n_digits(3) - .map_err(|e| e.context("invalid one based Julian day"))?; - let day = PosixJulianDayNoLeap::new(number).ok_or_else(|| { - err!("invalid one based Julian day (must be in range 1..=365") - })?; - Ok(day) - } - - /// Parses a POSIX Julian day that includes leap days (`0 <= n <= 365`). - /// - /// This assumes the parser is positioned at the first digit of the Julian - /// day. Upon success, the parser will be positioned immediately following - /// the day number. - fn parse_posix_julian_day_with_leap( - &self, - ) -> Result { - let number = self - .parse_number_with_upto_n_digits(3) - .map_err(|e| e.context("invalid zero based Julian day"))?; - let day = PosixJulianDayWithLeap::new(number).ok_or_else(|| { - err!("invalid zero based Julian day (must be in range 0..=365") - })?; - Ok(day) - } - - /// Parses a POSIX "weekday of month" specification. - /// - /// This assumes the parser is positioned just after the `M` byte and - /// at the first digit of the month. Upon success, the parser will be - /// positioned immediately following the "weekday of the month" that was - /// parsed. - fn parse_weekday_of_month(&self) -> Result { - let month = self.parse_month()?; - if self.maybe_byte() != Some(b'.') { - return Err(err!( - "expected '.' after month '{month}' in POSIX time zone rule" - )); - } - if !self.bump() { - return Err(err!( - "expected week after month '{month}' in POSIX time zone rule" - )); - } - let week = self.parse_week()?; - if self.maybe_byte() != Some(b'.') { - return Err(err!( - "expected '.' after week '{week}' in POSIX time zone rule" - )); - } - if !self.bump() { - return Err(err!( - "expected day-of-week after week '{week}' in \ - POSIX time zone rule" - )); - } - let weekday = self.parse_weekday()?; - Ok(WeekdayOfMonth { month, week, weekday }) - } - - /// This parses a POSIX time specification in the format - /// `[+/-]hh?[:mm[:ss]]`. 
- /// - /// This assumes the parser is positioned at the first `h` (or the sign, - /// if present). Upon success, the parser will be positioned immediately - /// following the end of the time specification. - fn parse_posix_time_spec(&self) -> Result { - let (sign, hour) = if self.ianav3plus { - let sign = self - .parse_optional_sign() - .map_err(|e| { - e.context( - "failed to parse sign for transition time \ - in POSIX time zone string", - ) - })? - .unwrap_or(Sign::N::<1>()); - let hour = self.parse_hour_ianav3plus()?; - (sign, hour) - } else { - (Sign::N::<1>(), self.parse_hour_posix()?.rinto()) - }; - let (mut minute, mut second) = (Minute::N::<0>(), Second::N::<0>()); - if self.maybe_byte() == Some(b':') { - if !self.bump() { - return Err(err!( - "incomplete transition time in \ - POSIX time zone string (missing minutes)", - )); - } - minute = self.parse_minute()?; - if self.maybe_byte() == Some(b':') { - if !self.bump() { - return Err(err!( - "incomplete transition time in \ - POSIX time zone string (missing seconds)", - )); - } - second = self.parse_second()?; - } - } - let mut seconds = PosixTimeSeconds::rfrom(hour) * t::SECONDS_PER_HOUR; - seconds += PosixTimeSeconds::rfrom(minute) * t::SECONDS_PER_MINUTE; - seconds += second; - seconds *= sign; - Ok(PosixTimeSpec { duration: seconds }) - } - - /// Parses a month. - /// - /// This is expected to be positioned at the first digit. Upon success, - /// the parser will be positioned after the month (which may contain two - /// digits). - fn parse_month(&self) -> Result { - let number = self.parse_number_with_upto_n_digits(2)?; - let month = Month::new(number).ok_or_else(|| { - err!("month in POSIX time zone must be in range 1..=12") - })?; - Ok(month) - } - - /// Parses a week-of-month number. - /// - /// This is expected to be positioned at the first digit. Upon success, - /// the parser will be positioned after the week digit. - fn parse_week(&self) -> Result { - let number = self.parse_number_with_exactly_n_digits(1)?; - let week = PosixWeek::new(number).ok_or_else(|| { - err!("week in POSIX time zone must be in range 1..=5") - })?; - Ok(week) - } - - /// Parses a week-of-month number. - /// - /// This is expected to be positioned at the first digit. Upon success, - /// the parser will be positioned after the week digit. - fn parse_weekday(&self) -> Result { - let number = self.parse_number_with_exactly_n_digits(1)?; - let number8 = i8::try_from(number).map_err(|_| { - err!( - "weekday '{number}' in POSIX time zone \ - does not fit into 8-bit integer" - ) - })?; - let weekday = - Weekday::from_sunday_zero_offset(number8).map_err(|_| { - err!( - "weekday in POSIX time zone must be in range 0..=6 \ - (with 0 corresponding to Sunday), but got {number8}", - ) - })?; - Ok(weekday) - } - - /// Parses an hour from a POSIX time specification with the IANA v3+ - /// extension. That is, the hour may be in the range `0..=167`. (Callers - /// should parse an optional sign preceding the hour digits when IANA V3+ - /// parsing is enabled.) - /// - /// The hour is allowed to be a single digit (unlike minutes or seconds). - /// - /// This assumes the parser is positioned at the position where the first - /// hour digit should occur. Upon success, the parser will be positioned - /// immediately after the last hour digit. - fn parse_hour_ianav3plus(&self) -> Result { - // Callers should only be using this method when IANA v3+ parsing is - // enabled. 
- assert!(self.ianav3plus); - let number = self - .parse_number_with_upto_n_digits(3) - .map_err(|e| e.context("invalid hour digits"))?; - let hour = IanaHour::new(number).ok_or_else(|| { - err!( - "hour in POSIX (IANA v3+ style) \ - time zone must be in range -167..=167" - ) - })?; - Ok(hour) - } - - /// Parses an hour from a POSIX time specification, with the allowed range - /// being `0..=24`. - /// - /// The hour is allowed to be a single digit (unlike minutes or seconds). - /// - /// This assumes the parser is positioned at the position where the first - /// hour digit should occur. Upon success, the parser will be positioned - /// immediately after the last hour digit. - fn parse_hour_posix(&self) -> Result { - type PosixHour24 = ri8<0, 24>; - - let number = self - .parse_number_with_upto_n_digits(2) - .map_err(|e| e.context("invalid hour digits"))?; - let hour = PosixHour24::new(number).ok_or_else(|| { - err!("hour in POSIX time zone must be in range 0..=24") - })?; - Ok(hour.rinto()) - } - - /// Parses a minute from a POSIX time specification. - /// - /// The minute must be exactly two digits. - /// - /// This assumes the parser is positioned at the position where the first - /// minute digit should occur. Upon success, the parser will be positioned - /// immediately after the second minute digit. - fn parse_minute(&self) -> Result { - let number = self - .parse_number_with_exactly_n_digits(2) - .map_err(|e| e.context("invalid minute digits"))?; - let minute = Minute::new(number).ok_or_else(|| { - err!("minute in POSIX time zone must be in range 0..=59") - })?; - Ok(minute) - } - - /// Parses a second from a POSIX time specification. - /// - /// The second must be exactly two digits. - /// - /// This assumes the parser is positioned at the position where the first - /// second digit should occur. Upon success, the parser will be positioned - /// immediately after the second second digit. - fn parse_second(&self) -> Result { - let number = self - .parse_number_with_exactly_n_digits(2) - .map_err(|e| e.context("invalid second digits"))?; - let second = Second::new(number).ok_or_else(|| { - err!("second in POSIX time zone must be in range 0..=59") - })?; - Ok(second) - } - - /// Parses a signed 64-bit integer expressed in exactly `n` digits. - /// - /// If `n` digits could not be found (or if the `TZ` string ends before - /// `n` digits could be found), then this returns an error. - /// - /// This assumes that `n >= 1` and that the parser is positioned at the - /// first digit. Upon success, the parser is positioned immediately after - /// the `n`th digit. - fn parse_number_with_exactly_n_digits( - &self, - n: usize, - ) -> Result { - assert!(n >= 1, "numbers must have at least 1 digit"); - let start = self.pos(); - for i in 0..n { - if self.is_done() { - return Err(err!("expected {n} digits, but found {i}")); - } - if !self.byte().is_ascii_digit() { - return Err(err!("invalid digit '{}'", Byte(self.byte()))); - } - self.bump(); - } - let end = self.pos(); - parse::i64(&self.tz[start..end]) - } - - /// Parses a signed 64-bit integer expressed with up to `n` digits and at - /// least 1 digit. - /// - /// This assumes that `n >= 1` and that the parser is positioned at the - /// first digit. Upon success, the parser is position immediately after the - /// last digit (which can be at most `n`). 
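For the digit-scanning helper described above, a minimal sketch over a plain byte slice (rather than the cursor-based `Parser` in this diff) that accepts at most `n` digits and also reports how many bytes it consumed:

```rust
// Illustrative sketch only, not the parser in this patch: scan at most `n`
// ASCII digits from the front of `input` and parse them as an i64, returning
// `None` if there is no leading digit at all.
fn parse_upto_n_digits(input: &[u8], n: usize) -> Option<(i64, usize)> {
    assert!(n >= 1, "numbers must have at least 1 digit");
    let len = input.iter().take(n).take_while(|b| b.is_ascii_digit()).count();
    if len == 0 {
        return None;
    }
    let digits = std::str::from_utf8(&input[..len]).ok()?;
    Some((digits.parse().ok()?, len))
}

fn main() {
    // Mirrors the behavior exercised by the tests: "3655" with n=3 yields 365.
    assert_eq!(parse_upto_n_digits(b"3655", 3), Some((365, 3)));
    assert_eq!(parse_upto_n_digits(b"12:34", 2), Some((12, 2)));
    assert_eq!(parse_upto_n_digits(b"x", 2), None);
}
```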
- fn parse_number_with_upto_n_digits(&self, n: usize) -> Result { - assert!(n >= 1, "numbers must have at least 1 digit"); - let start = self.pos(); - for _ in 0..n { - if self.is_done() || !self.byte().is_ascii_digit() { - break; - } - self.bump(); - } - let end = self.pos(); - parse::i64(&self.tz[start..end]) - } - - /// Parses an optional sign. - /// - /// This assumes the parser is positioned at the position where a positive - /// or negative sign is permitted. If one exists, then it is consumed and - /// returned. Moreover, if one exists, then this guarantees that it is not - /// the last byte in the input. That is, upon success, it is valid to call - /// `self.byte()`. - fn parse_optional_sign(&self) -> Result, Error> { - if self.is_done() { - return Ok(None); - } - Ok(match self.byte() { - b'-' => { - if !self.bump() { - return Err(err!( - "expected digit after '-' sign, but got end of input", - )); - } - Some(Sign::N::<-1>()) - } - b'+' => { - if !self.bump() { - return Err(err!( - "expected digit after '+' sign, but got end of input", - )); - } - Some(Sign::N::<1>()) - } - _ => None, - }) - } -} - -/// Helper routines for parsing a POSIX `TZ` string. -impl<'s> Parser<'s> { - /// Bump the parser to the next byte. - /// - /// If the end of the input has been reached, then `false` is returned. - fn bump(&self) -> bool { - if self.is_done() { - return false; - } - self.pos.set( - self.pos().checked_add(1).expect("pos cannot overflow usize"), - ); - !self.is_done() - } - - /// Returns true if the next call to `bump` would return false. - fn is_done(&self) -> bool { - self.pos() == self.tz.len() - } - - /// Return the byte at the current position of the parser. - /// - /// This panics if the parser is positioned at the end of the TZ string. - fn byte(&self) -> u8 { - self.tz[self.pos()] - } - - /// Return the byte at the current position of the parser. If the TZ string - /// has been exhausted, then this returns `None`. - fn maybe_byte(&self) -> Option { - self.tz.get(self.pos()).copied() - } - - /// Return the current byte offset of the parser. - /// - /// The offset starts at `0` from the beginning of the TZ string. - fn pos(&self) -> usize { - self.pos.get() - } - - /// Returns the remaining bytes of the TZ string. - /// - /// This includes `self.byte()`. It may be empty. - fn remaining(&self) -> &'s [u8] { - &self.tz[self.pos()..] - } -} - /// A helper type for formatting a time zone abbreviation. /// /// Basically, this will write the `<` and `>` quotes if necessary, and @@ -1821,11 +1231,11 @@ impl core::fmt::Display for AbbreviationDisplay { } } -// Note that most of the tests below are for the parsing. For the actual time -// zone transition logic, that's unit tested in tz/mod.rs. +// The tests below all require parsing which requires alloc. +#[cfg(feature = "alloc")] #[cfg(test)] mod tests { - use std::string::ToString; + use alloc::string::ToString; use crate::{civil::date, tz::offset}; @@ -1853,42 +1263,6 @@ mod tests { tz } - /// DEBUG COMMAND - /// - /// Takes environment variable `JIFF_DEBUG_POSIX_TZ` as input, and prints - /// the Rust (extended) debug representation of it after parsing it as a - /// POSIX TZ string. 
- #[cfg(feature = "std")] - #[test] - fn debug_posix_tz() -> anyhow::Result<()> { - const ENV: &str = "JIFF_DEBUG_POSIX_TZ"; - let Some(val) = std::env::var_os(ENV) else { return Ok(()) }; - let val = val - .to_str() - .ok_or_else(|| err!("{ENV} contains invalid UTF-8"))?; - let tz = Parser::new(val).parse()?; - std::eprintln!("{tz:#?}"); - Ok(()) - } - - /// DEBUG COMMAND - /// - /// Takes environment variable `JIFF_DEBUG_IANA_TZ` as input, and prints - /// the Rust (extended) debug representation of it after parsing it as a - /// POSIX TZ string with IANA tzfile v3+ extensions. - #[cfg(feature = "std")] - #[test] - fn debug_iana_tz() -> anyhow::Result<()> { - const ENV: &str = "JIFF_DEBUG_IANA_TZ"; - let Some(val) = std::env::var_os(ENV) else { return Ok(()) }; - let val = val - .to_str() - .ok_or_else(|| err!("{ENV} contains invalid UTF-8"))?; - let tz = Parser { ianav3plus: true, ..Parser::new(val) }.parse()?; - std::eprintln!("{tz:#?}"); - Ok(()) - } - #[test] fn reasonable_to_dst_civil_datetime_utc_range() { let tz = reasonable_posix_time_zone("WART4WARST,J1/-3,J365/20"); @@ -1897,7 +1271,7 @@ mod tests { // out here, and I didn't adopt snapshot testing until I had // written out these tests by hand. ¯\_(ツ)_/¯ dst: tz.dst.as_ref().unwrap(), - offset: crate::tz::offset(-3), + offset: offset(-3), start: date(2024, 1, 1).at(1, 0, 0, 0), end: date(2024, 12, 31).at(23, 0, 0, 0), }; @@ -1906,7 +1280,7 @@ mod tests { let tz = reasonable_posix_time_zone("WART4WARST,J1/-4,J365/21"); let dst_info = DstInfo { dst: tz.dst.as_ref().unwrap(), - offset: crate::tz::offset(-3), + offset: offset(-3), start: date(2024, 1, 1).at(0, 0, 0, 0), end: date(2024, 12, 31).at(23, 59, 59, 999_999_999), }; @@ -1915,7 +1289,7 @@ mod tests { let tz = reasonable_posix_time_zone("EST5EDT,M3.2.0,M11.1.0"); let dst_info = DstInfo { dst: tz.dst.as_ref().unwrap(), - offset: crate::tz::offset(-4), + offset: offset(-4), start: date(2024, 3, 10).at(7, 0, 0, 0), end: date(2024, 11, 3).at(6, 0, 0, 0), }; @@ -2247,1078 +1621,7 @@ mod tests { }, ); - let p = Parser::new("America/New_York"); - assert!(p.parse().is_err()); - - let p = Parser::new(":America/New_York"); - assert!(p.parse().is_err()); - } - - #[test] - fn parse() { - let p = Parser::new("NZST-12NZDT,J60,J300"); - assert_eq!( - p.parse().unwrap(), - PosixTimeZone { - std_abbrev: "NZST".into(), - std_offset: offset(12).into(), - dst: Some(PosixDst { - abbrev: "NZDT".into(), - offset: offset(13).into(), - rule: Some(Rule { - start: PosixDateTimeSpec { - date: PosixDateSpec::JulianOne(C(60).rinto()), - time: PosixTimeSpec::DEFAULT, - }, - end: PosixDateTimeSpec { - date: PosixDateSpec::JulianOne(C(300).rinto()), - time: PosixTimeSpec::DEFAULT, - }, - }), - }), - }, - ); - - let p = Parser::new("NZST-12NZDT,J60,J300WAT"); - assert!(p.parse().is_err()); - } - - #[test] - fn parse_posix_time_zone() { - let p = Parser::new("NZST-12NZDT,M9.5.0,M4.1.0/3"); - assert_eq!( - p.parse_posix_time_zone().unwrap(), - PosixTimeZone { - std_abbrev: "NZST".into(), - std_offset: offset(12).into(), - dst: Some(PosixDst { - abbrev: "NZDT".into(), - offset: offset(13).into(), - rule: Some(Rule { - start: PosixDateTimeSpec { - date: PosixDateSpec::WeekdayOfMonth( - WeekdayOfMonth { - month: C(9).rinto(), - week: C(5).rinto(), - weekday: Weekday::Sunday, - } - ), - time: PosixTimeSpec::DEFAULT, - }, - end: PosixDateTimeSpec { - date: PosixDateSpec::WeekdayOfMonth( - WeekdayOfMonth { - month: C(4).rinto(), - week: C(1).rinto(), - weekday: Weekday::Sunday, - } - ), - time: PosixTimeSpec { - 
duration: PosixTimeSeconds::new(3 * 60 * 60) - .unwrap(), - }, - }, - }) - }), - }, - ); - - let p = Parser::new("NZST-12NZDT,M9.5.0,M4.1.0/3WAT"); - assert_eq!( - p.parse_posix_time_zone().unwrap(), - PosixTimeZone { - std_abbrev: "NZST".into(), - std_offset: offset(12).into(), - dst: Some(PosixDst { - abbrev: "NZDT".into(), - offset: offset(13).into(), - rule: Some(Rule { - start: PosixDateTimeSpec { - date: PosixDateSpec::WeekdayOfMonth( - WeekdayOfMonth { - month: C(9).rinto(), - week: C(5).rinto(), - weekday: Weekday::Sunday, - } - ), - time: PosixTimeSpec::DEFAULT, - }, - end: PosixDateTimeSpec { - date: PosixDateSpec::WeekdayOfMonth( - WeekdayOfMonth { - month: C(4).rinto(), - week: C(1).rinto(), - weekday: Weekday::Sunday, - } - ), - time: PosixTimeSpec { - duration: PosixTimeSeconds::new(3 * 60 * 60) - .unwrap(), - }, - }, - }) - }), - }, - ); - - let p = Parser::new("NZST-12NZDT,J60,J300"); - assert_eq!( - p.parse_posix_time_zone().unwrap(), - PosixTimeZone { - std_abbrev: "NZST".into(), - std_offset: offset(12).into(), - dst: Some(PosixDst { - abbrev: "NZDT".into(), - offset: offset(13).into(), - rule: Some(Rule { - start: PosixDateTimeSpec { - date: PosixDateSpec::JulianOne(C(60).rinto()), - time: PosixTimeSpec::DEFAULT, - }, - end: PosixDateTimeSpec { - date: PosixDateSpec::JulianOne(C(300).rinto()), - time: PosixTimeSpec::DEFAULT, - }, - }), - }), - }, - ); - - let p = Parser::new("NZST-12NZDT,J60,J300WAT"); - assert_eq!( - p.parse_posix_time_zone().unwrap(), - PosixTimeZone { - std_abbrev: "NZST".into(), - std_offset: offset(12).into(), - dst: Some(PosixDst { - abbrev: "NZDT".into(), - offset: offset(13).into(), - rule: Some(Rule { - start: PosixDateTimeSpec { - date: PosixDateSpec::JulianOne(C(60).rinto()), - time: PosixTimeSpec::DEFAULT, - }, - end: PosixDateTimeSpec { - date: PosixDateSpec::JulianOne(C(300).rinto()), - time: PosixTimeSpec::DEFAULT, - }, - }), - }), - }, - ); - } - - #[test] - fn parse_posix_dst() { - let std_offset = PosixOffset::from(offset(12)); - let p = Parser::new("NZDT,M9.5.0,M4.1.0/3"); - assert_eq!( - p.parse_posix_dst(std_offset).unwrap(), - PosixDst { - abbrev: "NZDT".into(), - offset: offset(13).into(), - rule: Some(Rule { - start: PosixDateTimeSpec { - date: PosixDateSpec::WeekdayOfMonth(WeekdayOfMonth { - month: C(9).rinto(), - week: C(5).rinto(), - weekday: Weekday::Sunday, - }), - time: PosixTimeSpec::DEFAULT, - }, - end: PosixDateTimeSpec { - date: PosixDateSpec::WeekdayOfMonth(WeekdayOfMonth { - month: C(4).rinto(), - week: C(1).rinto(), - weekday: Weekday::Sunday, - }), - time: PosixTimeSpec { - duration: PosixTimeSeconds::new(3 * 60 * 60) - .unwrap(), - }, - }, - }), - }, - ); - - let p = Parser::new("NZDT,J60,J300"); - assert_eq!( - p.parse_posix_dst(std_offset).unwrap(), - PosixDst { - abbrev: "NZDT".into(), - offset: offset(13).into(), - rule: Some(Rule { - start: PosixDateTimeSpec { - date: PosixDateSpec::JulianOne(C(60).rinto()), - time: PosixTimeSpec::DEFAULT, - }, - end: PosixDateTimeSpec { - date: PosixDateSpec::JulianOne(C(300).rinto()), - time: PosixTimeSpec::DEFAULT, - }, - }), - }, - ); - - let p = Parser::new("NZDT-7,J60,J300"); - assert_eq!( - p.parse_posix_dst(std_offset).unwrap(), - PosixDst { - abbrev: "NZDT".into(), - offset: offset(7).into(), - rule: Some(Rule { - start: PosixDateTimeSpec { - date: PosixDateSpec::JulianOne(C(60).rinto()), - time: PosixTimeSpec::DEFAULT, - }, - end: PosixDateTimeSpec { - date: PosixDateSpec::JulianOne(C(300).rinto()), - time: PosixTimeSpec::DEFAULT, - }, - }), - }, - ); - - let p = 
Parser::new("NZDT+7,J60,J300"); - assert_eq!( - p.parse_posix_dst(std_offset).unwrap(), - PosixDst { - abbrev: "NZDT".into(), - offset: offset(-7).into(), - rule: Some(Rule { - start: PosixDateTimeSpec { - date: PosixDateSpec::JulianOne(C(60).rinto()), - time: PosixTimeSpec::DEFAULT, - }, - end: PosixDateTimeSpec { - date: PosixDateSpec::JulianOne(C(300).rinto()), - time: PosixTimeSpec::DEFAULT, - }, - }), - }, - ); - - let p = Parser::new("NZDT7,J60,J300"); - assert_eq!( - p.parse_posix_dst(std_offset).unwrap(), - PosixDst { - abbrev: "NZDT".into(), - offset: offset(-7).into(), - rule: Some(Rule { - start: PosixDateTimeSpec { - date: PosixDateSpec::JulianOne(C(60).rinto()), - time: PosixTimeSpec::DEFAULT, - }, - end: PosixDateTimeSpec { - date: PosixDateSpec::JulianOne(C(300).rinto()), - time: PosixTimeSpec::DEFAULT, - }, - }), - }, - ); - - let p = Parser::new("NZDT7,"); - assert!(p.parse_posix_dst(std_offset).is_err()); - - let p = Parser::new("NZDT7!"); - assert!(p.parse_posix_dst(std_offset).is_err()); - } - - #[test] - fn parse_abbreviation() { - let p = Parser::new("ABC"); - assert_eq!(p.parse_abbreviation().unwrap(), "ABC"); - - let p = Parser::new(""); - assert_eq!(p.parse_abbreviation().unwrap(), "ABC"); - - let p = Parser::new("<+09>"); - assert_eq!(p.parse_abbreviation().unwrap(), "+09"); - - let p = Parser::new("+09"); - assert!(p.parse_abbreviation().is_err()); - } - - #[test] - fn parse_unquoted_abbreviation() { - let p = Parser::new("ABC"); - assert_eq!(p.parse_unquoted_abbreviation().unwrap(), "ABC"); - - let p = Parser::new("ABCXYZ"); - assert_eq!(p.parse_unquoted_abbreviation().unwrap(), "ABCXYZ"); - - let p = Parser::new("ABC123"); - assert_eq!(p.parse_unquoted_abbreviation().unwrap(), "ABC"); - - let tz = "a".repeat(30); - let p = Parser::new(&tz); - assert_eq!(p.parse_unquoted_abbreviation().unwrap(), &*tz); - - let p = Parser::new("a"); - assert!(p.parse_unquoted_abbreviation().is_err()); - - let p = Parser::new("ab"); - assert!(p.parse_unquoted_abbreviation().is_err()); - - let p = Parser::new("ab1"); - assert!(p.parse_unquoted_abbreviation().is_err()); - - let tz = "a".repeat(31); - let p = Parser::new(&tz); - assert!(p.parse_unquoted_abbreviation().is_err()); - - let p = Parser::new(b"ab\xFFcd"); - assert!(p.parse_unquoted_abbreviation().is_err()); - } - - #[test] - fn parse_quoted_abbreviation() { - // The inputs look a little funny here, but that's because - // 'parse_quoted_abbreviation' starts after the opening quote - // has been parsed. 
- - let p = Parser::new("ABC>"); - assert_eq!(p.parse_quoted_abbreviation().unwrap(), "ABC"); - - let p = Parser::new("ABCXYZ>"); - assert_eq!(p.parse_quoted_abbreviation().unwrap(), "ABCXYZ"); - - let p = Parser::new("ABC>123"); - assert_eq!(p.parse_quoted_abbreviation().unwrap(), "ABC"); - - let p = Parser::new("ABC123>"); - assert_eq!(p.parse_quoted_abbreviation().unwrap(), "ABC123"); - - let p = Parser::new("ab1>"); - assert_eq!(p.parse_quoted_abbreviation().unwrap(), "ab1"); - - let p = Parser::new("+09>"); - assert_eq!(p.parse_quoted_abbreviation().unwrap(), "+09"); - - let p = Parser::new("-09>"); - assert_eq!(p.parse_quoted_abbreviation().unwrap(), "-09"); - - let tz = alloc::format!("{}>", "a".repeat(30)); - let p = Parser::new(&tz); - assert_eq!( - p.parse_quoted_abbreviation().unwrap(), - tz.trim_end_matches(">") - ); - - let p = Parser::new("a>"); - assert!(p.parse_quoted_abbreviation().is_err()); - - let p = Parser::new("ab>"); - assert!(p.parse_quoted_abbreviation().is_err()); - - let tz = alloc::format!("{}>", "a".repeat(31)); - let p = Parser::new(&tz); - assert!(p.parse_quoted_abbreviation().is_err()); - - let p = Parser::new(b"ab\xFFcd>"); - assert!(p.parse_quoted_abbreviation().is_err()); - - let p = Parser::new("ABC"); - assert!(p.parse_quoted_abbreviation().is_err()); - - let p = Parser::new("ABC!>"); - assert!(p.parse_quoted_abbreviation().is_err()); - } - - #[test] - fn parse_posix_offset() { - let p = Parser::new("5"); - assert_eq!(p.parse_posix_offset().unwrap(), offset(-5).into(),); - - let p = Parser::new("+5"); - assert_eq!(p.parse_posix_offset().unwrap(), offset(-5).into(),); - - let p = Parser::new("-5"); - assert_eq!(p.parse_posix_offset().unwrap(), offset(5).into(),); - - let p = Parser::new("-12:34:56"); - assert_eq!( - p.parse_posix_offset().unwrap(), - PosixOffset::from( - Offset::from_seconds(12 * 60 * 60 + 34 * 60 + 56).unwrap() - ), - ); - - let p = Parser::new("a"); - assert!(p.parse_posix_offset().is_err()); - - let p = Parser::new("-"); - assert!(p.parse_posix_offset().is_err()); - - let p = Parser::new("+"); - assert!(p.parse_posix_offset().is_err()); - - let p = Parser::new("-a"); - assert!(p.parse_posix_offset().is_err()); - - let p = Parser::new("+a"); - assert!(p.parse_posix_offset().is_err()); - - let p = Parser::new("-25"); - assert!(p.parse_posix_offset().is_err()); - - let p = Parser::new("+25"); - assert!(p.parse_posix_offset().is_err()); - - // This checks that we don't accidentally permit IANA rules for - // offset parsing. Namely, the IANA tzfile v3+ extension only applies - // to transition times. But since POSIX says that the "time" for the - // offset and transition is the same format, it would be an easy - // implementation mistake to implement the more flexible rule for - // IANA and have it accidentally also apply to the offset. So we check - // that it doesn't here. 
- let p = Parser { ianav3plus: true, ..Parser::new("25") }; - assert!(p.parse_posix_offset().is_err()); - let p = Parser { ianav3plus: true, ..Parser::new("+25") }; - assert!(p.parse_posix_offset().is_err()); - let p = Parser { ianav3plus: true, ..Parser::new("-25") }; - assert!(p.parse_posix_offset().is_err()); - } - - #[test] - fn parse_rule() { - let p = Parser::new("M9.5.0,M4.1.0/3"); - assert_eq!( - p.parse_rule().unwrap(), - Rule { - start: PosixDateTimeSpec { - date: PosixDateSpec::WeekdayOfMonth(WeekdayOfMonth { - month: C(9).rinto(), - week: C(5).rinto(), - weekday: Weekday::Sunday, - }), - time: PosixTimeSpec::DEFAULT, - }, - end: PosixDateTimeSpec { - date: PosixDateSpec::WeekdayOfMonth(WeekdayOfMonth { - month: C(4).rinto(), - week: C(1).rinto(), - weekday: Weekday::Sunday, - }), - time: PosixTimeSpec { - duration: PosixTimeSeconds::new(3 * 60 * 60).unwrap(), - }, - }, - }, - ); - - let p = Parser::new("M9.5.0"); - assert!(p.parse_rule().is_err()); - - let p = Parser::new(",M9.5.0,M4.1.0/3"); - assert!(p.parse_rule().is_err()); - - let p = Parser::new("M9.5.0/"); - assert!(p.parse_rule().is_err()); - - let p = Parser::new("M9.5.0,M4.1.0/"); - assert!(p.parse_rule().is_err()); - } - - #[test] - fn parse_posix_datetime_spec() { - let p = Parser::new("J1"); - assert_eq!( - p.parse_posix_datetime_spec().unwrap(), - PosixDateTimeSpec { - date: PosixDateSpec::JulianOne(C(1).rinto()), - time: PosixTimeSpec::DEFAULT, - }, - ); - - let p = Parser::new("J1/3"); - assert_eq!( - p.parse_posix_datetime_spec().unwrap(), - PosixDateTimeSpec { - date: PosixDateSpec::JulianOne(C(1).rinto()), - time: PosixTimeSpec { - duration: PosixTimeSeconds::new(3 * 60 * 60).unwrap(), - }, - }, - ); - - let p = Parser::new("M4.1.0/3"); - assert_eq!( - p.parse_posix_datetime_spec().unwrap(), - PosixDateTimeSpec { - date: PosixDateSpec::WeekdayOfMonth(WeekdayOfMonth { - month: C(4).rinto(), - week: C(1).rinto(), - weekday: Weekday::Sunday, - }), - time: PosixTimeSpec { - duration: PosixTimeSeconds::new(3 * 60 * 60).unwrap(), - }, - }, - ); - - let p = Parser::new("1/3:45:05"); - assert_eq!( - p.parse_posix_datetime_spec().unwrap(), - PosixDateTimeSpec { - date: PosixDateSpec::JulianZero(C(1).rinto()), - time: PosixTimeSpec { - duration: PosixTimeSeconds::new(3 * 60 * 60 + 45 * 60 + 5) - .unwrap(), - }, - }, - ); - - let p = Parser::new("a"); - assert!(p.parse_posix_datetime_spec().is_err()); - - let p = Parser::new("J1/"); - assert!(p.parse_posix_datetime_spec().is_err()); - - let p = Parser::new("1/"); - assert!(p.parse_posix_datetime_spec().is_err()); - - let p = Parser::new("M4.1.0/"); - assert!(p.parse_posix_datetime_spec().is_err()); - } - - #[test] - fn parse_posix_date_spec() { - let p = Parser::new("J1"); - assert_eq!( - p.parse_posix_date_spec().unwrap(), - PosixDateSpec::JulianOne(C(1).rinto()) - ); - let p = Parser::new("J365"); - assert_eq!( - p.parse_posix_date_spec().unwrap(), - PosixDateSpec::JulianOne(C(365).rinto()) - ); - - let p = Parser::new("0"); - assert_eq!( - p.parse_posix_date_spec().unwrap(), - PosixDateSpec::JulianZero(C(0).rinto()) - ); - let p = Parser::new("1"); - assert_eq!( - p.parse_posix_date_spec().unwrap(), - PosixDateSpec::JulianZero(C(1).rinto()) - ); - let p = Parser::new("365"); - assert_eq!( - p.parse_posix_date_spec().unwrap(), - PosixDateSpec::JulianZero(C(365).rinto()) - ); - - let p = Parser::new("M9.5.0"); - assert_eq!( - p.parse_posix_date_spec().unwrap(), - PosixDateSpec::WeekdayOfMonth(WeekdayOfMonth { - month: C(9).rinto(), - week: C(5).rinto(), - weekday: 
Weekday::Sunday, - }), - ); - let p = Parser::new("M9.5.6"); - assert_eq!( - p.parse_posix_date_spec().unwrap(), - PosixDateSpec::WeekdayOfMonth(WeekdayOfMonth { - month: C(9).rinto(), - week: C(5).rinto(), - weekday: Weekday::Saturday, - }), - ); - let p = Parser::new("M09.5.6"); - assert_eq!( - p.parse_posix_date_spec().unwrap(), - PosixDateSpec::WeekdayOfMonth(WeekdayOfMonth { - month: C(9).rinto(), - week: C(5).rinto(), - weekday: Weekday::Saturday, - }), - ); - let p = Parser::new("M12.1.1"); - assert_eq!( - p.parse_posix_date_spec().unwrap(), - PosixDateSpec::WeekdayOfMonth(WeekdayOfMonth { - month: C(12).rinto(), - week: C(1).rinto(), - weekday: Weekday::Monday, - }), - ); - - let p = Parser::new("a"); - assert!(p.parse_posix_date_spec().is_err()); - - let p = Parser::new("j"); - assert!(p.parse_posix_date_spec().is_err()); - - let p = Parser::new("m"); - assert!(p.parse_posix_date_spec().is_err()); - - let p = Parser::new("n"); - assert!(p.parse_posix_date_spec().is_err()); - - let p = Parser::new("J366"); - assert!(p.parse_posix_date_spec().is_err()); - - let p = Parser::new("366"); - assert!(p.parse_posix_date_spec().is_err()); - } - - #[test] - fn parse_posix_julian_day_no_leap() { - let p = Parser::new("1"); - assert_eq!(p.parse_posix_julian_day_no_leap().unwrap(), 1); - - let p = Parser::new("001"); - assert_eq!(p.parse_posix_julian_day_no_leap().unwrap(), 1); - - let p = Parser::new("365"); - assert_eq!(p.parse_posix_julian_day_no_leap().unwrap(), 365); - - let p = Parser::new("3655"); - assert_eq!(p.parse_posix_julian_day_no_leap().unwrap(), 365); - - let p = Parser::new("0"); - assert!(p.parse_posix_julian_day_no_leap().is_err()); - - let p = Parser::new("366"); - assert!(p.parse_posix_julian_day_no_leap().is_err()); - } - - #[test] - fn parse_posix_julian_day_with_leap() { - let p = Parser::new("0"); - assert_eq!(p.parse_posix_julian_day_with_leap().unwrap(), 0); - - let p = Parser::new("1"); - assert_eq!(p.parse_posix_julian_day_with_leap().unwrap(), 1); - - let p = Parser::new("001"); - assert_eq!(p.parse_posix_julian_day_with_leap().unwrap(), 1); - - let p = Parser::new("365"); - assert_eq!(p.parse_posix_julian_day_with_leap().unwrap(), 365); - - let p = Parser::new("3655"); - assert_eq!(p.parse_posix_julian_day_with_leap().unwrap(), 365); - - let p = Parser::new("366"); - assert!(p.parse_posix_julian_day_with_leap().is_err()); - } - - #[test] - fn parse_weekday_of_month() { - let p = Parser::new("9.5.0"); - assert_eq!( - p.parse_weekday_of_month().unwrap(), - WeekdayOfMonth { - month: C(9).rinto(), - week: C(5).rinto(), - weekday: Weekday::Sunday, - }, - ); - - let p = Parser::new("9.1.6"); - assert_eq!( - p.parse_weekday_of_month().unwrap(), - WeekdayOfMonth { - month: C(9).rinto(), - week: C(1).rinto(), - weekday: Weekday::Saturday, - }, - ); - - let p = Parser::new("09.1.6"); - assert_eq!( - p.parse_weekday_of_month().unwrap(), - WeekdayOfMonth { - month: C(9).rinto(), - week: C(1).rinto(), - weekday: Weekday::Saturday, - }, - ); - - let p = Parser::new("9"); - assert!(p.parse_weekday_of_month().is_err()); - - let p = Parser::new("9."); - assert!(p.parse_weekday_of_month().is_err()); - - let p = Parser::new("9.5"); - assert!(p.parse_weekday_of_month().is_err()); - - let p = Parser::new("9.5."); - assert!(p.parse_weekday_of_month().is_err()); - - let p = Parser::new("0.5.0"); - assert!(p.parse_weekday_of_month().is_err()); - - let p = Parser::new("13.5.0"); - assert!(p.parse_weekday_of_month().is_err()); - - let p = Parser::new("9.0.0"); - 
assert!(p.parse_weekday_of_month().is_err()); - - let p = Parser::new("9.6.0"); - assert!(p.parse_weekday_of_month().is_err()); - - let p = Parser::new("9.5.7"); - assert!(p.parse_weekday_of_month().is_err()); - } - - #[test] - fn parse_posix_time_spec() { - let p = Parser::new("5"); - assert_eq!( - p.parse_posix_time_spec().unwrap(), - PosixTimeSpec { - duration: PosixTimeSeconds::new(5 * 60 * 60).unwrap() - }, - ); - - let p = Parser::new("22"); - assert_eq!( - p.parse_posix_time_spec().unwrap(), - PosixTimeSpec { - duration: PosixTimeSeconds::new(22 * 60 * 60).unwrap() - }, - ); - - let p = Parser::new("02"); - assert_eq!( - p.parse_posix_time_spec().unwrap(), - PosixTimeSpec { - duration: PosixTimeSeconds::new(2 * 60 * 60).unwrap() - }, - ); - - let p = Parser::new("5:45"); - assert_eq!( - p.parse_posix_time_spec().unwrap(), - PosixTimeSpec { - duration: PosixTimeSeconds::new(5 * 60 * 60 + 45 * 60) - .unwrap() - }, - ); - - let p = Parser::new("5:45:12"); - assert_eq!( - p.parse_posix_time_spec().unwrap(), - PosixTimeSpec { - duration: PosixTimeSeconds::new(5 * 60 * 60 + 45 * 60 + 12) - .unwrap() - }, - ); - - let p = Parser::new("5:45:129"); - assert_eq!( - p.parse_posix_time_spec().unwrap(), - PosixTimeSpec { - duration: PosixTimeSeconds::new(5 * 60 * 60 + 45 * 60 + 12) - .unwrap() - }, - ); - - let p = Parser::new("5:45:12:"); - assert_eq!( - p.parse_posix_time_spec().unwrap(), - PosixTimeSpec { - duration: PosixTimeSeconds::new(5 * 60 * 60 + 45 * 60 + 12) - .unwrap() - }, - ); - - let p = Parser { ianav3plus: true, ..Parser::new("+5:45:12") }; - assert_eq!( - p.parse_posix_time_spec().unwrap(), - PosixTimeSpec { - duration: PosixTimeSeconds::new(5 * 60 * 60 + 45 * 60 + 12) - .unwrap() - }, - ); - - let p = Parser { ianav3plus: true, ..Parser::new("-5:45:12") }; - assert_eq!( - p.parse_posix_time_spec().unwrap(), - PosixTimeSpec { - duration: PosixTimeSeconds::new(-(5 * 60 * 60 + 45 * 60 + 12)) - .unwrap() - }, - ); - - let p = Parser { ianav3plus: true, ..Parser::new("-167:45:12") }; - assert_eq!( - p.parse_posix_time_spec().unwrap(), - PosixTimeSpec { - duration: PosixTimeSeconds::new( - -(167 * 60 * 60 + 45 * 60 + 12) - ) - .unwrap() - }, - ); - - let p = Parser::new("25"); - assert!(p.parse_posix_time_spec().is_err()); - - let p = Parser::new("12:2"); - assert!(p.parse_posix_time_spec().is_err()); - - let p = Parser::new("12:"); - assert!(p.parse_posix_time_spec().is_err()); - - let p = Parser::new("12:23:5"); - assert!(p.parse_posix_time_spec().is_err()); - - let p = Parser::new("12:23:"); - assert!(p.parse_posix_time_spec().is_err()); - - let p = Parser { ianav3plus: true, ..Parser::new("168") }; - assert!(p.parse_posix_time_spec().is_err()); - - let p = Parser { ianav3plus: true, ..Parser::new("-168") }; - assert!(p.parse_posix_time_spec().is_err()); - - let p = Parser { ianav3plus: true, ..Parser::new("+168") }; - assert!(p.parse_posix_time_spec().is_err()); - } - - #[test] - fn parse_month() { - let p = Parser::new("1"); - assert_eq!(p.parse_month().unwrap(), 1); - - // Should this be allowed? POSIX spec is unclear. - // We allow it because our parse does stop at 2 - // digits, so this seems harmless. Namely, '001' - // results in an error. 
- let p = Parser::new("01"); - assert_eq!(p.parse_month().unwrap(), 1); - - let p = Parser::new("12"); - assert_eq!(p.parse_month().unwrap(), 12); - - let p = Parser::new("0"); - assert!(p.parse_month().is_err()); - - let p = Parser::new("00"); - assert!(p.parse_month().is_err()); - - let p = Parser::new("001"); - assert!(p.parse_month().is_err()); - - let p = Parser::new("13"); - assert!(p.parse_month().is_err()); - } - - #[test] - fn parse_week() { - let p = Parser::new("1"); - assert_eq!(p.parse_week().unwrap(), 1); - - let p = Parser::new("5"); - assert_eq!(p.parse_week().unwrap(), 5); - - let p = Parser::new("55"); - assert_eq!(p.parse_week().unwrap(), 5); - - let p = Parser::new("0"); - assert!(p.parse_week().is_err()); - - let p = Parser::new("6"); - assert!(p.parse_week().is_err()); - - let p = Parser::new("00"); - assert!(p.parse_week().is_err()); - - let p = Parser::new("01"); - assert!(p.parse_week().is_err()); - - let p = Parser::new("05"); - assert!(p.parse_week().is_err()); - } - - #[test] - fn parse_weekday() { - let p = Parser::new("0"); - assert_eq!(p.parse_weekday().unwrap(), Weekday::Sunday); - - let p = Parser::new("1"); - assert_eq!(p.parse_weekday().unwrap(), Weekday::Monday); - - let p = Parser::new("6"); - assert_eq!(p.parse_weekday().unwrap(), Weekday::Saturday); - - let p = Parser::new("00"); - assert_eq!(p.parse_weekday().unwrap(), Weekday::Sunday); - - let p = Parser::new("06"); - assert_eq!(p.parse_weekday().unwrap(), Weekday::Sunday); - - let p = Parser::new("60"); - assert_eq!(p.parse_weekday().unwrap(), Weekday::Saturday); - - let p = Parser::new("7"); - assert!(p.parse_weekday().is_err()); - } - - #[test] - fn parse_hour_posix() { - let p = Parser::new("5"); - assert_eq!(p.parse_hour_posix().unwrap(), 5); - - let p = Parser::new("0"); - assert_eq!(p.parse_hour_posix().unwrap(), 0); - - let p = Parser::new("00"); - assert_eq!(p.parse_hour_posix().unwrap(), 0); - - let p = Parser::new("24"); - assert_eq!(p.parse_hour_posix().unwrap(), 24); - - let p = Parser::new("100"); - assert_eq!(p.parse_hour_posix().unwrap(), 10); - - let p = Parser::new("25"); - assert!(p.parse_hour_posix().is_err()); - - let p = Parser::new("99"); - assert!(p.parse_hour_posix().is_err()); - } - - #[test] - fn parse_hour_ianav3plus() { - let new = |input| Parser { ianav3plus: true, ..Parser::new(input) }; - - let p = new("5"); - assert_eq!(p.parse_hour_ianav3plus().unwrap(), 5); - - let p = new("0"); - assert_eq!(p.parse_hour_ianav3plus().unwrap(), 0); - - let p = new("00"); - assert_eq!(p.parse_hour_ianav3plus().unwrap(), 0); - - let p = new("000"); - assert_eq!(p.parse_hour_ianav3plus().unwrap(), 0); - - let p = new("24"); - assert_eq!(p.parse_hour_ianav3plus().unwrap(), 24); - - let p = new("100"); - assert_eq!(p.parse_hour_ianav3plus().unwrap(), 100); - - let p = new("1000"); - assert_eq!(p.parse_hour_ianav3plus().unwrap(), 100); - - let p = new("167"); - assert_eq!(p.parse_hour_ianav3plus().unwrap(), 167); - - let p = new("168"); - assert!(p.parse_hour_ianav3plus().is_err()); - - let p = new("999"); - assert!(p.parse_hour_ianav3plus().is_err()); - } - - #[test] - fn parse_minute() { - let p = Parser::new("00"); - assert_eq!(p.parse_minute().unwrap(), 0); - - let p = Parser::new("24"); - assert_eq!(p.parse_minute().unwrap(), 24); - - let p = Parser::new("59"); - assert_eq!(p.parse_minute().unwrap(), 59); - - let p = Parser::new("599"); - assert_eq!(p.parse_minute().unwrap(), 59); - - let p = Parser::new("0"); - assert!(p.parse_minute().is_err()); - - let p = Parser::new("1"); - 
assert!(p.parse_minute().is_err()); - - let p = Parser::new("9"); - assert!(p.parse_minute().is_err()); - - let p = Parser::new("60"); - assert!(p.parse_minute().is_err()); - } - - #[test] - fn parse_second() { - let p = Parser::new("00"); - assert_eq!(p.parse_second().unwrap(), 0); - - let p = Parser::new("24"); - assert_eq!(p.parse_second().unwrap(), 24); - - let p = Parser::new("59"); - assert_eq!(p.parse_second().unwrap(), 59); - - let p = Parser::new("599"); - assert_eq!(p.parse_second().unwrap(), 59); - - let p = Parser::new("0"); - assert!(p.parse_second().is_err()); - - let p = Parser::new("1"); - assert!(p.parse_second().is_err()); - - let p = Parser::new("9"); - assert!(p.parse_second().is_err()); - - let p = Parser::new("60"); - assert!(p.parse_second().is_err()); - } - - #[test] - fn parse_number_with_exactly_n_digits() { - let p = Parser::new("1"); - assert_eq!(p.parse_number_with_exactly_n_digits(1).unwrap(), 1); - - let p = Parser::new("12"); - assert_eq!(p.parse_number_with_exactly_n_digits(2).unwrap(), 12); - - let p = Parser::new("123"); - assert_eq!(p.parse_number_with_exactly_n_digits(2).unwrap(), 12); - - let p = Parser::new(""); - assert!(p.parse_number_with_exactly_n_digits(1).is_err()); - - let p = Parser::new("1"); - assert!(p.parse_number_with_exactly_n_digits(2).is_err()); - - let p = Parser::new("12"); - assert!(p.parse_number_with_exactly_n_digits(3).is_err()); - } - - #[test] - fn parse_number_with_upto_n_digits() { - let p = Parser::new("1"); - assert_eq!(p.parse_number_with_upto_n_digits(1).unwrap(), 1); - - let p = Parser::new("1"); - assert_eq!(p.parse_number_with_upto_n_digits(2).unwrap(), 1); - - let p = Parser::new("12"); - assert_eq!(p.parse_number_with_upto_n_digits(2).unwrap(), 12); - - let p = Parser::new("12"); - assert_eq!(p.parse_number_with_upto_n_digits(3).unwrap(), 12); - - let p = Parser::new("123"); - assert_eq!(p.parse_number_with_upto_n_digits(2).unwrap(), 12); - - let p = Parser::new(""); - assert!(p.parse_number_with_upto_n_digits(1).is_err()); - - let p = Parser::new("a"); - assert!(p.parse_number_with_upto_n_digits(1).is_err()); + assert!(ReasonablePosixTimeZone::parse("America/New_York").is_err()); + assert!(ReasonablePosixTimeZone::parse(":America/New_York").is_err()); } } diff --git a/src/tz/testdata.rs b/src/tz/testdata.rs index 8a6fba5..dddc019 100644 --- a/src/tz/testdata.rs +++ b/src/tz/testdata.rs @@ -1,6 +1,6 @@ use alloc::string::ToString; -use crate::tz::tzif::Tzif; +use crate::tz::tzif::TzifOwned; /// A concatenated list of TZif data with a header and an index block. /// @@ -94,20 +94,20 @@ impl TzifTestFile { /// Parse this test TZif data into a structured representation. #[cfg(not(miri))] - pub(crate) fn parse(self) -> Tzif { + pub(crate) fn parse(self) -> TzifOwned { let name = Some(self.name.to_string()); - Tzif::parse(name, self.data).unwrap_or_else(|err| { + TzifOwned::parse(name, self.data).unwrap_or_else(|err| { panic!("failed to parse TZif test file for {:?}: {err}", self.name) }) } /// Parse this test TZif data as if it were V1. 
#[cfg(not(miri))] - pub(crate) fn parse_v1(self) -> Tzif { + pub(crate) fn parse_v1(self) -> TzifOwned { let name = Some(self.name.to_string()); let mut data = self.data.to_vec(); data[4] = 0; - Tzif::parse(name, &data).unwrap_or_else(|err| { + TzifOwned::parse(name, &data).unwrap_or_else(|err| { panic!( "failed to parse V1 TZif test file for {:?}: {err}", self.name diff --git a/src/tz/timezone.rs b/src/tz/timezone.rs index 80b658b..65f68ae 100644 --- a/src/tz/timezone.rs +++ b/src/tz/timezone.rs @@ -1534,7 +1534,7 @@ impl core::fmt::Display for TimeZonePosix { #[cfg(feature = "alloc")] #[derive(Eq, PartialEq)] struct TimeZoneTzif { - tzif: crate::tz::tzif::Tzif, + tzif: crate::tz::tzif::TzifOwned, } #[cfg(feature = "alloc")] diff --git a/src/tz/tzif.rs b/src/tz/tzif.rs index 989cb89..1c4370b 100644 --- a/src/tz/tzif.rs +++ b/src/tz/tzif.rs @@ -10,23 +10,28 @@ These binary files are the ones commonly found in Unix distributions in the use core::ops::Range; -use alloc::{string::String, vec, vec::Vec}; +#[cfg(feature = "alloc")] +use alloc::{string::String, vec::Vec}; use crate::{ civil::DateTime, - error::{err, Error, ErrorContext}, + error::Error, + shared, timestamp::Timestamp, tz::{ posix::ReasonablePosixTimeZone, timezone::TimeZoneAbbreviation, AmbiguousOffset, Dst, Offset, TimeZoneOffsetInfo, TimeZoneTransition, }, - util::{ - crc32, - escape::{Byte, Bytes}, - t::UnixSeconds, - }, }; +/// The owned variant of `Tzif`. +#[cfg(feature = "alloc")] +pub(crate) type TzifOwned = Tzif, Vec>; + +/// The static variant of `Tzif`. +pub(crate) type TzifStatic = + Tzif<&'static str, &'static [LocalTimeType], &'static [Transition]>; + /// A time zone based on IANA TZif formatted data. /// /// TZif is a binary format described by RFC 8536. Its typical structure is to @@ -39,8 +44,9 @@ use crate::{ /// contents of TZif formatted data in memory, and turning it into a data type /// that can be used as a time zone. #[derive(Debug)] -pub(crate) struct Tzif { - name: Option, +#[doc(hidden)] // not part of Jiff's public API +pub struct Tzif { + name: Option, /// An ASCII byte corresponding to the version number. So, 0x50 is '2'. /// /// This is unused. It's only used in `test` compilation for emitting @@ -49,13 +55,85 @@ pub(crate) struct Tzif { #[allow(dead_code)] version: u8, checksum: u32, - transitions: Vec, - types: Vec, - designations: String, + designations: STRING, posix_tz: Option, + types: TYPES, + transitions: TRANS, } -impl Tzif { +impl TzifStatic { + /// Converts from the shared-but-internal API for use in proc macros. + /// + /// This specifically works in a `const` context. And it requires that + /// caller to pass in the parsed `Tzif` in its fixed form along with the + /// variable length local time types and transitions. (Technically, the + /// TZ identifier and the designations are also variable length despite + /// being parsed of `TzifFixed`, but in practice they can be handled just + /// fine via `&'static str`.) + /// + /// Notice that the `types` and `transitions` are *not* from the `shared` + /// API, but rather, from the types defined in this module. They have to + /// be this way because there's a conversion step that occurs. In practice, + /// this sort of thing is embedded as a literal in source code via a proc + /// macro. 
Like this: + /// + /// ```text + /// static TZIF: Tzif<&str, &[LocalTimeType], &[Transition]> = + /// Tzif::from_shared_const( + /// shared::TzifFixed { + /// name: Some("America/New_York"), + /// version: b'3', + /// checksum: 0xDEADBEEF, + /// designations: "ESTEDT", + /// posix_tz: None, + /// }, + /// &[ + /// shared::TzifLocalTimeType { + /// offset: -5 * 60 * 60, + /// is_dst: false, + /// designation: 0..3, + /// indicator: shared::TzifIndicator::LocalWall, + /// }.to_jiff(), + /// ], + /// &[ + /// shared::TzifTransition { + /// timestamp: 123456789, + /// type_index: 0, + /// }.to_jiff(-5, -5), + /// ], + /// ); + /// ``` + /// + /// Or something like that anyway. The point is, our `static` slices are + /// variable length and they need to be the right types. At least, I + /// couldn't see a simpler way to arrange this. + pub(crate) const fn from_shared_const( + sh: &shared::TzifFixed<&'static str>, + types: &'static [LocalTimeType], + transitions: &'static [Transition], + ) -> TzifStatic { + let name = sh.name; + let version = sh.version; + let checksum = sh.checksum; + let designations = sh.designations; + let posix_tz = match sh.posix_tz { + None => None, + Some(ref tz) => Some(tz.to_jiff()), + }; + Tzif { + name, + version, + checksum, + designations, + posix_tz, + types, + transitions, + } + } +} + +#[cfg(feature = "alloc")] +impl TzifOwned { /// Parses the given data as a TZif formatted file. /// /// The name given is attached to the `Tzif` value returned, but is @@ -73,28 +151,61 @@ impl Tzif { pub(crate) fn parse( name: Option, bytes: &[u8], - ) -> Result { - let original = bytes; - let name = name.into(); - let (header32, rest) = Header::parse(4, bytes) - .map_err(|e| e.context("failed to parse 32-bit header"))?; - let (mut tzif, rest) = if header32.version == 0 { - Tzif::parse32(name, header32, rest)? - } else { - Tzif::parse64(name, header32, rest)? - }; - // Compute the checksum using the entire contents of the TZif data. - let tzif_raw_len = (rest.as_ptr() as usize) - .checked_sub(original.as_ptr() as usize) - .unwrap(); - let tzif_raw_bytes = &original[..tzif_raw_len]; - tzif.checksum = crc32::sum(tzif_raw_bytes); + ) -> Result { + let sh = + shared::TzifOwned::parse(name, bytes).map_err(Error::adhoc)?; + let tzif = TzifOwned::from_shared_owned(&sh); Ok(tzif) } + /// Converts from the shared-but-internal API for use in proc macros. + /// + /// This is not `const` since it accepts owned `String` and `Vec` values + /// for variable length data inside `Tzif`. + pub(crate) fn from_shared_owned(sh: &shared::TzifOwned) -> TzifOwned { + let name = sh.fixed.name.clone(); + let version = sh.fixed.version; + let checksum = sh.fixed.checksum; + let designations = sh.fixed.designations.clone(); + let posix_tz = match sh.fixed.posix_tz { + None => None, + Some(ref tz) => { + let tz = + crate::tz::posix::PosixTimeZone::from_shared_owned(tz); + // OK because `shared::tzif` returns an error otherwise. 
+ Some(tz.reasonable().unwrap()) + } + }; + let types: Vec = + sh.types.iter().map(shared::TzifLocalTimeType::to_jiff).collect(); + let mut transitions = Vec::with_capacity(sh.transitions.len()); + for (i, this) in sh.transitions.iter().enumerate() { + let prev = &sh.transitions[i.saturating_sub(1)]; + let prev_offset = sh.types[usize::from(prev.type_index)].offset; + let this_offset = sh.types[usize::from(this.type_index)].offset; + transitions.push(this.to_jiff(prev_offset, this_offset)); + } + Tzif { + name, + version, + checksum, + designations, + posix_tz, + types, + transitions, + } + } +} + +impl< + STRING: AsRef, + TYPES: AsRef<[LocalTimeType]>, + TRANS: AsRef<[Transition]>, + > Tzif +{ /// Returns the name given to this TZif data in its constructor. pub(crate) fn name(&self) -> Option<&str> { - self.name.as_deref() + self.name.as_ref().map(|n| n.as_ref()) } /// Returns the appropriate time zone offset to use for the given @@ -146,12 +257,13 @@ impl Tzif { // // The result of the dummy transition is that the code below is simpler // with fewer special cases. - assert!(!self.transitions.is_empty(), "transitions is non-empty"); - let index = if timestamp > self.transitions.last().unwrap().timestamp { - self.transitions.len() - 1 + assert!(!self.transitions().is_empty(), "transitions is non-empty"); + let index = if timestamp > self.transitions().last().unwrap().timestamp + { + self.transitions().len() - 1 } else { let search = self - .transitions + .transitions() // It is an optimization to compare only by the second instead // of the second and the nanosecond. This works for two // reasons. Firstly, the timestamps in TZif are limited to @@ -181,16 +293,16 @@ impl Tzif { // binary search returns an Err(len) for a time greater than the // maximum transition. But we account for that above by converting // Err(len) to Err(len-1). - assert!(index < self.transitions.len()); + assert!(index < self.transitions().len()); // RFC 8536 says: "Local time for timestamps on or after the last // transition is specified by the TZ string in the footer (Section 3.3) // if present and nonempty; otherwise, it is unspecified." // // Subtracting 1 is OK because we know self.transitions is not empty. - let t = if index < self.transitions.len() - 1 { + let t = if index < self.transitions().len() - 1 { // This is the typical case in "fat" TZif files: we found a // matching transition. - &self.transitions[index] + &self.transitions()[index] } else { match self.posix_tz.as_ref() { // This is the typical case in "slim" TZif files, where the @@ -207,7 +319,7 @@ impl Tzif { // This case is technically unspecified, but I think the // typical thing to do is to just use the last transition. // I'm not 100% sure on this one. - None => &self.transitions[index], + None => &self.transitions()[index], } }; Ok(self.local_time_type(t)) @@ -231,17 +343,17 @@ impl Tzif { // of timestamps. And in particular, each transition begins with a // possibly ambiguous range of wall clock times corresponding to either // a "gap" or "fold" in time. 
- assert!(!self.transitions.is_empty(), "transitions is non-empty"); + assert!(!self.transitions().is_empty(), "transitions is non-empty"); let search = - self.transitions.binary_search_by_key(&dt, |t| t.wall.start()); + self.transitions().binary_search_by_key(&dt, |t| t.wall.start()); let this_index = match search { Err(0) => unreachable!("impossible to come before DateTime::MIN"), Ok(i) => i, Err(i) => i.checked_sub(1).expect("i is non-zero"), }; - assert!(this_index < self.transitions.len()); + assert!(this_index < self.transitions().len()); - let this = &self.transitions[this_index]; + let this = &self.transitions()[this_index]; let this_offset = self.local_time_type(this).offset; // This is a little tricky, but we need to check for ambiguous civil // datetimes before possibly using the POSIX TZ string. Namely, a @@ -254,7 +366,7 @@ impl Tzif { // A gap/fold can only appear when there exists a previous // transition. let prev_index = this_index.checked_sub(1).unwrap(); - let prev = &self.transitions[prev_index]; + let prev = &self.transitions()[prev_index]; let prev_offset = self.local_time_type(prev).offset; return AmbiguousOffset::Gap { before: prev_offset, @@ -265,7 +377,7 @@ impl Tzif { // A gap/fold can only appear when there exists a previous // transition. let prev_index = this_index.checked_sub(1).unwrap(); - let prev = &self.transitions[prev_index]; + let prev = &self.transitions()[prev_index]; let prev_offset = self.local_time_type(prev).offset; return AmbiguousOffset::Fold { before: prev_offset, @@ -278,7 +390,7 @@ impl Tzif { // transitions in the TZif data. But, if we matched at or after the // last transition, then we need to use the POSIX TZ string (which // could still return an ambiguous offset). - if this_index == self.transitions.len() - 1 { + if this_index == self.transitions().len() - 1 { if let Some(tz) = self.posix_tz.as_ref() { return tz.to_ambiguous_kind(dt); } @@ -300,9 +412,9 @@ impl Tzif { &self, ts: Timestamp, ) -> Option { - assert!(!self.transitions.is_empty(), "transitions is non-empty"); + assert!(!self.transitions().is_empty(), "transitions is non-empty"); let search = - self.transitions.binary_search_by_key(&ts, |t| t.timestamp); + self.transitions().binary_search_by_key(&ts, |t| t.timestamp); let index = match search { Ok(i) | Err(i) => i.checked_sub(1)?, }; @@ -310,7 +422,7 @@ impl Tzif { // The first transition is a dummy that we insert, so if we land on // it here, treat it as if it doesn't exist. return None; - } else if index == self.transitions.len() - 1 { + } else if index == self.transitions().len() - 1 { if let Some(ref posix_tz) = self.posix_tz { // Since the POSIX TZ must be consistent with the last // transition, it must be the case that tzif_last <= @@ -322,11 +434,11 @@ impl Tzif { // of the TZif format if it does. 
return posix_tz.previous_transition(ts); } - &self.transitions[index] + &self.transitions()[index] } else { - &self.transitions[index] + &self.transitions()[index] }; - let typ = &self.types[usize::from(trans.type_index)]; + let typ = &self.types()[usize::from(trans.type_index)]; Some(TimeZoneTransition { timestamp: trans.timestamp, offset: typ.offset, @@ -341,9 +453,9 @@ impl Tzif { &self, ts: Timestamp, ) -> Option { - assert!(!self.transitions.is_empty(), "transitions is non-empty"); + assert!(!self.transitions().is_empty(), "transitions is non-empty"); let search = - self.transitions.binary_search_by_key(&ts, |t| t.timestamp); + self.transitions().binary_search_by_key(&ts, |t| t.timestamp); let index = match search { Ok(i) => i.checked_add(1)?, Err(i) => i, @@ -352,7 +464,7 @@ impl Tzif { // The first transition is a dummy that we insert, so if we land on // it here, treat it as if it doesn't exist. return None; - } else if index >= self.transitions.len() - 1 { + } else if index >= self.transitions().len() - 1 { if let Some(ref posix_tz) = self.posix_tz { // Since the POSIX TZ must be consistent with the last // transition, it must be the case that next.timestamp <= @@ -364,11 +476,11 @@ impl Tzif { // of the TZif format if it does. return posix_tz.next_transition(ts); } - self.transitions.last().expect("last transition") + self.transitions().last().expect("last transition") } else { - &self.transitions[index] + &self.transitions()[index] }; - let typ = &self.types[usize::from(trans.type_index)]; + let typ = &self.types()[usize::from(trans.type_index)]; Some(TimeZoneTransition { timestamp: trans.timestamp, offset: typ.offset, @@ -380,538 +492,44 @@ impl Tzif { fn designation(&self, typ: &LocalTimeType) -> &str { // OK because we verify that the designation range on every local // time type is a valid range into `self.designations`. - &self.designations[typ.designation()] + &self.designations()[typ.designation()] } fn local_time_type(&self, transition: &Transition) -> &LocalTimeType { // OK because we require that `type_index` always points to a valid // local time type. - &self.types[usize::from(transition.type_index)] + &self.types()[usize::from(transition.type_index)] } - fn first_transition(&self) -> &Transition { - // OK because we know we have at least one transition. This isn't - // true generally of the TZif format, since it does actually permit 0 - // transitions. But as part of parsing, we always add a "dummy" first - // transition corresponding to the minimum possible Jiff timestamp. - // This makes some logic for transition lookups a little simpler by - // reducing special cases. 
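The dummy minimum-timestamp transition described above is what keeps the lookup logic simple: a binary search can always step back one slot without falling off the front of the list. A stripped-down sketch with plain integers (not jiff's `Timestamp` or `Transition` types):

```rust
// Illustrative sketch only: find the transition governing `ts` in a sorted
// list that starts with a dummy `i64::MIN` entry, so "before the first real
// transition" needs no special case.
fn governing_transition(transitions: &[(i64, usize)], ts: i64) -> usize {
    debug_assert_eq!(transitions[0].0, i64::MIN, "dummy transition expected");
    match transitions.binary_search_by_key(&ts, |&(t, _)| t) {
        // Exact hit: the transition starting at `ts` governs it.
        Ok(i) => i,
        // Otherwise the governing transition is the last one starting before
        // `ts`; `i` is never 0 thanks to the dummy entry.
        Err(i) => i - 1,
    }
}

fn main() {
    // (timestamp, local time type index)
    let transitions = [(i64::MIN, 0), (100, 1), (200, 2)];
    assert_eq!(governing_transition(&transitions, 50), 0); // dummy applies
    assert_eq!(governing_transition(&transitions, 100), 1); // exact boundary
    assert_eq!(governing_transition(&transitions, 150), 1);
    assert_eq!(governing_transition(&transitions, 999), 2); // after the last
}
```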
- self.transitions.first().unwrap() + fn designations(&self) -> &str { + self.designations.as_ref() } - fn parse32<'b>( - name: Option, - header32: Header, - bytes: &'b [u8], - ) -> Result<(Tzif, &'b [u8]), Error> { - let mut tzif = Tzif { - name, - version: header32.version, - // filled in later - checksum: 0, - transitions: vec![], - types: vec![], - designations: String::new(), - posix_tz: None, - }; - let rest = tzif.parse_transitions(&header32, bytes)?; - let rest = tzif.parse_transition_types(&header32, rest)?; - let rest = tzif.parse_local_time_types(&header32, rest)?; - let rest = tzif.parse_time_zone_designations(&header32, rest)?; - let rest = tzif.parse_leap_seconds(&header32, rest)?; - let rest = tzif.parse_indicators(&header32, rest)?; - tzif.set_wall_datetimes(); - Ok((tzif, rest)) + fn types(&self) -> &[LocalTimeType] { + self.types.as_ref() } - fn parse64<'b>( - name: Option, - header32: Header, - bytes: &'b [u8], - ) -> Result<(Tzif, &'b [u8]), Error> { - let (_, rest) = try_split_at( - "V1 TZif data block", - bytes, - header32.data_block_len()?, - )?; - let (header64, rest) = Header::parse(8, rest) - .map_err(|e| e.context("failed to parse 64-bit header"))?; - let mut tzif = Tzif { - name, - version: header64.version, - // filled in later - checksum: 0, - transitions: vec![], - types: vec![], - designations: String::new(), - posix_tz: None, - }; - let rest = tzif.parse_transitions(&header64, rest)?; - let rest = tzif.parse_transition_types(&header64, rest)?; - let rest = tzif.parse_local_time_types(&header64, rest)?; - let rest = tzif.parse_time_zone_designations(&header64, rest)?; - let rest = tzif.parse_leap_seconds(&header64, rest)?; - let rest = tzif.parse_indicators(&header64, rest)?; - let rest = tzif.parse_footer(&header64, rest)?; - // Validates that the POSIX TZ string we parsed (if one exists) is - // consistent with the last transition in this time zone. This is - // required by RFC 8536. - // - // RFC 8536 says, "If the string is nonempty and one or more - // transitions appear in the version 2+ data, the string MUST be - // consistent with the last version 2+ transition." - // - // We need to be a little careful, since we always have at least one - // transition (accounting for the dummy `Timestamp::MIN` transition). - // So if we only have 1 transition and a POSIX TZ string, then we - // should not validate it since it's equivalent to the case of 0 - // transitions and a POSIX TZ string. - if tzif.transitions.len() > 1 { - if let Some(ref tz) = tzif.posix_tz { - let last = tzif.transitions.last().expect("last transition"); - let typ = tzif.local_time_type(last); - let info = tz.to_offset_info(last.timestamp); - if info.offset() != typ.offset { - return Err(err!( - "expected last transition to have DST offset \ - of {}, but got {} according to POSIX TZ \ - string {}", - typ.offset, - info.offset(), - tz, - )); - } - if info.dst() != typ.is_dst { - return Err(err!( - "expected last transition to have is_dst={}, \ - but got is_dst={} according to POSIX TZ \ - string {}", - typ.is_dst.is_dst(), - info.dst().is_dst(), - tz, - )); - } - if info.abbreviation() != tzif.designation(&typ) { - return Err(err!( - "expected last transition to have \ - designation={}, \ - but got designation={} according to POSIX TZ \ - string {}", - info.abbreviation(), - tzif.designation(&typ), - tz, - )); - } - } - } - tzif.set_wall_datetimes(); - // N.B. We don't check that the TZif data is fully valid. It - // is possible for it to contain superfluous information. 
For - // example, a non-zero local time type that is never referenced - // by a transition. - Ok((tzif, rest)) - } - - fn parse_transitions<'b>( - &mut self, - header: &Header, - bytes: &'b [u8], - ) -> Result<&'b [u8], Error> { - let (bytes, rest) = try_split_at( - "transition times data block", - bytes, - header.transition_times_len()?, - )?; - let mut it = bytes.chunks_exact(header.time_size); - // RFC 8536 says: "If there are no transitions, local time for all - // timestamps is specified by the TZ string in the footer if present - // and nonempty; otherwise, it is specified by time type 0." - // - // RFC 8536 also says: "Local time for timestamps before the first - // transition is specified by the first time type (time type - // 0)." - // - // So if there are no transitions, pushing this dummy one will result - // in the desired behavior even when it's the only transition. - // Similarly, since this is the minimum timestamp value, it will - // trigger for any times before the first transition found in the TZif - // data. - self.transitions.push(Transition { - timestamp: Timestamp::MIN, - wall: TransitionWall::Unambiguous { start: DateTime::MIN }, - type_index: 0, - }); - while let Some(chunk) = it.next() { - let seconds = if header.is_32bit() { - i64::from(from_be_bytes_i32(chunk)) - } else { - from_be_bytes_i64(chunk) - }; - let timestamp = - Timestamp::from_second(seconds).unwrap_or_else(|_| { - // We really shouldn't error here just because the Unix - // timestamp is outside what Jiff supports. Since what Jiff - // supports is _somewhat_ arbitrary. But Jiff's supported - // range is good enough for all realistic purposes, so we - // just clamp an out-of-range Unix timestamp to the Jiff - // min or max value. - // - // This can't result in the sorting order being wrong, but - // it can result in a transition that is duplicative with - // the dummy transition we inserted above. This should be - // fine. - let clamped = seconds - .clamp(UnixSeconds::MIN_REPR, UnixSeconds::MAX_REPR); - warn!( - "found Unix timestamp {seconds} that is outside \ - Jiff's supported range, clamping to {clamped}", - ); - // Guaranteed to succeed since we clamped `seconds` such - // that it is in the supported range of `Timestamp`. - Timestamp::from_second(clamped).unwrap() - }); - self.transitions.push(Transition { - timestamp, - // We can't compute the wall clock times until we know the - // actual offset for the transition prior to this one. We don't - // know that until we parse the local time types. - wall: TransitionWall::Unambiguous { - start: DateTime::default(), - }, - // We can't fill in the type index either. We fill this in - // later when we parse the transition types. - type_index: 0, - }); - } - assert!(it.remainder().is_empty()); - Ok(rest) - } - - fn parse_transition_types<'b>( - &mut self, - header: &Header, - bytes: &'b [u8], - ) -> Result<&'b [u8], Error> { - let (bytes, rest) = try_split_at( - "transition types data block", - bytes, - header.transition_types_len()?, - )?; - // We start our transition indices at 1 because we always insert a - // dummy first transition corresponding to `Timestamp::MIN`. Its type - // index is always 0, so there's no need to change it here. 
- for (transition_index, &type_index) in (1..).zip(bytes) { - if usize::from(type_index) >= header.tzh_typecnt { - return Err(err!( - "found transition type index {type_index}, - but there are only {} local time types", - header.tzh_typecnt, - )); - } - self.transitions[transition_index].type_index = type_index; - } - Ok(rest) - } - - fn parse_local_time_types<'b>( - &mut self, - header: &Header, - bytes: &'b [u8], - ) -> Result<&'b [u8], Error> { - let (bytes, rest) = try_split_at( - "local time types data block", - bytes, - header.local_time_types_len()?, - )?; - let mut it = bytes.chunks_exact(6); - while let Some(chunk) = it.next() { - let offset_seconds = from_be_bytes_i32(&chunk[..4]); - let offset = - Offset::from_seconds(offset_seconds).map_err(|e| { - err!( - "found local time type with out-of-bounds offset: {e}" - ) - })?; - let is_dst = Dst::from(chunk[4] == 1); - let designation = chunk[5]..chunk[5]; - self.types.push(LocalTimeType { - offset, - is_dst, - designation, - indicator: Indicator::LocalWall, - }); - } - assert!(it.remainder().is_empty()); - Ok(rest) - } - - fn parse_time_zone_designations<'b>( - &mut self, - header: &Header, - bytes: &'b [u8], - ) -> Result<&'b [u8], Error> { - let (bytes, rest) = try_split_at( - "time zone designations data block", - bytes, - header.time_zone_designations_len()?, - )?; - self.designations = - String::from_utf8(bytes.to_vec()).map_err(|_| { - err!( - "time zone designations are not valid UTF-8: {:?}", - Bytes(bytes), - ) - })?; - // Holy hell, this is brutal. The boundary conditions are crazy. - for (i, typ) in self.types.iter_mut().enumerate() { - let start = usize::from(typ.designation.start); - let Some(suffix) = self.designations.get(start..) else { - return Err(err!( - "local time type {i} has designation index of {start}, \ - but cannot be more than {}", - self.designations.len(), - )); - }; - let Some(len) = suffix.find('\x00') else { - return Err(err!( - "local time type {i} has designation index of {start}, \ - but could not find NUL terminator after it in \ - designations: {:?}", - self.designations, - )); - }; - let Some(end) = start.checked_add(len) else { - return Err(err!( - "local time type {i} has designation index of {start}, \ - but its length {len} is too big", - )); - }; - typ.designation.end = u8::try_from(end).map_err(|_| { - err!( - "local time type {i} has designation range of \ - {start}..{end}, but end is too big", - ) - })?; - } - Ok(rest) - } - - /// This parses the leap second corrections in the TZif data. - /// - /// Note that we only parse and verify them. We don't actually use them. - /// Jiff effectively ignores leap seconds. 
- fn parse_leap_seconds<'b>( - &mut self, - header: &Header, - bytes: &'b [u8], - ) -> Result<&'b [u8], Error> { - let (bytes, rest) = try_split_at( - "leap seconds data block", - bytes, - header.leap_second_len()?, - )?; - let chunk_len = header - .time_size - .checked_add(4) - .expect("time_size plus 4 fits in usize"); - let mut it = bytes.chunks_exact(chunk_len); - while let Some(chunk) = it.next() { - let (occur_bytes, _corr_bytes) = chunk.split_at(header.time_size); - let occur_seconds = if header.is_32bit() { - i64::from(from_be_bytes_i32(occur_bytes)) - } else { - from_be_bytes_i64(occur_bytes) - }; - let _ = Timestamp::from_second(occur_seconds).map_err(|e| { - err!( - "leap second occurrence {occur_seconds} \ - is out of range: {e}" - ) - })?; - } - assert!(it.remainder().is_empty()); - Ok(rest) - } - - fn parse_indicators<'b>( - &mut self, - header: &Header, - bytes: &'b [u8], - ) -> Result<&'b [u8], Error> { - let (std_wall_bytes, rest) = try_split_at( - "standard/wall indicators data block", - bytes, - header.standard_wall_len()?, - )?; - let (ut_local_bytes, rest) = try_split_at( - "UT/local indicators data block", - rest, - header.ut_local_len()?, - )?; - if std_wall_bytes.is_empty() && !ut_local_bytes.is_empty() { - // This is a weird case, but technically possible only if all - // UT/local indicators are 0. If any are 1, then it's an error, - // because it would require the corresponding std/wall indicator - // to be 1 too. Which it can't be, because there aren't any. So - // we just check that they're all zeros. - for (i, &byte) in ut_local_bytes.iter().enumerate() { - if byte != 0 { - return Err(err!( - "found UT/local indicator '{byte}' for local time \ - type {i}, but it must be 0 since all std/wall \ - indicators are 0", - )); - } - } - } else if !std_wall_bytes.is_empty() && ut_local_bytes.is_empty() { - for (i, &byte) in std_wall_bytes.iter().enumerate() { - // Indexing is OK because Header guarantees that the number of - // indicators is 0 or equal to the number of types. - self.types[i].indicator = if byte == 0 { - Indicator::LocalWall - } else if byte == 1 { - Indicator::LocalStandard - } else { - return Err(err!( - "found invalid std/wall indicator '{byte}' for \ - local time type {i}, it must be 0 or 1", - )); - }; - } - } else if !std_wall_bytes.is_empty() && !ut_local_bytes.is_empty() { - assert_eq!(std_wall_bytes.len(), ut_local_bytes.len()); - let it = std_wall_bytes.iter().zip(ut_local_bytes); - for (i, (&stdwall, &utlocal)) in it.enumerate() { - // Indexing is OK because Header guarantees that the number of - // indicators is 0 or equal to the number of types. - self.types[i].indicator = match (stdwall, utlocal) { - (0, 0) => Indicator::LocalWall, - (1, 0) => Indicator::LocalStandard, - (1, 1) => Indicator::UTStandard, - (0, 1) => { - return Err(err!( - "found illegal ut-wall combination for \ - local time type {i}, only local-wall, local-standard \ - and ut-standard are allowed", - )) - } - _ => { - return Err(err!( - "found illegal std/wall or ut/local value for \ - local time type {i}, each must be 0 or 1", - )) - } - }; - } - } else { - // If they're both empty then we don't need to do anything. Every - // local time type record already has the correct default for this - // case set. 
- debug_assert!(std_wall_bytes.is_empty()); - debug_assert!(ut_local_bytes.is_empty()); - } - Ok(rest) - } - - fn parse_footer<'b>( - &mut self, - _header: &Header, - bytes: &'b [u8], - ) -> Result<&'b [u8], Error> { - if bytes.is_empty() { - return Err(err!( - "invalid V2+ TZif footer, expected \\n, \ - but found unexpected end of data", - )); - } - if bytes[0] != b'\n' { - return Err(err!( - "invalid V2+ TZif footer, expected {:?}, but found {:?}", - Byte(b'\n'), - Byte(bytes[0]), - )); - } - let bytes = &bytes[1..]; - // Only scan up to 1KB for a NUL terminator in case we somehow got - // passed a huge block of bytes. - let toscan = &bytes[..bytes.len().min(1024)]; - let Some(nlat) = toscan.iter().position(|&b| b == b'\n') else { - return Err(err!( - "invalid V2 TZif footer, could not find {:?} \ - terminator in: {:?}", - Byte(b'\n'), - Bytes(toscan), - )); - }; - let (bytes, rest) = bytes.split_at(nlat); - if !bytes.is_empty() { - // We could in theory limit TZ strings to their strict POSIX - // definition here for TZif V2, but I don't think there is any - // harm in allowing the extensions in V2 formatted TZif data. Note - // that the GNU tooling allow it via the `TZ` environment variable - // even though POSIX doesn't specify it. This all seems okay to me - // because the V3+ extension is a strict superset of functionality. - self.posix_tz = Some(ReasonablePosixTimeZone::parse(bytes)?); - } - Ok(&rest[1..]) - } - - /// This sets the wall clock times for each transition. - /// - /// The wall clock time corresponds to time on the clock that the - /// transition begins. That is, it is the time offset by the previous - /// transition's offset. - /// - /// This also computes whether there is a gap or fold or neither between - /// each transition. This is used to resolve ambiguous timestamps when - /// given a civil datetime. - fn set_wall_datetimes(&mut self) { - let mut prev = self.local_time_type(self.first_transition()).offset; - // We iterate over indices instead of `transitions.iter_mut()` because - // of the borrow checker breaking composition. - for i in 0..self.transitions.len() { - let this = self.local_time_type(&self.transitions[i]).offset; - let t = &mut self.transitions[i]; - t.wall = if prev == this { - // Equivalent offsets means there can never be any ambiguity. - let start = prev.to_datetime(t.timestamp); - TransitionWall::Unambiguous { start } - } else if prev < this { - // When the offset of the previous transition is less, that - // means there is some non-zero amount of time that is - // "skipped" when moving to the next transition. Thus, we have - // a gap. The start of the gap is the offset which gets us the - // earliest time, i.e., the smaller of the two offsets. - let start = prev.to_datetime(t.timestamp); - let end = this.to_datetime(t.timestamp); - TransitionWall::Gap { start, end } - } else { - // When the offset of the previous transition is greater, that - // means there is some non-zero amount of time that will be - // replayed on a wall clock in this time zone. Thus, we have - // a fold. The start of the gold is the offset which gets us - // the earliest time, i.e., the smaller of the two offsets. 
-                assert!(prev > this);
-                let start = this.to_datetime(t.timestamp);
-                let end = prev.to_datetime(t.timestamp);
-                TransitionWall::Fold { start, end }
-            };
-            prev = this;
-        }
+    fn transitions(&self) -> &[Transition] {
+        self.transitions.as_ref()
     }
 }
 
-impl Eq for Tzif {}
+impl<STRING: AsRef<str>, TYPES, TRANS> Eq for Tzif<STRING, TYPES, TRANS> {}
 
-impl PartialEq for Tzif {
-    fn eq(&self, rhs: &Tzif) -> bool {
-        self.name == rhs.name && self.checksum == rhs.checksum
+impl<STRING: AsRef<str>, TYPES, TRANS> PartialEq
+    for Tzif<STRING, TYPES, TRANS>
+{
+    fn eq(&self, rhs: &Self) -> bool {
+        self.name.as_ref().map(|n| n.as_ref())
+            == rhs.name.as_ref().map(|n| n.as_ref())
+            && self.checksum == rhs.checksum
     }
 }
 
 /// A transition to a different offset.
 #[derive(Clone, Debug, Eq, PartialEq)]
-struct Transition {
+#[doc(hidden)] // not part of Jiff's public API
+pub struct Transition {
     /// The UNIX leap time at which the transition starts. The transition
     /// continues up to and _not_ including the next transition.
     timestamp: Timestamp,
@@ -925,6 +543,20 @@ struct Transition {
     type_index: u8,
 }
 
+impl Transition {
+    /// Converts from the shared-but-internal API for use in proc macros.
+    pub(crate) const fn from_shared(
+        sh: &shared::TzifTransition,
+        prev_offset: i32,
+        this_offset: i32,
+    ) -> Transition {
+        let timestamp = Timestamp::constant(sh.timestamp, 0);
+        let wall = TransitionWall::new(sh.timestamp, prev_offset, this_offset);
+        let type_index = sh.type_index;
+        Transition { timestamp, wall, type_index }
+    }
+}
+
 /// The wall clock time for when a transition begins.
 ///
 /// This explicitly represents ambiguous wall clock times that occur at the
@@ -1015,6 +647,54 @@ enum TransitionWall {
 }
 
 impl TransitionWall {
+    /// Creates transition data based on wall-clock time.
+    ///
+    /// This data isn't directly part of TZif, but can be derived from it.
+    /// It is principally done so that TZ lookups for civil datetime are
+    /// faster. That is, we pre-compute whatever we can here.
+    ///
+    /// `timestamp` corresponds to the timestamp of the respective transition.
+    /// `this_offset` is the offset associated with that transition (via the
+    /// corresponding local time type), and `prev_offset` is the offset of the
+    /// previous transition (also through its corresponding local time type).
+    const fn new(
+        timestamp: i64,
+        prev_offset: i32,
+        this_offset: i32,
+    ) -> TransitionWall {
+        const fn to_datetime(timestamp: i64, offset: i32) -> DateTime {
+            use crate::util::common::timestamp_to_datetime_zulu;
+            let (y, mo, d, h, m, s, n) =
+                timestamp_to_datetime_zulu(timestamp, 0, offset);
+            DateTime::constant(y, mo, d, h, m, s, n)
+        }
+
+        if prev_offset == this_offset {
+            // Equivalent offsets means there can never be any ambiguity.
+            let start = to_datetime(timestamp, prev_offset);
+            TransitionWall::Unambiguous { start }
+        } else if prev_offset < this_offset {
+            // When the offset of the previous transition is less, that
+            // means there is some non-zero amount of time that is
+            // "skipped" when moving to the next transition. Thus, we have
+            // a gap. The start of the gap is the offset which gets us the
+            // earliest time, i.e., the smaller of the two offsets.
+            let start = to_datetime(timestamp, prev_offset);
+            let end = to_datetime(timestamp, this_offset);
+            TransitionWall::Gap { start, end }
+        } else {
+            // When the offset of the previous transition is greater, that
+            // means there is some non-zero amount of time that will be
+            // replayed on a wall clock in this time zone. Thus, we have
+            // a fold. The start of the fold is the offset which gets us
+            // the earliest time, i.e., the smaller of the two offsets.
+            assert!(prev_offset > this_offset);
+            let start = to_datetime(timestamp, this_offset);
+            let end = to_datetime(timestamp, prev_offset);
+            TransitionWall::Fold { start, end }
+        }
+    }
+
     fn start(&self) -> DateTime {
         match *self {
             TransitionWall::Unambiguous { start } => start,
@@ -1031,7 +711,8 @@ impl TransitionWall {
 /// abbreviation. (There is also an "indicator," but I have no clue what it
 /// means. See the `Indicator` type for a rant.)
 #[derive(Clone, Debug, Eq, PartialEq)]
-struct LocalTimeType {
+#[doc(hidden)] // not part of Jiff's public API
+pub struct LocalTimeType {
     offset: Offset,
     is_dst: Dst,
     designation: Range<u8>,
@@ -1039,6 +720,17 @@
 }
 
 impl LocalTimeType {
+    /// Converts from the shared-but-internal API for use in proc macros.
+    pub(crate) const fn from_shared(
+        sh: &shared::TzifLocalTimeType,
+    ) -> LocalTimeType {
+        let offset = Offset::constant_seconds(sh.offset);
+        let is_dst = if sh.is_dst { Dst::Yes } else { Dst::No };
+        let designation = sh.designation.start..sh.designation.end;
+        let indicator = Indicator::from_shared(&sh.indicator);
+        LocalTimeType { offset, is_dst, designation, indicator }
+    }
+
     fn designation(&self) -> Range<usize> {
         usize::from(self.designation.start)..usize::from(self.designation.end)
     }
@@ -1109,6 +801,17 @@ enum Indicator {
     UTStandard,
 }
 
+impl Indicator {
+    /// Converts from the shared-but-internal API for use in proc macros.
+    pub(crate) const fn from_shared(sh: &shared::TzifIndicator) -> Indicator {
+        match *sh {
+            shared::TzifIndicator::LocalWall => Indicator::LocalWall,
+            shared::TzifIndicator::LocalStandard => Indicator::LocalStandard,
+            shared::TzifIndicator::UTStandard => Indicator::UTStandard,
+        }
+    }
+}
+
 impl core::fmt::Display for Indicator {
     fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
         match *self {
@@ -1119,210 +822,6 @@ impl core::fmt::Display for Indicator {
     }
 }
 
-/// The header for a TZif formatted file.
-///
-/// V2+ TZif format have two headers: one for V1 data, and then a second
-/// following the V1 data block that describes another data block which uses
-/// 64-bit timestamps. The two headers both have the same format and both
-/// use 32-bit big-endian encoded integers.
-#[derive(Debug)]
-struct Header {
-    /// The size of the timestamps encoded in the data block.
-    ///
-    /// This is guaranteed to be either 4 (for V1) or 8 (for the 64-bit header
-    /// block in V2+).
-    time_size: usize,
-    /// The file format version.
-    ///
-    /// Note that this is either a NUL byte (for version 1), or an ASCII byte
-    /// corresponding to the version number. That is, `0x32` for `2`, `0x33`
-    /// for `3` or `0x34` for `4`. Note also that just because zoneinfo might
-    /// have been recently generated does not mean it uses the latest format
-    /// version. It seems like newer versions are only compiled by `zic` when
-    /// they are needed. For example, `America/New_York` on my system (as of
-    /// `2024-03-25`) has version `0x32`, but `Asia/Jerusalem` has version
-    /// `0x33`.
-    version: u8,
-    /// Number of UT/local indicators stored in the file.
-    ///
-    /// This is checked to be either equal to `0` or equal to `tzh_typecnt`.
-    tzh_ttisutcnt: usize,
-    /// The number of standard/wall indicators stored in the file.
-    ///
-    /// This is checked to be either equal to `0` or equal to `tzh_typecnt`.
-    tzh_ttisstdcnt: usize,
-    /// The number of leap seconds for which data entries are stored in the
-    /// file.
- tzh_leapcnt: usize, - /// The number of transition times for which data entries are stored in - /// the file. - tzh_timecnt: usize, - /// The number of local time types for which data entries are stored in the - /// file. - /// - /// This is checked to be at least `1`. - tzh_typecnt: usize, - /// The number of bytes of time zone abbreviation strings stored in the - /// file. - /// - /// This is checked to be at least `1`. - tzh_charcnt: usize, -} - -impl Header { - /// Parse the header record from the given bytes. - /// - /// Upon success, return the header and all bytes after the header. - /// - /// The given `time_size` must be 4 or 8, corresponding to either the - /// V1 header block or the V2+ header block, respectively. - fn parse( - time_size: usize, - bytes: &[u8], - ) -> Result<(Header, &[u8]), Error> { - assert!(time_size == 4 || time_size == 8, "time size must be 4 or 8"); - if bytes.len() < 44 { - return Err(err!("invalid header: too short")); - } - let (magic, rest) = bytes.split_at(4); - if magic != b"TZif" { - return Err(err!("invalid header: magic bytes mismatch")); - } - let (version, rest) = rest.split_at(1); - let (_reserved, rest) = rest.split_at(15); - - let (tzh_ttisutcnt_bytes, rest) = rest.split_at(4); - let (tzh_ttisstdcnt_bytes, rest) = rest.split_at(4); - let (tzh_leapcnt_bytes, rest) = rest.split_at(4); - let (tzh_timecnt_bytes, rest) = rest.split_at(4); - let (tzh_typecnt_bytes, rest) = rest.split_at(4); - let (tzh_charcnt_bytes, rest) = rest.split_at(4); - - let tzh_ttisutcnt = from_be_bytes_u32_to_usize(tzh_ttisutcnt_bytes) - .map_err(|e| e.context("failed to parse tzh_ttisutcnt"))?; - let tzh_ttisstdcnt = from_be_bytes_u32_to_usize(tzh_ttisstdcnt_bytes) - .map_err(|e| e.context("failed to parse tzh_ttisstdcnt"))?; - let tzh_leapcnt = from_be_bytes_u32_to_usize(tzh_leapcnt_bytes) - .map_err(|e| e.context("failed to parse tzh_leapcnt"))?; - let tzh_timecnt = from_be_bytes_u32_to_usize(tzh_timecnt_bytes) - .map_err(|e| e.context("failed to parse tzh_timecnt"))?; - let tzh_typecnt = from_be_bytes_u32_to_usize(tzh_typecnt_bytes) - .map_err(|e| e.context("failed to parse tzh_typecnt"))?; - let tzh_charcnt = from_be_bytes_u32_to_usize(tzh_charcnt_bytes) - .map_err(|e| e.context("failed to parse tzh_charcnt"))?; - - if tzh_ttisutcnt != 0 && tzh_ttisutcnt != tzh_typecnt { - return Err(err!( - "expected tzh_ttisutcnt={tzh_ttisutcnt} to be zero \ - or equal to tzh_typecnt={tzh_typecnt}", - )); - } - if tzh_ttisstdcnt != 0 && tzh_ttisstdcnt != tzh_typecnt { - return Err(err!( - "expected tzh_ttisstdcnt={tzh_ttisstdcnt} to be zero \ - or equal to tzh_typecnt={tzh_typecnt}", - )); - } - if tzh_typecnt < 1 { - return Err(err!( - "expected tzh_typecnt={tzh_typecnt} to be at least 1", - )); - } - if tzh_charcnt < 1 { - return Err(err!( - "expected tzh_charcnt={tzh_charcnt} to be at least 1", - )); - } - - let header = Header { - time_size, - version: version[0], - tzh_ttisutcnt, - tzh_ttisstdcnt, - tzh_leapcnt, - tzh_timecnt, - tzh_typecnt, - tzh_charcnt, - }; - Ok((header, rest)) - } - - /// Returns true if this header is for a 32-bit data block. - /// - /// When false, it is guaranteed that this header is for a 64-bit data - /// block. - fn is_32bit(&self) -> bool { - self.time_size == 4 - } - - /// Returns the size of the data block, in bytes, for this header. - /// - /// This returns an error if the arithmetic required to compute the - /// length would overflow. 
-    ///
-    /// This is useful for, e.g., skipping over the 32-bit V1 data block in
-    /// V2+ TZif formatted files.
-    fn data_block_len(&self) -> Result<usize, Error> {
-        let a = self.transition_times_len()?;
-        let b = self.transition_types_len()?;
-        let c = self.local_time_types_len()?;
-        let d = self.time_zone_designations_len()?;
-        let e = self.leap_second_len()?;
-        let f = self.standard_wall_len()?;
-        let g = self.ut_local_len()?;
-        a.checked_add(b)
-            .and_then(|z| z.checked_add(c))
-            .and_then(|z| z.checked_add(d))
-            .and_then(|z| z.checked_add(e))
-            .and_then(|z| z.checked_add(f))
-            .and_then(|z| z.checked_add(g))
-            .ok_or_else(|| {
-                err!(
-                    "length of data block in V{} tzfile is too big",
-                    self.version
-                )
-            })
-    }
-
-    fn transition_times_len(&self) -> Result<usize, Error> {
-        self.tzh_timecnt.checked_mul(self.time_size).ok_or_else(|| {
-            err!("tzh_timecnt value {} is too big", self.tzh_timecnt)
-        })
-    }
-
-    fn transition_types_len(&self) -> Result<usize, Error> {
-        Ok(self.tzh_timecnt)
-    }
-
-    fn local_time_types_len(&self) -> Result<usize, Error> {
-        self.tzh_typecnt.checked_mul(6).ok_or_else(|| {
-            err!("tzh_typecnt value {} is too big", self.tzh_typecnt)
-        })
-    }
-
-    fn time_zone_designations_len(&self) -> Result<usize, Error> {
-        Ok(self.tzh_charcnt)
-    }
-
-    fn leap_second_len(&self) -> Result<usize, Error> {
-        let record_len = self
-            .time_size
-            .checked_add(4)
-            .expect("4-or-8 plus 4 always fits in usize");
-        self.tzh_leapcnt.checked_mul(record_len).ok_or_else(|| {
-            err!("tzh_leapcnt value {} is too big", self.tzh_leapcnt)
-        })
-    }
-
-    fn standard_wall_len(&self) -> Result<usize, Error> {
-        Ok(self.tzh_ttisstdcnt)
-    }
-
-    fn ut_local_len(&self) -> Result<usize, Error> {
-        Ok(self.tzh_ttisutcnt)
-    }
-}
-
 /// Does a quick check that returns true if the data might be in TZif format.
 ///
 /// It is possible that this returns true even if the given data is not in TZif
@@ -1334,81 +833,9 @@ pub(crate) fn is_possibly_tzif(data: &[u8]) -> bool {
     data.starts_with(b"TZif")
 }
 
-/// Interprets the given slice as an unsigned 32-bit big endian integer,
-/// attempts to convert it to a `usize` and returns it.
-///
-/// # Panics
-///
-/// When `bytes.len() != 4`.
-///
-/// # Errors
-///
-/// This errors if the `u32` parsed from the given bytes cannot fit in a
-/// `usize`.
-fn from_be_bytes_u32_to_usize(bytes: &[u8]) -> Result<usize, Error> {
-    let n = from_be_bytes_u32(bytes);
-    usize::try_from(n).map_err(|_| {
-        err!(
-            "failed to parse integer {n} (too big, max allowed is {}",
-            usize::MAX
-        )
-    })
-}
-
-/// Interprets the given slice as an unsigned 32-bit big endian integer and
-/// returns it.
-///
-/// # Panics
-///
-/// When `bytes.len() != 4`.
-fn from_be_bytes_u32(bytes: &[u8]) -> u32 {
-    u32::from_be_bytes(bytes.try_into().unwrap())
-}
-
-/// Interprets the given slice as a signed 32-bit big endian integer and
-/// returns it.
-///
-/// # Panics
-///
-/// When `bytes.len() != 4`.
-fn from_be_bytes_i32(bytes: &[u8]) -> i32 {
-    i32::from_be_bytes(bytes.try_into().unwrap())
-}
-
-/// Interprets the given slice as a signed 64-bit big endian integer and
-/// returns it.
-///
-/// # Panics
-///
-/// When `bytes.len() != 8`.
-fn from_be_bytes_i64(bytes: &[u8]) -> i64 {
-    i64::from_be_bytes(bytes.try_into().unwrap())
-}
-
-/// Splits the given slice of bytes at the index given.
-///
-/// If the index is out of range (greater than `bytes.len()`) then an error is
-/// returned. The error message will include the `what` string given, which is
-/// meant to describe the thing being split.
-fn try_split_at<'b>( - what: &'static str, - bytes: &'b [u8], - at: usize, -) -> Result<(&'b [u8], &'b [u8]), Error> { - if at > bytes.len() { - Err(err!( - "expected at least {at} bytes for {what}, \ - but found only {} bytes", - bytes.len(), - )) - } else { - Ok(bytes.split_at(at)) - } -} - -#[cfg(test)] +#[cfg(all(test, feature = "alloc"))] mod tests { - use alloc::string::ToString; + use alloc::{string::ToString, vec}; #[cfg(not(miri))] use crate::tz::testdata::TZIF_TEST_FILES; @@ -1424,7 +851,7 @@ mod tests { /// /// For this to work, we make sure everything in a `Tzif` value is /// represented in some way in this output. - fn tzif_to_human_readable(tzif: &Tzif) -> String { + fn tzif_to_human_readable(tzif: &TzifOwned) -> String { use std::io::Write; let mut out = tabwriter::TabWriter::new(vec![]) diff --git a/src/util/array_str.rs b/src/util/array_str.rs index 8428ab1..d564a16 100644 --- a/src/util/array_str.rs +++ b/src/util/array_str.rs @@ -46,11 +46,6 @@ impl ArrayStr { Some(ArrayStr { bytes, len: len as u8 }) } - /// Returns the capacity of this fixed string. - pub(crate) const fn capacity() -> usize { - N - } - /// Append the bytes given to the end of this string. /// /// If the capacity would be exceeded, then this is a no-op and `false` diff --git a/src/util/constant.rs b/src/util/constant.rs new file mode 100644 index 0000000..c9164fb --- /dev/null +++ b/src/util/constant.rs @@ -0,0 +1,13 @@ +/// Unwrap an `Option` in a `const` context. +/// +/// If it fails, panics with the given message. +macro_rules! unwrap { + ($val:expr, $msg:expr$(,)?) => { + match $val { + Some(val) => val, + None => panic!($msg), + } + }; +} + +pub(crate) use unwrap; diff --git a/src/util/escape.rs b/src/util/escape.rs index 2c49873..89e27d6 100644 --- a/src/util/escape.rs +++ b/src/util/escape.rs @@ -4,91 +4,8 @@ Provides convenience routines for escaping raw bytes. This was copied from `regex-automata` with a few light edits. */ -use crate::util::utf8; - -/// Provides a convenient `Debug` implementation for a `u8`. -/// -/// The `Debug` impl treats the byte as an ASCII, and emits a human readable -/// representation of it. If the byte isn't ASCII, then it's emitted as a hex -/// escape sequence. -#[derive(Clone, Copy)] -pub struct Byte(pub u8); - -impl core::fmt::Display for Byte { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - if self.0 == b' ' { - return write!(f, " "); - } - // 10 bytes is enough to cover any output from ascii::escape_default. - let mut bytes = [0u8; 10]; - let mut len = 0; - for (i, mut b) in core::ascii::escape_default(self.0).enumerate() { - // capitalize \xab to \xAB - if i >= 2 && b'a' <= b && b <= b'f' { - b -= 32; - } - bytes[len] = b; - len += 1; - } - write!(f, "{}", core::str::from_utf8(&bytes[..len]).unwrap()) - } -} - -impl core::fmt::Debug for Byte { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - write!(f, "\"")?; - core::fmt::Display::fmt(self, f)?; - write!(f, "\"")?; - Ok(()) - } -} - -/// Provides a convenient `Debug` implementation for `&[u8]`. -/// -/// This generally works best when the bytes are presumed to be mostly UTF-8, -/// but will work for anything. For any bytes that aren't UTF-8, they are -/// emitted as hex escape sequences. -#[derive(Clone, Copy)] -pub struct Bytes<'a>(pub &'a [u8]); - -impl<'a> core::fmt::Display for Bytes<'a> { - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - // This is a sad re-implementation of a similar impl found in bstr. 
-        let mut bytes = self.0;
-        while let Some(result) = utf8::decode(bytes) {
-            let ch = match result {
-                Ok(ch) => ch,
-                Err(byte) => {
-                    write!(f, r"\x{:02x}", byte)?;
-                    bytes = &bytes[1..];
-                    continue;
-                }
-            };
-            bytes = &bytes[ch.len_utf8()..];
-            match ch {
-                '\0' => write!(f, "\\0")?,
-                // ASCII control characters except \0, \n, \r, \t
-                '\x01'..='\x08'
-                | '\x0b'
-                | '\x0c'
-                | '\x0e'..='\x19'
-                | '\x7f' => {
-                    write!(f, "\\x{:02x}", u32::from(ch))?;
-                }
-                '\n' | '\r' | '\t' | _ => {
-                    write!(f, "{}", ch.escape_debug())?;
-                }
-            }
-        }
-        Ok(())
-    }
-}
-
-impl<'a> core::fmt::Debug for Bytes<'a> {
-    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
-        write!(f, "\"")?;
-        core::fmt::Display::fmt(self, f)?;
-        write!(f, "\"")?;
-        Ok(())
-    }
-}
+// These were originally defined here, but they got moved to
+// shared since they're needed there. We re-export them here
+// because this is really where they should live, but they're
+// in shared because `jiff-tzdb-static` needs it.
+pub(crate) use crate::shared::util::{Byte, Bytes};
diff --git a/src/util/mod.rs b/src/util/mod.rs
index 6f8d320..b360f1f 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -7,7 +7,7 @@ pub(crate) mod borrow;
 ))]
 pub(crate) mod cache;
 pub(crate) mod common;
-pub(crate) mod crc32;
+pub(crate) mod constant;
 pub(crate) mod escape;
 #[cfg(feature = "std")]
 pub(crate) mod fs;
diff --git a/src/util/utf8.rs b/src/util/utf8.rs
index b1dad7b..e39731a 100644
--- a/src/util/utf8.rs
+++ b/src/util/utf8.rs
@@ -13,19 +13,7 @@ use core::cmp::Ordering;
 /// fast UTF-8 decoder, this is not it. If you feel like you need one in this
 /// crate, then please file an issue and discuss your use case.
 pub(crate) fn decode(bytes: &[u8]) -> Option<Result<char, u8>> {
-    if bytes.is_empty() {
-        return None;
-    }
-    let len = match utf8_len(bytes[0]) {
-        None => return Some(Err(bytes[0])),
-        Some(len) if len > bytes.len() => return Some(Err(bytes[0])),
-        Some(1) => return Some(Ok(char::from(bytes[0]))),
-        Some(len) => len,
-    };
-    match core::str::from_utf8(&bytes[..len]) {
-        Ok(s) => Some(Ok(s.chars().next().unwrap())),
-        Err(_) => Some(Err(bytes[0])),
-    }
+    crate::shared::util::utf8_decode(bytes)
 }
 
 /// Like std's `eq_ignore_ascii_case`, but returns a full `Ordering`.
@@ -58,24 +46,3 @@ pub(crate) fn cmp_ignore_ascii_case_bytes(s1: &[u8], s2: &[u8]) -> Ordering {
         }
     }
 }
-
-/// Given a UTF-8 leading byte, this returns the total number of code units
-/// in the following encoded codepoint.
-///
-/// If the given byte is not a valid UTF-8 leading byte, then this returns
-/// `None`.
-fn utf8_len(byte: u8) -> Option<usize> {
-    if byte <= 0x7F {
-        return Some(1);
-    } else if byte & 0b1100_0000 == 0b1000_0000 {
-        return None;
-    } else if byte <= 0b1101_1111 {
-        Some(2)
-    } else if byte <= 0b1110_1111 {
-        Some(3)
-    } else if byte <= 0b1111_0111 {
-        Some(4)
-    } else {
-        None
-    }
-}
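As a rough illustration of the gap/fold precomputation that the new `TransitionWall::new` performs, here is a standalone sketch (not part of the patch, and not using Jiff's internal types): it works in plain Unix seconds and offset seconds, and the concrete timestamps are my own example inputs, the two 2024 America/New_York DST transitions.

// Standalone sketch: mirrors the gap/fold classification in
// `TransitionWall::new`, using i64 seconds instead of Jiff's
// `Timestamp`/`Offset`/`DateTime` types.
#[derive(Debug)]
enum Wall {
    /// No ambiguity: the offsets on both sides of the transition agree.
    Unambiguous { start: i64 },
    /// Clocks jump forward; local times in `start..end` never occur.
    Gap { start: i64, end: i64 },
    /// Clocks fall back; local times in `start..end` occur twice.
    Fold { start: i64, end: i64 },
}

/// Classify a transition by comparing the previous offset with the new one.
/// The "wall clock" values are just `timestamp + offset`, i.e. local seconds.
fn classify(timestamp: i64, prev_offset: i64, this_offset: i64) -> Wall {
    if prev_offset == this_offset {
        Wall::Unambiguous { start: timestamp + prev_offset }
    } else if prev_offset < this_offset {
        // Offset grows: some local times are skipped (a gap).
        Wall::Gap { start: timestamp + prev_offset, end: timestamp + this_offset }
    } else {
        // Offset shrinks: some local times repeat (a fold).
        Wall::Fold { start: timestamp + this_offset, end: timestamp + prev_offset }
    }
}

fn main() {
    // America/New_York, 2024-03-10 07:00:00 UTC: EST (-05:00) -> EDT (-04:00).
    // Local clocks jump from 02:00 to 03:00, so this is a gap.
    let spring = classify(1710054000, -5 * 3600, -4 * 3600);
    assert!(matches!(spring, Wall::Gap { .. }));

    // America/New_York, 2024-11-03 06:00:00 UTC: EDT (-04:00) -> EST (-05:00).
    // Local clocks fall back from 02:00 to 01:00, so this is a fold.
    let autumn = classify(1730613600, -4 * 3600, -5 * 3600);
    assert!(matches!(autumn, Wall::Fold { .. }));

    println!("{spring:?}\n{autumn:?}");
}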