From 9f3d5ebd2ffc8ef325cb3e2c2142df55d759d703 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Wed, 30 Apr 2025 16:24:14 -0400 Subject: [PATCH] fmt/strtime: add support for `%::z` and `%:::z` These are also motivated by their presence in GNU date. They are basically just different ways of printing offsets from UTC. We do some light refactoring to DRY up the code a bit, and to make it a little nicer to handle a variable number (up to a fixed size limit) of `:` characters preceding a directive. This is honestly just a giant clusterfuck (who thinks having `%z`, `%:z`, `%::z` and `%:::z` is a nice user experience!?!?), but so is `strptime`/`strftime` I guess. Oh well. Closes #342 --- CHANGELOG.md | 2 + src/fmt/offset.rs | 23 ++++- src/fmt/strtime/format.rs | 114 ++++++++++++++++------- src/fmt/strtime/mod.rs | 20 +++- src/fmt/strtime/parse.rs | 187 ++++++++++++++++++++++++++++++++------ 5 files changed, 283 insertions(+), 63 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb86c9d..6a4e9bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ Enhancements: Add support for the `%c`, `%r`, `%X` and `%x` conversion specifiers. * [#341](https://github.com/BurntSushi/jiff/issues/341): Add support for `%q` in `jiff::fmt::strtime` (prints quarter of year). +* [#342](https://github.com/BurntSushi/jiff/issues/342): +Add support for `%::z` and `%:::z` in `jiff::fmt::strtime`. * [#344](https://github.com/BurntSushi/jiff/issues/344): Add support for `%N` in `jiff::fmt::strtime` (alias for `%9f`). diff --git a/src/fmt/offset.rs b/src/fmt/offset.rs index b250b4a..7322117 100644 --- a/src/fmt/offset.rs +++ b/src/fmt/offset.rs @@ -290,6 +290,7 @@ impl core::fmt::Debug for Numeric { pub(crate) struct Parser { zulu: bool, require_minute: bool, + require_second: bool, subminute: bool, subsecond: bool, colon: Colon, @@ -301,6 +302,7 @@ impl Parser { Parser { zulu: true, require_minute: false, + require_second: false, subminute: true, subsecond: true, colon: Colon::Optional, @@ -326,6 +328,16 @@ impl Parser { Parser { require_minute: yes, ..self } } + /// When enabled, the second component of a time zone offset is required. + /// If no seconds (or minutes) are found, then an error is returned. + /// + /// When `subminute` is disabled, this setting has no effect. + /// + /// This is disabled by default. + pub(crate) const fn require_second(self, yes: bool) -> Parser { + Parser { require_second: yes, ..self } + } + /// When enabled, offsets with precision greater than integral minutes /// are supported. Specifically, when enabled, nanosecond precision is /// supported. @@ -462,7 +474,7 @@ impl Parser { return Err(err!( "parsed hour component of time zone offset from \ {original:?}, but could not find required colon \ - component", + separator", )); } true @@ -497,7 +509,7 @@ impl Parser { ) })?; if !has_minutes { - if self.require_minute { + if self.require_minute || (self.subminute && self.require_second) { return Err(err!( "parsed hour component of time zone offset from \ {original:?}, but could not find required minute \ @@ -544,6 +556,13 @@ impl Parser { ) })?; if !has_seconds { + if self.require_second { + return Err(err!( + "parsed hour and minute components of time zone offset \ + from {original:?}, but could not find required second \ + component", + )); + } return Ok(Parsed { value: numeric, input }); } diff --git a/src/fmt/strtime/format.rs b/src/fmt/strtime/format.rs index 1ab7dd6..e68cfea 100644 --- a/src/fmt/strtime/format.rs +++ b/src/fmt/strtime/format.rs @@ -69,7 +69,15 @@ impl<'c, 'f, 't, 'w, W: Write, L: Custom> Formatter<'c, 'f, 't, 'w, W, L> { b'n' => self.fmt_literal("\n").context("%n failed")?, b'P' => self.fmt_ampm_lower(&ext).context("%P failed")?, b'p' => self.fmt_ampm_upper(&ext).context("%p failed")?, - b'Q' => self.fmt_iana_nocolon().context("%Q failed")?, + b'Q' => match ext.colons { + 0 => self.fmt_iana_nocolon().context("%Q failed")?, + 1 => self.fmt_iana_colon().context("%:Q failed")?, + _ => { + return Err(err!( + "invalid number of `:` in `%Q` directive" + )) + } + }, b'q' => self.fmt_quarter(&ext).context("%q failed")?, b'R' => self.fmt_clock_nosecs(&ext).context("%R failed")?, b'r' => self.fmt_12hour_time(&ext).context("%r failed")?, @@ -87,28 +95,17 @@ impl<'c, 'f, 't, 'w, W: Write, L: Custom> Formatter<'c, 'f, 't, 'w, W, L> { b'Y' => self.fmt_year(&ext).context("%Y failed")?, b'y' => self.fmt_year2(&ext).context("%y failed")?, b'Z' => self.fmt_tzabbrev(&ext).context("%Z failed")?, - b'z' => self.fmt_offset_nocolon().context("%z failed")?, - b':' => { - if !self.bump_fmt() { + b'z' => match ext.colons { + 0 => self.fmt_offset_nocolon().context("%z failed")?, + 1 => self.fmt_offset_colon().context("%:z failed")?, + 2 => self.fmt_offset_colon2().context("%::z failed")?, + 3 => self.fmt_offset_colon3().context("%:::z failed")?, + _ => { return Err(err!( - "invalid format string, expected directive \ - after '%:'", - )); + "invalid number of `:` in `%z` directive" + )) } - match self.f() { - b'Q' => self.fmt_iana_colon().context("%:Q failed")?, - b'z' => { - self.fmt_offset_colon().context("%:z failed")? - } - unk => { - return Err(err!( - "found unrecognized directive %{unk} \ - following %:", - unk = escape::Byte(unk), - )); - } - } - } + }, b'.' => { if !self.bump_fmt() { return Err(err!( @@ -199,7 +196,8 @@ impl<'c, 'f, 't, 'w, W: Write, L: Custom> Formatter<'c, 'f, 't, 'w, W, L> { fn parse_extension(&mut self) -> Result { let flag = self.parse_flag()?; let width = self.parse_width()?; - Ok(Extension { flag, width }) + let colons = self.parse_colons(); + Ok(Extension { flag, width, colons }) } /// Parses an optional flag. And if one is parsed, the parser is bumped @@ -227,6 +225,15 @@ impl<'c, 'f, 't, 'w, W: Write, L: Custom> Formatter<'c, 'f, 't, 'w, W, L> { Ok(width) } + /// Parses an optional number of colons (up to 3) immediately before a + /// conversion specifier. + #[cfg_attr(feature = "perf-inline", inline(always))] + fn parse_colons(&mut self) -> u8 { + let (colons, fmt) = Extension::parse_colons(self.fmt); + self.fmt = fmt; + colons + } + // These are the formatting functions. They are pretty much responsible // for getting what they need for the broken down time and reporting a // decent failure mode if what they need couldn't be found. And then, @@ -430,7 +437,7 @@ impl<'c, 'f, 't, 'w, W: Write, L: Custom> Formatter<'c, 'f, 't, 'w, W, L> { zone offset, but none were present" ) })?; - return write_offset(offset, false, &mut self.wtr); + return write_offset(offset, false, true, false, &mut self.wtr); }; self.wtr.write_str(iana)?; Ok(()) @@ -445,7 +452,7 @@ impl<'c, 'f, 't, 'w, W: Write, L: Custom> Formatter<'c, 'f, 't, 'w, W, L> { zone offset, but none were present" ) })?; - return write_offset(offset, true, &mut self.wtr); + return write_offset(offset, true, true, false, &mut self.wtr); }; self.wtr.write_str(iana)?; Ok(()) @@ -456,7 +463,7 @@ impl<'c, 'f, 't, 'w, W: Write, L: Custom> Formatter<'c, 'f, 't, 'w, W, L> { let offset = self.tm.offset.ok_or_else(|| { err!("requires offset to format time zone offset") })?; - write_offset(offset, false, self.wtr) + write_offset(offset, false, true, false, self.wtr) } /// %:z @@ -464,7 +471,23 @@ impl<'c, 'f, 't, 'w, W: Write, L: Custom> Formatter<'c, 'f, 't, 'w, W, L> { let offset = self.tm.offset.ok_or_else(|| { err!("requires offset to format time zone offset") })?; - write_offset(offset, true, self.wtr) + write_offset(offset, true, true, false, self.wtr) + } + + /// %::z + fn fmt_offset_colon2(&mut self) -> Result<(), Error> { + let offset = self.tm.offset.ok_or_else(|| { + err!("requires offset to format time zone offset") + })?; + write_offset(offset, true, true, true, self.wtr) + } + + /// %:::z + fn fmt_offset_colon3(&mut self) -> Result<(), Error> { + let offset = self.tm.offset.ok_or_else(|| { + err!("requires offset to format time zone offset") + })?; + write_offset(offset, true, false, false, self.wtr) } /// %S @@ -815,9 +838,18 @@ impl<'c, 'f, 't, 'w, W: Write, L: Custom> Formatter<'c, 'f, 't, 'w, W, L> { /// /// When `colon` is true, the hour, minute and optional second components are /// delimited by a colon. Otherwise, no delimiter is used. +/// +/// When `minute` is true, the minute component is always printed. When +/// false, the minute component is only printed when it is non-zero (or if +/// the second component is non-zero). +/// +/// When `second` is true, the second component is always printed. When false, +/// the second component is only printed when it is non-zero. fn write_offset( offset: Offset, colon: bool, + minute: bool, + second: bool, wtr: &mut W, ) -> Result<(), Error> { static FMT_TWO: DecimalFormatter = DecimalFormatter::new().padding(2); @@ -828,15 +860,17 @@ fn write_offset( wtr.write_str(if offset.is_negative() { "-" } else { "+" })?; wtr.write_int(&FMT_TWO, hours)?; - if colon { - wtr.write_str(":")?; - } - wtr.write_int(&FMT_TWO, minutes)?; - if seconds != 0 { + if minute || minutes != 0 || seconds != 0 { if colon { wtr.write_str(":")?; } - wtr.write_int(&FMT_TWO, seconds)?; + wtr.write_int(&FMT_TWO, minutes)?; + if second || seconds != 0 { + if colon { + wtr.write_str(":")?; + } + wtr.write_int(&FMT_TWO, seconds)?; + } } Ok(()) } @@ -1118,12 +1152,28 @@ mod tests { insta::assert_snapshot!(f("%z", o(0, 0, 0)), @"+0000"); insta::assert_snapshot!(f("%:z", o(0, 0, 0)), @"+00:00"); + insta::assert_snapshot!(f("%::z", o(0, 0, 0)), @"+00:00:00"); + insta::assert_snapshot!(f("%:::z", o(0, 0, 0)), @"+00"); + insta::assert_snapshot!(f("%z", -o(4, 0, 0)), @"-0400"); insta::assert_snapshot!(f("%:z", -o(4, 0, 0)), @"-04:00"); + insta::assert_snapshot!(f("%::z", -o(4, 0, 0)), @"-04:00:00"); + insta::assert_snapshot!(f("%:::z", -o(4, 0, 0)), @"-04"); + insta::assert_snapshot!(f("%z", o(5, 30, 0)), @"+0530"); insta::assert_snapshot!(f("%:z", o(5, 30, 0)), @"+05:30"); + insta::assert_snapshot!(f("%::z", o(5, 30, 0)), @"+05:30:00"); + insta::assert_snapshot!(f("%:::z", o(5, 30, 0)), @"+05:30"); + insta::assert_snapshot!(f("%z", o(5, 30, 15)), @"+053015"); insta::assert_snapshot!(f("%:z", o(5, 30, 15)), @"+05:30:15"); + insta::assert_snapshot!(f("%::z", o(5, 30, 15)), @"+05:30:15"); + insta::assert_snapshot!(f("%:::z", o(5, 30, 15)), @"+05:30:15"); + + insta::assert_snapshot!(f("%z", o(5, 0, 15)), @"+050015"); + insta::assert_snapshot!(f("%:z", o(5, 0, 15)), @"+05:00:15"); + insta::assert_snapshot!(f("%::z", o(5, 0, 15)), @"+05:00:15"); + insta::assert_snapshot!(f("%:::z", o(5, 0, 15)), @"+05:00:15"); } #[test] diff --git a/src/fmt/strtime/mod.rs b/src/fmt/strtime/mod.rs index 872e354..998a8a5 100644 --- a/src/fmt/strtime/mod.rs +++ b/src/fmt/strtime/mod.rs @@ -202,6 +202,8 @@ strings, the strings are matched without regard to ASCII case. | `%Z` | `EDT` | A time zone abbreviation. Supported when formatting only. | | `%z` | `+0530` | A time zone offset in the format `[+-]HHMM[SS]`. | | `%:z` | `+05:30` | A time zone offset in the format `[+-]HH:MM[:SS]`. | +| `%::z` | `+05:30:00` | A time zone offset in the format `[+-]HH:MM:SS`. | +| `%:::z` | `-04`, `+05:30` | A time zone offset in the format `[+-]HH:[MM[:SS]]`. | When formatting, the following flags can be inserted immediately after the `%` and before the directive: @@ -253,7 +255,6 @@ is variable width data. If you have a use case for this, please The following things are currently unsupported: * Parsing or formatting fractional seconds in the time time zone offset. -* The `%::z` and `%:::z` specifiers found in GNU date. * The `%+` conversion specifier is not supported since there doesn't seem to be any consistent definition for it. * With only Jiff, the `%c`, `%r`, `%X` and `%x` locale oriented specifiers @@ -3084,6 +3085,7 @@ impl From