diff --git a/literal/Cargo.toml b/literal/Cargo.toml index 7afa9db..c5b8925 100644 --- a/literal/Cargo.toml +++ b/literal/Cargo.toml @@ -8,8 +8,10 @@ repository = "https://github.com/RustPython/RustPython" license = "MIT" [dependencies] +bitflags = "2.2.1" +itertools = "0.10.5" +num-bigint = { workspace = true } num-traits = { workspace = true } - hexf-parse = "0.2.1" lexical-parse-float = { version = "0.8.0", features = ["format"] } unic-ucd-category = "0.9" diff --git a/literal/src/cformat.rs b/literal/src/cformat.rs new file mode 100644 index 0000000..4be669a --- /dev/null +++ b/literal/src/cformat.rs @@ -0,0 +1,1044 @@ +//! Implementation of Printf-Style string formatting +//! as per the [Python Docs](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting). +use crate::{float, format::Case}; +use bitflags::bitflags; +use num_bigint::{BigInt, Sign}; +use num_traits::Signed; +use std::{ + cmp, fmt, + iter::{Enumerate, Peekable}, + str::FromStr, +}; + +#[derive(Debug, PartialEq)] +pub enum CFormatErrorType { + UnmatchedKeyParentheses, + MissingModuloSign, + UnsupportedFormatChar(char), + IncompleteFormat, + IntTooBig, + // Unimplemented, +} + +// also contains how many chars the parsing function consumed +pub type ParsingError = (CFormatErrorType, usize); + +#[derive(Debug, PartialEq)] +pub struct CFormatError { + pub typ: CFormatErrorType, // FIXME + pub index: usize, +} + +impl fmt::Display for CFormatError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use CFormatErrorType::*; + match self.typ { + UnmatchedKeyParentheses => write!(f, "incomplete format key"), + IncompleteFormat => write!(f, "incomplete format"), + UnsupportedFormatChar(c) => write!( + f, + "unsupported format character '{}' ({:#x}) at index {}", + c, c as u32, self.index + ), + IntTooBig => write!(f, "width/precision too big"), + _ => write!(f, "unexpected error parsing format string"), + } + } +} + +pub type CFormatConversion = 
super::format::FormatConversion; + +#[derive(Debug, PartialEq)] +pub enum CNumberType { + Decimal, + Octal, + Hex(Case), +} + +#[derive(Debug, PartialEq)] +pub enum CFloatType { + Exponent(Case), + PointDecimal(Case), + General(Case), +} + +#[derive(Debug, PartialEq)] +pub enum CFormatType { + Number(CNumberType), + Float(CFloatType), + Character, + String(CFormatConversion), +} + +#[derive(Debug, PartialEq)] +pub enum CFormatPrecision { + Quantity(CFormatQuantity), + Dot, +} + +impl From for CFormatPrecision { + fn from(quantity: CFormatQuantity) -> Self { + CFormatPrecision::Quantity(quantity) + } +} + +bitflags! { + #[derive(Copy, Clone, Debug, PartialEq)] + pub struct CConversionFlags: u32 { + const ALTERNATE_FORM = 0b0000_0001; + const ZERO_PAD = 0b0000_0010; + const LEFT_ADJUST = 0b0000_0100; + const BLANK_SIGN = 0b0000_1000; + const SIGN_CHAR = 0b0001_0000; + } +} + +impl CConversionFlags { + #[inline] + pub fn sign_string(&self) -> &'static str { + if self.contains(CConversionFlags::SIGN_CHAR) { + "+" + } else if self.contains(CConversionFlags::BLANK_SIGN) { + " " + } else { + "" + } + } +} + +#[derive(Debug, PartialEq)] +pub enum CFormatQuantity { + Amount(usize), + FromValuesTuple, +} + +#[derive(Debug, PartialEq)] +pub struct CFormatSpec { + pub mapping_key: Option, + pub flags: CConversionFlags, + pub min_field_width: Option, + pub precision: Option, + pub format_type: CFormatType, + pub format_char: char, + // chars_consumed: usize, +} + +impl FromStr for CFormatSpec { + type Err = ParsingError; + + fn from_str(text: &str) -> Result { + let mut chars = text.chars().enumerate().peekable(); + if chars.next().map(|x| x.1) != Some('%') { + return Err((CFormatErrorType::MissingModuloSign, 1)); + } + + CFormatSpec::parse(&mut chars) + } +} + +pub type ParseIter = Peekable>; + +impl CFormatSpec { + pub fn parse(iter: &mut ParseIter) -> Result + where + T: Into + Copy, + I: Iterator, + { + let mapping_key = parse_spec_mapping_key(iter)?; + let flags = 
parse_flags(iter); + let min_field_width = parse_quantity(iter)?; + let precision = parse_precision(iter)?; + consume_length(iter); + let (format_type, format_char) = parse_format_type(iter)?; + + Ok(CFormatSpec { + mapping_key, + flags, + min_field_width, + precision, + format_type, + format_char, + }) + } + + fn compute_fill_string(fill_char: char, fill_chars_needed: usize) -> String { + (0..fill_chars_needed) + .map(|_| fill_char) + .collect::() + } + + fn fill_string( + &self, + string: String, + fill_char: char, + num_prefix_chars: Option, + ) -> String { + let mut num_chars = string.chars().count(); + if let Some(num_prefix_chars) = num_prefix_chars { + num_chars += num_prefix_chars; + } + let num_chars = num_chars; + + let width = match &self.min_field_width { + Some(CFormatQuantity::Amount(width)) => cmp::max(width, &num_chars), + _ => &num_chars, + }; + let fill_chars_needed = width.saturating_sub(num_chars); + let fill_string = CFormatSpec::compute_fill_string(fill_char, fill_chars_needed); + + if !fill_string.is_empty() { + if self.flags.contains(CConversionFlags::LEFT_ADJUST) { + format!("{string}{fill_string}") + } else { + format!("{fill_string}{string}") + } + } else { + string + } + } + + fn fill_string_with_precision(&self, string: String, fill_char: char) -> String { + let num_chars = string.chars().count(); + + let width = match &self.precision { + Some(CFormatPrecision::Quantity(CFormatQuantity::Amount(width))) => { + cmp::max(width, &num_chars) + } + _ => &num_chars, + }; + let fill_chars_needed = width.saturating_sub(num_chars); + let fill_string = CFormatSpec::compute_fill_string(fill_char, fill_chars_needed); + + if !fill_string.is_empty() { + // Don't left-adjust if precision-filling: that will always be prepending 0s to %d + // arguments, the LEFT_ADJUST flag will be used by a later call to fill_string with + // the 0-filled string as the string param. 
+            format!("{fill_string}{string}")
+        } else {
+            string
+        }
+    }
+
+    /// Truncates `string` according to `precision`, then pads it with spaces.
+    ///
+    /// * `Quantity(Amount(n))` keeps at most the first `n` characters.
+    /// * `Dot` (a `.` with no number after it) truncates to the empty string.
+    /// * Any other value leaves `string` untouched.
+    ///
+    /// The (possibly truncated) string is then handed to `fill_string` with
+    /// `' '` as the fill character.
+    fn format_string_with_precision(
+        &self,
+        string: String,
+        precision: Option<&CFormatPrecision>,
+    ) -> String {
+        // truncate if needed
+        let string = match precision {
+            Some(CFormatPrecision::Quantity(CFormatQuantity::Amount(precision)))
+                if string.chars().count() > *precision =>
+            {
+                string.chars().take(*precision).collect::<String>()
+            }
+            Some(CFormatPrecision::Dot) => {
+                // truncate to 0
+                String::new()
+            }
+            _ => string,
+        };
+        self.fill_string(string, ' ', None)
+    }
+
+    /// Formats `string` using this spec's own precision for truncation.
+    #[inline]
+    pub fn format_string(&self, string: String) -> String {
+        self.format_string_with_precision(string, self.precision.as_ref())
+    }
+
+    /// Formats a single character; the spec's precision is ignored and a fixed
+    /// precision of one character is used instead.
+    #[inline]
+    pub fn format_char(&self, ch: char) -> String {
+        self.format_string_with_precision(
+            ch.to_string(),
+            Some(&(CFormatQuantity::Amount(1).into())),
+        )
+    }
+
+    /// Formats `bytes`, returning a new buffer with truncation and padding applied.
+    ///
+    /// When the precision is an explicit amount, at most that many leading
+    /// bytes are kept. When the minimum field width is an explicit amount, the
+    /// result is padded with ASCII spaces up to that width: on the right if
+    /// `LEFT_ADJUST` is set, on the left otherwise. Content that is already at
+    /// least as wide as the field is returned without any padding.
+    pub fn format_bytes(&self, bytes: &[u8]) -> Vec<u8> {
+        let bytes = if let Some(CFormatPrecision::Quantity(CFormatQuantity::Amount(precision))) =
+            self.precision
+        {
+            &bytes[..cmp::min(bytes.len(), precision)]
+        } else {
+            bytes
+        };
+        if let Some(CFormatQuantity::Amount(width)) = self.min_field_width {
+            // `width` may be smaller than the content. A plain `width - bytes.len()`
+            // underflows `usize` in that case (panic in debug builds, enormous fill
+            // in release builds), so saturate at zero instead.
+            let fill = width.saturating_sub(bytes.len());
+            let mut v = Vec::with_capacity(bytes.len() + fill);
+            if self.flags.contains(CConversionFlags::LEFT_ADJUST) {
+                v.extend_from_slice(bytes);
+                v.resize(v.len() + fill, b' ');
+            } else {
+                v.resize(fill, b' ');
+                v.extend_from_slice(bytes);
+            }
+            v
+        } else {
+            bytes.to_vec()
+        }
+    }
+
+    pub fn format_number(&self, num: &BigInt) -> String {
+        use CNumberType::*;
+        let magnitude = num.abs();
+        let prefix = if self.flags.contains(CConversionFlags::ALTERNATE_FORM) {
+            match self.format_type {
+                CFormatType::Number(Octal) => "0o",
+                CFormatType::Number(Hex(Case::Lower)) => "0x",
+                CFormatType::Number(Hex(Case::Upper)) => "0X",
+                _ => "",
+            }
+        } else {
+            ""
+        };
+
+        let magnitude_string: String = match self.format_type {
+            CFormatType::Number(Decimal) =>
magnitude.to_str_radix(10), + CFormatType::Number(Octal) => magnitude.to_str_radix(8), + CFormatType::Number(Hex(Case::Lower)) => magnitude.to_str_radix(16), + CFormatType::Number(Hex(Case::Upper)) => { + let mut result = magnitude.to_str_radix(16); + result.make_ascii_uppercase(); + result + } + _ => unreachable!(), // Should not happen because caller has to make sure that this is a number + }; + + let sign_string = match num.sign() { + Sign::Minus => "-", + _ => self.flags.sign_string(), + }; + + let padded_magnitude_string = self.fill_string_with_precision(magnitude_string, '0'); + + if self.flags.contains(CConversionFlags::ZERO_PAD) { + let fill_char = if !self.flags.contains(CConversionFlags::LEFT_ADJUST) { + '0' + } else { + ' ' // '-' overrides the '0' conversion if both are given + }; + let signed_prefix = format!("{sign_string}{prefix}"); + format!( + "{}{}", + signed_prefix, + self.fill_string( + padded_magnitude_string, + fill_char, + Some(signed_prefix.chars().count()), + ), + ) + } else { + self.fill_string( + format!("{sign_string}{prefix}{padded_magnitude_string}"), + ' ', + None, + ) + } + } + + pub fn format_float(&self, num: f64) -> String { + let sign_string = if num.is_sign_negative() && !num.is_nan() { + "-" + } else { + self.flags.sign_string() + }; + + let precision = match &self.precision { + Some(CFormatPrecision::Quantity(quantity)) => match quantity { + CFormatQuantity::Amount(amount) => *amount, + CFormatQuantity::FromValuesTuple => 6, + }, + Some(CFormatPrecision::Dot) => 0, + None => 6, + }; + + let magnitude_string = match &self.format_type { + CFormatType::Float(CFloatType::PointDecimal(case)) => { + let magnitude = num.abs(); + float::format_fixed( + precision, + magnitude, + *case, + self.flags.contains(CConversionFlags::ALTERNATE_FORM), + ) + } + CFormatType::Float(CFloatType::Exponent(case)) => { + let magnitude = num.abs(); + float::format_exponent( + precision, + magnitude, + *case, + 
self.flags.contains(CConversionFlags::ALTERNATE_FORM), + ) + } + CFormatType::Float(CFloatType::General(case)) => { + let precision = if precision == 0 { 1 } else { precision }; + let magnitude = num.abs(); + float::format_general( + precision, + magnitude, + *case, + self.flags.contains(CConversionFlags::ALTERNATE_FORM), + false, + ) + } + _ => unreachable!(), + }; + + if self.flags.contains(CConversionFlags::ZERO_PAD) { + let fill_char = if !self.flags.contains(CConversionFlags::LEFT_ADJUST) { + '0' + } else { + ' ' + }; + format!( + "{}{}", + sign_string, + self.fill_string( + magnitude_string, + fill_char, + Some(sign_string.chars().count()), + ) + ) + } else { + self.fill_string(format!("{sign_string}{magnitude_string}"), ' ', None) + } + } +} + +fn parse_spec_mapping_key(iter: &mut ParseIter) -> Result, ParsingError> +where + T: Into + Copy, + I: Iterator, +{ + if let Some(&(index, c)) = iter.peek() { + if c.into() == '(' { + iter.next().unwrap(); + return match parse_text_inside_parentheses(iter) { + Some(key) => Ok(Some(key)), + None => Err((CFormatErrorType::UnmatchedKeyParentheses, index)), + }; + } + } + Ok(None) +} + +fn parse_flags(iter: &mut ParseIter) -> CConversionFlags +where + T: Into + Copy, + I: Iterator, +{ + let mut flags = CConversionFlags::empty(); + while let Some(&(_, c)) = iter.peek() { + let flag = match c.into() { + '#' => CConversionFlags::ALTERNATE_FORM, + '0' => CConversionFlags::ZERO_PAD, + '-' => CConversionFlags::LEFT_ADJUST, + ' ' => CConversionFlags::BLANK_SIGN, + '+' => CConversionFlags::SIGN_CHAR, + _ => break, + }; + iter.next().unwrap(); + flags |= flag; + } + flags +} + +fn consume_length(iter: &mut ParseIter) +where + T: Into + Copy, + I: Iterator, +{ + if let Some(&(_, c)) = iter.peek() { + let c = c.into(); + if c == 'h' || c == 'l' || c == 'L' { + iter.next().unwrap(); + } + } +} + +fn parse_format_type(iter: &mut ParseIter) -> Result<(CFormatType, char), ParsingError> +where + T: Into, + I: Iterator, +{ + use 
CFloatType::*; + use CNumberType::*; + let (index, c) = match iter.next() { + Some((index, c)) => (index, c.into()), + None => { + return Err(( + CFormatErrorType::IncompleteFormat, + iter.peek().map(|x| x.0).unwrap_or(0), + )); + } + }; + let format_type = match c { + 'd' | 'i' | 'u' => CFormatType::Number(Decimal), + 'o' => CFormatType::Number(Octal), + 'x' => CFormatType::Number(Hex(Case::Lower)), + 'X' => CFormatType::Number(Hex(Case::Upper)), + 'e' => CFormatType::Float(Exponent(Case::Lower)), + 'E' => CFormatType::Float(Exponent(Case::Upper)), + 'f' => CFormatType::Float(PointDecimal(Case::Lower)), + 'F' => CFormatType::Float(PointDecimal(Case::Upper)), + 'g' => CFormatType::Float(General(Case::Lower)), + 'G' => CFormatType::Float(General(Case::Upper)), + 'c' => CFormatType::Character, + 'r' => CFormatType::String(CFormatConversion::Repr), + 's' => CFormatType::String(CFormatConversion::Str), + 'b' => CFormatType::String(CFormatConversion::Bytes), + 'a' => CFormatType::String(CFormatConversion::Ascii), + _ => return Err((CFormatErrorType::UnsupportedFormatChar(c), index)), + }; + Ok((format_type, c)) +} + +fn parse_quantity(iter: &mut ParseIter) -> Result, ParsingError> +where + T: Into + Copy, + I: Iterator, +{ + if let Some(&(_, c)) = iter.peek() { + let c: char = c.into(); + if c == '*' { + iter.next().unwrap(); + return Ok(Some(CFormatQuantity::FromValuesTuple)); + } + if let Some(i) = c.to_digit(10) { + let mut num = i as i32; + iter.next().unwrap(); + while let Some(&(index, c)) = iter.peek() { + if let Some(i) = c.into().to_digit(10) { + num = num + .checked_mul(10) + .and_then(|num| num.checked_add(i as i32)) + .ok_or((CFormatErrorType::IntTooBig, index))?; + iter.next().unwrap(); + } else { + break; + } + } + return Ok(Some(CFormatQuantity::Amount(num.unsigned_abs() as usize))); + } + } + Ok(None) +} + +fn parse_precision(iter: &mut ParseIter) -> Result, ParsingError> +where + T: Into + Copy, + I: Iterator, +{ + if let Some(&(_, c)) = iter.peek() { + 
if c.into() == '.' { + iter.next().unwrap(); + let quantity = parse_quantity(iter)?; + let precision = quantity.map_or(CFormatPrecision::Dot, CFormatPrecision::Quantity); + return Ok(Some(precision)); + } + } + Ok(None) +} + +fn parse_text_inside_parentheses(iter: &mut ParseIter) -> Option +where + T: Into, + I: Iterator, +{ + let mut counter: i32 = 1; + let mut contained_text = String::new(); + loop { + let (_, c) = iter.next()?; + let c = c.into(); + match c { + _ if c == '(' => { + counter += 1; + } + _ if c == ')' => { + counter -= 1; + } + _ => (), + } + + if counter > 0 { + contained_text.push(c); + } else { + break; + } + } + + Some(contained_text) +} + +#[derive(Debug, PartialEq)] +pub enum CFormatPart { + Literal(T), + Spec(CFormatSpec), +} + +impl CFormatPart { + #[inline] + pub fn is_specifier(&self) -> bool { + matches!(self, CFormatPart::Spec(_)) + } + + #[inline] + pub fn has_key(&self) -> bool { + match self { + CFormatPart::Spec(s) => s.mapping_key.is_some(), + _ => false, + } + } +} + +#[derive(Debug, PartialEq)] +pub struct CFormatStrOrBytes { + parts: Vec<(usize, CFormatPart)>, +} + +impl CFormatStrOrBytes { + pub fn check_specifiers(&self) -> Option<(usize, bool)> { + let mut count = 0; + let mut mapping_required = false; + for (_, part) in &self.parts { + if part.is_specifier() { + let has_key = part.has_key(); + if count == 0 { + mapping_required = has_key; + } else if mapping_required != has_key { + return None; + } + count += 1; + } + } + Some((count, mapping_required)) + } + + #[inline] + pub fn iter(&self) -> impl Iterator)> { + self.parts.iter() + } + + #[inline] + pub fn iter_mut(&mut self) -> impl Iterator)> { + self.parts.iter_mut() + } +} + +pub type CFormatBytes = CFormatStrOrBytes>; + +impl CFormatBytes { + pub fn parse>(iter: &mut ParseIter) -> Result { + let mut parts = vec![]; + let mut literal = vec![]; + let mut part_index = 0; + while let Some((index, c)) = iter.next() { + if c == b'%' { + if let Some(&(_, second)) = 
iter.peek() { + if second == b'%' { + iter.next().unwrap(); + literal.push(b'%'); + continue; + } else { + if !literal.is_empty() { + parts.push(( + part_index, + CFormatPart::Literal(std::mem::take(&mut literal)), + )); + } + let spec = CFormatSpec::parse(iter).map_err(|err| CFormatError { + typ: err.0, + index: err.1, + })?; + parts.push((index, CFormatPart::Spec(spec))); + if let Some(&(index, _)) = iter.peek() { + part_index = index; + } + } + } else { + return Err(CFormatError { + typ: CFormatErrorType::IncompleteFormat, + index: index + 1, + }); + } + } else { + literal.push(c); + } + } + if !literal.is_empty() { + parts.push((part_index, CFormatPart::Literal(literal))); + } + Ok(Self { parts }) + } + + pub fn parse_from_bytes(bytes: &[u8]) -> Result { + let mut iter = bytes.iter().cloned().enumerate().peekable(); + Self::parse(&mut iter) + } +} + +pub type CFormatString = CFormatStrOrBytes; + +impl FromStr for CFormatString { + type Err = CFormatError; + + fn from_str(text: &str) -> Result { + let mut iter = text.chars().enumerate().peekable(); + Self::parse(&mut iter) + } +} + +impl CFormatString { + pub(crate) fn parse>( + iter: &mut ParseIter, + ) -> Result { + let mut parts = vec![]; + let mut literal = String::new(); + let mut part_index = 0; + while let Some((index, c)) = iter.next() { + if c == '%' { + if let Some(&(_, second)) = iter.peek() { + if second == '%' { + iter.next().unwrap(); + literal.push('%'); + continue; + } else { + if !literal.is_empty() { + parts.push(( + part_index, + CFormatPart::Literal(std::mem::take(&mut literal)), + )); + } + let spec = CFormatSpec::parse(iter).map_err(|err| CFormatError { + typ: err.0, + index: err.1, + })?; + parts.push((index, CFormatPart::Spec(spec))); + if let Some(&(index, _)) = iter.peek() { + part_index = index; + } + } + } else { + return Err(CFormatError { + typ: CFormatErrorType::IncompleteFormat, + index: index + 1, + }); + } + } else { + literal.push(c); + } + } + if !literal.is_empty() { + 
parts.push((part_index, CFormatPart::Literal(literal))); + } + Ok(Self { parts }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_fill_and_align() { + assert_eq!( + "%10s" + .parse::() + .unwrap() + .format_string("test".to_owned()), + " test".to_owned() + ); + assert_eq!( + "%-10s" + .parse::() + .unwrap() + .format_string("test".to_owned()), + "test ".to_owned() + ); + assert_eq!( + "%#10x" + .parse::() + .unwrap() + .format_number(&BigInt::from(0x1337)), + " 0x1337".to_owned() + ); + assert_eq!( + "%-#10x" + .parse::() + .unwrap() + .format_number(&BigInt::from(0x1337)), + "0x1337 ".to_owned() + ); + } + + #[test] + fn test_parse_key() { + let expected = Ok(CFormatSpec { + mapping_key: Some("amount".to_owned()), + format_type: CFormatType::Number(CNumberType::Decimal), + format_char: 'd', + min_field_width: None, + precision: None, + flags: CConversionFlags::empty(), + }); + assert_eq!("%(amount)d".parse::(), expected); + + let expected = Ok(CFormatSpec { + mapping_key: Some("m((u(((l((((ti))))p)))l))e".to_owned()), + format_type: CFormatType::Number(CNumberType::Decimal), + format_char: 'd', + min_field_width: None, + precision: None, + flags: CConversionFlags::empty(), + }); + assert_eq!( + "%(m((u(((l((((ti))))p)))l))e)d".parse::(), + expected + ); + } + + #[test] + fn test_format_parse_key_fail() { + assert_eq!( + "%(aged".parse::(), + Err(CFormatError { + typ: CFormatErrorType::UnmatchedKeyParentheses, + index: 1 + }) + ); + } + + #[test] + fn test_format_parse_type_fail() { + assert_eq!( + "Hello %n".parse::(), + Err(CFormatError { + typ: CFormatErrorType::UnsupportedFormatChar('n'), + index: 7 + }) + ); + } + + #[test] + fn test_incomplete_format_fail() { + assert_eq!( + "Hello %".parse::(), + Err(CFormatError { + typ: CFormatErrorType::IncompleteFormat, + index: 7 + }) + ); + } + + #[test] + fn test_parse_flags() { + let expected = Ok(CFormatSpec { + format_type: CFormatType::Number(CNumberType::Decimal), + format_char: 'd', + 
min_field_width: Some(CFormatQuantity::Amount(10)), + precision: None, + mapping_key: None, + flags: CConversionFlags::all(), + }); + let parsed = "% 0 -+++###10d".parse::(); + assert_eq!(parsed, expected); + assert_eq!( + parsed.unwrap().format_number(&BigInt::from(12)), + "+12 ".to_owned() + ); + } + + #[test] + fn test_parse_and_format_string() { + assert_eq!( + "%5.4s" + .parse::() + .unwrap() + .format_string("Hello, World!".to_owned()), + " Hell".to_owned() + ); + assert_eq!( + "%-5.4s" + .parse::() + .unwrap() + .format_string("Hello, World!".to_owned()), + "Hell ".to_owned() + ); + assert_eq!( + "%.s" + .parse::() + .unwrap() + .format_string("Hello, World!".to_owned()), + "".to_owned() + ); + assert_eq!( + "%5.s" + .parse::() + .unwrap() + .format_string("Hello, World!".to_owned()), + " ".to_owned() + ); + } + + #[test] + fn test_parse_and_format_unicode_string() { + assert_eq!( + "%.2s" + .parse::() + .unwrap() + .format_string("❤❤❤❤❤❤❤❤".to_owned()), + "❤❤".to_owned() + ); + } + + #[test] + fn test_parse_and_format_number() { + assert_eq!( + "%5d" + .parse::() + .unwrap() + .format_number(&BigInt::from(27)), + " 27".to_owned() + ); + assert_eq!( + "%05d" + .parse::() + .unwrap() + .format_number(&BigInt::from(27)), + "00027".to_owned() + ); + assert_eq!( + "%.5d" + .parse::() + .unwrap() + .format_number(&BigInt::from(27)), + "00027".to_owned() + ); + assert_eq!( + "%+05d" + .parse::() + .unwrap() + .format_number(&BigInt::from(27)), + "+0027".to_owned() + ); + assert_eq!( + "%-d" + .parse::() + .unwrap() + .format_number(&BigInt::from(-27)), + "-27".to_owned() + ); + assert_eq!( + "% d" + .parse::() + .unwrap() + .format_number(&BigInt::from(27)), + " 27".to_owned() + ); + assert_eq!( + "% d" + .parse::() + .unwrap() + .format_number(&BigInt::from(-27)), + "-27".to_owned() + ); + assert_eq!( + "%08x" + .parse::() + .unwrap() + .format_number(&BigInt::from(0x1337)), + "00001337".to_owned() + ); + assert_eq!( + "%#010x" + .parse::() + .unwrap() + 
.format_number(&BigInt::from(0x1337)), + "0x00001337".to_owned() + ); + assert_eq!( + "%-#010x" + .parse::() + .unwrap() + .format_number(&BigInt::from(0x1337)), + "0x1337 ".to_owned() + ); + } + + #[test] + fn test_parse_and_format_float() { + assert_eq!( + "%f".parse::().unwrap().format_float(1.2345), + "1.234500" + ); + assert_eq!( + "%.2f".parse::().unwrap().format_float(1.2345), + "1.23" + ); + assert_eq!( + "%.f".parse::().unwrap().format_float(1.2345), + "1" + ); + assert_eq!( + "%+.f".parse::().unwrap().format_float(1.2345), + "+1" + ); + assert_eq!( + "%+f".parse::().unwrap().format_float(1.2345), + "+1.234500" + ); + assert_eq!( + "% f".parse::().unwrap().format_float(1.2345), + " 1.234500" + ); + assert_eq!( + "%f".parse::().unwrap().format_float(-1.2345), + "-1.234500" + ); + assert_eq!( + "%f".parse::() + .unwrap() + .format_float(1.2345678901), + "1.234568" + ); + } + + #[test] + fn test_format_parse() { + let fmt = "Hello, my name is %s and I'm %d years old"; + let expected = Ok(CFormatString { + parts: vec![ + (0, CFormatPart::Literal("Hello, my name is ".to_owned())), + ( + 18, + CFormatPart::Spec(CFormatSpec { + format_type: CFormatType::String(CFormatConversion::Str), + format_char: 's', + mapping_key: None, + min_field_width: None, + precision: None, + flags: CConversionFlags::empty(), + }), + ), + (20, CFormatPart::Literal(" and I'm ".to_owned())), + ( + 29, + CFormatPart::Spec(CFormatSpec { + format_type: CFormatType::Number(CNumberType::Decimal), + format_char: 'd', + mapping_key: None, + min_field_width: None, + precision: None, + flags: CConversionFlags::empty(), + }), + ), + (31, CFormatPart::Literal(" years old".to_owned())), + ], + }); + let result = fmt.parse::(); + assert_eq!( + result, expected, + "left = {result:#?} \n\n\n right = {expected:#?}" + ); + } +} diff --git a/literal/src/format.rs b/literal/src/format.rs index 3d5d9d7..8764c35 100644 --- a/literal/src/format.rs +++ b/literal/src/format.rs @@ -1,5 +1,1229 @@ +use 
crate::float; +use itertools::{Itertools, PeekingNext}; +use num_bigint::{BigInt, Sign}; +use num_traits::{cast::ToPrimitive, Signed}; +use std::ops::Deref; +use std::{cmp, str::FromStr}; + #[derive(Debug, PartialEq, Clone, Copy)] pub enum Case { Lower, Upper, } + +trait FormatParse { + fn parse(text: &str) -> (Option, &str) + where + Self: Sized; +} + +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum FormatConversion { + Str, + Repr, + Ascii, + Bytes, +} + +impl FormatParse for FormatConversion { + fn parse(text: &str) -> (Option, &str) { + let Some(conversion) = Self::from_string(text) else { + return (None, text); + }; + let mut chars = text.chars(); + chars.next(); // Consume the bang + chars.next(); // Consume one r,s,a char + (Some(conversion), chars.as_str()) + } +} + +impl FormatConversion { + pub fn from_char(c: char) -> Option { + match c { + 's' => Some(FormatConversion::Str), + 'r' => Some(FormatConversion::Repr), + 'a' => Some(FormatConversion::Ascii), + 'b' => Some(FormatConversion::Bytes), + _ => None, + } + } + + fn from_string(text: &str) -> Option { + let mut chars = text.chars(); + if chars.next() != Some('!') { + return None; + } + + FormatConversion::from_char(chars.next()?) 
+ } +} + +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum FormatAlign { + Left, + Right, + AfterSign, + Center, +} + +impl FormatAlign { + fn from_char(c: char) -> Option { + match c { + '<' => Some(FormatAlign::Left), + '>' => Some(FormatAlign::Right), + '=' => Some(FormatAlign::AfterSign), + '^' => Some(FormatAlign::Center), + _ => None, + } + } +} + +impl FormatParse for FormatAlign { + fn parse(text: &str) -> (Option, &str) { + let mut chars = text.chars(); + if let Some(maybe_align) = chars.next().and_then(Self::from_char) { + (Some(maybe_align), chars.as_str()) + } else { + (None, text) + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum FormatSign { + Plus, + Minus, + MinusOrSpace, +} + +impl FormatParse for FormatSign { + fn parse(text: &str) -> (Option, &str) { + let mut chars = text.chars(); + match chars.next() { + Some('-') => (Some(Self::Minus), chars.as_str()), + Some('+') => (Some(Self::Plus), chars.as_str()), + Some(' ') => (Some(Self::MinusOrSpace), chars.as_str()), + _ => (None, text), + } + } +} + +#[derive(Debug, PartialEq)] +pub enum FormatGrouping { + Comma, + Underscore, +} + +impl FormatParse for FormatGrouping { + fn parse(text: &str) -> (Option, &str) { + let mut chars = text.chars(); + match chars.next() { + Some('_') => (Some(Self::Underscore), chars.as_str()), + Some(',') => (Some(Self::Comma), chars.as_str()), + _ => (None, text), + } + } +} + +#[derive(Debug, PartialEq)] +pub enum FormatType { + String, + Binary, + Character, + Decimal, + Octal, + Number(Case), + Hex(Case), + Exponent(Case), + GeneralFormat(Case), + FixedPoint(Case), + Percentage, +} + +impl From<&FormatType> for char { + fn from(from: &FormatType) -> char { + match from { + FormatType::String => 's', + FormatType::Binary => 'b', + FormatType::Character => 'c', + FormatType::Decimal => 'd', + FormatType::Octal => 'o', + FormatType::Number(Case::Lower) => 'n', + FormatType::Number(Case::Upper) => 'N', + FormatType::Hex(Case::Lower) => 'x', + 
FormatType::Hex(Case::Upper) => 'X', + FormatType::Exponent(Case::Lower) => 'e', + FormatType::Exponent(Case::Upper) => 'E', + FormatType::GeneralFormat(Case::Lower) => 'g', + FormatType::GeneralFormat(Case::Upper) => 'G', + FormatType::FixedPoint(Case::Lower) => 'f', + FormatType::FixedPoint(Case::Upper) => 'F', + FormatType::Percentage => '%', + } + } +} + +impl FormatParse for FormatType { + fn parse(text: &str) -> (Option, &str) { + let mut chars = text.chars(); + match chars.next() { + Some('s') => (Some(Self::String), chars.as_str()), + Some('b') => (Some(Self::Binary), chars.as_str()), + Some('c') => (Some(Self::Character), chars.as_str()), + Some('d') => (Some(Self::Decimal), chars.as_str()), + Some('o') => (Some(Self::Octal), chars.as_str()), + Some('n') => (Some(Self::Number(Case::Lower)), chars.as_str()), + Some('N') => (Some(Self::Number(Case::Upper)), chars.as_str()), + Some('x') => (Some(Self::Hex(Case::Lower)), chars.as_str()), + Some('X') => (Some(Self::Hex(Case::Upper)), chars.as_str()), + Some('e') => (Some(Self::Exponent(Case::Lower)), chars.as_str()), + Some('E') => (Some(Self::Exponent(Case::Upper)), chars.as_str()), + Some('f') => (Some(Self::FixedPoint(Case::Lower)), chars.as_str()), + Some('F') => (Some(Self::FixedPoint(Case::Upper)), chars.as_str()), + Some('g') => (Some(Self::GeneralFormat(Case::Lower)), chars.as_str()), + Some('G') => (Some(Self::GeneralFormat(Case::Upper)), chars.as_str()), + Some('%') => (Some(Self::Percentage), chars.as_str()), + _ => (None, text), + } + } +} + +#[derive(Debug, PartialEq)] +pub struct FormatSpec { + conversion: Option, + fill: Option, + align: Option, + sign: Option, + alternate_form: bool, + width: Option, + grouping_option: Option, + precision: Option, + format_type: Option, +} + +fn get_num_digits(text: &str) -> usize { + for (index, character) in text.char_indices() { + if !character.is_ascii_digit() { + return index; + } + } + text.len() +} + +fn parse_fill_and_align(text: &str) -> (Option, 
Option, &str) { + let char_indices: Vec<(usize, char)> = text.char_indices().take(3).collect(); + if char_indices.is_empty() { + (None, None, text) + } else if char_indices.len() == 1 { + let (maybe_align, remaining) = FormatAlign::parse(text); + (None, maybe_align, remaining) + } else { + let (maybe_align, remaining) = FormatAlign::parse(&text[char_indices[1].0..]); + if maybe_align.is_some() { + (Some(char_indices[0].1), maybe_align, remaining) + } else { + let (only_align, only_align_remaining) = FormatAlign::parse(text); + (None, only_align, only_align_remaining) + } + } +} + +fn parse_number(text: &str) -> Result<(Option, &str), FormatSpecError> { + let num_digits: usize = get_num_digits(text); + if num_digits == 0 { + return Ok((None, text)); + } + if let Ok(num) = text[..num_digits].parse::() { + Ok((Some(num), &text[num_digits..])) + } else { + // NOTE: this condition is different from CPython + Err(FormatSpecError::DecimalDigitsTooMany) + } +} + +fn parse_alternate_form(text: &str) -> (bool, &str) { + let mut chars = text.chars(); + match chars.next() { + Some('#') => (true, chars.as_str()), + _ => (false, text), + } +} + +fn parse_zero(text: &str) -> (bool, &str) { + let mut chars = text.chars(); + match chars.next() { + Some('0') => (true, chars.as_str()), + _ => (false, text), + } +} + +fn parse_precision(text: &str) -> Result<(Option, &str), FormatSpecError> { + let mut chars = text.chars(); + Ok(match chars.next() { + Some('.') => { + let (size, remaining) = parse_number(chars.as_str())?; + if let Some(size) = size { + if size > i32::MAX as usize { + return Err(FormatSpecError::PrecisionTooBig); + } + (Some(size), remaining) + } else { + (None, text) + } + } + _ => (None, text), + }) +} + +impl FormatSpec { + pub fn parse(text: &str) -> Result { + // get_integer in CPython + let (conversion, text) = FormatConversion::parse(text); + let (mut fill, mut align, text) = parse_fill_and_align(text); + let (sign, text) = FormatSign::parse(text); + let 
(alternate_form, text) = parse_alternate_form(text); + let (zero, text) = parse_zero(text); + let (width, text) = parse_number(text)?; + let (grouping_option, text) = FormatGrouping::parse(text); + let (precision, text) = parse_precision(text)?; + let (format_type, text) = FormatType::parse(text); + if !text.is_empty() { + return Err(FormatSpecError::InvalidFormatSpecifier); + } + + if zero && fill.is_none() { + fill.replace('0'); + align = align.or(Some(FormatAlign::AfterSign)); + } + + Ok(FormatSpec { + conversion, + fill, + align, + sign, + alternate_form, + width, + grouping_option, + precision, + format_type, + }) + } + + fn compute_fill_string(fill_char: char, fill_chars_needed: i32) -> String { + (0..fill_chars_needed) + .map(|_| fill_char) + .collect::() + } + + fn add_magnitude_separators_for_char( + magnitude_str: String, + inter: i32, + sep: char, + disp_digit_cnt: i32, + ) -> String { + // Don't add separators to the floating decimal point of numbers + let mut parts = magnitude_str.splitn(2, '.'); + let magnitude_int_str = parts.next().unwrap().to_string(); + let dec_digit_cnt = magnitude_str.len() as i32 - magnitude_int_str.len() as i32; + let int_digit_cnt = disp_digit_cnt - dec_digit_cnt; + let mut result = FormatSpec::separate_integer(magnitude_int_str, inter, sep, int_digit_cnt); + if let Some(part) = parts.next() { + result.push_str(&format!(".{part}")) + } + result + } + + fn separate_integer( + magnitude_str: String, + inter: i32, + sep: char, + disp_digit_cnt: i32, + ) -> String { + let magnitude_len = magnitude_str.len() as i32; + let offset = (disp_digit_cnt % (inter + 1) == 0) as i32; + let disp_digit_cnt = disp_digit_cnt + offset; + let pad_cnt = disp_digit_cnt - magnitude_len; + let sep_cnt = disp_digit_cnt / (inter + 1); + let diff = pad_cnt - sep_cnt; + if pad_cnt > 0 && diff > 0 { + // separate with 0 padding + let padding = "0".repeat(diff as usize); + let padded_num = format!("{padding}{magnitude_str}"); + 
FormatSpec::insert_separator(padded_num, inter, sep, sep_cnt) + } else { + // separate without padding + let sep_cnt = (magnitude_len - 1) / inter; + FormatSpec::insert_separator(magnitude_str, inter, sep, sep_cnt) + } + } + + fn insert_separator(mut magnitude_str: String, inter: i32, sep: char, sep_cnt: i32) -> String { + let magnitude_len = magnitude_str.len() as i32; + for i in 1..sep_cnt + 1 { + magnitude_str.insert((magnitude_len - inter * i) as usize, sep); + } + magnitude_str + } + + fn validate_format(&self, default_format_type: FormatType) -> Result<(), FormatSpecError> { + let format_type = self.format_type.as_ref().unwrap_or(&default_format_type); + match (&self.grouping_option, format_type) { + ( + Some(FormatGrouping::Comma), + FormatType::String + | FormatType::Character + | FormatType::Binary + | FormatType::Octal + | FormatType::Hex(_) + | FormatType::Number(_), + ) => { + let ch = char::from(format_type); + Err(FormatSpecError::UnspecifiedFormat(',', ch)) + } + ( + Some(FormatGrouping::Underscore), + FormatType::String | FormatType::Character | FormatType::Number(_), + ) => { + let ch = char::from(format_type); + Err(FormatSpecError::UnspecifiedFormat('_', ch)) + } + _ => Ok(()), + } + } + + fn get_separator_interval(&self) -> usize { + match self.format_type { + Some(FormatType::Binary | FormatType::Octal | FormatType::Hex(_)) => 4, + Some(FormatType::Decimal | FormatType::Number(_) | FormatType::FixedPoint(_)) => 3, + None => 3, + _ => panic!("Separators only valid for numbers!"), + } + } + + fn add_magnitude_separators(&self, magnitude_str: String, prefix: &str) -> String { + match &self.grouping_option { + Some(fg) => { + let sep = match fg { + FormatGrouping::Comma => ',', + FormatGrouping::Underscore => '_', + }; + let inter = self.get_separator_interval().try_into().unwrap(); + let magnitude_len = magnitude_str.len(); + let width = self.width.unwrap_or(magnitude_len) as i32 - prefix.len() as i32; + let disp_digit_cnt = cmp::max(width, 
magnitude_len as i32); + FormatSpec::add_magnitude_separators_for_char( + magnitude_str, + inter, + sep, + disp_digit_cnt, + ) + } + None => magnitude_str, + } + } + + pub fn format_bool(&self, input: bool) -> Result { + let x = u8::from(input); + let result: Result = match &self.format_type { + Some(FormatType::Decimal) => Ok(x.to_string()), + None => { + let first_letter = (input.to_string().as_bytes()[0] as char).to_uppercase(); + Ok(first_letter.collect::() + &input.to_string()[1..]) + } + _ => Err(FormatSpecError::InvalidFormatSpecifier), + }; + result + } + + pub fn format_float(&self, num: f64) -> Result { + self.validate_format(FormatType::FixedPoint(Case::Lower))?; + let precision = self.precision.unwrap_or(6); + let magnitude = num.abs(); + let raw_magnitude_str: Result = match &self.format_type { + Some(FormatType::FixedPoint(case)) => Ok(float::format_fixed( + precision, + magnitude, + *case, + self.alternate_form, + )), + Some(FormatType::Decimal) + | Some(FormatType::Binary) + | Some(FormatType::Octal) + | Some(FormatType::Hex(_)) + | Some(FormatType::String) + | Some(FormatType::Character) + | Some(FormatType::Number(Case::Upper)) => { + let ch = char::from(self.format_type.as_ref().unwrap()); + Err(FormatSpecError::UnknownFormatCode(ch, "float")) + } + Some(FormatType::GeneralFormat(case)) | Some(FormatType::Number(case)) => { + let precision = if precision == 0 { 1 } else { precision }; + Ok(float::format_general( + precision, + magnitude, + *case, + self.alternate_form, + false, + )) + } + Some(FormatType::Exponent(case)) => Ok(float::format_exponent( + precision, + magnitude, + *case, + self.alternate_form, + )), + Some(FormatType::Percentage) => match magnitude { + magnitude if magnitude.is_nan() => Ok("nan%".to_owned()), + magnitude if magnitude.is_infinite() => Ok("inf%".to_owned()), + _ => { + let result = format!("{:.*}", precision, magnitude * 100.0); + let point = float::decimal_point_or_empty(precision, self.alternate_form); + 
Ok(format!("{result}{point}%")) + } + }, + None => match magnitude { + magnitude if magnitude.is_nan() => Ok("nan".to_owned()), + magnitude if magnitude.is_infinite() => Ok("inf".to_owned()), + _ => match self.precision { + Some(precision) => Ok(float::format_general( + precision, + magnitude, + Case::Lower, + self.alternate_form, + true, + )), + None => Ok(float::to_string(magnitude)), + }, + }, + }; + let format_sign = self.sign.unwrap_or(FormatSign::Minus); + let sign_str = if num.is_sign_negative() && !num.is_nan() { + "-" + } else { + match format_sign { + FormatSign::Plus => "+", + FormatSign::Minus => "", + FormatSign::MinusOrSpace => " ", + } + }; + let magnitude_str = self.add_magnitude_separators(raw_magnitude_str?, sign_str); + self.format_sign_and_align(&AsciiStr::new(&magnitude_str), sign_str, FormatAlign::Right) + } + + #[inline] + fn format_int_radix(&self, magnitude: BigInt, radix: u32) -> Result { + match self.precision { + Some(_) => Err(FormatSpecError::PrecisionNotAllowed), + None => Ok(magnitude.to_str_radix(radix)), + } + } + + pub fn format_int(&self, num: &BigInt) -> Result { + self.validate_format(FormatType::Decimal)?; + let magnitude = num.abs(); + let prefix = if self.alternate_form { + match self.format_type { + Some(FormatType::Binary) => "0b", + Some(FormatType::Octal) => "0o", + Some(FormatType::Hex(Case::Lower)) => "0x", + Some(FormatType::Hex(Case::Upper)) => "0X", + _ => "", + } + } else { + "" + }; + let raw_magnitude_str = match self.format_type { + Some(FormatType::Binary) => self.format_int_radix(magnitude, 2), + Some(FormatType::Decimal) => self.format_int_radix(magnitude, 10), + Some(FormatType::Octal) => self.format_int_radix(magnitude, 8), + Some(FormatType::Hex(Case::Lower)) => self.format_int_radix(magnitude, 16), + Some(FormatType::Hex(Case::Upper)) => match self.precision { + Some(_) => Err(FormatSpecError::PrecisionNotAllowed), + None => { + let mut result = magnitude.to_str_radix(16); + result.make_ascii_uppercase(); 
+ Ok(result) + } + }, + Some(FormatType::Number(Case::Lower)) => self.format_int_radix(magnitude, 10), + Some(FormatType::Number(Case::Upper)) => { + Err(FormatSpecError::UnknownFormatCode('N', "int")) + } + Some(FormatType::String) => Err(FormatSpecError::UnknownFormatCode('s', "int")), + Some(FormatType::Character) => match (self.sign, self.alternate_form) { + (Some(_), _) => Err(FormatSpecError::NotAllowed("Sign")), + (_, true) => Err(FormatSpecError::NotAllowed("Alternate form (#)")), + (_, _) => match num.to_u32() { + Some(n) if n <= 0x10ffff => Ok(std::char::from_u32(n).unwrap().to_string()), + Some(_) | None => Err(FormatSpecError::CodeNotInRange), + }, + }, + Some(FormatType::GeneralFormat(_)) + | Some(FormatType::FixedPoint(_)) + | Some(FormatType::Exponent(_)) + | Some(FormatType::Percentage) => match num.to_f64() { + Some(float) => return self.format_float(float), + _ => Err(FormatSpecError::UnableToConvert), + }, + None => self.format_int_radix(magnitude, 10), + }?; + let format_sign = self.sign.unwrap_or(FormatSign::Minus); + let sign_str = match num.sign() { + Sign::Minus => "-", + _ => match format_sign { + FormatSign::Plus => "+", + FormatSign::Minus => "", + FormatSign::MinusOrSpace => " ", + }, + }; + let sign_prefix = format!("{sign_str}{prefix}"); + let magnitude_str = self.add_magnitude_separators(raw_magnitude_str, &sign_prefix); + self.format_sign_and_align( + &AsciiStr::new(&magnitude_str), + &sign_prefix, + FormatAlign::Right, + ) + } + + pub fn format_string(&self, s: &T) -> Result + where + T: CharLen + Deref, + { + self.validate_format(FormatType::String)?; + match self.format_type { + Some(FormatType::String) | None => self + .format_sign_and_align(s, "", FormatAlign::Left) + .map(|mut value| { + if let Some(precision) = self.precision { + value.truncate(precision); + } + value + }), + _ => { + let ch = char::from(self.format_type.as_ref().unwrap()); + Err(FormatSpecError::UnknownFormatCode(ch, "str")) + } + } + } + + fn 
format_sign_and_align( + &self, + magnitude_str: &T, + sign_str: &str, + default_align: FormatAlign, + ) -> Result + where + T: CharLen + Deref, + { + let align = self.align.unwrap_or(default_align); + + let num_chars = magnitude_str.char_len(); + let fill_char = self.fill.unwrap_or(' '); + let fill_chars_needed: i32 = self.width.map_or(0, |w| { + cmp::max(0, (w as i32) - (num_chars as i32) - (sign_str.len() as i32)) + }); + + let magnitude_str = magnitude_str.deref(); + Ok(match align { + FormatAlign::Left => format!( + "{}{}{}", + sign_str, + magnitude_str, + FormatSpec::compute_fill_string(fill_char, fill_chars_needed) + ), + FormatAlign::Right => format!( + "{}{}{}", + FormatSpec::compute_fill_string(fill_char, fill_chars_needed), + sign_str, + magnitude_str + ), + FormatAlign::AfterSign => format!( + "{}{}{}", + sign_str, + FormatSpec::compute_fill_string(fill_char, fill_chars_needed), + magnitude_str + ), + FormatAlign::Center => { + let left_fill_chars_needed = fill_chars_needed / 2; + let right_fill_chars_needed = fill_chars_needed - left_fill_chars_needed; + let left_fill_string = + FormatSpec::compute_fill_string(fill_char, left_fill_chars_needed); + let right_fill_string = + FormatSpec::compute_fill_string(fill_char, right_fill_chars_needed); + format!("{left_fill_string}{sign_str}{magnitude_str}{right_fill_string}") + } + }) + } +} + +pub trait CharLen { + /// Returns the number of characters in the text + fn char_len(&self) -> usize; +} + +struct AsciiStr<'a> { + inner: &'a str, +} + +impl<'a> AsciiStr<'a> { + fn new(inner: &'a str) -> Self { + Self { inner } + } +} + +impl CharLen for AsciiStr<'_> { + fn char_len(&self) -> usize { + self.inner.len() + } +} + +impl Deref for AsciiStr<'_> { + type Target = str; + fn deref(&self) -> &Self::Target { + self.inner + } +} + +#[derive(Debug, PartialEq)] +pub enum FormatSpecError { + DecimalDigitsTooMany, + PrecisionTooBig, + InvalidFormatSpecifier, + UnspecifiedFormat(char, char), + UnknownFormatCode(char, 
&'static str), + PrecisionNotAllowed, + NotAllowed(&'static str), + UnableToConvert, + CodeNotInRange, + NotImplemented(char, &'static str), +} + +#[derive(Debug, PartialEq)] +pub enum FormatParseError { + UnmatchedBracket, + MissingStartBracket, + UnescapedStartBracketInLiteral, + InvalidFormatSpecifier, + UnknownConversion, + EmptyAttribute, + MissingRightBracket, + InvalidCharacterAfterRightBracket, +} + +impl FromStr for FormatSpec { + type Err = FormatSpecError; + fn from_str(s: &str) -> Result { + FormatSpec::parse(s) + } +} + +#[derive(Debug, PartialEq)] +pub enum FieldNamePart { + Attribute(String), + Index(usize), + StringIndex(String), +} + +impl FieldNamePart { + fn parse_part( + chars: &mut impl PeekingNext, + ) -> Result, FormatParseError> { + chars + .next() + .map(|ch| match ch { + '.' => { + let mut attribute = String::new(); + for ch in chars.peeking_take_while(|ch| *ch != '.' && *ch != '[') { + attribute.push(ch); + } + if attribute.is_empty() { + Err(FormatParseError::EmptyAttribute) + } else { + Ok(FieldNamePart::Attribute(attribute)) + } + } + '[' => { + let mut index = String::new(); + for ch in chars { + if ch == ']' { + return if index.is_empty() { + Err(FormatParseError::EmptyAttribute) + } else if let Ok(index) = index.parse::() { + Ok(FieldNamePart::Index(index)) + } else { + Ok(FieldNamePart::StringIndex(index)) + }; + } + index.push(ch); + } + Err(FormatParseError::MissingRightBracket) + } + _ => Err(FormatParseError::InvalidCharacterAfterRightBracket), + }) + .transpose() + } +} + +#[derive(Debug, PartialEq)] +pub enum FieldType { + Auto, + Index(usize), + Keyword(String), +} + +#[derive(Debug, PartialEq)] +pub struct FieldName { + pub field_type: FieldType, + pub parts: Vec, +} + +impl FieldName { + pub fn parse(text: &str) -> Result { + let mut chars = text.chars().peekable(); + let mut first = String::new(); + for ch in chars.peeking_take_while(|ch| *ch != '.' 
&& *ch != '[') { + first.push(ch); + } + + let field_type = if first.is_empty() { + FieldType::Auto + } else if let Ok(index) = first.parse::() { + FieldType::Index(index) + } else { + FieldType::Keyword(first) + }; + + let mut parts = Vec::new(); + while let Some(part) = FieldNamePart::parse_part(&mut chars)? { + parts.push(part) + } + + Ok(FieldName { field_type, parts }) + } +} + +#[derive(Debug, PartialEq)] +pub enum FormatPart { + Field { + field_name: String, + conversion_spec: Option, + format_spec: String, + }, + Literal(String), +} + +#[derive(Debug, PartialEq)] +pub struct FormatString { + pub format_parts: Vec, +} + +impl FormatString { + fn parse_literal_single(text: &str) -> Result<(char, &str), FormatParseError> { + let mut chars = text.chars(); + // This should never be called with an empty str + let first_char = chars.next().unwrap(); + // isn't this detectable only with bytes operation? + if first_char == '{' || first_char == '}' { + let maybe_next_char = chars.next(); + // if we see a bracket, it has to be escaped by doubling up to be in a literal + return if maybe_next_char.is_none() || maybe_next_char.unwrap() != first_char { + Err(FormatParseError::UnescapedStartBracketInLiteral) + } else { + Ok((first_char, chars.as_str())) + }; + } + Ok((first_char, chars.as_str())) + } + + fn parse_literal(text: &str) -> Result<(FormatPart, &str), FormatParseError> { + let mut cur_text = text; + let mut result_string = String::new(); + while !cur_text.is_empty() { + match FormatString::parse_literal_single(cur_text) { + Ok((next_char, remaining)) => { + result_string.push(next_char); + cur_text = remaining; + } + Err(err) => { + return if !result_string.is_empty() { + Ok((FormatPart::Literal(result_string), cur_text)) + } else { + Err(err) + }; + } + } + } + Ok((FormatPart::Literal(result_string), "")) + } + + fn parse_part_in_brackets(text: &str) -> Result { + let parts: Vec<&str> = text.splitn(2, ':').collect(); + // before the comma is a keyword or arg 
index, after the comma is maybe a spec. + let arg_part = parts[0]; + + let format_spec = if parts.len() > 1 { + parts[1].to_owned() + } else { + String::new() + }; + + // On parts[0] can still be the conversion (!r, !s, !a) + let parts: Vec<&str> = arg_part.splitn(2, '!').collect(); + // before the bang is a keyword or arg index, after the comma is maybe a conversion spec. + let arg_part = parts[0]; + + let conversion_spec = parts + .get(1) + .map(|conversion| { + // conversions are only every one character + conversion + .chars() + .exactly_one() + .map_err(|_| FormatParseError::UnknownConversion) + }) + .transpose()?; + + Ok(FormatPart::Field { + field_name: arg_part.to_owned(), + conversion_spec, + format_spec, + }) + } + + fn parse_spec(text: &str) -> Result<(FormatPart, &str), FormatParseError> { + let mut nested = false; + let mut end_bracket_pos = None; + let mut left = String::new(); + + // There may be one layer nesting brackets in spec + for (idx, c) in text.char_indices() { + if idx == 0 { + if c != '{' { + return Err(FormatParseError::MissingStartBracket); + } + } else if c == '{' { + if nested { + return Err(FormatParseError::InvalidFormatSpecifier); + } else { + nested = true; + left.push(c); + continue; + } + } else if c == '}' { + if nested { + nested = false; + left.push(c); + continue; + } else { + end_bracket_pos = Some(idx); + break; + } + } else { + left.push(c); + } + } + if let Some(pos) = end_bracket_pos { + let (_, right) = text.split_at(pos); + let format_part = FormatString::parse_part_in_brackets(&left)?; + Ok((format_part, &right[1..])) + } else { + Err(FormatParseError::UnmatchedBracket) + } + } +} + +pub trait FromTemplate<'a>: Sized { + type Err; + fn from_str(s: &'a str) -> Result; +} + +impl<'a> FromTemplate<'a> for FormatString { + type Err = FormatParseError; + + fn from_str(text: &'a str) -> Result { + let mut cur_text: &str = text; + let mut parts: Vec = Vec::new(); + while !cur_text.is_empty() { + // Try to parse both literals 
and bracketed format parts until we + // run out of text + cur_text = FormatString::parse_literal(cur_text) + .or_else(|_| FormatString::parse_spec(cur_text)) + .map(|(part, new_text)| { + parts.push(part); + new_text + })?; + } + Ok(FormatString { + format_parts: parts, + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_fill_and_align() { + assert_eq!( + parse_fill_and_align(" <"), + (Some(' '), Some(FormatAlign::Left), "") + ); + assert_eq!( + parse_fill_and_align(" <22"), + (Some(' '), Some(FormatAlign::Left), "22") + ); + assert_eq!( + parse_fill_and_align("<22"), + (None, Some(FormatAlign::Left), "22") + ); + assert_eq!( + parse_fill_and_align(" ^^"), + (Some(' '), Some(FormatAlign::Center), "^") + ); + assert_eq!( + parse_fill_and_align("==="), + (Some('='), Some(FormatAlign::AfterSign), "=") + ); + } + + #[test] + fn test_width_only() { + let expected = Ok(FormatSpec { + conversion: None, + fill: None, + align: None, + sign: None, + alternate_form: false, + width: Some(33), + grouping_option: None, + precision: None, + format_type: None, + }); + assert_eq!(FormatSpec::parse("33"), expected); + } + + #[test] + fn test_fill_and_width() { + let expected = Ok(FormatSpec { + conversion: None, + fill: Some('<'), + align: Some(FormatAlign::Right), + sign: None, + alternate_form: false, + width: Some(33), + grouping_option: None, + precision: None, + format_type: None, + }); + assert_eq!(FormatSpec::parse("<>33"), expected); + } + + #[test] + fn test_all() { + let expected = Ok(FormatSpec { + conversion: None, + fill: Some('<'), + align: Some(FormatAlign::Right), + sign: Some(FormatSign::Minus), + alternate_form: true, + width: Some(23), + grouping_option: Some(FormatGrouping::Comma), + precision: Some(11), + format_type: Some(FormatType::Binary), + }); + assert_eq!(FormatSpec::parse("<>-#23,.11b"), expected); + } + + #[test] + fn test_format_int() { + assert_eq!( + FormatSpec::parse("d") + .unwrap() + 
.format_int(&BigInt::from_bytes_be(Sign::Plus, b"\x10")), + Ok("16".to_owned()) + ); + assert_eq!( + FormatSpec::parse("x") + .unwrap() + .format_int(&BigInt::from_bytes_be(Sign::Plus, b"\x10")), + Ok("10".to_owned()) + ); + assert_eq!( + FormatSpec::parse("b") + .unwrap() + .format_int(&BigInt::from_bytes_be(Sign::Plus, b"\x10")), + Ok("10000".to_owned()) + ); + assert_eq!( + FormatSpec::parse("o") + .unwrap() + .format_int(&BigInt::from_bytes_be(Sign::Plus, b"\x10")), + Ok("20".to_owned()) + ); + assert_eq!( + FormatSpec::parse("+d") + .unwrap() + .format_int(&BigInt::from_bytes_be(Sign::Plus, b"\x10")), + Ok("+16".to_owned()) + ); + assert_eq!( + FormatSpec::parse("^ 5d") + .unwrap() + .format_int(&BigInt::from_bytes_be(Sign::Minus, b"\x10")), + Ok(" -16 ".to_owned()) + ); + assert_eq!( + FormatSpec::parse("0>+#10x") + .unwrap() + .format_int(&BigInt::from_bytes_be(Sign::Plus, b"\x10")), + Ok("00000+0x10".to_owned()) + ); + } + + #[test] + fn test_format_int_sep() { + let spec = FormatSpec::parse(",").expect(""); + assert_eq!(spec.grouping_option, Some(FormatGrouping::Comma)); + assert_eq!( + spec.format_int(&BigInt::from_str("1234567890123456789012345678").unwrap()), + Ok("1,234,567,890,123,456,789,012,345,678".to_owned()) + ); + } + + #[test] + fn test_format_parse() { + let expected = Ok(FormatString { + format_parts: vec![ + FormatPart::Literal("abcd".to_owned()), + FormatPart::Field { + field_name: "1".to_owned(), + conversion_spec: None, + format_spec: String::new(), + }, + FormatPart::Literal(":".to_owned()), + FormatPart::Field { + field_name: "key".to_owned(), + conversion_spec: None, + format_spec: String::new(), + }, + ], + }); + + assert_eq!(FormatString::from_str("abcd{1}:{key}"), expected); + } + + #[test] + fn test_format_parse_multi_byte_char() { + assert!(FormatString::from_str("{a:%ЫйЯЧ}").is_ok()); + } + + #[test] + fn test_format_parse_fail() { + assert_eq!( + FormatString::from_str("{s"), + Err(FormatParseError::UnmatchedBracket) + ); + } + 
+ #[test] + fn test_format_parse_escape() { + let expected = Ok(FormatString { + format_parts: vec![ + FormatPart::Literal("{".to_owned()), + FormatPart::Field { + field_name: "key".to_owned(), + conversion_spec: None, + format_spec: String::new(), + }, + FormatPart::Literal("}ddfe".to_owned()), + ], + }); + + assert_eq!(FormatString::from_str("{{{key}}}ddfe"), expected); + } + + #[test] + fn test_format_invalid_specification() { + assert_eq!( + FormatSpec::parse("%3"), + Err(FormatSpecError::InvalidFormatSpecifier) + ); + assert_eq!( + FormatSpec::parse(".2fa"), + Err(FormatSpecError::InvalidFormatSpecifier) + ); + assert_eq!( + FormatSpec::parse("ds"), + Err(FormatSpecError::InvalidFormatSpecifier) + ); + assert_eq!( + FormatSpec::parse("x+"), + Err(FormatSpecError::InvalidFormatSpecifier) + ); + assert_eq!( + FormatSpec::parse("b4"), + Err(FormatSpecError::InvalidFormatSpecifier) + ); + assert_eq!( + FormatSpec::parse("o!"), + Err(FormatSpecError::InvalidFormatSpecifier) + ); + assert_eq!( + FormatSpec::parse("d "), + Err(FormatSpecError::InvalidFormatSpecifier) + ); + } + + #[test] + fn test_parse_field_name() { + assert_eq!( + FieldName::parse(""), + Ok(FieldName { + field_type: FieldType::Auto, + parts: Vec::new(), + }) + ); + assert_eq!( + FieldName::parse("0"), + Ok(FieldName { + field_type: FieldType::Index(0), + parts: Vec::new(), + }) + ); + assert_eq!( + FieldName::parse("key"), + Ok(FieldName { + field_type: FieldType::Keyword("key".to_owned()), + parts: Vec::new(), + }) + ); + assert_eq!( + FieldName::parse("key.attr[0][string]"), + Ok(FieldName { + field_type: FieldType::Keyword("key".to_owned()), + parts: vec![ + FieldNamePart::Attribute("attr".to_owned()), + FieldNamePart::Index(0), + FieldNamePart::StringIndex("string".to_owned()) + ], + }) + ); + assert_eq!( + FieldName::parse("key.."), + Err(FormatParseError::EmptyAttribute) + ); + assert_eq!( + FieldName::parse("key[]"), + Err(FormatParseError::EmptyAttribute) + ); + assert_eq!( + 
FieldName::parse("key["), + Err(FormatParseError::MissingRightBracket) + ); + assert_eq!( + FieldName::parse("key[0]after"), + Err(FormatParseError::InvalidCharacterAfterRightBracket) + ); + } +} diff --git a/literal/src/lib.rs b/literal/src/lib.rs index 9b96205..31b0f90 100644 --- a/literal/src/lib.rs +++ b/literal/src/lib.rs @@ -1,3 +1,4 @@ +pub mod cformat; pub mod char; pub mod escape; pub mod float;