mirror of
https://github.com/RustPython/Parser.git
synced 2025-07-07 13:15:21 +00:00
rustpython-literal
This commit is contained in:
parent
bd64603950
commit
80109b1fe0
9 changed files with 773 additions and 7 deletions
|
@ -11,10 +11,10 @@ license = "MIT"
|
|||
default = ["constant-optimization", "fold"]
|
||||
constant-optimization = ["fold"]
|
||||
fold = []
|
||||
unparse = ["rustpython-common"]
|
||||
unparse = ["rustpython-literal"]
|
||||
|
||||
[dependencies]
|
||||
rustpython-compiler-core = { path = "../core", version = "0.2.0" }
|
||||
rustpython-common = { path = "../../common", version = "0.2.0", optional = true }
|
||||
rustpython-literal = { path = "../literal", version = "0.2.0", optional = true }
|
||||
|
||||
num-bigint = { workspace = true }
|
||||
|
|
|
@ -35,17 +35,17 @@ impl From<BigInt> for Constant {
|
|||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "rustpython-common")]
|
||||
#[cfg(feature = "rustpython-literal")]
|
||||
impl std::fmt::Display for Constant {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Constant::None => f.pad("None"),
|
||||
Constant::Bool(b) => f.pad(if *b { "True" } else { "False" }),
|
||||
Constant::Str(s) => rustpython_common::escape::UnicodeEscape::new_repr(s.as_str())
|
||||
Constant::Str(s) => rustpython_literal::escape::UnicodeEscape::new_repr(s.as_str())
|
||||
.str_repr()
|
||||
.write(f),
|
||||
Constant::Bytes(b) => {
|
||||
let escape = rustpython_common::escape::AsciiEscape::new_repr(b);
|
||||
let escape = rustpython_literal::escape::AsciiEscape::new_repr(b);
|
||||
let repr = escape.bytes_repr().to_string().unwrap();
|
||||
f.pad(&repr)
|
||||
}
|
||||
|
@ -64,7 +64,7 @@ impl std::fmt::Display for Constant {
|
|||
f.write_str(")")
|
||||
}
|
||||
}
|
||||
Constant::Float(fp) => f.pad(&rustpython_common::float_ops::to_string(*fp)),
|
||||
Constant::Float(fp) => f.pad(&rustpython_literal::float::to_string(*fp)),
|
||||
Constant::Complex { real, imag } => {
|
||||
if *real == 0.0 {
|
||||
write!(f, "{imag}j")
|
||||
|
|
|
@ -511,7 +511,7 @@ impl<'a> Unparser<'a> {
|
|||
} else {
|
||||
self.p("f")?;
|
||||
let body = to_string_fmt(|f| Unparser::new(f).unparse_fstring_body(values, is_spec));
|
||||
rustpython_common::escape::UnicodeEscape::new_repr(&body)
|
||||
rustpython_literal::escape::UnicodeEscape::new_repr(&body)
|
||||
.str_repr()
|
||||
.write(&mut self.f)
|
||||
}
|
||||
|
|
18
literal/Cargo.toml
Normal file
18
literal/Cargo.toml
Normal file
|
@ -0,0 +1,18 @@
|
|||
[package]
|
||||
name = "rustpython-literal"
|
||||
version = "0.2.0"
|
||||
description = "Common literal handling utilities mostly useful for unparse and repr."
|
||||
authors = ["RustPython Team"]
|
||||
edition = "2021"
|
||||
repository = "https://github.com/RustPython/RustPython"
|
||||
license = "MIT"
|
||||
|
||||
[dependencies]
|
||||
num-traits = { workspace = true }
|
||||
|
||||
hexf-parse = "0.2.1"
|
||||
lexical-parse-float = { version = "0.8.0", features = ["format"] }
|
||||
unic-ucd-category = "0.9"
|
||||
|
||||
[dev-dependencies]
|
||||
rand = { workspace = true }
|
15
literal/src/char.rs
Normal file
15
literal/src/char.rs
Normal file
|
@ -0,0 +1,15 @@
|
|||
use unic_ucd_category::GeneralCategory;
|
||||
|
||||
/// According to python following categories aren't printable:
|
||||
/// * Cc (Other, Control)
|
||||
/// * Cf (Other, Format)
|
||||
/// * Cs (Other, Surrogate)
|
||||
/// * Co (Other, Private Use)
|
||||
/// * Cn (Other, Not Assigned)
|
||||
/// * Zl Separator, Line ('\u2028', LINE SEPARATOR)
|
||||
/// * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR)
|
||||
/// * Zs (Separator, Space) other than ASCII space('\x20').
|
||||
pub fn is_printable(c: char) -> bool {
|
||||
let cat = GeneralCategory::of(c);
|
||||
!(cat.is_other() || cat.is_separator())
|
||||
}
|
414
literal/src/escape.rs
Normal file
414
literal/src/escape.rs
Normal file
|
@ -0,0 +1,414 @@
|
|||
/// The quote character used to delimit a literal.
#[derive(Debug, Clone, Copy)]
pub enum Quote {
    Single,
    Double,
}

impl Quote {
    /// Returns the other quote kind.
    #[inline]
    pub const fn swap(self) -> Quote {
        if matches!(self, Quote::Single) {
            Quote::Double
        } else {
            Quote::Single
        }
    }

    /// Returns the quote as an ASCII byte (`'` or `"`).
    #[inline]
    pub const fn to_byte(&self) -> u8 {
        match *self {
            Quote::Double => b'"',
            Quote::Single => b'\'',
        }
    }

    /// Returns the quote as a `char` (`'` or `"`).
    #[inline]
    pub const fn to_char(&self) -> char {
        // Both quotes are ASCII, so the byte form converts losslessly.
        self.to_byte() as char
    }
}
|
||||
|
||||
pub struct EscapeLayout {
|
||||
pub quote: Quote,
|
||||
pub len: Option<usize>,
|
||||
}
|
||||
|
||||
pub trait Escape {
|
||||
fn source_len(&self) -> usize;
|
||||
fn layout(&self) -> &EscapeLayout;
|
||||
fn changed(&self) -> bool {
|
||||
self.layout().len != Some(self.source_len())
|
||||
}
|
||||
|
||||
fn write_source(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result;
|
||||
fn write_body_slow(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result;
|
||||
fn write_body(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
|
||||
if self.changed() {
|
||||
self.write_body_slow(formatter)
|
||||
} else {
|
||||
self.write_source(formatter)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the outer quotes to use and the number of quotes that need to be
|
||||
/// escaped.
|
||||
pub(crate) const fn choose_quote(
|
||||
single_count: usize,
|
||||
double_count: usize,
|
||||
preferred_quote: Quote,
|
||||
) -> (Quote, usize) {
|
||||
let (primary_count, secondary_count) = match preferred_quote {
|
||||
Quote::Single => (single_count, double_count),
|
||||
Quote::Double => (double_count, single_count),
|
||||
};
|
||||
|
||||
// always use primary unless we have primary but no seconday
|
||||
let use_secondary = primary_count > 0 && secondary_count == 0;
|
||||
if use_secondary {
|
||||
(preferred_quote.swap(), secondary_count)
|
||||
} else {
|
||||
(preferred_quote, primary_count)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct UnicodeEscape<'a> {
|
||||
source: &'a str,
|
||||
layout: EscapeLayout,
|
||||
}
|
||||
|
||||
impl<'a> UnicodeEscape<'a> {
|
||||
pub fn with_forced_quote(source: &'a str, quote: Quote) -> Self {
|
||||
let layout = EscapeLayout { quote, len: None };
|
||||
Self { source, layout }
|
||||
}
|
||||
pub fn new_repr(source: &'a str) -> Self {
|
||||
let layout = Self::repr_layout(source, Quote::Single);
|
||||
Self { source, layout }
|
||||
}
|
||||
|
||||
pub fn str_repr<'r>(&'a self) -> StrRepr<'r, 'a> {
|
||||
StrRepr(self)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StrRepr<'r, 'a>(&'r UnicodeEscape<'a>);
|
||||
|
||||
impl StrRepr<'_, '_> {
|
||||
pub fn write(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
|
||||
let quote = self.0.layout().quote.to_char();
|
||||
formatter.write_char(quote)?;
|
||||
self.0.write_body(formatter)?;
|
||||
formatter.write_char(quote)
|
||||
}
|
||||
|
||||
pub fn to_string(&self) -> Option<String> {
|
||||
let mut s = String::with_capacity(self.0.layout().len?);
|
||||
self.write(&mut s).unwrap();
|
||||
Some(s)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for StrRepr<'_, '_> {
|
||||
fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
self.write(formatter)
|
||||
}
|
||||
}
|
||||
|
||||
impl UnicodeEscape<'_> {
|
||||
const REPR_RESERVED_LEN: usize = 2; // for quotes
|
||||
|
||||
pub fn repr_layout(source: &str, preferred_quote: Quote) -> EscapeLayout {
|
||||
Self::output_layout_with_checker(source, preferred_quote, |a, b| {
|
||||
Some((a as isize).checked_add(b as isize)? as usize)
|
||||
})
|
||||
}
|
||||
|
||||
fn output_layout_with_checker(
|
||||
source: &str,
|
||||
preferred_quote: Quote,
|
||||
length_add: impl Fn(usize, usize) -> Option<usize>,
|
||||
) -> EscapeLayout {
|
||||
let mut out_len = Self::REPR_RESERVED_LEN;
|
||||
let mut single_count = 0;
|
||||
let mut double_count = 0;
|
||||
|
||||
for ch in source.chars() {
|
||||
let incr = match ch {
|
||||
'\'' => {
|
||||
single_count += 1;
|
||||
1
|
||||
}
|
||||
'"' => {
|
||||
double_count += 1;
|
||||
1
|
||||
}
|
||||
c => Self::escaped_char_len(c),
|
||||
};
|
||||
let Some(new_len) = length_add(out_len, incr) else {
|
||||
#[cold]
|
||||
fn stop(single_count: usize, double_count: usize, preferred_quote: Quote) -> EscapeLayout {
|
||||
EscapeLayout { quote: choose_quote(single_count, double_count, preferred_quote).0, len: None }
|
||||
}
|
||||
return stop(single_count, double_count, preferred_quote);
|
||||
};
|
||||
out_len = new_len;
|
||||
}
|
||||
|
||||
let (quote, num_escaped_quotes) = choose_quote(single_count, double_count, preferred_quote);
|
||||
// we'll be adding backslashes in front of the existing inner quotes
|
||||
let Some(out_len) = length_add(out_len, num_escaped_quotes) else {
|
||||
return EscapeLayout { quote, len: None };
|
||||
};
|
||||
|
||||
EscapeLayout {
|
||||
quote,
|
||||
len: Some(out_len - Self::REPR_RESERVED_LEN),
|
||||
}
|
||||
}
|
||||
|
||||
fn escaped_char_len(ch: char) -> usize {
|
||||
match ch {
|
||||
'\\' | '\t' | '\r' | '\n' => 2,
|
||||
ch if ch < ' ' || ch as u32 == 0x7f => 4, // \xHH
|
||||
ch if ch.is_ascii() => 1,
|
||||
ch if crate::char::is_printable(ch) => {
|
||||
// max = std::cmp::max(ch, max);
|
||||
ch.len_utf8()
|
||||
}
|
||||
ch if (ch as u32) < 0x100 => 4, // \xHH
|
||||
ch if (ch as u32) < 0x10000 => 6, // \uHHHH
|
||||
_ => 10, // \uHHHHHHHH
|
||||
}
|
||||
}
|
||||
|
||||
fn write_char(
|
||||
ch: char,
|
||||
quote: Quote,
|
||||
formatter: &mut impl std::fmt::Write,
|
||||
) -> std::fmt::Result {
|
||||
match ch {
|
||||
'\n' => formatter.write_str("\\n"),
|
||||
'\t' => formatter.write_str("\\t"),
|
||||
'\r' => formatter.write_str("\\r"),
|
||||
// these 2 branches *would* be handled below, but we shouldn't have to do a
|
||||
// unicodedata lookup just for ascii characters
|
||||
'\x20'..='\x7e' => {
|
||||
// printable ascii range
|
||||
if ch == quote.to_char() || ch == '\\' {
|
||||
formatter.write_char('\\')?;
|
||||
}
|
||||
formatter.write_char(ch)
|
||||
}
|
||||
ch if ch.is_ascii() => {
|
||||
write!(formatter, "\\x{:02x}", ch as u8)
|
||||
}
|
||||
ch if crate::char::is_printable(ch) => formatter.write_char(ch),
|
||||
'\0'..='\u{ff}' => {
|
||||
write!(formatter, "\\x{:02x}", ch as u32)
|
||||
}
|
||||
'\0'..='\u{ffff}' => {
|
||||
write!(formatter, "\\u{:04x}", ch as u32)
|
||||
}
|
||||
_ => {
|
||||
write!(formatter, "\\U{:08x}", ch as u32)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Escape for UnicodeEscape<'a> {
|
||||
fn source_len(&self) -> usize {
|
||||
self.source.len()
|
||||
}
|
||||
|
||||
fn layout(&self) -> &EscapeLayout {
|
||||
&self.layout
|
||||
}
|
||||
|
||||
fn write_source(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
|
||||
formatter.write_str(self.source)
|
||||
}
|
||||
|
||||
#[cold]
|
||||
fn write_body_slow(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
|
||||
for ch in self.source.chars() {
|
||||
Self::write_char(ch, self.layout().quote, formatter)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod unicode_escapse_tests {
    use super::*;

    /// `changed()` is false for plain text and for text whose quotes can be
    /// avoided by switching the outer quote, and true once escaping is needed.
    #[test]
    fn changed() {
        let is_changed = |s: &str| UnicodeEscape::new_repr(s).changed();

        for untouched in ["hello", "'hello'", "\"hello\""] {
            assert!(!is_changed(untouched));
        }

        for escaped in ["'\"hello", "hello\n"] {
            assert!(is_changed(escaped));
        }
    }
}
|
||||
|
||||
pub struct AsciiEscape<'a> {
|
||||
source: &'a [u8],
|
||||
layout: EscapeLayout,
|
||||
}
|
||||
|
||||
impl<'a> AsciiEscape<'a> {
|
||||
pub fn new(source: &'a [u8], layout: EscapeLayout) -> Self {
|
||||
Self { source, layout }
|
||||
}
|
||||
pub fn with_forced_quote(source: &'a [u8], quote: Quote) -> Self {
|
||||
let layout = EscapeLayout { quote, len: None };
|
||||
Self { source, layout }
|
||||
}
|
||||
pub fn new_repr(source: &'a [u8]) -> Self {
|
||||
let layout = Self::repr_layout(source, Quote::Single);
|
||||
Self { source, layout }
|
||||
}
|
||||
|
||||
pub fn bytes_repr<'r>(&'a self) -> BytesRepr<'r, 'a> {
|
||||
BytesRepr(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl AsciiEscape<'_> {
|
||||
pub fn repr_layout(source: &[u8], preferred_quote: Quote) -> EscapeLayout {
|
||||
Self::output_layout_with_checker(source, preferred_quote, 3, |a, b| {
|
||||
Some((a as isize).checked_add(b as isize)? as usize)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn named_repr_layout(source: &[u8], name: &str) -> EscapeLayout {
|
||||
Self::output_layout_with_checker(source, Quote::Single, name.len() + 2 + 3, |a, b| {
|
||||
Some((a as isize).checked_add(b as isize)? as usize)
|
||||
})
|
||||
}
|
||||
|
||||
fn output_layout_with_checker(
|
||||
source: &[u8],
|
||||
preferred_quote: Quote,
|
||||
reserved_len: usize,
|
||||
length_add: impl Fn(usize, usize) -> Option<usize>,
|
||||
) -> EscapeLayout {
|
||||
let mut out_len = reserved_len;
|
||||
let mut single_count = 0;
|
||||
let mut double_count = 0;
|
||||
|
||||
for ch in source.iter() {
|
||||
let incr = match ch {
|
||||
b'\'' => {
|
||||
single_count += 1;
|
||||
1
|
||||
}
|
||||
b'"' => {
|
||||
double_count += 1;
|
||||
1
|
||||
}
|
||||
c => Self::escaped_char_len(*c),
|
||||
};
|
||||
let Some(new_len) = length_add(out_len, incr) else {
|
||||
#[cold]
|
||||
fn stop(single_count: usize, double_count: usize, preferred_quote: Quote) -> EscapeLayout {
|
||||
EscapeLayout { quote: choose_quote(single_count, double_count, preferred_quote).0, len: None }
|
||||
}
|
||||
return stop(single_count, double_count, preferred_quote);
|
||||
};
|
||||
out_len = new_len;
|
||||
}
|
||||
|
||||
let (quote, num_escaped_quotes) = choose_quote(single_count, double_count, preferred_quote);
|
||||
// we'll be adding backslashes in front of the existing inner quotes
|
||||
let Some(out_len) = length_add(out_len, num_escaped_quotes) else {
|
||||
return EscapeLayout { quote, len: None };
|
||||
};
|
||||
|
||||
EscapeLayout {
|
||||
quote,
|
||||
len: Some(out_len - reserved_len),
|
||||
}
|
||||
}
|
||||
|
||||
fn escaped_char_len(ch: u8) -> usize {
|
||||
match ch {
|
||||
b'\\' | b'\t' | b'\r' | b'\n' => 2,
|
||||
0x20..=0x7e => 1,
|
||||
_ => 4, // \xHH
|
||||
}
|
||||
}
|
||||
|
||||
fn write_char(ch: u8, quote: Quote, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
|
||||
match ch {
|
||||
b'\t' => formatter.write_str("\\t"),
|
||||
b'\n' => formatter.write_str("\\n"),
|
||||
b'\r' => formatter.write_str("\\r"),
|
||||
0x20..=0x7e => {
|
||||
// printable ascii range
|
||||
if ch == quote.to_byte() || ch == b'\\' {
|
||||
formatter.write_char('\\')?;
|
||||
}
|
||||
formatter.write_char(ch as char)
|
||||
}
|
||||
ch => write!(formatter, "\\x{ch:02x}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Escape for AsciiEscape<'a> {
|
||||
fn source_len(&self) -> usize {
|
||||
self.source.len()
|
||||
}
|
||||
|
||||
fn layout(&self) -> &EscapeLayout {
|
||||
&self.layout
|
||||
}
|
||||
|
||||
fn write_source(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
|
||||
formatter.write_str(unsafe {
|
||||
// SAFETY: this function must be called only when source is printable ascii characters
|
||||
std::str::from_utf8_unchecked(self.source)
|
||||
})
|
||||
}
|
||||
|
||||
#[cold]
|
||||
fn write_body_slow(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
|
||||
for ch in self.source.iter() {
|
||||
Self::write_char(*ch, self.layout().quote, formatter)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct BytesRepr<'r, 'a>(&'r AsciiEscape<'a>);
|
||||
|
||||
impl BytesRepr<'_, '_> {
|
||||
pub fn write(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
|
||||
let quote = self.0.layout().quote.to_char();
|
||||
formatter.write_char('b')?;
|
||||
formatter.write_char(quote)?;
|
||||
self.0.write_body(formatter)?;
|
||||
formatter.write_char(quote)
|
||||
}
|
||||
|
||||
pub fn to_string(&self) -> Option<String> {
|
||||
let mut s = String::with_capacity(self.0.layout().len?);
|
||||
self.write(&mut s).unwrap();
|
||||
Some(s)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for BytesRepr<'_, '_> {
|
||||
fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
self.write(formatter)
|
||||
}
|
||||
}
|
310
literal/src/float.rs
Normal file
310
literal/src/float.rs
Normal file
|
@ -0,0 +1,310 @@
|
|||
use crate::format::Case;
|
||||
use num_traits::{Float, Zero};
|
||||
use std::f64;
|
||||
|
||||
pub fn parse_str(literal: &str) -> Option<f64> {
|
||||
parse_inner(literal.trim().as_bytes())
|
||||
}
|
||||
|
||||
pub fn parse_bytes(literal: &[u8]) -> Option<f64> {
|
||||
parse_inner(trim_slice(literal, |b| b.is_ascii_whitespace()))
|
||||
}
|
||||
|
||||
/// Returns the sub-slice of `v` left after dropping leading and then trailing
/// elements for which `trim` returns `true`.
fn trim_slice<T>(v: &[T], mut trim: impl FnMut(&T) -> bool) -> &[T] {
    // Front: skip up to the first element that must be kept.
    let start = v.iter().position(|item| !trim(item)).unwrap_or(v.len());
    let rest = &v[start..];
    // Back: keep through the last element that must be kept.
    let end = rest
        .iter()
        .rposition(|item| !trim(item))
        .map_or(0, |last| last + 1);
    &rest[..end]
}
|
||||
|
||||
fn parse_inner(literal: &[u8]) -> Option<f64> {
|
||||
use lexical_parse_float::{
|
||||
format::PYTHON3_LITERAL, FromLexicalWithOptions, NumberFormatBuilder, Options,
|
||||
};
|
||||
// lexical-core's format::PYTHON_STRING is inaccurate
|
||||
const PYTHON_STRING: u128 = NumberFormatBuilder::rebuild(PYTHON3_LITERAL)
|
||||
.no_special(false)
|
||||
.build();
|
||||
f64::from_lexical_with_options::<PYTHON_STRING>(literal, &Options::new()).ok()
|
||||
}
|
||||
|
||||
/// Returns `true` when `v` is finite and has no fractional part.
///
/// The check is exact. An epsilon comparison would misreport values such as
/// `0.9999999999999999` or `1e-17` as integers.
pub fn is_integer(v: f64) -> bool {
    v.is_finite() && v.fract() == 0.0
}
|
||||
|
||||
fn format_nan(case: Case) -> String {
|
||||
let nan = match case {
|
||||
Case::Lower => "nan",
|
||||
Case::Upper => "NAN",
|
||||
};
|
||||
|
||||
nan.to_string()
|
||||
}
|
||||
|
||||
fn format_inf(case: Case) -> String {
|
||||
let inf = match case {
|
||||
Case::Lower => "inf",
|
||||
Case::Upper => "INF",
|
||||
};
|
||||
|
||||
inf.to_string()
|
||||
}
|
||||
|
||||
/// Returns `"."` when the alternate form is requested with zero precision,
/// and `""` in every other case.
pub fn decimal_point_or_empty(precision: usize, alternate_form: bool) -> &'static str {
    if alternate_form && precision == 0 {
        "."
    } else {
        ""
    }
}
|
||||
|
||||
pub fn format_fixed(precision: usize, magnitude: f64, case: Case, alternate_form: bool) -> String {
|
||||
match magnitude {
|
||||
magnitude if magnitude.is_finite() => {
|
||||
let point = decimal_point_or_empty(precision, alternate_form);
|
||||
format!("{magnitude:.precision$}{point}")
|
||||
}
|
||||
magnitude if magnitude.is_nan() => format_nan(case),
|
||||
magnitude if magnitude.is_infinite() => format_inf(case),
|
||||
_ => "".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
// Formats floats into Python style exponent notation, by first formatting in Rust style
|
||||
// exponent notation (`1.0000e0`), then convert to Python style (`1.0000e+00`).
|
||||
pub fn format_exponent(
|
||||
precision: usize,
|
||||
magnitude: f64,
|
||||
case: Case,
|
||||
alternate_form: bool,
|
||||
) -> String {
|
||||
match magnitude {
|
||||
magnitude if magnitude.is_finite() => {
|
||||
let r_exp = format!("{magnitude:.precision$e}");
|
||||
let mut parts = r_exp.splitn(2, 'e');
|
||||
let base = parts.next().unwrap();
|
||||
let exponent = parts.next().unwrap().parse::<i64>().unwrap();
|
||||
let e = match case {
|
||||
Case::Lower => 'e',
|
||||
Case::Upper => 'E',
|
||||
};
|
||||
let point = decimal_point_or_empty(precision, alternate_form);
|
||||
format!("{base}{point}{e}{exponent:+#03}")
|
||||
}
|
||||
magnitude if magnitude.is_nan() => format_nan(case),
|
||||
magnitude if magnitude.is_infinite() => format_inf(case),
|
||||
_ => "".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Strips trailing zeros, and then one trailing decimal point, from `s` when
/// it contains a `.` and the alternate form is not requested; otherwise `s`
/// is returned unchanged.
/// This function does NOT work with decimal commas.
fn maybe_remove_trailing_redundant_chars(s: String, alternate_form: bool) -> String {
    // Alternate form keeps everything; strings without a '.' are not
    // truncated.
    if alternate_form || !s.contains('.') {
        return s;
    }

    let mut trimmed = s;
    let kept = trimmed.trim_end_matches('0').len();
    trimmed.truncate(kept);
    if trimmed.ends_with('.') {
        trimmed.pop();
    }
    trimmed
}
|
||||
|
||||
/// Removes every trailing `'0'` character from `s`.
fn remove_trailing_zeros(s: String) -> String {
    let mut trimmed = s;
    let kept = trimmed.trim_end_matches('0').len();
    trimmed.truncate(kept);
    trimmed
}
|
||||
|
||||
/// Removes a single trailing `'.'` from `s`, if there is one.
fn remove_trailing_decimal_point(s: String) -> String {
    match s.strip_suffix('.') {
        Some(stem) => {
            let kept = stem.len();
            let mut trimmed = s;
            trimmed.truncate(kept);
            trimmed
        }
        None => s,
    }
}
|
||||
|
||||
pub fn format_general(
|
||||
precision: usize,
|
||||
magnitude: f64,
|
||||
case: Case,
|
||||
alternate_form: bool,
|
||||
always_shows_fract: bool,
|
||||
) -> String {
|
||||
match magnitude {
|
||||
magnitude if magnitude.is_finite() => {
|
||||
let r_exp = format!("{:.*e}", precision.saturating_sub(1), magnitude);
|
||||
let mut parts = r_exp.splitn(2, 'e');
|
||||
let base = parts.next().unwrap();
|
||||
let exponent = parts.next().unwrap().parse::<i64>().unwrap();
|
||||
if exponent < -4 || exponent + (always_shows_fract as i64) >= (precision as i64) {
|
||||
let e = match case {
|
||||
Case::Lower => 'e',
|
||||
Case::Upper => 'E',
|
||||
};
|
||||
let magnitude = format!("{:.*}", precision + 1, base);
|
||||
let base = maybe_remove_trailing_redundant_chars(magnitude, alternate_form);
|
||||
let point = decimal_point_or_empty(precision.saturating_sub(1), alternate_form);
|
||||
format!("{base}{point}{e}{exponent:+#03}")
|
||||
} else {
|
||||
let precision = ((precision as i64) - 1 - exponent) as usize;
|
||||
let magnitude = format!("{magnitude:.precision$}");
|
||||
let base = maybe_remove_trailing_redundant_chars(magnitude, alternate_form);
|
||||
let point = decimal_point_or_empty(precision, alternate_form);
|
||||
format!("{base}{point}")
|
||||
}
|
||||
}
|
||||
magnitude if magnitude.is_nan() => format_nan(case),
|
||||
magnitude if magnitude.is_infinite() => format_inf(case),
|
||||
_ => "".to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: rewrite using format_general
|
||||
pub fn to_string(value: f64) -> String {
|
||||
let lit = format!("{value:e}");
|
||||
if let Some(position) = lit.find('e') {
|
||||
let significand = &lit[..position];
|
||||
let exponent = &lit[position + 1..];
|
||||
let exponent = exponent.parse::<i32>().unwrap();
|
||||
if exponent < 16 && exponent > -5 {
|
||||
if is_integer(value) {
|
||||
format!("{value:.1?}")
|
||||
} else {
|
||||
value.to_string()
|
||||
}
|
||||
} else {
|
||||
format!("{significand}e{exponent:+#03}")
|
||||
}
|
||||
} else {
|
||||
let mut s = value.to_string();
|
||||
s.make_ascii_lowercase();
|
||||
s
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_hex(s: &str) -> Option<f64> {
|
||||
if let Ok(f) = hexf_parse::parse_hexf64(s, false) {
|
||||
return Some(f);
|
||||
}
|
||||
match s.to_ascii_lowercase().as_str() {
|
||||
"nan" | "+nan" | "-nan" => Some(f64::NAN),
|
||||
"inf" | "infinity" | "+inf" | "+infinity" => Some(f64::INFINITY),
|
||||
"-inf" | "-infinity" => Some(f64::NEG_INFINITY),
|
||||
value => {
|
||||
let mut hex = String::with_capacity(value.len());
|
||||
let has_0x = value.contains("0x");
|
||||
let has_p = value.contains('p');
|
||||
let has_dot = value.contains('.');
|
||||
let mut start = 0;
|
||||
|
||||
if !has_0x && value.starts_with('-') {
|
||||
hex.push_str("-0x");
|
||||
start += 1;
|
||||
} else if !has_0x {
|
||||
hex.push_str("0x");
|
||||
if value.starts_with('+') {
|
||||
start += 1;
|
||||
}
|
||||
}
|
||||
|
||||
for (index, ch) in value.chars().enumerate() {
|
||||
if ch == 'p' {
|
||||
if has_dot {
|
||||
hex.push('p');
|
||||
} else {
|
||||
hex.push_str(".p");
|
||||
}
|
||||
} else if index >= start {
|
||||
hex.push(ch);
|
||||
}
|
||||
}
|
||||
|
||||
if !has_p && has_dot {
|
||||
hex.push_str("p0");
|
||||
} else if !has_p && !has_dot {
|
||||
hex.push_str(".p0")
|
||||
}
|
||||
|
||||
hexf_parse::parse_hexf64(hex.as_str(), false).ok()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_hex(value: f64) -> String {
|
||||
let (mantissa, exponent, sign) = value.integer_decode();
|
||||
let sign_fmt = if sign < 0 { "-" } else { "" };
|
||||
match value {
|
||||
value if value.is_zero() => format!("{sign_fmt}0x0.0p+0"),
|
||||
value if value.is_infinite() => format!("{sign_fmt}inf"),
|
||||
value if value.is_nan() => "nan".to_owned(),
|
||||
_ => {
|
||||
const BITS: i16 = 52;
|
||||
const FRACT_MASK: u64 = 0xf_ffff_ffff_ffff;
|
||||
format!(
|
||||
"{}{:#x}.{:013x}p{:+}",
|
||||
sign_fmt,
|
||||
mantissa >> BITS,
|
||||
mantissa & FRACT_MASK,
|
||||
exponent + BITS
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Round-trips random finite floats through `to_hex` and `hexf_parse`.
#[test]
fn test_to_hex() {
    use rand::Rng;

    let mut rng = rand::thread_rng();
    for _ in 0..20000 {
        let [bits] = rng.gen::<[u64; 1]>();
        let f = f64::from_bits(bits);
        if f.is_finite() {
            let hex = to_hex(f);
            let roundtrip = hexf_parse::parse_hexf64(&hex, false).unwrap();
            assert!(f == roundtrip, "{} {} {}", f, hex, roundtrip);
        }
    }
}
|
||||
|
||||
#[test]
fn test_remove_trailing_zeros() {
    let cases = [
        ("100", "1"),
        ("100.00", "100."),
        // leave leading zeros untouched
        ("001", "001"),
        // leave strings untouched if they don't end with 0
        ("101", "101"),
    ];
    for (input, expected) in cases {
        assert_eq!(remove_trailing_zeros(String::from(input)), expected);
    }
}
|
||||
|
||||
#[test]
fn test_remove_trailing_decimal_point() {
    let cases = [
        ("100.", "100"),
        ("1.", "1"),
        // leave leading decimal points untouched
        (".5", ".5"),
    ];
    for (input, expected) in cases {
        assert_eq!(remove_trailing_decimal_point(String::from(input)), expected);
    }
}
|
||||
|
||||
#[test]
fn test_maybe_remove_trailing_redundant_chars() {
    let cases = [
        ("100.", true, "100."),
        ("100.", false, "100"),
        ("1.", false, "1"),
        ("10.0", false, "10"),
        // don't truncate integers
        ("1000", false, "1000"),
    ];
    for (input, alternate_form, expected) in cases {
        assert_eq!(
            maybe_remove_trailing_redundant_chars(String::from(input), alternate_form),
            expected
        );
    }
}
|
5
literal/src/format.rs
Normal file
5
literal/src/format.rs
Normal file
|
@ -0,0 +1,5 @@
|
|||
/// Letter case used when spelling textual parts of a formatted number.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Case {
    /// Lowercase spellings.
    Lower,
    /// Uppercase spellings.
    Upper,
}
|
4
literal/src/lib.rs
Normal file
4
literal/src/lib.rs
Normal file
|
@ -0,0 +1,4 @@
|
|||
pub mod char;
|
||||
pub mod escape;
|
||||
pub mod float;
|
||||
pub mod format;
|
Loading…
Add table
Add a link
Reference in a new issue