use ruff_python_ast::{ str::{Quote, TripleQuotes}, BytesLiteralFlags, StringFlags, StringLiteralFlags, }; pub struct EscapeLayout { pub quote: Quote, pub len: Option, } pub trait Escape { fn source_len(&self) -> usize; fn layout(&self) -> &EscapeLayout; fn changed(&self) -> bool { self.layout().len != Some(self.source_len()) } fn write_source(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result; fn write_body_slow(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result; fn write_body(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result { if self.changed() { self.write_body_slow(formatter) } else { self.write_source(formatter) } } } /// Returns the outer quotes to use and the number of quotes that need to be /// escaped. pub(crate) const fn choose_quote( single_count: usize, double_count: usize, preferred_quote: Quote, ) -> (Quote, usize) { let (primary_count, secondary_count) = match preferred_quote { Quote::Single => (single_count, double_count), Quote::Double => (double_count, single_count), }; // always use primary unless we have primary but no secondary let use_secondary = primary_count > 0 && secondary_count == 0; if use_secondary { (preferred_quote.opposite(), secondary_count) } else { (preferred_quote, primary_count) } } pub struct UnicodeEscape<'a> { source: &'a str, layout: EscapeLayout, } impl<'a> UnicodeEscape<'a> { #[inline] pub fn with_preferred_quote(source: &'a str, quote: Quote) -> Self { let layout = Self::repr_layout(source, quote); Self { source, layout } } #[inline] pub fn new_repr(source: &'a str) -> Self { Self::with_preferred_quote(source, Quote::Single) } #[inline] pub fn str_repr<'r>(&'a self, triple_quotes: TripleQuotes) -> StrRepr<'r, 'a> { StrRepr { escape: self, triple_quotes, } } } pub struct StrRepr<'r, 'a> { escape: &'r UnicodeEscape<'a>, triple_quotes: TripleQuotes, } impl StrRepr<'_, '_> { pub fn write(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result { let flags = StringLiteralFlags::empty() .with_quote_style(self.escape.layout().quote) .with_triple_quotes(self.triple_quotes); formatter.write_str(flags.quote_str())?; self.escape.write_body(formatter)?; formatter.write_str(flags.quote_str())?; Ok(()) } pub fn to_string(&self) -> Option { let mut s = String::with_capacity(self.escape.layout().len?); self.write(&mut s).unwrap(); Some(s) } } impl std::fmt::Display for StrRepr<'_, '_> { fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.write(formatter) } } impl UnicodeEscape<'_> { const REPR_RESERVED_LEN: usize = 2; // for quotes #[expect(clippy::cast_possible_wrap, clippy::cast_sign_loss)] pub fn repr_layout(source: &str, preferred_quote: Quote) -> EscapeLayout { Self::output_layout_with_checker(source, preferred_quote, |a, b| { Some((a as isize).checked_add(b as isize)? as usize) }) } fn output_layout_with_checker( source: &str, preferred_quote: Quote, length_add: impl Fn(usize, usize) -> Option, ) -> EscapeLayout { let mut out_len = Self::REPR_RESERVED_LEN; let mut single_count = 0; let mut double_count = 0; for ch in source.chars() { let incr = match ch { '\'' => { single_count += 1; 1 } '"' => { double_count += 1; 1 } c => Self::escaped_char_len(c), }; let Some(new_len) = length_add(out_len, incr) else { #[cold] fn stop( single_count: usize, double_count: usize, preferred_quote: Quote, ) -> EscapeLayout { EscapeLayout { quote: choose_quote(single_count, double_count, preferred_quote).0, len: None, } } return stop(single_count, double_count, preferred_quote); }; out_len = new_len; } let (quote, num_escaped_quotes) = choose_quote(single_count, double_count, preferred_quote); // we'll be adding backslashes in front of the existing inner quotes let Some(out_len) = length_add(out_len, num_escaped_quotes) else { return EscapeLayout { quote, len: None }; }; EscapeLayout { quote, len: Some(out_len - Self::REPR_RESERVED_LEN), } } fn escaped_char_len(ch: char) -> usize { match ch { '\\' | '\t' | '\r' | '\n' => 2, ch if ch < ' ' || ch as u32 == 0x7f => 4, // \xHH ch if ch.is_ascii() => 1, ch if crate::char::is_printable(ch) => { // max = std::cmp::max(ch, max); ch.len_utf8() } ch if (ch as u32) < 0x100 => 4, // \xHH ch if (ch as u32) < 0x10000 => 6, // \uHHHH _ => 10, // \uHHHHHHHH } } fn write_char( ch: char, quote: Quote, formatter: &mut impl std::fmt::Write, ) -> std::fmt::Result { match ch { '\n' => formatter.write_str("\\n"), '\t' => formatter.write_str("\\t"), '\r' => formatter.write_str("\\r"), // these 2 branches *would* be handled below, but we shouldn't have to do a // unicodedata lookup just for ascii characters '\x20'..='\x7e' => { // printable ascii range if ch == quote.as_char() || ch == '\\' { formatter.write_char('\\')?; } formatter.write_char(ch) } ch if ch.is_ascii() => { write!(formatter, "\\x{:02x}", ch as u8) } ch if crate::char::is_printable(ch) => formatter.write_char(ch), '\0'..='\u{ff}' => { write!(formatter, "\\x{:02x}", ch as u32) } '\0'..='\u{ffff}' => { write!(formatter, "\\u{:04x}", ch as u32) } _ => { write!(formatter, "\\U{:08x}", ch as u32) } } } } impl Escape for UnicodeEscape<'_> { fn source_len(&self) -> usize { self.source.len() } fn layout(&self) -> &EscapeLayout { &self.layout } fn write_source(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result { formatter.write_str(self.source) } #[cold] fn write_body_slow(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result { for ch in self.source.chars() { Self::write_char(ch, self.layout().quote, formatter)?; } Ok(()) } } pub struct AsciiEscape<'a> { source: &'a [u8], layout: EscapeLayout, } impl<'a> AsciiEscape<'a> { #[inline] pub fn new(source: &'a [u8], layout: EscapeLayout) -> Self { Self { source, layout } } #[inline] pub fn with_preferred_quote(source: &'a [u8], quote: Quote) -> Self { let layout = Self::repr_layout(source, quote); Self { source, layout } } #[inline] pub fn new_repr(source: &'a [u8]) -> Self { Self::with_preferred_quote(source, Quote::Single) } #[inline] pub fn bytes_repr<'r>(&'a self, triple_quotes: TripleQuotes) -> BytesRepr<'r, 'a> { BytesRepr { escape: self, triple_quotes, } } } impl AsciiEscape<'_> { #[expect(clippy::cast_possible_wrap, clippy::cast_sign_loss)] pub fn repr_layout(source: &[u8], preferred_quote: Quote) -> EscapeLayout { Self::output_layout_with_checker(source, preferred_quote, 3, |a, b| { Some((a as isize).checked_add(b as isize)? as usize) }) } fn output_layout_with_checker( source: &[u8], preferred_quote: Quote, reserved_len: usize, length_add: impl Fn(usize, usize) -> Option, ) -> EscapeLayout { let mut out_len = reserved_len; let mut single_count = 0; let mut double_count = 0; for ch in source { let incr = match ch { b'\'' => { single_count += 1; 1 } b'"' => { double_count += 1; 1 } c => Self::escaped_char_len(*c), }; let Some(new_len) = length_add(out_len, incr) else { #[cold] fn stop( single_count: usize, double_count: usize, preferred_quote: Quote, ) -> EscapeLayout { EscapeLayout { quote: choose_quote(single_count, double_count, preferred_quote).0, len: None, } } return stop(single_count, double_count, preferred_quote); }; out_len = new_len; } let (quote, num_escaped_quotes) = choose_quote(single_count, double_count, preferred_quote); // we'll be adding backslashes in front of the existing inner quotes let Some(out_len) = length_add(out_len, num_escaped_quotes) else { return EscapeLayout { quote, len: None }; }; EscapeLayout { quote, len: Some(out_len - reserved_len), } } fn escaped_char_len(ch: u8) -> usize { match ch { b'\\' | b'\t' | b'\r' | b'\n' => 2, 0x20..=0x7e => 1, _ => 4, // \xHH } } fn write_char(ch: u8, quote: Quote, formatter: &mut impl std::fmt::Write) -> std::fmt::Result { match ch { b'\t' => formatter.write_str("\\t"), b'\n' => formatter.write_str("\\n"), b'\r' => formatter.write_str("\\r"), 0x20..=0x7e => { // printable ascii range if ch == quote.as_byte() || ch == b'\\' { formatter.write_char('\\')?; } formatter.write_char(ch as char) } ch => write!(formatter, "\\x{ch:02x}"), } } } impl Escape for AsciiEscape<'_> { fn source_len(&self) -> usize { self.source.len() } fn layout(&self) -> &EscapeLayout { &self.layout } fn write_source(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result { // OK because function must be called only when source is printable ascii characters. let string = std::str::from_utf8(self.source).expect("ASCII bytes"); formatter.write_str(string) } #[cold] fn write_body_slow(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result { for ch in self.source { Self::write_char(*ch, self.layout().quote, formatter)?; } Ok(()) } } pub struct BytesRepr<'r, 'a> { escape: &'r AsciiEscape<'a>, triple_quotes: TripleQuotes, } impl BytesRepr<'_, '_> { pub fn write(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result { let flags = BytesLiteralFlags::empty() .with_quote_style(self.escape.layout().quote) .with_triple_quotes(self.triple_quotes); formatter.write_char('b')?; formatter.write_str(flags.quote_str())?; self.escape.write_body(formatter)?; formatter.write_str(flags.quote_str())?; Ok(()) } pub fn to_string(&self) -> Option { let mut s = String::with_capacity(self.escape.layout().len?); self.write(&mut s).unwrap(); Some(s) } } impl std::fmt::Display for BytesRepr<'_, '_> { fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.write(formatter) } } #[cfg(test)] mod unicode_escape_tests { use super::*; #[test] fn changed() { fn test(s: &str) -> bool { UnicodeEscape::new_repr(s).changed() } assert!(!test("hello")); assert!(!test("'hello'")); assert!(!test("\"hello\"")); assert!(test("'\"hello")); assert!(test("hello\n")); } }