mirror of
https://github.com/astral-sh/ruff.git
synced 2025-07-07 21:25:08 +00:00
417 lines
12 KiB
Rust
417 lines
12 KiB
Rust
use ruff_python_ast::{
|
|
str::{Quote, TripleQuotes},
|
|
BytesLiteralFlags, StringFlags, StringLiteralFlags,
|
|
};
|
|
|
|
pub struct EscapeLayout {
|
|
pub quote: Quote,
|
|
pub len: Option<usize>,
|
|
}
|
|
|
|
pub trait Escape {
|
|
fn source_len(&self) -> usize;
|
|
fn layout(&self) -> &EscapeLayout;
|
|
fn changed(&self) -> bool {
|
|
self.layout().len != Some(self.source_len())
|
|
}
|
|
|
|
fn write_source(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result;
|
|
fn write_body_slow(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result;
|
|
fn write_body(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
|
|
if self.changed() {
|
|
self.write_body_slow(formatter)
|
|
} else {
|
|
self.write_source(formatter)
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Returns the outer quotes to use and the number of quotes that need to be
|
|
/// escaped.
|
|
pub(crate) const fn choose_quote(
|
|
single_count: usize,
|
|
double_count: usize,
|
|
preferred_quote: Quote,
|
|
) -> (Quote, usize) {
|
|
let (primary_count, secondary_count) = match preferred_quote {
|
|
Quote::Single => (single_count, double_count),
|
|
Quote::Double => (double_count, single_count),
|
|
};
|
|
|
|
// always use primary unless we have primary but no secondary
|
|
let use_secondary = primary_count > 0 && secondary_count == 0;
|
|
if use_secondary {
|
|
(preferred_quote.opposite(), secondary_count)
|
|
} else {
|
|
(preferred_quote, primary_count)
|
|
}
|
|
}
|
|
|
|
pub struct UnicodeEscape<'a> {
|
|
source: &'a str,
|
|
layout: EscapeLayout,
|
|
}
|
|
|
|
impl<'a> UnicodeEscape<'a> {
|
|
#[inline]
|
|
pub fn with_preferred_quote(source: &'a str, quote: Quote) -> Self {
|
|
let layout = Self::repr_layout(source, quote);
|
|
Self { source, layout }
|
|
}
|
|
#[inline]
|
|
pub fn new_repr(source: &'a str) -> Self {
|
|
Self::with_preferred_quote(source, Quote::Single)
|
|
}
|
|
#[inline]
|
|
pub fn str_repr<'r>(&'a self, triple_quotes: TripleQuotes) -> StrRepr<'r, 'a> {
|
|
StrRepr {
|
|
escape: self,
|
|
triple_quotes,
|
|
}
|
|
}
|
|
}
|
|
|
|
pub struct StrRepr<'r, 'a> {
|
|
escape: &'r UnicodeEscape<'a>,
|
|
triple_quotes: TripleQuotes,
|
|
}
|
|
|
|
impl StrRepr<'_, '_> {
|
|
pub fn write(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
|
|
let flags = StringLiteralFlags::empty()
|
|
.with_quote_style(self.escape.layout().quote)
|
|
.with_triple_quotes(self.triple_quotes);
|
|
formatter.write_str(flags.quote_str())?;
|
|
self.escape.write_body(formatter)?;
|
|
formatter.write_str(flags.quote_str())?;
|
|
Ok(())
|
|
}
|
|
|
|
pub fn to_string(&self) -> Option<String> {
|
|
let mut s = String::with_capacity(self.escape.layout().len?);
|
|
self.write(&mut s).unwrap();
|
|
Some(s)
|
|
}
|
|
}
|
|
|
|
impl std::fmt::Display for StrRepr<'_, '_> {
|
|
fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
self.write(formatter)
|
|
}
|
|
}
|
|
|
|
impl UnicodeEscape<'_> {
|
|
const REPR_RESERVED_LEN: usize = 2; // for quotes
|
|
|
|
#[expect(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
|
|
pub fn repr_layout(source: &str, preferred_quote: Quote) -> EscapeLayout {
|
|
Self::output_layout_with_checker(source, preferred_quote, |a, b| {
|
|
Some((a as isize).checked_add(b as isize)? as usize)
|
|
})
|
|
}
|
|
|
|
fn output_layout_with_checker(
|
|
source: &str,
|
|
preferred_quote: Quote,
|
|
length_add: impl Fn(usize, usize) -> Option<usize>,
|
|
) -> EscapeLayout {
|
|
let mut out_len = Self::REPR_RESERVED_LEN;
|
|
let mut single_count = 0;
|
|
let mut double_count = 0;
|
|
|
|
for ch in source.chars() {
|
|
let incr = match ch {
|
|
'\'' => {
|
|
single_count += 1;
|
|
1
|
|
}
|
|
'"' => {
|
|
double_count += 1;
|
|
1
|
|
}
|
|
c => Self::escaped_char_len(c),
|
|
};
|
|
let Some(new_len) = length_add(out_len, incr) else {
|
|
#[cold]
|
|
fn stop(
|
|
single_count: usize,
|
|
double_count: usize,
|
|
preferred_quote: Quote,
|
|
) -> EscapeLayout {
|
|
EscapeLayout {
|
|
quote: choose_quote(single_count, double_count, preferred_quote).0,
|
|
len: None,
|
|
}
|
|
}
|
|
return stop(single_count, double_count, preferred_quote);
|
|
};
|
|
out_len = new_len;
|
|
}
|
|
|
|
let (quote, num_escaped_quotes) = choose_quote(single_count, double_count, preferred_quote);
|
|
// we'll be adding backslashes in front of the existing inner quotes
|
|
let Some(out_len) = length_add(out_len, num_escaped_quotes) else {
|
|
return EscapeLayout { quote, len: None };
|
|
};
|
|
|
|
EscapeLayout {
|
|
quote,
|
|
len: Some(out_len - Self::REPR_RESERVED_LEN),
|
|
}
|
|
}
|
|
|
|
fn escaped_char_len(ch: char) -> usize {
|
|
match ch {
|
|
'\\' | '\t' | '\r' | '\n' => 2,
|
|
ch if ch < ' ' || ch as u32 == 0x7f => 4, // \xHH
|
|
ch if ch.is_ascii() => 1,
|
|
ch if crate::char::is_printable(ch) => {
|
|
// max = std::cmp::max(ch, max);
|
|
ch.len_utf8()
|
|
}
|
|
ch if (ch as u32) < 0x100 => 4, // \xHH
|
|
ch if (ch as u32) < 0x10000 => 6, // \uHHHH
|
|
_ => 10, // \uHHHHHHHH
|
|
}
|
|
}
|
|
|
|
fn write_char(
|
|
ch: char,
|
|
quote: Quote,
|
|
formatter: &mut impl std::fmt::Write,
|
|
) -> std::fmt::Result {
|
|
match ch {
|
|
'\n' => formatter.write_str("\\n"),
|
|
'\t' => formatter.write_str("\\t"),
|
|
'\r' => formatter.write_str("\\r"),
|
|
// these 2 branches *would* be handled below, but we shouldn't have to do a
|
|
// unicodedata lookup just for ascii characters
|
|
'\x20'..='\x7e' => {
|
|
// printable ascii range
|
|
if ch == quote.as_char() || ch == '\\' {
|
|
formatter.write_char('\\')?;
|
|
}
|
|
formatter.write_char(ch)
|
|
}
|
|
ch if ch.is_ascii() => {
|
|
write!(formatter, "\\x{:02x}", ch as u8)
|
|
}
|
|
ch if crate::char::is_printable(ch) => formatter.write_char(ch),
|
|
'\0'..='\u{ff}' => {
|
|
write!(formatter, "\\x{:02x}", ch as u32)
|
|
}
|
|
'\0'..='\u{ffff}' => {
|
|
write!(formatter, "\\u{:04x}", ch as u32)
|
|
}
|
|
_ => {
|
|
write!(formatter, "\\U{:08x}", ch as u32)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Escape for UnicodeEscape<'_> {
|
|
fn source_len(&self) -> usize {
|
|
self.source.len()
|
|
}
|
|
|
|
fn layout(&self) -> &EscapeLayout {
|
|
&self.layout
|
|
}
|
|
|
|
fn write_source(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
|
|
formatter.write_str(self.source)
|
|
}
|
|
|
|
#[cold]
|
|
fn write_body_slow(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
|
|
for ch in self.source.chars() {
|
|
Self::write_char(ch, self.layout().quote, formatter)?;
|
|
}
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
pub struct AsciiEscape<'a> {
|
|
source: &'a [u8],
|
|
layout: EscapeLayout,
|
|
}
|
|
|
|
impl<'a> AsciiEscape<'a> {
|
|
#[inline]
|
|
pub fn new(source: &'a [u8], layout: EscapeLayout) -> Self {
|
|
Self { source, layout }
|
|
}
|
|
#[inline]
|
|
pub fn with_preferred_quote(source: &'a [u8], quote: Quote) -> Self {
|
|
let layout = Self::repr_layout(source, quote);
|
|
Self { source, layout }
|
|
}
|
|
#[inline]
|
|
pub fn new_repr(source: &'a [u8]) -> Self {
|
|
Self::with_preferred_quote(source, Quote::Single)
|
|
}
|
|
#[inline]
|
|
pub fn bytes_repr<'r>(&'a self, triple_quotes: TripleQuotes) -> BytesRepr<'r, 'a> {
|
|
BytesRepr {
|
|
escape: self,
|
|
triple_quotes,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl AsciiEscape<'_> {
|
|
#[expect(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
|
|
pub fn repr_layout(source: &[u8], preferred_quote: Quote) -> EscapeLayout {
|
|
Self::output_layout_with_checker(source, preferred_quote, 3, |a, b| {
|
|
Some((a as isize).checked_add(b as isize)? as usize)
|
|
})
|
|
}
|
|
|
|
fn output_layout_with_checker(
|
|
source: &[u8],
|
|
preferred_quote: Quote,
|
|
reserved_len: usize,
|
|
length_add: impl Fn(usize, usize) -> Option<usize>,
|
|
) -> EscapeLayout {
|
|
let mut out_len = reserved_len;
|
|
let mut single_count = 0;
|
|
let mut double_count = 0;
|
|
|
|
for ch in source {
|
|
let incr = match ch {
|
|
b'\'' => {
|
|
single_count += 1;
|
|
1
|
|
}
|
|
b'"' => {
|
|
double_count += 1;
|
|
1
|
|
}
|
|
c => Self::escaped_char_len(*c),
|
|
};
|
|
let Some(new_len) = length_add(out_len, incr) else {
|
|
#[cold]
|
|
fn stop(
|
|
single_count: usize,
|
|
double_count: usize,
|
|
preferred_quote: Quote,
|
|
) -> EscapeLayout {
|
|
EscapeLayout {
|
|
quote: choose_quote(single_count, double_count, preferred_quote).0,
|
|
len: None,
|
|
}
|
|
}
|
|
return stop(single_count, double_count, preferred_quote);
|
|
};
|
|
out_len = new_len;
|
|
}
|
|
|
|
let (quote, num_escaped_quotes) = choose_quote(single_count, double_count, preferred_quote);
|
|
// we'll be adding backslashes in front of the existing inner quotes
|
|
let Some(out_len) = length_add(out_len, num_escaped_quotes) else {
|
|
return EscapeLayout { quote, len: None };
|
|
};
|
|
|
|
EscapeLayout {
|
|
quote,
|
|
len: Some(out_len - reserved_len),
|
|
}
|
|
}
|
|
|
|
fn escaped_char_len(ch: u8) -> usize {
|
|
match ch {
|
|
b'\\' | b'\t' | b'\r' | b'\n' => 2,
|
|
0x20..=0x7e => 1,
|
|
_ => 4, // \xHH
|
|
}
|
|
}
|
|
|
|
fn write_char(ch: u8, quote: Quote, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
|
|
match ch {
|
|
b'\t' => formatter.write_str("\\t"),
|
|
b'\n' => formatter.write_str("\\n"),
|
|
b'\r' => formatter.write_str("\\r"),
|
|
0x20..=0x7e => {
|
|
// printable ascii range
|
|
if ch == quote.as_byte() || ch == b'\\' {
|
|
formatter.write_char('\\')?;
|
|
}
|
|
formatter.write_char(ch as char)
|
|
}
|
|
ch => write!(formatter, "\\x{ch:02x}"),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Escape for AsciiEscape<'_> {
|
|
fn source_len(&self) -> usize {
|
|
self.source.len()
|
|
}
|
|
|
|
fn layout(&self) -> &EscapeLayout {
|
|
&self.layout
|
|
}
|
|
fn write_source(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
|
|
// OK because function must be called only when source is printable ascii characters.
|
|
let string = std::str::from_utf8(self.source).expect("ASCII bytes");
|
|
formatter.write_str(string)
|
|
}
|
|
|
|
#[cold]
|
|
fn write_body_slow(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
|
|
for ch in self.source {
|
|
Self::write_char(*ch, self.layout().quote, formatter)?;
|
|
}
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
pub struct BytesRepr<'r, 'a> {
|
|
escape: &'r AsciiEscape<'a>,
|
|
triple_quotes: TripleQuotes,
|
|
}
|
|
|
|
impl BytesRepr<'_, '_> {
|
|
pub fn write(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result {
|
|
let flags = BytesLiteralFlags::empty()
|
|
.with_quote_style(self.escape.layout().quote)
|
|
.with_triple_quotes(self.triple_quotes);
|
|
|
|
formatter.write_char('b')?;
|
|
formatter.write_str(flags.quote_str())?;
|
|
self.escape.write_body(formatter)?;
|
|
formatter.write_str(flags.quote_str())?;
|
|
Ok(())
|
|
}
|
|
|
|
pub fn to_string(&self) -> Option<String> {
|
|
let mut s = String::with_capacity(self.escape.layout().len?);
|
|
self.write(&mut s).unwrap();
|
|
Some(s)
|
|
}
|
|
}
|
|
|
|
impl std::fmt::Display for BytesRepr<'_, '_> {
|
|
fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
self.write(formatter)
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod unicode_escape_tests {
|
|
use super::*;
|
|
|
|
#[test]
|
|
fn changed() {
|
|
fn test(s: &str) -> bool {
|
|
UnicodeEscape::new_repr(s).changed()
|
|
}
|
|
assert!(!test("hello"));
|
|
assert!(!test("'hello'"));
|
|
assert!(!test("\"hello\""));
|
|
|
|
assert!(test("'\"hello"));
|
|
assert!(test("hello\n"));
|
|
}
|
|
}
|