mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-03 18:28:24 +00:00
perf(pycodestyle): Initialize Stylist from tokens (#3757)
This commit is contained in:
parent
000394f428
commit
f68c26a506
14 changed files with 200 additions and 172 deletions
|
@ -63,7 +63,7 @@ pub struct Generator<'a> {
|
|||
/// The indentation style to use.
|
||||
indent: &'a Indentation,
|
||||
/// The quote style to use for string literals.
|
||||
quote: &'a Quote,
|
||||
quote: Quote,
|
||||
/// The line ending to use.
|
||||
line_ending: &'a LineEnding,
|
||||
buffer: String,
|
||||
|
@ -87,11 +87,7 @@ impl<'a> From<&'a Stylist<'a>> for Generator<'a> {
|
|||
}
|
||||
|
||||
impl<'a> Generator<'a> {
|
||||
pub const fn new(
|
||||
indent: &'a Indentation,
|
||||
quote: &'a Quote,
|
||||
line_ending: &'a LineEnding,
|
||||
) -> Self {
|
||||
pub const fn new(indent: &'a Indentation, quote: Quote, line_ending: &'a LineEnding) -> Self {
|
||||
Self {
|
||||
// Style preferences.
|
||||
indent,
|
||||
|
@ -1229,8 +1225,8 @@ impl<'a> Generator<'a> {
|
|||
let mut generator = Generator::new(
|
||||
self.indent,
|
||||
match self.quote {
|
||||
Quote::Single => &Quote::Double,
|
||||
Quote::Double => &Quote::Single,
|
||||
Quote::Single => Quote::Double,
|
||||
Quote::Double => Quote::Single,
|
||||
},
|
||||
self.line_ending,
|
||||
);
|
||||
|
@ -1270,14 +1266,14 @@ mod tests {
|
|||
let line_ending = LineEnding::default();
|
||||
let program = parser::parse_program(contents, "<filename>").unwrap();
|
||||
let stmt = program.first().unwrap();
|
||||
let mut generator = Generator::new(&indentation, &quote, &line_ending);
|
||||
let mut generator = Generator::new(&indentation, quote, &line_ending);
|
||||
generator.unparse_stmt(stmt);
|
||||
generator.generate()
|
||||
}
|
||||
|
||||
fn round_trip_with(
|
||||
indentation: &Indentation,
|
||||
quote: &Quote,
|
||||
quote: Quote,
|
||||
line_ending: &LineEnding,
|
||||
contents: &str,
|
||||
) -> String {
|
||||
|
@ -1452,7 +1448,7 @@ if True:
|
|||
assert_eq!(
|
||||
round_trip_with(
|
||||
&Indentation::default(),
|
||||
&Quote::Double,
|
||||
Quote::Double,
|
||||
&LineEnding::default(),
|
||||
r#""hello""#
|
||||
),
|
||||
|
@ -1461,7 +1457,7 @@ if True:
|
|||
assert_eq!(
|
||||
round_trip_with(
|
||||
&Indentation::default(),
|
||||
&Quote::Single,
|
||||
Quote::Single,
|
||||
&LineEnding::default(),
|
||||
r#""hello""#
|
||||
),
|
||||
|
@ -1470,7 +1466,7 @@ if True:
|
|||
assert_eq!(
|
||||
round_trip_with(
|
||||
&Indentation::default(),
|
||||
&Quote::Double,
|
||||
Quote::Double,
|
||||
&LineEnding::default(),
|
||||
r#"'hello'"#
|
||||
),
|
||||
|
@ -1479,7 +1475,7 @@ if True:
|
|||
assert_eq!(
|
||||
round_trip_with(
|
||||
&Indentation::default(),
|
||||
&Quote::Single,
|
||||
Quote::Single,
|
||||
&LineEnding::default(),
|
||||
r#"'hello'"#
|
||||
),
|
||||
|
@ -1492,7 +1488,7 @@ if True:
|
|||
assert_eq!(
|
||||
round_trip_with(
|
||||
&Indentation::new(" ".to_string()),
|
||||
&Quote::default(),
|
||||
Quote::default(),
|
||||
&LineEnding::default(),
|
||||
r#"
|
||||
if True:
|
||||
|
@ -1510,7 +1506,7 @@ if True:
|
|||
assert_eq!(
|
||||
round_trip_with(
|
||||
&Indentation::new(" ".to_string()),
|
||||
&Quote::default(),
|
||||
Quote::default(),
|
||||
&LineEnding::default(),
|
||||
r#"
|
||||
if True:
|
||||
|
@ -1528,7 +1524,7 @@ if True:
|
|||
assert_eq!(
|
||||
round_trip_with(
|
||||
&Indentation::new("\t".to_string()),
|
||||
&Quote::default(),
|
||||
Quote::default(),
|
||||
&LineEnding::default(),
|
||||
r#"
|
||||
if True:
|
||||
|
@ -1550,7 +1546,7 @@ if True:
|
|||
assert_eq!(
|
||||
round_trip_with(
|
||||
&Indentation::default(),
|
||||
&Quote::default(),
|
||||
Quote::default(),
|
||||
&LineEnding::Lf,
|
||||
"if True:\n print(42)",
|
||||
),
|
||||
|
@ -1560,7 +1556,7 @@ if True:
|
|||
assert_eq!(
|
||||
round_trip_with(
|
||||
&Indentation::default(),
|
||||
&Quote::default(),
|
||||
Quote::default(),
|
||||
&LineEnding::CrLf,
|
||||
"if True:\n print(42)",
|
||||
),
|
||||
|
@ -1570,7 +1566,7 @@ if True:
|
|||
assert_eq!(
|
||||
round_trip_with(
|
||||
&Indentation::default(),
|
||||
&Quote::default(),
|
||||
Quote::default(),
|
||||
&LineEnding::Cr,
|
||||
"if True:\n print(42)",
|
||||
),
|
||||
|
|
|
@ -107,13 +107,12 @@ impl From<&str> for Index {
|
|||
|
||||
let mut line_start_offsets: Vec<u32> = Vec::with_capacity(48);
|
||||
line_start_offsets.push(0);
|
||||
let mut utf8 = false;
|
||||
|
||||
// SAFE because of length assertion above
|
||||
#[allow(clippy::cast_possible_truncation)]
|
||||
for (i, byte) in contents.bytes().enumerate() {
|
||||
if !byte.is_ascii() {
|
||||
return Self::Utf8(continue_utf8_index(&contents[i..], i, line_start_offsets));
|
||||
}
|
||||
utf8 |= !byte.is_ascii();
|
||||
|
||||
match byte {
|
||||
// Only track one line break for `\r\n`.
|
||||
|
@ -125,32 +124,12 @@ impl From<&str> for Index {
|
|||
}
|
||||
}
|
||||
|
||||
Self::Ascii(AsciiIndex::new(line_start_offsets))
|
||||
}
|
||||
}
|
||||
|
||||
// SAFE because of length assertion in `Index::from(&str)`
|
||||
#[allow(clippy::cast_possible_truncation)]
|
||||
fn continue_utf8_index(
|
||||
non_ascii_part: &str,
|
||||
offset: usize,
|
||||
line_start_offsets: Vec<u32>,
|
||||
) -> Utf8Index {
|
||||
let mut lines = line_start_offsets;
|
||||
|
||||
for (position, char) in non_ascii_part.char_indices() {
|
||||
match char {
|
||||
// Only track `\n` for `\r\n`
|
||||
'\r' if non_ascii_part.as_bytes().get(position + 1) == Some(&b'\n') => continue,
|
||||
'\r' | '\n' => {
|
||||
let absolute_offset = offset + position + 1;
|
||||
lines.push(absolute_offset as u32);
|
||||
}
|
||||
_ => {}
|
||||
if utf8 {
|
||||
Self::Utf8(Utf8Index::new(line_start_offsets))
|
||||
} else {
|
||||
Self::Ascii(AsciiIndex::new(line_start_offsets))
|
||||
}
|
||||
}
|
||||
|
||||
Utf8Index::new(lines)
|
||||
}
|
||||
|
||||
/// Index for fast [`Location`] to byte offset conversions for ASCII documents.
|
||||
|
|
|
@ -7,14 +7,15 @@ pub use generator::Generator;
|
|||
pub use indexer::Indexer;
|
||||
pub use locator::Locator;
|
||||
use rustpython_parser as parser;
|
||||
use rustpython_parser::ParseError;
|
||||
use rustpython_parser::{lexer, Mode, ParseError};
|
||||
pub use stylist::{LineEnding, Stylist};
|
||||
|
||||
/// Run round-trip source code generation on a given Python code.
|
||||
pub fn round_trip(code: &str, source_path: &str) -> Result<String, ParseError> {
|
||||
let locator = Locator::new(code);
|
||||
let python_ast = parser::parse_program(code, source_path)?;
|
||||
let stylist = Stylist::from_contents(code, &locator);
|
||||
let tokens: Vec<_> = lexer::lex(code, Mode::Module).collect();
|
||||
let stylist = Stylist::from_tokens(&tokens, &locator);
|
||||
let mut generator: Generator = (&stylist).into();
|
||||
generator.unparse_suite(&python_ast);
|
||||
Ok(generator.generate())
|
||||
|
|
|
@ -5,7 +5,8 @@ use std::ops::Deref;
|
|||
|
||||
use once_cell::unsync::OnceCell;
|
||||
use rustpython_parser::ast::Location;
|
||||
use rustpython_parser::{lexer, Mode, Tok};
|
||||
use rustpython_parser::lexer::LexResult;
|
||||
use rustpython_parser::Tok;
|
||||
|
||||
use crate::source_code::Locator;
|
||||
use ruff_rustpython::vendor;
|
||||
|
@ -14,34 +15,74 @@ use crate::str::leading_quote;
|
|||
use crate::types::Range;
|
||||
|
||||
pub struct Stylist<'a> {
|
||||
contents: &'a str,
|
||||
locator: &'a Locator<'a>,
|
||||
indentation: OnceCell<Indentation>,
|
||||
indent_end: Option<Location>,
|
||||
quote: OnceCell<Quote>,
|
||||
quote_range: Option<Range>,
|
||||
line_ending: OnceCell<LineEnding>,
|
||||
}
|
||||
|
||||
impl<'a> Stylist<'a> {
|
||||
pub fn indentation(&'a self) -> &'a Indentation {
|
||||
self.indentation
|
||||
.get_or_init(|| detect_indentation(self.contents, self.locator).unwrap_or_default())
|
||||
self.indentation.get_or_init(|| {
|
||||
if let Some(indent_end) = self.indent_end {
|
||||
let start = Location::new(indent_end.row(), 0);
|
||||
let whitespace = self.locator.slice(Range::new(start, indent_end));
|
||||
Indentation(whitespace.to_string())
|
||||
} else {
|
||||
Indentation::default()
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
pub fn quote(&'a self) -> &'a Quote {
|
||||
self.quote
|
||||
.get_or_init(|| detect_quote(self.contents, self.locator).unwrap_or_default())
|
||||
pub fn quote(&'a self) -> Quote {
|
||||
*self.quote.get_or_init(|| {
|
||||
self.quote_range
|
||||
.and_then(|quote_range| {
|
||||
let content = self.locator.slice(quote_range);
|
||||
leading_quote(content)
|
||||
})
|
||||
.map(|pattern| {
|
||||
if pattern.contains('\'') {
|
||||
Quote::Single
|
||||
} else if pattern.contains('"') {
|
||||
Quote::Double
|
||||
} else {
|
||||
unreachable!("Expected string to start with a valid quote prefix")
|
||||
}
|
||||
})
|
||||
.unwrap_or_default()
|
||||
})
|
||||
}
|
||||
|
||||
pub fn line_ending(&'a self) -> &'a LineEnding {
|
||||
self.line_ending
|
||||
.get_or_init(|| detect_line_ending(self.contents).unwrap_or_default())
|
||||
.get_or_init(|| detect_line_ending(self.locator.contents()).unwrap_or_default())
|
||||
}
|
||||
|
||||
pub fn from_contents(contents: &'a str, locator: &'a Locator<'a>) -> Self {
|
||||
pub fn from_tokens(tokens: &[LexResult], locator: &'a Locator<'a>) -> Self {
|
||||
let indent_end = tokens.iter().flatten().find_map(|(_, t, end)| {
|
||||
if matches!(t, Tok::Indent) {
|
||||
Some(*end)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
|
||||
let quote_range = tokens.iter().flatten().find_map(|(start, t, end)| match t {
|
||||
Tok::String {
|
||||
triple_quoted: false,
|
||||
..
|
||||
} => Some(Range::new(*start, *end)),
|
||||
_ => None,
|
||||
});
|
||||
|
||||
Self {
|
||||
contents,
|
||||
locator,
|
||||
indentation: OnceCell::default(),
|
||||
indent_end,
|
||||
quote_range,
|
||||
quote: OnceCell::default(),
|
||||
line_ending: OnceCell::default(),
|
||||
}
|
||||
|
@ -49,7 +90,7 @@ impl<'a> Stylist<'a> {
|
|||
}
|
||||
|
||||
/// The quotation style used in Python source code.
|
||||
#[derive(Debug, Default, PartialEq, Eq)]
|
||||
#[derive(Debug, Default, PartialEq, Eq, Copy, Clone)]
|
||||
pub enum Quote {
|
||||
Single,
|
||||
#[default]
|
||||
|
@ -65,8 +106,8 @@ impl From<Quote> for char {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<&Quote> for vendor::str::Quote {
|
||||
fn from(val: &Quote) -> Self {
|
||||
impl From<Quote> for vendor::str::Quote {
|
||||
fn from(val: Quote) -> Self {
|
||||
match val {
|
||||
Quote::Single => vendor::str::Quote::Single,
|
||||
Quote::Double => vendor::str::Quote::Double,
|
||||
|
@ -83,15 +124,6 @@ impl fmt::Display for Quote {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<&Quote> for char {
|
||||
fn from(val: &Quote) -> Self {
|
||||
match val {
|
||||
Quote::Single => '\'',
|
||||
Quote::Double => '"',
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The indentation style used in Python source code.
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub struct Indentation(String);
|
||||
|
@ -163,38 +195,6 @@ impl Deref for LineEnding {
|
|||
}
|
||||
}
|
||||
|
||||
/// Detect the indentation style of the given tokens.
|
||||
fn detect_indentation(contents: &str, locator: &Locator) -> Option<Indentation> {
|
||||
for (_start, tok, end) in lexer::lex(contents, Mode::Module).flatten() {
|
||||
if let Tok::Indent { .. } = tok {
|
||||
let start = Location::new(end.row(), 0);
|
||||
let whitespace = locator.slice(Range::new(start, end));
|
||||
return Some(Indentation(whitespace.to_string()));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Detect the quotation style of the given tokens.
|
||||
fn detect_quote(contents: &str, locator: &Locator) -> Option<Quote> {
|
||||
for (start, tok, end) in lexer::lex(contents, Mode::Module).flatten() {
|
||||
if let Tok::String { .. } = tok {
|
||||
let content = locator.slice(Range::new(start, end));
|
||||
if let Some(pattern) = leading_quote(content) {
|
||||
if pattern.contains("\"\"\"") {
|
||||
continue;
|
||||
} else if pattern.contains('\'') {
|
||||
return Some(Quote::Single);
|
||||
} else if pattern.contains('"') {
|
||||
return Some(Quote::Double);
|
||||
}
|
||||
unreachable!("Expected string to start with a valid quote prefix")
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Detect the line ending style of the given contents.
|
||||
fn detect_line_ending(contents: &str) -> Option<LineEnding> {
|
||||
if let Some(position) = contents.find('\n') {
|
||||
|
@ -212,25 +212,30 @@ fn detect_line_ending(contents: &str) -> Option<LineEnding> {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::source_code::stylist::{
|
||||
detect_indentation, detect_line_ending, detect_quote, Indentation, LineEnding, Quote,
|
||||
};
|
||||
use crate::source_code::Locator;
|
||||
use crate::source_code::stylist::{detect_line_ending, Indentation, LineEnding, Quote};
|
||||
use crate::source_code::{Locator, Stylist};
|
||||
use rustpython_parser::lexer::lex;
|
||||
use rustpython_parser::Mode;
|
||||
|
||||
#[test]
|
||||
fn indentation() {
|
||||
let contents = r#"x = 1"#;
|
||||
let locator = Locator::new(contents);
|
||||
assert_eq!(detect_indentation(contents, &locator), None);
|
||||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||||
assert_eq!(
|
||||
Stylist::from_tokens(&tokens, &locator).indentation(),
|
||||
&Indentation::default()
|
||||
);
|
||||
|
||||
let contents = r#"
|
||||
if True:
|
||||
pass
|
||||
"#;
|
||||
let locator = Locator::new(contents);
|
||||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||||
assert_eq!(
|
||||
detect_indentation(contents, &locator),
|
||||
Some(Indentation(" ".to_string()))
|
||||
Stylist::from_tokens(&tokens, &locator).indentation(),
|
||||
&Indentation(" ".to_string())
|
||||
);
|
||||
|
||||
let contents = r#"
|
||||
|
@ -238,9 +243,10 @@ if True:
|
|||
pass
|
||||
"#;
|
||||
let locator = Locator::new(contents);
|
||||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||||
assert_eq!(
|
||||
detect_indentation(contents, &locator),
|
||||
Some(Indentation(" ".to_string()))
|
||||
Stylist::from_tokens(&tokens, &locator).indentation(),
|
||||
&Indentation(" ".to_string())
|
||||
);
|
||||
|
||||
let contents = r#"
|
||||
|
@ -248,9 +254,10 @@ if True:
|
|||
pass
|
||||
"#;
|
||||
let locator = Locator::new(contents);
|
||||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||||
assert_eq!(
|
||||
detect_indentation(contents, &locator),
|
||||
Some(Indentation("\t".to_string()))
|
||||
Stylist::from_tokens(&tokens, &locator).indentation(),
|
||||
&Indentation("\t".to_string())
|
||||
);
|
||||
|
||||
// TODO(charlie): Should non-significant whitespace be detected?
|
||||
|
@ -262,26 +269,46 @@ x = (
|
|||
)
|
||||
"#;
|
||||
let locator = Locator::new(contents);
|
||||
assert_eq!(detect_indentation(contents, &locator), None);
|
||||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||||
assert_eq!(
|
||||
Stylist::from_tokens(&tokens, &locator).indentation(),
|
||||
&Indentation::default()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn quote() {
|
||||
let contents = r#"x = 1"#;
|
||||
let locator = Locator::new(contents);
|
||||
assert_eq!(detect_quote(contents, &locator), None);
|
||||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||||
assert_eq!(
|
||||
Stylist::from_tokens(&tokens, &locator).quote(),
|
||||
Quote::default()
|
||||
);
|
||||
|
||||
let contents = r#"x = '1'"#;
|
||||
let locator = Locator::new(contents);
|
||||
assert_eq!(detect_quote(contents, &locator), Some(Quote::Single));
|
||||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||||
assert_eq!(
|
||||
Stylist::from_tokens(&tokens, &locator).quote(),
|
||||
Quote::Single
|
||||
);
|
||||
|
||||
let contents = r#"x = "1""#;
|
||||
let locator = Locator::new(contents);
|
||||
assert_eq!(detect_quote(contents, &locator), Some(Quote::Double));
|
||||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||||
assert_eq!(
|
||||
Stylist::from_tokens(&tokens, &locator).quote(),
|
||||
Quote::Double
|
||||
);
|
||||
|
||||
let contents = r#"s = "It's done.""#;
|
||||
let locator = Locator::new(contents);
|
||||
assert_eq!(detect_quote(contents, &locator), Some(Quote::Double));
|
||||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||||
assert_eq!(
|
||||
Stylist::from_tokens(&tokens, &locator).quote(),
|
||||
Quote::Double
|
||||
);
|
||||
|
||||
// No style if only double quoted docstring (will take default Double)
|
||||
let contents = r#"
|
||||
|
@ -290,7 +317,11 @@ def f():
|
|||
pass
|
||||
"#;
|
||||
let locator = Locator::new(contents);
|
||||
assert_eq!(detect_quote(contents, &locator), None);
|
||||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||||
assert_eq!(
|
||||
Stylist::from_tokens(&tokens, &locator).quote(),
|
||||
Quote::default()
|
||||
);
|
||||
|
||||
// Detect from string literal appearing after docstring
|
||||
let contents = r#"
|
||||
|
@ -299,7 +330,23 @@ def f():
|
|||
a = 'v'
|
||||
"#;
|
||||
let locator = Locator::new(contents);
|
||||
assert_eq!(detect_quote(contents, &locator), Some(Quote::Single));
|
||||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||||
assert_eq!(
|
||||
Stylist::from_tokens(&tokens, &locator).quote(),
|
||||
Quote::Single
|
||||
);
|
||||
|
||||
let contents = r#"
|
||||
'''Module docstring.'''
|
||||
|
||||
a = "v"
|
||||
"#;
|
||||
let locator = Locator::new(contents);
|
||||
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
|
||||
assert_eq!(
|
||||
Stylist::from_tokens(&tokens, &locator).quote(),
|
||||
Quote::Double
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue