perf(pycodestyle): Initialize Stylist from tokens (#3757)

This commit is contained in:
Micha Reiser 2023-03-28 11:53:35 +02:00 committed by GitHub
parent 000394f428
commit f68c26a506
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 200 additions and 172 deletions

View file

@ -63,7 +63,7 @@ pub struct Generator<'a> {
/// The indentation style to use.
indent: &'a Indentation,
/// The quote style to use for string literals.
quote: &'a Quote,
quote: Quote,
/// The line ending to use.
line_ending: &'a LineEnding,
buffer: String,
@ -87,11 +87,7 @@ impl<'a> From<&'a Stylist<'a>> for Generator<'a> {
}
impl<'a> Generator<'a> {
pub const fn new(
indent: &'a Indentation,
quote: &'a Quote,
line_ending: &'a LineEnding,
) -> Self {
pub const fn new(indent: &'a Indentation, quote: Quote, line_ending: &'a LineEnding) -> Self {
Self {
// Style preferences.
indent,
@ -1229,8 +1225,8 @@ impl<'a> Generator<'a> {
let mut generator = Generator::new(
self.indent,
match self.quote {
Quote::Single => &Quote::Double,
Quote::Double => &Quote::Single,
Quote::Single => Quote::Double,
Quote::Double => Quote::Single,
},
self.line_ending,
);
@ -1270,14 +1266,14 @@ mod tests {
let line_ending = LineEnding::default();
let program = parser::parse_program(contents, "<filename>").unwrap();
let stmt = program.first().unwrap();
let mut generator = Generator::new(&indentation, &quote, &line_ending);
let mut generator = Generator::new(&indentation, quote, &line_ending);
generator.unparse_stmt(stmt);
generator.generate()
}
fn round_trip_with(
indentation: &Indentation,
quote: &Quote,
quote: Quote,
line_ending: &LineEnding,
contents: &str,
) -> String {
@ -1452,7 +1448,7 @@ if True:
assert_eq!(
round_trip_with(
&Indentation::default(),
&Quote::Double,
Quote::Double,
&LineEnding::default(),
r#""hello""#
),
@ -1461,7 +1457,7 @@ if True:
assert_eq!(
round_trip_with(
&Indentation::default(),
&Quote::Single,
Quote::Single,
&LineEnding::default(),
r#""hello""#
),
@ -1470,7 +1466,7 @@ if True:
assert_eq!(
round_trip_with(
&Indentation::default(),
&Quote::Double,
Quote::Double,
&LineEnding::default(),
r#"'hello'"#
),
@ -1479,7 +1475,7 @@ if True:
assert_eq!(
round_trip_with(
&Indentation::default(),
&Quote::Single,
Quote::Single,
&LineEnding::default(),
r#"'hello'"#
),
@ -1492,7 +1488,7 @@ if True:
assert_eq!(
round_trip_with(
&Indentation::new(" ".to_string()),
&Quote::default(),
Quote::default(),
&LineEnding::default(),
r#"
if True:
@ -1510,7 +1506,7 @@ if True:
assert_eq!(
round_trip_with(
&Indentation::new(" ".to_string()),
&Quote::default(),
Quote::default(),
&LineEnding::default(),
r#"
if True:
@ -1528,7 +1524,7 @@ if True:
assert_eq!(
round_trip_with(
&Indentation::new("\t".to_string()),
&Quote::default(),
Quote::default(),
&LineEnding::default(),
r#"
if True:
@ -1550,7 +1546,7 @@ if True:
assert_eq!(
round_trip_with(
&Indentation::default(),
&Quote::default(),
Quote::default(),
&LineEnding::Lf,
"if True:\n print(42)",
),
@ -1560,7 +1556,7 @@ if True:
assert_eq!(
round_trip_with(
&Indentation::default(),
&Quote::default(),
Quote::default(),
&LineEnding::CrLf,
"if True:\n print(42)",
),
@ -1570,7 +1566,7 @@ if True:
assert_eq!(
round_trip_with(
&Indentation::default(),
&Quote::default(),
Quote::default(),
&LineEnding::Cr,
"if True:\n print(42)",
),

View file

@ -107,13 +107,12 @@ impl From<&str> for Index {
let mut line_start_offsets: Vec<u32> = Vec::with_capacity(48);
line_start_offsets.push(0);
let mut utf8 = false;
// SAFE because of length assertion above
#[allow(clippy::cast_possible_truncation)]
for (i, byte) in contents.bytes().enumerate() {
if !byte.is_ascii() {
return Self::Utf8(continue_utf8_index(&contents[i..], i, line_start_offsets));
}
utf8 |= !byte.is_ascii();
match byte {
// Only track one line break for `\r\n`.
@ -125,32 +124,12 @@ impl From<&str> for Index {
}
}
Self::Ascii(AsciiIndex::new(line_start_offsets))
}
}
// SAFE because of length assertion in `Index::from(&str)`
#[allow(clippy::cast_possible_truncation)]
fn continue_utf8_index(
non_ascii_part: &str,
offset: usize,
line_start_offsets: Vec<u32>,
) -> Utf8Index {
let mut lines = line_start_offsets;
for (position, char) in non_ascii_part.char_indices() {
match char {
// Only track `\n` for `\r\n`
'\r' if non_ascii_part.as_bytes().get(position + 1) == Some(&b'\n') => continue,
'\r' | '\n' => {
let absolute_offset = offset + position + 1;
lines.push(absolute_offset as u32);
}
_ => {}
if utf8 {
Self::Utf8(Utf8Index::new(line_start_offsets))
} else {
Self::Ascii(AsciiIndex::new(line_start_offsets))
}
}
Utf8Index::new(lines)
}
/// Index for fast [`Location`] to byte offset conversions for ASCII documents.

View file

@ -7,14 +7,15 @@ pub use generator::Generator;
pub use indexer::Indexer;
pub use locator::Locator;
use rustpython_parser as parser;
use rustpython_parser::ParseError;
use rustpython_parser::{lexer, Mode, ParseError};
pub use stylist::{LineEnding, Stylist};
/// Run round-trip source code generation on a given Python code.
pub fn round_trip(code: &str, source_path: &str) -> Result<String, ParseError> {
let locator = Locator::new(code);
let python_ast = parser::parse_program(code, source_path)?;
let stylist = Stylist::from_contents(code, &locator);
let tokens: Vec<_> = lexer::lex(code, Mode::Module).collect();
let stylist = Stylist::from_tokens(&tokens, &locator);
let mut generator: Generator = (&stylist).into();
generator.unparse_suite(&python_ast);
Ok(generator.generate())

View file

@ -5,7 +5,8 @@ use std::ops::Deref;
use once_cell::unsync::OnceCell;
use rustpython_parser::ast::Location;
use rustpython_parser::{lexer, Mode, Tok};
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Tok;
use crate::source_code::Locator;
use ruff_rustpython::vendor;
@ -14,34 +15,74 @@ use crate::str::leading_quote;
use crate::types::Range;
pub struct Stylist<'a> {
contents: &'a str,
locator: &'a Locator<'a>,
indentation: OnceCell<Indentation>,
indent_end: Option<Location>,
quote: OnceCell<Quote>,
quote_range: Option<Range>,
line_ending: OnceCell<LineEnding>,
}
impl<'a> Stylist<'a> {
pub fn indentation(&'a self) -> &'a Indentation {
self.indentation
.get_or_init(|| detect_indentation(self.contents, self.locator).unwrap_or_default())
self.indentation.get_or_init(|| {
if let Some(indent_end) = self.indent_end {
let start = Location::new(indent_end.row(), 0);
let whitespace = self.locator.slice(Range::new(start, indent_end));
Indentation(whitespace.to_string())
} else {
Indentation::default()
}
})
}
pub fn quote(&'a self) -> &'a Quote {
self.quote
.get_or_init(|| detect_quote(self.contents, self.locator).unwrap_or_default())
pub fn quote(&'a self) -> Quote {
*self.quote.get_or_init(|| {
self.quote_range
.and_then(|quote_range| {
let content = self.locator.slice(quote_range);
leading_quote(content)
})
.map(|pattern| {
if pattern.contains('\'') {
Quote::Single
} else if pattern.contains('"') {
Quote::Double
} else {
unreachable!("Expected string to start with a valid quote prefix")
}
})
.unwrap_or_default()
})
}
pub fn line_ending(&'a self) -> &'a LineEnding {
self.line_ending
.get_or_init(|| detect_line_ending(self.contents).unwrap_or_default())
.get_or_init(|| detect_line_ending(self.locator.contents()).unwrap_or_default())
}
pub fn from_contents(contents: &'a str, locator: &'a Locator<'a>) -> Self {
pub fn from_tokens(tokens: &[LexResult], locator: &'a Locator<'a>) -> Self {
let indent_end = tokens.iter().flatten().find_map(|(_, t, end)| {
if matches!(t, Tok::Indent) {
Some(*end)
} else {
None
}
});
let quote_range = tokens.iter().flatten().find_map(|(start, t, end)| match t {
Tok::String {
triple_quoted: false,
..
} => Some(Range::new(*start, *end)),
_ => None,
});
Self {
contents,
locator,
indentation: OnceCell::default(),
indent_end,
quote_range,
quote: OnceCell::default(),
line_ending: OnceCell::default(),
}
@ -49,7 +90,7 @@ impl<'a> Stylist<'a> {
}
/// The quotation style used in Python source code.
#[derive(Debug, Default, PartialEq, Eq)]
#[derive(Debug, Default, PartialEq, Eq, Copy, Clone)]
pub enum Quote {
Single,
#[default]
@ -65,8 +106,8 @@ impl From<Quote> for char {
}
}
impl From<&Quote> for vendor::str::Quote {
fn from(val: &Quote) -> Self {
impl From<Quote> for vendor::str::Quote {
fn from(val: Quote) -> Self {
match val {
Quote::Single => vendor::str::Quote::Single,
Quote::Double => vendor::str::Quote::Double,
@ -83,15 +124,6 @@ impl fmt::Display for Quote {
}
}
/// Convert a borrowed [`Quote`] into the literal quote character it stands for.
impl From<&Quote> for char {
    fn from(quote: &Quote) -> Self {
        // Exhaustive on purpose: a new `Quote` variant must pick its character here.
        match *quote {
            Quote::Double => '"',
            Quote::Single => '\'',
        }
    }
}
/// The indentation style used in Python source code.
#[derive(Debug, PartialEq, Eq)]
pub struct Indentation(String);
@ -163,38 +195,6 @@ impl Deref for LineEnding {
}
}
/// Detect the indentation style of the given contents.
///
/// Lexes `contents` and looks for the first `Indent` token. The indentation is the
/// source text between column 0 of that token's end row and the token's end location,
/// sliced out through `locator`. Returns `None` when no `Indent` token is produced.
/// Tokens that fail to lex are skipped.
fn detect_indentation(contents: &str, locator: &Locator) -> Option<Indentation> {
    lexer::lex(contents, Mode::Module)
        .flatten()
        .find_map(|(_, token, indent_end)| {
            if !matches!(token, Tok::Indent { .. }) {
                return None;
            }
            // The indent token ends where the leading whitespace ends, so slicing from
            // the start of that row yields exactly the indentation string.
            let line_start = Location::new(indent_end.row(), 0);
            let whitespace = locator.slice(Range::new(line_start, indent_end));
            Some(Indentation(whitespace.to_string()))
        })
}
/// Detect the quotation style of the given contents.
///
/// Lexes `contents` and inspects string tokens in source order. The first string that
/// is not triple-quoted decides the result: `Quote::Single` if its leading quote
/// prefix contains `'`, `Quote::Double` if it contains `"`.
///
/// Triple-quoted strings (both `"""` and `'''`) are skipped: they are typically
/// docstrings and do not indicate which quote the author prefers for ordinary
/// literals. Returns `None` when no deciding string is found. Tokens that fail to
/// lex are skipped.
///
/// # Panics
///
/// Panics if a string token's leading quote prefix contains neither `'` nor `"`.
fn detect_quote(contents: &str, locator: &Locator) -> Option<Quote> {
    for (start, tok, end) in lexer::lex(contents, Mode::Module).flatten() {
        if !matches!(tok, Tok::String { .. }) {
            continue;
        }
        let content = locator.slice(Range::new(start, end));
        if let Some(pattern) = leading_quote(content) {
            // Skip both triple-quote flavours. Checking only `"""` would let a
            // `'''docstring'''` be misread as a single-quote preference.
            if pattern.contains("\"\"\"") || pattern.contains("'''") {
                continue;
            }
            if pattern.contains('\'') {
                return Some(Quote::Single);
            }
            if pattern.contains('"') {
                return Some(Quote::Double);
            }
            unreachable!("Expected string to start with a valid quote prefix")
        }
    }
    None
}
/// Detect the line ending style of the given contents.
fn detect_line_ending(contents: &str) -> Option<LineEnding> {
if let Some(position) = contents.find('\n') {
@ -212,25 +212,30 @@ fn detect_line_ending(contents: &str) -> Option<LineEnding> {
#[cfg(test)]
mod tests {
use crate::source_code::stylist::{
detect_indentation, detect_line_ending, detect_quote, Indentation, LineEnding, Quote,
};
use crate::source_code::Locator;
use crate::source_code::stylist::{detect_line_ending, Indentation, LineEnding, Quote};
use crate::source_code::{Locator, Stylist};
use rustpython_parser::lexer::lex;
use rustpython_parser::Mode;
#[test]
fn indentation() {
let contents = r#"x = 1"#;
let locator = Locator::new(contents);
assert_eq!(detect_indentation(contents, &locator), None);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation::default()
);
let contents = r#"
if True:
pass
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
detect_indentation(contents, &locator),
Some(Indentation(" ".to_string()))
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation(" ".to_string())
);
let contents = r#"
@ -238,9 +243,10 @@ if True:
pass
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
detect_indentation(contents, &locator),
Some(Indentation(" ".to_string()))
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation(" ".to_string())
);
let contents = r#"
@ -248,9 +254,10 @@ if True:
pass
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
detect_indentation(contents, &locator),
Some(Indentation("\t".to_string()))
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation("\t".to_string())
);
// TODO(charlie): Should non-significant whitespace be detected?
@ -262,26 +269,46 @@ x = (
)
"#;
let locator = Locator::new(contents);
assert_eq!(detect_indentation(contents, &locator), None);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation::default()
);
}
#[test]
fn quote() {
let contents = r#"x = 1"#;
let locator = Locator::new(contents);
assert_eq!(detect_quote(contents, &locator), None);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::default()
);
let contents = r#"x = '1'"#;
let locator = Locator::new(contents);
assert_eq!(detect_quote(contents, &locator), Some(Quote::Single));
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Single
);
let contents = r#"x = "1""#;
let locator = Locator::new(contents);
assert_eq!(detect_quote(contents, &locator), Some(Quote::Double));
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Double
);
let contents = r#"s = "It's done.""#;
let locator = Locator::new(contents);
assert_eq!(detect_quote(contents, &locator), Some(Quote::Double));
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Double
);
// No style if only double quoted docstring (will take default Double)
let contents = r#"
@ -290,7 +317,11 @@ def f():
pass
"#;
let locator = Locator::new(contents);
assert_eq!(detect_quote(contents, &locator), None);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::default()
);
// Detect from string literal appearing after docstring
let contents = r#"
@ -299,7 +330,23 @@ def f():
a = 'v'
"#;
let locator = Locator::new(contents);
assert_eq!(detect_quote(contents, &locator), Some(Quote::Single));
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Single
);
let contents = r#"
'''Module docstring.'''
a = "v"
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Double
);
}
#[test]