ruff/crates/ruff_python_codegen/src/stylist.rs
konsti 14e65afdc6
Update to Rust 1.74 and use new clippy lints table (#8722)
Update to [Rust
1.74](https://blog.rust-lang.org/2023/11/16/Rust-1.74.0.html) and use
the new clippy lints table.

The update itself introduced a new clippy lint about superfluous hashes
in raw strings, which got removed.

I moved our lint config from `rustflags` to the newly stabilized
[workspace.lints](https://doc.rust-lang.org/stable/cargo/reference/workspaces.html#the-lints-table).
One consequence is that we have to `unsafe_code = "warn"` instead of
"forbid" because the latter now actually bans unsafe code:

```
error[E0453]: allow(unsafe_code) incompatible with previous forbid
  --> crates/ruff_source_file/src/newlines.rs:62:17
   |
62 |         #[allow(unsafe_code)]
   |                 ^^^^^^^^^^^ overruled by previous forbid
   |
   = note: `forbid` lint level was set on command line
```

---------

Co-authored-by: Charlie Marsh <charlie.r.marsh@gmail.com>
2023-11-16 18:12:46 -05:00

410 lines
11 KiB
Rust
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! Detect code style from Python source code.
use std::fmt;
use std::ops::Deref;
use once_cell::unsync::OnceCell;
use ruff_python_literal::escape::Quote as StrQuote;
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::Tok;
use ruff_source_file::{find_newline, LineEnding};
use ruff_python_ast::str::leading_quote;
use ruff_source_file::Locator;
pub struct Stylist<'a> {
locator: &'a Locator<'a>,
indentation: Indentation,
quote: Quote,
line_ending: OnceCell<LineEnding>,
}
impl<'a> Stylist<'a> {
pub fn indentation(&'a self) -> &'a Indentation {
&self.indentation
}
pub fn quote(&'a self) -> Quote {
self.quote
}
pub fn line_ending(&'a self) -> LineEnding {
*self.line_ending.get_or_init(|| {
let contents = self.locator.contents();
find_newline(contents)
.map(|(_, ending)| ending)
.unwrap_or_default()
})
}
pub fn from_tokens(tokens: &[LexResult], locator: &'a Locator<'a>) -> Self {
let indentation = detect_indention(tokens, locator);
Self {
locator,
indentation,
quote: detect_quote(tokens, locator),
line_ending: OnceCell::default(),
}
}
}
fn detect_quote(tokens: &[LexResult], locator: &Locator) -> Quote {
let quote_range = tokens.iter().flatten().find_map(|(t, range)| match t {
Tok::String {
triple_quoted: false,
..
} => Some(*range),
// No need to check if it's triple-quoted as f-strings cannot be used
// as docstrings.
Tok::FStringStart => Some(*range),
_ => None,
});
if let Some(quote_range) = quote_range {
let content = &locator.slice(quote_range);
if let Some(quotes) = leading_quote(content) {
return if quotes.contains('\'') {
Quote::Single
} else if quotes.contains('"') {
Quote::Double
} else {
unreachable!("Expected string to start with a valid quote prefix")
};
}
}
Quote::default()
}
fn detect_indention(tokens: &[LexResult], locator: &Locator) -> Indentation {
let indent_range = tokens.iter().flatten().find_map(|(t, range)| {
if matches!(t, Tok::Indent) {
Some(range)
} else {
None
}
});
if let Some(indent_range) = indent_range {
let mut whitespace = locator.slice(*indent_range);
// https://docs.python.org/3/reference/lexical_analysis.html#indentation
// > A formfeed character may be present at the start of the line; it will be ignored for
// > the indentation calculations above. Formfeed characters occurring elsewhere in the
// > leading whitespace have an undefined effect (for instance, they may reset the space
// > count to zero).
// So there's UB in python lexer -.-
// In practice, they just reset the indentation:
// https://github.com/python/cpython/blob/df8b3a46a7aa369f246a09ffd11ceedf1d34e921/Parser/tokenizer.c#L1819-L1821
// https://github.com/astral-sh/ruff/blob/a41bb2733fe75a71f4cf6d4bb21e659fc4630b30/crates/ruff_python_parser/src/lexer.rs#L664-L667
// We also reset the indentation when we see a formfeed character.
// See also https://github.com/astral-sh/ruff/issues/7455#issuecomment-1722458825
if let Some((_before, after)) = whitespace.rsplit_once('\x0C') {
whitespace = after;
}
Indentation(whitespace.to_string())
} else {
Indentation::default()
}
}
/// The quotation style used in Python source code.
#[derive(Debug, Default, PartialEq, Eq, Copy, Clone)]
pub enum Quote {
Single,
#[default]
Double,
}
impl From<Quote> for char {
fn from(val: Quote) -> Self {
match val {
Quote::Single => '\'',
Quote::Double => '"',
}
}
}
impl From<Quote> for StrQuote {
fn from(val: Quote) -> Self {
match val {
Quote::Single => StrQuote::Single,
Quote::Double => StrQuote::Double,
}
}
}
impl fmt::Display for Quote {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Quote::Single => write!(f, "\'"),
Quote::Double => write!(f, "\""),
}
}
}
/// The indentation style used in Python source code.
#[derive(Debug, PartialEq, Eq)]
pub struct Indentation(String);
impl Indentation {
pub const fn new(indentation: String) -> Self {
Self(indentation)
}
}
impl Default for Indentation {
fn default() -> Self {
Indentation(" ".to_string())
}
}
impl Indentation {
pub fn as_str(&self) -> &str {
self.0.as_str()
}
pub fn as_char(&self) -> char {
self.0.chars().next().unwrap()
}
}
impl Deref for Indentation {
type Target = str;
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
#[cfg(test)]
mod tests {
use ruff_python_parser::lexer::lex;
use ruff_python_parser::Mode;
use ruff_source_file::{find_newline, LineEnding};
use super::{Indentation, Quote, Stylist};
use ruff_source_file::Locator;
#[test]
fn indentation() {
let contents = r"x = 1";
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation::default()
);
let contents = r"
if True:
pass
";
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation(" ".to_string())
);
let contents = r"
if True:
pass
";
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation(" ".to_string())
);
let contents = r"
if True:
pass
";
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation("\t".to_string())
);
// TODO(charlie): Should non-significant whitespace be detected?
let contents = r"
x = (
1,
2,
3,
)
";
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation::default()
);
// formfeed indent, see `detect_indention` comment.
let contents = r"
class FormFeedIndent:
def __init__(self, a=[]):
print(a)
";
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).indentation(),
&Indentation(" ".to_string())
);
}
#[test]
fn quote() {
let contents = r"x = 1";
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::default()
);
let contents = r"x = '1'";
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Single
);
let contents = r"x = f'1'";
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Single
);
let contents = r#"x = "1""#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Double
);
let contents = r#"x = f"1""#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Double
);
let contents = r#"s = "It's done.""#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Double
);
// No style if only double quoted docstring (will take default Double)
let contents = r#"
def f():
"""Docstring."""
pass
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::default()
);
// Detect from string literal appearing after docstring
let contents = r#"
"""Module docstring."""
a = 'v'
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Single
);
let contents = r#"
'''Module docstring.'''
a = "v"
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Double
);
// Detect from f-string appearing after docstring
let contents = r#"
"""Module docstring."""
a = f'v'
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Single
);
let contents = r#"
'''Module docstring.'''
a = f"v"
"#;
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Double
);
let contents = r"
f'''Module docstring.'''
";
let locator = Locator::new(contents);
let tokens: Vec<_> = lex(contents, Mode::Module).collect();
assert_eq!(
Stylist::from_tokens(&tokens, &locator).quote(),
Quote::Single
);
}
#[test]
fn line_ending() {
let contents = "x = 1";
assert_eq!(find_newline(contents).map(|(_, ending)| ending), None);
let contents = "x = 1\n";
assert_eq!(
find_newline(contents).map(|(_, ending)| ending),
Some(LineEnding::Lf)
);
let contents = "x = 1\r";
assert_eq!(
find_newline(contents).map(|(_, ending)| ending),
Some(LineEnding::Cr)
);
let contents = "x = 1\r\n";
assert_eq!(
find_newline(contents).map(|(_, ending)| ending),
Some(LineEnding::CrLf)
);
}
}