Remove copied core modules from ruff_python_formatter (#3371)

This commit is contained in:
Charlie Marsh 2023-03-08 14:03:40 -05:00 committed by GitHub
parent 130e733023
commit 0a9d259f9c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 105 additions and 273 deletions

View file

@ -1,15 +1,21 @@
use ruff_formatter::{FormatContext, SimpleFormatOptions};
use std::rc::Rc;
use crate::core::locator::Locator;
use ruff_formatter::{FormatContext, SimpleFormatOptions};
use ruff_python_ast::source_code::Locator;
pub struct ASTFormatContext<'a> {
options: SimpleFormatOptions,
contents: Rc<str>,
locator: Locator<'a>,
}
impl<'a> ASTFormatContext<'a> {
pub fn new(options: SimpleFormatOptions, locator: Locator<'a>) -> Self {
Self { options, locator }
Self {
options,
contents: Rc::from(locator.contents()),
locator,
}
}
}
@ -22,6 +28,10 @@ impl FormatContext for ASTFormatContext<'_> {
}
impl<'a> ASTFormatContext<'a> {
pub fn contents(&'a self) -> Rc<str> {
self.contents.clone()
}
pub fn locator(&'a self) -> &'a Locator {
&self.locator
}

View file

@ -1,7 +1,7 @@
use rustpython_parser::ast::Location;
use crate::core::locator::Locator;
use crate::core::types::Range;
use ruff_python_ast::source_code::Locator;
use ruff_python_ast::types::Range;
/// Return `true` if the given string is a radix literal (e.g., `0b101`).
pub fn is_radix_literal(content: &str) -> bool {
@ -20,9 +20,8 @@ pub fn find_tok(
locator: &Locator,
f: impl Fn(rustpython_parser::Tok) -> bool,
) -> (Location, Location) {
let (source, start_index, end_index) = locator.slice(Range::new(location, end_location));
for (start, tok, end) in rustpython_parser::lexer::lex_located(
&source[start_index..end_index],
locator.slice(Range::new(location, end_location)),
rustpython_parser::Mode::Module,
location,
)
@ -48,8 +47,8 @@ pub fn expand_indented_block(
locator: &Locator,
) -> (Location, Location) {
let contents = locator.contents();
let start_index = locator.index(location);
let end_index = locator.index(end_location);
let start_index = locator.offset(location);
let end_index = locator.offset(end_location);
// Find the colon, which indicates the end of the header.
let mut nesting = 0;
@ -76,7 +75,7 @@ pub fn expand_indented_block(
}
}
let colon_location = colon.unwrap();
let colon_index = locator.index(colon_location);
let colon_index = locator.offset(colon_location);
// From here, we have two options: simple statement or compound statement.
let indent = rustpython_parser::lexer::lex_located(
@ -120,11 +119,8 @@ pub fn expand_indented_block(
/// Return true if the `orelse` block of an `if` statement is an `elif` statement.
pub fn is_elif(orelse: &[rustpython_parser::ast::Stmt], locator: &Locator) -> bool {
if orelse.len() == 1 && matches!(orelse[0].node, rustpython_parser::ast::StmtKind::If { .. }) {
let (source, start, end) = locator.slice(Range::new(
orelse[0].location,
orelse[0].end_location.unwrap(),
));
if source[start..end].starts_with("elif") {
let contents = locator.skip(orelse[0].location);
if contents.starts_with("elif") {
return true;
}
}

View file

@ -1,127 +0,0 @@
//! Struct used to efficiently slice source code at (row, column) Locations.
use std::rc::Rc;
use once_cell::unsync::OnceCell;
use rustpython_parser::ast::Location;
use crate::core::types::Range;
/// Holds source code together with a lazily built offset table so that
/// (row, column) [`Location`]s can be converted to byte offsets.
pub struct Locator<'a> {
// The borrowed source text that every offset refers to.
contents: &'a str,
// Reference-counted copy of the source text; `slice` hands out clones of it.
contents_rc: Rc<str>,
// Offset table for `contents`, populated on first use by `get_or_init_index`.
index: OnceCell<Index>,
}
/// Offset table for a piece of source code, as built by [`index`].
pub enum Index {
// Used when the source is pure ASCII: the starting byte index of each line.
Ascii(Vec<usize>),
// Used otherwise: for each row, the starting byte index of each character.
Utf8(Vec<Vec<usize>>),
}
/// Build the table of line-start byte offsets for ASCII source code.
///
/// The first entry is always `0`; every `\n` byte then contributes the offset
/// of the byte immediately after it.
fn index_ascii(contents: &str) -> Vec<usize> {
    let mut line_starts = Vec::with_capacity(48);
    line_starts.push(0);
    line_starts.extend(
        contents
            .bytes()
            .enumerate()
            .filter(|&(_, byte)| byte == b'\n')
            .map(|(offset, _)| offset + 1),
    );
    line_starts
}
/// Build, for every row of UTF-8 source code, the byte offset at which each
/// character starts.
///
/// A row ends at `\n`. The `\n` keeps its own entry in the row unless it is
/// directly preceded by `\r`, in which case only the `\r` entry is retained.
/// The final (possibly empty) row is always emitted.
fn index_utf8(contents: &str) -> Vec<Vec<usize>> {
    let mut rows = Vec::with_capacity(48);
    let mut row = Vec::with_capacity(48);
    let mut prev = '\0';
    for (offset, ch) in contents.char_indices() {
        row.push(offset);
        if ch == '\n' {
            // Collapse `\r\n` into a single column by dropping the `\n` entry.
            if prev == '\r' {
                row.pop();
            }
            // Hand the finished row over and start a fresh one.
            rows.push(std::mem::replace(&mut row, Vec::with_capacity(48)));
        }
        prev = ch;
    }
    rows.push(row);
    rows
}
/// Build the offset table for `contents`, choosing the representation by
/// encoding: per-line offsets for pure-ASCII input, per-character offsets
/// otherwise.
pub fn index(contents: &str) -> Index {
    if !contents.is_ascii() {
        return Index::Utf8(index_utf8(contents));
    }
    Index::Ascii(index_ascii(contents))
}
/// Convert a [`Location`] into a byte offset using the ASCII line-start table,
/// clamping to `contents.len()` at the end of the source.
///
/// Clamping applies when the location sits at column 0 of the row just past
/// the last indexed line, or when it is on the last indexed line and its
/// offset would reach or exceed the length of `contents`.
fn truncate_ascii(location: Location, index: &[usize], contents: &str) -> usize {
    let row = location.row() - 1;
    let column = location.column();

    // Column 0 of the row immediately after the last indexed line.
    if row == index.len() && column == 0 {
        return contents.len();
    }

    // On the last indexed line, at or beyond the end of the source. The
    // checks are ordered so `index[row]` is only read when `row` is in bounds.
    if !index.is_empty() && row == index.len() - 1 && index[row] + column >= contents.len() {
        return contents.len();
    }

    index[row] + column
}
/// Convert a [`Location`] into a byte offset using the per-character UTF-8
/// table, clamping to `contents.len()` at the end of the source.
///
/// Clamping applies when the location sits at column 0 of the row just past
/// the last indexed row, or when it is on the last indexed row with a column
/// equal to that row's character count.
fn truncate_utf8(location: Location, index: &[Vec<usize>], contents: &str) -> usize {
    let row = location.row() - 1;
    let column = location.column();

    let past_last_row = row == index.len() && column == 0;
    // Only evaluated when `past_last_row` is false, matching the lookup order
    // needed to keep `index[row]` in bounds.
    if past_last_row || (row == index.len() - 1 && column == index[row].len()) {
        contents.len()
    } else {
        index[row][column]
    }
}
/// Convert a [`Location`] into a byte offset in `contents`, dispatching on the
/// kind of offset table that was built for the source.
fn truncate(location: Location, index: &Index, contents: &str) -> usize {
    match index {
        Index::Ascii(line_starts) => truncate_ascii(location, line_starts, contents),
        Index::Utf8(char_starts) => truncate_utf8(location, char_starts, contents),
    }
}
impl<'a> Locator<'a> {
    /// Create a locator over `contents`.
    ///
    /// The text is also copied into an `Rc<str>` so that [`Locator::slice`]
    /// can hand out shared handles to it; the offset table is not built here.
    pub fn new(contents: &'a str) -> Self {
        let contents_rc: Rc<str> = Rc::from(contents);
        Self {
            contents,
            contents_rc,
            index: OnceCell::new(),
        }
    }

    /// Return the offset table, building it on the first call.
    fn get_or_init_index(&self) -> &Index {
        self.index.get_or_init(|| index(self.contents))
    }

    /// Convert a [`Location`] into a byte offset within the source.
    pub fn index(&self, location: Location) -> usize {
        truncate(location, self.get_or_init_index(), self.contents)
    }

    /// Return the full source text.
    pub fn contents(&self) -> &str {
        self.contents
    }

    /// Resolve a [`Range`] to byte offsets.
    ///
    /// Returns a shared handle to the whole source together with the start and
    /// end byte offsets of `range`; the caller performs the actual slicing.
    pub fn slice(&self, range: Range) -> (Rc<str>, usize, usize) {
        let start = self.index(range.location);
        let end = self.index(range.end_location);
        (Rc::clone(&self.contents_rc), start, end)
    }
}

View file

@ -1,4 +1,2 @@
pub mod helpers;
pub mod locator;
pub mod types;
pub mod visitor;

View file

@ -1,76 +0,0 @@
use std::ops::Deref;
use rustpython_parser::ast::Location;
use crate::cst::{Expr, Located, Stmt};
/// A span of source code delimited by a start and an end [`Location`].
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct Range {
    /// Where the span starts.
    pub location: Location,
    /// Where the span ends.
    pub end_location: Location,
}

impl Range {
    /// Build a range from its two endpoints.
    pub fn new(location: Location, end_location: Location) -> Self {
        Self {
            location,
            end_location,
        }
    }

    /// Build the range covered by a located node.
    ///
    /// # Panics
    ///
    /// Panics if the node has no `end_location`.
    pub fn from_located<T>(located: &Located<T>) -> Self {
        Self {
            location: located.location,
            end_location: located.end_location.unwrap(),
        }
    }
}
/// Wrapper around a reference whose hashing and equality are based on the
/// address of the referent rather than on its value.
#[derive(Debug, Copy, Clone)]
pub struct RefEquality<'a, T>(pub &'a T);

/// Hashes the address of the wrapped reference.
impl<'a, T> std::hash::Hash for RefEquality<'a, T> {
    fn hash<H>(&self, hasher: &mut H)
    where
        H: std::hash::Hasher,
    {
        std::ptr::hash(self.0, hasher);
    }
}

/// Two wrappers are equal exactly when they point at the same address.
impl<'a, 'b, T> PartialEq<RefEquality<'b, T>> for RefEquality<'a, T> {
    fn eq(&self, rhs: &RefEquality<'b, T>) -> bool {
        std::ptr::eq(self.0, rhs.0)
    }
}

impl<'a, T> Eq for RefEquality<'a, T> {}

/// Gives transparent access to the wrapped value.
impl<'a, T> Deref for RefEquality<'a, T> {
    type Target = T;

    fn deref(&self) -> &Self::Target {
        self.0
    }
}
impl<'a> From<&RefEquality<'a, Stmt>> for &'a Stmt {
fn from(r: &RefEquality<'a, Stmt>) -> Self {
r.0
}
}
impl<'a> From<&RefEquality<'a, Expr>> for &'a Expr {
fn from(r: &RefEquality<'a, Expr>) -> Self {
r.0
}
}
impl<'a> From<&RefEquality<'a, rustpython_parser::ast::Stmt>> for &'a rustpython_parser::ast::Stmt {
fn from(r: &RefEquality<'a, rustpython_parser::ast::Stmt>) -> Self {
r.0
}
}
impl<'a> From<&RefEquality<'a, rustpython_parser::ast::Expr>> for &'a rustpython_parser::ast::Expr {
fn from(r: &RefEquality<'a, rustpython_parser::ast::Expr>) -> Self {
r.0
}
}

View file

@ -2,14 +2,14 @@
use std::iter;
use itertools::Itertools;
use rustpython_parser::ast::{Constant, Location};
use rustpython_parser::Mode;
use itertools::Itertools;
use ruff_python_ast::source_code::Locator;
use ruff_python_ast::types::Range;
use crate::core::helpers::{expand_indented_block, find_tok, is_elif};
use crate::core::locator::Locator;
use crate::core::types::Range;
use crate::trivia::{Parenthesize, Trivia};
type Ident = String;
@ -45,13 +45,13 @@ impl<T> Located<T> {
impl<T> From<&Located<T>> for Range {
fn from(located: &Located<T>) -> Self {
Range::new(located.location, located.end_location.unwrap())
Self::new(located.location, located.end_location.unwrap())
}
}
impl<T> From<&Box<Located<T>>> for Range {
fn from(located: &Box<Located<T>>) -> Self {
Range::new(located.location, located.end_location.unwrap())
Self::new(located.location, located.end_location.unwrap())
}
}
@ -2158,10 +2158,8 @@ impl From<(rustpython_parser::ast::Expr, &Locator<'_>)> for Expr {
},
rustpython_parser::ast::ExprKind::Slice { lower, upper, step } => {
// Locate the colon tokens, which indicate the number of index segments.
let (source, start, end) =
locator.slice(Range::new(expr.location, expr.end_location.unwrap()));
let tokens = rustpython_parser::lexer::lex_located(
&source[start..end],
locator.slice(Range::new(expr.location, expr.end_location.unwrap())),
Mode::Module,
expr.location,
);

View file

@ -1,9 +1,9 @@
use ruff_formatter::prelude::*;
use ruff_formatter::{write, Format};
use ruff_python_ast::types::Range;
use ruff_text_size::{TextRange, TextSize};
use crate::context::ASTFormatContext;
use crate::core::types::Range;
use crate::cst::{Body, Stmt};
use crate::shared_traits::AsFormat;
use crate::trivia::{Relationship, TriviaKind};
@ -73,10 +73,17 @@ pub struct Literal {
impl Format<ASTFormatContext<'_>> for Literal {
fn fmt(&self, f: &mut Formatter<ASTFormatContext<'_>>) -> FormatResult<()> {
let (text, start, end) = f.context().locator().slice(self.range);
let text = f.context().contents();
let locator = f.context().locator();
let start_index = locator.offset(self.range.location);
let end_index = locator.offset(self.range.end_location);
f.write_element(FormatElement::StaticTextSlice {
text,
range: TextRange::new(start.try_into().unwrap(), end.try_into().unwrap()),
range: TextRange::new(
start_index.try_into().unwrap(),
end_index.try_into().unwrap(),
),
})
}
}

View file

@ -4,10 +4,10 @@ use rustpython_parser::ast::Constant;
use ruff_formatter::prelude::*;
use ruff_formatter::{format_args, write};
use ruff_python_ast::types::Range;
use ruff_text_size::TextSize;
use crate::context::ASTFormatContext;
use crate::core::types::Range;
use crate::cst::{
Arguments, BoolOp, CmpOp, Comprehension, Expr, ExprKind, Keyword, Operator, OperatorKind,
SliceIndex, SliceIndexKind, UnaryOp, UnaryOpKind,

View file

@ -2,10 +2,10 @@ use rustpython_parser::ast::Location;
use ruff_formatter::prelude::*;
use ruff_formatter::{write, Format};
use ruff_python_ast::types::Range;
use ruff_text_size::TextSize;
use crate::context::ASTFormatContext;
use crate::core::types::Range;
use crate::format::builders::literal;
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
@ -15,11 +15,14 @@ struct FloatAtom {
impl Format<ASTFormatContext<'_>> for FloatAtom {
fn fmt(&self, f: &mut Formatter<ASTFormatContext<'_>>) -> FormatResult<()> {
let (source, start, end) = f.context().locator().slice(self.range);
let locator = f.context().locator();
let contents = f.context().contents();
let start_index = locator.offset(self.range.location);
let end_index = locator.offset(self.range.end_location);
if let Some(dot_index) = source[start..end].find('.') {
let integer = &source[start..start + dot_index];
let fractional = &source[start + dot_index + 1..end];
if let Some(dot_index) = contents[start_index..end_index].find('.') {
let integer = &contents[start_index..start_index + dot_index];
let fractional = &contents[start_index + dot_index + 1..end_index];
if integer.is_empty() {
write!(f, [text("0")])?;
@ -72,12 +75,15 @@ pub struct FloatLiteral {
impl Format<ASTFormatContext<'_>> for FloatLiteral {
fn fmt(&self, f: &mut Formatter<ASTFormatContext<'_>>) -> FormatResult<()> {
let (source, start, end) = f.context().locator().slice(self.range);
let locator = f.context().locator();
let contents = f.context().contents();
let start_index = locator.offset(self.range.location);
let end_index = locator.offset(self.range.end_location);
// Scientific notation
if let Some(exponent_index) = source[start..end]
if let Some(exponent_index) = contents[start_index..end_index]
.find('e')
.or_else(|| source[start..end].find('E'))
.or_else(|| contents[start_index..end_index].find('E'))
{
// Write the base.
write!(
@ -94,7 +100,7 @@ impl Format<ASTFormatContext<'_>> for FloatLiteral {
write!(f, [text("e")])?;
// Write the exponent, omitting the sign if it's positive.
let plus = source[start + exponent_index + 1..end].starts_with('+');
let plus = contents[start_index + exponent_index + 1..end_index].starts_with('+');
write!(
f,
[literal(Range::new(
@ -125,13 +131,16 @@ pub struct IntLiteral {
impl Format<ASTFormatContext<'_>> for IntLiteral {
fn fmt(&self, f: &mut Formatter<ASTFormatContext<'_>>) -> FormatResult<()> {
let (source, start, end) = f.context().locator().slice(self.range);
let locator = f.context().locator();
let contents = f.context().contents();
let start_index = locator.offset(self.range.location);
let end_index = locator.offset(self.range.end_location);
for prefix in ["0b", "0B", "0o", "0O", "0x", "0X"] {
if source[start..end].starts_with(prefix) {
if contents[start_index..end_index].starts_with(prefix) {
// In each case, the prefix must be lowercase, while the suffix must be uppercase.
let prefix = &source[start..start + prefix.len()];
let suffix = &source[start + prefix.len()..end];
let prefix = &contents[start_index..start_index + prefix.len()];
let suffix = &contents[start_index + prefix.len()..end_index];
if prefix.bytes().any(|b| b.is_ascii_uppercase())
|| suffix.bytes().any(|b| b.is_ascii_lowercase())
@ -171,11 +180,14 @@ pub struct ComplexLiteral {
impl Format<ASTFormatContext<'_>> for ComplexLiteral {
fn fmt(&self, f: &mut Formatter<ASTFormatContext<'_>>) -> FormatResult<()> {
let (source, start, end) = f.context().locator().slice(self.range);
let locator = f.context().locator();
let contents = f.context().contents();
let start_index = locator.offset(self.range.location);
let end_index = locator.offset(self.range.end_location);
if source[start..end].ends_with('j') {
if contents[start_index..end_index].ends_with('j') {
write!(f, [literal(self.range)])?;
} else if source[start..end].ends_with('J') {
} else if contents[start_index..end_index].ends_with('J') {
write!(
f,
[literal(Range::new(

View file

@ -1,7 +1,8 @@
use rustpython_parser::ast::Constant;
use ruff_formatter::prelude::*;
use ruff_formatter::write;
use ruff_text_size::TextSize;
use rustpython_parser::ast::Constant;
use crate::context::ASTFormatContext;
use crate::cst::{Pattern, PatternKind};

View file

@ -3,10 +3,10 @@ use rustpython_parser::{Mode, Tok};
use ruff_formatter::prelude::*;
use ruff_formatter::{write, Format};
use ruff_python_ast::str::{leading_quote, trailing_quote};
use ruff_python_ast::types::Range;
use ruff_text_size::TextSize;
use crate::context::ASTFormatContext;
use crate::core::types::Range;
use crate::cst::Expr;
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
@ -16,13 +16,16 @@ pub struct StringLiteralPart {
impl Format<ASTFormatContext<'_>> for StringLiteralPart {
fn fmt(&self, f: &mut Formatter<ASTFormatContext<'_>>) -> FormatResult<()> {
let (source, start, end) = f.context().locator().slice(self.range);
let locator = f.context().locator();
let contents = f.context().contents();
let start_index = locator.offset(self.range.location);
let end_index = locator.offset(self.range.end_location);
// Extract leading and trailing quotes.
let content = &source[start..end];
let leading_quote = leading_quote(content).unwrap();
let trailing_quote = trailing_quote(content).unwrap();
let body = &content[leading_quote.len()..content.len() - trailing_quote.len()];
let contents = &contents[start_index..end_index];
let leading_quote = leading_quote(contents).unwrap();
let trailing_quote = trailing_quote(contents).unwrap();
let body = &contents[leading_quote.len()..contents.len() - trailing_quote.len()];
// Determine the correct quote style.
// TODO(charlie): Make this parameterizable.
@ -126,18 +129,17 @@ impl Format<ASTFormatContext<'_>> for StringLiteral<'_> {
// TODO(charlie): This tokenization needs to happen earlier, so that we can attach
// comments to individual string literals.
let (source, start, end) = f.context().locator().slice(Range::from(expr));
let elts =
rustpython_parser::lexer::lex_located(&source[start..end], Mode::Module, expr.location)
.flatten()
.filter_map(|(start, tok, end)| {
if matches!(tok, Tok::String { .. }) {
Some(Range::new(start, end))
} else {
None
}
})
.collect::<Vec<_>>();
let contents = f.context().locator().slice(expr);
let elts = rustpython_parser::lexer::lex_located(contents, Mode::Module, expr.location)
.flatten()
.filter_map(|(start, tok, end)| {
if matches!(tok, Tok::String { .. }) {
Some(Range::new(start, end))
} else {
None
}
})
.collect::<Vec<_>>();
write!(
f,
[group(&format_with(|f| {

View file

@ -2,10 +2,10 @@ use anyhow::Result;
use rustpython_parser::lexer::LexResult;
use ruff_formatter::{format, Formatted, IndentStyle, SimpleFormatOptions};
use ruff_python_ast::source_code::Locator;
use crate::attachment::attach;
use crate::context::ASTFormatContext;
use crate::core::locator::Locator;
use crate::cst::Stmt;
use crate::newlines::normalize_newlines;
use crate::parentheses::normalize_parentheses;

View file

@ -1,8 +1,8 @@
use rustpython_parser::ast::Constant;
use ruff_python_ast::source_code::Locator;
use crate::core::helpers::is_radix_literal;
use crate::core::locator::Locator;
use crate::core::types::Range;
use crate::core::visitor;
use crate::core::visitor::Visitor;
use crate::cst::{Expr, ExprKind, Stmt, StmtKind};
@ -154,9 +154,8 @@ impl<'a> Visitor<'a> for ParenthesesNormalizer<'_> {
..
},
) {
let (source, start, end) = self.locator.slice(Range::from(&*value));
// TODO(charlie): Encode this in the AST via separate node types.
if !is_radix_literal(&source[start..end]) {
if !is_radix_literal(self.locator.slice(&**value)) {
value.parentheses = Parenthesize::Always;
}
}

View file

@ -3,7 +3,8 @@ use rustpython_parser::ast::Location;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Tok;
use crate::core::types::Range;
use ruff_python_ast::types::Range;
use crate::cst::{
Alias, Arg, Body, BoolOp, CmpOp, Excepthandler, ExcepthandlerKind, Expr, ExprKind, Keyword,
Operator, Pattern, PatternKind, SliceIndex, SliceIndexKind, Stmt, StmtKind, UnaryOp,