Mirror of https://github.com/astral-sh/ruff.git, synced 2025-09-29 21:34:57 +00:00
perf(pycodestyle): Reduce allocations when computing logical lines (#3715)
parent c3917eab38
commit 113a8b8fda

8 changed files with 432 additions and 204 deletions
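Editor's note: apart from Cargo.lock, the per-file headers of this diff were lost in the mirror's rendering; the file paths shown above each section below are inferred from the import paths visible in the hunks. The heart of the change is an allocation pattern, so here is a minimal, self-contained sketch of it before the diff itself. The types below (Lines, Line, text_of, tokens_of) are illustrative stand-ins, not ruff's actual API: instead of materializing one String and several Vecs per logical line, the whole file shares one text buffer and one token vector, and each line stores only u32 ranges into them.

// Sketch: all logical lines share one backing text buffer and one token
// vector; a line is just a set of ranges into those shared buffers.
struct Lines {
    text: String,        // text of all logical lines, concatenated
    tokens: Vec<String>, // all tokens, flattened (stand-in for ruff's token tuples)
    lines: Vec<Line>,    // one lightweight record per logical line
}

#[derive(Copy, Clone)]
struct Line {
    text_start: u32,
    text_end: u32,
    tokens_start: u32,
    tokens_end: u32,
}

impl Lines {
    // Borrowing slices out of the shared buffers allocates nothing.
    fn text_of(&self, line: Line) -> &str {
        &self.text[line.text_start as usize..line.text_end as usize]
    }

    fn tokens_of(&self, line: Line) -> &[String] {
        &self.tokens[line.tokens_start as usize..line.tokens_end as usize]
    }
}

fn main() {
    // Two "logical lines" backed by a single String and a single Vec.
    let lines = Lines {
        text: "x = 1y = 2".to_string(),
        tokens: vec!["x".into(), "=".into(), "1".into(), "y".into(), "=".into(), "2".into()],
        lines: vec![
            Line { text_start: 0, text_end: 5, tokens_start: 0, tokens_end: 3 },
            Line { text_start: 5, text_end: 10, tokens_start: 3, tokens_end: 6 },
        ],
    };

    for &line in &lines.lines {
        println!("{:?} {:?}", lines.text_of(line), lines.tokens_of(line));
    }
}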
Cargo.lock (generated, 7 lines changed)
@@ -132,12 +132,6 @@ dependencies = [
  "serde",
 ]
 
-[[package]]
-name = "bisection"
-version = "0.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "021e079a1bab0ecce6cf4b4b74c0c37afa4a697136eb3b127875c84a8f04a8c3"
-
 [[package]]
 name = "bit-set"
 version = "0.5.3"

@@ -1985,7 +1979,6 @@ name = "ruff"
 version = "0.0.259"
 dependencies = [
  "anyhow",
- "bisection",
  "bitflags",
  "chrono",
  "clap 4.1.8",
crates/ruff/Cargo.toml

@@ -22,7 +22,6 @@ ruff_python_stdlib = { path = "../ruff_python_stdlib" }
 ruff_rustpython = { path = "../ruff_rustpython" }
 
 anyhow = { workspace = true }
-bisection = { version = "0.1.0" }
 bitflags = { workspace = true }
 chrono = { workspace = true }
 clap = { workspace = true, features = ["derive", "string"], optional = true }
crates/ruff/src/checkers/logical_lines.rs

@@ -1,6 +1,5 @@
 #![allow(dead_code, unused_imports, unused_variables)]
 
-use bisection::bisect_left;
 use itertools::Itertools;
 use rustpython_parser::ast::Location;
 use rustpython_parser::lexer::LexResult;

@@ -10,7 +9,7 @@ use ruff_python_ast::source_code::{Locator, Stylist};
 use ruff_python_ast::types::Range;
 
 use crate::registry::{AsRule, Rule};
-use crate::rules::pycodestyle::logical_lines::{iter_logical_lines, TokenFlags};
+use crate::rules::pycodestyle::logical_lines::{LogicalLines, TokenFlags};
 use crate::rules::pycodestyle::rules::{
     extraneous_whitespace, indentation, missing_whitespace, missing_whitespace_after_keyword,
     missing_whitespace_around_operator, space_around_operator, whitespace_around_keywords,

@@ -20,23 +19,18 @@ use crate::rules::pycodestyle::rules::{
 use crate::settings::{flags, Settings};
 
 /// Return the amount of indentation, expanding tabs to the next multiple of 8.
-fn expand_indent(mut line: &str) -> usize {
-    while line.ends_with("\n\r") {
-        line = &line[..line.len() - 2];
-    }
-    if !line.contains('\t') {
-        return line.len() - line.trim_start().len();
-    }
+fn expand_indent(line: &str) -> usize {
+    let line = line.trim_end_matches(['\n', '\r']);
+
     let mut indent = 0;
-    for c in line.chars() {
-        if c == '\t' {
-            indent = (indent / 8) * 8 + 8;
-        } else if c == ' ' {
-            indent += 1;
-        } else {
-            break;
+    for c in line.bytes() {
+        match c {
+            b'\t' => indent = (indent / 8) * 8 + 8,
+            b' ' => indent += 1,
+            _ => break,
         }
     }
+
     indent
 }
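The rewritten expand_indent above is small enough to check in isolation; the sketch below copies it verbatim and exercises the tab rule (a tab advances the indent to the next multiple of 8, a space adds 1). One detail worth noting: the old code only trimmed trailing "\n\r" pairs, a reversed CRLF, while trim_end_matches(['\n', '\r']) strips any trailing mix of the two characters.

// `expand_indent` from the hunk above, extracted verbatim so its tab
// handling can be verified standalone.
fn expand_indent(line: &str) -> usize {
    let line = line.trim_end_matches(['\n', '\r']);

    let mut indent = 0;
    for c in line.bytes() {
        match c {
            b'\t' => indent = (indent / 8) * 8 + 8,
            b' ' => indent += 1,
            _ => break,
        }
    }

    indent
}

fn main() {
    assert_eq!(expand_indent("    x"), 4); // four spaces
    assert_eq!(expand_indent("\tx"), 8); // tab jumps to column 8
    assert_eq!(expand_indent("  \tx"), 8); // spaces then tab still land on 8
    assert_eq!(expand_indent("\t x"), 9); // tab to 8, then one more space
}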
@@ -52,25 +46,18 @@ pub fn check_logical_lines(
     let indent_char = stylist.indentation().as_char();
     let mut prev_line = None;
     let mut prev_indent_level = None;
-    for line in iter_logical_lines(tokens, locator) {
-        if line.mapping.is_empty() {
-            continue;
-        }
-
+    for line in &LogicalLines::from_tokens(tokens, locator) {
         // Extract the indentation level.
-        let start_loc = line.mapping[0].1;
-        let start_line = locator.slice(Range::new(Location::new(start_loc.row(), 0), start_loc));
+        let Some(start_loc) = line.first_token_location() else { continue; };
+        let start_line = locator.slice(Range::new(Location::new(start_loc.row(), 0), *start_loc));
         let indent_level = expand_indent(start_line);
         let indent_size = 4;
 
-        // Generate mapping from logical to physical offsets.
-        let mapping_offsets = line.mapping.iter().map(|(offset, _)| *offset).collect_vec();
-
-        if line.flags.contains(TokenFlags::OPERATOR) {
-            for (index, kind) in space_around_operator(&line.text) {
-                let (token_offset, pos) = line.mapping[bisect_left(&mapping_offsets, &index)];
-                let location = Location::new(pos.row(), pos.column() + index - token_offset);
+        if line.flags().contains(TokenFlags::OPERATOR) {
+            for (index, kind) in space_around_operator(line.text()) {
                 if settings.rules.enabled(kind.rule()) {
+                    let (token_offset, pos) = line.mapping(index);
+                    let location = Location::new(pos.row(), pos.column() + index - token_offset);
                     diagnostics.push(Diagnostic {
                         kind,
                         location,

@@ -82,13 +69,13 @@ pub fn check_logical_lines(
             }
         }
         if line
-            .flags
+            .flags()
             .contains(TokenFlags::OPERATOR | TokenFlags::PUNCTUATION)
         {
-            for (index, kind) in extraneous_whitespace(&line.text) {
-                let (token_offset, pos) = line.mapping[bisect_left(&mapping_offsets, &index)];
-                let location = Location::new(pos.row(), pos.column() + index - token_offset);
+            for (index, kind) in extraneous_whitespace(line.text()) {
                 if settings.rules.enabled(kind.rule()) {
+                    let (token_offset, pos) = line.mapping(index);
+                    let location = Location::new(pos.row(), pos.column() + index - token_offset);
                     diagnostics.push(Diagnostic {
                         kind,
                         location,

@@ -99,11 +86,11 @@ pub fn check_logical_lines(
                 }
             }
         }
-        if line.flags.contains(TokenFlags::KEYWORD) {
-            for (index, kind) in whitespace_around_keywords(&line.text) {
-                let (token_offset, pos) = line.mapping[bisect_left(&mapping_offsets, &index)];
-                let location = Location::new(pos.row(), pos.column() + index - token_offset);
+        if line.flags().contains(TokenFlags::KEYWORD) {
+            for (index, kind) in whitespace_around_keywords(line.text()) {
                 if settings.rules.enabled(kind.rule()) {
+                    let (token_offset, pos) = line.mapping(index);
+                    let location = Location::new(pos.row(), pos.column() + index - token_offset);
                     diagnostics.push(Diagnostic {
                         kind,
                         location,

@@ -114,7 +101,7 @@ pub fn check_logical_lines(
             }
         }
 
-        for (location, kind) in missing_whitespace_after_keyword(&line.tokens) {
+        for (location, kind) in missing_whitespace_after_keyword(line.tokens()) {
            if settings.rules.enabled(kind.rule()) {
                diagnostics.push(Diagnostic {
                    kind,

@@ -126,8 +113,8 @@ pub fn check_logical_lines(
                 }
             }
         }
-        if line.flags.contains(TokenFlags::COMMENT) {
-            for (range, kind) in whitespace_before_comment(&line.tokens, locator) {
+        if line.flags().contains(TokenFlags::COMMENT) {
+            for (range, kind) in whitespace_before_comment(line.tokens(), locator) {
                 if settings.rules.enabled(kind.rule()) {
                     diagnostics.push(Diagnostic {
                         kind,

@@ -139,9 +126,9 @@ pub fn check_logical_lines(
                 }
             }
         }
-        if line.flags.contains(TokenFlags::OPERATOR) {
+        if line.flags().contains(TokenFlags::OPERATOR) {
             for (location, kind) in
-                whitespace_around_named_parameter_equals(&line.tokens, &line.text)
+                whitespace_around_named_parameter_equals(line.tokens(), line.text())
             {
                 if settings.rules.enabled(kind.rule()) {
                     diagnostics.push(Diagnostic {

@@ -153,7 +140,7 @@ pub fn check_logical_lines(
                 });
             }
         }
-        for (location, kind) in missing_whitespace_around_operator(&line.tokens) {
+        for (location, kind) in missing_whitespace_around_operator(line.tokens()) {
             if settings.rules.enabled(kind.rule()) {
                 diagnostics.push(Diagnostic {
                     kind,

@@ -172,7 +159,7 @@ pub fn check_logical_lines(
         let should_fix = false;
 
         for diagnostic in
-            missing_whitespace(&line.text, start_loc.row(), should_fix, indent_level)
+            missing_whitespace(line.text(), start_loc.row(), should_fix, indent_level)
         {
             if settings.rules.enabled(diagnostic.kind.rule()) {
                 diagnostics.push(diagnostic);

@@ -180,7 +167,7 @@ pub fn check_logical_lines(
             }
         }
 
-        if line.flags.contains(TokenFlags::BRACKET) {
+        if line.flags().contains(TokenFlags::BRACKET) {
             #[cfg(feature = "logical_lines")]
             let should_fix =
                 autofix.into() && settings.rules.should_fix(Rule::WhitespaceBeforeParameters);

@@ -188,7 +175,7 @@ pub fn check_logical_lines(
             #[cfg(not(feature = "logical_lines"))]
             let should_fix = false;
 
-            for diagnostic in whitespace_before_parameters(&line.tokens, should_fix) {
+            for diagnostic in whitespace_before_parameters(line.tokens(), should_fix) {
                 if settings.rules.enabled(diagnostic.kind.rule()) {
                     diagnostics.push(diagnostic);
                 }

@@ -203,7 +190,7 @@ pub fn check_logical_lines(
             prev_indent_level,
             indent_size,
         ) {
-            let (token_offset, pos) = line.mapping[bisect_left(&mapping_offsets, &index)];
+            let (token_offset, pos) = line.mapping(index);
             let location = Location::new(pos.row(), pos.column() + index - token_offset);
             if settings.rules.enabled(kind.rule()) {
                 diagnostics.push(Diagnostic {

@@ -229,10 +216,9 @@ mod tests {
     use rustpython_parser::lexer::LexResult;
     use rustpython_parser::{lexer, Mode};
 
+    use crate::rules::pycodestyle::logical_lines::LogicalLines;
     use ruff_python_ast::source_code::Locator;
 
-    use crate::checkers::logical_lines::iter_logical_lines;
-
     #[test]
     fn split_logical_lines() {
         let contents = r#"

@@ -241,9 +227,9 @@ y = 2
 z = x + 1"#;
         let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
         let locator = Locator::new(contents);
-        let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
+        let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
             .into_iter()
-            .map(|line| line.text)
+            .map(|line| line.text().to_string())
             .collect();
         let expected = vec![
             "x = 1".to_string(),

@@ -262,9 +248,9 @@ y = 2
 z = x + 1"#;
         let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
         let locator = Locator::new(contents);
-        let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
+        let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
             .into_iter()
-            .map(|line| line.text)
+            .map(|line| line.text().to_string())
             .collect();
         let expected = vec![
             "x = [1, 2, 3, ]".to_string(),

@@ -276,9 +262,9 @@ z = x + 1"#;
         let contents = "x = 'abc'";
         let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
         let locator = Locator::new(contents);
-        let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
+        let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
             .into_iter()
-            .map(|line| line.text)
+            .map(|line| line.text().to_string())
             .collect();
         let expected = vec!["x = \"xxx\"".to_string()];
         assert_eq!(actual, expected);

@@ -289,9 +275,9 @@ def f():
 f()"#;
         let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
         let locator = Locator::new(contents);
-        let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
+        let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
             .into_iter()
-            .map(|line| line.text)
+            .map(|line| line.text().to_string())
             .collect();
         let expected = vec!["def f():", "x = 1", "f()"];
         assert_eq!(actual, expected);

@@ -304,9 +290,9 @@ def f():
 f()"#;
         let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
         let locator = Locator::new(contents);
-        let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
+        let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
             .into_iter()
-            .map(|line| line.text)
+            .map(|line| line.text().to_string())
             .collect();
         let expected = vec!["def f():", "\"xxxxxxxxxxxxxxxxxxxx\"", "", "x = 1", "f()"];
         assert_eq!(actual, expected);
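Besides swapping field access for accessor methods, the hunks above consistently reorder each rule block: the old code computed the logical-to-physical mapping for every reported index and only then consulted settings.rules.enabled(...); the new code checks enabled first, so the mapping lookup is skipped entirely for disabled rules. A self-contained sketch of just that control-flow change, with simplified stand-ins for ruff's types:

#[derive(Clone, Copy, PartialEq)]
enum Rule {
    WhitespaceAroundOperator,
}

struct Settings {
    enabled_rules: Vec<Rule>,
}

impl Settings {
    fn enabled(&self, rule: Rule) -> bool {
        self.enabled_rules.contains(&rule)
    }
}

// Stand-in for LogicalLine::mapping's binary search over token offsets.
fn mapping(offset: usize) -> (usize, usize) {
    (offset, offset + 1)
}

fn main() {
    let settings = Settings {
        enabled_rules: vec![Rule::WhitespaceAroundOperator],
    };
    let reported = [(3, Rule::WhitespaceAroundOperator)];

    for (index, rule) in reported {
        // New order: gate on `enabled` before doing any mapping work.
        if settings.enabled(rule) {
            let (token_offset, pos) = mapping(index);
            println!("diagnostic at offset {token_offset}, position {pos}");
        }
    }
}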
crates/ruff/src/rules/pycodestyle/helpers.rs

@@ -58,7 +58,7 @@ pub fn is_overlong(
     true
 }
 
-pub fn is_keyword_token(token: &Tok) -> bool {
+pub const fn is_keyword_token(token: &Tok) -> bool {
     matches!(
         token,
         Tok::False

@@ -98,14 +98,14 @@ pub fn is_keyword_token(token: &Tok) -> bool {
     )
 }
 
-pub fn is_singleton_token(token: &Tok) -> bool {
+pub const fn is_singleton_token(token: &Tok) -> bool {
     matches!(
         token,
         Tok::False { .. } | Tok::True { .. } | Tok::None { .. },
     )
 }
 
-pub fn is_op_token(token: &Tok) -> bool {
+pub const fn is_op_token(token: &Tok) -> bool {
     matches!(
         token,
         Tok::Lpar

@@ -157,25 +157,25 @@ pub fn is_op_token(token: &Tok) -> bool {
     )
 }
 
-pub fn is_skip_comment_token(token: &Tok) -> bool {
+pub const fn is_skip_comment_token(token: &Tok) -> bool {
     matches!(
         token,
         Tok::Newline | Tok::Indent | Tok::Dedent | Tok::NonLogicalNewline | Tok::Comment { .. }
     )
 }
 
-pub fn is_soft_keyword_token(token: &Tok) -> bool {
+pub const fn is_soft_keyword_token(token: &Tok) -> bool {
     matches!(token, Tok::Match | Tok::Case)
 }
 
-pub fn is_arithmetic_token(token: &Tok) -> bool {
+pub const fn is_arithmetic_token(token: &Tok) -> bool {
     matches!(
         token,
         Tok::DoubleStar | Tok::Star | Tok::Plus | Tok::Minus | Tok::Slash | Tok::At
     )
 }
 
-pub fn is_ws_optional_token(token: &Tok) -> bool {
+pub const fn is_ws_optional_token(token: &Tok) -> bool {
     is_arithmetic_token(token)
         || matches!(
             token,

@@ -188,7 +188,7 @@ pub fn is_ws_optional_token(token: &Tok) -> bool {
     )
 }
 
-pub fn is_ws_needed_token(token: &Tok) -> bool {
+pub const fn is_ws_needed_token(token: &Tok) -> bool {
     matches!(
         token,
         Tok::DoubleStarEqual

@@ -218,7 +218,7 @@ pub fn is_ws_needed_token(token: &Tok) -> bool {
     )
 }
 
-pub fn is_unary_token(token: &Tok) -> bool {
+pub const fn is_unary_token(token: &Tok) -> bool {
     matches!(
         token,
         Tok::Plus | Tok::Minus | Tok::Star | Tok::DoubleStar | Tok::RightShift
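A note on the pub fn to pub const fn changes above: matches! expands to a plain match, which is allowed in const fn, so these token predicates can now also be evaluated in const contexts; call sites are otherwise unaffected. A minimal sketch (the Tok enum here is a stand-in, not rustpython's):

// A `matches!`-based predicate can be `const fn` because it expands to a
// plain `match` expression.
enum Tok {
    Plus,
    Minus,
    Comma,
}

const fn is_arithmetic_token(token: &Tok) -> bool {
    matches!(token, Tok::Plus | Tok::Minus)
}

// Usable in a const context...
const PLUS_IS_ARITHMETIC: bool = is_arithmetic_token(&Tok::Plus);

fn main() {
    assert!(PLUS_IS_ARITHMETIC);
    // ...and unchanged at run time.
    assert!(!is_arithmetic_token(&Tok::Comma));
}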
crates/ruff/src/rules/pycodestyle/logical_lines.rs

@@ -2,6 +2,9 @@ use bitflags::bitflags;
 use rustpython_parser::ast::Location;
 use rustpython_parser::lexer::LexResult;
 use rustpython_parser::Tok;
+use std::borrow::Cow;
+use std::fmt::{Debug, Formatter};
+use std::iter::FusedIterator;
 use unicode_width::UnicodeWidthStr;
 
 use ruff_python_ast::source_code::Locator;

@@ -11,7 +14,7 @@ use crate::rules::pycodestyle::helpers::{is_keyword_token, is_op_token};
 
 bitflags! {
     #[derive(Default)]
-    pub struct TokenFlags: u32 {
+    pub struct TokenFlags: u8 {
         /// Whether the logical line contains an operator.
         const OPERATOR = 0b0000_0001;
         /// Whether the logical line contains a bracket.
@@ -25,78 +28,327 @@ bitflags! {
     }
 }
 
-#[derive(Debug)]
-pub struct LogicalLine<'a> {
-    pub text: String,
-    pub mapping: Vec<(usize, Location)>,
-    pub flags: TokenFlags,
-    pub tokens: Vec<(Location, &'a Tok, Location)>,
+#[derive(Clone)]
+pub struct LogicalLines<'a> {
+    text: String,
+
+    /// start position, token, end position
+    tokens: Vec<(Location, &'a Tok, Location)>,
+
+    mappings: Mappings,
+
+    lines: Vec<Line>,
 }
 
-impl<'a> LogicalLine<'a> {
-    pub fn is_comment(&self) -> bool {
-        self.text.is_empty()
+impl<'a> LogicalLines<'a> {
+    pub fn from_tokens(tokens: &'a [LexResult], locator: &Locator) -> Self {
+        assert!(u32::try_from(tokens.len()).is_ok());
+
+        let single_token = tokens.len() == 1;
+        let mut builder = LogicalLinesBuilder::with_token_capacity(tokens.len());
+        let mut parens: u32 = 0;
+
+        for (start, token, end) in tokens.iter().flatten() {
+            builder.push_token(*start, token, *end, locator);
+
+            match token {
+                Tok::Lbrace | Tok::Lpar | Tok::Lsqb => {
+                    parens += 1;
+                }
+                Tok::Rbrace | Tok::Rpar | Tok::Rsqb => {
+                    parens -= 1;
+                }
+                Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(_) if parens == 0 => {
+                    if matches!(token, Tok::Newline) {
+                        builder.finish_line();
+                    }
+                    // Comment only file or non logical new line?
+                    else if single_token {
+                        builder.discard_line();
+                    } else {
+                        builder.finish_line();
+                    };
+                }
+                _ => {}
+            }
+        }
+
+        builder.finish()
     }
 }
 
-fn build_line<'a>(
+impl std::fmt::Debug for LogicalLines<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_list()
+            .entries(self.into_iter().map(DebugLogicalLine))
+            .finish()
+    }
+}
+
+impl<'a> IntoIterator for &'a LogicalLines<'a> {
+    type Item = LogicalLine<'a>;
+    type IntoIter = LogicalLinesIter<'a>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        LogicalLinesIter {
+            lines: self,
+            inner: self.lines.iter(),
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+struct Line {
+    flags: TokenFlags,
+    /// Byte offset of the start of the text of this line.
+    text_start: u32,
+
+    /// Byte offset of the end of the text of this line.
+    text_end: u32,
+    mappings_start: u32,
+    mappings_end: u32,
+    tokens_start: u32,
+    tokens_end: u32,
+}
+
+#[derive(Debug)]
+pub struct LogicalLine<'a> {
+    lines: &'a LogicalLines<'a>,
+    line: &'a Line,
+}
+
+impl<'a> LogicalLine<'a> {
+    /// Returns true if this is a comment only line
+    pub fn is_comment(&self) -> bool {
+        self.text().is_empty() && self.flags().contains(TokenFlags::COMMENT)
+    }
+
+    /// Returns the text of this line
+    pub fn text(&self) -> &'a str {
+        &self.lines.text[self.line.text_start as usize..self.line.text_end as usize]
+    }
+
+    /// Returns the tokens of the line
+    pub fn tokens(&self) -> &'a [(Location, &'a Tok, Location)] {
+        &self.lines.tokens[self.line.tokens_start as usize..self.line.tokens_end as usize]
+    }
+
+    /// Returns the [`Location`] of the first token on the line or [`None`].
+    pub fn first_token_location(&self) -> Option<&Location> {
+        self.token_locations().first()
+    }
+
+    fn token_offsets(&self) -> &[u32] {
+        &self.lines.mappings.logical_line_offsets
+            [self.line.mappings_start as usize..self.line.mappings_end as usize]
+    }
+
+    fn token_locations(&self) -> &[Location] {
+        &self.lines.mappings.locations
+            [self.line.mappings_start as usize..self.line.mappings_end as usize]
+    }
+
+    /// Returns the mapping for an offset in the logical line.
+    ///
+    /// The offset of the closest token and its corresponding location.
+    pub fn mapping(&self, offset: usize) -> (usize, Location) {
+        let index = self
+            .token_offsets()
+            .binary_search(&(self.line.text_start + u32::try_from(offset).unwrap()))
+            .unwrap_or_default();
+
+        (
+            (self.token_offsets()[index] - self.line.text_start) as usize,
+            self.token_locations()[index],
+        )
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.lines.mappings.is_empty()
+    }
+
+    pub const fn flags(&self) -> TokenFlags {
+        self.line.flags
+    }
+}
+
+struct DebugLogicalLine<'a>(LogicalLine<'a>);
+
+impl Debug for DebugLogicalLine<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("LogicalLine")
+            .field("text", &self.0.text())
+            .field("flags", &self.0.flags())
+            .field("tokens", &self.0.tokens())
+            .finish()
+    }
+}
+
+/// Iterator over the logical lines of a document.
+pub struct LogicalLinesIter<'a> {
+    lines: &'a LogicalLines<'a>,
+    inner: std::slice::Iter<'a, Line>,
+}
+
+impl<'a> Iterator for LogicalLinesIter<'a> {
+    type Item = LogicalLine<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let line = self.inner.next()?;
+
+        Some(LogicalLine {
+            lines: self.lines,
+            line,
+        })
+    }
+
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        self.inner.size_hint()
+    }
+}
+
+impl DoubleEndedIterator for LogicalLinesIter<'_> {
+    fn next_back(&mut self) -> Option<Self::Item> {
+        let line = self.inner.next_back()?;
+
+        Some(LogicalLine {
+            lines: self.lines,
+            line,
+        })
+    }
+}
+
+impl ExactSizeIterator for LogicalLinesIter<'_> {}
+
+impl FusedIterator for LogicalLinesIter<'_> {}
+
+/// Source map that maps byte positions in the logical line text to the [`Location`] in the
+/// original document.
+#[derive(Debug, Default, Clone)]
+struct Mappings {
+    /// byte offsets of the logical lines at which tokens start/end.
+    logical_line_offsets: Vec<u32>,
+
+    /// Corresponding [`Location`]s for each byte offset mapping it to the position in the original document.
+    locations: Vec<Location>,
+}
+
+impl Mappings {
+    fn with_capacity(capacity: usize) -> Self {
+        Self {
+            logical_line_offsets: Vec::with_capacity(capacity),
+            locations: Vec::with_capacity(capacity),
+        }
+    }
+
+    fn len(&self) -> usize {
+        self.logical_line_offsets.len()
+    }
+
+    fn is_empty(&self) -> bool {
+        self.logical_line_offsets.is_empty()
+    }
+
+    fn truncate(&mut self, len: usize) {
+        self.locations.truncate(len);
+        self.logical_line_offsets.truncate(len);
+    }
+
+    #[allow(clippy::cast_possible_truncation)]
+    fn push(&mut self, offset: usize, location: Location) {
+        self.logical_line_offsets.push(offset as u32);
+        self.locations.push(location);
+    }
+}
+
+#[derive(Debug, Default)]
+struct CurrentLine {
+    flags: TokenFlags,
+    text_start: u32,
+    mappings_start: u32,
+    tokens_start: u32,
+    previous_token: Option<Location>,
+}
+
+#[derive(Debug, Default)]
+pub struct LogicalLinesBuilder<'a> {
+    text: String,
     tokens: Vec<(Location, &'a Tok, Location)>,
-    locator: &Locator,
-) -> LogicalLine<'a> {
-    let mut logical = String::with_capacity(88);
-    let mut mapping = Vec::new();
-    let mut flags = TokenFlags::empty();
-    let mut prev: Option<&Location> = None;
-    let mut length = 0;
-    for (start, tok, end) in &tokens {
+    mappings: Mappings,
+    lines: Vec<Line>,
+    current_line: Option<CurrentLine>,
+}
+
+impl<'a> LogicalLinesBuilder<'a> {
+    fn with_token_capacity(capacity: usize) -> Self {
+        Self {
+            tokens: Vec::with_capacity(capacity),
+            mappings: Mappings::with_capacity(capacity + 1),
+            ..Self::default()
+        }
+    }
+
+    // SAFETY: `LogicalLines::from_tokens` asserts that the file has less than `u32::MAX` tokens and each tokens is at least one character long
+    #[allow(clippy::cast_possible_truncation)]
+    fn push_token(&mut self, start: Location, token: &'a Tok, end: Location, locator: &Locator) {
+        let tokens_start = self.tokens.len();
+        self.tokens.push((start, token, end));
+
+        let mut line = self.current_line.get_or_insert_with(|| {
+            let mappings_start = self.mappings.len();
+            self.mappings.push(self.text.len(), start);
+
+            CurrentLine {
+                flags: TokenFlags::empty(),
+                text_start: self.text.len() as u32,
+                mappings_start: mappings_start as u32,
+                tokens_start: tokens_start as u32,
+                previous_token: None,
+            }
+        });
+
         if matches!(
-            tok,
+            token,
             Tok::Newline | Tok::NonLogicalNewline | Tok::Indent | Tok::Dedent
         ) {
-            continue;
+            return;
         }
 
-        if mapping.is_empty() {
-            mapping.push((0, *start));
+        if matches!(token, Tok::Comment(..)) {
+            line.flags.insert(TokenFlags::COMMENT);
+            return;
         }
 
-        if matches!(tok, Tok::Comment { .. }) {
-            flags.insert(TokenFlags::COMMENT);
-            continue;
-        }
-
-        if is_op_token(tok) {
-            flags.insert(TokenFlags::OPERATOR);
+        if is_op_token(token) {
+            line.flags.insert(TokenFlags::OPERATOR);
         }
 
         if matches!(
-            tok,
+            token,
             Tok::Lpar | Tok::Lsqb | Tok::Lbrace | Tok::Rpar | Tok::Rsqb | Tok::Rbrace
         ) {
-            flags.insert(TokenFlags::BRACKET);
+            line.flags.insert(TokenFlags::BRACKET);
         }
 
-        if matches!(tok, Tok::Comma | Tok::Semi | Tok::Colon) {
-            flags.insert(TokenFlags::PUNCTUATION);
+        if matches!(token, Tok::Comma | Tok::Semi | Tok::Colon) {
+            line.flags.insert(TokenFlags::PUNCTUATION);
         }
 
-        if is_keyword_token(tok) {
-            flags.insert(TokenFlags::KEYWORD);
+        if is_keyword_token(token) {
+            line.flags.insert(TokenFlags::KEYWORD);
         }
 
         // TODO(charlie): "Mute" strings.
-        let s;
-        let text = if let Tok::String { value, .. } = tok {
-            s = format!("\"{}\"", "x".repeat(value.width()).clone());
-            &s
+        let text = if let Tok::String { value, .. } = token {
+            Cow::Owned(format!("\"{}\"", "x".repeat(value.width())))
         } else {
-            locator.slice(Range {
-                location: *start,
-                end_location: *end,
-            })
+            Cow::Borrowed(locator.slice(Range {
+                location: start,
+                end_location: end,
+            }))
         };
 
-        if let Some(prev) = prev {
+        if let Some(prev) = line.previous_token.take() {
             if prev.row() != start.row() {
                 let prev_text = locator.slice(Range {
                     location: Location::new(prev.row(), prev.column() - 1),
@@ -106,61 +358,54 @@ fn build_line<'a>(
                     || ((prev_text != "{" && prev_text != "[" && prev_text != "(")
                         && (text != "}" && text != "]" && text != ")"))
                 {
-                    logical.push(' ');
-                    length += 1;
+                    self.text.push(' ');
                 }
             } else if prev.column() != start.column() {
                 let prev_text = locator.slice(Range {
-                    location: *prev,
-                    end_location: *start,
+                    location: prev,
+                    end_location: start,
                 });
-                logical.push_str(prev_text);
-                length += prev_text.len();
+                self.text.push_str(prev_text);
             }
         }
-        logical.push_str(text);
-        length += text.len();
-        mapping.push((length, *end));
-        prev = Some(end);
+
+        line.previous_token = Some(end);
+        self.text.push_str(&text);
+        self.mappings.push(self.text.len(), end);
     }
 
-    LogicalLine {
-        text: logical,
-        mapping,
-        flags,
-        tokens,
-    }
-}
-
-pub fn iter_logical_lines<'a>(tokens: &'a [LexResult], locator: &Locator) -> Vec<LogicalLine<'a>> {
-    let mut parens = 0;
-    let mut accumulator = Vec::with_capacity(32);
-    let mut lines = Vec::with_capacity(128);
-    for &(start, ref tok, end) in tokens.iter().flatten() {
-        accumulator.push((start, tok, end));
-        if matches!(tok, Tok::Lbrace | Tok::Lpar | Tok::Lsqb) {
-            parens += 1;
-        } else if matches!(tok, Tok::Rbrace | Tok::Rpar | Tok::Rsqb) {
-            parens -= 1;
-        } else if parens == 0 {
-            if matches!(
-                tok,
-                Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(..)
-            ) {
-                if matches!(tok, Tok::Newline) {
-                    lines.push(build_line(accumulator, locator));
-                    accumulator = Vec::with_capacity(32);
-                } else if tokens.len() == 1 {
-                    accumulator.remove(0);
-                } else {
-                    lines.push(build_line(accumulator, locator));
-                    accumulator = Vec::with_capacity(32);
-                }
-            }
+    // SAFETY: `LogicalLines::from_tokens` asserts that the file has less than `u32::MAX` tokens and each tokens is at least one character long
+    #[allow(clippy::cast_possible_truncation)]
+    fn finish_line(&mut self) {
+        if let Some(current) = self.current_line.take() {
+            self.lines.push(Line {
+                flags: current.flags,
+                text_start: current.text_start,
+                text_end: self.text.len() as u32,
+                mappings_start: current.mappings_start,
+                mappings_end: self.mappings.len() as u32,
+                tokens_start: current.tokens_start,
+                tokens_end: self.tokens.len() as u32,
+            });
         }
     }
-    if !accumulator.is_empty() {
-        lines.push(build_line(accumulator, locator));
+
+    fn discard_line(&mut self) {
+        if let Some(current) = self.current_line.take() {
+            self.text.truncate(current.text_start as usize);
+            self.tokens.truncate(current.tokens_start as usize);
+            self.mappings.truncate(current.mappings_start as usize);
+        }
     }
-    lines
+
+    fn finish(mut self) -> LogicalLines<'a> {
+        self.finish_line();
+
+        LogicalLines {
+            text: self.text,
+            tokens: self.tokens,
+            mappings: self.mappings,
+            lines: self.lines,
+        }
+    }
 }
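The Mappings struct above stores the byte offsets and their Locations as two parallel Vecs, so LogicalLine::mapping can binary_search a plain &[u32] slice of the shared buffer. The old code instead collected a fresh Vec of offsets per logical line just so bisect_left had something to search, which is one of the per-line allocations this commit removes. A simplified, self-contained sketch of the lookup ((row, column) tuples stand in for Location, and offsets here are line-local, whereas the real code offsets into the shared text buffer):

// Parallel slices: `offsets[i]` is a byte offset in the logical line's text,
// `locations[i]` is the source position it maps back to.
fn mapping(offsets: &[u32], locations: &[(u32, u32)], offset: u32) -> (u32, (u32, u32)) {
    // `unwrap_or_default` mirrors the diff: a miss falls back to index 0.
    let index = offsets.binary_search(&offset).unwrap_or_default();
    (offsets[index], locations[index])
}

fn main() {
    let offsets = [0u32, 2, 4];
    let locations = [(1, 0), (1, 2), (1, 4)];
    assert_eq!(mapping(&offsets, &locations, 2), (2, (1, 2)));
}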
crates/ruff/src/rules/pycodestyle/rules/indentation.rs

@@ -251,7 +251,7 @@ pub fn indentation(
         ));
     }
     let indent_expect = prev_logical_line.map_or(false, |prev_logical_line| {
-        prev_logical_line.text.ends_with(':')
+        prev_logical_line.text().ends_with(':')
     });
     if indent_expect && indent_level <= prev_indent_level.unwrap_or(0) {
         diagnostics.push((
crates/ruff/src/rules/pycodestyle/rules/missing_whitespace.rs

@@ -37,50 +37,55 @@ pub fn missing_whitespace(
 ) -> Vec<Diagnostic> {
     let mut diagnostics = vec![];
 
-    let mut num_lsqb = 0;
-    let mut num_rsqb = 0;
+    let mut num_lsqb = 0u32;
+    let mut num_rsqb = 0u32;
     let mut prev_lsqb = None;
     let mut prev_lbrace = None;
     for (idx, (char, next_char)) in line.chars().tuple_windows().enumerate() {
-        if char == '[' {
-            num_lsqb += 1;
-            prev_lsqb = Some(idx);
-        } else if char == ']' {
-            num_rsqb += 1;
-        } else if char == '{' {
-            prev_lbrace = Some(idx);
-        }
-
-        if (char == ',' || char == ';' || char == ':') && !char::is_whitespace(next_char) {
-            if char == ':' && num_lsqb > num_rsqb && prev_lsqb > prev_lbrace {
-                continue; // Slice syntax, no space required
+        match char {
+            '[' => {
+                num_lsqb += 1;
+                prev_lsqb = Some(idx);
             }
-            if char == ',' && (next_char == ')' || next_char == ']') {
-                continue; // Allow tuple with only one element: (3,)
+            ']' => {
+                num_rsqb += 1;
             }
-            if char == ':' && next_char == '=' {
-                continue; // Allow assignment expression
+            '{' => {
+                prev_lbrace = Some(idx);
             }
-
-            let kind: MissingWhitespace = MissingWhitespace {
-                token: char.to_string(),
-            };
+            ',' | ';' | ':' if !next_char.is_whitespace() => {
+                if char == ':' && num_lsqb > num_rsqb && prev_lsqb > prev_lbrace {
+                    continue; // Slice syntax, no space required
+                }
+                if char == ',' && matches!(next_char, ')' | ']') {
+                    continue; // Allow tuple with only one element: (3,)
+                }
+                if char == ':' && next_char == '=' {
+                    continue; // Allow assignment expression
+                }
 
-            let mut diagnostic = Diagnostic::new(
-                kind,
-                Range::new(
-                    Location::new(row, indent_level + idx),
-                    Location::new(row, indent_level + idx),
-                ),
-            );
+                let kind = MissingWhitespace {
+                    token: char.to_string(),
+                };
 
-            if autofix {
-                diagnostic.set_fix(Edit::insertion(
-                    " ".to_string(),
-                    Location::new(row, indent_level + idx + 1),
-                ));
+                let mut diagnostic = Diagnostic::new(
+                    kind,
+                    Range::new(
+                        Location::new(row, indent_level + idx),
+                        Location::new(row, indent_level + idx),
+                    ),
+                );
+
+                if autofix {
+                    diagnostic.set_fix(Edit::insertion(
+                        " ".to_string(),
+                        Location::new(row, indent_level + idx + 1),
+                    ));
+                }
+                diagnostics.push(diagnostic);
             }
-            diagnostics.push(diagnostic);
+            _ => {}
         }
     }
     diagnostics
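The missing_whitespace hunk above folds the old if/else-if chain and the separate punctuation test into one match with a guard arm, so each character is classified exactly once, and the 0u32 literals pin the counter types the old code left inferred. A trimmed, runnable sketch of the same scan: diagnostics are replaced by collected offsets, the function name and return type are illustrative, and std's windows(2) stands in for itertools' tuple_windows to stay dependency-free.

// Returns the line-local offsets where a `,`, `;`, or `:` is missing a
// trailing space, using the same match-with-guard structure as the diff.
fn missing_whitespace_offsets(line: &str) -> Vec<usize> {
    let mut offsets = Vec::new();
    let mut num_lsqb = 0u32;
    let mut num_rsqb = 0u32;
    let mut prev_lsqb = None;
    let mut prev_lbrace = None;

    let chars: Vec<char> = line.chars().collect();
    for (idx, window) in chars.windows(2).enumerate() {
        let (char, next_char) = (window[0], window[1]);
        match char {
            '[' => {
                num_lsqb += 1;
                prev_lsqb = Some(idx);
            }
            ']' => num_rsqb += 1,
            '{' => prev_lbrace = Some(idx),
            ',' | ';' | ':' if !next_char.is_whitespace() => {
                if char == ':' && num_lsqb > num_rsqb && prev_lsqb > prev_lbrace {
                    continue; // slice syntax, e.g. `x[1:2]`
                }
                if char == ',' && matches!(next_char, ')' | ']') {
                    continue; // one-element tuple, e.g. `(3,)`
                }
                if char == ':' && next_char == '=' {
                    continue; // walrus operator, e.g. `(n := 10)`
                }
                offsets.push(idx);
            }
            _ => {}
        }
    }
    offsets
}

fn main() {
    assert_eq!(missing_whitespace_offsets("f(a,b)"), vec![3]);
    assert_eq!(missing_whitespace_offsets("x[1:2]"), Vec::<usize>::new());
    assert_eq!(missing_whitespace_offsets("(3,)"), Vec::<usize>::new());
}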
crates/ruff/src/rules/pycodestyle/rules/missing_whitespace_around_operator.rs

@@ -67,7 +67,7 @@ pub fn missing_whitespace_around_operator(
     let mut needs_space_main: Option<bool> = Some(false);
     let mut needs_space_aux: Option<bool> = None;
     let mut prev_end_aux: Option<&Location> = None;
-    let mut parens = 0;
+    let mut parens = 0u32;
     let mut prev_type: Option<&Tok> = None;
     let mut prev_end: Option<&Location> = None;
 