perf(pycodestyle): Reduce allocations when computing logical lines (#3715)

Micha Reiser 2023-03-28 09:09:27 +02:00 committed by GitHub
parent c3917eab38
commit 113a8b8fda
8 changed files with 432 additions and 204 deletions

Cargo.lock (generated)
View file

@@ -132,12 +132,6 @@ dependencies = [
"serde",
]
[[package]]
name = "bisection"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "021e079a1bab0ecce6cf4b4b74c0c37afa4a697136eb3b127875c84a8f04a8c3"
[[package]]
name = "bit-set"
version = "0.5.3"
@@ -1985,7 +1979,6 @@ name = "ruff"
version = "0.0.259"
dependencies = [
"anyhow",
"bisection",
"bitflags",
"chrono",
"clap 4.1.8",

View file

@@ -22,7 +22,6 @@ ruff_python_stdlib = { path = "../ruff_python_stdlib" }
ruff_rustpython = { path = "../ruff_rustpython" }
anyhow = { workspace = true }
bisection = { version = "0.1.0" }
bitflags = { workspace = true }
chrono = { workspace = true }
clap = { workspace = true, features = ["derive", "string"], optional = true }

View file

@@ -1,6 +1,5 @@
#![allow(dead_code, unused_imports, unused_variables)]
use bisection::bisect_left;
use itertools::Itertools;
use rustpython_parser::ast::Location;
use rustpython_parser::lexer::LexResult;
@@ -10,7 +9,7 @@ use ruff_python_ast::source_code::{Locator, Stylist};
use ruff_python_ast::types::Range;
use crate::registry::{AsRule, Rule};
use crate::rules::pycodestyle::logical_lines::{iter_logical_lines, TokenFlags};
use crate::rules::pycodestyle::logical_lines::{LogicalLines, TokenFlags};
use crate::rules::pycodestyle::rules::{
extraneous_whitespace, indentation, missing_whitespace, missing_whitespace_after_keyword,
missing_whitespace_around_operator, space_around_operator, whitespace_around_keywords,
@@ -20,23 +19,18 @@ use crate::rules::pycodestyle::rules::{
use crate::settings::{flags, Settings};
/// Return the amount of indentation, expanding tabs to the next multiple of 8.
fn expand_indent(mut line: &str) -> usize {
while line.ends_with("\n\r") {
line = &line[..line.len() - 2];
}
if !line.contains('\t') {
return line.len() - line.trim_start().len();
}
fn expand_indent(line: &str) -> usize {
let line = line.trim_end_matches(['\n', '\r']);
let mut indent = 0;
for c in line.chars() {
if c == '\t' {
indent = (indent / 8) * 8 + 8;
} else if c == ' ' {
indent += 1;
} else {
break;
for c in line.bytes() {
match c {
b'\t' => indent = (indent / 8) * 8 + 8,
b' ' => indent += 1,
_ => break,
}
}
indent
}
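
For reference, the new `expand_indent` keeps pycodestyle's tab semantics: a tab advances the indent to the next multiple of 8, a space advances it by one, and iterating `bytes()` instead of `chars()` is safe because only `\t` and `' '` are inspected. A self-contained recap of the function from the hunk above, with a few worked cases:

```rust
fn expand_indent(line: &str) -> usize {
    let line = line.trim_end_matches(['\n', '\r']);
    let mut indent = 0;
    for c in line.bytes() {
        match c {
            // Round up to the next multiple of 8.
            b'\t' => indent = (indent / 8) * 8 + 8,
            b' ' => indent += 1,
            _ => break,
        }
    }
    indent
}

fn main() {
    assert_eq!(expand_indent("    x = 1"), 4);
    assert_eq!(expand_indent("\tx = 1"), 8);
    assert_eq!(expand_indent("  \tx = 1"), 8); // two spaces, then the tab rounds up to 8
    assert_eq!(expand_indent("\t\tx = 1"), 16);
}
```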
@@ -52,25 +46,18 @@ pub fn check_logical_lines(
let indent_char = stylist.indentation().as_char();
let mut prev_line = None;
let mut prev_indent_level = None;
for line in iter_logical_lines(tokens, locator) {
if line.mapping.is_empty() {
continue;
}
for line in &LogicalLines::from_tokens(tokens, locator) {
// Extract the indentation level.
let start_loc = line.mapping[0].1;
let start_line = locator.slice(Range::new(Location::new(start_loc.row(), 0), start_loc));
let Some(start_loc) = line.first_token_location() else { continue; };
let start_line = locator.slice(Range::new(Location::new(start_loc.row(), 0), *start_loc));
let indent_level = expand_indent(start_line);
let indent_size = 4;
// Generate mapping from logical to physical offsets.
let mapping_offsets = line.mapping.iter().map(|(offset, _)| *offset).collect_vec();
if line.flags.contains(TokenFlags::OPERATOR) {
for (index, kind) in space_around_operator(&line.text) {
let (token_offset, pos) = line.mapping[bisect_left(&mapping_offsets, &index)];
let location = Location::new(pos.row(), pos.column() + index - token_offset);
if line.flags().contains(TokenFlags::OPERATOR) {
for (index, kind) in space_around_operator(line.text()) {
if settings.rules.enabled(kind.rule()) {
let (token_offset, pos) = line.mapping(index);
let location = Location::new(pos.row(), pos.column() + index - token_offset);
diagnostics.push(Diagnostic {
kind,
location,
@@ -82,13 +69,13 @@ pub fn check_logical_lines(
}
}
if line
.flags
.flags()
.contains(TokenFlags::OPERATOR | TokenFlags::PUNCTUATION)
{
for (index, kind) in extraneous_whitespace(&line.text) {
let (token_offset, pos) = line.mapping[bisect_left(&mapping_offsets, &index)];
let location = Location::new(pos.row(), pos.column() + index - token_offset);
for (index, kind) in extraneous_whitespace(line.text()) {
if settings.rules.enabled(kind.rule()) {
let (token_offset, pos) = line.mapping(index);
let location = Location::new(pos.row(), pos.column() + index - token_offset);
diagnostics.push(Diagnostic {
kind,
location,
@@ -99,11 +86,11 @@ pub fn check_logical_lines(
}
}
}
if line.flags.contains(TokenFlags::KEYWORD) {
for (index, kind) in whitespace_around_keywords(&line.text) {
let (token_offset, pos) = line.mapping[bisect_left(&mapping_offsets, &index)];
let location = Location::new(pos.row(), pos.column() + index - token_offset);
if line.flags().contains(TokenFlags::KEYWORD) {
for (index, kind) in whitespace_around_keywords(line.text()) {
if settings.rules.enabled(kind.rule()) {
let (token_offset, pos) = line.mapping(index);
let location = Location::new(pos.row(), pos.column() + index - token_offset);
diagnostics.push(Diagnostic {
kind,
location,
@@ -114,7 +101,7 @@ pub fn check_logical_lines(
}
}
for (location, kind) in missing_whitespace_after_keyword(&line.tokens) {
for (location, kind) in missing_whitespace_after_keyword(line.tokens()) {
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
@@ -126,8 +113,8 @@ pub fn check_logical_lines(
}
}
}
if line.flags.contains(TokenFlags::COMMENT) {
for (range, kind) in whitespace_before_comment(&line.tokens, locator) {
if line.flags().contains(TokenFlags::COMMENT) {
for (range, kind) in whitespace_before_comment(line.tokens(), locator) {
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
@@ -139,9 +126,9 @@ pub fn check_logical_lines(
}
}
}
if line.flags.contains(TokenFlags::OPERATOR) {
if line.flags().contains(TokenFlags::OPERATOR) {
for (location, kind) in
whitespace_around_named_parameter_equals(&line.tokens, &line.text)
whitespace_around_named_parameter_equals(line.tokens(), line.text())
{
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
@@ -153,7 +140,7 @@ pub fn check_logical_lines(
});
}
}
for (location, kind) in missing_whitespace_around_operator(&line.tokens) {
for (location, kind) in missing_whitespace_around_operator(line.tokens()) {
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
@@ -172,7 +159,7 @@ pub fn check_logical_lines(
let should_fix = false;
for diagnostic in
missing_whitespace(&line.text, start_loc.row(), should_fix, indent_level)
missing_whitespace(line.text(), start_loc.row(), should_fix, indent_level)
{
if settings.rules.enabled(diagnostic.kind.rule()) {
diagnostics.push(diagnostic);
@@ -180,7 +167,7 @@ pub fn check_logical_lines(
}
}
if line.flags.contains(TokenFlags::BRACKET) {
if line.flags().contains(TokenFlags::BRACKET) {
#[cfg(feature = "logical_lines")]
let should_fix =
autofix.into() && settings.rules.should_fix(Rule::WhitespaceBeforeParameters);
@@ -188,7 +175,7 @@ pub fn check_logical_lines(
#[cfg(not(feature = "logical_lines"))]
let should_fix = false;
for diagnostic in whitespace_before_parameters(&line.tokens, should_fix) {
for diagnostic in whitespace_before_parameters(line.tokens(), should_fix) {
if settings.rules.enabled(diagnostic.kind.rule()) {
diagnostics.push(diagnostic);
}
@@ -203,7 +190,7 @@ pub fn check_logical_lines(
prev_indent_level,
indent_size,
) {
let (token_offset, pos) = line.mapping[bisect_left(&mapping_offsets, &index)];
let (token_offset, pos) = line.mapping(index);
let location = Location::new(pos.row(), pos.column() + index - token_offset);
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
@@ -229,10 +216,9 @@ mod tests {
use rustpython_parser::lexer::LexResult;
use rustpython_parser::{lexer, Mode};
use crate::rules::pycodestyle::logical_lines::LogicalLines;
use ruff_python_ast::source_code::Locator;
use crate::checkers::logical_lines::iter_logical_lines;
#[test]
fn split_logical_lines() {
let contents = r#"
@@ -241,9 +227,9 @@ y = 2
z = x + 1"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
.into_iter()
.map(|line| line.text)
.map(|line| line.text().to_string())
.collect();
let expected = vec![
"x = 1".to_string(),
@@ -262,9 +248,9 @@ y = 2
z = x + 1"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
.into_iter()
.map(|line| line.text)
.map(|line| line.text().to_string())
.collect();
let expected = vec![
"x = [1, 2, 3, ]".to_string(),
@@ -276,9 +262,9 @@ z = x + 1"#;
let contents = "x = 'abc'";
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
.into_iter()
.map(|line| line.text)
.map(|line| line.text().to_string())
.collect();
let expected = vec!["x = \"xxx\"".to_string()];
assert_eq!(actual, expected);
@@ -289,9 +275,9 @@ def f():
f()"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
.into_iter()
.map(|line| line.text)
.map(|line| line.text().to_string())
.collect();
let expected = vec!["def f():", "x = 1", "f()"];
assert_eq!(actual, expected);
@@ -304,9 +290,9 @@ def f():
f()"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
.into_iter()
.map(|line| line.text)
.map(|line| line.text().to_string())
.collect();
let expected = vec!["def f():", "\"xxxxxxxxxxxxxxxxxxxx\"", "", "x = 1", "f()"];
assert_eq!(actual, expected);

View file

@@ -58,7 +58,7 @@ pub fn is_overlong(
true
}
pub fn is_keyword_token(token: &Tok) -> bool {
pub const fn is_keyword_token(token: &Tok) -> bool {
matches!(
token,
Tok::False
@@ -98,14 +98,14 @@ pub fn is_keyword_token(token: &Tok) -> bool {
)
}
pub fn is_singleton_token(token: &Tok) -> bool {
pub const fn is_singleton_token(token: &Tok) -> bool {
matches!(
token,
Tok::False { .. } | Tok::True { .. } | Tok::None { .. },
)
}
pub fn is_op_token(token: &Tok) -> bool {
pub const fn is_op_token(token: &Tok) -> bool {
matches!(
token,
Tok::Lpar
@@ -157,25 +157,25 @@ pub fn is_op_token(token: &Tok) -> bool {
)
}
pub fn is_skip_comment_token(token: &Tok) -> bool {
pub const fn is_skip_comment_token(token: &Tok) -> bool {
matches!(
token,
Tok::Newline | Tok::Indent | Tok::Dedent | Tok::NonLogicalNewline | Tok::Comment { .. }
)
}
pub fn is_soft_keyword_token(token: &Tok) -> bool {
pub const fn is_soft_keyword_token(token: &Tok) -> bool {
matches!(token, Tok::Match | Tok::Case)
}
pub fn is_arithmetic_token(token: &Tok) -> bool {
pub const fn is_arithmetic_token(token: &Tok) -> bool {
matches!(
token,
Tok::DoubleStar | Tok::Star | Tok::Plus | Tok::Minus | Tok::Slash | Tok::At
)
}
pub fn is_ws_optional_token(token: &Tok) -> bool {
pub const fn is_ws_optional_token(token: &Tok) -> bool {
is_arithmetic_token(token)
|| matches!(
token,
@@ -188,7 +188,7 @@ pub fn is_ws_optional_token(token: &Tok) -> bool {
)
}
pub fn is_ws_needed_token(token: &Tok) -> bool {
pub const fn is_ws_needed_token(token: &Tok) -> bool {
matches!(
token,
Tok::DoubleStarEqual
@@ -218,7 +218,7 @@ pub fn is_ws_needed_token(token: &Tok) -> bool {
)
}
pub fn is_unary_token(token: &Tok) -> bool {
pub const fn is_unary_token(token: &Tok) -> bool {
matches!(
token,
Tok::Plus | Tok::Minus | Tok::Star | Tok::DoubleStar | Tok::RightShift

View file

@@ -2,6 +2,9 @@ use bitflags::bitflags;
use rustpython_parser::ast::Location;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Tok;
use std::borrow::Cow;
use std::fmt::{Debug, Formatter};
use std::iter::FusedIterator;
use unicode_width::UnicodeWidthStr;
use ruff_python_ast::source_code::Locator;
@@ -11,7 +14,7 @@ use crate::rules::pycodestyle::helpers::{is_keyword_token, is_op_token};
bitflags! {
#[derive(Default)]
pub struct TokenFlags: u32 {
pub struct TokenFlags: u8 {
/// Whether the logical line contains an operator.
const OPERATOR = 0b0000_0001;
/// Whether the logical line contains a bracket.
@@ -25,78 +25,327 @@ bitflags! {
}
}
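
Narrowing `TokenFlags` from `u32` to `u8` is lossless here, since only five bits are defined. One bitflags subtlety that matters when reading `check_logical_lines` above: `contains` of a union requires every flag in it to be set, while `intersects` is the "any of" test. A minimal standalone sketch (using the `bitflags` crate the file already depends on):

```rust
use bitflags::bitflags;

bitflags! {
    struct TokenFlags: u8 {
        const OPERATOR = 0b0000_0001;
        const PUNCTUATION = 0b0000_0010;
    }
}

fn main() {
    let flags = TokenFlags::OPERATOR;
    // `contains(A | B)` demands both bits.
    assert!(!flags.contains(TokenFlags::OPERATOR | TokenFlags::PUNCTUATION));
    // `intersects(A | B)` is satisfied by either bit.
    assert!(flags.intersects(TokenFlags::OPERATOR | TokenFlags::PUNCTUATION));
}
```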
#[derive(Debug)]
pub struct LogicalLine<'a> {
pub text: String,
pub mapping: Vec<(usize, Location)>,
pub flags: TokenFlags,
pub tokens: Vec<(Location, &'a Tok, Location)>,
#[derive(Clone)]
pub struct LogicalLines<'a> {
text: String,
/// start position, token, end position
tokens: Vec<(Location, &'a Tok, Location)>,
mappings: Mappings,
lines: Vec<Line>,
}
impl<'a> LogicalLine<'a> {
pub fn is_comment(&self) -> bool {
self.text.is_empty()
impl<'a> LogicalLines<'a> {
pub fn from_tokens(tokens: &'a [LexResult], locator: &Locator) -> Self {
assert!(u32::try_from(tokens.len()).is_ok());
let single_token = tokens.len() == 1;
let mut builder = LogicalLinesBuilder::with_token_capacity(tokens.len());
let mut parens: u32 = 0;
for (start, token, end) in tokens.iter().flatten() {
builder.push_token(*start, token, *end, locator);
match token {
Tok::Lbrace | Tok::Lpar | Tok::Lsqb => {
parens += 1;
}
Tok::Rbrace | Tok::Rpar | Tok::Rsqb => {
parens -= 1;
}
Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(_) if parens == 0 => {
if matches!(token, Tok::Newline) {
builder.finish_line();
}
// Comment-only file or non-logical newline?
else if single_token {
builder.discard_line();
} else {
builder.finish_line();
};
}
_ => {}
}
}
builder.finish()
}
}
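
The builder ends a logical line on `Newline` (or on `NonLogicalNewline`/`Comment` for comment-only lines) only while the bracket depth `parens` is zero, which is what folds a bracketed multi-line expression into a single logical line. A simplified, std-only sketch of that depth-tracking idea over raw characters (toy code, not the real lexer-token version above):

```rust
/// Toy version of the splitting rule: a newline ends a logical line
/// only when no `(`, `[`, or `{` is currently open.
fn logical_lines(source: &str) -> Vec<String> {
    let mut depth = 0u32;
    let mut current = String::new();
    let mut lines = Vec::new();
    for c in source.chars() {
        match c {
            '(' | '[' | '{' => depth += 1,
            ')' | ']' | '}' => depth = depth.saturating_sub(1),
            '\n' if depth == 0 => {
                if !current.is_empty() {
                    lines.push(std::mem::take(&mut current));
                }
                continue;
            }
            '\n' => {
                current.push(' '); // join physical lines inside brackets
                continue;
            }
            _ => {}
        }
        current.push(c);
    }
    if !current.is_empty() {
        lines.push(current);
    }
    lines
}

fn main() {
    let lines = logical_lines("x = [1,\n 2]\ny = 3\n");
    assert_eq!(lines, vec!["x = [1,  2]", "y = 3"]);
}
```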
fn build_line<'a>(
impl std::fmt::Debug for LogicalLines<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_list()
.entries(self.into_iter().map(DebugLogicalLine))
.finish()
}
}
impl<'a> IntoIterator for &'a LogicalLines<'a> {
type Item = LogicalLine<'a>;
type IntoIter = LogicalLinesIter<'a>;
fn into_iter(self) -> Self::IntoIter {
LogicalLinesIter {
lines: self,
inner: self.lines.iter(),
}
}
}
#[derive(Debug, Clone)]
struct Line {
flags: TokenFlags,
/// Byte offset of the start of the text of this line.
text_start: u32,
/// Byte offset of the end of the text of this line.
text_end: u32,
mappings_start: u32,
mappings_end: u32,
tokens_start: u32,
tokens_end: u32,
}
#[derive(Debug)]
pub struct LogicalLine<'a> {
lines: &'a LogicalLines<'a>,
line: &'a Line,
}
impl<'a> LogicalLine<'a> {
/// Returns true if this is a comment-only line
pub fn is_comment(&self) -> bool {
self.text().is_empty() && self.flags().contains(TokenFlags::COMMENT)
}
/// Returns the text of this line
pub fn text(&self) -> &'a str {
&self.lines.text[self.line.text_start as usize..self.line.text_end as usize]
}
/// Returns the tokens of the line
pub fn tokens(&self) -> &'a [(Location, &'a Tok, Location)] {
&self.lines.tokens[self.line.tokens_start as usize..self.line.tokens_end as usize]
}
/// Returns the [`Location`] of the first token on the line or [`None`].
pub fn first_token_location(&self) -> Option<&Location> {
self.token_locations().first()
}
fn token_offsets(&self) -> &[u32] {
&self.lines.mappings.logical_line_offsets
[self.line.mappings_start as usize..self.line.mappings_end as usize]
}
fn token_locations(&self) -> &[Location] {
&self.lines.mappings.locations
[self.line.mappings_start as usize..self.line.mappings_end as usize]
}
/// Returns the mapping for an offset in the logical line.
///
/// The offset of the closest token and its corresponding location.
pub fn mapping(&self, offset: usize) -> (usize, Location) {
let index = self
.token_offsets()
.binary_search(&(self.line.text_start + u32::try_from(offset).unwrap()))
.unwrap_or_default();
(
(self.token_offsets()[index] - self.line.text_start) as usize,
self.token_locations()[index],
)
}
pub fn is_empty(&self) -> bool {
self.lines.mappings.is_empty()
}
pub const fn flags(&self) -> TokenFlags {
self.line.flags
}
}
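
`mapping` replaces the old pattern of collecting `mapping_offsets` into a fresh `Vec` and calling `bisect_left` per diagnostic: the offsets now live once per document, and a `binary_search` over a dense `&[u32]` finds the token boundary. A hedged sketch of that lookup over two parallel slices (illustrative names, with `(row, column)` tuples standing in for `Location`):

```rust
/// Map a byte offset in the logical text to the closest recorded token
/// boundary and its original source position.
fn map_offset(offsets: &[u32], locations: &[(u32, u32)], offset: u32) -> (u32, (u32, u32)) {
    // An exact hit returns Ok(index); like the code above, a miss
    // falls back to index 0 via `unwrap_or_default`.
    let index = offsets.binary_search(&offset).unwrap_or_default();
    (offsets[index], locations[index])
}

fn main() {
    let offsets = [0, 2, 5]; // token boundaries in the logical line
    let locations = [(1, 0), (1, 2), (2, 0)]; // corresponding source positions
    assert_eq!(map_offset(&offsets, &locations, 2), (2, (1, 2)));
    assert_eq!(map_offset(&offsets, &locations, 3), (0, (1, 0))); // miss falls back to 0
}
```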
struct DebugLogicalLine<'a>(LogicalLine<'a>);
impl Debug for DebugLogicalLine<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("LogicalLine")
.field("text", &self.0.text())
.field("flags", &self.0.flags())
.field("tokens", &self.0.tokens())
.finish()
}
}
/// Iterator over the logical lines of a document.
pub struct LogicalLinesIter<'a> {
lines: &'a LogicalLines<'a>,
inner: std::slice::Iter<'a, Line>,
}
impl<'a> Iterator for LogicalLinesIter<'a> {
type Item = LogicalLine<'a>;
fn next(&mut self) -> Option<Self::Item> {
let line = self.inner.next()?;
Some(LogicalLine {
lines: self.lines,
line,
})
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.inner.size_hint()
}
}
impl DoubleEndedIterator for LogicalLinesIter<'_> {
fn next_back(&mut self) -> Option<Self::Item> {
let line = self.inner.next_back()?;
Some(LogicalLine {
lines: self.lines,
line,
})
}
}
impl ExactSizeIterator for LogicalLinesIter<'_> {}
impl FusedIterator for LogicalLinesIter<'_> {}
/// Source map that maps byte positions in the logical line text to the [`Location`] in the
/// original document.
#[derive(Debug, Default, Clone)]
struct Mappings {
/// Byte offsets of the logical lines at which tokens start/end.
logical_line_offsets: Vec<u32>,
/// Corresponding [`Location`]s for each byte offset mapping it to the position in the original document.
locations: Vec<Location>,
}
impl Mappings {
fn with_capacity(capacity: usize) -> Self {
Self {
logical_line_offsets: Vec::with_capacity(capacity),
locations: Vec::with_capacity(capacity),
}
}
fn len(&self) -> usize {
self.logical_line_offsets.len()
}
fn is_empty(&self) -> bool {
self.logical_line_offsets.is_empty()
}
fn truncate(&mut self, len: usize) {
self.locations.truncate(len);
self.logical_line_offsets.truncate(len);
}
#[allow(clippy::cast_possible_truncation)]
fn push(&mut self, offset: usize, location: Location) {
self.logical_line_offsets.push(offset as u32);
self.locations.push(location);
}
}
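
`Mappings` is a "struct of arrays": offsets and locations sit in two parallel `Vec`s instead of the old per-line `Vec<(usize, Location)>`. Besides sharing one pair of buffers across all lines, this shrinks each entry; the sketch below assumes a 64-bit target and a two-`u32` `Location`, which is an assumption about `rustpython_parser`'s layout made for illustration:

```rust
/// Stand-in for `rustpython_parser::ast::Location`; the two-`u32`
/// layout is an assumption made for this illustration.
#[allow(dead_code)]
#[derive(Copy, Clone)]
struct Location {
    row: u32,
    column: u32,
}

fn main() {
    // One Vec of tuples: each entry pays for an 8-byte `usize` offset
    // plus alignment padding (on a 64-bit target).
    assert_eq!(std::mem::size_of::<(usize, Location)>(), 16);
    // Two parallel Vecs: 4 bytes of offset + 8 bytes of location per
    // entry, and `binary_search` scans a dense `&[u32]`.
    assert_eq!(std::mem::size_of::<u32>() + std::mem::size_of::<Location>(), 12);
}
```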
#[derive(Debug, Default)]
struct CurrentLine {
flags: TokenFlags,
text_start: u32,
mappings_start: u32,
tokens_start: u32,
previous_token: Option<Location>,
}
#[derive(Debug, Default)]
pub struct LogicalLinesBuilder<'a> {
text: String,
tokens: Vec<(Location, &'a Tok, Location)>,
locator: &Locator,
) -> LogicalLine<'a> {
let mut logical = String::with_capacity(88);
let mut mapping = Vec::new();
let mut flags = TokenFlags::empty();
let mut prev: Option<&Location> = None;
let mut length = 0;
for (start, tok, end) in &tokens {
mappings: Mappings,
lines: Vec<Line>,
current_line: Option<CurrentLine>,
}
impl<'a> LogicalLinesBuilder<'a> {
fn with_token_capacity(capacity: usize) -> Self {
Self {
tokens: Vec::with_capacity(capacity),
mappings: Mappings::with_capacity(capacity + 1),
..Self::default()
}
}
// SAFETY: `LogicalLines::from_tokens` asserts that the file has fewer than `u32::MAX` tokens and each token is at least one character long
#[allow(clippy::cast_possible_truncation)]
fn push_token(&mut self, start: Location, token: &'a Tok, end: Location, locator: &Locator) {
let tokens_start = self.tokens.len();
self.tokens.push((start, token, end));
let mut line = self.current_line.get_or_insert_with(|| {
let mappings_start = self.mappings.len();
self.mappings.push(self.text.len(), start);
CurrentLine {
flags: TokenFlags::empty(),
text_start: self.text.len() as u32,
mappings_start: mappings_start as u32,
tokens_start: tokens_start as u32,
previous_token: None,
}
});
if matches!(
tok,
token,
Tok::Newline | Tok::NonLogicalNewline | Tok::Indent | Tok::Dedent
) {
continue;
return;
}
if mapping.is_empty() {
mapping.push((0, *start));
if matches!(token, Tok::Comment(..)) {
line.flags.insert(TokenFlags::COMMENT);
return;
}
if matches!(tok, Tok::Comment { .. }) {
flags.insert(TokenFlags::COMMENT);
continue;
}
if is_op_token(tok) {
flags.insert(TokenFlags::OPERATOR);
if is_op_token(token) {
line.flags.insert(TokenFlags::OPERATOR);
}
if matches!(
tok,
token,
Tok::Lpar | Tok::Lsqb | Tok::Lbrace | Tok::Rpar | Tok::Rsqb | Tok::Rbrace
) {
flags.insert(TokenFlags::BRACKET);
line.flags.insert(TokenFlags::BRACKET);
}
if matches!(tok, Tok::Comma | Tok::Semi | Tok::Colon) {
flags.insert(TokenFlags::PUNCTUATION);
if matches!(token, Tok::Comma | Tok::Semi | Tok::Colon) {
line.flags.insert(TokenFlags::PUNCTUATION);
}
if is_keyword_token(tok) {
flags.insert(TokenFlags::KEYWORD);
if is_keyword_token(token) {
line.flags.insert(TokenFlags::KEYWORD);
}
// TODO(charlie): "Mute" strings.
let s;
let text = if let Tok::String { value, .. } = tok {
s = format!("\"{}\"", "x".repeat(value.width()).clone());
&s
let text = if let Tok::String { value, .. } = token {
Cow::Owned(format!("\"{}\"", "x".repeat(value.width())))
} else {
locator.slice(Range {
location: *start,
end_location: *end,
})
Cow::Borrowed(locator.slice(Range {
location: start,
end_location: end,
}))
};
if let Some(prev) = prev {
if let Some(prev) = line.previous_token.take() {
if prev.row() != start.row() {
let prev_text = locator.slice(Range {
location: Location::new(prev.row(), prev.column() - 1),
@@ -106,61 +358,54 @@ fn build_line<'a>(
|| ((prev_text != "{" && prev_text != "[" && prev_text != "(")
&& (text != "}" && text != "]" && text != ")"))
{
logical.push(' ');
length += 1;
self.text.push(' ');
}
} else if prev.column() != start.column() {
let prev_text = locator.slice(Range {
location: *prev,
end_location: *start,
location: prev,
end_location: start,
});
logical.push_str(prev_text);
length += prev_text.len();
self.text.push_str(prev_text);
}
}
logical.push_str(text);
length += text.len();
mapping.push((length, *end));
prev = Some(end);
line.previous_token = Some(end);
self.text.push_str(&text);
self.mappings.push(self.text.len(), end);
}
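
The `Cow<str>` in `push_token` is what removes the old `let s; ... &s` dance: every non-string token borrows its text straight from the `Locator`, and only the masked `Tok::String` case allocates. A minimal illustration of the pattern (a hypothetical `token_text` helper, not the crate's API):

```rust
use std::borrow::Cow;

/// Borrow in the common case; allocate only when the text must be rewritten.
fn token_text(raw: &str, is_string: bool) -> Cow<'_, str> {
    if is_string {
        // Mask the contents, like the `Tok::String` branch above.
        Cow::Owned(format!("\"{}\"", "x".repeat(raw.len().saturating_sub(2))))
    } else {
        Cow::Borrowed(raw)
    }
}

fn main() {
    assert!(matches!(token_text("foo", false), Cow::Borrowed("foo")));
    assert_eq!(token_text("'ab'", true), "\"xx\"");
}
```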
LogicalLine {
text: logical,
mapping,
flags,
tokens,
}
}
pub fn iter_logical_lines<'a>(tokens: &'a [LexResult], locator: &Locator) -> Vec<LogicalLine<'a>> {
let mut parens = 0;
let mut accumulator = Vec::with_capacity(32);
let mut lines = Vec::with_capacity(128);
for &(start, ref tok, end) in tokens.iter().flatten() {
accumulator.push((start, tok, end));
if matches!(tok, Tok::Lbrace | Tok::Lpar | Tok::Lsqb) {
parens += 1;
} else if matches!(tok, Tok::Rbrace | Tok::Rpar | Tok::Rsqb) {
parens -= 1;
} else if parens == 0 {
if matches!(
tok,
Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(..)
) {
if matches!(tok, Tok::Newline) {
lines.push(build_line(accumulator, locator));
accumulator = Vec::with_capacity(32);
} else if tokens.len() == 1 {
accumulator.remove(0);
} else {
lines.push(build_line(accumulator, locator));
accumulator = Vec::with_capacity(32);
}
}
// SAFETY: `LogicalLines::from_tokens` asserts that the file has fewer than `u32::MAX` tokens and each token is at least one character long
#[allow(clippy::cast_possible_truncation)]
fn finish_line(&mut self) {
if let Some(current) = self.current_line.take() {
self.lines.push(Line {
flags: current.flags,
text_start: current.text_start,
text_end: self.text.len() as u32,
mappings_start: current.mappings_start,
mappings_end: self.mappings.len() as u32,
tokens_start: current.tokens_start,
tokens_end: self.tokens.len() as u32,
});
}
}
if !accumulator.is_empty() {
lines.push(build_line(accumulator, locator));
fn discard_line(&mut self) {
if let Some(current) = self.current_line.take() {
self.text.truncate(current.text_start as usize);
self.tokens.truncate(current.tokens_start as usize);
self.mappings.truncate(current.mappings_start as usize);
}
}
fn finish(mut self) -> LogicalLines<'a> {
self.finish_line();
LogicalLines {
text: self.text,
tokens: self.tokens,
mappings: self.mappings,
lines: self.lines,
}
}
lines
}
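
This builder is the heart of the allocation win: instead of one `String`, one mapping `Vec`, and one token `Vec` per logical line, everything is appended to document-wide buffers, and each `Line` stores only `u32` ranges into them, so `discard_line` is a cheap `truncate` rather than a discarded allocation. A compact sketch of that arena-style pattern (illustrative names, text buffer only):

```rust
/// Each line is just a byte range into one shared text buffer.
struct Line {
    start: u32,
    end: u32,
}

#[derive(Default)]
struct Builder {
    text: String,
    lines: Vec<Line>,
    line_start: u32,
}

impl Builder {
    fn push_str(&mut self, s: &str) {
        self.text.push_str(s);
    }
    fn finish_line(&mut self) {
        self.lines.push(Line {
            start: self.line_start,
            end: self.text.len() as u32,
        });
        self.line_start = self.text.len() as u32;
    }
    /// Dropping a line is a truncate, not a free: no allocation churn.
    fn discard_line(&mut self) {
        self.text.truncate(self.line_start as usize);
    }
    fn line_text(&self, line: &Line) -> &str {
        &self.text[line.start as usize..line.end as usize]
    }
}

fn main() {
    let mut b = Builder::default();
    b.push_str("x = 1");
    b.finish_line();
    b.push_str("# scratch");
    b.discard_line(); // comment-only line: rewind the shared buffer
    b.push_str("y = 2");
    b.finish_line();
    let texts: Vec<&str> = b.lines.iter().map(|l| b.line_text(l)).collect();
    assert_eq!(texts, ["x = 1", "y = 2"]);
}
```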

View file

@@ -251,7 +251,7 @@ pub fn indentation(
));
}
let indent_expect = prev_logical_line.map_or(false, |prev_logical_line| {
prev_logical_line.text.ends_with(':')
prev_logical_line.text().ends_with(':')
});
if indent_expect && indent_level <= prev_indent_level.unwrap_or(0) {
diagnostics.push((

View file

@@ -37,50 +37,55 @@ pub fn missing_whitespace(
) -> Vec<Diagnostic> {
let mut diagnostics = vec![];
let mut num_lsqb = 0;
let mut num_rsqb = 0;
let mut num_lsqb = 0u32;
let mut num_rsqb = 0u32;
let mut prev_lsqb = None;
let mut prev_lbrace = None;
for (idx, (char, next_char)) in line.chars().tuple_windows().enumerate() {
if char == '[' {
num_lsqb += 1;
prev_lsqb = Some(idx);
} else if char == ']' {
num_rsqb += 1;
} else if char == '{' {
prev_lbrace = Some(idx);
}
if (char == ',' || char == ';' || char == ':') && !char::is_whitespace(next_char) {
if char == ':' && num_lsqb > num_rsqb && prev_lsqb > prev_lbrace {
continue; // Slice syntax, no space required
match char {
'[' => {
num_lsqb += 1;
prev_lsqb = Some(idx);
}
if char == ',' && (next_char == ')' || next_char == ']') {
continue; // Allow tuple with only one element: (3,)
']' => {
num_rsqb += 1;
}
if char == ':' && next_char == '=' {
continue; // Allow assignment expression
'{' => {
prev_lbrace = Some(idx);
}
let kind: MissingWhitespace = MissingWhitespace {
token: char.to_string(),
};
',' | ';' | ':' if !next_char.is_whitespace() => {
if char == ':' && num_lsqb > num_rsqb && prev_lsqb > prev_lbrace {
continue; // Slice syntax, no space required
}
if char == ',' && matches!(next_char, ')' | ']') {
continue; // Allow tuple with only one element: (3,)
}
if char == ':' && next_char == '=' {
continue; // Allow assignment expression
}
let mut diagnostic = Diagnostic::new(
kind,
Range::new(
Location::new(row, indent_level + idx),
Location::new(row, indent_level + idx),
),
);
let kind = MissingWhitespace {
token: char.to_string(),
};
if autofix {
diagnostic.set_fix(Edit::insertion(
" ".to_string(),
Location::new(row, indent_level + idx + 1),
));
let mut diagnostic = Diagnostic::new(
kind,
Range::new(
Location::new(row, indent_level + idx),
Location::new(row, indent_level + idx),
),
);
if autofix {
diagnostic.set_fix(Edit::insertion(
" ".to_string(),
Location::new(row, indent_level + idx + 1),
));
}
diagnostics.push(diagnostic);
}
diagnostics.push(diagnostic);
_ => {}
}
}
diagnostics
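
The restructured match keeps pycodestyle's heuristics intact: a `:` needs no trailing space inside slice syntax (more `[` than `]` seen so far, and the last `[` after the last `{`), a `,` may butt against `)`/`]` for one-element tuples, and `:=` is left alone. A self-contained sketch of just that scan, returning character offsets instead of `Diagnostic`s (simplified from the rule above; uses `windows(2)` in place of `tuple_windows`):

```rust
/// Returns the indexes of `,`, `;`, or `:` that should be followed by
/// a space, using the same bookkeeping as the rule above.
fn missing_whitespace_offsets(line: &str) -> Vec<usize> {
    let mut offsets = Vec::new();
    let (mut num_lsqb, mut num_rsqb) = (0u32, 0u32);
    let (mut prev_lsqb, mut prev_lbrace) = (None, None);
    let chars: Vec<char> = line.chars().collect();
    for (idx, window) in chars.windows(2).enumerate() {
        let (char, next_char) = (window[0], window[1]);
        match char {
            '[' => {
                num_lsqb += 1;
                prev_lsqb = Some(idx);
            }
            ']' => num_rsqb += 1,
            '{' => prev_lbrace = Some(idx),
            ',' | ';' | ':' if !next_char.is_whitespace() => {
                if char == ':' && num_lsqb > num_rsqb && prev_lsqb > prev_lbrace {
                    continue; // slice syntax: x[1:2]
                }
                if char == ',' && matches!(next_char, ')' | ']') {
                    continue; // one-element tuple: (3,)
                }
                if char == ':' && next_char == '=' {
                    continue; // assignment expression: x := 1
                }
                offsets.push(idx);
            }
            _ => {}
        }
    }
    offsets
}

fn main() {
    assert_eq!(missing_whitespace_offsets("f(a,b)"), vec![3]);
    assert_eq!(missing_whitespace_offsets("x[1:2]"), Vec::<usize>::new());
    assert_eq!(missing_whitespace_offsets("d = {1:2}"), vec![6]);
}
```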

View file

@@ -67,7 +67,7 @@ pub fn missing_whitespace_around_operator(
let mut needs_space_main: Option<bool> = Some(false);
let mut needs_space_aux: Option<bool> = None;
let mut prev_end_aux: Option<&Location> = None;
let mut parens = 0;
let mut parens = 0u32;
let mut prev_type: Option<&Tok> = None;
let mut prev_end: Option<&Location> = None;