perf(pycodestyle): Reduce allocations when computing logical lines (#3715)

Micha Reiser 2023-03-28 09:09:27 +02:00 committed by GitHub
parent c3917eab38
commit 113a8b8fda
8 changed files with 432 additions and 204 deletions

Cargo.lock (generated)
View file

@@ -132,12 +132,6 @@ dependencies = [
"serde",
]
[[package]]
name = "bisection"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "021e079a1bab0ecce6cf4b4b74c0c37afa4a697136eb3b127875c84a8f04a8c3"
[[package]]
name = "bit-set"
version = "0.5.3"
@@ -1985,7 +1979,6 @@ name = "ruff"
version = "0.0.259"
dependencies = [
"anyhow",
"bisection",
"bitflags",
"chrono",
"clap 4.1.8",

View file

@@ -22,7 +22,6 @@ ruff_python_stdlib = { path = "../ruff_python_stdlib" }
ruff_rustpython = { path = "../ruff_rustpython" }
anyhow = { workspace = true }
bisection = { version = "0.1.0" }
bitflags = { workspace = true }
chrono = { workspace = true }
clap = { workspace = true, features = ["derive", "string"], optional = true }

View file

@@ -1,6 +1,5 @@
#![allow(dead_code, unused_imports, unused_variables)]
use bisection::bisect_left;
use itertools::Itertools;
use rustpython_parser::ast::Location;
use rustpython_parser::lexer::LexResult;
@@ -10,7 +9,7 @@ use ruff_python_ast::source_code::{Locator, Stylist};
use ruff_python_ast::types::Range;
use crate::registry::{AsRule, Rule};
use crate::rules::pycodestyle::logical_lines::{iter_logical_lines, TokenFlags};
use crate::rules::pycodestyle::logical_lines::{LogicalLines, TokenFlags};
use crate::rules::pycodestyle::rules::{
extraneous_whitespace, indentation, missing_whitespace, missing_whitespace_after_keyword,
missing_whitespace_around_operator, space_around_operator, whitespace_around_keywords,
@@ -20,23 +19,18 @@ use crate::rules::pycodestyle::rules::{
use crate::settings::{flags, Settings};
/// Return the amount of indentation, expanding tabs to the next multiple of 8.
fn expand_indent(mut line: &str) -> usize {
while line.ends_with("\n\r") {
line = &line[..line.len() - 2];
}
if !line.contains('\t') {
return line.len() - line.trim_start().len();
}
fn expand_indent(line: &str) -> usize {
let line = line.trim_end_matches(['\n', '\r']);
let mut indent = 0;
for c in line.chars() {
if c == '\t' {
indent = (indent / 8) * 8 + 8;
} else if c == ' ' {
indent += 1;
} else {
break;
for c in line.bytes() {
match c {
b'\t' => indent = (indent / 8) * 8 + 8,
b' ' => indent += 1,
_ => break,
}
}
indent
}
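
For reference, the new `expand_indent` keeps pycodestyle's tab semantics: a tab advances the indent to the next multiple of 8, a space advances it by one, and iterating `bytes()` instead of `chars()` is safe because only `\t` and `' '` are inspected. A self-contained recap of the function from the hunk above, with a few worked cases:

```rust
fn expand_indent(line: &str) -> usize {
    let line = line.trim_end_matches(['\n', '\r']);
    let mut indent = 0;
    for c in line.bytes() {
        match c {
            // Round up to the next multiple of 8.
            b'\t' => indent = (indent / 8) * 8 + 8,
            b' ' => indent += 1,
            _ => break,
        }
    }
    indent
}

fn main() {
    assert_eq!(expand_indent("    x = 1"), 4);
    assert_eq!(expand_indent("\tx = 1"), 8);
    assert_eq!(expand_indent("  \tx = 1"), 8); // two spaces, then the tab rounds up to 8
    assert_eq!(expand_indent("\t\tx = 1"), 16);
}
```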
@@ -52,25 +46,18 @@ pub fn check_logical_lines(
let indent_char = stylist.indentation().as_char();
let mut prev_line = None;
let mut prev_indent_level = None;
for line in iter_logical_lines(tokens, locator) {
if line.mapping.is_empty() {
continue;
}
for line in &LogicalLines::from_tokens(tokens, locator) {
// Extract the indentation level.
let start_loc = line.mapping[0].1;
let start_line = locator.slice(Range::new(Location::new(start_loc.row(), 0), start_loc));
let Some(start_loc) = line.first_token_location() else { continue; };
let start_line = locator.slice(Range::new(Location::new(start_loc.row(), 0), *start_loc));
let indent_level = expand_indent(start_line);
let indent_size = 4;
// Generate mapping from logical to physical offsets.
let mapping_offsets = line.mapping.iter().map(|(offset, _)| *offset).collect_vec();
if line.flags.contains(TokenFlags::OPERATOR) {
for (index, kind) in space_around_operator(&line.text) {
let (token_offset, pos) = line.mapping[bisect_left(&mapping_offsets, &index)];
let location = Location::new(pos.row(), pos.column() + index - token_offset);
if line.flags().contains(TokenFlags::OPERATOR) {
for (index, kind) in space_around_operator(line.text()) {
if settings.rules.enabled(kind.rule()) {
let (token_offset, pos) = line.mapping(index);
let location = Location::new(pos.row(), pos.column() + index - token_offset);
diagnostics.push(Diagnostic {
kind,
location,
@@ -82,13 +69,13 @@ pub fn check_logical_lines(
}
}
if line
.flags
.flags()
.contains(TokenFlags::OPERATOR | TokenFlags::PUNCTUATION)
{
for (index, kind) in extraneous_whitespace(&line.text) {
let (token_offset, pos) = line.mapping[bisect_left(&mapping_offsets, &index)];
let location = Location::new(pos.row(), pos.column() + index - token_offset);
for (index, kind) in extraneous_whitespace(line.text()) {
if settings.rules.enabled(kind.rule()) {
let (token_offset, pos) = line.mapping(index);
let location = Location::new(pos.row(), pos.column() + index - token_offset);
diagnostics.push(Diagnostic {
kind,
location,
@@ -99,11 +86,11 @@ pub fn check_logical_lines(
}
}
}
if line.flags.contains(TokenFlags::KEYWORD) {
for (index, kind) in whitespace_around_keywords(&line.text) {
let (token_offset, pos) = line.mapping[bisect_left(&mapping_offsets, &index)];
let location = Location::new(pos.row(), pos.column() + index - token_offset);
if line.flags().contains(TokenFlags::KEYWORD) {
for (index, kind) in whitespace_around_keywords(line.text()) {
if settings.rules.enabled(kind.rule()) {
let (token_offset, pos) = line.mapping(index);
let location = Location::new(pos.row(), pos.column() + index - token_offset);
diagnostics.push(Diagnostic {
kind,
location,
@@ -114,7 +101,7 @@ pub fn check_logical_lines(
}
}
for (location, kind) in missing_whitespace_after_keyword(&line.tokens) {
for (location, kind) in missing_whitespace_after_keyword(line.tokens()) {
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
@@ -126,8 +113,8 @@ pub fn check_logical_lines(
}
}
}
if line.flags.contains(TokenFlags::COMMENT) {
for (range, kind) in whitespace_before_comment(&line.tokens, locator) {
if line.flags().contains(TokenFlags::COMMENT) {
for (range, kind) in whitespace_before_comment(line.tokens(), locator) {
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
@@ -139,9 +126,9 @@ pub fn check_logical_lines(
}
}
}
if line.flags.contains(TokenFlags::OPERATOR) {
if line.flags().contains(TokenFlags::OPERATOR) {
for (location, kind) in
whitespace_around_named_parameter_equals(&line.tokens, &line.text)
whitespace_around_named_parameter_equals(line.tokens(), line.text())
{
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
@@ -153,7 +140,7 @@ pub fn check_logical_lines(
});
}
}
for (location, kind) in missing_whitespace_around_operator(&line.tokens) {
for (location, kind) in missing_whitespace_around_operator(line.tokens()) {
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
kind,
@@ -172,7 +159,7 @@ pub fn check_logical_lines(
let should_fix = false;
for diagnostic in
missing_whitespace(&line.text, start_loc.row(), should_fix, indent_level)
missing_whitespace(line.text(), start_loc.row(), should_fix, indent_level)
{
if settings.rules.enabled(diagnostic.kind.rule()) {
diagnostics.push(diagnostic);
@@ -180,7 +167,7 @@ pub fn check_logical_lines(
}
}
if line.flags.contains(TokenFlags::BRACKET) {
if line.flags().contains(TokenFlags::BRACKET) {
#[cfg(feature = "logical_lines")]
let should_fix =
autofix.into() && settings.rules.should_fix(Rule::WhitespaceBeforeParameters);
@@ -188,7 +175,7 @@ pub fn check_logical_lines(
#[cfg(not(feature = "logical_lines"))]
let should_fix = false;
for diagnostic in whitespace_before_parameters(&line.tokens, should_fix) {
for diagnostic in whitespace_before_parameters(line.tokens(), should_fix) {
if settings.rules.enabled(diagnostic.kind.rule()) {
diagnostics.push(diagnostic);
}
@@ -203,7 +190,7 @@ pub fn check_logical_lines(
prev_indent_level,
indent_size,
) {
let (token_offset, pos) = line.mapping[bisect_left(&mapping_offsets, &index)];
let (token_offset, pos) = line.mapping(index);
let location = Location::new(pos.row(), pos.column() + index - token_offset);
if settings.rules.enabled(kind.rule()) {
diagnostics.push(Diagnostic {
@@ -229,10 +216,9 @@ mod tests {
use rustpython_parser::lexer::LexResult;
use rustpython_parser::{lexer, Mode};
use crate::rules::pycodestyle::logical_lines::LogicalLines;
use ruff_python_ast::source_code::Locator;
use crate::checkers::logical_lines::iter_logical_lines;
#[test]
fn split_logical_lines() {
let contents = r#"
@@ -241,9 +227,9 @@ y = 2
z = x + 1"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
.into_iter()
.map(|line| line.text)
.map(|line| line.text().to_string())
.collect();
let expected = vec![
"x = 1".to_string(),
@@ -262,9 +248,9 @@ y = 2
z = x + 1"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
.into_iter()
.map(|line| line.text)
.map(|line| line.text().to_string())
.collect();
let expected = vec![
"x = [1, 2, 3, ]".to_string(),
@@ -276,9 +262,9 @@ z = x + 1"#;
let contents = "x = 'abc'";
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
.into_iter()
.map(|line| line.text)
.map(|line| line.text().to_string())
.collect();
let expected = vec!["x = \"xxx\"".to_string()];
assert_eq!(actual, expected);
@@ -289,9 +275,9 @@ def f():
f()"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
.into_iter()
.map(|line| line.text)
.map(|line| line.text().to_string())
.collect();
let expected = vec!["def f():", "x = 1", "f()"];
assert_eq!(actual, expected);
@@ -304,9 +290,9 @@ def f():
f()"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
let actual: Vec<String> = LogicalLines::from_tokens(&lxr, &locator)
.into_iter()
.map(|line| line.text)
.map(|line| line.text().to_string())
.collect();
let expected = vec!["def f():", "\"xxxxxxxxxxxxxxxxxxxx\"", "", "x = 1", "f()"];
assert_eq!(actual, expected);

View file

@@ -58,7 +58,7 @@ pub fn is_overlong(
true
}
pub fn is_keyword_token(token: &Tok) -> bool {
pub const fn is_keyword_token(token: &Tok) -> bool {
matches!(
token,
Tok::False
@@ -98,14 +98,14 @@ pub fn is_keyword_token(token: &Tok) -> bool {
)
}
pub fn is_singleton_token(token: &Tok) -> bool {
pub const fn is_singleton_token(token: &Tok) -> bool {
matches!(
token,
Tok::False { .. } | Tok::True { .. } | Tok::None { .. },
)
}
pub fn is_op_token(token: &Tok) -> bool {
pub const fn is_op_token(token: &Tok) -> bool {
matches!(
token,
Tok::Lpar
@@ -157,25 +157,25 @@ pub fn is_op_token(token: &Tok) -> bool {
)
}
pub fn is_skip_comment_token(token: &Tok) -> bool {
pub const fn is_skip_comment_token(token: &Tok) -> bool {
matches!(
token,
Tok::Newline | Tok::Indent | Tok::Dedent | Tok::NonLogicalNewline | Tok::Comment { .. }
)
}
pub fn is_soft_keyword_token(token: &Tok) -> bool {
pub const fn is_soft_keyword_token(token: &Tok) -> bool {
matches!(token, Tok::Match | Tok::Case)
}
pub fn is_arithmetic_token(token: &Tok) -> bool {
pub const fn is_arithmetic_token(token: &Tok) -> bool {
matches!(
token,
Tok::DoubleStar | Tok::Star | Tok::Plus | Tok::Minus | Tok::Slash | Tok::At
)
}
pub fn is_ws_optional_token(token: &Tok) -> bool {
pub const fn is_ws_optional_token(token: &Tok) -> bool {
is_arithmetic_token(token)
|| matches!(
token,
@@ -188,7 +188,7 @@ pub fn is_ws_optional_token(token: &Tok) -> bool {
)
}
pub fn is_ws_needed_token(token: &Tok) -> bool {
pub const fn is_ws_needed_token(token: &Tok) -> bool {
matches!(
token,
Tok::DoubleStarEqual
@@ -218,7 +218,7 @@ pub fn is_ws_needed_token(token: &Tok) -> bool {
)
}
pub fn is_unary_token(token: &Tok) -> bool {
pub const fn is_unary_token(token: &Tok) -> bool {
matches!(
token,
Tok::Plus | Tok::Minus | Tok::Star | Tok::DoubleStar | Tok::RightShift

View file

@@ -2,6 +2,9 @@ use bitflags::bitflags;
use rustpython_parser::ast::Location;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Tok;
use std::borrow::Cow;
use std::fmt::{Debug, Formatter};
use std::iter::FusedIterator;
use unicode_width::UnicodeWidthStr;
use ruff_python_ast::source_code::Locator;
@@ -11,7 +14,7 @@ use crate::rules::pycodestyle::helpers::{is_keyword_token, is_op_token};
bitflags! {
#[derive(Default)]
pub struct TokenFlags: u32 {
pub struct TokenFlags: u8 {
/// Whether the logical line contains an operator.
const OPERATOR = 0b0000_0001;
/// Whether the logical line contains a bracket.
@@ -25,78 +25,327 @@ bitflags! {
}
}
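
Narrowing `TokenFlags` from `u32` to `u8` is lossless here, since only five bits are defined. One bitflags subtlety that matters when reading `check_logical_lines` above: `contains` of a union requires every flag in it to be set, while `intersects` is the "any of" test. A minimal standalone sketch (using the `bitflags` crate the file already depends on):

```rust
use bitflags::bitflags;

bitflags! {
    struct TokenFlags: u8 {
        const OPERATOR = 0b0000_0001;
        const PUNCTUATION = 0b0000_0010;
    }
}

fn main() {
    let flags = TokenFlags::OPERATOR;
    // `contains(A | B)` demands both bits.
    assert!(!flags.contains(TokenFlags::OPERATOR | TokenFlags::PUNCTUATION));
    // `intersects(A | B)` is satisfied by either bit.
    assert!(flags.intersects(TokenFlags::OPERATOR | TokenFlags::PUNCTUATION));
}
```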
#[derive(Debug)]
pub struct LogicalLine<'a> {
pub text: String,
pub mapping: Vec<(usize, Location)>,
pub flags: TokenFlags,
pub tokens: Vec<(Location, &'a Tok, Location)>,
#[derive(Clone)]
pub struct LogicalLines<'a> {
text: String,
/// start position, token, end position
tokens: Vec<(Location, &'a Tok, Location)>,
mappings: Mappings,
lines: Vec<Line>,
}
impl<'a> LogicalLine<'a> {
pub fn is_comment(&self) -> bool {
self.text.is_empty()
impl<'a> LogicalLines<'a> {
pub fn from_tokens(tokens: &'a [LexResult], locator: &Locator) -> Self {
assert!(u32::try_from(tokens.len()).is_ok());
let single_token = tokens.len() == 1;
let mut builder = LogicalLinesBuilder::with_token_capacity(tokens.len());
let mut parens: u32 = 0;
for (start, token, end) in tokens.iter().flatten() {
builder.push_token(*start, token, *end, locator);
match token {
Tok::Lbrace | Tok::Lpar | Tok::Lsqb => {
parens += 1;
}
Tok::Rbrace | Tok::Rpar | Tok::Rsqb => {
parens -= 1;
}
Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(_) if parens == 0 => {
if matches!(token, Tok::Newline) {
builder.finish_line();
}
// Comment-only file or non-logical newline?
else if single_token {
builder.discard_line();
} else {
builder.finish_line();
};
}
_ => {}
}
}
builder.finish()
}
}
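
The builder ends a logical line on `Newline` (or on `NonLogicalNewline`/`Comment` for comment-only lines) only while the bracket depth `parens` is zero, which is what folds a bracketed multi-line expression into a single logical line. A simplified, std-only sketch of that depth-tracking idea over raw characters (toy code, not the real lexer-token version above):

```rust
/// Toy version of the splitting rule: a newline ends a logical line
/// only when no `(`, `[`, or `{` is currently open.
fn logical_lines(source: &str) -> Vec<String> {
    let mut depth = 0u32;
    let mut current = String::new();
    let mut lines = Vec::new();
    for c in source.chars() {
        match c {
            '(' | '[' | '{' => depth += 1,
            ')' | ']' | '}' => depth = depth.saturating_sub(1),
            '\n' if depth == 0 => {
                if !current.is_empty() {
                    lines.push(std::mem::take(&mut current));
                }
                continue;
            }
            '\n' => {
                current.push(' '); // join physical lines inside brackets
                continue;
            }
            _ => {}
        }
        current.push(c);
    }
    if !current.is_empty() {
        lines.push(current);
    }
    lines
}

fn main() {
    let lines = logical_lines("x = [1,\n 2]\ny = 3\n");
    assert_eq!(lines, vec!["x = [1,  2]", "y = 3"]);
}
```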
fn build_line<'a>(
impl std::fmt::Debug for LogicalLines<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_list()
.entries(self.into_iter().map(DebugLogicalLine))
.finish()
}
}
impl<'a> IntoIterator for &'a LogicalLines<'a> {
type Item = LogicalLine<'a>;
type IntoIter = LogicalLinesIter<'a>;
fn into_iter(self) -> Self::IntoIter {
LogicalLinesIter {
lines: self,
inner: self.lines.iter(),
}
}
}
#[derive(Debug, Clone)]
struct Line {
flags: TokenFlags,
/// Byte offset of the start of the text of this line.
text_start: u32,
/// Byte offset of the end of the text of this line.
text_end: u32,
mappings_start: u32,
mappings_end: u32,
tokens_start: u32,
tokens_end: u32,
}
#[derive(Debug)]
pub struct LogicalLine<'a> {
lines: &'a LogicalLines<'a>,
line: &'a Line,
}
impl<'a> LogicalLine<'a> {
/// Returns true if this is a comment-only line
pub fn is_comment(&self) -> bool {
self.text().is_empty() && self.flags().contains(TokenFlags::COMMENT)
}
/// Returns the text of this line
pub fn text(&self) -> &'a str {
&self.lines.text[self.line.text_start as usize..self.line.text_end as usize]
}
/// Returns the tokens of the line
pub fn tokens(&self) -> &'a [(Location, &'a Tok, Location)] {
&self.lines.tokens[self.line.tokens_start as usize..self.line.tokens_end as usize]
}
/// Returns the [`Location`] of the first token on the line or [`None`].
pub fn first_token_location(&self) -> Option<&Location> {
self.token_locations().first()
}
fn token_offsets(&self) -> &[u32] {
&self.lines.mappings.logical_line_offsets
[self.line.mappings_start as usize..self.line.mappings_end as usize]
}
fn token_locations(&self) -> &[Location] {
&self.lines.mappings.locations
[self.line.mappings_start as usize..self.line.mappings_end as usize]
}
/// Returns the mapping for an offset in the logical line.
///
/// The offset of the closest token and its corresponding location.
pub fn mapping(&self, offset: usize) -> (usize, Location) {
let index = self
.token_offsets()
.binary_search(&(self.line.text_start + u32::try_from(offset).unwrap()))
.unwrap_or_default();
(
(self.token_offsets()[index] - self.line.text_start) as usize,
self.token_locations()[index],
)
}
pub fn is_empty(&self) -> bool {
self.lines.mappings.is_empty()
}
pub const fn flags(&self) -> TokenFlags {
self.line.flags
}
}
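
`mapping` replaces the old pattern of collecting `mapping_offsets` into a fresh `Vec` and calling `bisect_left` per diagnostic: the offsets now live once per document, and a `binary_search` over a dense `&[u32]` finds the token boundary. A hedged sketch of that lookup over two parallel slices (illustrative names, with `(row, column)` tuples standing in for `Location`):

```rust
/// Map a byte offset in the logical text to the closest recorded token
/// boundary and its original source position.
fn map_offset(offsets: &[u32], locations: &[(u32, u32)], offset: u32) -> (u32, (u32, u32)) {
    // An exact hit returns Ok(index); like the code above, a miss
    // falls back to index 0 via `unwrap_or_default`.
    let index = offsets.binary_search(&offset).unwrap_or_default();
    (offsets[index], locations[index])
}

fn main() {
    let offsets = [0, 2, 5]; // token boundaries in the logical line
    let locations = [(1, 0), (1, 2), (2, 0)]; // corresponding source positions
    assert_eq!(map_offset(&offsets, &locations, 2), (2, (1, 2)));
    assert_eq!(map_offset(&offsets, &locations, 3), (0, (1, 0))); // miss falls back to 0
}
```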
struct DebugLogicalLine<'a>(LogicalLine<'a>);
impl Debug for DebugLogicalLine<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("LogicalLine")
.field("text", &self.0.text())
.field("flags", &self.0.flags())
.field("tokens", &self.0.tokens())
.finish()
}
}
/// Iterator over the logical lines of a document.
pub struct LogicalLinesIter<'a> {
lines: &'a LogicalLines<'a>,
inner: std::slice::Iter<'a, Line>,
}
impl<'a> Iterator for LogicalLinesIter<'a> {
type Item = LogicalLine<'a>;
fn next(&mut self) -> Option<Self::Item> {
let line = self.inner.next()?;
Some(LogicalLine {
lines: self.lines,
line,
})
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.inner.size_hint()
}
}
impl DoubleEndedIterator for LogicalLinesIter<'_> {
fn next_back(&mut self) -> Option<Self::Item> {
let line = self.inner.next_back()?;
Some(LogicalLine {
lines: self.lines,
line,
})
}
}
impl ExactSizeIterator for LogicalLinesIter<'_> {}
impl FusedIterator for LogicalLinesIter<'_> {}
/// Source map that maps byte positions in the logical line text to the [`Location`] in the
/// original document.
#[derive(Debug, Default, Clone)]
struct Mappings {
/// Byte offsets of the logical lines at which tokens start/end.
logical_line_offsets: Vec<u32>,
/// Corresponding [`Location`]s for each byte offset mapping it to the position in the original document.
locations: Vec<Location>,
}
impl Mappings {
fn with_capacity(capacity: usize) -> Self {
Self {
logical_line_offsets: Vec::with_capacity(capacity),
locations: Vec::with_capacity(capacity),
}
}
fn len(&self) -> usize {
self.logical_line_offsets.len()
}
fn is_empty(&self) -> bool {
self.logical_line_offsets.is_empty()
}
fn truncate(&mut self, len: usize) {
self.locations.truncate(len);
self.logical_line_offsets.truncate(len);
}
#[allow(clippy::cast_possible_truncation)]
fn push(&mut self, offset: usize, location: Location) {
self.logical_line_offsets.push(offset as u32);
self.locations.push(location);
}
}
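
`Mappings` is a "struct of arrays": offsets and locations sit in two parallel `Vec`s instead of the old per-line `Vec<(usize, Location)>`. Besides sharing one pair of buffers across all lines, this shrinks each entry; the sketch below assumes a 64-bit target and a two-`u32` `Location`, which is an assumption about `rustpython_parser`'s layout made for illustration:

```rust
/// Stand-in for `rustpython_parser::ast::Location`; the two-`u32`
/// layout is an assumption made for this illustration.
#[allow(dead_code)]
#[derive(Copy, Clone)]
struct Location {
    row: u32,
    column: u32,
}

fn main() {
    // One Vec of tuples: each entry pays for an 8-byte `usize` offset
    // plus alignment padding (on a 64-bit target).
    assert_eq!(std::mem::size_of::<(usize, Location)>(), 16);
    // Two parallel Vecs: 4 bytes of offset + 8 bytes of location per
    // entry, and `binary_search` scans a dense `&[u32]`.
    assert_eq!(std::mem::size_of::<u32>() + std::mem::size_of::<Location>(), 12);
}
```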
#[derive(Debug, Default)]
struct CurrentLine {
flags: TokenFlags,
text_start: u32,
mappings_start: u32,
tokens_start: u32,
previous_token: Option<Location>,
}
#[derive(Debug, Default)]
pub struct LogicalLinesBuilder<'a> {
text: String,
tokens: Vec<(Location, &'a Tok, Location)>,
locator: &Locator,
) -> LogicalLine<'a> {
let mut logical = String::with_capacity(88);
let mut mapping = Vec::new();
let mut flags = TokenFlags::empty();
let mut prev: Option<&Location> = None;
let mut length = 0;
for (start, tok, end) in &tokens {
mappings: Mappings,
lines: Vec<Line>,
current_line: Option<CurrentLine>,
}
impl<'a> LogicalLinesBuilder<'a> {
fn with_token_capacity(capacity: usize) -> Self {
Self {
tokens: Vec::with_capacity(capacity),
mappings: Mappings::with_capacity(capacity + 1),
..Self::default()
}
}
// SAFETY: `LogicalLines::from_tokens` asserts that the file has fewer than `u32::MAX` tokens and each token is at least one character long
#[allow(clippy::cast_possible_truncation)]
fn push_token(&mut self, start: Location, token: &'a Tok, end: Location, locator: &Locator) {
let tokens_start = self.tokens.len();
self.tokens.push((start, token, end));
let mut line = self.current_line.get_or_insert_with(|| {
let mappings_start = self.mappings.len();
self.mappings.push(self.text.len(), start);
CurrentLine {
flags: TokenFlags::empty(),
text_start: self.text.len() as u32,
mappings_start: mappings_start as u32,
tokens_start: tokens_start as u32,
previous_token: None,
}
});
if matches!(
tok,
token,
Tok::Newline | Tok::NonLogicalNewline | Tok::Indent | Tok::Dedent
) {
continue;
return;
}
if mapping.is_empty() {
mapping.push((0, *start));
if matches!(token, Tok::Comment(..)) {
line.flags.insert(TokenFlags::COMMENT);
return;
}
if matches!(tok, Tok::Comment { .. }) {
flags.insert(TokenFlags::COMMENT);
continue;
}
if is_op_token(tok) {
flags.insert(TokenFlags::OPERATOR);
if is_op_token(token) {
line.flags.insert(TokenFlags::OPERATOR);
}
if matches!(
tok,
token,
Tok::Lpar | Tok::Lsqb | Tok::Lbrace | Tok::Rpar | Tok::Rsqb | Tok::Rbrace
) {
flags.insert(TokenFlags::BRACKET);
line.flags.insert(TokenFlags::BRACKET);
}
if matches!(tok, Tok::Comma | Tok::Semi | Tok::Colon) {
flags.insert(TokenFlags::PUNCTUATION);
if matches!(token, Tok::Comma | Tok::Semi | Tok::Colon) {
line.flags.insert(TokenFlags::PUNCTUATION);
}
if is_keyword_token(tok) {
flags.insert(TokenFlags::KEYWORD);
if is_keyword_token(token) {
line.flags.insert(TokenFlags::KEYWORD);
}
// TODO(charlie): "Mute" strings.
let s;
let text = if let Tok::String { value, .. } = tok {
s = format!("\"{}\"", "x".repeat(value.width()).clone());
&s
let text = if let Tok::String { value, .. } = token {
Cow::Owned(format!("\"{}\"", "x".repeat(value.width())))
} else {
locator.slice(Range {
location: *start,
end_location: *end,
})
Cow::Borrowed(locator.slice(Range {
location: start,
end_location: end,
}))
};
if let Some(prev) = prev {
if let Some(prev) = line.previous_token.take() {
if prev.row() != start.row() {
let prev_text = locator.slice(Range {
location: Location::new(prev.row(), prev.column() - 1),
@@ -106,61 +358,54 @@ fn build_line<'a>(
|| ((prev_text != "{" && prev_text != "[" && prev_text != "(")
&& (text != "}" && text != "]" && text != ")"))
{
logical.push(' ');
length += 1;
self.text.push(' ');
}
} else if prev.column() != start.column() {
let prev_text = locator.slice(Range {
location: *prev,
end_location: *start,
location: prev,
end_location: start,
});
logical.push_str(prev_text);
length += prev_text.len();
self.text.push_str(prev_text);
}
}
logical.push_str(text);
length += text.len();
mapping.push((length, *end));
prev = Some(end);
line.previous_token = Some(end);
self.text.push_str(&text);
self.mappings.push(self.text.len(), end);
}
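
The `Cow<str>` in `push_token` is what removes the old `let s; ... &s` dance: every non-string token borrows its text straight from the `Locator`, and only the masked `Tok::String` case allocates. A minimal illustration of the pattern (a hypothetical `token_text` helper, not the crate's API):

```rust
use std::borrow::Cow;

/// Borrow in the common case; allocate only when the text must be rewritten.
fn token_text(raw: &str, is_string: bool) -> Cow<'_, str> {
    if is_string {
        // Mask the contents, like the `Tok::String` branch above.
        Cow::Owned(format!("\"{}\"", "x".repeat(raw.len().saturating_sub(2))))
    } else {
        Cow::Borrowed(raw)
    }
}

fn main() {
    assert!(matches!(token_text("foo", false), Cow::Borrowed("foo")));
    assert_eq!(token_text("'ab'", true), "\"xx\"");
}
```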
LogicalLine {
text: logical,
mapping,
flags,
tokens,
}
}
pub fn iter_logical_lines<'a>(tokens: &'a [LexResult], locator: &Locator) -> Vec<LogicalLine<'a>> {
let mut parens = 0;
let mut accumulator = Vec::with_capacity(32);
let mut lines = Vec::with_capacity(128);
for &(start, ref tok, end) in tokens.iter().flatten() {
accumulator.push((start, tok, end));
if matches!(tok, Tok::Lbrace | Tok::Lpar | Tok::Lsqb) {
parens += 1;
} else if matches!(tok, Tok::Rbrace | Tok::Rpar | Tok::Rsqb) {
parens -= 1;
} else if parens == 0 {
if matches!(
tok,
Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(..)
) {
if matches!(tok, Tok::Newline) {
lines.push(build_line(accumulator, locator));
accumulator = Vec::with_capacity(32);
} else if tokens.len() == 1 {
accumulator.remove(0);
} else {
lines.push(build_line(accumulator, locator));
accumulator = Vec::with_capacity(32);
}
}
// SAFETY: `LogicalLines::from_tokens` asserts that the file has fewer than `u32::MAX` tokens and each token is at least one character long
#[allow(clippy::cast_possible_truncation)]
fn finish_line(&mut self) {
if let Some(current) = self.current_line.take() {
self.lines.push(Line {
flags: current.flags,
text_start: current.text_start,
text_end: self.text.len() as u32,
mappings_start: current.mappings_start,
mappings_end: self.mappings.len() as u32,
tokens_start: current.tokens_start,
tokens_end: self.tokens.len() as u32,
});
}
}
if !accumulator.is_empty() {
lines.push(build_line(accumulator, locator));
fn discard_line(&mut self) {
if let Some(current) = self.current_line.take() {
self.text.truncate(current.text_start as usize);
self.tokens.truncate(current.tokens_start as usize);
self.mappings.truncate(current.mappings_start as usize);
}
}
fn finish(mut self) -> LogicalLines<'a> {
self.finish_line();
LogicalLines {
text: self.text,
tokens: self.tokens,
mappings: self.mappings,
lines: self.lines,
}
}
lines
}
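
This builder is the heart of the allocation win: instead of one `String`, one mapping `Vec`, and one token `Vec` per logical line, everything is appended to document-wide buffers, and each `Line` stores only `u32` ranges into them, so `discard_line` is a cheap `truncate` rather than a discarded allocation. A compact sketch of that arena-style pattern (illustrative names, text buffer only):

```rust
/// Each line is just a byte range into one shared text buffer.
struct Line {
    start: u32,
    end: u32,
}

#[derive(Default)]
struct Builder {
    text: String,
    lines: Vec<Line>,
    line_start: u32,
}

impl Builder {
    fn push_str(&mut self, s: &str) {
        self.text.push_str(s);
    }
    fn finish_line(&mut self) {
        self.lines.push(Line {
            start: self.line_start,
            end: self.text.len() as u32,
        });
        self.line_start = self.text.len() as u32;
    }
    /// Dropping a line is a truncate, not a free: no allocation churn.
    fn discard_line(&mut self) {
        self.text.truncate(self.line_start as usize);
    }
    fn line_text(&self, line: &Line) -> &str {
        &self.text[line.start as usize..line.end as usize]
    }
}

fn main() {
    let mut b = Builder::default();
    b.push_str("x = 1");
    b.finish_line();
    b.push_str("# scratch");
    b.discard_line(); // comment-only line: rewind the shared buffer
    b.push_str("y = 2");
    b.finish_line();
    let texts: Vec<&str> = b.lines.iter().map(|l| b.line_text(l)).collect();
    assert_eq!(texts, ["x = 1", "y = 2"]);
}
```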

View file

@@ -251,7 +251,7 @@ pub fn indentation(
));
}
let indent_expect = prev_logical_line.map_or(false, |prev_logical_line| {
prev_logical_line.text.ends_with(':')
prev_logical_line.text().ends_with(':')
});
if indent_expect && indent_level <= prev_indent_level.unwrap_or(0) {
diagnostics.push((

View file

@@ -37,50 +37,55 @@ pub fn missing_whitespace(
) -> Vec<Diagnostic> {
let mut diagnostics = vec![];
let mut num_lsqb = 0;
let mut num_rsqb = 0;
let mut num_lsqb = 0u32;
let mut num_rsqb = 0u32;
let mut prev_lsqb = None;
let mut prev_lbrace = None;
for (idx, (char, next_char)) in line.chars().tuple_windows().enumerate() {
if char == '[' {
num_lsqb += 1;
prev_lsqb = Some(idx);
} else if char == ']' {
num_rsqb += 1;
} else if char == '{' {
prev_lbrace = Some(idx);
}
if (char == ',' || char == ';' || char == ':') && !char::is_whitespace(next_char) {
if char == ':' && num_lsqb > num_rsqb && prev_lsqb > prev_lbrace {
continue; // Slice syntax, no space required
match char {
'[' => {
num_lsqb += 1;
prev_lsqb = Some(idx);
}
if char == ',' && (next_char == ')' || next_char == ']') {
continue; // Allow tuple with only one element: (3,)
']' => {
num_rsqb += 1;
}
if char == ':' && next_char == '=' {
continue; // Allow assignment expression
'{' => {
prev_lbrace = Some(idx);
}
let kind: MissingWhitespace = MissingWhitespace {
token: char.to_string(),
};
',' | ';' | ':' if !next_char.is_whitespace() => {
if char == ':' && num_lsqb > num_rsqb && prev_lsqb > prev_lbrace {
continue; // Slice syntax, no space required
}
if char == ',' && matches!(next_char, ')' | ']') {
continue; // Allow tuple with only one element: (3,)
}
if char == ':' && next_char == '=' {
continue; // Allow assignment expression
}
let mut diagnostic = Diagnostic::new(
kind,
Range::new(
Location::new(row, indent_level + idx),
Location::new(row, indent_level + idx),
),
);
let kind = MissingWhitespace {
token: char.to_string(),
};
if autofix {
diagnostic.set_fix(Edit::insertion(
" ".to_string(),
Location::new(row, indent_level + idx + 1),
));
let mut diagnostic = Diagnostic::new(
kind,
Range::new(
Location::new(row, indent_level + idx),
Location::new(row, indent_level + idx),
),
);
if autofix {
diagnostic.set_fix(Edit::insertion(
" ".to_string(),
Location::new(row, indent_level + idx + 1),
));
}
diagnostics.push(diagnostic);
}
diagnostics.push(diagnostic);
_ => {}
}
}
diagnostics
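
The restructured match keeps pycodestyle's heuristics intact: a `:` needs no trailing space inside slice syntax (more `[` than `]` seen so far, and the last `[` after the last `{`), a `,` may butt against `)`/`]` for one-element tuples, and `:=` is left alone. A self-contained sketch of just that scan, returning character offsets instead of `Diagnostic`s (simplified from the rule above; uses `windows(2)` in place of `tuple_windows`):

```rust
/// Returns the indexes of `,`, `;`, or `:` that should be followed by
/// a space, using the same bookkeeping as the rule above.
fn missing_whitespace_offsets(line: &str) -> Vec<usize> {
    let mut offsets = Vec::new();
    let (mut num_lsqb, mut num_rsqb) = (0u32, 0u32);
    let (mut prev_lsqb, mut prev_lbrace) = (None, None);
    let chars: Vec<char> = line.chars().collect();
    for (idx, window) in chars.windows(2).enumerate() {
        let (char, next_char) = (window[0], window[1]);
        match char {
            '[' => {
                num_lsqb += 1;
                prev_lsqb = Some(idx);
            }
            ']' => num_rsqb += 1,
            '{' => prev_lbrace = Some(idx),
            ',' | ';' | ':' if !next_char.is_whitespace() => {
                if char == ':' && num_lsqb > num_rsqb && prev_lsqb > prev_lbrace {
                    continue; // slice syntax: x[1:2]
                }
                if char == ',' && matches!(next_char, ')' | ']') {
                    continue; // one-element tuple: (3,)
                }
                if char == ':' && next_char == '=' {
                    continue; // assignment expression: x := 1
                }
                offsets.push(idx);
            }
            _ => {}
        }
    }
    offsets
}

fn main() {
    assert_eq!(missing_whitespace_offsets("f(a,b)"), vec![3]);
    assert_eq!(missing_whitespace_offsets("x[1:2]"), Vec::<usize>::new());
    assert_eq!(missing_whitespace_offsets("d = {1:2}"), vec![6]);
}
```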

View file

@@ -67,7 +67,7 @@ pub fn missing_whitespace_around_operator(
let mut needs_space_main: Option<bool> = Some(false);
let mut needs_space_aux: Option<bool> = None;
let mut prev_end_aux: Option<&Location> = None;
let mut parens = 0;
let mut parens = 0u32;
let mut prev_type: Option<&Tok> = None;
let mut prev_end: Option<&Location> = None;