Store source code on message (#3897)

This commit is contained in:
Micha Reiser 2023-04-11 09:57:36 +02:00 committed by GitHub
parent 76c47a9a43
commit 381203c084
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 537 additions and 217 deletions

1
Cargo.lock generated
View file

@ -2015,6 +2015,7 @@ dependencies = [
"ruff_python_semantic",
"ruff_python_stdlib",
"ruff_rustpython",
"ruff_text_size",
"rustc-hash",
"rustpython-common",
"rustpython-parser",

View file

@ -21,6 +21,7 @@ ruff_python_ast = { path = "../ruff_python_ast" }
ruff_python_semantic = { path = "../ruff_python_semantic" }
ruff_python_stdlib = { path = "../ruff_python_stdlib" }
ruff_rustpython = { path = "../ruff_rustpython" }
ruff_text_size = { path = "../ruff_text_size" }
annotate-snippets = { version = "0.9.1", features = ["color"] }
anyhow = { workspace = true }

View file

@ -103,7 +103,7 @@ fn is_lone_child(child: &Stmt, parent: &Stmt, deleted: &[&Stmt]) -> Result<bool>
/// Return the location of a trailing semicolon following a `Stmt`, if it's part
/// of a multi-statement line.
fn trailing_semicolon(stmt: &Stmt, locator: &Locator) -> Option<Location> {
let contents = locator.skip(stmt.end_location.unwrap());
let contents = locator.after(stmt.end_location.unwrap());
for (row, line) in NewlineWithTrailingNewline::from(contents).enumerate() {
let trimmed = line.trim();
if trimmed.starts_with(';') {
@ -126,7 +126,7 @@ fn trailing_semicolon(stmt: &Stmt, locator: &Locator) -> Option<Location> {
/// Find the next valid break for a `Stmt` after a semicolon.
fn next_stmt_break(semicolon: Location, locator: &Locator) -> Location {
let start_location = Location::new(semicolon.row(), semicolon.column() + 1);
let contents = locator.skip(start_location);
let contents = locator.after(start_location);
for (row, line) in NewlineWithTrailingNewline::from(contents).enumerate() {
let trimmed = line.trim();
// Skip past any continuations.
@ -158,7 +158,7 @@ fn next_stmt_break(semicolon: Location, locator: &Locator) -> Location {
/// Return `true` if a `Stmt` occurs at the end of a file.
fn is_end_of_file(stmt: &Stmt, locator: &Locator) -> bool {
let contents = locator.skip(stmt.end_location.unwrap());
let contents = locator.after(stmt.end_location.unwrap());
contents.is_empty()
}
@ -361,7 +361,7 @@ pub fn remove_argument(
remove_parentheses: bool,
) -> Result<Edit> {
// TODO(sbrugman): Preserve trailing comments.
let contents = locator.skip(call_at);
let contents = locator.after(call_at);
let mut fix_start = None;
let mut fix_end = None;

View file

@ -80,7 +80,7 @@ fn apply_fixes<'a>(
}
// Add the remaining content.
let slice = locator.skip(last_pos.unwrap_or_default());
let slice = locator.after(last_pos.unwrap_or_default());
output.push_str(slice);
(output, fixed)

View file

@ -174,7 +174,7 @@ fn match_docstring_end(body: &[Stmt]) -> Option<Location> {
/// along with a trailing newline suffix.
fn end_of_statement_insertion(stmt: &Stmt, locator: &Locator, stylist: &Stylist) -> Insertion {
let location = stmt.end_location.unwrap();
let mut tokens = lexer::lex_located(locator.skip(location), Mode::Module, location).flatten();
let mut tokens = lexer::lex_located(locator.after(location), Mode::Module, location).flatten();
if let Some((.., Tok::Semi, end)) = tokens.next() {
// If the first token after the docstring is a semicolon, insert after the semicolon as an
// inline statement;
@ -207,7 +207,7 @@ fn top_of_file_insertion(body: &[Stmt], locator: &Locator, stylist: &Stylist) ->
let mut location = if let Some(location) = match_docstring_end(body) {
// If the first token after the docstring is a semicolon, insert after the semicolon as an
// inline statement;
let first_token = lexer::lex_located(locator.skip(location), Mode::Module, location)
let first_token = lexer::lex_located(locator.after(location), Mode::Module, location)
.flatten()
.next();
if let Some((.., Tok::Semi, end)) = first_token {
@ -222,7 +222,7 @@ fn top_of_file_insertion(body: &[Stmt], locator: &Locator, stylist: &Stylist) ->
// Skip over any comments and empty lines.
for (.., tok, end) in
lexer::lex_located(locator.skip(location), Mode::Module, location).flatten()
lexer::lex_located(locator.after(location), Mode::Module, location).flatten()
{
if matches!(tok, Tok::Comment(..) | Tok::Newline) {
location = Location::new(end.row() + 1, 0);

View file

@ -22,7 +22,7 @@ use crate::checkers::physical_lines::check_physical_lines;
use crate::checkers::tokens::check_tokens;
use crate::directives::Directives;
use crate::doc_lines::{doc_lines_from_ast, doc_lines_from_tokens};
use crate::message::{Message, Source};
use crate::message::Message;
use crate::noqa::add_noqa;
use crate::registry::{AsRule, Rule};
use crate::rules::pycodestyle;
@ -355,19 +355,26 @@ pub fn lint_only(
// Convert from diagnostics to messages.
let path_lossy = path.to_string_lossy();
result.map(|(messages, imports)| {
let source_code = if settings.show_source && !messages.is_empty() {
Some(locator.to_source_code_buf())
} else {
None
};
(
messages
.into_iter()
.map(|diagnostic| {
let source = if settings.show_source {
Some(Source::from_diagnostic(&diagnostic, &locator))
} else {
None
};
let lineno = diagnostic.location.row();
let noqa_row = *directives.noqa_line_for.get(&lineno).unwrap_or(&lineno);
Message::from_diagnostic(diagnostic, path_lossy.to_string(), source, noqa_row)
Message::from_diagnostic(
diagnostic,
path_lossy.to_string(),
source_code.clone(),
noqa_row,
)
})
.collect(),
imports,
@ -500,22 +507,23 @@ This indicates a bug in `{}`. If you could open an issue at:
let path_lossy = path.to_string_lossy();
return Ok(FixerResult {
result: result.map(|(messages, imports)| {
let source_code = if settings.show_source && !messages.is_empty() {
Some(locator.to_source_code_buf())
} else {
None
};
(
messages
.into_iter()
.map(|diagnostic| {
let source = if settings.show_source {
Some(Source::from_diagnostic(&diagnostic, &locator))
} else {
None
};
let lineno = diagnostic.location.row();
let noqa_row =
*directives.noqa_line_for.get(&lineno).unwrap_or(&lineno);
Message::from_diagnostic(
diagnostic,
path_lossy.to_string(),
source,
source_code.clone(),
noqa_row,
)
})

View file

@ -1,12 +1,9 @@
use crate::fs::relativize_path;
use crate::jupyter::JupyterIndex;
use crate::message::text::RuleCodeAndBody;
use crate::message::text::{MessageCodeFrame, RuleCodeAndBody};
use crate::message::{group_messages_by_filename, Emitter, EmitterContext, Message};
use crate::registry::AsRule;
use annotate_snippets::display_list::{DisplayList, FormatOptions};
use annotate_snippets::snippet::{Annotation, AnnotationType, Slice, Snippet, SourceAnnotation};
use colored::Colorize;
use std::fmt::{Display, Formatter, Write as fmtWrite};
use std::fmt::{Display, Formatter};
use std::io::Write;
#[derive(Default)]
@ -116,47 +113,10 @@ impl Display for DisplayGroupedMessage<'_> {
},
)?;
if let Some(source) = &message.source {
let suggestion = message.kind.suggestion.clone();
let footer = if suggestion.is_some() {
vec![Annotation {
id: None,
label: suggestion.as_deref(),
annotation_type: AnnotationType::Help,
}]
} else {
vec![]
};
let label = message.kind.rule().noqa_code().to_string();
let snippet = Snippet {
title: None,
footer,
slices: vec![Slice {
source: &source.contents,
line_start: message.location.row(),
annotations: vec![SourceAnnotation {
label: &label,
annotation_type: AnnotationType::Error,
range: source.range,
}],
// The origin (file name, line number, and column number) is already encoded
// in the `label`.
origin: None,
fold: false,
}],
opt: FormatOptions {
#[cfg(test)]
color: false,
#[cfg(not(test))]
color: colored::control::SHOULD_COLORIZE.should_colorize(),
..FormatOptions::default()
},
};
// Skip the first line, since we format the `label` ourselves.
let message = DisplayList::from(snippet);
{
use std::fmt::Write;
let mut padded = PadAdapter::new(f);
writeln!(&mut padded, "{message}")?;
write!(padded, "{}", MessageCodeFrame { message })?;
}
writeln!(f)?;

View file

@ -20,22 +20,20 @@ pub use json::JsonEmitter;
pub use junit::JunitEmitter;
pub use pylint::PylintEmitter;
pub use rustpython_parser::ast::Location;
use serde::{Deserialize, Serialize};
pub use text::TextEmitter;
use crate::jupyter::JupyterIndex;
use ruff_diagnostics::{Diagnostic, DiagnosticKind, Fix};
use ruff_python_ast::source_code::Locator;
use ruff_python_ast::types::Range;
use ruff_python_ast::source_code::SourceCodeBuf;
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, PartialEq, Eq)]
pub struct Message {
pub kind: DiagnosticKind,
pub location: Location,
pub end_location: Location,
pub fix: Fix,
pub filename: String,
pub source: Option<Source>,
pub source: Option<SourceCodeBuf>,
pub noqa_row: usize,
}
@ -43,7 +41,7 @@ impl Message {
pub fn from_diagnostic(
diagnostic: Diagnostic,
filename: String,
source: Option<Source>,
source: Option<SourceCodeBuf>,
noqa_row: usize,
) -> Self {
Self {
@ -77,38 +75,6 @@ impl PartialOrd for Message {
}
}
#[derive(Debug, PartialEq, Eq, Serialize, Deserialize, Clone)]
pub struct Source {
pub contents: String,
pub range: (usize, usize),
}
impl Source {
pub fn from_diagnostic(diagnostic: &Diagnostic, locator: &Locator) -> Self {
let location = Location::new(diagnostic.location.row(), 0);
// Diagnostics can already extend one-past-the-end. If they do, though, then
// they'll end at the start of a line. We need to avoid extending by yet another
// line past-the-end.
let end_location = if diagnostic.end_location.column() == 0 {
diagnostic.end_location
} else {
Location::new(diagnostic.end_location.row() + 1, 0)
};
let source = locator.slice(Range::new(location, end_location));
let num_chars_in_range = locator
.slice(Range::new(diagnostic.location, diagnostic.end_location))
.chars()
.count();
Source {
contents: source.to_string(),
range: (
diagnostic.location.column(),
diagnostic.location.column() + num_chars_in_range,
),
}
}
}
fn group_messages_by_filename(messages: &[Message]) -> BTreeMap<&str, Vec<&Message>> {
let mut grouped_messages = BTreeMap::default();
for message in messages {
@ -156,15 +122,15 @@ impl<'a> EmitterContext<'a> {
#[cfg(test)]
mod tests {
use crate::message::{Emitter, EmitterContext, Location, Message, Source};
use crate::message::{Emitter, EmitterContext, Location, Message};
use crate::rules::pyflakes::rules::{UndefinedName, UnusedImport, UnusedVariable};
use ruff_diagnostics::{Diagnostic, Edit, Fix};
use ruff_python_ast::source_code::Locator;
use ruff_python_ast::source_code::SourceCodeBuf;
use ruff_python_ast::types::Range;
use rustc_hash::FxHashMap;
pub(super) fn create_messages() -> Vec<Message> {
let file_1 = r#"import os
let fib = r#"import os
def fibonacci(n):
"""Compute the nth number in the Fibonacci sequence."""
@ -177,8 +143,6 @@ def fibonacci(n):
return fibonacci(n - 1) + fibonacci(n - 2)
"#;
let file_1_locator = Locator::new(file_1);
let unused_import = Diagnostic::new(
UnusedImport {
name: "os".to_string(),
@ -188,7 +152,7 @@ def fibonacci(n):
Range::new(Location::new(1, 7), Location::new(1, 9)),
);
let unused_source = Source::from_diagnostic(&unused_import, &file_1_locator);
let fib_source = SourceCodeBuf::from_content(fib);
let unused_variable = Diagnostic::new(
UnusedVariable {
@ -201,8 +165,6 @@ def fibonacci(n):
Location::new(5, 9),
)]));
let unused_variable_source = Source::from_diagnostic(&unused_variable, &file_1_locator);
let file_2 = r#"if a == 1: pass"#;
let undefined_name = Diagnostic::new(
@ -212,20 +174,20 @@ def fibonacci(n):
Range::new(Location::new(1, 3), Location::new(1, 4)),
);
let undefined_source = Source::from_diagnostic(&undefined_name, &Locator::new(file_2));
let file_2_source = SourceCodeBuf::from_content(file_2);
vec![
Message::from_diagnostic(unused_import, "fib.py".to_string(), Some(unused_source), 1),
Message::from_diagnostic(
unused_variable,
unused_import,
"fib.py".to_string(),
Some(unused_variable_source),
Some(fib_source.clone()),
1,
),
Message::from_diagnostic(unused_variable, "fib.py".to_string(), Some(fib_source), 1),
Message::from_diagnostic(
undefined_name,
"undef.py".to_string(),
Some(undefined_source),
Some(file_2_source),
1,
),
]

View file

@ -1,11 +1,14 @@
use crate::fs::relativize_path;
use crate::message::{Emitter, EmitterContext, Message};
use crate::message::{Emitter, EmitterContext, Location, Message};
use crate::registry::AsRule;
use annotate_snippets::display_list::{DisplayList, FormatOptions};
use annotate_snippets::snippet::{Annotation, AnnotationType, Slice, Snippet, SourceAnnotation};
use colored::Colorize;
use ruff_diagnostics::DiagnosticKind;
use std::fmt::Display;
use ruff_python_ast::source_code::OneIndexed;
use ruff_python_ast::types::Range;
use ruff_text_size::TextRange;
use std::fmt::{Display, Formatter};
use std::io::Write;
#[derive(Default)]
@ -63,45 +66,8 @@ impl Emitter for TextEmitter {
}
)?;
if let Some(source) = &message.source {
let suggestion = message.kind.suggestion.clone();
let footer = if suggestion.is_some() {
vec![Annotation {
id: None,
label: suggestion.as_deref(),
annotation_type: AnnotationType::Help,
}]
} else {
Vec::new()
};
let label = message.kind.rule().noqa_code().to_string();
let snippet = Snippet {
title: None,
slices: vec![Slice {
source: &source.contents,
line_start: message.location.row(),
annotations: vec![SourceAnnotation {
label: &label,
annotation_type: AnnotationType::Error,
range: source.range,
}],
// The origin (file name, line number, and column number) is already encoded
// in the `label`.
origin: None,
fold: false,
}],
footer,
opt: FormatOptions {
#[cfg(test)]
color: false,
#[cfg(not(test))]
color: colored::control::SHOULD_COLORIZE.should_colorize(),
..FormatOptions::default()
},
};
writeln!(writer, "{message}\n", message = DisplayList::from(snippet))?;
if message.source.is_some() {
writeln!(writer, "{}", MessageCodeFrame { message })?;
}
}
@ -147,6 +113,95 @@ impl Display for RuleCodeAndBody<'_> {
}
}
/// Renders the annotated source-code snippet ("code frame") for a message:
/// the lines spanned by the diagnostic, an error annotation labeled with the
/// rule's noqa code, and — when the rule carries one — the autofix suggestion
/// as a trailing "help" footer.
pub(super) struct MessageCodeFrame<'a> {
    pub message: &'a Message,
}

impl Display for MessageCodeFrame<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Message {
            kind,
            source,
            location,
            end_location,
            ..
        } = self.message;

        // Messages restored from a cache entry without stored source code
        // render nothing.
        if let Some(source_code) = source {
            // Attach the rule's suggestion (if any) as a "help" footer line.
            let footer = match kind.suggestion.as_deref() {
                Some(suggestion) => vec![Annotation {
                    id: None,
                    label: Some(suggestion),
                    annotation_type: AnnotationType::Help,
                }],
                None => Vec::new(),
            };

            // Extract the full lines covered by the diagnostic: from the start
            // of the first line up to the start of the line *after*
            // `end_location`, clamped to one-past-the-last line of the file.
            let source_code_start =
                source_code.line_start(OneIndexed::new(location.row()).unwrap());

            let source_code_end = source_code.line_start(
                OneIndexed::new(
                    end_location
                        .row()
                        .saturating_add(1)
                        .min(source_code.line_count() + 1),
                )
                .unwrap(),
            );

            let source_text =
                &source_code.text()[TextRange::new(source_code_start, source_code_end)];

            let content_range = source_code.text_range(Range::new(
                // Subtract 1 because message column indices are 1-based but the
                // column indices of the source-code index are 0-based.
                Location::new(location.row(), location.column().saturating_sub(1)),
                Location::new(end_location.row(), end_location.column().saturating_sub(1)),
            ));

            // `annotate-snippets` expects *character* offsets relative to the
            // start of the snippet, so count the characters the diagnostic
            // covers within the extracted lines.
            let annotation_length = source_text[content_range - source_code_start]
                .chars()
                .count();

            // Use `saturating_sub` (rather than a bare `- 1`) so a zero column
            // cannot underflow, mirroring the range computation above.
            let annotation_start = location.column().saturating_sub(1);

            let label = kind.rule().noqa_code().to_string();

            let snippet = Snippet {
                title: None,
                slices: vec![Slice {
                    source: source_text,
                    line_start: location.row(),
                    annotations: vec![SourceAnnotation {
                        label: &label,
                        annotation_type: AnnotationType::Error,
                        range: (annotation_start, annotation_start + annotation_length),
                    }],
                    // The origin (file name, line number, and column number) is already encoded
                    // in the `label`.
                    origin: None,
                    fold: false,
                }],
                footer,
                opt: FormatOptions {
                    #[cfg(test)]
                    color: false,
                    #[cfg(not(test))]
                    color: colored::control::SHOULD_COLORIZE.should_colorize(),
                    ..FormatOptions::default()
                },
            };

            writeln!(f, "{message}", message = DisplayList::from(snippet))?;
        }

        Ok(())
    }
}
#[cfg(test)]
mod tests {
use crate::message::tests::{capture_emitter_output, create_messages};

View file

@ -29,7 +29,7 @@ pub fn result_exists(returns: &[(&Stmt, Option<&Expr>)]) -> bool {
/// This method assumes that the statement is the last statement in its body; specifically, that
/// the statement isn't followed by a semicolon, followed by a multi-line statement.
pub fn end_of_last_statement(stmt: &Stmt, locator: &Locator) -> Location {
let contents = locator.skip(stmt.end_location.unwrap());
let contents = locator.after(stmt.end_location.unwrap());
// End-of-file, so just return the end of the statement.
if contents.is_empty() {

View file

@ -62,7 +62,7 @@ pub fn has_comment_break(stmt: &Stmt, locator: &Locator) -> bool {
// # Direct comment.
// def f(): pass
let mut seen_blank = false;
for line in locator.take(stmt.location).universal_newlines().rev() {
for line in locator.up_to(stmt.location).universal_newlines().rev() {
let line = line.trim();
if seen_blank {
if line.starts_with('#') {

View file

@ -66,7 +66,7 @@ fn match_token_after<F, T>(located: &Located<T>, locator: &Locator, f: F) -> Ran
where
F: Fn(Tok) -> bool,
{
let contents = locator.skip(located.location);
let contents = locator.after(located.location);
// Track the bracket depth.
let mut par_count = 0;
@ -129,7 +129,7 @@ fn match_token<F, T>(located: &Located<T>, locator: &Locator, f: F) -> Range
where
F: Fn(Tok) -> bool,
{
let contents = locator.skip(located.location);
let contents = locator.after(located.location);
// Track the bracket depth.
let mut par_count = 0;

View file

@ -142,7 +142,7 @@ pub fn remove_import_members(contents: &str, members: &[&str]) -> String {
}
// Add the remaining content.
let slice = locator.skip(last_pos);
let slice = locator.after(last_pos);
output.push_str(slice);
output
}

View file

@ -5,13 +5,18 @@ use std::path::Path;
use anyhow::Result;
use filetime::FileTime;
use itertools::Itertools;
use log::error;
use path_absolutize::Absolutize;
use ruff::message::Message;
use ruff::message::{Location, Message};
use ruff::settings::{flags, AllSettings, Settings};
use ruff_cache::{CacheKey, CacheKeyHasher};
use ruff_diagnostics::{DiagnosticKind, Fix};
use ruff_python_ast::imports::ImportMap;
use serde::{Deserialize, Serialize};
use ruff_python_ast::source_code::{LineIndex, SourceCodeBuf};
use rustc_hash::FxHashMap;
use serde::ser::{SerializeSeq, SerializeStruct};
use serde::{Deserialize, Serialize, Serializer};
#[cfg(unix)]
use std::os::unix::fs::PermissionsExt;
@ -19,33 +24,90 @@ const CARGO_PKG_VERSION: &str = env!("CARGO_PKG_VERSION");
/// Borrowed view of a lint result as it is written to the cache.
#[derive(Serialize)]
struct CheckResultRef<'a> {
    // Each message is serialized through `SerializeMessage`, which omits the
    // per-message `source` field.
    #[serde(serialize_with = "serialize_messages")]
    messages: &'a [Message],
    imports: &'a ImportMap,
    // One `(filename, file content)` pair per distinct file, so that the
    // source text is stored once per file rather than once per message.
    sources: Vec<(&'a str, &'a str)>,
}
/// Serializes `messages` as a sequence, wrapping each element in
/// [`SerializeMessage`] so the per-message `source` field is skipped
/// (the file contents are serialized separately, once per file).
fn serialize_messages<S>(messages: &[Message], serializer: S) -> Result<S::Ok, S::Error>
where
    S: Serializer,
{
    let mut seq = serializer.serialize_seq(Some(messages.len()))?;

    messages
        .iter()
        .try_for_each(|message| seq.serialize_element(&SerializeMessage(message)))?;

    seq.end()
}
/// Serialization wrapper over [`Message`] that omits the `source` field;
/// the file contents are serialized once per file in `CheckResultRef::sources`
/// instead of once per message.
struct SerializeMessage<'a>(&'a Message);

impl Serialize for SerializeMessage<'_> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Exhaustive destructuring (no `..`) so that adding a field to
        // `Message` produces a compile error here, forcing this impl to be
        // kept in sync.
        let Message {
            kind,
            location,
            end_location,
            fix,
            filename,
            // Serialized manually for all files
            source: _source,
            noqa_row,
        } = self.0;

        // NOTE: the field order here must match `MessageHeader`, which is used
        // to deserialize these entries.
        let mut s = serializer.serialize_struct("Message", 6)?;

        s.serialize_field("kind", &kind)?;
        s.serialize_field("location", &location)?;
        s.serialize_field("end_location", &end_location)?;
        s.serialize_field("fix", &fix)?;
        s.serialize_field("filename", &filename)?;
        s.serialize_field("noqa_row", &noqa_row)?;

        s.end()
    }
}
/// Deserialization counterpart of [`SerializeMessage`]: every `Message` field
/// except `source`, which is restored afterwards from the per-file `sources`
/// table. Field order must match the serialization order in
/// `SerializeMessage`.
#[derive(Deserialize)]
struct MessageHeader {
    kind: DiagnosticKind,
    location: Location,
    end_location: Location,
    fix: Fix,
    filename: String,
    noqa_row: usize,
}
#[derive(Deserialize)]
struct CheckResult {
messages: Vec<Message>,
messages: Vec<MessageHeader>,
imports: ImportMap,
sources: Vec<(String, String)>,
}
fn content_dir() -> &'static Path {
Path::new("content")
}
fn cache_key<P: AsRef<Path>>(
path: P,
package: Option<&P>,
fn cache_key(
path: &Path,
package: Option<&Path>,
metadata: &fs::Metadata,
settings: &Settings,
autofix: flags::Autofix,
) -> u64 {
let mut hasher = CacheKeyHasher::new();
CARGO_PKG_VERSION.cache_key(&mut hasher);
path.as_ref().absolutize().unwrap().cache_key(&mut hasher);
path.absolutize().unwrap().cache_key(&mut hasher);
package
.as_ref()
.map(|path| path.as_ref().absolutize().unwrap())
.map(|path| path.absolutize().unwrap())
.cache_key(&mut hasher);
FileTime::from_last_modification_time(metadata).cache_key(&mut hasher);
#[cfg(unix)]
@ -92,9 +154,9 @@ fn del_sync(cache_dir: &Path, key: u64) -> Result<(), std::io::Error> {
}
/// Get a value from the cache.
pub fn get<P: AsRef<Path>>(
path: P,
package: Option<&P>,
pub fn get(
path: &Path,
package: Option<&Path>,
metadata: &fs::Metadata,
settings: &AllSettings,
autofix: flags::Autofix,
@ -105,7 +167,34 @@ pub fn get<P: AsRef<Path>>(
)
.ok()?;
match bincode::deserialize::<CheckResult>(&encoded[..]) {
Ok(CheckResult { messages, imports }) => Some((messages, imports)),
Ok(CheckResult {
messages: headers,
imports,
sources,
}) => {
let mut messages = Vec::with_capacity(headers.len());
let sources: FxHashMap<_, _> = sources
.into_iter()
.map(|(filename, content)| {
let index = LineIndex::from_source_text(&content);
(filename, SourceCodeBuf::new(&content, index))
})
.collect();
for header in headers {
messages.push(Message {
kind: header.kind,
location: header.location,
end_location: header.end_location,
fix: header.fix,
source: sources.get(&header.filename).cloned(),
filename: header.filename,
noqa_row: header.noqa_row,
});
}
Some((messages, imports))
}
Err(e) => {
error!("Failed to deserialize encoded cache entry: {e:?}");
None
@ -114,16 +203,32 @@ pub fn get<P: AsRef<Path>>(
}
/// Set a value in the cache.
pub fn set<P: AsRef<Path>>(
path: P,
package: Option<&P>,
pub fn set(
path: &Path,
package: Option<&Path>,
metadata: &fs::Metadata,
settings: &AllSettings,
autofix: flags::Autofix,
messages: &[Message],
imports: &ImportMap,
) {
let check_result = CheckResultRef { messages, imports };
// Store the content of the source files, assuming that all files with the same name have the same content
let sources: Vec<_> = messages
.iter()
.filter_map(|message| {
message
.source
.as_ref()
.map(|source| (&*message.filename, source.text()))
})
.unique_by(|(filename, _)| *filename)
.collect();
let check_result = CheckResultRef {
messages,
imports,
sources,
};
if let Err(e) = write_sync(
&settings.cli.cache_dir,
cache_key(path, package, metadata, &settings.lib, autofix),
@ -134,9 +239,9 @@ pub fn set<P: AsRef<Path>>(
}
/// Delete a value from the cache.
pub fn del<P: AsRef<Path>>(
path: P,
package: Option<&P>,
pub fn del(
path: &Path,
package: Option<&Path>,
metadata: &fs::Metadata,
settings: &AllSettings,
autofix: flags::Autofix,

View file

@ -118,7 +118,7 @@ pub fn lint_path(
{
let metadata = path.metadata()?;
if let Some((messages, imports)) =
cache::get(path, package.as_ref(), &metadata, settings, autofix.into())
cache::get(path, package, &metadata, settings, autofix.into())
{
debug!("Cache hit for: {}", path.display());
return Ok(Diagnostics::new(messages, imports));
@ -207,14 +207,14 @@ pub fn lint_path(
// Purge the cache.
if let Some(metadata) = metadata {
cache::del(path, package.as_ref(), &metadata, settings, autofix.into());
cache::del(path, package, &metadata, settings, autofix.into());
}
} else {
// Re-populate the cache.
if let Some(metadata) = metadata {
cache::set(
path,
package.as_ref(),
package,
&metadata,
settings,
autofix.into(),

View file

@ -897,7 +897,7 @@ pub fn match_trailing_comment<T>(located: &Located<T>, locator: &Locator) -> Opt
/// Return the number of trailing empty lines following a statement.
pub fn count_trailing_lines(stmt: &Stmt, locator: &Locator) -> usize {
let suffix = locator.skip(Location::new(stmt.end_location.unwrap().row() + 1, 0));
let suffix = locator.after(Location::new(stmt.end_location.unwrap().row() + 1, 0));
suffix
.lines()
.take_while(|line| line.trim().is_empty())
@ -906,7 +906,7 @@ pub fn count_trailing_lines(stmt: &Stmt, locator: &Locator) -> usize {
/// Return the range of the first parenthesis pair after a given [`Location`].
pub fn match_parens(start: Location, locator: &Locator) -> Option<Range> {
let contents = locator.skip(start);
let contents = locator.after(start);
let mut fix_start = None;
let mut fix_end = None;
let mut count: usize = 0;

View file

@ -83,12 +83,12 @@ impl LineIndex {
}
/// Return the number of lines in the source code.
pub(crate) fn lines_count(&self) -> usize {
pub(crate) fn line_count(&self) -> usize {
self.line_starts().len()
}
/// Returns the [byte offset](TextSize) for the `line` with the given index.
fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize {
pub(crate) fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize {
let row_index = line.to_zero_indexed();
let starts = self.line_starts();
@ -103,7 +103,7 @@ impl LineIndex {
/// Returns the [`TextRange`] of the `line` with the given index.
/// The start points to the first character's [byte offset](TextSize), the end up to, and including
/// the newline character ending the line (if any).
fn line_range(&self, line: OneIndexed, contents: &str) -> TextRange {
pub(crate) fn line_range(&self, line: OneIndexed, contents: &str) -> TextRange {
let starts = self.line_starts();
if starts.len() == line.to_zero_indexed() {
@ -175,6 +175,11 @@ impl OneIndexed {
Self(ONE.saturating_add(value))
}
/// Returns the value as a primitive type.
pub const fn get(self) -> usize {
self.0.get()
}
/// Return the zero-indexed primitive value for this [`OneIndexed`]
pub const fn to_zero_indexed(self) -> usize {
self.0.get() - 1
@ -306,18 +311,18 @@ mod tests {
#[test]
fn utf8_index() {
let index = LineIndex::from_source_text("x = '🫣'");
assert_eq!(index.lines_count(), 1);
assert_eq!(index.line_count(), 1);
assert_eq!(index.line_starts(), &[TextSize::from(0)]);
let index = LineIndex::from_source_text("x = '🫣'\n");
assert_eq!(index.lines_count(), 2);
assert_eq!(index.line_count(), 2);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(11)]
);
let index = LineIndex::from_source_text("x = '🫣'\ny = 2\nz = x + y\n");
assert_eq!(index.lines_count(), 4);
assert_eq!(index.line_count(), 4);
assert_eq!(
index.line_starts(),
&[
@ -329,7 +334,7 @@ mod tests {
);
let index = LineIndex::from_source_text("# 🫣\nclass Foo:\n \"\"\".\"\"\"");
assert_eq!(index.lines_count(), 3);
assert_eq!(index.line_count(), 3);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(7), TextSize::from(18)]
@ -340,7 +345,7 @@ mod tests {
fn utf8_carriage_return() {
let contents = "x = '🫣'\ry = 3";
let index = LineIndex::from_source_text(contents);
assert_eq!(index.lines_count(), 2);
assert_eq!(index.line_count(), 2);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(11)]
@ -365,7 +370,7 @@ mod tests {
fn utf8_carriage_return_newline() {
let contents = "x = '🫣'\r\ny = 3";
let index = LineIndex::from_source_text(contents);
assert_eq!(index.lines_count(), 2);
assert_eq!(index.line_count(), 2);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(12)]

View file

@ -1,57 +1,65 @@
//! Struct used to efficiently slice source code at (row, column) Locations.
use crate::source_code::line_index::LineIndex;
use crate::source_code::{SourceCode, SourceCodeBuf};
use once_cell::unsync::OnceCell;
use ruff_text_size::{TextRange, TextSize};
use ruff_text_size::TextSize;
use rustpython_parser::ast::Location;
use crate::types::Range;
pub struct Locator<'a> {
contents: &'a str,
index: OnceCell<LineIndex>,
line_index: OnceCell<LineIndex>,
}
impl<'a> Locator<'a> {
pub const fn new(contents: &'a str) -> Self {
Self {
contents,
index: OnceCell::new(),
line_index: OnceCell::new(),
}
}
fn get_or_init_index(&self) -> &LineIndex {
self.index
self.line_index
.get_or_init(|| LineIndex::from_source_text(self.contents))
}
fn source_code(&self) -> SourceCode<'a, '_> {
SourceCode {
index: self.get_or_init_index(),
text: self.contents,
}
}
#[inline]
pub fn to_source_code_buf(&self) -> SourceCodeBuf {
self.source_code().to_owned()
}
/// Take the source code up to the given [`Location`].
pub fn take(&self, location: Location) -> &'a str {
let index = self.get_or_init_index();
let offset = index.location_offset(location, self.contents);
&self.contents[TextRange::up_to(offset)]
#[inline]
pub fn up_to(&self, location: Location) -> &'a str {
self.source_code().up_to(location)
}
/// Take the source code after the given [`Location`].
pub fn skip(&self, location: Location) -> &'a str {
let index = self.get_or_init_index();
let offset = index.location_offset(location, self.contents);
&self.contents[usize::from(offset)..]
#[inline]
pub fn after(&self, location: Location) -> &'a str {
self.source_code().after(location)
}
/// Take the source code between the given [`Range`].
#[inline]
pub fn slice<R: Into<Range>>(&self, range: R) -> &'a str {
let index = self.get_or_init_index();
let range = range.into();
let start = index.location_offset(range.location, self.contents);
let end = index.location_offset(range.end_location, self.contents);
&self.contents[TextRange::new(start, end)]
self.source_code().slice(range)
}
/// Return the byte offset of the given [`Location`].
#[inline]
pub fn offset(&self, location: Location) -> TextSize {
let index = self.get_or_init_index();
index.location_offset(location, self.contents)
self.source_code().offset(location)
}
/// Return the underlying source code.
@ -62,7 +70,7 @@ impl<'a> Locator<'a> {
/// Return the number of lines in the source code.
pub fn count_lines(&self) -> usize {
let index = self.get_or_init_index();
index.lines_count()
index.line_count()
}
/// Return the number of bytes in the source code.

View file

@ -5,12 +5,16 @@ mod locator;
mod stylist;
pub use crate::source_code::line_index::{LineIndex, OneIndexed};
use crate::types::Range;
pub use generator::Generator;
pub use indexer::Indexer;
pub use locator::Locator;
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser as parser;
use rustpython_parser::ast::Location;
use rustpython_parser::{lexer, Mode, ParseError};
use std::sync::Arc;
pub use stylist::{LineEnding, Stylist};
/// Run round-trip source code generation on a given Python code.
@ -23,3 +27,214 @@ pub fn round_trip(code: &str, source_path: &str) -> Result<String, ParseError> {
generator.unparse_suite(&python_ast);
Ok(generator.generate())
}
/// Gives access to the source code of a file and allows mapping between [`Location`] and byte offsets.
#[derive(Debug)]
pub struct SourceCode<'src, 'index> {
    // The underlying source text.
    text: &'src str,
    // Line-start offsets for `text`, used for location <-> offset conversion.
    index: &'index LineIndex,
}
impl<'src, 'index> SourceCode<'src, 'index> {
    /// Creates a new `SourceCode` over `content`, reusing a pre-computed
    /// [`LineIndex`] for `content`.
    pub fn new(content: &'src str, index: &'index LineIndex) -> Self {
        Self {
            text: content,
            index,
        }
    }

    /// Take the source code up to the given [`Location`].
    pub fn up_to(&self, location: Location) -> &'src str {
        let offset = self.index.location_offset(location, self.text);
        &self.text[TextRange::up_to(offset)]
    }

    /// Take the source code after the given [`Location`].
    pub fn after(&self, location: Location) -> &'src str {
        let offset = self.index.location_offset(location, self.text);
        &self.text[usize::from(offset)..]
    }

    /// Take the source code between the given [`Range`].
    pub fn slice<R: Into<Range>>(&self, range: R) -> &'src str {
        let range = self.text_range(range);
        &self.text[range]
    }

    /// Converts a [`Location`] range to a byte offset range
    pub fn text_range<R: Into<Range>>(&self, range: R) -> TextRange {
        let range = range.into();
        let start = self.index.location_offset(range.location, self.text);
        let end = self.index.location_offset(range.end_location, self.text);
        TextRange::new(start, end)
    }

    /// Return the byte offset of the given [`Location`].
    pub fn offset(&self, location: Location) -> TextSize {
        self.index.location_offset(location, self.text)
    }

    /// Returns the byte offset at which the line with the given one-based
    /// index starts.
    pub fn line_start(&self, line: OneIndexed) -> TextSize {
        self.index.line_start(line, self.text)
    }

    /// Returns the byte range of the line with the given one-based index,
    /// including its terminating newline (if any).
    pub fn line_range(&self, line: OneIndexed) -> TextRange {
        self.index.line_range(line, self.text)
    }

    /// Returns a string with the lines spanning between location and end location.
    ///
    /// Panics if either row is `0` (rows are one-based).
    pub fn lines(&self, range: Range) -> &'src str {
        let start_line = self
            .index
            .line_range(OneIndexed::new(range.location.row()).unwrap(), self.text);

        let end_line = self.index.line_range(
            OneIndexed::new(range.end_location.row()).unwrap(),
            self.text,
        );

        &self.text[TextRange::new(start_line.start(), end_line.end())]
    }

    /// Returns the source text of the line with the given index
    #[inline]
    pub fn line_text(&self, index: OneIndexed) -> &'src str {
        let range = self.index.line_range(index, self.text);
        &self.text[range]
    }

    /// Returns the entire source text.
    pub fn text(&self) -> &'src str {
        self.text
    }

    /// Returns the number of lines in the source text.
    #[inline]
    pub fn line_count(&self) -> usize {
        self.index.line_count()
    }

    /// Convenience alias for [`Self::to_owned`].
    pub fn to_source_code_buf(&self) -> SourceCodeBuf {
        self.to_owned()
    }

    /// Copies the source text into an owned [`SourceCodeBuf`], cloning the
    /// line index.
    pub fn to_owned(&self) -> SourceCodeBuf {
        SourceCodeBuf::new(self.text, self.index.clone())
    }
}
impl PartialEq<Self> for SourceCode<'_, '_> {
    // Two `SourceCode`s are equal when their texts match; the index is
    // derived from the text, so it carries no extra information.
    fn eq(&self, other: &Self) -> bool {
        str::eq(self.text, other.text)
    }
}

impl Eq for SourceCode<'_, '_> {}

impl PartialEq<SourceCodeBuf> for SourceCode<'_, '_> {
    // Text-only comparison against the owned variant.
    fn eq(&self, other: &SourceCodeBuf) -> bool {
        let other_text: &str = &other.text;
        self.text == other_text
    }
}
/// Gives access to the source code of a file and allows mapping between [`Location`] and byte offsets.
///
/// This is the owned pendant to [`SourceCode`]. Cloning only requires bumping reference counters.
#[derive(Clone, Debug)]
pub struct SourceCodeBuf {
    // The source text, reference-counted so clones of the buffer are cheap.
    text: Arc<str>,
    // Line-start index for `Location` <-> byte offset conversion.
    index: LineIndex,
}
impl SourceCodeBuf {
pub fn new(content: &str, index: LineIndex) -> Self {
Self {
text: Arc::from(content),
index,
}
}
/// Creates the [`LineIndex`] for `text` and returns the [`SourceCodeBuf`].
pub fn from_content(text: &str) -> Self {
Self::new(text, LineIndex::from_source_text(text))
}
#[inline]
fn as_source_code(&self) -> SourceCode {
SourceCode {
text: &self.text,
index: &self.index,
}
}
/// Take the source code up to the given [`Location`].
pub fn up_to(&self, location: Location) -> &str {
self.as_source_code().up_to(location)
}
/// Take the source code after the given [`Location`].
pub fn after(&self, location: Location) -> &str {
self.as_source_code().after(location)
}
/// Take the source code between the given [`Range`].
#[inline]
pub fn slice<R: Into<Range>>(&self, range: R) -> &str {
self.as_source_code().slice(range)
}
/// Converts a [`Location`] range to a byte offset range
#[inline]
pub fn text_range<R: Into<Range>>(&self, range: R) -> TextRange {
self.as_source_code().text_range(range)
}
#[inline]
pub fn line_range(&self, line: OneIndexed) -> TextRange {
self.as_source_code().line_range(line)
}
/// Return the byte offset of the given [`Location`].
#[inline]
pub fn offset(&self, location: Location) -> TextSize {
self.as_source_code().offset(location)
}
#[inline]
pub fn line_start(&self, line: OneIndexed) -> TextSize {
self.as_source_code().line_start(line)
}
#[inline]
pub fn lines(&self, range: Range) -> &str {
self.as_source_code().lines(range)
}
/// Returns the source text of the line with the given index
#[inline]
pub fn line_text(&self, index: OneIndexed) -> &str {
self.as_source_code().line_text(index)
}
#[inline]
pub fn line_count(&self) -> usize {
self.index.line_count()
}
pub fn text(&self) -> &str {
&self.text
}
}
impl PartialEq<Self> for SourceCodeBuf {
    // The same source text should have the same index, so comparing the
    // text alone is sufficient.
    fn eq(&self, other: &Self) -> bool {
        *self.text == *other.text
    }
}

impl PartialEq<SourceCode<'_, '_>> for SourceCodeBuf {
    // Text-only comparison against the borrowed variant.
    fn eq(&self, other: &SourceCode<'_, '_>) -> bool {
        let this: &str = &self.text;
        this == other.text
    }
}

impl Eq for SourceCodeBuf {}

View file

@ -125,7 +125,7 @@ pub fn expand_indented_block(
/// Return true if the `orelse` block of an `if` statement is an `elif` statement.
pub fn is_elif(orelse: &[rustpython_parser::ast::Stmt], locator: &Locator) -> bool {
if orelse.len() == 1 && matches!(orelse[0].node, rustpython_parser::ast::StmtKind::If { .. }) {
let contents = locator.skip(orelse[0].location);
let contents = locator.after(orelse[0].location);
if contents.starts_with("elif") {
return true;
}