Split SourceLocation into LineColumn and SourceLocation (#17587)

This commit is contained in:
Micha Reiser 2025-04-27 11:27:33 +01:00 committed by GitHub
parent 4443f6653c
commit 1c65e0ad25
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
29 changed files with 695 additions and 537 deletions

View file

@ -27,6 +27,16 @@ pub enum PositionEncoding {
UTF8, UTF8,
} }
impl From<PositionEncoding> for ruff_source_file::PositionEncoding {
fn from(value: PositionEncoding) -> Self {
match value {
PositionEncoding::UTF8 => Self::Utf8,
PositionEncoding::UTF16 => Self::Utf16,
PositionEncoding::UTF32 => Self::Utf32,
}
}
}
/// A unique document ID, derived from a URL passed as part of an LSP request. /// A unique document ID, derived from a URL passed as part of an LSP request.
/// This document ID can point to either be a standalone Python file, a full notebook, or a cell within a notebook. /// This document ID can point to either be a standalone Python file, a full notebook, or a cell within a notebook.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]

View file

@ -9,8 +9,8 @@ use red_knot_python_semantic::Db;
use ruff_db::files::FileRange; use ruff_db::files::FileRange;
use ruff_db::source::{line_index, source_text}; use ruff_db::source::{line_index, source_text};
use ruff_notebook::NotebookIndex; use ruff_notebook::NotebookIndex;
use ruff_source_file::OneIndexed; use ruff_source_file::LineIndex;
use ruff_source_file::{LineIndex, SourceLocation}; use ruff_source_file::{OneIndexed, SourceLocation};
use ruff_text_size::{Ranged, TextRange, TextSize}; use ruff_text_size::{Ranged, TextRange, TextSize};
#[expect(dead_code)] #[expect(dead_code)]
@ -46,7 +46,7 @@ impl TextSizeExt for TextSize {
index: &LineIndex, index: &LineIndex,
encoding: PositionEncoding, encoding: PositionEncoding,
) -> types::Position { ) -> types::Position {
let source_location = offset_to_source_location(self, text, index, encoding); let source_location = index.source_location(self, text, encoding.into());
source_location_to_position(&source_location) source_location_to_position(&source_location)
} }
} }
@ -75,36 +75,14 @@ fn u32_index_to_usize(index: u32) -> usize {
impl PositionExt for lsp_types::Position { impl PositionExt for lsp_types::Position {
fn to_text_size(&self, text: &str, index: &LineIndex, encoding: PositionEncoding) -> TextSize { fn to_text_size(&self, text: &str, index: &LineIndex, encoding: PositionEncoding) -> TextSize {
let start_line = index.line_range( index.offset(
OneIndexed::from_zero_indexed(u32_index_to_usize(self.line)), SourceLocation {
line: OneIndexed::from_zero_indexed(u32_index_to_usize(self.line)),
character_offset: OneIndexed::from_zero_indexed(u32_index_to_usize(self.character)),
},
text, text,
); encoding.into(),
)
let start_column_offset = match encoding {
PositionEncoding::UTF8 => TextSize::new(self.character),
PositionEncoding::UTF16 => {
// Fast path for ASCII only documents
if index.is_ascii() {
TextSize::new(self.character)
} else {
// UTF16 encodes characters either as one or two 16 bit words.
// The position in `range` is the 16-bit word offset from the start of the line (and not the character offset)
// UTF-16 with a text that may use variable-length characters.
utf8_column_offset(self.character, &text[start_line])
}
}
PositionEncoding::UTF32 => {
// UTF-32 uses 4 bytes for each character. Meaning, the position in range is a character offset.
return index.offset(
OneIndexed::from_zero_indexed(u32_index_to_usize(self.line)),
OneIndexed::from_zero_indexed(u32_index_to_usize(self.character)),
text,
);
}
};
start_line.start() + start_column_offset.clamp(TextSize::new(0), start_line.end())
} }
} }
@ -142,26 +120,23 @@ impl ToRangeExt for TextRange {
notebook_index: &NotebookIndex, notebook_index: &NotebookIndex,
encoding: PositionEncoding, encoding: PositionEncoding,
) -> NotebookRange { ) -> NotebookRange {
let start = offset_to_source_location(self.start(), text, source_index, encoding); let start = source_index.source_location(self.start(), text, encoding.into());
let mut end = offset_to_source_location(self.end(), text, source_index, encoding); let mut end = source_index.source_location(self.end(), text, encoding.into());
let starting_cell = notebook_index.cell(start.row); let starting_cell = notebook_index.cell(start.line);
// weird edge case here - if the end of the range is where the newline after the cell got added (making it 'out of bounds') // weird edge case here - if the end of the range is where the newline after the cell got added (making it 'out of bounds')
// we need to move it one character back (which should place it at the end of the last line). // we need to move it one character back (which should place it at the end of the last line).
// we test this by checking if the ending offset is in a different (or nonexistent) cell compared to the cell of the starting offset. // we test this by checking if the ending offset is in a different (or nonexistent) cell compared to the cell of the starting offset.
if notebook_index.cell(end.row) != starting_cell { if notebook_index.cell(end.line) != starting_cell {
end.row = end.row.saturating_sub(1); end.line = end.line.saturating_sub(1);
end.column = offset_to_source_location( let offset = self.end().checked_sub(1.into()).unwrap_or_default();
self.end().checked_sub(1.into()).unwrap_or_default(), end.character_offset = source_index
text, .source_location(offset, text, encoding.into())
source_index, .character_offset;
encoding,
)
.column;
} }
let start = source_location_to_position(&notebook_index.translate_location(&start)); let start = source_location_to_position(&notebook_index.translate_source_location(&start));
let end = source_location_to_position(&notebook_index.translate_location(&end)); let end = source_location_to_position(&notebook_index.translate_source_location(&end));
NotebookRange { NotebookRange {
cell: starting_cell cell: starting_cell
@ -172,67 +147,10 @@ impl ToRangeExt for TextRange {
} }
} }
/// Converts a UTF-16 code unit offset for a given line into a UTF-8 column number.
fn utf8_column_offset(utf16_code_unit_offset: u32, line: &str) -> TextSize {
let mut utf8_code_unit_offset = TextSize::new(0);
let mut i = 0u32;
for c in line.chars() {
if i >= utf16_code_unit_offset {
break;
}
// Count characters encoded as two 16 bit words as 2 characters.
{
utf8_code_unit_offset +=
TextSize::new(u32::try_from(c.len_utf8()).expect("utf8 len always <=4"));
i += u32::try_from(c.len_utf16()).expect("utf16 len always <=2");
}
}
utf8_code_unit_offset
}
fn offset_to_source_location(
offset: TextSize,
text: &str,
index: &LineIndex,
encoding: PositionEncoding,
) -> SourceLocation {
match encoding {
PositionEncoding::UTF8 => {
let row = index.line_index(offset);
let column = offset - index.line_start(row, text);
SourceLocation {
column: OneIndexed::from_zero_indexed(column.to_usize()),
row,
}
}
PositionEncoding::UTF16 => {
let row = index.line_index(offset);
let column = if index.is_ascii() {
(offset - index.line_start(row, text)).to_usize()
} else {
let up_to_line = &text[TextRange::new(index.line_start(row, text), offset)];
up_to_line.encode_utf16().count()
};
SourceLocation {
column: OneIndexed::from_zero_indexed(column),
row,
}
}
PositionEncoding::UTF32 => index.source_location(offset, text),
}
}
fn source_location_to_position(location: &SourceLocation) -> types::Position { fn source_location_to_position(location: &SourceLocation) -> types::Position {
types::Position { types::Position {
line: u32::try_from(location.row.to_zero_indexed()).expect("row usize fits in u32"), line: u32::try_from(location.line.to_zero_indexed()).expect("line usize fits in u32"),
character: u32::try_from(location.column.to_zero_indexed()) character: u32::try_from(location.character_offset.to_zero_indexed())
.expect("character usize fits in u32"), .expect("character usize fits in u32"),
} }
} }

View file

@ -263,7 +263,7 @@ impl Matcher {
.and_then(|span| span.range()) .and_then(|span| span.range())
.map(|range| { .map(|range| {
self.line_index self.line_index
.source_location(range.start(), &self.source) .line_column(range.start(), &self.source)
.column .column
}) })
.unwrap_or(OneIndexed::from_zero_indexed(0)) .unwrap_or(OneIndexed::from_zero_indexed(0))

View file

@ -19,7 +19,7 @@ use ruff_db::system::{
use ruff_db::Upcast; use ruff_db::Upcast;
use ruff_notebook::Notebook; use ruff_notebook::Notebook;
use ruff_python_formatter::formatted_file; use ruff_python_formatter::formatted_file;
use ruff_source_file::{LineIndex, OneIndexed, SourceLocation}; use ruff_source_file::{LineColumn, LineIndex, OneIndexed, PositionEncoding, SourceLocation};
use ruff_text_size::{Ranged, TextSize}; use ruff_text_size::{Ranged, TextSize};
use wasm_bindgen::prelude::*; use wasm_bindgen::prelude::*;
@ -408,8 +408,8 @@ impl Range {
} }
} }
impl From<(SourceLocation, SourceLocation)> for Range { impl From<(LineColumn, LineColumn)> for Range {
fn from((start, end): (SourceLocation, SourceLocation)) -> Self { fn from((start, end): (LineColumn, LineColumn)) -> Self {
Self { Self {
start: start.into(), start: start.into(),
end: end.into(), end: end.into(),
@ -438,29 +438,34 @@ impl Position {
impl Position { impl Position {
fn to_text_size(self, text: &str, index: &LineIndex) -> Result<TextSize, Error> { fn to_text_size(self, text: &str, index: &LineIndex) -> Result<TextSize, Error> {
let text_size = index.offset( let text_size = index.offset(
OneIndexed::new(self.line).ok_or_else(|| { SourceLocation {
Error::new("Invalid value `0` for `position.line`. The line index is 1-indexed.") line: OneIndexed::new(self.line).ok_or_else(|| {
})?, Error::new(
OneIndexed::new(self.column).ok_or_else(|| { "Invalid value `0` for `position.line`. The line index is 1-indexed.",
Error::new( )
"Invalid value `0` for `position.column`. The column index is 1-indexed.", })?,
) character_offset: OneIndexed::new(self.column).ok_or_else(|| {
})?, Error::new(
"Invalid value `0` for `position.column`. The column index is 1-indexed.",
)
})?,
},
text, text,
PositionEncoding::Utf32,
); );
Ok(text_size) Ok(text_size)
} }
fn from_text_size(offset: TextSize, line_index: &LineIndex, source: &str) -> Self { fn from_text_size(offset: TextSize, line_index: &LineIndex, source: &str) -> Self {
line_index.source_location(offset, source).into() line_index.line_column(offset, source).into()
} }
} }
impl From<SourceLocation> for Position { impl From<LineColumn> for Position {
fn from(location: SourceLocation) -> Self { fn from(location: LineColumn) -> Self {
Self { Self {
line: location.row.get(), line: location.line.get(),
column: location.column.get(), column: location.column.get(),
} }
} }

View file

@ -5,6 +5,7 @@ use std::path::{Path, PathBuf};
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc; use std::sync::Arc;
use crate::commands::completions::config::{OptionString, OptionStringParser};
use anyhow::bail; use anyhow::bail;
use clap::builder::{TypedValueParser, ValueParserFactory}; use clap::builder::{TypedValueParser, ValueParserFactory};
use clap::{command, Parser, Subcommand}; use clap::{command, Parser, Subcommand};
@ -22,7 +23,7 @@ use ruff_linter::settings::types::{
}; };
use ruff_linter::{RuleParser, RuleSelector, RuleSelectorParser}; use ruff_linter::{RuleParser, RuleSelector, RuleSelectorParser};
use ruff_python_ast as ast; use ruff_python_ast as ast;
use ruff_source_file::{LineIndex, OneIndexed}; use ruff_source_file::{LineIndex, OneIndexed, PositionEncoding};
use ruff_text_size::TextRange; use ruff_text_size::TextRange;
use ruff_workspace::configuration::{Configuration, RuleSelection}; use ruff_workspace::configuration::{Configuration, RuleSelection};
use ruff_workspace::options::{Options, PycodestyleOptions}; use ruff_workspace::options::{Options, PycodestyleOptions};
@ -31,8 +32,6 @@ use ruff_workspace::resolver::ConfigurationTransformer;
use rustc_hash::FxHashMap; use rustc_hash::FxHashMap;
use toml; use toml;
use crate::commands::completions::config::{OptionString, OptionStringParser};
/// All configuration options that can be passed "globally", /// All configuration options that can be passed "globally",
/// i.e., can be passed to all subcommands /// i.e., can be passed to all subcommands
#[derive(Debug, Default, Clone, clap::Args)] #[derive(Debug, Default, Clone, clap::Args)]
@ -1070,8 +1069,9 @@ impl FormatRange {
/// ///
/// Returns an empty range if the start range is past the end of `source`. /// Returns an empty range if the start range is past the end of `source`.
pub(super) fn to_text_range(self, source: &str, line_index: &LineIndex) -> TextRange { pub(super) fn to_text_range(self, source: &str, line_index: &LineIndex) -> TextRange {
let start_byte_offset = line_index.offset(self.start.line, self.start.column, source); let start_byte_offset =
let end_byte_offset = line_index.offset(self.end.line, self.end.column, source); line_index.offset(self.start.into(), source, PositionEncoding::Utf32);
let end_byte_offset = line_index.offset(self.end.into(), source, PositionEncoding::Utf32);
TextRange::new(start_byte_offset, end_byte_offset) TextRange::new(start_byte_offset, end_byte_offset)
} }
@ -1142,6 +1142,15 @@ pub struct LineColumn {
pub column: OneIndexed, pub column: OneIndexed,
} }
impl From<LineColumn> for ruff_source_file::SourceLocation {
fn from(value: LineColumn) -> Self {
Self {
line: value.line,
character_offset: value.column,
}
}
}
impl std::fmt::Display for LineColumn { impl std::fmt::Display for LineColumn {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{line}:{column}", line = self.line, column = self.column) write!(f, "{line}:{column}", line = self.line, column = self.column)

View file

@ -71,8 +71,8 @@ impl std::fmt::Display for DisplayDiagnostic<'_> {
write!(f, " {path}", path = self.resolver.path(span.file()))?; write!(f, " {path}", path = self.resolver.path(span.file()))?;
if let Some(range) = span.range() { if let Some(range) = span.range() {
let input = self.resolver.input(span.file()); let input = self.resolver.input(span.file());
let start = input.as_source_code().source_location(range.start()); let start = input.as_source_code().line_column(range.start());
write!(f, ":{line}:{col}", line = start.row, col = start.column)?; write!(f, ":{line}:{col}", line = start.line, col = start.column)?;
} }
write!(f, ":")?; write!(f, ":")?;
} }

View file

@ -191,7 +191,7 @@ pub(crate) fn definitions(checker: &mut Checker) {
warn_user!( warn_user!(
"Docstring at {}:{}:{} contains implicit string concatenation; ignoring...", "Docstring at {}:{}:{} contains implicit string concatenation; ignoring...",
relativize_path(checker.path), relativize_path(checker.path),
location.row, location.line,
location.column location.column
); );
continue; continue;

View file

@ -2,7 +2,7 @@
use std::cell::OnceCell; use std::cell::OnceCell;
use ruff_source_file::{LineIndex, LineRanges, OneIndexed, SourceCode, SourceLocation}; use ruff_source_file::{LineColumn, LineIndex, LineRanges, OneIndexed, SourceCode};
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
#[derive(Debug)] #[derive(Debug)]
@ -36,8 +36,8 @@ impl<'a> Locator<'a> {
#[deprecated( #[deprecated(
note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead." note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead."
)] )]
pub fn compute_source_location(&self, offset: TextSize) -> SourceLocation { pub fn compute_source_location(&self, offset: TextSize) -> LineColumn {
self.to_source_code().source_location(offset) self.to_source_code().line_column(offset)
} }
pub fn to_index(&self) -> &LineIndex { pub fn to_index(&self) -> &LineIndex {

View file

@ -9,7 +9,7 @@ use log::Level;
use ruff_python_parser::{ParseError, ParseErrorType}; use ruff_python_parser::{ParseError, ParseErrorType};
use rustc_hash::FxHashSet; use rustc_hash::FxHashSet;
use ruff_source_file::{LineIndex, OneIndexed, SourceCode, SourceLocation}; use ruff_source_file::{LineColumn, LineIndex, OneIndexed, SourceCode};
use crate::fs; use crate::fs;
use crate::source_kind::SourceKind; use crate::source_kind::SourceKind;
@ -195,21 +195,21 @@ impl DisplayParseError {
// Translate the byte offset to a location in the originating source. // Translate the byte offset to a location in the originating source.
let location = let location =
if let Some(jupyter_index) = source_kind.as_ipy_notebook().map(Notebook::index) { if let Some(jupyter_index) = source_kind.as_ipy_notebook().map(Notebook::index) {
let source_location = source_code.source_location(error.location.start()); let source_location = source_code.line_column(error.location.start());
ErrorLocation::Cell( ErrorLocation::Cell(
jupyter_index jupyter_index
.cell(source_location.row) .cell(source_location.line)
.unwrap_or(OneIndexed::MIN), .unwrap_or(OneIndexed::MIN),
SourceLocation { LineColumn {
row: jupyter_index line: jupyter_index
.cell_row(source_location.row) .cell_row(source_location.line)
.unwrap_or(OneIndexed::MIN), .unwrap_or(OneIndexed::MIN),
column: source_location.column, column: source_location.column,
}, },
) )
} else { } else {
ErrorLocation::File(source_code.source_location(error.location.start())) ErrorLocation::File(source_code.line_column(error.location.start()))
}; };
Self { Self {
@ -245,7 +245,7 @@ impl Display for DisplayParseError {
write!( write!(
f, f,
"{row}{colon}{column}{colon} {inner}", "{row}{colon}{column}{colon} {inner}",
row = location.row, row = location.line,
column = location.column, column = location.column,
colon = ":".cyan(), colon = ":".cyan(),
inner = &DisplayParseErrorType(&self.error.error) inner = &DisplayParseErrorType(&self.error.error)
@ -256,7 +256,7 @@ impl Display for DisplayParseError {
f, f,
"{cell}{colon}{row}{colon}{column}{colon} {inner}", "{cell}{colon}{row}{colon}{column}{colon} {inner}",
cell = cell, cell = cell,
row = location.row, row = location.line,
column = location.column, column = location.column,
colon = ":".cyan(), colon = ":".cyan(),
inner = &DisplayParseErrorType(&self.error.error) inner = &DisplayParseErrorType(&self.error.error)
@ -283,9 +283,9 @@ impl Display for DisplayParseErrorType<'_> {
#[derive(Debug)] #[derive(Debug)]
enum ErrorLocation { enum ErrorLocation {
/// The error occurred in a Python file. /// The error occurred in a Python file.
File(SourceLocation), File(LineColumn),
/// The error occurred in a Jupyter cell. /// The error occurred in a Jupyter cell.
Cell(OneIndexed, SourceLocation), Cell(OneIndexed, LineColumn),
} }
/// Truncates the display text before the first newline character to avoid line breaks. /// Truncates the display text before the first newline character to avoid line breaks.

View file

@ -1,6 +1,6 @@
use std::io::Write; use std::io::Write;
use ruff_source_file::SourceLocation; use ruff_source_file::LineColumn;
use crate::message::{Emitter, EmitterContext, Message}; use crate::message::{Emitter, EmitterContext, Message};
@ -20,7 +20,7 @@ impl Emitter for AzureEmitter {
let location = if context.is_notebook(message.filename()) { let location = if context.is_notebook(message.filename()) {
// We can't give a reasonable location for the structured formats, // We can't give a reasonable location for the structured formats,
// so we show one that's clearly a fallback // so we show one that's clearly a fallback
SourceLocation::default() LineColumn::default()
} else { } else {
message.compute_start_location() message.compute_start_location()
}; };
@ -30,7 +30,7 @@ impl Emitter for AzureEmitter {
"##vso[task.logissue type=error\ "##vso[task.logissue type=error\
;sourcepath={filename};linenumber={line};columnnumber={col};{code}]{body}", ;sourcepath={filename};linenumber={line};columnnumber={col};{code}]{body}",
filename = message.filename(), filename = message.filename(),
line = location.row, line = location.line,
col = location.column, col = location.column,
code = message code = message
.rule() .rule()

View file

@ -1,6 +1,6 @@
use std::io::Write; use std::io::Write;
use ruff_source_file::SourceLocation; use ruff_source_file::LineColumn;
use crate::fs::relativize_path; use crate::fs::relativize_path;
use crate::message::{Emitter, EmitterContext, Message}; use crate::message::{Emitter, EmitterContext, Message};
@ -22,9 +22,9 @@ impl Emitter for GithubEmitter {
let location = if context.is_notebook(message.filename()) { let location = if context.is_notebook(message.filename()) {
// We can't give a reasonable location for the structured formats, // We can't give a reasonable location for the structured formats,
// so we show one that's clearly a fallback // so we show one that's clearly a fallback
SourceLocation::default() LineColumn::default()
} else { } else {
source_location.clone() source_location
}; };
let end_location = message.compute_end_location(); let end_location = message.compute_end_location();
@ -34,9 +34,9 @@ impl Emitter for GithubEmitter {
"::error title=Ruff{code},file={file},line={row},col={column},endLine={end_row},endColumn={end_column}::", "::error title=Ruff{code},file={file},line={row},col={column},endLine={end_row},endColumn={end_column}::",
code = message.rule().map_or_else(String::new, |rule| format!(" ({})", rule.noqa_code())), code = message.rule().map_or_else(String::new, |rule| format!(" ({})", rule.noqa_code())),
file = message.filename(), file = message.filename(),
row = source_location.row, row = source_location.line,
column = source_location.column, column = source_location.column,
end_row = end_location.row, end_row = end_location.line,
end_column = end_location.column, end_column = end_location.column,
)?; )?;
@ -44,7 +44,7 @@ impl Emitter for GithubEmitter {
writer, writer,
"{path}:{row}:{column}:", "{path}:{row}:{column}:",
path = relativize_path(message.filename()), path = relativize_path(message.filename()),
row = location.row, row = location.line,
column = location.column, column = location.column,
)?; )?;

View file

@ -71,8 +71,8 @@ impl Serialize for SerializedMessages<'_> {
}) })
} else { } else {
json!({ json!({
"begin": start_location.row, "begin": start_location.line,
"end": end_location.row "end": end_location.line
}) })
}; };

View file

@ -57,7 +57,7 @@ impl Emitter for GroupedEmitter {
let mut max_column_length = OneIndexed::MIN; let mut max_column_length = OneIndexed::MIN;
for message in &messages { for message in &messages {
max_row_length = max_row_length.max(message.start_location.row); max_row_length = max_row_length.max(message.start_location.line);
max_column_length = max_column_length.max(message.start_location.column); max_column_length = max_column_length.max(message.start_location.column);
} }
@ -115,8 +115,8 @@ impl Display for DisplayGroupedMessage<'_> {
write!( write!(
f, f,
" {row_padding}", " {row_padding}",
row_padding = row_padding = " "
" ".repeat(self.row_length.get() - calculate_print_width(start_location.row).get()) .repeat(self.row_length.get() - calculate_print_width(start_location.line).get())
)?; )?;
// Check if we're working on a jupyter notebook and translate positions with cell accordingly // Check if we're working on a jupyter notebook and translate positions with cell accordingly
@ -125,18 +125,18 @@ impl Display for DisplayGroupedMessage<'_> {
f, f,
"cell {cell}{sep}", "cell {cell}{sep}",
cell = jupyter_index cell = jupyter_index
.cell(start_location.row) .cell(start_location.line)
.unwrap_or(OneIndexed::MIN), .unwrap_or(OneIndexed::MIN),
sep = ":".cyan() sep = ":".cyan()
)?; )?;
( (
jupyter_index jupyter_index
.cell_row(start_location.row) .cell_row(start_location.line)
.unwrap_or(OneIndexed::MIN), .unwrap_or(OneIndexed::MIN),
start_location.column, start_location.column,
) )
} else { } else {
(start_location.row, start_location.column) (start_location.line, start_location.column)
}; };
writeln!( writeln!(

View file

@ -6,7 +6,7 @@ use serde_json::{json, Value};
use ruff_diagnostics::Edit; use ruff_diagnostics::Edit;
use ruff_notebook::NotebookIndex; use ruff_notebook::NotebookIndex;
use ruff_source_file::{OneIndexed, SourceCode, SourceLocation}; use ruff_source_file::{LineColumn, OneIndexed, SourceCode};
use ruff_text_size::Ranged; use ruff_text_size::Ranged;
use crate::message::{Emitter, EmitterContext, Message}; use crate::message::{Emitter, EmitterContext, Message};
@ -60,22 +60,23 @@ pub(crate) fn message_to_json_value(message: &Message, context: &EmitterContext)
}) })
}); });
let mut start_location = source_code.source_location(message.start()); let mut start_location = source_code.line_column(message.start());
let mut end_location = source_code.source_location(message.end()); let mut end_location = source_code.line_column(message.end());
let mut noqa_location = message let mut noqa_location = message
.noqa_offset() .noqa_offset()
.map(|offset| source_code.source_location(offset)); .map(|offset| source_code.line_column(offset));
let mut notebook_cell_index = None; let mut notebook_cell_index = None;
if let Some(notebook_index) = notebook_index { if let Some(notebook_index) = notebook_index {
notebook_cell_index = Some( notebook_cell_index = Some(
notebook_index notebook_index
.cell(start_location.row) .cell(start_location.line)
.unwrap_or(OneIndexed::MIN), .unwrap_or(OneIndexed::MIN),
); );
start_location = notebook_index.translate_location(&start_location); start_location = notebook_index.translate_line_column(&start_location);
end_location = notebook_index.translate_location(&end_location); end_location = notebook_index.translate_line_column(&end_location);
noqa_location = noqa_location.map(|location| notebook_index.translate_location(&location)); noqa_location =
noqa_location.map(|location| notebook_index.translate_line_column(&location));
} }
json!({ json!({
@ -84,10 +85,17 @@ pub(crate) fn message_to_json_value(message: &Message, context: &EmitterContext)
"message": message.body(), "message": message.body(),
"fix": fix, "fix": fix,
"cell": notebook_cell_index, "cell": notebook_cell_index,
"location": start_location, "location": location_to_json(start_location),
"end_location": end_location, "end_location": location_to_json(end_location),
"filename": message.filename(), "filename": message.filename(),
"noqa_row": noqa_location.map(|location| location.row) "noqa_row": noqa_location.map(|location| location.line)
})
}
fn location_to_json(location: LineColumn) -> serde_json::Value {
json!({
"row": location.line,
"column": location.column
}) })
} }
@ -105,8 +113,8 @@ impl Serialize for ExpandedEdits<'_> {
let mut s = serializer.serialize_seq(Some(self.edits.len()))?; let mut s = serializer.serialize_seq(Some(self.edits.len()))?;
for edit in self.edits { for edit in self.edits {
let mut location = self.source_code.source_location(edit.start()); let mut location = self.source_code.line_column(edit.start());
let mut end_location = self.source_code.source_location(edit.end()); let mut end_location = self.source_code.line_column(edit.end());
if let Some(notebook_index) = self.notebook_index { if let Some(notebook_index) = self.notebook_index {
// There exists a newline between each cell's source code in the // There exists a newline between each cell's source code in the
@ -118,44 +126,44 @@ impl Serialize for ExpandedEdits<'_> {
// If it does, we need to translate the end location to the last // If it does, we need to translate the end location to the last
// character of the previous cell. // character of the previous cell.
match ( match (
notebook_index.cell(location.row), notebook_index.cell(location.line),
notebook_index.cell(end_location.row), notebook_index.cell(end_location.line),
) { ) {
(Some(start_cell), Some(end_cell)) if start_cell != end_cell => { (Some(start_cell), Some(end_cell)) if start_cell != end_cell => {
debug_assert_eq!(end_location.column.get(), 1); debug_assert_eq!(end_location.column.get(), 1);
let prev_row = end_location.row.saturating_sub(1); let prev_row = end_location.line.saturating_sub(1);
end_location = SourceLocation { end_location = LineColumn {
row: notebook_index.cell_row(prev_row).unwrap_or(OneIndexed::MIN), line: notebook_index.cell_row(prev_row).unwrap_or(OneIndexed::MIN),
column: self column: self
.source_code .source_code
.source_location(self.source_code.line_end_exclusive(prev_row)) .line_column(self.source_code.line_end_exclusive(prev_row))
.column, .column,
}; };
} }
(Some(_), None) => { (Some(_), None) => {
debug_assert_eq!(end_location.column.get(), 1); debug_assert_eq!(end_location.column.get(), 1);
let prev_row = end_location.row.saturating_sub(1); let prev_row = end_location.line.saturating_sub(1);
end_location = SourceLocation { end_location = LineColumn {
row: notebook_index.cell_row(prev_row).unwrap_or(OneIndexed::MIN), line: notebook_index.cell_row(prev_row).unwrap_or(OneIndexed::MIN),
column: self column: self
.source_code .source_code
.source_location(self.source_code.line_end_exclusive(prev_row)) .line_column(self.source_code.line_end_exclusive(prev_row))
.column, .column,
}; };
} }
_ => { _ => {
end_location = notebook_index.translate_location(&end_location); end_location = notebook_index.translate_line_column(&end_location);
} }
} }
location = notebook_index.translate_location(&location); location = notebook_index.translate_line_column(&location);
} }
let value = json!({ let value = json!({
"content": edit.content().unwrap_or_default(), "content": edit.content().unwrap_or_default(),
"location": location, "location": location_to_json(location),
"end_location": end_location "end_location": location_to_json(end_location)
}); });
s.serialize_element(&value)?; s.serialize_element(&value)?;

View file

@ -3,7 +3,7 @@ use std::path::Path;
use quick_junit::{NonSuccessKind, Report, TestCase, TestCaseStatus, TestSuite, XmlString}; use quick_junit::{NonSuccessKind, Report, TestCase, TestCaseStatus, TestSuite, XmlString};
use ruff_source_file::SourceLocation; use ruff_source_file::LineColumn;
use crate::message::{ use crate::message::{
group_messages_by_filename, Emitter, EmitterContext, Message, MessageWithLocation, group_messages_by_filename, Emitter, EmitterContext, Message, MessageWithLocation,
@ -47,14 +47,14 @@ impl Emitter for JunitEmitter {
let location = if context.is_notebook(message.filename()) { let location = if context.is_notebook(message.filename()) {
// We can't give a reasonable location for the structured formats, // We can't give a reasonable location for the structured formats,
// so we show one that's clearly a fallback // so we show one that's clearly a fallback
SourceLocation::default() LineColumn::default()
} else { } else {
start_location start_location
}; };
status.set_description(format!( status.set_description(format!(
"line {row}, col {col}, {body}", "line {row}, col {col}, {body}",
row = location.row, row = location.line,
col = location.column, col = location.column,
body = message.body() body = message.body()
)); ));
@ -72,7 +72,7 @@ impl Emitter for JunitEmitter {
case.set_classname(classname.to_str().unwrap()); case.set_classname(classname.to_str().unwrap());
case.extra.insert( case.extra.insert(
XmlString::new("line"), XmlString::new("line"),
XmlString::new(location.row.to_string()), XmlString::new(location.line.to_string()),
); );
case.extra.insert( case.extra.insert(
XmlString::new("column"), XmlString::new("column"),

View file

@ -18,7 +18,7 @@ pub use rdjson::RdjsonEmitter;
use ruff_diagnostics::{Diagnostic, DiagnosticKind, Fix}; use ruff_diagnostics::{Diagnostic, DiagnosticKind, Fix};
use ruff_notebook::NotebookIndex; use ruff_notebook::NotebookIndex;
use ruff_python_parser::{ParseError, UnsupportedSyntaxError}; use ruff_python_parser::{ParseError, UnsupportedSyntaxError};
use ruff_source_file::{SourceFile, SourceLocation}; use ruff_source_file::{LineColumn, SourceFile};
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
pub use sarif::SarifEmitter; pub use sarif::SarifEmitter;
pub use text::TextEmitter; pub use text::TextEmitter;
@ -239,17 +239,15 @@ impl Message {
} }
/// Computes the start source location for the message. /// Computes the start source location for the message.
pub fn compute_start_location(&self) -> SourceLocation { pub fn compute_start_location(&self) -> LineColumn {
self.source_file() self.source_file()
.to_source_code() .to_source_code()
.source_location(self.start()) .line_column(self.start())
} }
/// Computes the end source location for the message. /// Computes the end source location for the message.
pub fn compute_end_location(&self) -> SourceLocation { pub fn compute_end_location(&self) -> LineColumn {
self.source_file() self.source_file().to_source_code().line_column(self.end())
.to_source_code()
.source_location(self.end())
} }
/// Returns the [`SourceFile`] which the message belongs to. /// Returns the [`SourceFile`] which the message belongs to.
@ -284,7 +282,7 @@ impl Ranged for Message {
struct MessageWithLocation<'a> { struct MessageWithLocation<'a> {
message: &'a Message, message: &'a Message,
start_location: SourceLocation, start_location: LineColumn,
} }
impl Deref for MessageWithLocation<'_> { impl Deref for MessageWithLocation<'_> {

View file

@ -23,7 +23,7 @@ impl Emitter for PylintEmitter {
// so we show one that's clearly a fallback // so we show one that's clearly a fallback
OneIndexed::from_zero_indexed(0) OneIndexed::from_zero_indexed(0)
} else { } else {
message.compute_start_location().row message.compute_start_location().line
}; };
let body = if let Some(rule) = message.rule() { let body = if let Some(rule) = message.rule() {

View file

@ -8,7 +8,7 @@ use ruff_diagnostics::Edit;
use ruff_source_file::SourceCode; use ruff_source_file::SourceCode;
use ruff_text_size::Ranged; use ruff_text_size::Ranged;
use crate::message::{Emitter, EmitterContext, Message, SourceLocation}; use crate::message::{Emitter, EmitterContext, LineColumn, Message};
#[derive(Default)] #[derive(Default)]
pub struct RdjsonEmitter; pub struct RdjsonEmitter;
@ -59,15 +59,15 @@ impl Serialize for ExpandedMessages<'_> {
fn message_to_rdjson_value(message: &Message) -> Value { fn message_to_rdjson_value(message: &Message) -> Value {
let source_code = message.source_file().to_source_code(); let source_code = message.source_file().to_source_code();
let start_location = source_code.source_location(message.start()); let start_location = source_code.line_column(message.start());
let end_location = source_code.source_location(message.end()); let end_location = source_code.line_column(message.end());
if let Some(fix) = message.fix() { if let Some(fix) = message.fix() {
json!({ json!({
"message": message.body(), "message": message.body(),
"location": { "location": {
"path": message.filename(), "path": message.filename(),
"range": rdjson_range(&start_location, &end_location), "range": rdjson_range(start_location, end_location),
}, },
"code": { "code": {
"value": message.rule().map(|rule| rule.noqa_code().to_string()), "value": message.rule().map(|rule| rule.noqa_code().to_string()),
@ -80,7 +80,7 @@ fn message_to_rdjson_value(message: &Message) -> Value {
"message": message.body(), "message": message.body(),
"location": { "location": {
"path": message.filename(), "path": message.filename(),
"range": rdjson_range(&start_location, &end_location), "range": rdjson_range(start_location, end_location),
}, },
"code": { "code": {
"value": message.rule().map(|rule| rule.noqa_code().to_string()), "value": message.rule().map(|rule| rule.noqa_code().to_string()),
@ -95,11 +95,11 @@ fn rdjson_suggestions(edits: &[Edit], source_code: &SourceCode) -> Value {
edits edits
.iter() .iter()
.map(|edit| { .map(|edit| {
let location = source_code.source_location(edit.start()); let location = source_code.line_column(edit.start());
let end_location = source_code.source_location(edit.end()); let end_location = source_code.line_column(edit.end());
json!({ json!({
"range": rdjson_range(&location, &end_location), "range": rdjson_range(location, end_location),
"text": edit.content().unwrap_or_default(), "text": edit.content().unwrap_or_default(),
}) })
}) })
@ -107,16 +107,10 @@ fn rdjson_suggestions(edits: &[Edit], source_code: &SourceCode) -> Value {
) )
} }
fn rdjson_range(start: &SourceLocation, end: &SourceLocation) -> Value { fn rdjson_range(start: LineColumn, end: LineColumn) -> Value {
json!({ json!({
"start": { "start": start,
"line": start.row, "end": end,
"column": start.column,
},
"end": {
"line": end.row,
"column": end.column,
},
}) })
} }

View file

@ -129,9 +129,9 @@ impl SarifResult {
uri: url::Url::from_file_path(&path) uri: url::Url::from_file_path(&path)
.map_err(|()| anyhow::anyhow!("Failed to convert path to URL: {}", path.display()))? .map_err(|()| anyhow::anyhow!("Failed to convert path to URL: {}", path.display()))?
.to_string(), .to_string(),
start_line: start_location.row, start_line: start_location.line,
start_column: start_location.column, start_column: start_location.column,
end_line: end_location.row, end_line: end_location.line,
end_column: end_location.column, end_column: end_location.column,
}) })
} }
@ -147,9 +147,9 @@ impl SarifResult {
level: "error".to_string(), level: "error".to_string(),
message: message.body().to_string(), message: message.body().to_string(),
uri: path.display().to_string(), uri: path.display().to_string(),
start_line: start_location.row, start_line: start_location.line,
start_column: start_location.column, start_column: start_location.column,
end_line: end_location.row, end_line: end_location.line,
end_column: end_location.column, end_column: end_location.column,
}) })
} }

View file

@ -7,7 +7,7 @@ use colored::Colorize;
use ruff_annotate_snippets::{Level, Renderer, Snippet}; use ruff_annotate_snippets::{Level, Renderer, Snippet};
use ruff_notebook::NotebookIndex; use ruff_notebook::NotebookIndex;
use ruff_source_file::{OneIndexed, SourceLocation}; use ruff_source_file::{LineColumn, OneIndexed};
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize}; use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use crate::fs::relativize_path; use crate::fs::relativize_path;
@ -86,14 +86,14 @@ impl Emitter for TextEmitter {
writer, writer,
"cell {cell}{sep}", "cell {cell}{sep}",
cell = notebook_index cell = notebook_index
.cell(start_location.row) .cell(start_location.line)
.unwrap_or(OneIndexed::MIN), .unwrap_or(OneIndexed::MIN),
sep = ":".cyan(), sep = ":".cyan(),
)?; )?;
SourceLocation { LineColumn {
row: notebook_index line: notebook_index
.cell_row(start_location.row) .cell_row(start_location.line)
.unwrap_or(OneIndexed::MIN), .unwrap_or(OneIndexed::MIN),
column: start_location.column, column: start_location.column,
} }
@ -104,7 +104,7 @@ impl Emitter for TextEmitter {
writeln!( writeln!(
writer, writer,
"{row}{sep}{col}{sep} {code_and_body}", "{row}{sep}{col}{sep} {code_and_body}",
row = diagnostic_location.row, row = diagnostic_location.line,
col = diagnostic_location.column, col = diagnostic_location.column,
sep = ":".cyan(), sep = ":".cyan(),
code_and_body = RuleCodeAndBody { code_and_body = RuleCodeAndBody {

View file

@ -1,6 +1,6 @@
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use ruff_source_file::{OneIndexed, SourceLocation}; use ruff_source_file::{LineColumn, OneIndexed, SourceLocation};
/// Jupyter Notebook indexing table /// Jupyter Notebook indexing table
/// ///
@ -33,16 +33,29 @@ impl NotebookIndex {
self.row_to_row_in_cell.get(row.to_zero_indexed()).copied() self.row_to_row_in_cell.get(row.to_zero_indexed()).copied()
} }
/// Translates the given source location based on the indexing table. /// Translates the given [`LineColumn`] based on the indexing table.
/// ///
/// This will translate the row/column in the concatenated source code /// This will translate the row/column in the concatenated source code
/// to the row/column in the Jupyter Notebook. /// to the row/column in the Jupyter Notebook.
pub fn translate_location(&self, source_location: &SourceLocation) -> SourceLocation { pub fn translate_line_column(&self, source_location: &LineColumn) -> LineColumn {
SourceLocation { LineColumn {
row: self line: self
.cell_row(source_location.row) .cell_row(source_location.line)
.unwrap_or(OneIndexed::MIN), .unwrap_or(OneIndexed::MIN),
column: source_location.column, column: source_location.column,
} }
} }
/// Translates the given [`SourceLocation`] based on the indexing table.
///
/// This will translate the line/character in the concatenated source code
/// to the line/character in the Jupyter Notebook.
pub fn translate_source_location(&self, source_location: &SourceLocation) -> SourceLocation {
SourceLocation {
line: self
.cell_row(source_location.line)
.unwrap_or(OneIndexed::MIN),
character_offset: source_location.character_offset,
}
}
} }

View file

@ -430,10 +430,10 @@ fn ensure_unchanged_ast(
formatted_unsupported_syntax_errors formatted_unsupported_syntax_errors
.into_values() .into_values()
.map(|error| { .map(|error| {
let location = index.source_location(error.start(), formatted_code); let location = index.line_column(error.start(), formatted_code);
format!( format!(
"{row}:{col} {error}", "{row}:{col} {error}",
row = location.row, row = location.line,
col = location.column col = location.column
) )
}) })

View file

@ -31,6 +31,16 @@ pub enum PositionEncoding {
UTF8, UTF8,
} }
impl From<PositionEncoding> for ruff_source_file::PositionEncoding {
fn from(value: PositionEncoding) -> Self {
match value {
PositionEncoding::UTF8 => Self::Utf8,
PositionEncoding::UTF16 => Self::Utf16,
PositionEncoding::UTF32 => Self::Utf32,
}
}
}
/// A unique document ID, derived from a URL passed as part of an LSP request. /// A unique document ID, derived from a URL passed as part of an LSP request.
/// This document ID can point to either be a standalone Python file, a full notebook, or a cell within a notebook. /// This document ID can point to either be a standalone Python file, a full notebook, or a cell within a notebook.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]

View file

@ -2,9 +2,9 @@ use super::notebook;
use super::PositionEncoding; use super::PositionEncoding;
use lsp_types as types; use lsp_types as types;
use ruff_notebook::NotebookIndex; use ruff_notebook::NotebookIndex;
use ruff_source_file::OneIndexed; use ruff_source_file::LineIndex;
use ruff_source_file::{LineIndex, SourceLocation}; use ruff_source_file::{OneIndexed, SourceLocation};
use ruff_text_size::{TextRange, TextSize}; use ruff_text_size::TextRange;
pub(crate) struct NotebookRange { pub(crate) struct NotebookRange {
pub(crate) cell: notebook::CellId, pub(crate) cell: notebook::CellId,
@ -38,76 +38,43 @@ impl RangeExt for lsp_types::Range {
index: &LineIndex, index: &LineIndex,
encoding: PositionEncoding, encoding: PositionEncoding,
) -> TextRange { ) -> TextRange {
let start_line = index.line_range( let start = index.offset(
OneIndexed::from_zero_indexed(u32_index_to_usize(self.start.line)), SourceLocation {
line: OneIndexed::from_zero_indexed(u32_index_to_usize(self.start.line)),
character_offset: OneIndexed::from_zero_indexed(u32_index_to_usize(
self.start.character,
)),
},
text, text,
encoding.into(),
); );
let end_line = index.line_range( let end = index.offset(
OneIndexed::from_zero_indexed(u32_index_to_usize(self.end.line)), SourceLocation {
line: OneIndexed::from_zero_indexed(u32_index_to_usize(self.end.line)),
character_offset: OneIndexed::from_zero_indexed(u32_index_to_usize(
self.end.character,
)),
},
text, text,
encoding.into(),
); );
let (start_column_offset, end_column_offset) = match encoding { TextRange::new(start, end)
PositionEncoding::UTF8 => (
TextSize::new(self.start.character),
TextSize::new(self.end.character),
),
PositionEncoding::UTF16 => {
// Fast path for ASCII only documents
if index.is_ascii() {
(
TextSize::new(self.start.character),
TextSize::new(self.end.character),
)
} else {
// UTF16 encodes characters either as one or two 16 bit words.
// The position in `range` is the 16-bit word offset from the start of the line (and not the character offset)
// UTF-16 with a text that may use variable-length characters.
(
utf8_column_offset(self.start.character, &text[start_line]),
utf8_column_offset(self.end.character, &text[end_line]),
)
}
}
PositionEncoding::UTF32 => {
// UTF-32 uses 4 bytes for each character. Meaning, the position in range is a character offset.
return TextRange::new(
index.offset(
OneIndexed::from_zero_indexed(u32_index_to_usize(self.start.line)),
OneIndexed::from_zero_indexed(u32_index_to_usize(self.start.character)),
text,
),
index.offset(
OneIndexed::from_zero_indexed(u32_index_to_usize(self.end.line)),
OneIndexed::from_zero_indexed(u32_index_to_usize(self.end.character)),
text,
),
);
}
};
TextRange::new(
start_line.start() + start_column_offset.clamp(TextSize::new(0), start_line.end()),
end_line.start() + end_column_offset.clamp(TextSize::new(0), end_line.end()),
)
} }
} }
impl ToRangeExt for TextRange { impl ToRangeExt for TextRange {
fn to_range(&self, text: &str, index: &LineIndex, encoding: PositionEncoding) -> types::Range { fn to_range(&self, text: &str, index: &LineIndex, encoding: PositionEncoding) -> types::Range {
types::Range { types::Range {
start: source_location_to_position(&offset_to_source_location( start: source_location_to_position(&index.source_location(
self.start(), self.start(),
text, text,
index, encoding.into(),
encoding,
)), )),
end: source_location_to_position(&offset_to_source_location( end: source_location_to_position(&index.source_location(
self.end(), self.end(),
text, text,
index, encoding.into(),
encoding,
)), )),
} }
} }
@ -119,26 +86,26 @@ impl ToRangeExt for TextRange {
notebook_index: &NotebookIndex, notebook_index: &NotebookIndex,
encoding: PositionEncoding, encoding: PositionEncoding,
) -> NotebookRange { ) -> NotebookRange {
let start = offset_to_source_location(self.start(), text, source_index, encoding); let start = source_index.source_location(self.start(), text, encoding.into());
let mut end = offset_to_source_location(self.end(), text, source_index, encoding); let mut end = source_index.source_location(self.end(), text, encoding.into());
let starting_cell = notebook_index.cell(start.row); let starting_cell = notebook_index.cell(start.line);
// weird edge case here - if the end of the range is where the newline after the cell got added (making it 'out of bounds') // weird edge case here - if the end of the range is where the newline after the cell got added (making it 'out of bounds')
// we need to move it one character back (which should place it at the end of the last line). // we need to move it one character back (which should place it at the end of the last line).
// we test this by checking if the ending offset is in a different (or nonexistent) cell compared to the cell of the starting offset. // we test this by checking if the ending offset is in a different (or nonexistent) cell compared to the cell of the starting offset.
if notebook_index.cell(end.row) != starting_cell { if notebook_index.cell(end.line) != starting_cell {
end.row = end.row.saturating_sub(1); end.line = end.line.saturating_sub(1);
end.column = offset_to_source_location( end.character_offset = source_index
self.end().checked_sub(1.into()).unwrap_or_default(), .source_location(
text, self.end().checked_sub(1.into()).unwrap_or_default(),
source_index, text,
encoding, encoding.into(),
) )
.column; .character_offset;
} }
let start = source_location_to_position(&notebook_index.translate_location(&start)); let start = source_location_to_position(&notebook_index.translate_source_location(&start));
let end = source_location_to_position(&notebook_index.translate_location(&end)); let end = source_location_to_position(&notebook_index.translate_source_location(&end));
NotebookRange { NotebookRange {
cell: starting_cell cell: starting_cell
@ -149,67 +116,10 @@ impl ToRangeExt for TextRange {
} }
} }
/// Converts a UTF-16 code unit offset for a given line into a UTF-8 column number.
fn utf8_column_offset(utf16_code_unit_offset: u32, line: &str) -> TextSize {
let mut utf8_code_unit_offset = TextSize::new(0);
let mut i = 0u32;
for c in line.chars() {
if i >= utf16_code_unit_offset {
break;
}
// Count characters encoded as two 16 bit words as 2 characters.
{
utf8_code_unit_offset +=
TextSize::new(u32::try_from(c.len_utf8()).expect("utf8 len always <=4"));
i += u32::try_from(c.len_utf16()).expect("utf16 len always <=2");
}
}
utf8_code_unit_offset
}
fn offset_to_source_location(
offset: TextSize,
text: &str,
index: &LineIndex,
encoding: PositionEncoding,
) -> SourceLocation {
match encoding {
PositionEncoding::UTF8 => {
let row = index.line_index(offset);
let column = offset - index.line_start(row, text);
SourceLocation {
column: OneIndexed::from_zero_indexed(column.to_usize()),
row,
}
}
PositionEncoding::UTF16 => {
let row = index.line_index(offset);
let column = if index.is_ascii() {
(offset - index.line_start(row, text)).to_usize()
} else {
let up_to_line = &text[TextRange::new(index.line_start(row, text), offset)];
up_to_line.encode_utf16().count()
};
SourceLocation {
column: OneIndexed::from_zero_indexed(column),
row,
}
}
PositionEncoding::UTF32 => index.source_location(offset, text),
}
}
fn source_location_to_position(location: &SourceLocation) -> types::Position { fn source_location_to_position(location: &SourceLocation) -> types::Position {
types::Position { types::Position {
line: u32::try_from(location.row.to_zero_indexed()).expect("row usize fits in u32"), line: u32::try_from(location.line.to_zero_indexed()).expect("row usize fits in u32"),
character: u32::try_from(location.column.to_zero_indexed()) character: u32::try_from(location.character_offset.to_zero_indexed())
.expect("character usize fits in u32"), .expect("character usize fits in u32"),
} }
} }

View file

@ -59,6 +59,7 @@ impl Server {
let client_capabilities = init_params.capabilities; let client_capabilities = init_params.capabilities;
let position_encoding = Self::find_best_position_encoding(&client_capabilities); let position_encoding = Self::find_best_position_encoding(&client_capabilities);
let server_capabilities = Self::server_capabilities(position_encoding); let server_capabilities = Self::server_capabilities(position_encoding);
let connection = connection.initialize_finish( let connection = connection.initialize_finish(
@ -98,6 +99,8 @@ impl Server {
workspace_settings.unwrap_or_default(), workspace_settings.unwrap_or_default(),
)?; )?;
tracing::debug!("Negotiated position encoding: {position_encoding:?}");
Ok(Self { Ok(Self {
connection, connection,
worker_threads, worker_threads,

View file

@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize};
use ruff_text_size::{Ranged, TextRange, TextSize}; use ruff_text_size::{Ranged, TextRange, TextSize};
pub use crate::line_index::{LineIndex, OneIndexed}; pub use crate::line_index::{LineIndex, OneIndexed, PositionEncoding};
pub use crate::line_ranges::LineRanges; pub use crate::line_ranges::LineRanges;
pub use crate::newlines::{ pub use crate::newlines::{
find_newline, Line, LineEnding, NewlineWithTrailingNewline, UniversalNewlineIterator, find_newline, Line, LineEnding, NewlineWithTrailingNewline, UniversalNewlineIterator,
@ -18,7 +18,7 @@ mod line_index;
mod line_ranges; mod line_ranges;
mod newlines; mod newlines;
/// Gives access to the source code of a file and allows mapping between [`TextSize`] and [`SourceLocation`]. /// Gives access to the source code of a file and allows mapping between [`TextSize`] and [`LineColumn`].
#[derive(Debug)] #[derive(Debug)]
pub struct SourceCode<'src, 'index> { pub struct SourceCode<'src, 'index> {
text: &'src str, text: &'src str,
@ -33,10 +33,20 @@ impl<'src, 'index> SourceCode<'src, 'index> {
} }
} }
/// Computes the one indexed row and column numbers for `offset`. /// Computes the one indexed line and column numbers for `offset`, skipping any potential BOM.
#[inline] #[inline]
pub fn source_location(&self, offset: TextSize) -> SourceLocation { pub fn line_column(&self, offset: TextSize) -> LineColumn {
self.index.source_location(offset, self.text) self.index.line_column(offset, self.text)
}
#[inline]
pub fn source_location(
&self,
offset: TextSize,
position_encoding: PositionEncoding,
) -> SourceLocation {
self.index
.source_location(offset, self.text, position_encoding)
} }
#[inline] #[inline]
@ -229,34 +239,62 @@ impl PartialEq for SourceFileInner {
impl Eq for SourceFileInner {} impl Eq for SourceFileInner {}
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] /// The line and column of an offset in a source file.
///
/// See [`LineIndex::line_column`] for more information.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct SourceLocation { pub struct LineColumn {
pub row: OneIndexed, /// The line in the source text.
pub line: OneIndexed,
/// The column (UTF scalar values) relative to the start of the line except any
/// potential BOM on the first line.
pub column: OneIndexed, pub column: OneIndexed,
} }
impl Default for SourceLocation { impl Default for LineColumn {
fn default() -> Self { fn default() -> Self {
Self { Self {
row: OneIndexed::MIN, line: OneIndexed::MIN,
column: OneIndexed::MIN, column: OneIndexed::MIN,
} }
} }
} }
impl Debug for SourceLocation { impl Debug for LineColumn {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("SourceLocation") f.debug_struct("LineColumn")
.field("row", &self.row.get()) .field("line", &self.line.get())
.field("column", &self.column.get()) .field("column", &self.column.get())
.finish() .finish()
} }
} }
impl std::fmt::Display for SourceLocation { impl std::fmt::Display for LineColumn {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{row}:{column}", row = self.row, column = self.column) write!(f, "{line}:{column}", line = self.line, column = self.column)
}
}
/// A position into a source file represented by the line number and the offset to that character relative to the start of that line.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct SourceLocation {
/// The line in the source text.
pub line: OneIndexed,
/// The offset from the start of the line to the character.
///
/// This can be a byte offset, the number of UTF16 code points, or the UTF8 code units, depending on the
/// [`PositionEncoding`] used.
pub character_offset: OneIndexed,
}
impl Default for SourceLocation {
fn default() -> Self {
Self {
line: OneIndexed::MIN,
character_offset: OneIndexed::MIN,
}
} }
} }

View file

@ -5,13 +5,12 @@ use std::ops::Deref;
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc; use std::sync::Arc;
use crate::{LineColumn, SourceLocation};
use ruff_text_size::{TextLen, TextRange, TextSize}; use ruff_text_size::{TextLen, TextRange, TextSize};
#[cfg(feature = "serde")] #[cfg(feature = "serde")]
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::SourceLocation; /// Index for fast [byte offset](TextSize) to [`LineColumn`] conversions.
/// Index for fast [byte offset](TextSize) to [`SourceLocation`] conversions.
/// ///
/// Cloning a [`LineIndex`] is cheap because it only requires bumping a reference count. /// Cloning a [`LineIndex`] is cheap because it only requires bumping a reference count.
#[derive(Clone, Eq, PartialEq)] #[derive(Clone, Eq, PartialEq)]
@ -66,60 +65,146 @@ impl LineIndex {
self.inner.kind self.inner.kind
} }
/// Returns the row and column index for an offset. /// Returns the line and column number for an UTF-8 byte offset.
///
/// The `column` number is the nth-character of the line, except for the first line
/// where it doesn't include the UTF-8 BOM marker at the start of the file.
///
/// ### BOM handling
///
/// For files starting with a UTF-8 BOM marker, the byte offsets
/// in the range `0...3` are all mapped to line 0 and column 0.
/// Because of this, the conversion isn't losless.
/// ///
/// ## Examples /// ## Examples
/// ///
/// ``` /// ```
/// # use ruff_text_size::TextSize; /// # use ruff_text_size::TextSize;
/// # use ruff_source_file::{LineIndex, OneIndexed, SourceLocation}; /// # use ruff_source_file::{LineIndex, OneIndexed, LineColumn};
/// let source = "def a():\n pass"; /// let source = format!("\u{FEFF}{}", "def a():\n pass");
/// let index = LineIndex::from_source_text(source); /// let index = LineIndex::from_source_text(&source);
/// ///
/// // Before BOM, maps to after BOM
/// assert_eq!( /// assert_eq!(
/// index.source_location(TextSize::from(0), source), /// index.line_column(TextSize::from(0), &source),
/// SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) } /// LineColumn { line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) }
/// );
///
/// // After BOM, maps to after BOM
/// assert_eq!(
/// index.line_column(TextSize::from(3), &source),
/// LineColumn { line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) }
/// ); /// );
/// ///
/// assert_eq!( /// assert_eq!(
/// index.source_location(TextSize::from(4), source), /// index.line_column(TextSize::from(7), &source),
/// SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(4) } /// LineColumn { line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(4) }
/// ); /// );
/// assert_eq!( /// assert_eq!(
/// index.source_location(TextSize::from(13), source), /// index.line_column(TextSize::from(16), &source),
/// SourceLocation { row: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(4) } /// LineColumn { line: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(4) }
/// ); /// );
/// ``` /// ```
/// ///
/// ## Panics /// ## Panics
/// ///
/// If the offset is out of bounds. /// If the byte offset isn't within the bounds of `content`.
pub fn source_location(&self, offset: TextSize, content: &str) -> SourceLocation { pub fn line_column(&self, offset: TextSize, content: &str) -> LineColumn {
match self.line_starts().binary_search(&offset) { let location = self.source_location(offset, content, PositionEncoding::Utf32);
// Offset is at the start of a line
Ok(row) => SourceLocation {
row: OneIndexed::from_zero_indexed(row),
column: OneIndexed::from_zero_indexed(0),
},
Err(next_row) => {
// SAFETY: Safe because the index always contains an entry for the offset 0
let row = next_row - 1;
let mut line_start = self.line_starts()[row];
let column = if self.kind().is_ascii() { // Don't count the BOM character as a column, but only on the first line.
usize::from(offset) - usize::from(line_start) let column = if location.line.to_zero_indexed() == 0 && content.starts_with('\u{feff}') {
} else { location.character_offset.saturating_sub(1)
// Don't count the BOM character as a column. } else {
if line_start == TextSize::from(0) && content.starts_with('\u{feff}') { location.character_offset
line_start = '\u{feff}'.text_len(); };
}
content[TextRange::new(line_start, offset)].chars().count() LineColumn {
}; line: location.line,
column,
}
}
/// Given a UTF-8 byte offset, returns the line and character offset according to the given encoding.
///
/// ### BOM handling
///
/// Unlike [`Self::line_column`], this method does not skip the BOM character at the start of the file.
/// This allows for bidirectional mapping between [`SourceLocation`] and [`TextSize`] (see [`Self::offset`]).
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::TextSize;
/// # use ruff_source_file::{LineIndex, OneIndexed, LineColumn, SourceLocation, PositionEncoding, Line};
/// let source = format!("\u{FEFF}{}", "def a():\n pass");
/// let index = LineIndex::from_source_text(&source);
///
/// // Before BOM, maps to character 0
/// assert_eq!(
/// index.source_location(TextSize::from(0), &source, PositionEncoding::Utf32),
/// SourceLocation { line: OneIndexed::from_zero_indexed(0), character_offset: OneIndexed::from_zero_indexed(0) }
/// );
///
/// // After BOM, maps to after BOM
/// assert_eq!(
/// index.source_location(TextSize::from(3), &source, PositionEncoding::Utf32),
/// SourceLocation { line: OneIndexed::from_zero_indexed(0), character_offset: OneIndexed::from_zero_indexed(1) }
/// );
///
/// assert_eq!(
/// index.source_location(TextSize::from(7), &source, PositionEncoding::Utf32),
/// SourceLocation { line: OneIndexed::from_zero_indexed(0), character_offset: OneIndexed::from_zero_indexed(5) }
/// );
/// assert_eq!(
/// index.source_location(TextSize::from(16), &source, PositionEncoding::Utf32),
/// SourceLocation { line: OneIndexed::from_zero_indexed(1), character_offset: OneIndexed::from_zero_indexed(4) }
/// );
/// ```
///
/// ## Panics
///
/// If the UTF-8 byte offset is out of bounds of `text`.
pub fn source_location(
&self,
offset: TextSize,
text: &str,
encoding: PositionEncoding,
) -> SourceLocation {
let line = self.line_index(offset);
let line_start = self.line_start(line, text);
if self.is_ascii() {
return SourceLocation {
line,
character_offset: OneIndexed::from_zero_indexed((offset - line_start).to_usize()),
};
}
match encoding {
PositionEncoding::Utf8 => {
let character_offset = offset - line_start;
SourceLocation {
line,
character_offset: OneIndexed::from_zero_indexed(character_offset.to_usize()),
}
}
PositionEncoding::Utf16 => {
let up_to_character = &text[TextRange::new(line_start, offset)];
let character = up_to_character.encode_utf16().count();
SourceLocation { SourceLocation {
row: OneIndexed::from_zero_indexed(row), line,
column: OneIndexed::from_zero_indexed(column), character_offset: OneIndexed::from_zero_indexed(character),
}
}
PositionEncoding::Utf32 => {
let up_to_character = &text[TextRange::new(line_start, offset)];
let character = up_to_character.chars().count();
SourceLocation {
line,
character_offset: OneIndexed::from_zero_indexed(character),
} }
} }
} }
@ -141,7 +226,7 @@ impl LineIndex {
/// ///
/// ``` /// ```
/// # use ruff_text_size::TextSize; /// # use ruff_text_size::TextSize;
/// # use ruff_source_file::{LineIndex, OneIndexed, SourceLocation}; /// # use ruff_source_file::{LineIndex, OneIndexed, LineColumn};
/// let source = "def a():\n pass"; /// let source = "def a():\n pass";
/// let index = LineIndex::from_source_text(source); /// let index = LineIndex::from_source_text(source);
/// ///
@ -221,83 +306,211 @@ impl LineIndex {
} }
} }
/// Returns the [byte offset](TextSize) at `line` and `column`. /// Returns the [UTF-8 byte offset](TextSize) at `line` and `character` where character is counted using the given encoding.
/// ///
/// ## Examples /// ## Examples
/// ///
/// ### ASCII /// ### ASCII only source text
/// ///
/// ``` /// ```
/// use ruff_source_file::{LineIndex, OneIndexed}; /// # use ruff_source_file::{SourceLocation, LineIndex, OneIndexed, PositionEncoding};
/// use ruff_text_size::TextSize; /// # use ruff_text_size::TextSize;
/// let source = r#"a = 4 /// let source = r#"a = 4
/// c = "some string" /// c = "some string"
/// x = b"#; /// x = b"#;
/// ///
/// let index = LineIndex::from_source_text(source); /// let index = LineIndex::from_source_text(source);
/// ///
/// // First line, first column /// // First line, first character
/// assert_eq!(index.offset(OneIndexed::from_zero_indexed(0), OneIndexed::from_zero_indexed(0), source), TextSize::new(0)); /// assert_eq!(
/// index.offset(
/// SourceLocation {
/// line: OneIndexed::from_zero_indexed(0),
/// character_offset: OneIndexed::from_zero_indexed(0)
/// },
/// source,
/// PositionEncoding::Utf32,
/// ),
/// TextSize::new(0)
/// );
/// ///
/// // Second line, 4th column /// assert_eq!(
/// assert_eq!(index.offset(OneIndexed::from_zero_indexed(1), OneIndexed::from_zero_indexed(4), source), TextSize::new(10)); /// index.offset(
/// SourceLocation {
/// line: OneIndexed::from_zero_indexed(1),
/// character_offset: OneIndexed::from_zero_indexed(4)
/// },
/// source,
/// PositionEncoding::Utf32,
/// ),
/// TextSize::new(10)
/// );
/// ///
/// // Offset past the end of the first line /// // Offset past the end of the first line
/// assert_eq!(index.offset(OneIndexed::from_zero_indexed(0), OneIndexed::from_zero_indexed(10), source), TextSize::new(6)); /// assert_eq!(
/// index.offset(
/// SourceLocation {
/// line: OneIndexed::from_zero_indexed(0),
/// character_offset: OneIndexed::from_zero_indexed(10)
/// },
/// source,
/// PositionEncoding::Utf32,
/// ),
/// TextSize::new(6)
/// );
/// ///
/// // Offset past the end of the file /// // Offset past the end of the file
/// assert_eq!(index.offset(OneIndexed::from_zero_indexed(3), OneIndexed::from_zero_indexed(0), source), TextSize::new(29)); /// assert_eq!(
/// index.offset(
/// SourceLocation {
/// line: OneIndexed::from_zero_indexed(3),
/// character_offset: OneIndexed::from_zero_indexed(0)
/// },
/// source,
/// PositionEncoding::Utf32,
/// ),
/// TextSize::new(29)
/// );
/// ``` /// ```
/// ///
/// ### UTF8 /// ### Non-ASCII source text
/// ///
/// ``` /// ```
/// use ruff_source_file::{LineIndex, OneIndexed}; /// use ruff_source_file::{LineIndex, OneIndexed, SourceLocation, PositionEncoding};
/// use ruff_text_size::TextSize; /// use ruff_text_size::TextSize;
/// let source = r#"a = 4 /// let source = format!("\u{FEFF}{}", r#"a = 4
/// c = "❤️" /// c = "❤️"
/// x = b"#; /// x = b"#);
/// ///
/// let index = LineIndex::from_source_text(source); /// let index = LineIndex::from_source_text(&source);
/// ///
/// // First line, first column /// // First line, first character, points at the BOM
/// assert_eq!(index.offset(OneIndexed::from_zero_indexed(0), OneIndexed::from_zero_indexed(0), source), TextSize::new(0)); /// assert_eq!(
/// index.offset(
/// SourceLocation {
/// line: OneIndexed::from_zero_indexed(0),
/// character_offset: OneIndexed::from_zero_indexed(0)
/// },
/// &source,
/// PositionEncoding::Utf32,
/// ),
/// TextSize::new(0)
/// );
///
/// // First line, after the BOM
/// assert_eq!(
/// index.offset(
/// SourceLocation {
/// line: OneIndexed::from_zero_indexed(0),
/// character_offset: OneIndexed::from_zero_indexed(1)
/// },
/// &source,
/// PositionEncoding::Utf32,
/// ),
/// TextSize::new(3)
/// );
///
/// // second line, 7th character, after emoji, UTF32
/// assert_eq!(
/// index.offset(
/// SourceLocation {
/// line: OneIndexed::from_zero_indexed(1),
/// character_offset: OneIndexed::from_zero_indexed(7)
/// },
/// &source,
/// PositionEncoding::Utf32,
/// ),
/// TextSize::new(20)
/// );
///
/// // Second line, 7th character, after emoji, UTF 16
/// assert_eq!(
/// index.offset(
/// SourceLocation {
/// line: OneIndexed::from_zero_indexed(1),
/// character_offset: OneIndexed::from_zero_indexed(7)
/// },
/// &source,
/// PositionEncoding::Utf16,
/// ),
/// TextSize::new(20)
/// );
/// ///
/// // Third line, 2nd column, after emoji
/// assert_eq!(index.offset(OneIndexed::from_zero_indexed(2), OneIndexed::from_zero_indexed(1), source), TextSize::new(20));
/// ///
/// // Offset past the end of the second line /// // Offset past the end of the second line
/// assert_eq!(index.offset(OneIndexed::from_zero_indexed(1), OneIndexed::from_zero_indexed(10), source), TextSize::new(19)); /// assert_eq!(
/// index.offset(
/// SourceLocation {
/// line: OneIndexed::from_zero_indexed(1),
/// character_offset: OneIndexed::from_zero_indexed(10)
/// },
/// &source,
/// PositionEncoding::Utf32,
/// ),
/// TextSize::new(22)
/// );
/// ///
/// // Offset past the end of the file /// // Offset past the end of the file
/// assert_eq!(index.offset(OneIndexed::from_zero_indexed(3), OneIndexed::from_zero_indexed(0), source), TextSize::new(24)); /// assert_eq!(
/// index.offset(
/// SourceLocation {
/// line: OneIndexed::from_zero_indexed(3),
/// character_offset: OneIndexed::from_zero_indexed(0)
/// },
/// &source,
/// PositionEncoding::Utf32,
/// ),
/// TextSize::new(27)
/// );
/// ``` /// ```
/// pub fn offset(
pub fn offset(&self, line: OneIndexed, column: OneIndexed, contents: &str) -> TextSize { &self,
position: SourceLocation,
text: &str,
position_encoding: PositionEncoding,
) -> TextSize {
// If start-of-line position after last line // If start-of-line position after last line
if line.to_zero_indexed() > self.line_starts().len() { if position.line.to_zero_indexed() > self.line_starts().len() {
return contents.text_len(); return text.text_len();
} }
let line_range = self.line_range(line, contents); let line_range = self.line_range(position.line, text);
match self.kind() { let character_offset = position.character_offset.to_zero_indexed();
IndexKind::Ascii => { let character_byte_offset = if self.is_ascii() {
line_range.start() TextSize::try_from(character_offset).unwrap()
+ TextSize::try_from(column.to_zero_indexed()) } else {
.unwrap_or(line_range.len()) let line = &text[line_range];
.clamp(TextSize::new(0), line_range.len())
} match position_encoding {
IndexKind::Utf8 => { PositionEncoding::Utf8 => {
let rest = &contents[line_range]; TextSize::try_from(position.character_offset.to_zero_indexed()).unwrap()
let column_offset: TextSize = rest }
PositionEncoding::Utf16 => {
let mut byte_offset = TextSize::new(0);
let mut utf16_code_unit_offset = 0;
for c in line.chars() {
if utf16_code_unit_offset >= character_offset {
break;
}
// Count characters encoded as two 16 bit words as 2 characters.
byte_offset += c.text_len();
utf16_code_unit_offset += c.len_utf16();
}
byte_offset
}
PositionEncoding::Utf32 => line
.chars() .chars()
.take(column.to_zero_indexed()) .take(position.character_offset.to_zero_indexed())
.map(ruff_text_size::TextLen::text_len) .map(ruff_text_size::TextLen::text_len)
.sum(); .sum(),
line_range.start() + column_offset
} }
} };
line_range.start() + character_byte_offset.clamp(TextSize::new(0), line_range.len())
} }
/// Returns the [byte offsets](TextSize) for every line /// Returns the [byte offsets](TextSize) for every line
@ -430,12 +643,26 @@ impl FromStr for OneIndexed {
} }
} }
#[derive(Default, Copy, Clone, Debug)]
pub enum PositionEncoding {
/// Character offsets count the number of bytes from the start of the line.
#[default]
Utf8,
/// Character offsets count the number of UTF-16 code units from the start of the line.
Utf16,
/// Character offsets count the number of UTF-32 code points units (the same as number of characters in Rust)
/// from the start of the line.
Utf32,
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use ruff_text_size::TextSize; use ruff_text_size::TextSize;
use crate::line_index::LineIndex; use crate::line_index::LineIndex;
use crate::{OneIndexed, SourceLocation}; use crate::{LineColumn, OneIndexed};
#[test] #[test]
fn ascii_index() { fn ascii_index() {
@ -466,30 +693,30 @@ mod tests {
let index = LineIndex::from_source_text(contents); let index = LineIndex::from_source_text(contents);
// First row. // First row.
let loc = index.source_location(TextSize::from(2), contents); let loc = index.line_column(TextSize::from(2), contents);
assert_eq!( assert_eq!(
loc, loc,
SourceLocation { LineColumn {
row: OneIndexed::from_zero_indexed(0), line: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(2) column: OneIndexed::from_zero_indexed(2)
} }
); );
// Second row. // Second row.
let loc = index.source_location(TextSize::from(6), contents); let loc = index.line_column(TextSize::from(6), contents);
assert_eq!( assert_eq!(
loc, loc,
SourceLocation { LineColumn {
row: OneIndexed::from_zero_indexed(1), line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0) column: OneIndexed::from_zero_indexed(0)
} }
); );
let loc = index.source_location(TextSize::from(11), contents); let loc = index.line_column(TextSize::from(11), contents);
assert_eq!( assert_eq!(
loc, loc,
SourceLocation { LineColumn {
row: OneIndexed::from_zero_indexed(1), line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(5) column: OneIndexed::from_zero_indexed(5)
} }
); );
@ -502,23 +729,23 @@ mod tests {
assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(6)]); assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(6)]);
assert_eq!( assert_eq!(
index.source_location(TextSize::from(4), contents), index.line_column(TextSize::from(4), contents),
SourceLocation { LineColumn {
row: OneIndexed::from_zero_indexed(0), line: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(4) column: OneIndexed::from_zero_indexed(4)
} }
); );
assert_eq!( assert_eq!(
index.source_location(TextSize::from(6), contents), index.line_column(TextSize::from(6), contents),
SourceLocation { LineColumn {
row: OneIndexed::from_zero_indexed(1), line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0) column: OneIndexed::from_zero_indexed(0)
} }
); );
assert_eq!( assert_eq!(
index.source_location(TextSize::from(7), contents), index.line_column(TextSize::from(7), contents),
SourceLocation { LineColumn {
row: OneIndexed::from_zero_indexed(1), line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1) column: OneIndexed::from_zero_indexed(1)
} }
); );
@ -531,23 +758,23 @@ mod tests {
assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(7)]); assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(7)]);
assert_eq!( assert_eq!(
index.source_location(TextSize::from(4), contents), index.line_column(TextSize::from(4), contents),
SourceLocation { LineColumn {
row: OneIndexed::from_zero_indexed(0), line: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(4) column: OneIndexed::from_zero_indexed(4)
} }
); );
assert_eq!( assert_eq!(
index.source_location(TextSize::from(7), contents), index.line_column(TextSize::from(7), contents),
SourceLocation { LineColumn {
row: OneIndexed::from_zero_indexed(1), line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0) column: OneIndexed::from_zero_indexed(0)
} }
); );
assert_eq!( assert_eq!(
index.source_location(TextSize::from(8), contents), index.line_column(TextSize::from(8), contents),
SourceLocation { LineColumn {
row: OneIndexed::from_zero_indexed(1), line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1) column: OneIndexed::from_zero_indexed(1)
} }
); );
@ -598,23 +825,23 @@ mod tests {
// Second ' // Second '
assert_eq!( assert_eq!(
index.source_location(TextSize::from(9), contents), index.line_column(TextSize::from(9), contents),
SourceLocation { LineColumn {
row: OneIndexed::from_zero_indexed(0), line: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(6) column: OneIndexed::from_zero_indexed(6)
} }
); );
assert_eq!( assert_eq!(
index.source_location(TextSize::from(11), contents), index.line_column(TextSize::from(11), contents),
SourceLocation { LineColumn {
row: OneIndexed::from_zero_indexed(1), line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0) column: OneIndexed::from_zero_indexed(0)
} }
); );
assert_eq!( assert_eq!(
index.source_location(TextSize::from(12), contents), index.line_column(TextSize::from(12), contents),
SourceLocation { LineColumn {
row: OneIndexed::from_zero_indexed(1), line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1) column: OneIndexed::from_zero_indexed(1)
} }
); );
@ -632,23 +859,23 @@ mod tests {
// Second ' // Second '
assert_eq!( assert_eq!(
index.source_location(TextSize::from(9), contents), index.line_column(TextSize::from(9), contents),
SourceLocation { LineColumn {
row: OneIndexed::from_zero_indexed(0), line: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(6) column: OneIndexed::from_zero_indexed(6)
} }
); );
assert_eq!( assert_eq!(
index.source_location(TextSize::from(12), contents), index.line_column(TextSize::from(12), contents),
SourceLocation { LineColumn {
row: OneIndexed::from_zero_indexed(1), line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0) column: OneIndexed::from_zero_indexed(0)
} }
); );
assert_eq!( assert_eq!(
index.source_location(TextSize::from(13), contents), index.line_column(TextSize::from(13), contents),
SourceLocation { LineColumn {
row: OneIndexed::from_zero_indexed(1), line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1) column: OneIndexed::from_zero_indexed(1)
} }
); );
@ -664,49 +891,49 @@ mod tests {
); );
// First row. // First row.
let loc = index.source_location(TextSize::from(0), contents); let loc = index.line_column(TextSize::from(0), contents);
assert_eq!( assert_eq!(
loc, loc,
SourceLocation { LineColumn {
row: OneIndexed::from_zero_indexed(0), line: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(0) column: OneIndexed::from_zero_indexed(0)
} }
); );
let loc = index.source_location(TextSize::from(5), contents); let loc = index.line_column(TextSize::from(5), contents);
assert_eq!( assert_eq!(
loc, loc,
SourceLocation { LineColumn {
row: OneIndexed::from_zero_indexed(0), line: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(5) column: OneIndexed::from_zero_indexed(5)
} }
); );
let loc = index.source_location(TextSize::from(8), contents); let loc = index.line_column(TextSize::from(8), contents);
assert_eq!( assert_eq!(
loc, loc,
SourceLocation { LineColumn {
row: OneIndexed::from_zero_indexed(0), line: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(6) column: OneIndexed::from_zero_indexed(6)
} }
); );
// Second row. // Second row.
let loc = index.source_location(TextSize::from(10), contents); let loc = index.line_column(TextSize::from(10), contents);
assert_eq!( assert_eq!(
loc, loc,
SourceLocation { LineColumn {
row: OneIndexed::from_zero_indexed(1), line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0) column: OneIndexed::from_zero_indexed(0)
} }
); );
// One-past-the-end. // One-past-the-end.
let loc = index.source_location(TextSize::from(15), contents); let loc = index.line_column(TextSize::from(15), contents);
assert_eq!( assert_eq!(
loc, loc,
SourceLocation { LineColumn {
row: OneIndexed::from_zero_indexed(1), line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(5) column: OneIndexed::from_zero_indexed(5)
} }
); );

View file

@ -21,7 +21,7 @@ use ruff_python_formatter::{format_module_ast, pretty_comments, PyFormatContext,
use ruff_python_index::Indexer; use ruff_python_index::Indexer;
use ruff_python_parser::{parse, parse_unchecked, Mode, ParseOptions, Parsed}; use ruff_python_parser::{parse, parse_unchecked, Mode, ParseOptions, Parsed};
use ruff_python_trivia::CommentRanges; use ruff_python_trivia::CommentRanges;
use ruff_source_file::SourceLocation; use ruff_source_file::{LineColumn, OneIndexed};
use ruff_text_size::Ranged; use ruff_text_size::Ranged;
use ruff_workspace::configuration::Configuration; use ruff_workspace::configuration::Configuration;
use ruff_workspace::options::{FormatOptions, LintCommonOptions, LintOptions, Options}; use ruff_workspace::options::{FormatOptions, LintCommonOptions, LintOptions, Options};
@ -61,8 +61,8 @@ export interface Diagnostic {
pub struct ExpandedMessage { pub struct ExpandedMessage {
pub code: Option<String>, pub code: Option<String>,
pub message: String, pub message: String,
pub start_location: SourceLocation, pub start_location: Location,
pub end_location: SourceLocation, pub end_location: Location,
pub fix: Option<ExpandedFix>, pub fix: Option<ExpandedFix>,
} }
@ -74,8 +74,8 @@ pub struct ExpandedFix {
#[derive(Serialize, Deserialize, Eq, PartialEq, Debug)] #[derive(Serialize, Deserialize, Eq, PartialEq, Debug)]
struct ExpandedEdit { struct ExpandedEdit {
location: SourceLocation, location: Location,
end_location: SourceLocation, end_location: Location,
content: Option<String>, content: Option<String>,
} }
@ -214,16 +214,16 @@ impl Workspace {
}) => ExpandedMessage { }) => ExpandedMessage {
code: Some(kind.rule().noqa_code().to_string()), code: Some(kind.rule().noqa_code().to_string()),
message: kind.body, message: kind.body,
start_location: source_code.source_location(range.start()), start_location: source_code.line_column(range.start()).into(),
end_location: source_code.source_location(range.end()), end_location: source_code.line_column(range.end()).into(),
fix: fix.map(|fix| ExpandedFix { fix: fix.map(|fix| ExpandedFix {
message: kind.suggestion, message: kind.suggestion,
edits: fix edits: fix
.edits() .edits()
.iter() .iter()
.map(|edit| ExpandedEdit { .map(|edit| ExpandedEdit {
location: source_code.source_location(edit.start()), location: source_code.line_column(edit.start()).into(),
end_location: source_code.source_location(edit.end()), end_location: source_code.line_column(edit.end()).into(),
content: edit.content().map(ToString::to_string), content: edit.content().map(ToString::to_string),
}) })
.collect(), .collect(),
@ -233,8 +233,8 @@ impl Workspace {
ExpandedMessage { ExpandedMessage {
code: None, code: None,
message, message,
start_location: source_code.source_location(range.start()), start_location: source_code.line_column(range.start()).into(),
end_location: source_code.source_location(range.end()), end_location: source_code.line_column(range.end()).into(),
fix: None, fix: None,
} }
} }
@ -316,3 +316,18 @@ impl<'a> ParsedModule<'a> {
) )
} }
} }
#[derive(Serialize, Deserialize, Eq, PartialEq, Debug)]
pub struct Location {
pub row: OneIndexed,
pub column: OneIndexed,
}
impl From<LineColumn> for Location {
fn from(value: LineColumn) -> Self {
Self {
row: value.line,
column: value.column,
}
}
}

View file

@ -3,8 +3,8 @@
use wasm_bindgen_test::wasm_bindgen_test; use wasm_bindgen_test::wasm_bindgen_test;
use ruff_linter::registry::Rule; use ruff_linter::registry::Rule;
use ruff_source_file::{OneIndexed, SourceLocation}; use ruff_source_file::OneIndexed;
use ruff_wasm::{ExpandedMessage, Workspace}; use ruff_wasm::{ExpandedMessage, Location, Workspace};
macro_rules! check { macro_rules! check {
($source:expr, $config:expr, $expected:expr) => {{ ($source:expr, $config:expr, $expected:expr) => {{
@ -27,11 +27,11 @@ fn empty_config() {
[ExpandedMessage { [ExpandedMessage {
code: Some(Rule::IfTuple.noqa_code().to_string()), code: Some(Rule::IfTuple.noqa_code().to_string()),
message: "If test is a tuple, which is always `True`".to_string(), message: "If test is a tuple, which is always `True`".to_string(),
start_location: SourceLocation { start_location: Location {
row: OneIndexed::from_zero_indexed(0), row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(3) column: OneIndexed::from_zero_indexed(3)
}, },
end_location: SourceLocation { end_location: Location {
row: OneIndexed::from_zero_indexed(0), row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(9) column: OneIndexed::from_zero_indexed(9)
}, },
@ -48,11 +48,11 @@ fn syntax_error() {
[ExpandedMessage { [ExpandedMessage {
code: None, code: None,
message: "SyntaxError: Expected an expression".to_string(), message: "SyntaxError: Expected an expression".to_string(),
start_location: SourceLocation { start_location: Location {
row: OneIndexed::from_zero_indexed(0), row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(3) column: OneIndexed::from_zero_indexed(3)
}, },
end_location: SourceLocation { end_location: Location {
row: OneIndexed::from_zero_indexed(1), row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0) column: OneIndexed::from_zero_indexed(0)
}, },
@ -69,11 +69,11 @@ fn unsupported_syntax_error() {
[ExpandedMessage { [ExpandedMessage {
code: None, code: None,
message: "SyntaxError: Cannot use `match` statement on Python 3.9 (syntax was added in Python 3.10)".to_string(), message: "SyntaxError: Cannot use `match` statement on Python 3.9 (syntax was added in Python 3.10)".to_string(),
start_location: SourceLocation { start_location: Location {
row: OneIndexed::from_zero_indexed(0), row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(0) column: OneIndexed::from_zero_indexed(0)
}, },
end_location: SourceLocation { end_location: Location {
row: OneIndexed::from_zero_indexed(0), row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(5) column: OneIndexed::from_zero_indexed(5)
}, },