Split SourceLocation into LineColumn and SourceLocation (#17587)

This commit is contained in:
Micha Reiser 2025-04-27 11:27:33 +01:00 committed by GitHub
parent 4443f6653c
commit 1c65e0ad25
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
29 changed files with 695 additions and 537 deletions

View file

@ -27,6 +27,16 @@ pub enum PositionEncoding {
UTF8,
}
impl From<PositionEncoding> for ruff_source_file::PositionEncoding {
fn from(value: PositionEncoding) -> Self {
match value {
PositionEncoding::UTF8 => Self::Utf8,
PositionEncoding::UTF16 => Self::Utf16,
PositionEncoding::UTF32 => Self::Utf32,
}
}
}
/// A unique document ID, derived from a URL passed as part of an LSP request.
/// This document ID can point to either be a standalone Python file, a full notebook, or a cell within a notebook.
#[derive(Clone, Debug)]

View file

@ -9,8 +9,8 @@ use red_knot_python_semantic::Db;
use ruff_db::files::FileRange;
use ruff_db::source::{line_index, source_text};
use ruff_notebook::NotebookIndex;
use ruff_source_file::OneIndexed;
use ruff_source_file::{LineIndex, SourceLocation};
use ruff_source_file::LineIndex;
use ruff_source_file::{OneIndexed, SourceLocation};
use ruff_text_size::{Ranged, TextRange, TextSize};
#[expect(dead_code)]
@ -46,7 +46,7 @@ impl TextSizeExt for TextSize {
index: &LineIndex,
encoding: PositionEncoding,
) -> types::Position {
let source_location = offset_to_source_location(self, text, index, encoding);
let source_location = index.source_location(self, text, encoding.into());
source_location_to_position(&source_location)
}
}
@ -75,36 +75,14 @@ fn u32_index_to_usize(index: u32) -> usize {
impl PositionExt for lsp_types::Position {
fn to_text_size(&self, text: &str, index: &LineIndex, encoding: PositionEncoding) -> TextSize {
let start_line = index.line_range(
OneIndexed::from_zero_indexed(u32_index_to_usize(self.line)),
index.offset(
SourceLocation {
line: OneIndexed::from_zero_indexed(u32_index_to_usize(self.line)),
character_offset: OneIndexed::from_zero_indexed(u32_index_to_usize(self.character)),
},
text,
);
let start_column_offset = match encoding {
PositionEncoding::UTF8 => TextSize::new(self.character),
PositionEncoding::UTF16 => {
// Fast path for ASCII only documents
if index.is_ascii() {
TextSize::new(self.character)
} else {
// UTF16 encodes characters either as one or two 16 bit words.
// The position in `range` is the 16-bit word offset from the start of the line (and not the character offset)
// UTF-16 with a text that may use variable-length characters.
utf8_column_offset(self.character, &text[start_line])
}
}
PositionEncoding::UTF32 => {
// UTF-32 uses 4 bytes for each character. Meaning, the position in range is a character offset.
return index.offset(
OneIndexed::from_zero_indexed(u32_index_to_usize(self.line)),
OneIndexed::from_zero_indexed(u32_index_to_usize(self.character)),
text,
);
}
};
start_line.start() + start_column_offset.clamp(TextSize::new(0), start_line.end())
encoding.into(),
)
}
}
@ -142,26 +120,23 @@ impl ToRangeExt for TextRange {
notebook_index: &NotebookIndex,
encoding: PositionEncoding,
) -> NotebookRange {
let start = offset_to_source_location(self.start(), text, source_index, encoding);
let mut end = offset_to_source_location(self.end(), text, source_index, encoding);
let starting_cell = notebook_index.cell(start.row);
let start = source_index.source_location(self.start(), text, encoding.into());
let mut end = source_index.source_location(self.end(), text, encoding.into());
let starting_cell = notebook_index.cell(start.line);
// weird edge case here - if the end of the range is where the newline after the cell got added (making it 'out of bounds')
// we need to move it one character back (which should place it at the end of the last line).
// we test this by checking if the ending offset is in a different (or nonexistent) cell compared to the cell of the starting offset.
if notebook_index.cell(end.row) != starting_cell {
end.row = end.row.saturating_sub(1);
end.column = offset_to_source_location(
self.end().checked_sub(1.into()).unwrap_or_default(),
text,
source_index,
encoding,
)
.column;
if notebook_index.cell(end.line) != starting_cell {
end.line = end.line.saturating_sub(1);
let offset = self.end().checked_sub(1.into()).unwrap_or_default();
end.character_offset = source_index
.source_location(offset, text, encoding.into())
.character_offset;
}
let start = source_location_to_position(&notebook_index.translate_location(&start));
let end = source_location_to_position(&notebook_index.translate_location(&end));
let start = source_location_to_position(&notebook_index.translate_source_location(&start));
let end = source_location_to_position(&notebook_index.translate_source_location(&end));
NotebookRange {
cell: starting_cell
@ -172,67 +147,10 @@ impl ToRangeExt for TextRange {
}
}
/// Converts a UTF-16 code unit offset for a given line into a UTF-8 column number.
fn utf8_column_offset(utf16_code_unit_offset: u32, line: &str) -> TextSize {
let mut utf8_code_unit_offset = TextSize::new(0);
let mut i = 0u32;
for c in line.chars() {
if i >= utf16_code_unit_offset {
break;
}
// Count characters encoded as two 16 bit words as 2 characters.
{
utf8_code_unit_offset +=
TextSize::new(u32::try_from(c.len_utf8()).expect("utf8 len always <=4"));
i += u32::try_from(c.len_utf16()).expect("utf16 len always <=2");
}
}
utf8_code_unit_offset
}
fn offset_to_source_location(
offset: TextSize,
text: &str,
index: &LineIndex,
encoding: PositionEncoding,
) -> SourceLocation {
match encoding {
PositionEncoding::UTF8 => {
let row = index.line_index(offset);
let column = offset - index.line_start(row, text);
SourceLocation {
column: OneIndexed::from_zero_indexed(column.to_usize()),
row,
}
}
PositionEncoding::UTF16 => {
let row = index.line_index(offset);
let column = if index.is_ascii() {
(offset - index.line_start(row, text)).to_usize()
} else {
let up_to_line = &text[TextRange::new(index.line_start(row, text), offset)];
up_to_line.encode_utf16().count()
};
SourceLocation {
column: OneIndexed::from_zero_indexed(column),
row,
}
}
PositionEncoding::UTF32 => index.source_location(offset, text),
}
}
fn source_location_to_position(location: &SourceLocation) -> types::Position {
types::Position {
line: u32::try_from(location.row.to_zero_indexed()).expect("row usize fits in u32"),
character: u32::try_from(location.column.to_zero_indexed())
line: u32::try_from(location.line.to_zero_indexed()).expect("line usize fits in u32"),
character: u32::try_from(location.character_offset.to_zero_indexed())
.expect("character usize fits in u32"),
}
}

View file

@ -263,7 +263,7 @@ impl Matcher {
.and_then(|span| span.range())
.map(|range| {
self.line_index
.source_location(range.start(), &self.source)
.line_column(range.start(), &self.source)
.column
})
.unwrap_or(OneIndexed::from_zero_indexed(0))

View file

@ -19,7 +19,7 @@ use ruff_db::system::{
use ruff_db::Upcast;
use ruff_notebook::Notebook;
use ruff_python_formatter::formatted_file;
use ruff_source_file::{LineIndex, OneIndexed, SourceLocation};
use ruff_source_file::{LineColumn, LineIndex, OneIndexed, PositionEncoding, SourceLocation};
use ruff_text_size::{Ranged, TextSize};
use wasm_bindgen::prelude::*;
@ -408,8 +408,8 @@ impl Range {
}
}
impl From<(SourceLocation, SourceLocation)> for Range {
fn from((start, end): (SourceLocation, SourceLocation)) -> Self {
impl From<(LineColumn, LineColumn)> for Range {
fn from((start, end): (LineColumn, LineColumn)) -> Self {
Self {
start: start.into(),
end: end.into(),
@ -438,29 +438,34 @@ impl Position {
impl Position {
fn to_text_size(self, text: &str, index: &LineIndex) -> Result<TextSize, Error> {
let text_size = index.offset(
OneIndexed::new(self.line).ok_or_else(|| {
Error::new("Invalid value `0` for `position.line`. The line index is 1-indexed.")
})?,
OneIndexed::new(self.column).ok_or_else(|| {
Error::new(
"Invalid value `0` for `position.column`. The column index is 1-indexed.",
)
})?,
SourceLocation {
line: OneIndexed::new(self.line).ok_or_else(|| {
Error::new(
"Invalid value `0` for `position.line`. The line index is 1-indexed.",
)
})?,
character_offset: OneIndexed::new(self.column).ok_or_else(|| {
Error::new(
"Invalid value `0` for `position.column`. The column index is 1-indexed.",
)
})?,
},
text,
PositionEncoding::Utf32,
);
Ok(text_size)
}
fn from_text_size(offset: TextSize, line_index: &LineIndex, source: &str) -> Self {
line_index.source_location(offset, source).into()
line_index.line_column(offset, source).into()
}
}
impl From<SourceLocation> for Position {
fn from(location: SourceLocation) -> Self {
impl From<LineColumn> for Position {
fn from(location: LineColumn) -> Self {
Self {
line: location.row.get(),
line: location.line.get(),
column: location.column.get(),
}
}

View file

@ -5,6 +5,7 @@ use std::path::{Path, PathBuf};
use std::str::FromStr;
use std::sync::Arc;
use crate::commands::completions::config::{OptionString, OptionStringParser};
use anyhow::bail;
use clap::builder::{TypedValueParser, ValueParserFactory};
use clap::{command, Parser, Subcommand};
@ -22,7 +23,7 @@ use ruff_linter::settings::types::{
};
use ruff_linter::{RuleParser, RuleSelector, RuleSelectorParser};
use ruff_python_ast as ast;
use ruff_source_file::{LineIndex, OneIndexed};
use ruff_source_file::{LineIndex, OneIndexed, PositionEncoding};
use ruff_text_size::TextRange;
use ruff_workspace::configuration::{Configuration, RuleSelection};
use ruff_workspace::options::{Options, PycodestyleOptions};
@ -31,8 +32,6 @@ use ruff_workspace::resolver::ConfigurationTransformer;
use rustc_hash::FxHashMap;
use toml;
use crate::commands::completions::config::{OptionString, OptionStringParser};
/// All configuration options that can be passed "globally",
/// i.e., can be passed to all subcommands
#[derive(Debug, Default, Clone, clap::Args)]
@ -1070,8 +1069,9 @@ impl FormatRange {
///
/// Returns an empty range if the start range is past the end of `source`.
pub(super) fn to_text_range(self, source: &str, line_index: &LineIndex) -> TextRange {
let start_byte_offset = line_index.offset(self.start.line, self.start.column, source);
let end_byte_offset = line_index.offset(self.end.line, self.end.column, source);
let start_byte_offset =
line_index.offset(self.start.into(), source, PositionEncoding::Utf32);
let end_byte_offset = line_index.offset(self.end.into(), source, PositionEncoding::Utf32);
TextRange::new(start_byte_offset, end_byte_offset)
}
@ -1142,6 +1142,15 @@ pub struct LineColumn {
pub column: OneIndexed,
}
impl From<LineColumn> for ruff_source_file::SourceLocation {
fn from(value: LineColumn) -> Self {
Self {
line: value.line,
character_offset: value.column,
}
}
}
impl std::fmt::Display for LineColumn {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{line}:{column}", line = self.line, column = self.column)

View file

@ -71,8 +71,8 @@ impl std::fmt::Display for DisplayDiagnostic<'_> {
write!(f, " {path}", path = self.resolver.path(span.file()))?;
if let Some(range) = span.range() {
let input = self.resolver.input(span.file());
let start = input.as_source_code().source_location(range.start());
write!(f, ":{line}:{col}", line = start.row, col = start.column)?;
let start = input.as_source_code().line_column(range.start());
write!(f, ":{line}:{col}", line = start.line, col = start.column)?;
}
write!(f, ":")?;
}

View file

@ -191,7 +191,7 @@ pub(crate) fn definitions(checker: &mut Checker) {
warn_user!(
"Docstring at {}:{}:{} contains implicit string concatenation; ignoring...",
relativize_path(checker.path),
location.row,
location.line,
location.column
);
continue;

View file

@ -2,7 +2,7 @@
use std::cell::OnceCell;
use ruff_source_file::{LineIndex, LineRanges, OneIndexed, SourceCode, SourceLocation};
use ruff_source_file::{LineColumn, LineIndex, LineRanges, OneIndexed, SourceCode};
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
#[derive(Debug)]
@ -36,8 +36,8 @@ impl<'a> Locator<'a> {
#[deprecated(
note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead."
)]
pub fn compute_source_location(&self, offset: TextSize) -> SourceLocation {
self.to_source_code().source_location(offset)
pub fn compute_source_location(&self, offset: TextSize) -> LineColumn {
self.to_source_code().line_column(offset)
}
pub fn to_index(&self) -> &LineIndex {

View file

@ -9,7 +9,7 @@ use log::Level;
use ruff_python_parser::{ParseError, ParseErrorType};
use rustc_hash::FxHashSet;
use ruff_source_file::{LineIndex, OneIndexed, SourceCode, SourceLocation};
use ruff_source_file::{LineColumn, LineIndex, OneIndexed, SourceCode};
use crate::fs;
use crate::source_kind::SourceKind;
@ -195,21 +195,21 @@ impl DisplayParseError {
// Translate the byte offset to a location in the originating source.
let location =
if let Some(jupyter_index) = source_kind.as_ipy_notebook().map(Notebook::index) {
let source_location = source_code.source_location(error.location.start());
let source_location = source_code.line_column(error.location.start());
ErrorLocation::Cell(
jupyter_index
.cell(source_location.row)
.cell(source_location.line)
.unwrap_or(OneIndexed::MIN),
SourceLocation {
row: jupyter_index
.cell_row(source_location.row)
LineColumn {
line: jupyter_index
.cell_row(source_location.line)
.unwrap_or(OneIndexed::MIN),
column: source_location.column,
},
)
} else {
ErrorLocation::File(source_code.source_location(error.location.start()))
ErrorLocation::File(source_code.line_column(error.location.start()))
};
Self {
@ -245,7 +245,7 @@ impl Display for DisplayParseError {
write!(
f,
"{row}{colon}{column}{colon} {inner}",
row = location.row,
row = location.line,
column = location.column,
colon = ":".cyan(),
inner = &DisplayParseErrorType(&self.error.error)
@ -256,7 +256,7 @@ impl Display for DisplayParseError {
f,
"{cell}{colon}{row}{colon}{column}{colon} {inner}",
cell = cell,
row = location.row,
row = location.line,
column = location.column,
colon = ":".cyan(),
inner = &DisplayParseErrorType(&self.error.error)
@ -283,9 +283,9 @@ impl Display for DisplayParseErrorType<'_> {
#[derive(Debug)]
enum ErrorLocation {
/// The error occurred in a Python file.
File(SourceLocation),
File(LineColumn),
/// The error occurred in a Jupyter cell.
Cell(OneIndexed, SourceLocation),
Cell(OneIndexed, LineColumn),
}
/// Truncates the display text before the first newline character to avoid line breaks.

View file

@ -1,6 +1,6 @@
use std::io::Write;
use ruff_source_file::SourceLocation;
use ruff_source_file::LineColumn;
use crate::message::{Emitter, EmitterContext, Message};
@ -20,7 +20,7 @@ impl Emitter for AzureEmitter {
let location = if context.is_notebook(message.filename()) {
// We can't give a reasonable location for the structured formats,
// so we show one that's clearly a fallback
SourceLocation::default()
LineColumn::default()
} else {
message.compute_start_location()
};
@ -30,7 +30,7 @@ impl Emitter for AzureEmitter {
"##vso[task.logissue type=error\
;sourcepath={filename};linenumber={line};columnnumber={col};{code}]{body}",
filename = message.filename(),
line = location.row,
line = location.line,
col = location.column,
code = message
.rule()

View file

@ -1,6 +1,6 @@
use std::io::Write;
use ruff_source_file::SourceLocation;
use ruff_source_file::LineColumn;
use crate::fs::relativize_path;
use crate::message::{Emitter, EmitterContext, Message};
@ -22,9 +22,9 @@ impl Emitter for GithubEmitter {
let location = if context.is_notebook(message.filename()) {
// We can't give a reasonable location for the structured formats,
// so we show one that's clearly a fallback
SourceLocation::default()
LineColumn::default()
} else {
source_location.clone()
source_location
};
let end_location = message.compute_end_location();
@ -34,9 +34,9 @@ impl Emitter for GithubEmitter {
"::error title=Ruff{code},file={file},line={row},col={column},endLine={end_row},endColumn={end_column}::",
code = message.rule().map_or_else(String::new, |rule| format!(" ({})", rule.noqa_code())),
file = message.filename(),
row = source_location.row,
row = source_location.line,
column = source_location.column,
end_row = end_location.row,
end_row = end_location.line,
end_column = end_location.column,
)?;
@ -44,7 +44,7 @@ impl Emitter for GithubEmitter {
writer,
"{path}:{row}:{column}:",
path = relativize_path(message.filename()),
row = location.row,
row = location.line,
column = location.column,
)?;

View file

@ -71,8 +71,8 @@ impl Serialize for SerializedMessages<'_> {
})
} else {
json!({
"begin": start_location.row,
"end": end_location.row
"begin": start_location.line,
"end": end_location.line
})
};

View file

@ -57,7 +57,7 @@ impl Emitter for GroupedEmitter {
let mut max_column_length = OneIndexed::MIN;
for message in &messages {
max_row_length = max_row_length.max(message.start_location.row);
max_row_length = max_row_length.max(message.start_location.line);
max_column_length = max_column_length.max(message.start_location.column);
}
@ -115,8 +115,8 @@ impl Display for DisplayGroupedMessage<'_> {
write!(
f,
" {row_padding}",
row_padding =
" ".repeat(self.row_length.get() - calculate_print_width(start_location.row).get())
row_padding = " "
.repeat(self.row_length.get() - calculate_print_width(start_location.line).get())
)?;
// Check if we're working on a jupyter notebook and translate positions with cell accordingly
@ -125,18 +125,18 @@ impl Display for DisplayGroupedMessage<'_> {
f,
"cell {cell}{sep}",
cell = jupyter_index
.cell(start_location.row)
.cell(start_location.line)
.unwrap_or(OneIndexed::MIN),
sep = ":".cyan()
)?;
(
jupyter_index
.cell_row(start_location.row)
.cell_row(start_location.line)
.unwrap_or(OneIndexed::MIN),
start_location.column,
)
} else {
(start_location.row, start_location.column)
(start_location.line, start_location.column)
};
writeln!(

View file

@ -6,7 +6,7 @@ use serde_json::{json, Value};
use ruff_diagnostics::Edit;
use ruff_notebook::NotebookIndex;
use ruff_source_file::{OneIndexed, SourceCode, SourceLocation};
use ruff_source_file::{LineColumn, OneIndexed, SourceCode};
use ruff_text_size::Ranged;
use crate::message::{Emitter, EmitterContext, Message};
@ -60,22 +60,23 @@ pub(crate) fn message_to_json_value(message: &Message, context: &EmitterContext)
})
});
let mut start_location = source_code.source_location(message.start());
let mut end_location = source_code.source_location(message.end());
let mut start_location = source_code.line_column(message.start());
let mut end_location = source_code.line_column(message.end());
let mut noqa_location = message
.noqa_offset()
.map(|offset| source_code.source_location(offset));
.map(|offset| source_code.line_column(offset));
let mut notebook_cell_index = None;
if let Some(notebook_index) = notebook_index {
notebook_cell_index = Some(
notebook_index
.cell(start_location.row)
.cell(start_location.line)
.unwrap_or(OneIndexed::MIN),
);
start_location = notebook_index.translate_location(&start_location);
end_location = notebook_index.translate_location(&end_location);
noqa_location = noqa_location.map(|location| notebook_index.translate_location(&location));
start_location = notebook_index.translate_line_column(&start_location);
end_location = notebook_index.translate_line_column(&end_location);
noqa_location =
noqa_location.map(|location| notebook_index.translate_line_column(&location));
}
json!({
@ -84,10 +85,17 @@ pub(crate) fn message_to_json_value(message: &Message, context: &EmitterContext)
"message": message.body(),
"fix": fix,
"cell": notebook_cell_index,
"location": start_location,
"end_location": end_location,
"location": location_to_json(start_location),
"end_location": location_to_json(end_location),
"filename": message.filename(),
"noqa_row": noqa_location.map(|location| location.row)
"noqa_row": noqa_location.map(|location| location.line)
})
}
fn location_to_json(location: LineColumn) -> serde_json::Value {
json!({
"row": location.line,
"column": location.column
})
}
@ -105,8 +113,8 @@ impl Serialize for ExpandedEdits<'_> {
let mut s = serializer.serialize_seq(Some(self.edits.len()))?;
for edit in self.edits {
let mut location = self.source_code.source_location(edit.start());
let mut end_location = self.source_code.source_location(edit.end());
let mut location = self.source_code.line_column(edit.start());
let mut end_location = self.source_code.line_column(edit.end());
if let Some(notebook_index) = self.notebook_index {
// There exists a newline between each cell's source code in the
@ -118,44 +126,44 @@ impl Serialize for ExpandedEdits<'_> {
// If it does, we need to translate the end location to the last
// character of the previous cell.
match (
notebook_index.cell(location.row),
notebook_index.cell(end_location.row),
notebook_index.cell(location.line),
notebook_index.cell(end_location.line),
) {
(Some(start_cell), Some(end_cell)) if start_cell != end_cell => {
debug_assert_eq!(end_location.column.get(), 1);
let prev_row = end_location.row.saturating_sub(1);
end_location = SourceLocation {
row: notebook_index.cell_row(prev_row).unwrap_or(OneIndexed::MIN),
let prev_row = end_location.line.saturating_sub(1);
end_location = LineColumn {
line: notebook_index.cell_row(prev_row).unwrap_or(OneIndexed::MIN),
column: self
.source_code
.source_location(self.source_code.line_end_exclusive(prev_row))
.line_column(self.source_code.line_end_exclusive(prev_row))
.column,
};
}
(Some(_), None) => {
debug_assert_eq!(end_location.column.get(), 1);
let prev_row = end_location.row.saturating_sub(1);
end_location = SourceLocation {
row: notebook_index.cell_row(prev_row).unwrap_or(OneIndexed::MIN),
let prev_row = end_location.line.saturating_sub(1);
end_location = LineColumn {
line: notebook_index.cell_row(prev_row).unwrap_or(OneIndexed::MIN),
column: self
.source_code
.source_location(self.source_code.line_end_exclusive(prev_row))
.line_column(self.source_code.line_end_exclusive(prev_row))
.column,
};
}
_ => {
end_location = notebook_index.translate_location(&end_location);
end_location = notebook_index.translate_line_column(&end_location);
}
}
location = notebook_index.translate_location(&location);
location = notebook_index.translate_line_column(&location);
}
let value = json!({
"content": edit.content().unwrap_or_default(),
"location": location,
"end_location": end_location
"location": location_to_json(location),
"end_location": location_to_json(end_location)
});
s.serialize_element(&value)?;

View file

@ -3,7 +3,7 @@ use std::path::Path;
use quick_junit::{NonSuccessKind, Report, TestCase, TestCaseStatus, TestSuite, XmlString};
use ruff_source_file::SourceLocation;
use ruff_source_file::LineColumn;
use crate::message::{
group_messages_by_filename, Emitter, EmitterContext, Message, MessageWithLocation,
@ -47,14 +47,14 @@ impl Emitter for JunitEmitter {
let location = if context.is_notebook(message.filename()) {
// We can't give a reasonable location for the structured formats,
// so we show one that's clearly a fallback
SourceLocation::default()
LineColumn::default()
} else {
start_location
};
status.set_description(format!(
"line {row}, col {col}, {body}",
row = location.row,
row = location.line,
col = location.column,
body = message.body()
));
@ -72,7 +72,7 @@ impl Emitter for JunitEmitter {
case.set_classname(classname.to_str().unwrap());
case.extra.insert(
XmlString::new("line"),
XmlString::new(location.row.to_string()),
XmlString::new(location.line.to_string()),
);
case.extra.insert(
XmlString::new("column"),

View file

@ -18,7 +18,7 @@ pub use rdjson::RdjsonEmitter;
use ruff_diagnostics::{Diagnostic, DiagnosticKind, Fix};
use ruff_notebook::NotebookIndex;
use ruff_python_parser::{ParseError, UnsupportedSyntaxError};
use ruff_source_file::{SourceFile, SourceLocation};
use ruff_source_file::{LineColumn, SourceFile};
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
pub use sarif::SarifEmitter;
pub use text::TextEmitter;
@ -239,17 +239,15 @@ impl Message {
}
/// Computes the start source location for the message.
pub fn compute_start_location(&self) -> SourceLocation {
pub fn compute_start_location(&self) -> LineColumn {
self.source_file()
.to_source_code()
.source_location(self.start())
.line_column(self.start())
}
/// Computes the end source location for the message.
pub fn compute_end_location(&self) -> SourceLocation {
self.source_file()
.to_source_code()
.source_location(self.end())
pub fn compute_end_location(&self) -> LineColumn {
self.source_file().to_source_code().line_column(self.end())
}
/// Returns the [`SourceFile`] which the message belongs to.
@ -284,7 +282,7 @@ impl Ranged for Message {
struct MessageWithLocation<'a> {
message: &'a Message,
start_location: SourceLocation,
start_location: LineColumn,
}
impl Deref for MessageWithLocation<'_> {

View file

@ -23,7 +23,7 @@ impl Emitter for PylintEmitter {
// so we show one that's clearly a fallback
OneIndexed::from_zero_indexed(0)
} else {
message.compute_start_location().row
message.compute_start_location().line
};
let body = if let Some(rule) = message.rule() {

View file

@ -8,7 +8,7 @@ use ruff_diagnostics::Edit;
use ruff_source_file::SourceCode;
use ruff_text_size::Ranged;
use crate::message::{Emitter, EmitterContext, Message, SourceLocation};
use crate::message::{Emitter, EmitterContext, LineColumn, Message};
#[derive(Default)]
pub struct RdjsonEmitter;
@ -59,15 +59,15 @@ impl Serialize for ExpandedMessages<'_> {
fn message_to_rdjson_value(message: &Message) -> Value {
let source_code = message.source_file().to_source_code();
let start_location = source_code.source_location(message.start());
let end_location = source_code.source_location(message.end());
let start_location = source_code.line_column(message.start());
let end_location = source_code.line_column(message.end());
if let Some(fix) = message.fix() {
json!({
"message": message.body(),
"location": {
"path": message.filename(),
"range": rdjson_range(&start_location, &end_location),
"range": rdjson_range(start_location, end_location),
},
"code": {
"value": message.rule().map(|rule| rule.noqa_code().to_string()),
@ -80,7 +80,7 @@ fn message_to_rdjson_value(message: &Message) -> Value {
"message": message.body(),
"location": {
"path": message.filename(),
"range": rdjson_range(&start_location, &end_location),
"range": rdjson_range(start_location, end_location),
},
"code": {
"value": message.rule().map(|rule| rule.noqa_code().to_string()),
@ -95,11 +95,11 @@ fn rdjson_suggestions(edits: &[Edit], source_code: &SourceCode) -> Value {
edits
.iter()
.map(|edit| {
let location = source_code.source_location(edit.start());
let end_location = source_code.source_location(edit.end());
let location = source_code.line_column(edit.start());
let end_location = source_code.line_column(edit.end());
json!({
"range": rdjson_range(&location, &end_location),
"range": rdjson_range(location, end_location),
"text": edit.content().unwrap_or_default(),
})
})
@ -107,16 +107,10 @@ fn rdjson_suggestions(edits: &[Edit], source_code: &SourceCode) -> Value {
)
}
fn rdjson_range(start: &SourceLocation, end: &SourceLocation) -> Value {
fn rdjson_range(start: LineColumn, end: LineColumn) -> Value {
json!({
"start": {
"line": start.row,
"column": start.column,
},
"end": {
"line": end.row,
"column": end.column,
},
"start": start,
"end": end,
})
}

View file

@ -129,9 +129,9 @@ impl SarifResult {
uri: url::Url::from_file_path(&path)
.map_err(|()| anyhow::anyhow!("Failed to convert path to URL: {}", path.display()))?
.to_string(),
start_line: start_location.row,
start_line: start_location.line,
start_column: start_location.column,
end_line: end_location.row,
end_line: end_location.line,
end_column: end_location.column,
})
}
@ -147,9 +147,9 @@ impl SarifResult {
level: "error".to_string(),
message: message.body().to_string(),
uri: path.display().to_string(),
start_line: start_location.row,
start_line: start_location.line,
start_column: start_location.column,
end_line: end_location.row,
end_line: end_location.line,
end_column: end_location.column,
})
}

View file

@ -7,7 +7,7 @@ use colored::Colorize;
use ruff_annotate_snippets::{Level, Renderer, Snippet};
use ruff_notebook::NotebookIndex;
use ruff_source_file::{OneIndexed, SourceLocation};
use ruff_source_file::{LineColumn, OneIndexed};
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use crate::fs::relativize_path;
@ -86,14 +86,14 @@ impl Emitter for TextEmitter {
writer,
"cell {cell}{sep}",
cell = notebook_index
.cell(start_location.row)
.cell(start_location.line)
.unwrap_or(OneIndexed::MIN),
sep = ":".cyan(),
)?;
SourceLocation {
row: notebook_index
.cell_row(start_location.row)
LineColumn {
line: notebook_index
.cell_row(start_location.line)
.unwrap_or(OneIndexed::MIN),
column: start_location.column,
}
@ -104,7 +104,7 @@ impl Emitter for TextEmitter {
writeln!(
writer,
"{row}{sep}{col}{sep} {code_and_body}",
row = diagnostic_location.row,
row = diagnostic_location.line,
col = diagnostic_location.column,
sep = ":".cyan(),
code_and_body = RuleCodeAndBody {

View file

@ -1,6 +1,6 @@
use serde::{Deserialize, Serialize};
use ruff_source_file::{OneIndexed, SourceLocation};
use ruff_source_file::{LineColumn, OneIndexed, SourceLocation};
/// Jupyter Notebook indexing table
///
@ -33,16 +33,29 @@ impl NotebookIndex {
self.row_to_row_in_cell.get(row.to_zero_indexed()).copied()
}
/// Translates the given source location based on the indexing table.
/// Translates the given [`LineColumn`] based on the indexing table.
///
/// This will translate the row/column in the concatenated source code
/// to the row/column in the Jupyter Notebook.
pub fn translate_location(&self, source_location: &SourceLocation) -> SourceLocation {
SourceLocation {
row: self
.cell_row(source_location.row)
pub fn translate_line_column(&self, source_location: &LineColumn) -> LineColumn {
LineColumn {
line: self
.cell_row(source_location.line)
.unwrap_or(OneIndexed::MIN),
column: source_location.column,
}
}
/// Translates the given [`SourceLocation`] based on the indexing table.
///
/// This will translate the line/character in the concatenated source code
/// to the line/character in the Jupyter Notebook.
pub fn translate_source_location(&self, source_location: &SourceLocation) -> SourceLocation {
SourceLocation {
line: self
.cell_row(source_location.line)
.unwrap_or(OneIndexed::MIN),
character_offset: source_location.character_offset,
}
}
}

View file

@ -430,10 +430,10 @@ fn ensure_unchanged_ast(
formatted_unsupported_syntax_errors
.into_values()
.map(|error| {
let location = index.source_location(error.start(), formatted_code);
let location = index.line_column(error.start(), formatted_code);
format!(
"{row}:{col} {error}",
row = location.row,
row = location.line,
col = location.column
)
})

View file

@ -31,6 +31,16 @@ pub enum PositionEncoding {
UTF8,
}
impl From<PositionEncoding> for ruff_source_file::PositionEncoding {
fn from(value: PositionEncoding) -> Self {
match value {
PositionEncoding::UTF8 => Self::Utf8,
PositionEncoding::UTF16 => Self::Utf16,
PositionEncoding::UTF32 => Self::Utf32,
}
}
}
/// A unique document ID, derived from a URL passed as part of an LSP request.
/// This document ID can point to either be a standalone Python file, a full notebook, or a cell within a notebook.
#[derive(Clone, Debug)]

View file

@ -2,9 +2,9 @@ use super::notebook;
use super::PositionEncoding;
use lsp_types as types;
use ruff_notebook::NotebookIndex;
use ruff_source_file::OneIndexed;
use ruff_source_file::{LineIndex, SourceLocation};
use ruff_text_size::{TextRange, TextSize};
use ruff_source_file::LineIndex;
use ruff_source_file::{OneIndexed, SourceLocation};
use ruff_text_size::TextRange;
pub(crate) struct NotebookRange {
pub(crate) cell: notebook::CellId,
@ -38,76 +38,43 @@ impl RangeExt for lsp_types::Range {
index: &LineIndex,
encoding: PositionEncoding,
) -> TextRange {
let start_line = index.line_range(
OneIndexed::from_zero_indexed(u32_index_to_usize(self.start.line)),
let start = index.offset(
SourceLocation {
line: OneIndexed::from_zero_indexed(u32_index_to_usize(self.start.line)),
character_offset: OneIndexed::from_zero_indexed(u32_index_to_usize(
self.start.character,
)),
},
text,
encoding.into(),
);
let end_line = index.line_range(
OneIndexed::from_zero_indexed(u32_index_to_usize(self.end.line)),
let end = index.offset(
SourceLocation {
line: OneIndexed::from_zero_indexed(u32_index_to_usize(self.end.line)),
character_offset: OneIndexed::from_zero_indexed(u32_index_to_usize(
self.end.character,
)),
},
text,
encoding.into(),
);
let (start_column_offset, end_column_offset) = match encoding {
PositionEncoding::UTF8 => (
TextSize::new(self.start.character),
TextSize::new(self.end.character),
),
PositionEncoding::UTF16 => {
// Fast path for ASCII only documents
if index.is_ascii() {
(
TextSize::new(self.start.character),
TextSize::new(self.end.character),
)
} else {
// UTF16 encodes characters either as one or two 16 bit words.
// The position in `range` is the 16-bit word offset from the start of the line (and not the character offset)
// UTF-16 with a text that may use variable-length characters.
(
utf8_column_offset(self.start.character, &text[start_line]),
utf8_column_offset(self.end.character, &text[end_line]),
)
}
}
PositionEncoding::UTF32 => {
// UTF-32 uses 4 bytes for each character. Meaning, the position in range is a character offset.
return TextRange::new(
index.offset(
OneIndexed::from_zero_indexed(u32_index_to_usize(self.start.line)),
OneIndexed::from_zero_indexed(u32_index_to_usize(self.start.character)),
text,
),
index.offset(
OneIndexed::from_zero_indexed(u32_index_to_usize(self.end.line)),
OneIndexed::from_zero_indexed(u32_index_to_usize(self.end.character)),
text,
),
);
}
};
TextRange::new(
start_line.start() + start_column_offset.clamp(TextSize::new(0), start_line.end()),
end_line.start() + end_column_offset.clamp(TextSize::new(0), end_line.end()),
)
TextRange::new(start, end)
}
}
impl ToRangeExt for TextRange {
fn to_range(&self, text: &str, index: &LineIndex, encoding: PositionEncoding) -> types::Range {
types::Range {
start: source_location_to_position(&offset_to_source_location(
start: source_location_to_position(&index.source_location(
self.start(),
text,
index,
encoding,
encoding.into(),
)),
end: source_location_to_position(&offset_to_source_location(
end: source_location_to_position(&index.source_location(
self.end(),
text,
index,
encoding,
encoding.into(),
)),
}
}
@ -119,26 +86,26 @@ impl ToRangeExt for TextRange {
notebook_index: &NotebookIndex,
encoding: PositionEncoding,
) -> NotebookRange {
let start = offset_to_source_location(self.start(), text, source_index, encoding);
let mut end = offset_to_source_location(self.end(), text, source_index, encoding);
let starting_cell = notebook_index.cell(start.row);
let start = source_index.source_location(self.start(), text, encoding.into());
let mut end = source_index.source_location(self.end(), text, encoding.into());
let starting_cell = notebook_index.cell(start.line);
// weird edge case here - if the end of the range is where the newline after the cell got added (making it 'out of bounds')
// we need to move it one character back (which should place it at the end of the last line).
// we test this by checking if the ending offset is in a different (or nonexistent) cell compared to the cell of the starting offset.
if notebook_index.cell(end.row) != starting_cell {
end.row = end.row.saturating_sub(1);
end.column = offset_to_source_location(
self.end().checked_sub(1.into()).unwrap_or_default(),
text,
source_index,
encoding,
)
.column;
if notebook_index.cell(end.line) != starting_cell {
end.line = end.line.saturating_sub(1);
end.character_offset = source_index
.source_location(
self.end().checked_sub(1.into()).unwrap_or_default(),
text,
encoding.into(),
)
.character_offset;
}
let start = source_location_to_position(&notebook_index.translate_location(&start));
let end = source_location_to_position(&notebook_index.translate_location(&end));
let start = source_location_to_position(&notebook_index.translate_source_location(&start));
let end = source_location_to_position(&notebook_index.translate_source_location(&end));
NotebookRange {
cell: starting_cell
@ -149,67 +116,10 @@ impl ToRangeExt for TextRange {
}
}
/// Converts a UTF-16 code unit offset for a given line into a UTF-8 column number.
fn utf8_column_offset(utf16_code_unit_offset: u32, line: &str) -> TextSize {
let mut utf8_code_unit_offset = TextSize::new(0);
let mut i = 0u32;
for c in line.chars() {
if i >= utf16_code_unit_offset {
break;
}
// Count characters encoded as two 16 bit words as 2 characters.
{
utf8_code_unit_offset +=
TextSize::new(u32::try_from(c.len_utf8()).expect("utf8 len always <=4"));
i += u32::try_from(c.len_utf16()).expect("utf16 len always <=2");
}
}
utf8_code_unit_offset
}
fn offset_to_source_location(
offset: TextSize,
text: &str,
index: &LineIndex,
encoding: PositionEncoding,
) -> SourceLocation {
match encoding {
PositionEncoding::UTF8 => {
let row = index.line_index(offset);
let column = offset - index.line_start(row, text);
SourceLocation {
column: OneIndexed::from_zero_indexed(column.to_usize()),
row,
}
}
PositionEncoding::UTF16 => {
let row = index.line_index(offset);
let column = if index.is_ascii() {
(offset - index.line_start(row, text)).to_usize()
} else {
let up_to_line = &text[TextRange::new(index.line_start(row, text), offset)];
up_to_line.encode_utf16().count()
};
SourceLocation {
column: OneIndexed::from_zero_indexed(column),
row,
}
}
PositionEncoding::UTF32 => index.source_location(offset, text),
}
}
fn source_location_to_position(location: &SourceLocation) -> types::Position {
types::Position {
line: u32::try_from(location.row.to_zero_indexed()).expect("row usize fits in u32"),
character: u32::try_from(location.column.to_zero_indexed())
line: u32::try_from(location.line.to_zero_indexed()).expect("row usize fits in u32"),
character: u32::try_from(location.character_offset.to_zero_indexed())
.expect("character usize fits in u32"),
}
}

View file

@ -59,6 +59,7 @@ impl Server {
let client_capabilities = init_params.capabilities;
let position_encoding = Self::find_best_position_encoding(&client_capabilities);
let server_capabilities = Self::server_capabilities(position_encoding);
let connection = connection.initialize_finish(
@ -98,6 +99,8 @@ impl Server {
workspace_settings.unwrap_or_default(),
)?;
tracing::debug!("Negotiated position encoding: {position_encoding:?}");
Ok(Self {
connection,
worker_threads,

View file

@ -7,7 +7,7 @@ use serde::{Deserialize, Serialize};
use ruff_text_size::{Ranged, TextRange, TextSize};
pub use crate::line_index::{LineIndex, OneIndexed};
pub use crate::line_index::{LineIndex, OneIndexed, PositionEncoding};
pub use crate::line_ranges::LineRanges;
pub use crate::newlines::{
find_newline, Line, LineEnding, NewlineWithTrailingNewline, UniversalNewlineIterator,
@ -18,7 +18,7 @@ mod line_index;
mod line_ranges;
mod newlines;
/// Gives access to the source code of a file and allows mapping between [`TextSize`] and [`SourceLocation`].
/// Gives access to the source code of a file and allows mapping between [`TextSize`] and [`LineColumn`].
#[derive(Debug)]
pub struct SourceCode<'src, 'index> {
text: &'src str,
@ -33,10 +33,20 @@ impl<'src, 'index> SourceCode<'src, 'index> {
}
}
/// Computes the one indexed row and column numbers for `offset`.
/// Computes the one indexed line and column numbers for `offset`, skipping any potential BOM.
#[inline]
pub fn source_location(&self, offset: TextSize) -> SourceLocation {
self.index.source_location(offset, self.text)
pub fn line_column(&self, offset: TextSize) -> LineColumn {
self.index.line_column(offset, self.text)
}
#[inline]
pub fn source_location(
&self,
offset: TextSize,
position_encoding: PositionEncoding,
) -> SourceLocation {
self.index
.source_location(offset, self.text, position_encoding)
}
#[inline]
@ -229,34 +239,62 @@ impl PartialEq for SourceFileInner {
impl Eq for SourceFileInner {}
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
/// The line and column of an offset in a source file.
///
/// See [`LineIndex::line_column`] for more information.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct SourceLocation {
pub row: OneIndexed,
pub struct LineColumn {
/// The line in the source text.
pub line: OneIndexed,
/// The column (UTF scalar values) relative to the start of the line except any
/// potential BOM on the first line.
pub column: OneIndexed,
}
impl Default for SourceLocation {
impl Default for LineColumn {
fn default() -> Self {
Self {
row: OneIndexed::MIN,
line: OneIndexed::MIN,
column: OneIndexed::MIN,
}
}
}
impl Debug for SourceLocation {
impl Debug for LineColumn {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("SourceLocation")
.field("row", &self.row.get())
f.debug_struct("LineColumn")
.field("line", &self.line.get())
.field("column", &self.column.get())
.finish()
}
}
impl std::fmt::Display for SourceLocation {
impl std::fmt::Display for LineColumn {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
write!(f, "{row}:{column}", row = self.row, column = self.column)
write!(f, "{line}:{column}", line = self.line, column = self.column)
}
}
/// A position into a source file represented by the line number and the offset to that character relative to the start of that line.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct SourceLocation {
/// The line in the source text.
pub line: OneIndexed,
/// The offset from the start of the line to the character.
///
/// This can be a byte offset, the number of UTF16 code points, or the UTF8 code units, depending on the
/// [`PositionEncoding`] used.
pub character_offset: OneIndexed,
}
impl Default for SourceLocation {
fn default() -> Self {
Self {
line: OneIndexed::MIN,
character_offset: OneIndexed::MIN,
}
}
}

View file

@ -5,13 +5,12 @@ use std::ops::Deref;
use std::str::FromStr;
use std::sync::Arc;
use crate::{LineColumn, SourceLocation};
use ruff_text_size::{TextLen, TextRange, TextSize};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use crate::SourceLocation;
/// Index for fast [byte offset](TextSize) to [`SourceLocation`] conversions.
/// Index for fast [byte offset](TextSize) to [`LineColumn`] conversions.
///
/// Cloning a [`LineIndex`] is cheap because it only requires bumping a reference count.
#[derive(Clone, Eq, PartialEq)]
@ -66,60 +65,146 @@ impl LineIndex {
self.inner.kind
}
/// Returns the row and column index for an offset.
/// Returns the line and column number for an UTF-8 byte offset.
///
/// The `column` number is the nth-character of the line, except for the first line
/// where it doesn't include the UTF-8 BOM marker at the start of the file.
///
/// ### BOM handling
///
/// For files starting with a UTF-8 BOM marker, the byte offsets
/// in the range `0...3` are all mapped to line 0 and column 0.
/// Because of this, the conversion isn't losless.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::TextSize;
/// # use ruff_source_file::{LineIndex, OneIndexed, SourceLocation};
/// let source = "def a():\n pass";
/// let index = LineIndex::from_source_text(source);
/// # use ruff_source_file::{LineIndex, OneIndexed, LineColumn};
/// let source = format!("\u{FEFF}{}", "def a():\n pass");
/// let index = LineIndex::from_source_text(&source);
///
/// // Before BOM, maps to after BOM
/// assert_eq!(
/// index.source_location(TextSize::from(0), source),
/// SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) }
/// index.line_column(TextSize::from(0), &source),
/// LineColumn { line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) }
/// );
///
/// // After BOM, maps to after BOM
/// assert_eq!(
/// index.line_column(TextSize::from(3), &source),
/// LineColumn { line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) }
/// );
///
/// assert_eq!(
/// index.source_location(TextSize::from(4), source),
/// SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(4) }
/// index.line_column(TextSize::from(7), &source),
/// LineColumn { line: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(4) }
/// );
/// assert_eq!(
/// index.source_location(TextSize::from(13), source),
/// SourceLocation { row: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(4) }
/// index.line_column(TextSize::from(16), &source),
/// LineColumn { line: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(4) }
/// );
/// ```
///
/// ## Panics
///
/// If the offset is out of bounds.
pub fn source_location(&self, offset: TextSize, content: &str) -> SourceLocation {
match self.line_starts().binary_search(&offset) {
// Offset is at the start of a line
Ok(row) => SourceLocation {
row: OneIndexed::from_zero_indexed(row),
column: OneIndexed::from_zero_indexed(0),
},
Err(next_row) => {
// SAFETY: Safe because the index always contains an entry for the offset 0
let row = next_row - 1;
let mut line_start = self.line_starts()[row];
/// If the byte offset isn't within the bounds of `content`.
pub fn line_column(&self, offset: TextSize, content: &str) -> LineColumn {
let location = self.source_location(offset, content, PositionEncoding::Utf32);
let column = if self.kind().is_ascii() {
usize::from(offset) - usize::from(line_start)
} else {
// Don't count the BOM character as a column.
if line_start == TextSize::from(0) && content.starts_with('\u{feff}') {
line_start = '\u{feff}'.text_len();
}
// Don't count the BOM character as a column, but only on the first line.
let column = if location.line.to_zero_indexed() == 0 && content.starts_with('\u{feff}') {
location.character_offset.saturating_sub(1)
} else {
location.character_offset
};
content[TextRange::new(line_start, offset)].chars().count()
};
LineColumn {
line: location.line,
column,
}
}
/// Given a UTF-8 byte offset, returns the line and character offset according to the given encoding.
///
/// ### BOM handling
///
/// Unlike [`Self::line_column`], this method does not skip the BOM character at the start of the file.
/// This allows for bidirectional mapping between [`SourceLocation`] and [`TextSize`] (see [`Self::offset`]).
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::TextSize;
/// # use ruff_source_file::{LineIndex, OneIndexed, LineColumn, SourceLocation, PositionEncoding, Line};
/// let source = format!("\u{FEFF}{}", "def a():\n pass");
/// let index = LineIndex::from_source_text(&source);
///
/// // Before BOM, maps to character 0
/// assert_eq!(
/// index.source_location(TextSize::from(0), &source, PositionEncoding::Utf32),
/// SourceLocation { line: OneIndexed::from_zero_indexed(0), character_offset: OneIndexed::from_zero_indexed(0) }
/// );
///
/// // After BOM, maps to after BOM
/// assert_eq!(
/// index.source_location(TextSize::from(3), &source, PositionEncoding::Utf32),
/// SourceLocation { line: OneIndexed::from_zero_indexed(0), character_offset: OneIndexed::from_zero_indexed(1) }
/// );
///
/// assert_eq!(
/// index.source_location(TextSize::from(7), &source, PositionEncoding::Utf32),
/// SourceLocation { line: OneIndexed::from_zero_indexed(0), character_offset: OneIndexed::from_zero_indexed(5) }
/// );
/// assert_eq!(
/// index.source_location(TextSize::from(16), &source, PositionEncoding::Utf32),
/// SourceLocation { line: OneIndexed::from_zero_indexed(1), character_offset: OneIndexed::from_zero_indexed(4) }
/// );
/// ```
///
/// ## Panics
///
/// If the UTF-8 byte offset is out of bounds of `text`.
pub fn source_location(
&self,
offset: TextSize,
text: &str,
encoding: PositionEncoding,
) -> SourceLocation {
let line = self.line_index(offset);
let line_start = self.line_start(line, text);
if self.is_ascii() {
return SourceLocation {
line,
character_offset: OneIndexed::from_zero_indexed((offset - line_start).to_usize()),
};
}
match encoding {
PositionEncoding::Utf8 => {
let character_offset = offset - line_start;
SourceLocation {
line,
character_offset: OneIndexed::from_zero_indexed(character_offset.to_usize()),
}
}
PositionEncoding::Utf16 => {
let up_to_character = &text[TextRange::new(line_start, offset)];
let character = up_to_character.encode_utf16().count();
SourceLocation {
row: OneIndexed::from_zero_indexed(row),
column: OneIndexed::from_zero_indexed(column),
line,
character_offset: OneIndexed::from_zero_indexed(character),
}
}
PositionEncoding::Utf32 => {
let up_to_character = &text[TextRange::new(line_start, offset)];
let character = up_to_character.chars().count();
SourceLocation {
line,
character_offset: OneIndexed::from_zero_indexed(character),
}
}
}
@ -141,7 +226,7 @@ impl LineIndex {
///
/// ```
/// # use ruff_text_size::TextSize;
/// # use ruff_source_file::{LineIndex, OneIndexed, SourceLocation};
/// # use ruff_source_file::{LineIndex, OneIndexed, LineColumn};
/// let source = "def a():\n pass";
/// let index = LineIndex::from_source_text(source);
///
@ -221,83 +306,211 @@ impl LineIndex {
}
}
/// Returns the [byte offset](TextSize) at `line` and `column`.
/// Returns the [UTF-8 byte offset](TextSize) at `line` and `character` where character is counted using the given encoding.
///
/// ## Examples
///
/// ### ASCII
/// ### ASCII only source text
///
/// ```
/// use ruff_source_file::{LineIndex, OneIndexed};
/// use ruff_text_size::TextSize;
/// # use ruff_source_file::{SourceLocation, LineIndex, OneIndexed, PositionEncoding};
/// # use ruff_text_size::TextSize;
/// let source = r#"a = 4
/// c = "some string"
/// x = b"#;
///
/// let index = LineIndex::from_source_text(source);
///
/// // First line, first column
/// assert_eq!(index.offset(OneIndexed::from_zero_indexed(0), OneIndexed::from_zero_indexed(0), source), TextSize::new(0));
/// // First line, first character
/// assert_eq!(
/// index.offset(
/// SourceLocation {
/// line: OneIndexed::from_zero_indexed(0),
/// character_offset: OneIndexed::from_zero_indexed(0)
/// },
/// source,
/// PositionEncoding::Utf32,
/// ),
/// TextSize::new(0)
/// );
///
/// // Second line, 4th column
/// assert_eq!(index.offset(OneIndexed::from_zero_indexed(1), OneIndexed::from_zero_indexed(4), source), TextSize::new(10));
/// assert_eq!(
/// index.offset(
/// SourceLocation {
/// line: OneIndexed::from_zero_indexed(1),
/// character_offset: OneIndexed::from_zero_indexed(4)
/// },
/// source,
/// PositionEncoding::Utf32,
/// ),
/// TextSize::new(10)
/// );
///
/// // Offset past the end of the first line
/// assert_eq!(index.offset(OneIndexed::from_zero_indexed(0), OneIndexed::from_zero_indexed(10), source), TextSize::new(6));
/// assert_eq!(
/// index.offset(
/// SourceLocation {
/// line: OneIndexed::from_zero_indexed(0),
/// character_offset: OneIndexed::from_zero_indexed(10)
/// },
/// source,
/// PositionEncoding::Utf32,
/// ),
/// TextSize::new(6)
/// );
///
/// // Offset past the end of the file
/// assert_eq!(index.offset(OneIndexed::from_zero_indexed(3), OneIndexed::from_zero_indexed(0), source), TextSize::new(29));
/// assert_eq!(
/// index.offset(
/// SourceLocation {
/// line: OneIndexed::from_zero_indexed(3),
/// character_offset: OneIndexed::from_zero_indexed(0)
/// },
/// source,
/// PositionEncoding::Utf32,
/// ),
/// TextSize::new(29)
/// );
/// ```
///
/// ### UTF8
/// ### Non-ASCII source text
///
/// ```
/// use ruff_source_file::{LineIndex, OneIndexed};
/// use ruff_source_file::{LineIndex, OneIndexed, SourceLocation, PositionEncoding};
/// use ruff_text_size::TextSize;
/// let source = r#"a = 4
/// let source = format!("\u{FEFF}{}", r#"a = 4
/// c = "❤️"
/// x = b"#;
/// x = b"#);
///
/// let index = LineIndex::from_source_text(source);
/// let index = LineIndex::from_source_text(&source);
///
/// // First line, first column
/// assert_eq!(index.offset(OneIndexed::from_zero_indexed(0), OneIndexed::from_zero_indexed(0), source), TextSize::new(0));
/// // First line, first character, points at the BOM
/// assert_eq!(
/// index.offset(
/// SourceLocation {
/// line: OneIndexed::from_zero_indexed(0),
/// character_offset: OneIndexed::from_zero_indexed(0)
/// },
/// &source,
/// PositionEncoding::Utf32,
/// ),
/// TextSize::new(0)
/// );
///
/// // First line, after the BOM
/// assert_eq!(
/// index.offset(
/// SourceLocation {
/// line: OneIndexed::from_zero_indexed(0),
/// character_offset: OneIndexed::from_zero_indexed(1)
/// },
/// &source,
/// PositionEncoding::Utf32,
/// ),
/// TextSize::new(3)
/// );
///
/// // second line, 7th character, after emoji, UTF32
/// assert_eq!(
/// index.offset(
/// SourceLocation {
/// line: OneIndexed::from_zero_indexed(1),
/// character_offset: OneIndexed::from_zero_indexed(7)
/// },
/// &source,
/// PositionEncoding::Utf32,
/// ),
/// TextSize::new(20)
/// );
///
/// // Second line, 7th character, after emoji, UTF 16
/// assert_eq!(
/// index.offset(
/// SourceLocation {
/// line: OneIndexed::from_zero_indexed(1),
/// character_offset: OneIndexed::from_zero_indexed(7)
/// },
/// &source,
/// PositionEncoding::Utf16,
/// ),
/// TextSize::new(20)
/// );
///
/// // Third line, 2nd column, after emoji
/// assert_eq!(index.offset(OneIndexed::from_zero_indexed(2), OneIndexed::from_zero_indexed(1), source), TextSize::new(20));
///
/// // Offset past the end of the second line
/// assert_eq!(index.offset(OneIndexed::from_zero_indexed(1), OneIndexed::from_zero_indexed(10), source), TextSize::new(19));
/// assert_eq!(
/// index.offset(
/// SourceLocation {
/// line: OneIndexed::from_zero_indexed(1),
/// character_offset: OneIndexed::from_zero_indexed(10)
/// },
/// &source,
/// PositionEncoding::Utf32,
/// ),
/// TextSize::new(22)
/// );
///
/// // Offset past the end of the file
/// assert_eq!(index.offset(OneIndexed::from_zero_indexed(3), OneIndexed::from_zero_indexed(0), source), TextSize::new(24));
/// assert_eq!(
/// index.offset(
/// SourceLocation {
/// line: OneIndexed::from_zero_indexed(3),
/// character_offset: OneIndexed::from_zero_indexed(0)
/// },
/// &source,
/// PositionEncoding::Utf32,
/// ),
/// TextSize::new(27)
/// );
/// ```
///
pub fn offset(&self, line: OneIndexed, column: OneIndexed, contents: &str) -> TextSize {
pub fn offset(
&self,
position: SourceLocation,
text: &str,
position_encoding: PositionEncoding,
) -> TextSize {
// If start-of-line position after last line
if line.to_zero_indexed() > self.line_starts().len() {
return contents.text_len();
if position.line.to_zero_indexed() > self.line_starts().len() {
return text.text_len();
}
let line_range = self.line_range(line, contents);
let line_range = self.line_range(position.line, text);
match self.kind() {
IndexKind::Ascii => {
line_range.start()
+ TextSize::try_from(column.to_zero_indexed())
.unwrap_or(line_range.len())
.clamp(TextSize::new(0), line_range.len())
}
IndexKind::Utf8 => {
let rest = &contents[line_range];
let column_offset: TextSize = rest
let character_offset = position.character_offset.to_zero_indexed();
let character_byte_offset = if self.is_ascii() {
TextSize::try_from(character_offset).unwrap()
} else {
let line = &text[line_range];
match position_encoding {
PositionEncoding::Utf8 => {
TextSize::try_from(position.character_offset.to_zero_indexed()).unwrap()
}
PositionEncoding::Utf16 => {
let mut byte_offset = TextSize::new(0);
let mut utf16_code_unit_offset = 0;
for c in line.chars() {
if utf16_code_unit_offset >= character_offset {
break;
}
// Count characters encoded as two 16 bit words as 2 characters.
byte_offset += c.text_len();
utf16_code_unit_offset += c.len_utf16();
}
byte_offset
}
PositionEncoding::Utf32 => line
.chars()
.take(column.to_zero_indexed())
.take(position.character_offset.to_zero_indexed())
.map(ruff_text_size::TextLen::text_len)
.sum();
line_range.start() + column_offset
.sum(),
}
}
};
line_range.start() + character_byte_offset.clamp(TextSize::new(0), line_range.len())
}
/// Returns the [byte offsets](TextSize) for every line
@ -430,12 +643,26 @@ impl FromStr for OneIndexed {
}
}
#[derive(Default, Copy, Clone, Debug)]
pub enum PositionEncoding {
/// Character offsets count the number of bytes from the start of the line.
#[default]
Utf8,
/// Character offsets count the number of UTF-16 code units from the start of the line.
Utf16,
/// Character offsets count the number of UTF-32 code points units (the same as number of characters in Rust)
/// from the start of the line.
Utf32,
}
#[cfg(test)]
mod tests {
use ruff_text_size::TextSize;
use crate::line_index::LineIndex;
use crate::{OneIndexed, SourceLocation};
use crate::{LineColumn, OneIndexed};
#[test]
fn ascii_index() {
@ -466,30 +693,30 @@ mod tests {
let index = LineIndex::from_source_text(contents);
// First row.
let loc = index.source_location(TextSize::from(2), contents);
let loc = index.line_column(TextSize::from(2), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
LineColumn {
line: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(2)
}
);
// Second row.
let loc = index.source_location(TextSize::from(6), contents);
let loc = index.line_column(TextSize::from(6), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
LineColumn {
line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
let loc = index.source_location(TextSize::from(11), contents);
let loc = index.line_column(TextSize::from(11), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
LineColumn {
line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(5)
}
);
@ -502,23 +729,23 @@ mod tests {
assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(6)]);
assert_eq!(
index.source_location(TextSize::from(4), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
index.line_column(TextSize::from(4), contents),
LineColumn {
line: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(4)
}
);
assert_eq!(
index.source_location(TextSize::from(6), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
index.line_column(TextSize::from(6), contents),
LineColumn {
line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
assert_eq!(
index.source_location(TextSize::from(7), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
index.line_column(TextSize::from(7), contents),
LineColumn {
line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1)
}
);
@ -531,23 +758,23 @@ mod tests {
assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(7)]);
assert_eq!(
index.source_location(TextSize::from(4), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
index.line_column(TextSize::from(4), contents),
LineColumn {
line: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(4)
}
);
assert_eq!(
index.source_location(TextSize::from(7), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
index.line_column(TextSize::from(7), contents),
LineColumn {
line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
assert_eq!(
index.source_location(TextSize::from(8), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
index.line_column(TextSize::from(8), contents),
LineColumn {
line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1)
}
);
@ -598,23 +825,23 @@ mod tests {
// Second '
assert_eq!(
index.source_location(TextSize::from(9), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
index.line_column(TextSize::from(9), contents),
LineColumn {
line: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(6)
}
);
assert_eq!(
index.source_location(TextSize::from(11), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
index.line_column(TextSize::from(11), contents),
LineColumn {
line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
assert_eq!(
index.source_location(TextSize::from(12), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
index.line_column(TextSize::from(12), contents),
LineColumn {
line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1)
}
);
@ -632,23 +859,23 @@ mod tests {
// Second '
assert_eq!(
index.source_location(TextSize::from(9), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
index.line_column(TextSize::from(9), contents),
LineColumn {
line: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(6)
}
);
assert_eq!(
index.source_location(TextSize::from(12), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
index.line_column(TextSize::from(12), contents),
LineColumn {
line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
assert_eq!(
index.source_location(TextSize::from(13), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
index.line_column(TextSize::from(13), contents),
LineColumn {
line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1)
}
);
@ -664,49 +891,49 @@ mod tests {
);
// First row.
let loc = index.source_location(TextSize::from(0), contents);
let loc = index.line_column(TextSize::from(0), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
LineColumn {
line: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(0)
}
);
let loc = index.source_location(TextSize::from(5), contents);
let loc = index.line_column(TextSize::from(5), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
LineColumn {
line: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(5)
}
);
let loc = index.source_location(TextSize::from(8), contents);
let loc = index.line_column(TextSize::from(8), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
LineColumn {
line: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(6)
}
);
// Second row.
let loc = index.source_location(TextSize::from(10), contents);
let loc = index.line_column(TextSize::from(10), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
LineColumn {
line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
// One-past-the-end.
let loc = index.source_location(TextSize::from(15), contents);
let loc = index.line_column(TextSize::from(15), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
LineColumn {
line: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(5)
}
);

View file

@ -21,7 +21,7 @@ use ruff_python_formatter::{format_module_ast, pretty_comments, PyFormatContext,
use ruff_python_index::Indexer;
use ruff_python_parser::{parse, parse_unchecked, Mode, ParseOptions, Parsed};
use ruff_python_trivia::CommentRanges;
use ruff_source_file::SourceLocation;
use ruff_source_file::{LineColumn, OneIndexed};
use ruff_text_size::Ranged;
use ruff_workspace::configuration::Configuration;
use ruff_workspace::options::{FormatOptions, LintCommonOptions, LintOptions, Options};
@ -61,8 +61,8 @@ export interface Diagnostic {
pub struct ExpandedMessage {
pub code: Option<String>,
pub message: String,
pub start_location: SourceLocation,
pub end_location: SourceLocation,
pub start_location: Location,
pub end_location: Location,
pub fix: Option<ExpandedFix>,
}
@ -74,8 +74,8 @@ pub struct ExpandedFix {
#[derive(Serialize, Deserialize, Eq, PartialEq, Debug)]
struct ExpandedEdit {
location: SourceLocation,
end_location: SourceLocation,
location: Location,
end_location: Location,
content: Option<String>,
}
@ -214,16 +214,16 @@ impl Workspace {
}) => ExpandedMessage {
code: Some(kind.rule().noqa_code().to_string()),
message: kind.body,
start_location: source_code.source_location(range.start()),
end_location: source_code.source_location(range.end()),
start_location: source_code.line_column(range.start()).into(),
end_location: source_code.line_column(range.end()).into(),
fix: fix.map(|fix| ExpandedFix {
message: kind.suggestion,
edits: fix
.edits()
.iter()
.map(|edit| ExpandedEdit {
location: source_code.source_location(edit.start()),
end_location: source_code.source_location(edit.end()),
location: source_code.line_column(edit.start()).into(),
end_location: source_code.line_column(edit.end()).into(),
content: edit.content().map(ToString::to_string),
})
.collect(),
@ -233,8 +233,8 @@ impl Workspace {
ExpandedMessage {
code: None,
message,
start_location: source_code.source_location(range.start()),
end_location: source_code.source_location(range.end()),
start_location: source_code.line_column(range.start()).into(),
end_location: source_code.line_column(range.end()).into(),
fix: None,
}
}
@ -316,3 +316,18 @@ impl<'a> ParsedModule<'a> {
)
}
}
#[derive(Serialize, Deserialize, Eq, PartialEq, Debug)]
pub struct Location {
pub row: OneIndexed,
pub column: OneIndexed,
}
impl From<LineColumn> for Location {
fn from(value: LineColumn) -> Self {
Self {
row: value.line,
column: value.column,
}
}
}

View file

@ -3,8 +3,8 @@
use wasm_bindgen_test::wasm_bindgen_test;
use ruff_linter::registry::Rule;
use ruff_source_file::{OneIndexed, SourceLocation};
use ruff_wasm::{ExpandedMessage, Workspace};
use ruff_source_file::OneIndexed;
use ruff_wasm::{ExpandedMessage, Location, Workspace};
macro_rules! check {
($source:expr, $config:expr, $expected:expr) => {{
@ -27,11 +27,11 @@ fn empty_config() {
[ExpandedMessage {
code: Some(Rule::IfTuple.noqa_code().to_string()),
message: "If test is a tuple, which is always `True`".to_string(),
start_location: SourceLocation {
start_location: Location {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(3)
},
end_location: SourceLocation {
end_location: Location {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(9)
},
@ -48,11 +48,11 @@ fn syntax_error() {
[ExpandedMessage {
code: None,
message: "SyntaxError: Expected an expression".to_string(),
start_location: SourceLocation {
start_location: Location {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(3)
},
end_location: SourceLocation {
end_location: Location {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
},
@ -69,11 +69,11 @@ fn unsupported_syntax_error() {
[ExpandedMessage {
code: None,
message: "SyntaxError: Cannot use `match` statement on Python 3.9 (syntax was added in Python 3.10)".to_string(),
start_location: SourceLocation {
start_location: Location {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(0)
},
end_location: SourceLocation {
end_location: Location {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(5)
},