mirror of
https://github.com/astral-sh/ruff.git
synced 2025-07-24 13:33:50 +00:00
Split SourceLocation
into LineColumn
and SourceLocation
(#17587)
This commit is contained in:
parent
4443f6653c
commit
1c65e0ad25
29 changed files with 695 additions and 537 deletions
|
@ -31,6 +31,16 @@ pub enum PositionEncoding {
|
|||
UTF8,
|
||||
}
|
||||
|
||||
impl From<PositionEncoding> for ruff_source_file::PositionEncoding {
|
||||
fn from(value: PositionEncoding) -> Self {
|
||||
match value {
|
||||
PositionEncoding::UTF8 => Self::Utf8,
|
||||
PositionEncoding::UTF16 => Self::Utf16,
|
||||
PositionEncoding::UTF32 => Self::Utf32,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A unique document ID, derived from a URL passed as part of an LSP request.
|
||||
/// This document ID can point to either be a standalone Python file, a full notebook, or a cell within a notebook.
|
||||
#[derive(Clone, Debug)]
|
||||
|
|
|
@ -2,9 +2,9 @@ use super::notebook;
|
|||
use super::PositionEncoding;
|
||||
use lsp_types as types;
|
||||
use ruff_notebook::NotebookIndex;
|
||||
use ruff_source_file::OneIndexed;
|
||||
use ruff_source_file::{LineIndex, SourceLocation};
|
||||
use ruff_text_size::{TextRange, TextSize};
|
||||
use ruff_source_file::LineIndex;
|
||||
use ruff_source_file::{OneIndexed, SourceLocation};
|
||||
use ruff_text_size::TextRange;
|
||||
|
||||
pub(crate) struct NotebookRange {
|
||||
pub(crate) cell: notebook::CellId,
|
||||
|
@ -38,76 +38,43 @@ impl RangeExt for lsp_types::Range {
|
|||
index: &LineIndex,
|
||||
encoding: PositionEncoding,
|
||||
) -> TextRange {
|
||||
let start_line = index.line_range(
|
||||
OneIndexed::from_zero_indexed(u32_index_to_usize(self.start.line)),
|
||||
let start = index.offset(
|
||||
SourceLocation {
|
||||
line: OneIndexed::from_zero_indexed(u32_index_to_usize(self.start.line)),
|
||||
character_offset: OneIndexed::from_zero_indexed(u32_index_to_usize(
|
||||
self.start.character,
|
||||
)),
|
||||
},
|
||||
text,
|
||||
encoding.into(),
|
||||
);
|
||||
let end_line = index.line_range(
|
||||
OneIndexed::from_zero_indexed(u32_index_to_usize(self.end.line)),
|
||||
let end = index.offset(
|
||||
SourceLocation {
|
||||
line: OneIndexed::from_zero_indexed(u32_index_to_usize(self.end.line)),
|
||||
character_offset: OneIndexed::from_zero_indexed(u32_index_to_usize(
|
||||
self.end.character,
|
||||
)),
|
||||
},
|
||||
text,
|
||||
encoding.into(),
|
||||
);
|
||||
|
||||
let (start_column_offset, end_column_offset) = match encoding {
|
||||
PositionEncoding::UTF8 => (
|
||||
TextSize::new(self.start.character),
|
||||
TextSize::new(self.end.character),
|
||||
),
|
||||
|
||||
PositionEncoding::UTF16 => {
|
||||
// Fast path for ASCII only documents
|
||||
if index.is_ascii() {
|
||||
(
|
||||
TextSize::new(self.start.character),
|
||||
TextSize::new(self.end.character),
|
||||
)
|
||||
} else {
|
||||
// UTF16 encodes characters either as one or two 16 bit words.
|
||||
// The position in `range` is the 16-bit word offset from the start of the line (and not the character offset)
|
||||
// UTF-16 with a text that may use variable-length characters.
|
||||
(
|
||||
utf8_column_offset(self.start.character, &text[start_line]),
|
||||
utf8_column_offset(self.end.character, &text[end_line]),
|
||||
)
|
||||
}
|
||||
}
|
||||
PositionEncoding::UTF32 => {
|
||||
// UTF-32 uses 4 bytes for each character. Meaning, the position in range is a character offset.
|
||||
return TextRange::new(
|
||||
index.offset(
|
||||
OneIndexed::from_zero_indexed(u32_index_to_usize(self.start.line)),
|
||||
OneIndexed::from_zero_indexed(u32_index_to_usize(self.start.character)),
|
||||
text,
|
||||
),
|
||||
index.offset(
|
||||
OneIndexed::from_zero_indexed(u32_index_to_usize(self.end.line)),
|
||||
OneIndexed::from_zero_indexed(u32_index_to_usize(self.end.character)),
|
||||
text,
|
||||
),
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
TextRange::new(
|
||||
start_line.start() + start_column_offset.clamp(TextSize::new(0), start_line.end()),
|
||||
end_line.start() + end_column_offset.clamp(TextSize::new(0), end_line.end()),
|
||||
)
|
||||
TextRange::new(start, end)
|
||||
}
|
||||
}
|
||||
|
||||
impl ToRangeExt for TextRange {
|
||||
fn to_range(&self, text: &str, index: &LineIndex, encoding: PositionEncoding) -> types::Range {
|
||||
types::Range {
|
||||
start: source_location_to_position(&offset_to_source_location(
|
||||
start: source_location_to_position(&index.source_location(
|
||||
self.start(),
|
||||
text,
|
||||
index,
|
||||
encoding,
|
||||
encoding.into(),
|
||||
)),
|
||||
end: source_location_to_position(&offset_to_source_location(
|
||||
end: source_location_to_position(&index.source_location(
|
||||
self.end(),
|
||||
text,
|
||||
index,
|
||||
encoding,
|
||||
encoding.into(),
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
@ -119,26 +86,26 @@ impl ToRangeExt for TextRange {
|
|||
notebook_index: &NotebookIndex,
|
||||
encoding: PositionEncoding,
|
||||
) -> NotebookRange {
|
||||
let start = offset_to_source_location(self.start(), text, source_index, encoding);
|
||||
let mut end = offset_to_source_location(self.end(), text, source_index, encoding);
|
||||
let starting_cell = notebook_index.cell(start.row);
|
||||
let start = source_index.source_location(self.start(), text, encoding.into());
|
||||
let mut end = source_index.source_location(self.end(), text, encoding.into());
|
||||
let starting_cell = notebook_index.cell(start.line);
|
||||
|
||||
// weird edge case here - if the end of the range is where the newline after the cell got added (making it 'out of bounds')
|
||||
// we need to move it one character back (which should place it at the end of the last line).
|
||||
// we test this by checking if the ending offset is in a different (or nonexistent) cell compared to the cell of the starting offset.
|
||||
if notebook_index.cell(end.row) != starting_cell {
|
||||
end.row = end.row.saturating_sub(1);
|
||||
end.column = offset_to_source_location(
|
||||
self.end().checked_sub(1.into()).unwrap_or_default(),
|
||||
text,
|
||||
source_index,
|
||||
encoding,
|
||||
)
|
||||
.column;
|
||||
if notebook_index.cell(end.line) != starting_cell {
|
||||
end.line = end.line.saturating_sub(1);
|
||||
end.character_offset = source_index
|
||||
.source_location(
|
||||
self.end().checked_sub(1.into()).unwrap_or_default(),
|
||||
text,
|
||||
encoding.into(),
|
||||
)
|
||||
.character_offset;
|
||||
}
|
||||
|
||||
let start = source_location_to_position(¬ebook_index.translate_location(&start));
|
||||
let end = source_location_to_position(¬ebook_index.translate_location(&end));
|
||||
let start = source_location_to_position(¬ebook_index.translate_source_location(&start));
|
||||
let end = source_location_to_position(¬ebook_index.translate_source_location(&end));
|
||||
|
||||
NotebookRange {
|
||||
cell: starting_cell
|
||||
|
@ -149,67 +116,10 @@ impl ToRangeExt for TextRange {
|
|||
}
|
||||
}
|
||||
|
||||
/// Converts a UTF-16 code unit offset for a given line into a UTF-8 column number.
|
||||
fn utf8_column_offset(utf16_code_unit_offset: u32, line: &str) -> TextSize {
|
||||
let mut utf8_code_unit_offset = TextSize::new(0);
|
||||
|
||||
let mut i = 0u32;
|
||||
|
||||
for c in line.chars() {
|
||||
if i >= utf16_code_unit_offset {
|
||||
break;
|
||||
}
|
||||
|
||||
// Count characters encoded as two 16 bit words as 2 characters.
|
||||
{
|
||||
utf8_code_unit_offset +=
|
||||
TextSize::new(u32::try_from(c.len_utf8()).expect("utf8 len always <=4"));
|
||||
i += u32::try_from(c.len_utf16()).expect("utf16 len always <=2");
|
||||
}
|
||||
}
|
||||
|
||||
utf8_code_unit_offset
|
||||
}
|
||||
|
||||
fn offset_to_source_location(
|
||||
offset: TextSize,
|
||||
text: &str,
|
||||
index: &LineIndex,
|
||||
encoding: PositionEncoding,
|
||||
) -> SourceLocation {
|
||||
match encoding {
|
||||
PositionEncoding::UTF8 => {
|
||||
let row = index.line_index(offset);
|
||||
let column = offset - index.line_start(row, text);
|
||||
|
||||
SourceLocation {
|
||||
column: OneIndexed::from_zero_indexed(column.to_usize()),
|
||||
row,
|
||||
}
|
||||
}
|
||||
PositionEncoding::UTF16 => {
|
||||
let row = index.line_index(offset);
|
||||
|
||||
let column = if index.is_ascii() {
|
||||
(offset - index.line_start(row, text)).to_usize()
|
||||
} else {
|
||||
let up_to_line = &text[TextRange::new(index.line_start(row, text), offset)];
|
||||
up_to_line.encode_utf16().count()
|
||||
};
|
||||
|
||||
SourceLocation {
|
||||
column: OneIndexed::from_zero_indexed(column),
|
||||
row,
|
||||
}
|
||||
}
|
||||
PositionEncoding::UTF32 => index.source_location(offset, text),
|
||||
}
|
||||
}
|
||||
|
||||
fn source_location_to_position(location: &SourceLocation) -> types::Position {
|
||||
types::Position {
|
||||
line: u32::try_from(location.row.to_zero_indexed()).expect("row usize fits in u32"),
|
||||
character: u32::try_from(location.column.to_zero_indexed())
|
||||
line: u32::try_from(location.line.to_zero_indexed()).expect("row usize fits in u32"),
|
||||
character: u32::try_from(location.character_offset.to_zero_indexed())
|
||||
.expect("character usize fits in u32"),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -59,6 +59,7 @@ impl Server {
|
|||
|
||||
let client_capabilities = init_params.capabilities;
|
||||
let position_encoding = Self::find_best_position_encoding(&client_capabilities);
|
||||
|
||||
let server_capabilities = Self::server_capabilities(position_encoding);
|
||||
|
||||
let connection = connection.initialize_finish(
|
||||
|
@ -98,6 +99,8 @@ impl Server {
|
|||
workspace_settings.unwrap_or_default(),
|
||||
)?;
|
||||
|
||||
tracing::debug!("Negotiated position encoding: {position_encoding:?}");
|
||||
|
||||
Ok(Self {
|
||||
connection,
|
||||
worker_threads,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue