mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-29 21:35:58 +00:00

<!-- Thank you for contributing to Ruff! To help us out with reviewing, please consider the following: - Does this pull request include a summary of the change? (See below.) - Does this pull request include a descriptive title? - Does this pull request include references to any relevant issues? --> ## Summary This PR introduces the `ruff_server` crate and a new `ruff server` command. `ruff_server` is a re-implementation of [`ruff-lsp`](https://github.com/astral-sh/ruff-lsp), written entirely in Rust. It brings significant performance improvements, much tighter integration with Ruff, a foundation for supporting entirely new language server features, and more! This PR is an early version of `ruff_server` that we're calling the **pre-release** version. Anyone is more than welcome to use it and submit bug reports for any issues they encounter - we'll have some documentation on how to set it up with a few common editors, and we'll also provide a pre-release VSCode extension for those interested. This pre-release version supports: - **Diagnostics for `.py` files** - **Quick fixes** - **Full-file formatting** - **Range formatting** - **Multiple workspace folders** - **Automatic linter/formatter configuration** - taken from any `pyproject.toml` files in the workspace. Many thanks to @MichaReiser for his [proof-of-concept work](https://github.com/astral-sh/ruff/pull/7262), which was important groundwork for making this PR possible. ## Architectural Decisions I've made an executive choice to go with `lsp-server` as a base framework for the LSP, in favor of `tower-lsp`. There were several reasons for this: 1. I would like to avoid `async` in our implementation. LSPs are mostly computationally bound rather than I/O bound, and `async` adds a lot of complexity to the API, while also making it harder to reason about execution order. This leads into the second reason, which is... 2. 
Any handlers that mutate state should be blocking and run in the event loop, and the state should be lock-free. This is the approach that `rust-analyzer` uses (also with the `lsp-server`/`lsp-types` crates as a framework), and it gives us assurances about data mutation and execution order. `tower-lsp` doesn't support this, which has caused some [issues](https://github.com/ebkalderon/tower-lsp/issues/284) around data races and out-of-order handler execution. 3. In general, I think it makes sense to have tight control over scheduling and the specifics of our implementation, in exchange for a slightly higher up-front cost of writing it ourselves. We'll be able to fine-tune it to our needs and support future LSP features without depending on an upstream maintainer. ## Test Plan The pre-release of `ruff_server` will have snapshot tests for common document editing scenarios. An expanded test suite is on the roadmap for future versions of `ruff_server`.
484 lines
16 KiB
Rust
484 lines
16 KiB
Rust
//! Struct used to efficiently slice source code at (row, column) Locations.
|
||
|
||
use std::ops::Add;
|
||
|
||
use memchr::{memchr2, memrchr2};
|
||
use once_cell::unsync::OnceCell;
|
||
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
||
|
||
use crate::newlines::find_newline;
|
||
use crate::{LineIndex, OneIndexed, SourceCode, SourceLocation};
|
||
|
||
#[derive(Debug)]
|
||
pub struct Locator<'a> {
|
||
contents: &'a str,
|
||
index: OnceCell<LineIndex>,
|
||
}
|
||
|
||
impl<'a> Locator<'a> {
|
||
pub const fn new(contents: &'a str) -> Self {
|
||
Self {
|
||
contents,
|
||
index: OnceCell::new(),
|
||
}
|
||
}
|
||
|
||
pub const fn with_index(contents: &'a str, index: LineIndex) -> Self {
|
||
Self {
|
||
contents,
|
||
index: OnceCell::with_value(index),
|
||
}
|
||
}
|
||
|
||
#[deprecated(
|
||
note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead."
|
||
)]
|
||
pub fn compute_line_index(&self, offset: TextSize) -> OneIndexed {
|
||
self.to_index().line_index(offset)
|
||
}
|
||
|
||
#[deprecated(
|
||
note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead."
|
||
)]
|
||
pub fn compute_source_location(&self, offset: TextSize) -> SourceLocation {
|
||
self.to_source_code().source_location(offset)
|
||
}
|
||
|
||
fn to_index(&self) -> &LineIndex {
|
||
self.index
|
||
.get_or_init(|| LineIndex::from_source_text(self.contents))
|
||
}
|
||
|
||
pub fn line_index(&self) -> Option<&LineIndex> {
|
||
self.index.get()
|
||
}
|
||
|
||
pub fn to_source_code(&self) -> SourceCode {
|
||
SourceCode {
|
||
index: self.to_index(),
|
||
text: self.contents,
|
||
}
|
||
}
|
||
|
||
/// Computes the start position of the line of `offset`.
|
||
///
|
||
/// ## Examples
|
||
///
|
||
/// ```
|
||
/// # use ruff_text_size::TextSize;
|
||
/// # use ruff_source_file::Locator;
|
||
///
|
||
/// let locator = Locator::new("First line\nsecond line\rthird line");
|
||
///
|
||
/// assert_eq!(locator.line_start(TextSize::from(0)), TextSize::from(0));
|
||
/// assert_eq!(locator.line_start(TextSize::from(4)), TextSize::from(0));
|
||
///
|
||
/// assert_eq!(locator.line_start(TextSize::from(14)), TextSize::from(11));
|
||
/// assert_eq!(locator.line_start(TextSize::from(28)), TextSize::from(23));
|
||
/// ```
|
||
///
|
||
/// ## Panics
|
||
/// If `offset` is out of bounds.
|
||
pub fn line_start(&self, offset: TextSize) -> TextSize {
|
||
let bytes = self.contents[TextRange::up_to(offset)].as_bytes();
|
||
if let Some(index) = memrchr2(b'\n', b'\r', bytes) {
|
||
// SAFETY: Safe because `index < offset`
|
||
TextSize::try_from(index).unwrap().add(TextSize::from(1))
|
||
} else {
|
||
self.contents_start()
|
||
}
|
||
}
|
||
|
||
/// Computes the start position of the file contents: either the first byte, or the byte after
|
||
/// the BOM.
|
||
pub fn contents_start(&self) -> TextSize {
|
||
if self.contents.starts_with('\u{feff}') {
|
||
// Skip the BOM.
|
||
'\u{feff}'.text_len()
|
||
} else {
|
||
// Start of file.
|
||
TextSize::default()
|
||
}
|
||
}
|
||
|
||
/// Returns `true` if `offset` is at the start of a line.
|
||
pub fn is_at_start_of_line(&self, offset: TextSize) -> bool {
|
||
self.line_start(offset) == offset
|
||
}
|
||
|
||
/// Computes the offset that is right after the newline character that ends `offset`'s line.
|
||
///
|
||
/// ## Examples
|
||
///
|
||
/// ```
|
||
/// # use ruff_text_size::{Ranged, TextRange, TextSize};
|
||
/// # use ruff_source_file::Locator;
|
||
///
|
||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||
///
|
||
/// assert_eq!(locator.full_line_end(TextSize::from(3)), TextSize::from(11));
|
||
/// assert_eq!(locator.full_line_end(TextSize::from(14)), TextSize::from(24));
|
||
/// assert_eq!(locator.full_line_end(TextSize::from(28)), TextSize::from(34));
|
||
/// ```
|
||
///
|
||
/// ## Panics
|
||
///
|
||
/// If `offset` is passed the end of the content.
|
||
pub fn full_line_end(&self, offset: TextSize) -> TextSize {
|
||
let slice = &self.contents[usize::from(offset)..];
|
||
if let Some((index, line_ending)) = find_newline(slice) {
|
||
offset + TextSize::try_from(index).unwrap() + line_ending.text_len()
|
||
} else {
|
||
self.contents.text_len()
|
||
}
|
||
}
|
||
|
||
/// Computes the offset that is right before the newline character that ends `offset`'s line.
|
||
///
|
||
/// ## Examples
|
||
///
|
||
/// ```
|
||
/// # use ruff_text_size::{Ranged, TextRange, TextSize};
|
||
/// # use ruff_source_file::Locator;
|
||
///
|
||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||
///
|
||
/// assert_eq!(locator.line_end(TextSize::from(3)), TextSize::from(10));
|
||
/// assert_eq!(locator.line_end(TextSize::from(14)), TextSize::from(22));
|
||
/// assert_eq!(locator.line_end(TextSize::from(28)), TextSize::from(34));
|
||
/// ```
|
||
///
|
||
/// ## Panics
|
||
///
|
||
/// If `offset` is passed the end of the content.
|
||
pub fn line_end(&self, offset: TextSize) -> TextSize {
|
||
let slice = &self.contents[usize::from(offset)..];
|
||
if let Some(index) = memchr2(b'\n', b'\r', slice.as_bytes()) {
|
||
offset + TextSize::try_from(index).unwrap()
|
||
} else {
|
||
self.contents.text_len()
|
||
}
|
||
}
|
||
|
||
/// Computes the range of this `offset`s line.
|
||
///
|
||
/// The range starts at the beginning of the line and goes up to, and including, the new line character
|
||
/// at the end of the line.
|
||
///
|
||
/// ## Examples
|
||
///
|
||
/// ```
|
||
/// # use ruff_text_size::{Ranged, TextRange, TextSize};
|
||
/// # use ruff_source_file::Locator;
|
||
///
|
||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||
///
|
||
/// assert_eq!(locator.full_line_range(TextSize::from(3)), TextRange::new(TextSize::from(0), TextSize::from(11)));
|
||
/// assert_eq!(locator.full_line_range(TextSize::from(14)), TextRange::new(TextSize::from(11), TextSize::from(24)));
|
||
/// assert_eq!(locator.full_line_range(TextSize::from(28)), TextRange::new(TextSize::from(24), TextSize::from(34)));
|
||
/// ```
|
||
///
|
||
/// ## Panics
|
||
/// If `offset` is out of bounds.
|
||
pub fn full_line_range(&self, offset: TextSize) -> TextRange {
|
||
TextRange::new(self.line_start(offset), self.full_line_end(offset))
|
||
}
|
||
|
||
/// Computes the range of this `offset`s line ending before the newline character.
|
||
///
|
||
/// The range starts at the beginning of the line and goes up to, but excluding, the new line character
|
||
/// at the end of the line.
|
||
///
|
||
/// ## Examples
|
||
///
|
||
/// ```
|
||
/// # use ruff_text_size::{Ranged, TextRange, TextSize};
|
||
/// # use ruff_source_file::Locator;
|
||
///
|
||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||
///
|
||
/// assert_eq!(locator.line_range(TextSize::from(3)), TextRange::new(TextSize::from(0), TextSize::from(10)));
|
||
/// assert_eq!(locator.line_range(TextSize::from(14)), TextRange::new(TextSize::from(11), TextSize::from(22)));
|
||
/// assert_eq!(locator.line_range(TextSize::from(28)), TextRange::new(TextSize::from(24), TextSize::from(34)));
|
||
/// ```
|
||
///
|
||
/// ## Panics
|
||
/// If `offset` is out of bounds.
|
||
pub fn line_range(&self, offset: TextSize) -> TextRange {
|
||
TextRange::new(self.line_start(offset), self.line_end(offset))
|
||
}
|
||
|
||
/// Returns the text of the `offset`'s line.
|
||
///
|
||
/// The line includes the newline characters at the end of the line.
|
||
///
|
||
/// ## Examples
|
||
///
|
||
/// ```
|
||
/// # use ruff_text_size::{Ranged, TextRange, TextSize};
|
||
/// # use ruff_source_file::Locator;
|
||
///
|
||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||
///
|
||
/// assert_eq!(locator.full_line(TextSize::from(3)), "First line\n");
|
||
/// assert_eq!(locator.full_line(TextSize::from(14)), "second line\r\n");
|
||
/// assert_eq!(locator.full_line(TextSize::from(28)), "third line");
|
||
/// ```
|
||
///
|
||
/// ## Panics
|
||
/// If `offset` is out of bounds.
|
||
pub fn full_line(&self, offset: TextSize) -> &'a str {
|
||
&self.contents[self.full_line_range(offset)]
|
||
}
|
||
|
||
/// Returns the text of the `offset`'s line.
|
||
///
|
||
/// Excludes the newline characters at the end of the line.
|
||
///
|
||
/// ## Examples
|
||
///
|
||
/// ```
|
||
/// # use ruff_text_size::{Ranged, TextRange, TextSize};
|
||
/// # use ruff_source_file::Locator;
|
||
///
|
||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||
///
|
||
/// assert_eq!(locator.line(TextSize::from(3)), "First line");
|
||
/// assert_eq!(locator.line(TextSize::from(14)), "second line");
|
||
/// assert_eq!(locator.line(TextSize::from(28)), "third line");
|
||
/// ```
|
||
///
|
||
/// ## Panics
|
||
/// If `offset` is out of bounds.
|
||
pub fn line(&self, offset: TextSize) -> &'a str {
|
||
&self.contents[self.line_range(offset)]
|
||
}
|
||
|
||
/// Computes the range of all lines that this `range` covers.
|
||
///
|
||
/// The range starts at the beginning of the line at `range.start()` and goes up to, and including, the new line character
|
||
/// at the end of `range.ends()`'s line.
|
||
///
|
||
/// ## Examples
|
||
///
|
||
/// ```
|
||
/// # use ruff_text_size::{Ranged, TextRange, TextSize};
|
||
/// # use ruff_source_file::Locator;
|
||
///
|
||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||
///
|
||
/// assert_eq!(
|
||
/// locator.full_lines_range(TextRange::new(TextSize::from(3), TextSize::from(5))),
|
||
/// TextRange::new(TextSize::from(0), TextSize::from(11))
|
||
/// );
|
||
/// assert_eq!(
|
||
/// locator.full_lines_range(TextRange::new(TextSize::from(3), TextSize::from(14))),
|
||
/// TextRange::new(TextSize::from(0), TextSize::from(24))
|
||
/// );
|
||
/// ```
|
||
///
|
||
/// ## Panics
|
||
/// If the start or end of `range` is out of bounds.
|
||
pub fn full_lines_range(&self, range: TextRange) -> TextRange {
|
||
TextRange::new(
|
||
self.line_start(range.start()),
|
||
self.full_line_end(range.end()),
|
||
)
|
||
}
|
||
|
||
/// Computes the range of all lines that this `range` covers.
|
||
///
|
||
/// The range starts at the beginning of the line at `range.start()` and goes up to, but excluding, the new line character
|
||
/// at the end of `range.end()`'s line.
|
||
///
|
||
/// ## Examples
|
||
///
|
||
/// ```
|
||
/// # use ruff_text_size::{Ranged, TextRange, TextSize};
|
||
/// # use ruff_source_file::Locator;
|
||
///
|
||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||
///
|
||
/// assert_eq!(
|
||
/// locator.lines_range(TextRange::new(TextSize::from(3), TextSize::from(5))),
|
||
/// TextRange::new(TextSize::from(0), TextSize::from(10))
|
||
/// );
|
||
/// assert_eq!(
|
||
/// locator.lines_range(TextRange::new(TextSize::from(3), TextSize::from(14))),
|
||
/// TextRange::new(TextSize::from(0), TextSize::from(22))
|
||
/// );
|
||
/// ```
|
||
///
|
||
/// ## Panics
|
||
/// If the start or end of `range` is out of bounds.
|
||
pub fn lines_range(&self, range: TextRange) -> TextRange {
|
||
TextRange::new(self.line_start(range.start()), self.line_end(range.end()))
|
||
}
|
||
|
||
/// Returns true if the text of `range` contains any line break.
|
||
///
|
||
/// ```
|
||
/// # use ruff_text_size::{Ranged, TextRange, TextSize};
|
||
/// # use ruff_source_file::Locator;
|
||
///
|
||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||
///
|
||
/// assert!(
|
||
/// !locator.contains_line_break(TextRange::new(TextSize::from(3), TextSize::from(5))),
|
||
/// );
|
||
/// assert!(
|
||
/// locator.contains_line_break(TextRange::new(TextSize::from(3), TextSize::from(14))),
|
||
/// );
|
||
/// ```
|
||
///
|
||
/// ## Panics
|
||
/// If the `range` is out of bounds.
|
||
pub fn contains_line_break(&self, range: TextRange) -> bool {
|
||
let text = &self.contents[range];
|
||
text.contains(['\n', '\r'])
|
||
}
|
||
|
||
/// Returns the text of all lines that include `range`.
|
||
///
|
||
/// ## Examples
|
||
///
|
||
/// ```
|
||
/// # use ruff_text_size::{Ranged, TextRange, TextSize};
|
||
/// # use ruff_source_file::Locator;
|
||
///
|
||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||
///
|
||
/// assert_eq!(
|
||
/// locator.lines(TextRange::new(TextSize::from(3), TextSize::from(5))),
|
||
/// "First line"
|
||
/// );
|
||
/// assert_eq!(
|
||
/// locator.lines(TextRange::new(TextSize::from(3), TextSize::from(14))),
|
||
/// "First line\nsecond line"
|
||
/// );
|
||
/// ```
|
||
///
|
||
/// ## Panics
|
||
/// If the start or end of `range` is out of bounds.
|
||
pub fn lines(&self, range: TextRange) -> &'a str {
|
||
&self.contents[self.lines_range(range)]
|
||
}
|
||
|
||
/// Returns the text of all lines that include `range`.
|
||
///
|
||
/// Includes the newline characters of the last line.
|
||
///
|
||
/// ## Examples
|
||
///
|
||
/// ```
|
||
/// # use ruff_text_size::{Ranged, TextRange, TextSize};
|
||
/// # use ruff_source_file::Locator;
|
||
///
|
||
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
|
||
///
|
||
/// assert_eq!(
|
||
/// locator.full_lines(TextRange::new(TextSize::from(3), TextSize::from(5))),
|
||
/// "First line\n"
|
||
/// );
|
||
/// assert_eq!(
|
||
/// locator.full_lines(TextRange::new(TextSize::from(3), TextSize::from(14))),
|
||
/// "First line\nsecond line\r\n"
|
||
/// );
|
||
/// ```
|
||
///
|
||
/// ## Panics
|
||
/// If the start or end of `range` is out of bounds.
|
||
pub fn full_lines(&self, range: TextRange) -> &'a str {
|
||
&self.contents[self.full_lines_range(range)]
|
||
}
|
||
|
||
/// Take the source code up to the given [`TextSize`].
|
||
#[inline]
|
||
pub fn up_to(&self, offset: TextSize) -> &'a str {
|
||
&self.contents[TextRange::up_to(offset)]
|
||
}
|
||
|
||
/// Take the source code after the given [`TextSize`].
|
||
#[inline]
|
||
pub fn after(&self, offset: TextSize) -> &'a str {
|
||
&self.contents[usize::from(offset)..]
|
||
}
|
||
|
||
/// Finds the closest [`TextSize`] not exceeding the offset for which `is_char_boundary` is
|
||
/// `true`.
|
||
///
|
||
/// Can be replaced with `str::floor_char_boundary` once it's stable.
|
||
///
|
||
/// ## Examples
|
||
///
|
||
/// ```
|
||
/// # use ruff_text_size::{Ranged, TextRange, TextSize};
|
||
/// # use ruff_source_file::Locator;
|
||
///
|
||
/// let locator = Locator::new("Hello");
|
||
///
|
||
/// assert_eq!(
|
||
/// locator.floor_char_boundary(TextSize::from(0)),
|
||
/// TextSize::from(0)
|
||
/// );
|
||
///
|
||
/// assert_eq!(
|
||
/// locator.floor_char_boundary(TextSize::from(5)),
|
||
/// TextSize::from(5)
|
||
/// );
|
||
///
|
||
/// let locator = Locator::new("α");
|
||
///
|
||
/// assert_eq!(
|
||
/// locator.floor_char_boundary(TextSize::from(0)),
|
||
/// TextSize::from(0)
|
||
/// );
|
||
///
|
||
/// assert_eq!(
|
||
/// locator.floor_char_boundary(TextSize::from(1)),
|
||
/// TextSize::from(0)
|
||
/// );
|
||
///
|
||
/// assert_eq!(
|
||
/// locator.floor_char_boundary(TextSize::from(2)),
|
||
/// TextSize::from(2)
|
||
/// );
|
||
/// ```
|
||
pub fn floor_char_boundary(&self, offset: TextSize) -> TextSize {
|
||
if offset >= self.text_len() {
|
||
self.text_len()
|
||
} else {
|
||
// We know that the character boundary is within four bytes.
|
||
(0u32..=3u32)
|
||
.map(TextSize::from)
|
||
.filter_map(|index| offset.checked_sub(index))
|
||
.find(|offset| self.contents.is_char_boundary(offset.to_usize()))
|
||
.unwrap_or_default()
|
||
}
|
||
}
|
||
|
||
/// Take the source code between the given [`TextRange`].
|
||
#[inline]
|
||
pub fn slice<T: Ranged>(&self, ranged: T) -> &'a str {
|
||
&self.contents[ranged.range()]
|
||
}
|
||
|
||
/// Return the underlying source code.
|
||
pub fn contents(&self) -> &'a str {
|
||
self.contents
|
||
}
|
||
|
||
/// Return the number of bytes in the source code.
|
||
pub const fn len(&self) -> usize {
|
||
self.contents.len()
|
||
}
|
||
|
||
pub fn text_len(&self) -> TextSize {
|
||
self.contents.text_len()
|
||
}
|
||
|
||
/// Return `true` if the source code is empty.
|
||
pub const fn is_empty(&self) -> bool {
|
||
self.contents.is_empty()
|
||
}
|
||
}
|