Cheap cloneable LineIndex (#3896)

This commit is contained in:
Micha Reiser 2023-04-11 09:33:40 +02:00 committed by GitHub
parent 9209e57c5a
commit 76c47a9a43
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 465 additions and 335 deletions

1
Cargo.lock generated
View file

@ -2177,6 +2177,7 @@ dependencies = [
"once_cell", "once_cell",
"regex", "regex",
"ruff_rustpython", "ruff_rustpython",
"ruff_text_size",
"rustc-hash", "rustc-hash",
"rustpython-common", "rustpython-common",
"rustpython-parser", "rustpython-parser",

View file

@ -58,6 +58,7 @@ impl Diagnostic {
/// Set the [`Fix`] used to fix the diagnostic, if the provided function returns `Ok`. /// Set the [`Fix`] used to fix the diagnostic, if the provided function returns `Ok`.
/// Otherwise, log the error. /// Otherwise, log the error.
#[inline]
pub fn try_set_fix<T: Into<Fix>>(&mut self, func: impl FnOnce() -> Result<T>) { pub fn try_set_fix<T: Into<Fix>>(&mut self, func: impl FnOnce() -> Result<T>) {
match func() { match func() {
Ok(fix) => self.fix = fix.into(), Ok(fix) => self.fix = fix.into(),
@ -66,6 +67,7 @@ impl Diagnostic {
} }
/// Set the location of the diagnostic's parent node. /// Set the location of the diagnostic's parent node.
#[inline]
pub fn set_parent(&mut self, parent: Location) { pub fn set_parent(&mut self, parent: Location) {
self.parent = Some(parent); self.parent = Some(parent);
} }

View file

@ -9,6 +9,7 @@ rust-version = { workspace = true }
[dependencies] [dependencies]
ruff_rustpython = { path = "../ruff_rustpython" } ruff_rustpython = { path = "../ruff_rustpython" }
ruff_text_size = { path = "../ruff_text_size" }
anyhow = { workspace = true } anyhow = { workspace = true }
bitflags = { workspace = true } bitflags = { workspace = true }

View file

@ -0,0 +1,418 @@
use ruff_text_size::{TextLen, TextRange, TextSize};
use rustpython_parser::ast::Location;
use std::fmt;
use std::fmt::{Debug, Formatter};
use std::num::NonZeroUsize;
use std::ops::Deref;
use std::sync::Arc;
/// Index for fast [`Location`] to [byte offset](TextSize) conversions.
///
/// Cloning a [`LineIndex`] is cheap because it only requires bumping a reference count.
#[derive(Clone)]
pub struct LineIndex {
// Reference-counted index data; every clone of this `LineIndex` points at the same allocation.
inner: Arc<LineIndexInner>,
}
/// The data of a [`LineIndex`], stored behind an [`Arc`] so that it is shared between clones.
struct LineIndexInner {
/// Byte offset at which each line starts. The first entry is always offset `0`.
line_starts: Vec<TextSize>,
/// Whether the indexed text was ASCII-only or contained non-ASCII bytes.
kind: IndexKind,
}
impl LineIndex {
    /// Builds the [`LineIndex`] from the source text of a file.
    ///
    /// Records the byte offset of every line start and whether `text` contains any
    /// non-ASCII byte. `\n`, a lone `\r`, and `\r\n` each count as exactly one line break.
    ///
    /// # Panics
    ///
    /// Panics if the length of `text` does not fit into a `u32`.
    pub fn from_source_text(text: &str) -> Self {
        assert!(u32::try_from(text.len()).is_ok());

        // Capacity guess: reserve one line start per 88 bytes of text.
        let mut line_starts: Vec<TextSize> = Vec::with_capacity(text.len() / 88);
        line_starts.push(TextSize::default());

        let bytes = text.as_bytes();
        let mut utf8 = false;

        for (i, byte) in bytes.iter().enumerate() {
            utf8 |= !byte.is_ascii();

            match byte {
                // Only track one line break for `\r\n`.
                b'\r' if bytes.get(i + 1) == Some(&b'\n') => continue,
                b'\n' | b'\r' => {
                    // Cannot fail: `i + 1 <= text.len()`, which was asserted to fit in a `u32`.
                    line_starts.push(TextSize::try_from(i + 1).unwrap());
                }
                _ => {}
            }
        }

        let kind = if utf8 {
            IndexKind::Utf8
        } else {
            IndexKind::Ascii
        };

        Self {
            inner: Arc::new(LineIndexInner { line_starts, kind }),
        }
    }

    /// Returns whether the indexed text was ASCII-only or contained non-ASCII bytes.
    fn kind(&self) -> IndexKind {
        self.inner.kind
    }

    /// Converts a [`Location`] to its [byte offset](TextSize) in the source code.
    ///
    /// `contents` must be the same text the index was built from. The returned offset
    /// is truncated so that it never exceeds the length of `contents`; this keeps
    /// slicing `contents` with the result from panicking when a column points past
    /// the end of the text.
    ///
    /// # Panics
    ///
    /// Panics if `location.row()` is `0` or lies more than one line past the last line.
    pub fn location_offset(&self, location: Location, contents: &str) -> TextSize {
        let line_index = OneIndexed::new(location.row()).unwrap();
        let line_range = self.line_range(line_index, contents);

        let column_offset = match self.kind() {
            // Every character is one byte wide, so the column is the byte offset in the line.
            IndexKind::Ascii => TextSize::try_from(location.column()).unwrap(),
            IndexKind::Utf8 => {
                let line = &contents[line_range];

                // Skip the bom character
                let bom_len =
                    usize::from(line_index.to_zero_indexed() == 0 && line.starts_with('\u{feff}'));

                match line.char_indices().nth(location.column() + bom_len) {
                    Some((offset, _)) => TextSize::try_from(offset).unwrap(),
                    // Column is past the last character of the line: truncate to the line's end.
                    None => line_range.len(),
                }
            }
        };

        // The ASCII branch does not bound the column, so clamp to the end of the text.
        (line_range.start() + column_offset).min(contents.text_len())
    }

    /// Return the number of lines in the source code.
    pub(crate) fn lines_count(&self) -> usize {
        self.line_starts().len()
    }

    /// Returns the [byte offset](TextSize) for the `line` with the given index.
    ///
    /// The line directly after the last line starts at the end of `contents`.
    fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize {
        let row_index = line.to_zero_indexed();
        let starts = self.line_starts();

        // If start-of-line position after last line
        if row_index == starts.len() {
            contents.text_len()
        } else {
            starts[row_index]
        }
    }

    /// Returns the [`TextRange`] of the `line` with the given index.
    ///
    /// The range starts at the [byte offset](TextSize) of the line's first character and
    /// ends after the newline character ending the line (if any). The line directly after
    /// the last line yields an empty range at the end of `contents`.
    fn line_range(&self, line: OneIndexed, contents: &str) -> TextRange {
        let starts = self.line_starts();

        if starts.len() == line.to_zero_indexed() {
            TextRange::empty(contents.text_len())
        } else {
            TextRange::new(
                self.line_start(line, contents),
                self.line_start(line.saturating_add(1), contents),
            )
        }
    }

    /// Returns the [byte offsets](TextSize) for every line
    pub fn line_starts(&self) -> &[TextSize] {
        &self.inner.line_starts
    }
}
/// Dereferences a [`LineIndex`] to the slice of its line-start offsets.
impl Deref for LineIndex {
    type Target = [TextSize];

    fn deref(&self) -> &Self::Target {
        self.inner.line_starts.as_slice()
    }
}
/// Formats the index as the list of its line-start offsets.
impl Debug for LineIndex {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let mut list = f.debug_list();
        list.entries(self.line_starts());
        list.finish()
    }
}
/// Records whether the indexed text contained a non-ASCII byte, which decides how a
/// column is converted to a byte offset.
#[derive(Debug, Clone, Copy)]
enum IndexKind {
/// Optimized index for an ASCII only document
Ascii,
/// Index for UTF8 documents
Utf8,
}
/// A number that counts from `1` rather than `0`, such as a line or column
/// number in a file.
///
/// The value is stored as a [`NonZeroUsize`], so zero cannot be represented and
/// an `Option<OneIndexed>` needs no extra space.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct OneIndexed(NonZeroUsize);

/// The non-zero constant `1`.
const ONE: NonZeroUsize = unwrap(NonZeroUsize::new(1));

impl OneIndexed {
    /// The smallest value that can be represented by this integer type.
    pub const MIN: Self = Self(ONE);

    /// The largest value that can be represented by this integer type
    pub const MAX: Self = unwrap(Self::new(usize::MAX));

    /// Wraps `value`, returning `None` when it is zero.
    pub const fn new(value: usize) -> Option<Self> {
        if let Some(non_zero) = NonZeroUsize::new(value) {
            Some(Self(non_zero))
        } else {
            None
        }
    }

    /// Builds the [`OneIndexed`] that corresponds to the zero-based `value`,
    /// saturating at [`OneIndexed::MAX`].
    pub const fn from_zero_indexed(value: usize) -> Self {
        Self::MIN.saturating_add(value)
    }

    /// Converts this [`OneIndexed`] back into its zero-based primitive value.
    pub const fn to_zero_indexed(self) -> usize {
        let one_based = self.0.get();
        one_based - 1
    }

    /// Computes `self + rhs`, stopping at [`OneIndexed::MAX`] instead of overflowing.
    #[must_use]
    pub const fn saturating_add(self, rhs: usize) -> Self {
        Self(self.0.saturating_add(rhs))
    }

    /// Computes `self - rhs`, stopping at [`OneIndexed::MIN`] instead of
    /// reaching zero or underflowing.
    #[must_use]
    pub const fn saturating_sub(self, rhs: usize) -> Self {
        let remaining = self.0.get().saturating_sub(rhs);
        if let Some(non_zero) = NonZeroUsize::new(remaining) {
            Self(non_zero)
        } else {
            Self::MIN
        }
    }
}

impl std::fmt::Display for OneIndexed {
    /// Writes the one-based value as a plain number.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let one_based: usize = self.0.get();
        std::fmt::Debug::fmt(&one_based, f)
    }
}

/// `Option::unwrap` that is usable in `const` contexts on stable Rust:
/// [Tracking issue](https://github.com/rust-lang/rust/issues/67441)
const fn unwrap<T: Copy>(option: Option<T>) -> T {
    if let Some(value) = option {
        value
    } else {
        panic!("unwrapping None")
    }
}
// Unit tests for `LineIndex`: the recorded line starts and the `Location` to byte offset
// conversion, for ASCII-only and non-ASCII text and for `\n`, `\r`, and `\r\n` line breaks.
#[cfg(test)]
mod tests {
use crate::source_code::line_index::LineIndex;
use ruff_text_size::TextSize;
use rustpython_parser::ast::Location;
// Line starts of ASCII-only text: empty, without a trailing newline, and with `\n` breaks.
#[test]
fn ascii_index() {
let index = LineIndex::from_source_text("");
assert_eq!(index.line_starts(), &[TextSize::from(0)]);
let index = LineIndex::from_source_text("x = 1");
assert_eq!(index.line_starts(), &[TextSize::from(0)]);
let index = LineIndex::from_source_text("x = 1\n");
assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(6)]);
let index = LineIndex::from_source_text("x = 1\ny = 2\nz = x + y\n");
assert_eq!(
index.line_starts(),
&[
TextSize::from(0),
TextSize::from(6),
TextSize::from(12),
TextSize::from(22)
]
);
}
// Offsets of line starts in ASCII-only text, including the row directly after the last line.
#[test]
fn ascii_byte_offset() {
let contents = "x = 1\ny = 2";
let index = LineIndex::from_source_text(contents);
// First row.
let loc = index.location_offset(Location::new(1, 0), contents);
assert_eq!(loc, TextSize::from(0));
// Second row.
let loc = index.location_offset(Location::new(2, 0), contents);
assert_eq!(loc, TextSize::from(6));
// One-past-the-end.
let loc = index.location_offset(Location::new(3, 0), contents);
assert_eq!(loc, TextSize::from(11));
}
// A lone `\r` ends a line.
#[test]
fn ascii_carriage_return() {
let contents = "x = 4\ry = 3";
let index = LineIndex::from_source_text(contents);
assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(6)]);
assert_eq!(
index.location_offset(Location::new(1, 4), contents),
TextSize::from(4)
);
assert_eq!(
index.location_offset(Location::new(2, 0), contents),
TextSize::from(6)
);
assert_eq!(
index.location_offset(Location::new(2, 1), contents),
TextSize::from(7)
);
}
// `\r\n` is a single line break; the next line starts after the `\n`.
#[test]
fn ascii_carriage_return_newline() {
let contents = "x = 4\r\ny = 3";
let index = LineIndex::from_source_text(contents);
assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(7)]);
assert_eq!(
index.location_offset(Location::new(1, 4), contents),
TextSize::from(4)
);
assert_eq!(
index.location_offset(Location::new(2, 0), contents),
TextSize::from(7)
);
assert_eq!(
index.location_offset(Location::new(2, 1), contents),
TextSize::from(8)
);
}
// Line starts are byte offsets: the four-byte `🫣` shifts every following line start.
#[test]
fn utf8_index() {
let index = LineIndex::from_source_text("x = '🫣'");
assert_eq!(index.lines_count(), 1);
assert_eq!(index.line_starts(), &[TextSize::from(0)]);
let index = LineIndex::from_source_text("x = '🫣'\n");
assert_eq!(index.lines_count(), 2);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(11)]
);
let index = LineIndex::from_source_text("x = '🫣'\ny = 2\nz = x + y\n");
assert_eq!(index.lines_count(), 4);
assert_eq!(
index.line_starts(),
&[
TextSize::from(0),
TextSize::from(11),
TextSize::from(17),
TextSize::from(27)
]
);
let index = LineIndex::from_source_text("# 🫣\nclass Foo:\n \"\"\".\"\"\"");
assert_eq!(index.lines_count(), 3);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(7), TextSize::from(18)]
);
}
// A lone `\r` ends a line in non-ASCII text; columns count characters, offsets count bytes.
#[test]
fn utf8_carriage_return() {
let contents = "x = '🫣'\ry = 3";
let index = LineIndex::from_source_text(contents);
assert_eq!(index.lines_count(), 2);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(11)]
);
// Second '
assert_eq!(
index.location_offset(Location::new(1, 6), contents),
TextSize::from(9)
);
assert_eq!(
index.location_offset(Location::new(2, 0), contents),
TextSize::from(11)
);
assert_eq!(
index.location_offset(Location::new(2, 1), contents),
TextSize::from(12)
);
}
// `\r\n` is a single line break in non-ASCII text.
#[test]
fn utf8_carriage_return_newline() {
let contents = "x = '🫣'\r\ny = 3";
let index = LineIndex::from_source_text(contents);
assert_eq!(index.lines_count(), 2);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(12)]
);
// Second '
assert_eq!(
index.location_offset(Location::new(1, 6), contents),
TextSize::from(9)
);
assert_eq!(
index.location_offset(Location::new(2, 0), contents),
TextSize::from(12)
);
assert_eq!(
index.location_offset(Location::new(2, 1), contents),
TextSize::from(13)
);
}
// Columns after the three-byte `☃` map to byte offsets that account for its width.
#[test]
fn utf8_byte_offset() {
let contents = "x = '☃'\ny = 2";
let index = LineIndex::from_source_text(contents);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(10)]
);
// First row.
let loc = index.location_offset(Location::new(1, 0), contents);
assert_eq!(loc, TextSize::from(0));
let loc = index.location_offset(Location::new(1, 5), contents);
assert_eq!(loc, TextSize::from(5));
assert_eq!(&"x = '☃'\ny = 2"[usize::from(loc)..], "☃'\ny = 2");
let loc = index.location_offset(Location::new(1, 6), contents);
assert_eq!(loc, TextSize::from(8));
assert_eq!(&"x = '☃'\ny = 2"[usize::from(loc)..], "'\ny = 2");
// Second row.
let loc = index.location_offset(Location::new(2, 0), contents);
assert_eq!(loc, TextSize::from(10));
// One-past-the-end.
let loc = index.location_offset(Location::new(3, 0), contents);
assert_eq!(loc, TextSize::from(15));
}
}

View file

@ -1,13 +1,15 @@
//! Struct used to efficiently slice source code at (row, column) Locations. //! Struct used to efficiently slice source code at (row, column) Locations.
use crate::source_code::line_index::LineIndex;
use once_cell::unsync::OnceCell; use once_cell::unsync::OnceCell;
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser::ast::Location; use rustpython_parser::ast::Location;
use crate::types::Range; use crate::types::Range;
pub struct Locator<'a> { pub struct Locator<'a> {
contents: &'a str, contents: &'a str,
index: OnceCell<Index>, index: OnceCell<LineIndex>,
} }
impl<'a> Locator<'a> { impl<'a> Locator<'a> {
@ -18,37 +20,38 @@ impl<'a> Locator<'a> {
} }
} }
fn get_or_init_index(&self) -> &Index { fn get_or_init_index(&self) -> &LineIndex {
self.index.get_or_init(|| Index::from(self.contents)) self.index
.get_or_init(|| LineIndex::from_source_text(self.contents))
} }
/// Take the source code up to the given [`Location`]. /// Take the source code up to the given [`Location`].
pub fn take(&self, location: Location) -> &'a str { pub fn take(&self, location: Location) -> &'a str {
let index = self.get_or_init_index(); let index = self.get_or_init_index();
let offset = index.byte_offset(location, self.contents); let offset = index.location_offset(location, self.contents);
&self.contents[..offset] &self.contents[TextRange::up_to(offset)]
} }
/// Take the source code after the given [`Location`]. /// Take the source code after the given [`Location`].
pub fn skip(&self, location: Location) -> &'a str { pub fn skip(&self, location: Location) -> &'a str {
let index = self.get_or_init_index(); let index = self.get_or_init_index();
let offset = index.byte_offset(location, self.contents); let offset = index.location_offset(location, self.contents);
&self.contents[offset..] &self.contents[usize::from(offset)..]
} }
/// Take the source code between the given [`Range`]. /// Take the source code between the given [`Range`].
pub fn slice<R: Into<Range>>(&self, range: R) -> &'a str { pub fn slice<R: Into<Range>>(&self, range: R) -> &'a str {
let index = self.get_or_init_index(); let index = self.get_or_init_index();
let range = range.into(); let range = range.into();
let start = index.byte_offset(range.location, self.contents); let start = index.location_offset(range.location, self.contents);
let end = index.byte_offset(range.end_location, self.contents); let end = index.location_offset(range.end_location, self.contents);
&self.contents[start..end] &self.contents[TextRange::new(start, end)]
} }
/// Return the byte offset of the given [`Location`]. /// Return the byte offset of the given [`Location`].
pub fn offset(&self, location: Location) -> usize { pub fn offset(&self, location: Location) -> TextSize {
let index = self.get_or_init_index(); let index = self.get_or_init_index();
index.byte_offset(location, self.contents) index.location_offset(location, self.contents)
} }
/// Return the underlying source code. /// Return the underlying source code.
@ -59,7 +62,7 @@ impl<'a> Locator<'a> {
/// Return the number of lines in the source code. /// Return the number of lines in the source code.
pub fn count_lines(&self) -> usize { pub fn count_lines(&self) -> usize {
let index = self.get_or_init_index(); let index = self.get_or_init_index();
index.count_lines() index.lines_count()
} }
/// Return the number of bytes in the source code. /// Return the number of bytes in the source code.
@ -72,302 +75,3 @@ impl<'a> Locator<'a> {
self.contents.is_empty() self.contents.is_empty()
} }
} }
/// Index for fast [`Location`] to byte offset conversions.
///
/// The variant is chosen when the index is built from the source text: text that
/// contains any non-ASCII byte gets the `Utf8` variant, all other text the `Ascii` variant.
#[derive(Debug, Clone)]
enum Index {
/// Optimized index for an ASCII only document
Ascii(AsciiIndex),
/// Index for UTF8 documents
Utf8(Utf8Index),
}
impl Index {
/// Truncate a [`Location`] to a byte offset in source code.
fn byte_offset(&self, location: Location, contents: &str) -> usize {
match self {
Index::Ascii(ascii) => ascii.byte_offset(location, contents),
Index::Utf8(utf8) => utf8.byte_offset(location, contents),
}
}
/// Return the number of lines in the source code.
fn count_lines(&self) -> usize {
match self {
Index::Ascii(ascii) => ascii.line_start_byte_offsets.len(),
Index::Utf8(utf8) => utf8.line_start_byte_offsets.len(),
}
}
}
impl From<&str> for Index {
    /// Scans `contents` once, collecting the byte offset of every line start and
    /// noting whether any non-ASCII byte occurs.
    ///
    /// Panics if the length of `contents` does not fit into a `u32`.
    fn from(contents: &str) -> Self {
        assert!(u32::try_from(contents.len()).is_ok());

        let bytes = contents.as_bytes();
        let mut starts: Vec<u32> = Vec::with_capacity(48);
        starts.push(0);
        let mut has_non_ascii = false;

        for (position, &current) in bytes.iter().enumerate() {
            if !current.is_ascii() {
                has_non_ascii = true;
            }

            let next = position + 1;
            let ends_line = match current {
                b'\n' => true,
                // Only track one line break for `\r\n`: the `\n` records it.
                b'\r' => bytes.get(next) != Some(&b'\n'),
                _ => false,
            };

            if ends_line {
                // The cast cannot truncate because of the length assertion above.
                #[allow(clippy::cast_possible_truncation)]
                let next_start = next as u32;
                starts.push(next_start);
            }
        }

        if has_non_ascii {
            Self::Utf8(Utf8Index::new(starts))
        } else {
            Self::Ascii(AsciiIndex::new(starts))
        }
    }
}
/// [`Location`] to byte offset index for documents that contain only ASCII.
///
/// Holds the byte offset of every line start. Because every character is one byte
/// wide, the offset of a [`Location`] is the start of its line plus its column.
#[derive(Debug, Clone, Eq, PartialEq)]
struct AsciiIndex {
    line_start_byte_offsets: Vec<u32>,
}

impl AsciiIndex {
    /// Wraps the already computed line-start offsets.
    fn new(line_start_positions: Vec<u32>) -> Self {
        Self {
            line_start_byte_offsets: line_start_positions,
        }
    }

    /// Resolves `location` to a byte offset in ASCII source code, truncated to the
    /// length of `contents`.
    fn byte_offset(&self, location: Location, contents: &str) -> usize {
        let line_starts = &self.line_start_byte_offsets;
        let row = location.row() - 1;
        let column = location.column();
        let end = contents.len();

        // Start-of-line position directly after the last line.
        if row == line_starts.len() && column == 0 {
            return end;
        }

        let offset = line_starts[row] as usize + column;
        offset.min(end)
    }
}
/// [`Location`] to byte offset index for documents that contain non-ASCII text.
///
/// Holds the byte offset of every line start. The column part is resolved on demand
/// by walking characters from the line start until the `nth` one is reached.
#[derive(Debug, Clone, PartialEq)]
struct Utf8Index {
    line_start_byte_offsets: Vec<u32>,
}

impl Utf8Index {
    /// Wraps the already computed line-start offsets.
    fn new(line_byte_positions: Vec<u32>) -> Self {
        Self {
            line_start_byte_offsets: line_byte_positions,
        }
    }

    /// Resolves `location` to a byte offset in UTF-8 source code, truncated to the
    /// length of `contents`.
    fn byte_offset(&self, location: Location, contents: &str) -> usize {
        let line_starts = &self.line_start_byte_offsets;
        let row = location.row() - 1;
        let column = location.column();
        let end = contents.len();

        // Start-of-line position directly after the last line.
        if row == line_starts.len() && column == 0 {
            return end;
        }

        // On the first row, start counting columns after a leading byte-order mark.
        let line_start = if row == 0 && contents.starts_with('\u{feff}') {
            '\u{feff}'.len_utf8()
        } else {
            line_starts[row] as usize
        };

        // Walk characters from the line start; running out of characters falls back
        // to `end`, which the final `min` truncates to the end of the text.
        let column_offset = contents[line_start..]
            .char_indices()
            .nth(column)
            .map_or(end, |(offset, _)| offset);

        (line_start + column_offset).min(end)
    }
}
// Unit tests for the ASCII and UTF-8 indexes: the recorded line starts and the `Location` to
// byte offset conversion for `\n`, `\r`, and `\r\n` line breaks.
#[cfg(test)]
mod tests {
use rustpython_parser::ast::Location;
use crate::source_code::locator::{AsciiIndex, Index, Utf8Index};
// Builds the index for `content` and unwraps the ASCII variant; panics on the UTF-8 variant.
fn index_ascii(content: &str) -> AsciiIndex {
match Index::from(content) {
Index::Ascii(ascii) => ascii,
Index::Utf8(_) => {
panic!("Expected ASCII index")
}
}
}
// Builds the index for `content` and unwraps the UTF-8 variant; panics on the ASCII variant.
fn index_utf8(content: &str) -> Utf8Index {
match Index::from(content) {
Index::Utf8(utf8) => utf8,
Index::Ascii(_) => {
panic!("Expected UTF8 index")
}
}
}
// Line starts of ASCII-only text: empty, without a trailing newline, and with `\n` breaks.
#[test]
fn ascii_index() {
let contents = "";
let index = index_ascii(contents);
assert_eq!(index, AsciiIndex::new(vec![0]));
let contents = "x = 1";
let index = index_ascii(contents);
assert_eq!(index, AsciiIndex::new(vec![0]));
let contents = "x = 1\n";
let index = index_ascii(contents);
assert_eq!(index, AsciiIndex::new(vec![0, 6]));
let contents = "x = 1\ny = 2\nz = x + y\n";
let index = index_ascii(contents);
assert_eq!(index, AsciiIndex::new(vec![0, 6, 12, 22]));
}
// Offsets of line starts in ASCII-only text, including the row directly after the last line.
#[test]
fn ascii_byte_offset() {
let contents = "x = 1\ny = 2";
let index = index_ascii(contents);
// First row.
let loc = index.byte_offset(Location::new(1, 0), contents);
assert_eq!(loc, 0);
// Second row.
let loc = index.byte_offset(Location::new(2, 0), contents);
assert_eq!(loc, 6);
// One-past-the-end.
let loc = index.byte_offset(Location::new(3, 0), contents);
assert_eq!(loc, 11);
}
// A lone `\r` ends a line.
#[test]
fn ascii_carriage_return() {
let contents = "x = 4\ry = 3";
let index = index_ascii(contents);
assert_eq!(index, AsciiIndex::new(vec![0, 6]));
assert_eq!(index.byte_offset(Location::new(1, 4), contents), 4);
assert_eq!(index.byte_offset(Location::new(2, 0), contents), 6);
assert_eq!(index.byte_offset(Location::new(2, 1), contents), 7);
}
// `\r\n` is a single line break; the next line starts after the `\n`.
#[test]
fn ascii_carriage_return_newline() {
let contents = "x = 4\r\ny = 3";
let index = index_ascii(contents);
assert_eq!(index, AsciiIndex::new(vec![0, 7]));
assert_eq!(index.byte_offset(Location::new(1, 4), contents), 4);
assert_eq!(index.byte_offset(Location::new(2, 0), contents), 7);
assert_eq!(index.byte_offset(Location::new(2, 1), contents), 8);
}
// Test-only helper: the number of line starts recorded by a `Utf8Index`.
impl Utf8Index {
fn line_count(&self) -> usize {
self.line_start_byte_offsets.len()
}
}
// Line starts are byte offsets: the four-byte `🫣` shifts every following line start.
#[test]
fn utf8_index() {
let contents = "x = '🫣'";
let index = index_utf8(contents);
assert_eq!(index.line_count(), 1);
assert_eq!(index, Utf8Index::new(vec![0]));
let contents = "x = '🫣'\n";
let index = index_utf8(contents);
assert_eq!(index.line_count(), 2);
assert_eq!(index, Utf8Index::new(vec![0, 11]));
let contents = "x = '🫣'\ny = 2\nz = x + y\n";
let index = index_utf8(contents);
assert_eq!(index.line_count(), 4);
assert_eq!(index, Utf8Index::new(vec![0, 11, 17, 27]));
let contents = "# 🫣\nclass Foo:\n \"\"\".\"\"\"";
let index = index_utf8(contents);
assert_eq!(index.line_count(), 3);
assert_eq!(index, Utf8Index::new(vec![0, 7, 18]));
}
// A lone `\r` ends a line in non-ASCII text; columns count characters, offsets count bytes.
#[test]
fn utf8_carriage_return() {
let contents = "x = '🫣'\ry = 3";
let index = index_utf8(contents);
assert_eq!(index.line_count(), 2);
assert_eq!(index, Utf8Index::new(vec![0, 11]));
// Second '
assert_eq!(index.byte_offset(Location::new(1, 6), contents), 9);
assert_eq!(index.byte_offset(Location::new(2, 0), contents), 11);
assert_eq!(index.byte_offset(Location::new(2, 1), contents), 12);
}
// `\r\n` is a single line break in non-ASCII text.
#[test]
fn utf8_carriage_return_newline() {
let contents = "x = '🫣'\r\ny = 3";
let index = index_utf8(contents);
assert_eq!(index.line_count(), 2);
assert_eq!(index, Utf8Index::new(vec![0, 12]));
// Second '
assert_eq!(index.byte_offset(Location::new(1, 6), contents), 9);
assert_eq!(index.byte_offset(Location::new(2, 0), contents), 12);
assert_eq!(index.byte_offset(Location::new(2, 1), contents), 13);
}
// Columns after the three-byte `☃` map to byte offsets that account for its width.
#[test]
fn utf8_byte_offset() {
let contents = "x = '☃'\ny = 2";
let index = index_utf8(contents);
assert_eq!(index, Utf8Index::new(vec![0, 10]));
// First row.
let loc = index.byte_offset(Location::new(1, 0), contents);
assert_eq!(loc, 0);
let loc = index.byte_offset(Location::new(1, 5), contents);
assert_eq!(loc, 5);
assert_eq!(&contents[loc..], "☃'\ny = 2");
let loc = index.byte_offset(Location::new(1, 6), contents);
assert_eq!(loc, 8);
assert_eq!(&contents[loc..], "'\ny = 2");
// Second row.
let loc = index.byte_offset(Location::new(2, 0), contents);
assert_eq!(loc, 10);
// One-past-the-end.
let loc = index.byte_offset(Location::new(3, 0), contents);
assert_eq!(loc, 15);
}
}

View file

@ -1,13 +1,16 @@
mod generator; mod generator;
mod indexer; mod indexer;
mod line_index;
mod locator; mod locator;
mod stylist; mod stylist;
pub use crate::source_code::line_index::{LineIndex, OneIndexed};
pub use generator::Generator; pub use generator::Generator;
pub use indexer::Indexer; pub use indexer::Indexer;
pub use locator::Locator; pub use locator::Locator;
use rustpython_parser as parser; use rustpython_parser as parser;
use rustpython_parser::{lexer, Mode, ParseError}; use rustpython_parser::{lexer, Mode, ParseError};
pub use stylist::{LineEnding, Stylist}; pub use stylist::{LineEnding, Stylist};
/// Run round-trip source code generation on a given Python code. /// Run round-trip source code generation on a given Python code.

View file

@ -3,6 +3,7 @@ use rustpython_parser::ast::Location;
use ruff_python_ast::newlines::StrExt; use ruff_python_ast::newlines::StrExt;
use ruff_python_ast::source_code::Locator; use ruff_python_ast::source_code::Locator;
use ruff_python_ast::types::Range; use ruff_python_ast::types::Range;
use ruff_text_size::TextRange;
/// Return `true` if the given string is a radix literal (e.g., `0b101`). /// Return `true` if the given string is a radix literal (e.g., `0b101`).
pub fn is_radix_literal(content: &str) -> bool { pub fn is_radix_literal(content: &str) -> bool {
@ -55,7 +56,7 @@ pub fn expand_indented_block(
let mut nesting = 0; let mut nesting = 0;
let mut colon = None; let mut colon = None;
for (start, tok, _end) in rustpython_parser::lexer::lex_located( for (start, tok, _end) in rustpython_parser::lexer::lex_located(
&contents[start_index..end_index], &contents[TextRange::new(start_index, end_index)],
rustpython_parser::Mode::Module, rustpython_parser::Mode::Module,
location, location,
) )
@ -80,7 +81,7 @@ pub fn expand_indented_block(
// From here, we have two options: simple statement or compound statement. // From here, we have two options: simple statement or compound statement.
let indent = rustpython_parser::lexer::lex_located( let indent = rustpython_parser::lexer::lex_located(
&contents[colon_index..end_index], &contents[TextRange::new(colon_index, end_index)],
rustpython_parser::Mode::Module, rustpython_parser::Mode::Module,
colon_location, colon_location,
) )
@ -97,7 +98,7 @@ pub fn expand_indented_block(
// Compound statement: from the colon to the end of the block. // Compound statement: from the colon to the end of the block.
let mut offset = 0; let mut offset = 0;
for (index, line) in contents[end_index..] for (index, line) in contents[usize::from(end_index)..]
.universal_newlines() .universal_newlines()
.skip(1) .skip(1)
.enumerate() .enumerate()

View file

@ -80,10 +80,7 @@ impl Format<ASTFormatContext<'_>> for Literal {
f.write_element(FormatElement::StaticTextSlice { f.write_element(FormatElement::StaticTextSlice {
text, text,
range: TextRange::new( range: TextRange::new(start_index, end_index),
start_index.try_into().unwrap(),
end_index.try_into().unwrap(),
),
}) })
} }
} }

View file

@ -3,7 +3,7 @@ use rustpython_parser::ast::Location;
use ruff_formatter::prelude::*; use ruff_formatter::prelude::*;
use ruff_formatter::{write, Format}; use ruff_formatter::{write, Format};
use ruff_python_ast::types::Range; use ruff_python_ast::types::Range;
use ruff_text_size::TextSize; use ruff_text_size::{TextRange, TextSize};
use crate::context::ASTFormatContext; use crate::context::ASTFormatContext;
use crate::format::builders::literal; use crate::format::builders::literal;
@ -20,9 +20,10 @@ impl Format<ASTFormatContext<'_>> for FloatAtom {
let start_index = locator.offset(self.range.location); let start_index = locator.offset(self.range.location);
let end_index = locator.offset(self.range.end_location); let end_index = locator.offset(self.range.end_location);
if let Some(dot_index) = contents[start_index..end_index].find('.') { let content = &contents[TextRange::new(start_index, end_index)];
let integer = &contents[start_index..start_index + dot_index]; if let Some(dot_index) = content.find('.') {
let fractional = &contents[start_index + dot_index + 1..end_index]; let integer = &content[..dot_index];
let fractional = &content[dot_index + 1..];
if integer.is_empty() { if integer.is_empty() {
write!(f, [text("0")])?; write!(f, [text("0")])?;
@ -80,11 +81,10 @@ impl Format<ASTFormatContext<'_>> for FloatLiteral {
let start_index = locator.offset(self.range.location); let start_index = locator.offset(self.range.location);
let end_index = locator.offset(self.range.end_location); let end_index = locator.offset(self.range.end_location);
let content = &contents[TextRange::new(start_index, end_index)];
// Scientific notation // Scientific notation
if let Some(exponent_index) = contents[start_index..end_index] if let Some(exponent_index) = content.find('e').or_else(|| content.find('E')) {
.find('e')
.or_else(|| contents[start_index..end_index].find('E'))
{
// Write the base. // Write the base.
write!( write!(
f, f,
@ -100,7 +100,7 @@ impl Format<ASTFormatContext<'_>> for FloatLiteral {
write!(f, [text("e")])?; write!(f, [text("e")])?;
// Write the exponent, omitting the sign if it's positive. // Write the exponent, omitting the sign if it's positive.
let plus = contents[start_index + exponent_index + 1..end_index].starts_with('+'); let plus = content[exponent_index + 1..].starts_with('+');
write!( write!(
f, f,
[literal(Range::new( [literal(Range::new(
@ -137,10 +137,11 @@ impl Format<ASTFormatContext<'_>> for IntLiteral {
let end_index = locator.offset(self.range.end_location); let end_index = locator.offset(self.range.end_location);
for prefix in ["0b", "0B", "0o", "0O", "0x", "0X"] { for prefix in ["0b", "0B", "0o", "0O", "0x", "0X"] {
if contents[start_index..end_index].starts_with(prefix) { let content = &contents[TextRange::new(start_index, end_index)];
if content.starts_with(prefix) {
// In each case, the prefix must be lowercase, while the suffix must be uppercase. // In each case, the prefix must be lowercase, while the suffix must be uppercase.
let prefix = &contents[start_index..start_index + prefix.len()]; let prefix = &content[..prefix.len()];
let suffix = &contents[start_index + prefix.len()..end_index]; let suffix = &content[prefix.len()..];
if prefix.bytes().any(|b| b.is_ascii_uppercase()) if prefix.bytes().any(|b| b.is_ascii_uppercase())
|| suffix.bytes().any(|b| b.is_ascii_lowercase()) || suffix.bytes().any(|b| b.is_ascii_lowercase())
@ -185,9 +186,11 @@ impl Format<ASTFormatContext<'_>> for ComplexLiteral {
let start_index = locator.offset(self.range.location); let start_index = locator.offset(self.range.location);
let end_index = locator.offset(self.range.end_location); let end_index = locator.offset(self.range.end_location);
if contents[start_index..end_index].ends_with('j') { let content = &contents[TextRange::new(start_index, end_index)];
if content.ends_with('j') {
write!(f, [literal(self.range)])?; write!(f, [literal(self.range)])?;
} else if contents[start_index..end_index].ends_with('J') { } else if content.ends_with('J') {
write!( write!(
f, f,
[literal(Range::new( [literal(Range::new(

View file

@ -4,7 +4,7 @@ use ruff_formatter::prelude::*;
use ruff_formatter::{write, Format}; use ruff_formatter::{write, Format};
use ruff_python_ast::str::{leading_quote, trailing_quote}; use ruff_python_ast::str::{leading_quote, trailing_quote};
use ruff_python_ast::types::Range; use ruff_python_ast::types::Range;
use ruff_text_size::TextSize; use ruff_text_size::{TextRange, TextSize};
use crate::context::ASTFormatContext; use crate::context::ASTFormatContext;
use crate::cst::Expr; use crate::cst::Expr;
@ -22,7 +22,7 @@ impl Format<ASTFormatContext<'_>> for StringLiteralPart {
let end_index = locator.offset(self.range.end_location); let end_index = locator.offset(self.range.end_location);
// Extract leading and trailing quotes. // Extract leading and trailing quotes.
let contents = &contents[start_index..end_index]; let contents = &contents[TextRange::new(start_index, end_index)];
let leading_quote = leading_quote(contents).unwrap(); let leading_quote = leading_quote(contents).unwrap();
let trailing_quote = trailing_quote(contents).unwrap(); let trailing_quote = trailing_quote(contents).unwrap();
let body = &contents[leading_quote.len()..contents.len() - trailing_quote.len()]; let body = &contents[leading_quote.len()..contents.len() - trailing_quote.len()];