mirror of
https://github.com/RustPython/Parser.git
synced 2025-07-07 21:25:31 +00:00
vendor text_size and source_location (#102)
This commit is contained in:
parent
b07966695a
commit
d09bce80e6
29 changed files with 175 additions and 311 deletions
17
vendored/Cargo.toml
Normal file
17
vendored/Cargo.toml
Normal file
|
@ -0,0 +1,17 @@
|
|||
[package]
|
||||
name = "rustpython-parser-vendored"
|
||||
description = "RustPython parser vendored third-party crates."
|
||||
version = "0.3.0"
|
||||
authors = ["RustPython Team"]
|
||||
edition = "2021"
|
||||
repository = "https://github.com/RustPython/Parser/"
|
||||
license = "MIT"
|
||||
|
||||
[dependencies]
|
||||
serde = { version = "1.0.133", optional = true, default-features = false, features = ["derive"] }
|
||||
memchr.workspace = true
|
||||
once_cell.workspace = true
|
||||
|
||||
[features]
|
||||
default = []
|
||||
location = []
|
3
vendored/README
Normal file
3
vendored/README
Normal file
|
@ -0,0 +1,3 @@
|
|||
This crate vendors third-party source code that we can't depend on through crates.io.
|
||||
|
||||
See the README and LICENSE of each module.
|
2
vendored/src/lib.rs
Normal file
2
vendored/src/lib.rs
Normal file
|
@ -0,0 +1,2 @@
|
|||
pub mod source_location;
|
||||
pub mod text_size;
|
23
vendored/src/source_location/LICENSE
Normal file
23
vendored/src/source_location/LICENSE
Normal file
|
@ -0,0 +1,23 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2022 Charles Marsh
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
end of terms and conditions
|
2
vendored/src/source_location/README
Normal file
2
vendored/src/source_location/README
Normal file
|
@ -0,0 +1,2 @@
|
|||
This module is mainly imported from `ruff_python_ast::source_code`,
|
||||
including `ruff_python_ast::source_code::SourceLocation` related source code.
|
630
vendored/src/source_location/line_index.rs
Normal file
630
vendored/src/source_location/line_index.rs
Normal file
|
@ -0,0 +1,630 @@
|
|||
use super::SourceLocation;
|
||||
use crate::text_size::{TextLen, TextRange, TextSize};
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt;
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::num::NonZeroU32;
|
||||
use std::ops::Deref;
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Index for fast [byte offset](TextSize) to [`SourceLocation`] conversions.
|
||||
///
|
||||
/// Cloning a [`LineIndex`] is cheap because it only requires bumping a reference count.
|
||||
#[derive(Clone)]
|
||||
pub struct LineIndex {
|
||||
inner: Arc<LineIndexInner>,
|
||||
}
|
||||
|
||||
struct LineIndexInner {
|
||||
line_starts: Vec<TextSize>,
|
||||
kind: IndexKind,
|
||||
}
|
||||
|
||||
impl LineIndex {
|
||||
/// Builds the [`LineIndex`] from the source text of a file.
|
||||
pub fn from_source_text(text: &str) -> Self {
|
||||
let mut line_starts: Vec<TextSize> = Vec::with_capacity(text.len() / 88);
|
||||
line_starts.push(TextSize::default());
|
||||
|
||||
let bytes = text.as_bytes();
|
||||
let mut utf8 = false;
|
||||
|
||||
assert!(u32::try_from(bytes.len()).is_ok());
|
||||
|
||||
for (i, byte) in bytes.iter().enumerate() {
|
||||
utf8 |= !byte.is_ascii();
|
||||
|
||||
match byte {
|
||||
// Only track one line break for `\r\n`.
|
||||
b'\r' if bytes.get(i + 1) == Some(&b'\n') => continue,
|
||||
b'\n' | b'\r' => {
|
||||
// SAFETY: Assertion above guarantees `i <= u32::MAX`
|
||||
#[allow(clippy::cast_possible_truncation)]
|
||||
line_starts.push(TextSize::from(i as u32) + TextSize::from(1));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
let kind = if utf8 {
|
||||
IndexKind::Utf8
|
||||
} else {
|
||||
IndexKind::Ascii
|
||||
};
|
||||
|
||||
Self {
|
||||
inner: Arc::new(LineIndexInner { line_starts, kind }),
|
||||
}
|
||||
}
|
||||
|
||||
fn kind(&self) -> IndexKind {
|
||||
self.inner.kind
|
||||
}
|
||||
|
||||
/// Returns the row and column index for an offset.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use rustpython_parser_vendored::text_size::TextSize;
|
||||
/// # use rustpython_parser_vendored::source_location::{LineIndex, OneIndexed, SourceLocation};
|
||||
/// let source = "def a():\n pass";
|
||||
/// let index = LineIndex::from_source_text(source);
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// index.source_location(TextSize::from(0), source),
|
||||
/// SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) }
|
||||
/// );
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// index.source_location(TextSize::from(4), source),
|
||||
/// SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(4) }
|
||||
/// );
|
||||
/// assert_eq!(
|
||||
/// index.source_location(TextSize::from(13), source),
|
||||
/// SourceLocation { row: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(4) }
|
||||
/// );
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
///
|
||||
/// If the offset is out of bounds.
|
||||
pub fn source_location(&self, offset: TextSize, content: &str) -> SourceLocation {
|
||||
match self.binary_search_line(&offset) {
|
||||
// Offset is at the start of a line
|
||||
Ok(row) => SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(row),
|
||||
column: OneIndexed::from_zero_indexed(0),
|
||||
},
|
||||
Err(next_row) => {
|
||||
// SAFETY: Safe because the index always contains an entry for the offset 0
|
||||
let row = next_row - 1;
|
||||
let mut line_start = self.line_starts()[row as usize];
|
||||
|
||||
let column = if self.kind().is_ascii() {
|
||||
u32::from(offset - line_start)
|
||||
} else {
|
||||
// Don't count the BOM character as a column.
|
||||
if line_start == TextSize::from(0) && content.starts_with('\u{feff}') {
|
||||
line_start = '\u{feff}'.text_len();
|
||||
}
|
||||
|
||||
let range = TextRange::new(line_start, offset);
|
||||
content[range].chars().count().try_into().unwrap()
|
||||
};
|
||||
|
||||
SourceLocation {
|
||||
row: OneIndexed::from_zero_indexed(row),
|
||||
column: OneIndexed::from_zero_indexed(column),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the number of lines in the source code.
|
||||
pub(crate) fn line_count(&self) -> usize {
|
||||
self.line_starts().len()
|
||||
}
|
||||
|
||||
/// Returns the row number for a given offset.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use rustpython_parser_vendored::text_size::TextSize;
|
||||
/// # use rustpython_parser_vendored::source_location::{LineIndex, OneIndexed, SourceLocation};
|
||||
/// let source = "def a():\n pass";
|
||||
/// let index = LineIndex::from_source_text(source);
|
||||
///
|
||||
/// assert_eq!(index.line_index(TextSize::from(0)), OneIndexed::from_zero_indexed(0));
|
||||
/// assert_eq!(index.line_index(TextSize::from(4)), OneIndexed::from_zero_indexed(0));
|
||||
/// assert_eq!(index.line_index(TextSize::from(13)), OneIndexed::from_zero_indexed(1));
|
||||
/// ```
|
||||
///
|
||||
/// ## Panics
|
||||
///
|
||||
/// If the offset is out of bounds.
|
||||
pub fn line_index(&self, offset: TextSize) -> OneIndexed {
|
||||
match self.binary_search_line(&offset) {
|
||||
// Offset is at the start of a line
|
||||
Ok(row) => OneIndexed::from_zero_indexed(row),
|
||||
Err(row) => {
|
||||
// SAFETY: Safe because the index always contains an entry for the offset 0
|
||||
OneIndexed::from_zero_indexed(row - 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the [byte offset](TextSize) for the `line` with the given index.
|
||||
pub(crate) fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize {
|
||||
let row_index = line.to_zero_indexed_usize();
|
||||
let starts = self.line_starts();
|
||||
|
||||
// If start-of-line position after last line
|
||||
if row_index == starts.len() {
|
||||
contents.text_len()
|
||||
} else {
|
||||
starts[row_index]
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the [byte offset](TextSize) of the `line`'s end.
|
||||
/// The offset is the end of the line, up to and including the newline character ending the line (if any).
|
||||
pub(crate) fn line_end(&self, line: OneIndexed, contents: &str) -> TextSize {
|
||||
let row_index = line.to_zero_indexed_usize();
|
||||
let starts = self.line_starts();
|
||||
|
||||
// If start-of-line position after last line
|
||||
if row_index.saturating_add(1) >= starts.len() {
|
||||
contents.text_len()
|
||||
} else {
|
||||
starts[row_index + 1]
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the [`TextRange`] of the `line` with the given index.
|
||||
/// The start points to the first character's [byte offset](TextSize), the end up to, and including
|
||||
/// the newline character ending the line (if any).
|
||||
pub(crate) fn line_range(&self, line: OneIndexed, contents: &str) -> TextRange {
|
||||
let starts = self.line_starts();
|
||||
|
||||
if starts.len() == line.to_zero_indexed_usize() {
|
||||
TextRange::empty(contents.text_len())
|
||||
} else {
|
||||
TextRange::new(
|
||||
self.line_start(line, contents),
|
||||
self.line_start(line.saturating_add(1), contents),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the [byte offsets](TextSize) for every line
|
||||
pub fn line_starts(&self) -> &[TextSize] {
|
||||
&self.inner.line_starts
|
||||
}
|
||||
|
||||
#[allow(clippy::trivially_copy_pass_by_ref)] // to keep same interface as `[T]::binary_search`
|
||||
fn binary_search_line(&self, offset: &TextSize) -> Result<u32, u32> {
|
||||
// `try_into()` always success as long as TextSize is u32
|
||||
match self.line_starts().binary_search(offset) {
|
||||
Ok(index) => Ok(index.try_into().unwrap()),
|
||||
Err(index) => Err(index.try_into().unwrap()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for LineIndex {
|
||||
type Target = [TextSize];
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.line_starts()
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for LineIndex {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
|
||||
f.debug_list().entries(self.line_starts()).finish()
|
||||
}
|
||||
}
|
||||
|
||||
/// Distinguishes pure-ASCII documents from documents with other UTF-8 content.
#[derive(Debug, Clone, Copy)]
enum IndexKind {
    /// Optimized index for an ASCII only document
    Ascii,

    /// Index for UTF8 documents
    Utf8,
}

impl IndexKind {
    /// Returns `true` only for [`IndexKind::Ascii`].
    const fn is_ascii(self) -> bool {
        match self {
            IndexKind::Ascii => true,
            IndexKind::Utf8 => false,
        }
    }
}
|
||||
|
||||
/// Type-safe wrapper for a value whose logical range starts at `1`, for
/// instance the line or column numbers in a file
///
/// Internally this is represented as a [`NonZeroU32`], this enables some
/// memory optimizations
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct OneIndexed(NonZeroU32);

/// Narrows a `usize` to `u32`, handing the input back as the error when it does not fit.
#[allow(clippy::cast_possible_truncation)] // manually checked
const fn try_to_u32(value: usize) -> Result<u32, usize> {
    if value > u32::MAX as usize {
        return Err(value);
    }

    Ok(value as u32)
}

impl OneIndexed {
    // Both public constants are built from non-zero literals, so `unwrap` cannot panic.

    /// The smallest value that can be represented by this integer type.
    pub const MIN: Self = unwrap(Self::new(1));
    /// The largest value that can be represented by this integer type
    pub const MAX: Self = unwrap(Self::new(u32::MAX));

    const ONE: NonZeroU32 = unwrap(NonZeroU32::new(1));

    /// Creates a non-zero if the given value is not zero.
    pub const fn new(value: u32) -> Option<Self> {
        if let Some(non_zero) = NonZeroU32::new(value) {
            Some(Self(non_zero))
        } else {
            None
        }
    }

    /// Construct a new [`OneIndexed`] from a zero-indexed value
    pub const fn from_zero_indexed(value: u32) -> Self {
        let one_based = Self::ONE.saturating_add(value);
        Self(one_based)
    }

    /// Construct a new [`OneIndexed`] from a zero-indexed usize value
    pub const fn try_from_zero_indexed(value: usize) -> Result<Self, usize> {
        match try_to_u32(value) {
            Ok(zero_based) => Ok(Self::from_zero_indexed(zero_based)),
            Err(too_large) => Err(too_large),
        }
    }

    /// Returns the value as a primitive type.
    pub const fn get(self) -> u32 {
        let Self(inner) = self;
        inner.get()
    }

    /// Return the usize value for this [`OneIndexed`]
    pub const fn to_usize(self) -> usize {
        self.get() as usize
    }

    /// Return the zero-indexed primitive value for this [`OneIndexed`]
    pub const fn to_zero_indexed(self) -> u32 {
        self.get() - 1
    }

    /// Return the zero-indexed usize value for this [`OneIndexed`]
    pub const fn to_zero_indexed_usize(self) -> usize {
        self.to_zero_indexed() as usize
    }

    /// Saturating integer addition. Computes `self + rhs`, saturating at
    /// the numeric bounds instead of overflowing.
    #[must_use]
    pub const fn saturating_add(self, rhs: u32) -> Self {
        let sum = self.get().saturating_add(rhs);

        match Self::new(sum) {
            Some(result) => result,
            None => Self::MAX,
        }
    }

    /// Saturating integer subtraction. Computes `self - rhs`, saturating
    /// at the numeric bounds instead of overflowing.
    #[must_use]
    pub const fn saturating_sub(self, rhs: u32) -> Self {
        let difference = self.get().saturating_sub(rhs);

        match Self::new(difference) {
            Some(result) => result,
            // The subtraction bottomed out at zero; clamp to the smallest valid value.
            None => Self::MIN,
        }
    }
}

/// Prints the one-based number.
impl std::fmt::Display for OneIndexed {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let number: u32 = self.get();
        std::fmt::Debug::fmt(&number, f)
    }
}

/// A const `Option::unwrap` without nightly features:
/// [Tracking issue](https://github.com/rust-lang/rust/issues/67441)
const fn unwrap<T: Copy>(option: Option<T>) -> T {
    if let Some(value) = option {
        value
    } else {
        panic!("unwrapping None")
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::source_location::line_index::LineIndex;
    use crate::source_location::{OneIndexed, SourceLocation};
    use crate::text_size::TextSize;

    /// Converts raw byte offsets into the `TextSize` values stored by the index.
    fn offsets(raw: &[u32]) -> Vec<TextSize> {
        raw.iter().copied().map(TextSize::from).collect()
    }

    /// Asserts that indexing `source` yields exactly the line starts in `expected`.
    #[track_caller]
    fn assert_line_starts(source: &str, expected: &[u32]) -> LineIndex {
        let index = LineIndex::from_source_text(source);
        assert_eq!(index.line_starts(), offsets(expected).as_slice());
        index
    }

    /// Asserts that `offset` maps to the zero-indexed `row` and `column`.
    #[track_caller]
    fn assert_location(index: &LineIndex, contents: &str, offset: u32, row: u32, column: u32) {
        assert_eq!(
            index.source_location(TextSize::from(offset), contents),
            SourceLocation {
                row: OneIndexed::from_zero_indexed(row),
                column: OneIndexed::from_zero_indexed(column)
            }
        );
    }

    #[test]
    fn ascii_index() {
        assert_line_starts("", &[0]);
        assert_line_starts("x = 1", &[0]);
        assert_line_starts("x = 1\n", &[0, 6]);
        assert_line_starts("x = 1\ny = 2\nz = x + y\n", &[0, 6, 12, 22]);
    }

    #[test]
    fn ascii_source_location() {
        let contents = "x = 1\ny = 2";
        let index = LineIndex::from_source_text(contents);

        // First row.
        assert_location(&index, contents, 2, 0, 2);

        // Second row.
        assert_location(&index, contents, 6, 1, 0);
        assert_location(&index, contents, 11, 1, 5);
    }

    #[test]
    fn ascii_carriage_return() {
        let contents = "x = 4\ry = 3";
        let index = assert_line_starts(contents, &[0, 6]);

        assert_location(&index, contents, 4, 0, 4);
        assert_location(&index, contents, 6, 1, 0);
        assert_location(&index, contents, 7, 1, 1);
    }

    #[test]
    fn ascii_carriage_return_newline() {
        let contents = "x = 4\r\ny = 3";
        let index = assert_line_starts(contents, &[0, 7]);

        assert_location(&index, contents, 4, 0, 4);
        assert_location(&index, contents, 7, 1, 0);
        assert_location(&index, contents, 8, 1, 1);
    }

    #[test]
    fn utf8_index() {
        let cases: [(&str, usize, &[u32]); 4] = [
            ("x = '🫣'", 1, &[0]),
            ("x = '🫣'\n", 2, &[0, 11]),
            ("x = '🫣'\ny = 2\nz = x + y\n", 4, &[0, 11, 17, 27]),
            ("# 🫣\nclass Foo:\n \"\"\".\"\"\"", 3, &[0, 7, 18]),
        ];

        for (source, line_count, starts) in cases {
            let index = LineIndex::from_source_text(source);
            assert_eq!(index.line_count(), line_count);
            assert_eq!(index.line_starts(), offsets(starts).as_slice());
        }
    }

    #[test]
    fn utf8_carriage_return() {
        let contents = "x = '🫣'\ry = 3";
        let index = LineIndex::from_source_text(contents);
        assert_eq!(index.line_count(), 2);
        assert_eq!(index.line_starts(), offsets(&[0, 11]).as_slice());

        // Second '
        assert_location(&index, contents, 9, 0, 6);
        assert_location(&index, contents, 11, 1, 0);
        assert_location(&index, contents, 12, 1, 1);
    }

    #[test]
    fn utf8_carriage_return_newline() {
        let contents = "x = '🫣'\r\ny = 3";
        let index = LineIndex::from_source_text(contents);
        assert_eq!(index.line_count(), 2);
        assert_eq!(index.line_starts(), offsets(&[0, 12]).as_slice());

        // Second '
        assert_location(&index, contents, 9, 0, 6);
        assert_location(&index, contents, 12, 1, 0);
        assert_location(&index, contents, 13, 1, 1);
    }

    #[test]
    fn utf8_byte_offset() {
        let contents = "x = '☃'\ny = 2";
        let index = assert_line_starts(contents, &[0, 10]);

        // First row.
        assert_location(&index, contents, 0, 0, 0);
        assert_location(&index, contents, 5, 0, 5);
        assert_location(&index, contents, 8, 0, 6);

        // Second row.
        assert_location(&index, contents, 10, 1, 0);

        // One-past-the-end.
        assert_location(&index, contents, 15, 1, 5);
    }
}
|
225
vendored/src/source_location/mod.rs
Normal file
225
vendored/src/source_location/mod.rs
Normal file
|
@ -0,0 +1,225 @@
|
|||
mod line_index;
|
||||
// mod locator;
|
||||
pub mod newlines;
|
||||
|
||||
pub use self::line_index::{LineIndex, OneIndexed};
|
||||
use crate::text_size::{TextRange, TextSize};
|
||||
#[cfg(feature = "serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt::{Debug, Formatter};
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Gives access to the source code of a file and allows mapping between [`TextSize`] and [`SourceLocation`].
|
||||
#[derive(Debug)]
|
||||
pub struct SourceCode<'src, 'index> {
|
||||
text: &'src str,
|
||||
index: &'index LineIndex,
|
||||
}
|
||||
|
||||
impl<'src, 'index> SourceCode<'src, 'index> {
|
||||
pub fn new(content: &'src str, index: &'index LineIndex) -> Self {
|
||||
Self {
|
||||
text: content,
|
||||
index,
|
||||
}
|
||||
}
|
||||
|
||||
/// Computes the one indexed row and column numbers for `offset`.
|
||||
#[inline]
|
||||
pub fn source_location(&self, offset: TextSize) -> SourceLocation {
|
||||
self.index.source_location(offset, self.text)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn line_index(&self, offset: TextSize) -> OneIndexed {
|
||||
self.index.line_index(offset)
|
||||
}
|
||||
|
||||
/// Take the source code up to the given [`TextSize`].
|
||||
#[inline]
|
||||
pub fn up_to(&self, offset: TextSize) -> &'src str {
|
||||
&self.text[TextRange::up_to(offset)]
|
||||
}
|
||||
|
||||
/// Take the source code after the given [`TextSize`].
|
||||
#[inline]
|
||||
pub fn after(&self, offset: TextSize) -> &'src str {
|
||||
&self.text[usize::from(offset)..]
|
||||
}
|
||||
|
||||
/// Take the source code between the given [`TextRange`].
|
||||
pub fn slice(&self, range: TextRange) -> &'src str {
|
||||
&self.text[range]
|
||||
}
|
||||
|
||||
pub fn line_start(&self, line: OneIndexed) -> TextSize {
|
||||
self.index.line_start(line, self.text)
|
||||
}
|
||||
|
||||
pub fn line_end(&self, line: OneIndexed) -> TextSize {
|
||||
self.index.line_end(line, self.text)
|
||||
}
|
||||
|
||||
pub fn line_range(&self, line: OneIndexed) -> TextRange {
|
||||
self.index.line_range(line, self.text)
|
||||
}
|
||||
|
||||
/// Returns the source text of the line with the given index
|
||||
#[inline]
|
||||
pub fn line_text(&self, index: OneIndexed) -> &'src str {
|
||||
let range = self.index.line_range(index, self.text);
|
||||
&self.text[range]
|
||||
}
|
||||
|
||||
/// Returns the source text
|
||||
pub fn text(&self) -> &'src str {
|
||||
self.text
|
||||
}
|
||||
|
||||
/// Returns the number of lines
|
||||
#[inline]
|
||||
pub fn line_count(&self) -> usize {
|
||||
self.index.line_count()
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq<Self> for SourceCode<'_, '_> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.text == other.text
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for SourceCode<'_, '_> {}
|
||||
|
||||
/// A Builder for constructing a [`SourceFile`]
|
||||
pub struct SourceFileBuilder {
|
||||
name: Box<str>,
|
||||
code: Box<str>,
|
||||
index: Option<LineIndex>,
|
||||
}
|
||||
|
||||
impl SourceFileBuilder {
|
||||
/// Creates a new builder for a file named `name`.
|
||||
pub fn new<Name: Into<Box<str>>, Code: Into<Box<str>>>(name: Name, code: Code) -> Self {
|
||||
Self {
|
||||
name: name.into(),
|
||||
code: code.into(),
|
||||
index: None,
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn line_index(mut self, index: LineIndex) -> Self {
|
||||
self.index = Some(index);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn set_line_index(&mut self, index: LineIndex) {
|
||||
self.index = Some(index);
|
||||
}
|
||||
|
||||
/// Consumes `self` and returns the [`SourceFile`].
|
||||
pub fn finish(self) -> SourceFile {
|
||||
let index = if let Some(index) = self.index {
|
||||
once_cell::sync::OnceCell::with_value(index)
|
||||
} else {
|
||||
once_cell::sync::OnceCell::new()
|
||||
};
|
||||
|
||||
SourceFile {
|
||||
inner: Arc::new(SourceFileInner {
|
||||
name: self.name,
|
||||
code: self.code,
|
||||
line_index: index,
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A source file that is identified by its name. Optionally stores the source code and [`LineIndex`].
|
||||
///
|
||||
/// Cloning a [`SourceFile`] is cheap, because it only requires bumping a reference count.
|
||||
#[derive(Clone, Eq, PartialEq)]
|
||||
pub struct SourceFile {
|
||||
inner: Arc<SourceFileInner>,
|
||||
}
|
||||
|
||||
impl Debug for SourceFile {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("SourceFile")
|
||||
.field("name", &self.name())
|
||||
.field("code", &self.source_text())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl SourceFile {
|
||||
/// Returns the name of the source file (filename).
|
||||
#[inline]
|
||||
pub fn name(&self) -> &str {
|
||||
&self.inner.name
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn slice(&self, range: TextRange) -> &str {
|
||||
&self.source_text()[range]
|
||||
}
|
||||
|
||||
pub fn to_source_code(&self) -> SourceCode {
|
||||
SourceCode {
|
||||
text: self.source_text(),
|
||||
index: self.index(),
|
||||
}
|
||||
}
|
||||
|
||||
fn index(&self) -> &LineIndex {
|
||||
self.inner
|
||||
.line_index
|
||||
.get_or_init(|| LineIndex::from_source_text(self.source_text()))
|
||||
}
|
||||
|
||||
/// Returns `Some` with the source text if set, or `None`.
|
||||
#[inline]
|
||||
pub fn source_text(&self) -> &str {
|
||||
&self.inner.code
|
||||
}
|
||||
}
|
||||
|
||||
struct SourceFileInner {
|
||||
name: Box<str>,
|
||||
code: Box<str>,
|
||||
line_index: once_cell::sync::OnceCell<LineIndex>,
|
||||
}
|
||||
|
||||
impl PartialEq for SourceFileInner {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.name == other.name && self.code == other.code
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for SourceFileInner {}
|
||||
|
||||
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Copy)]
|
||||
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
|
||||
pub struct SourceLocation {
|
||||
pub row: OneIndexed,
|
||||
pub column: OneIndexed,
|
||||
}
|
||||
|
||||
impl Default for SourceLocation {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
row: OneIndexed::MIN,
|
||||
column: OneIndexed::MIN,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Debug for SourceLocation {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("SourceLocation")
|
||||
.field("row", &self.row.get())
|
||||
.field("column", &self.column.get())
|
||||
.finish()
|
||||
}
|
||||
}
|
446
vendored/src/source_location/newlines.rs
Normal file
446
vendored/src/source_location/newlines.rs
Normal file
|
@ -0,0 +1,446 @@
|
|||
use crate::text_size::{TextLen, TextRange, TextSize};
|
||||
use memchr::{memchr2, memrchr2};
|
||||
use std::iter::FusedIterator;
|
||||
use std::ops::Deref;
|
||||
|
||||
/// Extension trait for [`str`] that provides a [`UniversalNewlineIterator`].
|
||||
pub trait StrExt {
|
||||
fn universal_newlines(&self) -> UniversalNewlineIterator<'_>;
|
||||
}
|
||||
|
||||
impl StrExt for str {
|
||||
fn universal_newlines(&self) -> UniversalNewlineIterator<'_> {
|
||||
UniversalNewlineIterator::from(self)
|
||||
}
|
||||
}
|
||||
|
||||
/// Like [`str#lines`], but accommodates LF, CRLF, and CR line endings,
|
||||
/// the latter of which are not supported by [`str#lines`].
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// # use rustpython_parser_vendored::text_size::TextSize;
|
||||
/// # use rustpython_parser_vendored::source_location::newlines::{Line, UniversalNewlineIterator};
|
||||
/// let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop");
|
||||
///
|
||||
/// assert_eq!(lines.next_back(), Some(Line::new("bop", TextSize::from(14))));
|
||||
/// assert_eq!(lines.next(), Some(Line::new("foo\n", TextSize::from(0))));
|
||||
/// assert_eq!(lines.next_back(), Some(Line::new("baz\r", TextSize::from(10))));
|
||||
/// assert_eq!(lines.next(), Some(Line::new("bar\n", TextSize::from(4))));
|
||||
/// assert_eq!(lines.next_back(), Some(Line::new("\r\n", TextSize::from(8))));
|
||||
/// assert_eq!(lines.next(), None);
|
||||
/// ```
|
||||
pub struct UniversalNewlineIterator<'a> {
|
||||
text: &'a str,
|
||||
offset: TextSize,
|
||||
offset_back: TextSize,
|
||||
}
|
||||
|
||||
impl<'a> UniversalNewlineIterator<'a> {
|
||||
pub fn with_offset(text: &'a str, offset: TextSize) -> UniversalNewlineIterator<'a> {
|
||||
UniversalNewlineIterator {
|
||||
text,
|
||||
offset,
|
||||
offset_back: offset + text.text_len(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from(text: &'a str) -> UniversalNewlineIterator<'a> {
|
||||
Self::with_offset(text, TextSize::default())
|
||||
}
|
||||
}
|
||||
|
||||
/// Finds the next newline character. Returns its position and the [`LineEnding`].
|
||||
#[inline]
|
||||
pub fn find_newline(text: &str) -> Option<(usize, LineEnding)> {
|
||||
let bytes = text.as_bytes();
|
||||
if let Some(position) = memchr2(b'\n', b'\r', bytes) {
|
||||
// SAFETY: memchr guarantees to return valid positions
|
||||
#[allow(unsafe_code)]
|
||||
let newline_character = unsafe { *bytes.get_unchecked(position) };
|
||||
|
||||
let line_ending = match newline_character {
|
||||
// Explicit branch for `\n` as this is the most likely path
|
||||
b'\n' => LineEnding::Lf,
|
||||
// '\r\n'
|
||||
b'\r' if bytes.get(position.saturating_add(1)) == Some(&b'\n') => LineEnding::CrLf,
|
||||
// '\r'
|
||||
_ => LineEnding::Cr,
|
||||
};
|
||||
|
||||
Some((position, line_ending))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for UniversalNewlineIterator<'a> {
|
||||
type Item = Line<'a>;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<Line<'a>> {
|
||||
if self.text.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let line = if let Some((newline_position, line_ending)) = find_newline(self.text) {
|
||||
let (text, remainder) = self.text.split_at(newline_position + line_ending.len());
|
||||
|
||||
let line = Line {
|
||||
offset: self.offset,
|
||||
text,
|
||||
};
|
||||
|
||||
self.text = remainder;
|
||||
self.offset += text.text_len();
|
||||
|
||||
line
|
||||
}
|
||||
// Last line
|
||||
else {
|
||||
Line {
|
||||
offset: self.offset,
|
||||
text: std::mem::take(&mut self.text),
|
||||
}
|
||||
};
|
||||
|
||||
Some(line)
|
||||
}
|
||||
|
||||
fn last(mut self) -> Option<Self::Item> {
|
||||
self.next_back()
|
||||
}
|
||||
}
|
||||
|
||||
impl DoubleEndedIterator for UniversalNewlineIterator<'_> {
|
||||
#[inline]
|
||||
fn next_back(&mut self) -> Option<Self::Item> {
|
||||
if self.text.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let len = self.text.len();
|
||||
|
||||
// Trim any trailing newlines.
|
||||
let haystack = match self.text.as_bytes()[len - 1] {
|
||||
b'\n' if len > 1 && self.text.as_bytes()[len - 2] == b'\r' => &self.text[..len - 2],
|
||||
b'\n' | b'\r' => &self.text[..len - 1],
|
||||
_ => self.text,
|
||||
};
|
||||
|
||||
// Find the end of the previous line. The previous line is the text up to, but not including
|
||||
// the newline character.
|
||||
let line = if let Some(line_end) = memrchr2(b'\n', b'\r', haystack.as_bytes()) {
|
||||
// '\n' or '\r' or '\r\n'
|
||||
let (remainder, line) = self.text.split_at(line_end + 1);
|
||||
self.text = remainder;
|
||||
self.offset_back -= line.text_len();
|
||||
|
||||
Line {
|
||||
text: line,
|
||||
offset: self.offset_back,
|
||||
}
|
||||
} else {
|
||||
// Last line
|
||||
let offset = self.offset_back - self.text.text_len();
|
||||
Line {
|
||||
text: std::mem::take(&mut self.text),
|
||||
offset,
|
||||
}
|
||||
};
|
||||
|
||||
Some(line)
|
||||
}
|
||||
}
|
||||
|
||||
// Both `next` and `next_back` return `None` whenever the remaining text is empty,
// and nothing refills it, so the iterator keeps returning `None` once exhausted.
impl FusedIterator for UniversalNewlineIterator<'_> {}
|
||||
|
||||
/// Like [`UniversalNewlineIterator`], but includes a trailing newline as an empty line.
|
||||
pub struct NewlineWithTrailingNewline<'a> {
|
||||
trailing: Option<Line<'a>>,
|
||||
underlying: UniversalNewlineIterator<'a>,
|
||||
}
|
||||
|
||||
impl<'a> NewlineWithTrailingNewline<'a> {
|
||||
pub fn from(input: &'a str) -> NewlineWithTrailingNewline<'a> {
|
||||
Self::with_offset(input, TextSize::default())
|
||||
}
|
||||
|
||||
pub fn with_offset(input: &'a str, offset: TextSize) -> Self {
|
||||
NewlineWithTrailingNewline {
|
||||
underlying: UniversalNewlineIterator::with_offset(input, offset),
|
||||
trailing: if input.ends_with(['\r', '\n']) {
|
||||
Some(Line {
|
||||
text: "",
|
||||
offset: offset + input.text_len(),
|
||||
})
|
||||
} else {
|
||||
None
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for NewlineWithTrailingNewline<'a> {
|
||||
type Item = Line<'a>;
|
||||
|
||||
#[inline]
|
||||
fn next(&mut self) -> Option<Line<'a>> {
|
||||
self.underlying.next().or_else(|| self.trailing.take())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
pub struct Line<'a> {
|
||||
text: &'a str,
|
||||
offset: TextSize,
|
||||
}
|
||||
|
||||
impl<'a> Line<'a> {
|
||||
pub fn new(text: &'a str, offset: TextSize) -> Self {
|
||||
Self { text, offset }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub const fn start(&self) -> TextSize {
|
||||
self.offset
|
||||
}
|
||||
|
||||
/// Returns the byte offset where the line ends, including its terminating new line character.
|
||||
#[inline]
|
||||
pub fn full_end(&self) -> TextSize {
|
||||
self.offset + self.full_text_len()
|
||||
}
|
||||
|
||||
/// Returns the byte offset where the line ends, excluding its new line character
|
||||
#[inline]
|
||||
pub fn end(&self) -> TextSize {
|
||||
self.offset + self.as_str().text_len()
|
||||
}
|
||||
|
||||
/// Returns the range of the line, including its terminating new line character.
|
||||
#[inline]
|
||||
pub fn full_range(&self) -> TextRange {
|
||||
TextRange::at(self.offset, self.text.text_len())
|
||||
}
|
||||
|
||||
/// Returns the range of the line, excluding its terminating new line character
|
||||
#[inline]
|
||||
pub fn range(&self) -> TextRange {
|
||||
TextRange::new(self.start(), self.end())
|
||||
}
|
||||
|
||||
/// Returns the text of the line, excluding the terminating new line character.
|
||||
#[inline]
|
||||
pub fn as_str(&self) -> &'a str {
|
||||
let mut bytes = self.text.bytes().rev();
|
||||
|
||||
let newline_len = match bytes.next() {
|
||||
Some(b'\n') => {
|
||||
if bytes.next() == Some(b'\r') {
|
||||
2
|
||||
} else {
|
||||
1
|
||||
}
|
||||
}
|
||||
Some(b'\r') => 1,
|
||||
_ => 0,
|
||||
};
|
||||
|
||||
&self.text[..self.text.len() - newline_len]
|
||||
}
|
||||
|
||||
/// Returns the line's text, including the terminating new line character.
|
||||
#[inline]
|
||||
pub fn as_full_str(&self) -> &'a str {
|
||||
self.text
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn full_text_len(&self) -> TextSize {
|
||||
self.text.text_len()
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for Line<'_> {
|
||||
type Target = str;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.as_str()
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq<&str> for Line<'_> {
|
||||
fn eq(&self, other: &&str) -> bool {
|
||||
self.as_str() == *other
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq<Line<'_>> for &str {
    /// Compares this string with the text returned by `as_str` on `other`;
    /// the line's offset is ignored.
    fn eq(&self, other: &Line<'_>) -> bool {
        let text: &str = other.as_str();
        *self == text
    }
}
|
||||
|
||||
/// The line ending style used in Python source code.
/// See <https://docs.python.org/3/reference/lexical_analysis.html#physical-lines>
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum LineEnding {
    /// Line feed, `"\n"`.
    Lf,
    /// Carriage return, `"\r"`.
    Cr,
    /// Carriage return followed by line feed, `"\r\n"`.
    CrLf,
}
|
||||
|
||||
impl Default for LineEnding {
|
||||
fn default() -> Self {
|
||||
if cfg!(windows) {
|
||||
LineEnding::CrLf
|
||||
} else {
|
||||
LineEnding::Lf
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LineEnding {
|
||||
pub const fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
LineEnding::Lf => "\n",
|
||||
LineEnding::CrLf => "\r\n",
|
||||
LineEnding::Cr => "\r",
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::len_without_is_empty)]
|
||||
pub const fn len(&self) -> usize {
|
||||
match self {
|
||||
LineEnding::Lf | LineEnding::Cr => 1,
|
||||
LineEnding::CrLf => 2,
|
||||
}
|
||||
}
|
||||
|
||||
pub const fn text_len(&self) -> TextSize {
|
||||
match self {
|
||||
LineEnding::Lf | LineEnding::Cr => TextSize::new(1),
|
||||
LineEnding::CrLf => TextSize::new(2),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for LineEnding {
|
||||
type Target = str;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.as_str()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::Line;
    use super::UniversalNewlineIterator;
    use crate::text_size::TextSize;

    /// An empty input yields no lines in either direction.
    #[test]
    fn universal_newlines_empty_str() {
        let lines: Vec<_> = UniversalNewlineIterator::from("").collect();
        assert_eq!(lines, Vec::<Line>::new());

        let lines: Vec<_> = UniversalNewlineIterator::from("").rev().collect();
        assert_eq!(lines, Vec::<Line>::new());
    }

    /// Forward iteration keeps terminators and reports the start offset of every line.
    #[test]
    fn universal_newlines_forward() {
        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop").collect();
        assert_eq!(
            lines,
            vec![
                Line::new("foo\n", TextSize::from(0)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("\r\n", TextSize::from(8)),
                Line::new("baz\r", TextSize::from(10)),
                Line::new("bop", TextSize::from(14)),
            ]
        );

        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop\n").collect();
        assert_eq!(
            lines,
            vec![
                Line::new("foo\n", TextSize::from(0)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("\r\n", TextSize::from(8)),
                Line::new("baz\r", TextSize::from(10)),
                Line::new("bop\n", TextSize::from(14)),
            ]
        );

        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop\n\n").collect();
        assert_eq!(
            lines,
            vec![
                Line::new("foo\n", TextSize::from(0)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("\r\n", TextSize::from(8)),
                Line::new("baz\r", TextSize::from(10)),
                Line::new("bop\n", TextSize::from(14)),
                Line::new("\n", TextSize::from(18)),
            ]
        );
    }

    /// Backward iteration yields the same lines as forward iteration, in reverse order.
    #[test]
    fn universal_newlines_backwards() {
        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop")
            .rev()
            .collect();
        assert_eq!(
            lines,
            vec![
                Line::new("bop", TextSize::from(14)),
                Line::new("baz\r", TextSize::from(10)),
                Line::new("\r\n", TextSize::from(8)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("foo\n", TextSize::from(0)),
            ]
        );

        // Compare whole `Line` values: mapping through `as_str()` first would route the
        // comparison through `PartialEq<Line> for &str`, which ignores both the offset
        // and the terminator, so the expectations below would not really be checked.
        let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\nbaz\rbop\n")
            .rev()
            .collect();

        assert_eq!(
            lines,
            vec![
                Line::new("bop\n", TextSize::from(13)),
                Line::new("baz\r", TextSize::from(9)),
                Line::new("\n", TextSize::from(8)),
                Line::new("bar\n", TextSize::from(4)),
                Line::new("foo\n", TextSize::from(0)),
            ]
        );
    }

    /// Alternating between both ends never yields a line twice.
    #[test]
    fn universal_newlines_mixed() {
        let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop");

        assert_eq!(
            lines.next_back(),
            Some(Line::new("bop", TextSize::from(14)))
        );
        assert_eq!(lines.next(), Some(Line::new("foo\n", TextSize::from(0))));
        assert_eq!(
            lines.next_back(),
            Some(Line::new("baz\r", TextSize::from(10)))
        );
        assert_eq!(lines.next(), Some(Line::new("bar\n", TextSize::from(4))));
        assert_eq!(
            lines.next_back(),
            Some(Line::new("\r\n", TextSize::from(8)))
        );
        assert_eq!(lines.next(), None);
    }
}
|
53
vendored/src/text_size/LICENSE
Normal file
53
vendored/src/text_size/LICENSE
Normal file
|
@ -0,0 +1,53 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2022 Charles Marsh
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
end of terms and conditions
|
||||
|
||||
The externally maintained libraries from which parts of the Software is derived
|
||||
are:
|
||||
|
||||
- rust-analyzer/text-size, licensed under the MIT license:
|
||||
"""
|
||||
Permission is hereby granted, free of charge, to any
|
||||
person obtaining a copy of this software and associated
|
||||
documentation files (the "Software"), to deal in the
|
||||
Software without restriction, including without
|
||||
limitation the rights to use, copy, modify, merge,
|
||||
publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software
|
||||
is furnished to do so, subject to the following
|
||||
conditions:
|
||||
|
||||
The above copyright notice and this permission notice
|
||||
shall be included in all copies or substantial portions
|
||||
of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
|
||||
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
||||
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
|
||||
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
||||
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
"""
|
2
vendored/src/text_size/README
Normal file
2
vendored/src/text_size/README
Normal file
|
@ -0,0 +1,2 @@
|
|||
This module is imported from `ruff_text_size`,
|
||||
which is a fork of `text-size`.
|
34
vendored/src/text_size/mod.rs
Normal file
34
vendored/src/text_size/mod.rs
Normal file
|
@ -0,0 +1,34 @@
|
|||
//! Newtypes for working with text sizes/ranges in a more type-safe manner.
|
||||
//!
|
||||
//! This library can help with two things:
|
||||
//! * Reducing storage requirements for offsets and ranges, under the
|
||||
//! assumption that 32 bits is enough.
|
||||
//! * Providing standard vocabulary types for applications where text ranges
|
||||
//! are pervasive.
|
||||
//!
|
||||
//! However, you should not use this library simply because you work with
|
||||
//! strings. In the overwhelming majority of cases, using `usize` and
|
||||
//! `std::ops::Range<usize>` is better. In particular, if you are publishing a
|
||||
//! library, using only std types in the interface would make it more
|
||||
//! interoperable. Similarly, if you are writing something like a lexer, which
|
||||
//! produces, but does not *store* text ranges, then sticking to `usize` would
|
||||
//! be better.
|
||||
//!
|
||||
//! Minimal Supported Rust Version: latest stable.
|
||||
|
||||
#![forbid(unsafe_code)]
|
||||
#![warn(missing_debug_implementations, missing_docs)]
|
||||
|
||||
mod range;
|
||||
mod size;
|
||||
mod traits;
|
||||
|
||||
#[cfg(feature = "schemars")]
|
||||
mod schemars_impls;
|
||||
#[cfg(feature = "serde")]
|
||||
mod serde_impls;
|
||||
|
||||
pub use self::{range::TextRange, size::TextSize, traits::TextLen};
|
||||
|
||||
#[cfg(target_pointer_width = "16")]
|
||||
compile_error!("text-size assumes usize >= u32 and does not work on 16-bit targets");
|
544
vendored/src/text_size/range.rs
Normal file
544
vendored/src/text_size/range.rs
Normal file
|
@ -0,0 +1,544 @@
|
|||
use cmp::Ordering;
|
||||
|
||||
use {
|
||||
super::TextSize,
|
||||
std::{
|
||||
cmp, fmt,
|
||||
ops::{Add, AddAssign, Bound, Index, IndexMut, Range, RangeBounds, Sub, SubAssign},
|
||||
},
|
||||
};
|
||||
|
||||
/// A range in text, represented as a pair of [`TextSize`][struct@TextSize].
|
||||
///
|
||||
/// It is a logic error for `start` to be greater than `end`.
|
||||
#[derive(Default, Copy, Clone, Eq, PartialEq, Hash)]
|
||||
pub struct TextRange {
|
||||
// Invariant: start <= end
|
||||
start: TextSize,
|
||||
end: TextSize,
|
||||
}
|
||||
|
||||
impl fmt::Debug for TextRange {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{}..{}", self.start().raw, self.end().raw)
|
||||
}
|
||||
}
|
||||
|
||||
impl TextRange {
|
||||
/// Creates a new `TextRange` with the given `start` and `end` (`start..end`).
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if `end < start`.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// # use rustpython_parser_vendored::text_size::*;
|
||||
/// let start = TextSize::from(5);
|
||||
/// let end = TextSize::from(10);
|
||||
/// let range = TextRange::new(start, end);
|
||||
///
|
||||
/// assert_eq!(range.start(), start);
|
||||
/// assert_eq!(range.end(), end);
|
||||
/// assert_eq!(range.len(), end - start);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub const fn new(start: TextSize, end: TextSize) -> TextRange {
|
||||
assert!(start.raw <= end.raw);
|
||||
TextRange { start, end }
|
||||
}
|
||||
|
||||
/// Create a new `TextRange` with the given `offset` and `len` (`offset..offset + len`).
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// # use rustpython_parser_vendored::text_size::*;
|
||||
/// let text = "0123456789";
|
||||
///
|
||||
/// let offset = TextSize::from(2);
|
||||
/// let length = TextSize::from(5);
|
||||
/// let range = TextRange::at(offset, length);
|
||||
///
|
||||
/// assert_eq!(range, TextRange::new(offset, offset + length));
|
||||
/// assert_eq!(&text[range], "23456")
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn at(offset: TextSize, len: TextSize) -> TextRange {
|
||||
TextRange::new(offset, offset + len)
|
||||
}
|
||||
|
||||
/// Create a zero-length range at the specified offset (`offset..offset`).
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// # use rustpython_parser_vendored::text_size::*;
|
||||
/// let point: TextSize;
|
||||
/// # point = TextSize::from(3);
|
||||
/// let range = TextRange::empty(point);
|
||||
/// assert!(range.is_empty());
|
||||
/// assert_eq!(range, TextRange::new(point, point));
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn empty(offset: TextSize) -> TextRange {
|
||||
TextRange {
|
||||
start: offset,
|
||||
end: offset,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a range up to the given end (`..end`).
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// # use rustpython_parser_vendored::text_size::*;
|
||||
/// let point: TextSize;
|
||||
/// # point = TextSize::from(12);
|
||||
/// let range = TextRange::up_to(point);
|
||||
///
|
||||
/// assert_eq!(range.len(), point);
|
||||
/// assert_eq!(range, TextRange::new(0.into(), point));
|
||||
/// assert_eq!(range, TextRange::at(0.into(), point));
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn up_to(end: TextSize) -> TextRange {
|
||||
TextRange {
|
||||
start: 0.into(),
|
||||
end,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Identity methods.
|
||||
impl TextRange {
|
||||
/// The start point of this range.
|
||||
#[inline]
|
||||
pub const fn start(self) -> TextSize {
|
||||
self.start
|
||||
}
|
||||
|
||||
/// The end point of this range.
|
||||
#[inline]
|
||||
pub const fn end(self) -> TextSize {
|
||||
self.end
|
||||
}
|
||||
|
||||
/// The size of this range.
|
||||
#[inline]
|
||||
pub const fn len(self) -> TextSize {
|
||||
// HACK for const fn: math on primitives only
|
||||
TextSize {
|
||||
raw: self.end().raw - self.start().raw,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if this range is empty.
|
||||
#[inline]
|
||||
pub const fn is_empty(self) -> bool {
|
||||
// HACK for const fn: math on primitives only
|
||||
self.start().raw == self.end().raw
|
||||
}
|
||||
}
|
||||
|
||||
/// Manipulation methods.
|
||||
impl TextRange {
|
||||
/// Check if this range contains an offset.
|
||||
///
|
||||
/// The end index is considered excluded.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// # use rustpython_parser_vendored::text_size::*;
|
||||
/// let (start, end): (TextSize, TextSize);
|
||||
/// # start = 10.into(); end = 20.into();
|
||||
/// let range = TextRange::new(start, end);
|
||||
/// assert!(range.contains(start));
|
||||
/// assert!(!range.contains(end));
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn contains(self, offset: TextSize) -> bool {
|
||||
self.start() <= offset && offset < self.end()
|
||||
}
|
||||
|
||||
/// Check if this range contains an offset.
|
||||
///
|
||||
/// The end index is considered included.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// # use rustpython_parser_vendored::text_size::*;
|
||||
/// let (start, end): (TextSize, TextSize);
|
||||
/// # start = 10.into(); end = 20.into();
|
||||
/// let range = TextRange::new(start, end);
|
||||
/// assert!(range.contains_inclusive(start));
|
||||
/// assert!(range.contains_inclusive(end));
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn contains_inclusive(self, offset: TextSize) -> bool {
|
||||
self.start() <= offset && offset <= self.end()
|
||||
}
|
||||
|
||||
/// Check if this range completely contains another range.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// # use rustpython_parser_vendored::text_size::*;
|
||||
/// let larger = TextRange::new(0.into(), 20.into());
|
||||
/// let smaller = TextRange::new(5.into(), 15.into());
|
||||
/// assert!(larger.contains_range(smaller));
|
||||
/// assert!(!smaller.contains_range(larger));
|
||||
///
|
||||
/// // a range always contains itself
|
||||
/// assert!(larger.contains_range(larger));
|
||||
/// assert!(smaller.contains_range(smaller));
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn contains_range(self, other: TextRange) -> bool {
|
||||
self.start() <= other.start() && other.end() <= self.end()
|
||||
}
|
||||
|
||||
/// The range covered by both ranges, if it exists.
|
||||
/// If the ranges touch but do not overlap, the output range is empty.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// # use rustpython_parser_vendored::text_size::*;
|
||||
/// assert_eq!(
|
||||
/// TextRange::intersect(
|
||||
/// TextRange::new(0.into(), 10.into()),
|
||||
/// TextRange::new(5.into(), 15.into()),
|
||||
/// ),
|
||||
/// Some(TextRange::new(5.into(), 10.into())),
|
||||
/// );
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn intersect(self, other: TextRange) -> Option<TextRange> {
|
||||
let start = cmp::max(self.start(), other.start());
|
||||
let end = cmp::min(self.end(), other.end());
|
||||
if end < start {
|
||||
return None;
|
||||
}
|
||||
Some(TextRange::new(start, end))
|
||||
}
|
||||
|
||||
/// Extends the range to cover `other` as well.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// # use rustpython_parser_vendored::text_size::*;
|
||||
/// assert_eq!(
|
||||
/// TextRange::cover(
|
||||
/// TextRange::new(0.into(), 5.into()),
|
||||
/// TextRange::new(15.into(), 20.into()),
|
||||
/// ),
|
||||
/// TextRange::new(0.into(), 20.into()),
|
||||
/// );
|
||||
/// ```
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub fn cover(self, other: TextRange) -> TextRange {
|
||||
let start = cmp::min(self.start(), other.start());
|
||||
let end = cmp::max(self.end(), other.end());
|
||||
TextRange::new(start, end)
|
||||
}
|
||||
|
||||
/// Extends the range to cover `other` offsets as well.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// # use rustpython_parser_vendored::text_size::*;
|
||||
/// assert_eq!(
|
||||
/// TextRange::empty(0.into()).cover_offset(20.into()),
|
||||
/// TextRange::new(0.into(), 20.into()),
|
||||
/// )
|
||||
/// ```
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub fn cover_offset(self, offset: TextSize) -> TextRange {
|
||||
self.cover(TextRange::empty(offset))
|
||||
}
|
||||
|
||||
/// Add an offset to this range.
|
||||
///
|
||||
/// Note that this is not appropriate for changing where a `TextRange` is
|
||||
/// within some string; rather, it is for changing the reference anchor
|
||||
/// that the `TextRange` is measured against.
|
||||
///
|
||||
/// The unchecked version (`Add::add`) will _always_ panic on overflow,
|
||||
/// in contrast to primitive integers, which check in debug mode only.
|
||||
#[inline]
|
||||
pub fn checked_add(self, offset: TextSize) -> Option<TextRange> {
|
||||
Some(TextRange {
|
||||
start: self.start.checked_add(offset)?,
|
||||
end: self.end.checked_add(offset)?,
|
||||
})
|
||||
}
|
||||
|
||||
/// Subtract an offset from this range.
|
||||
///
|
||||
/// Note that this is not appropriate for changing where a `TextRange` is
|
||||
/// within some string; rather, it is for changing the reference anchor
|
||||
/// that the `TextRange` is measured against.
|
||||
///
|
||||
/// The unchecked version (`Sub::sub`) will _always_ panic on overflow,
|
||||
/// in contrast to primitive integers, which check in debug mode only.
|
||||
#[inline]
|
||||
pub fn checked_sub(self, offset: TextSize) -> Option<TextRange> {
|
||||
Some(TextRange {
|
||||
start: self.start.checked_sub(offset)?,
|
||||
end: self.end.checked_sub(offset)?,
|
||||
})
|
||||
}
|
||||
|
||||
/// Relative order of the two ranges (overlapping ranges are considered
|
||||
/// equal).
|
||||
///
|
||||
///
|
||||
/// This is useful when, for example, binary searching an array of disjoint
|
||||
/// ranges.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use rustpython_parser_vendored::text_size::*;
|
||||
/// # use std::cmp::Ordering;
|
||||
///
|
||||
/// let a = TextRange::new(0.into(), 3.into());
|
||||
/// let b = TextRange::new(4.into(), 5.into());
|
||||
/// assert_eq!(a.ordering(b), Ordering::Less);
|
||||
///
|
||||
/// let a = TextRange::new(0.into(), 3.into());
|
||||
/// let b = TextRange::new(3.into(), 5.into());
|
||||
/// assert_eq!(a.ordering(b), Ordering::Less);
|
||||
///
|
||||
/// let a = TextRange::new(0.into(), 3.into());
|
||||
/// let b = TextRange::new(2.into(), 5.into());
|
||||
/// assert_eq!(a.ordering(b), Ordering::Equal);
|
||||
///
|
||||
/// let a = TextRange::new(0.into(), 3.into());
|
||||
/// let b = TextRange::new(2.into(), 2.into());
|
||||
/// assert_eq!(a.ordering(b), Ordering::Equal);
|
||||
///
|
||||
/// let a = TextRange::new(2.into(), 3.into());
|
||||
/// let b = TextRange::new(2.into(), 2.into());
|
||||
/// assert_eq!(a.ordering(b), Ordering::Greater);
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn ordering(self, other: TextRange) -> Ordering {
|
||||
if self.end() <= other.start() {
|
||||
Ordering::Less
|
||||
} else if other.end() <= self.start() {
|
||||
Ordering::Greater
|
||||
} else {
|
||||
Ordering::Equal
|
||||
}
|
||||
}
|
||||
|
||||
/// Subtracts an offset from the start position.
|
||||
///
|
||||
///
|
||||
/// ## Panics
|
||||
/// If `start - amount` is less than zero.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// use rustpython_parser_vendored::text_size::{TextRange, TextSize};
|
||||
///
|
||||
/// let range = TextRange::new(TextSize::from(5), TextSize::from(10));
|
||||
/// assert_eq!(range.sub_start(TextSize::from(2)), TextRange::new(TextSize::from(3), TextSize::from(10)));
|
||||
/// ```
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub fn sub_start(&self, amount: TextSize) -> TextRange {
|
||||
TextRange::new(self.start() - amount, self.end())
|
||||
}
|
||||
|
||||
/// Adds an offset to the start position.
|
||||
///
|
||||
/// ## Panics
|
||||
/// If `start + amount > end`
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// use rustpython_parser_vendored::text_size::{TextRange, TextSize};
|
||||
///
|
||||
/// let range = TextRange::new(TextSize::from(5), TextSize::from(10));
|
||||
/// assert_eq!(range.add_start(TextSize::from(3)), TextRange::new(TextSize::from(8), TextSize::from(10)));
|
||||
/// ```
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub fn add_start(&self, amount: TextSize) -> TextRange {
|
||||
TextRange::new(self.start() + amount, self.end())
|
||||
}
|
||||
|
||||
/// Subtracts an offset from the end position.
|
||||
///
|
||||
///
|
||||
/// ## Panics
|
||||
/// If `end - amount < 0` or `end - amount < start`
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// use rustpython_parser_vendored::text_size::{TextRange, TextSize};
|
||||
///
|
||||
/// let range = TextRange::new(TextSize::from(5), TextSize::from(10));
|
||||
/// assert_eq!(range.sub_end(TextSize::from(2)), TextRange::new(TextSize::from(5), TextSize::from(8)));
|
||||
/// ```
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub fn sub_end(&self, amount: TextSize) -> TextRange {
|
||||
TextRange::new(self.start(), self.end() - amount)
|
||||
}
|
||||
|
||||
/// Adds an offset to the end position.
|
||||
///
|
||||
///
|
||||
/// ## Panics
|
||||
/// If `end + amount > u32::MAX`
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// use rustpython_parser_vendored::text_size::{TextRange, TextSize};
|
||||
///
|
||||
/// let range = TextRange::new(TextSize::from(5), TextSize::from(10));
|
||||
/// assert_eq!(range.add_end(TextSize::from(2)), TextRange::new(TextSize::from(5), TextSize::from(12)));
|
||||
/// ```
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub fn add_end(&self, amount: TextSize) -> TextRange {
|
||||
TextRange::new(self.start(), self.end() + amount)
|
||||
}
|
||||
}
|
||||
|
||||
impl Index<TextRange> for str {
|
||||
type Output = str;
|
||||
#[inline]
|
||||
fn index(&self, index: TextRange) -> &str {
|
||||
&self[Range::<usize>::from(index)]
|
||||
}
|
||||
}
|
||||
|
||||
impl Index<TextRange> for String {
|
||||
type Output = str;
|
||||
#[inline]
|
||||
fn index(&self, index: TextRange) -> &str {
|
||||
&self[Range::<usize>::from(index)]
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexMut<TextRange> for str {
|
||||
#[inline]
|
||||
fn index_mut(&mut self, index: TextRange) -> &mut str {
|
||||
&mut self[Range::<usize>::from(index)]
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexMut<TextRange> for String {
|
||||
#[inline]
|
||||
fn index_mut(&mut self, index: TextRange) -> &mut str {
|
||||
&mut self[Range::<usize>::from(index)]
|
||||
}
|
||||
}
|
||||
|
||||
impl RangeBounds<TextSize> for TextRange {
|
||||
fn start_bound(&self) -> Bound<&TextSize> {
|
||||
Bound::Included(&self.start)
|
||||
}
|
||||
|
||||
fn end_bound(&self) -> Bound<&TextSize> {
|
||||
Bound::Excluded(&self.end)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Range<TextSize>> for TextRange {
|
||||
#[inline]
|
||||
fn from(r: Range<TextSize>) -> Self {
|
||||
TextRange::new(r.start, r.end)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> From<TextRange> for Range<T>
|
||||
where
|
||||
T: From<TextSize>,
|
||||
{
|
||||
#[inline]
|
||||
fn from(r: TextRange) -> Self {
|
||||
r.start().into()..r.end().into()
|
||||
}
|
||||
}
|
||||
|
||||
// Generates the by-reference variants of an operator for `TextRange`, given that
// the by-value `TextRange <op> TextSize` implementation exists.
macro_rules! ops {
    (impl $Trait:ident for TextRange by fn $method:ident = $operator:tt) => {
        // `TextRange <op> &TextSize`: dereference the right-hand side.
        impl $Trait<&TextSize> for TextRange {
            type Output = TextRange;
            #[inline]
            fn $method(self, rhs: &TextSize) -> TextRange {
                self $operator *rhs
            }
        }
        // `&TextRange <op> Rhs`: copy the range out of the reference and delegate.
        impl<Rhs> $Trait<Rhs> for &TextRange
        where
            TextRange: $Trait<Rhs, Output=TextRange>,
        {
            type Output = TextRange;
            #[inline]
            fn $method(self, rhs: Rhs) -> TextRange {
                *self $operator rhs
            }
        }
    };
}
|
||||
|
||||
/// Shifts a range by `offset` using `TextRange::checked_add`.
///
/// # Panics
///
/// Panics if `checked_add` reports an overflow (returns `None`).
impl Add<TextSize> for TextRange {
    type Output = TextRange;

    #[inline]
    fn add(self, offset: TextSize) -> TextRange {
        let shifted = self.checked_add(offset);
        shifted.expect("TextRange +offset overflowed")
    }
}
|
||||
|
||||
/// Shifts a range back by `offset` using `TextRange::checked_sub`.
///
/// # Panics
///
/// Panics if `checked_sub` reports an overflow (returns `None`).
impl Sub<TextSize> for TextRange {
    type Output = TextRange;

    #[inline]
    fn sub(self, offset: TextSize) -> TextRange {
        let shifted = self.checked_sub(offset);
        shifted.expect("TextRange -offset overflowed")
    }
}
|
||||
|
||||
ops!(impl Add for TextRange by fn add = +);
|
||||
ops!(impl Sub for TextRange by fn sub = -);
|
||||
|
||||
impl<A> AddAssign<A> for TextRange
|
||||
where
|
||||
TextRange: Add<A, Output = TextRange>,
|
||||
{
|
||||
#[inline]
|
||||
fn add_assign(&mut self, rhs: A) {
|
||||
*self = *self + rhs;
|
||||
}
|
||||
}
|
||||
|
||||
impl<S> SubAssign<S> for TextRange
|
||||
where
|
||||
TextRange: Sub<S, Output = TextRange>,
|
||||
{
|
||||
#[inline]
|
||||
fn sub_assign(&mut self, rhs: S) {
|
||||
*self = *self - rhs;
|
||||
}
|
||||
}
|
33
vendored/src/text_size/schemars_impls.rs
Normal file
33
vendored/src/text_size/schemars_impls.rs
Normal file
|
@ -0,0 +1,33 @@
|
|||
//! This module implements the [`JsonSchema`] trait from the `schemars` crate for
|
||||
//! [`TextSize`] and [`TextRange`] if the `schemars` feature is enabled. This trait
|
||||
//! exposes meta-information on how a given type is serialized and deserialized
|
||||
//! using `serde`, and is currently used to generate autocomplete information
|
||||
//! for the `rome.json` configuration file and TypeScript types for the node.js
|
||||
//! bindings to the Workspace API.
|
||||
|
||||
use crate::{TextRange, TextSize};
|
||||
use schemars::{gen::SchemaGenerator, schema::Schema, JsonSchema};
|
||||
|
||||
impl JsonSchema for TextSize {
|
||||
fn schema_name() -> String {
|
||||
String::from("TextSize")
|
||||
}
|
||||
|
||||
fn json_schema(gen: &mut SchemaGenerator) -> Schema {
|
||||
// TextSize is represented as a raw u32, see serde_impls.rs for the
|
||||
// actual implementation
|
||||
<u32>::json_schema(gen)
|
||||
}
|
||||
}
|
||||
|
||||
impl JsonSchema for TextRange {
|
||||
fn schema_name() -> String {
|
||||
String::from("TextRange")
|
||||
}
|
||||
|
||||
fn json_schema(gen: &mut SchemaGenerator) -> Schema {
|
||||
// TextSize is represented as (TextSize, TextSize), see serde_impls.rs
|
||||
// for the actual implementation
|
||||
<(TextSize, TextSize)>::json_schema(gen)
|
||||
}
|
||||
}
|
47
vendored/src/text_size/serde_impls.rs
Normal file
47
vendored/src/text_size/serde_impls.rs
Normal file
|
@ -0,0 +1,47 @@
|
|||
use {
|
||||
super::{TextRange, TextSize},
|
||||
serde::{de, Deserialize, Deserializer, Serialize, Serializer},
|
||||
};
|
||||
|
||||
impl Serialize for TextSize {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
self.raw.serialize(serializer)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for TextSize {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
u32::deserialize(deserializer).map(TextSize::from)
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for TextRange {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
(self.start(), self.end()).serialize(serializer)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for TextRange {
|
||||
#[allow(clippy::nonminimal_bool)]
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
let (start, end) = Deserialize::deserialize(deserializer)?;
|
||||
if !(start <= end) {
|
||||
return Err(de::Error::custom(format!(
|
||||
"invalid range: {start:?}..{end:?}"
|
||||
)));
|
||||
}
|
||||
Ok(TextRange::new(start, end))
|
||||
}
|
||||
}
|
197
vendored/src/text_size/size.rs
Normal file
197
vendored/src/text_size/size.rs
Normal file
|
@ -0,0 +1,197 @@
|
|||
use {
|
||||
super::TextLen,
|
||||
std::{
|
||||
convert::TryFrom,
|
||||
fmt, iter,
|
||||
num::TryFromIntError,
|
||||
ops::{Add, AddAssign, Sub, SubAssign},
|
||||
u32,
|
||||
},
|
||||
};
|
||||
|
||||
/// A length of text, or equivalently an offset into text.
///
/// The value is a UTF-8 byte offset held in a `u32`; most callers should
/// nevertheless treat it as an opaque measure.
///
/// When raw integers are needed, `TextSize` converts losslessly to and from
/// `u32` via [`From`], and losslessly [`Into`] `usize`. Going from `usize`
/// back to `TextSize` can fail and is therefore done through [`TryFrom`].
///
/// These escape hatches exist mainly for unit tests and for translating a
/// UTF-8 size into another coordinate space, such as UTF-16.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub struct TextSize {
    // Raw UTF-8 byte offset; crate-visible so sibling modules can access it.
    pub(crate) raw: u32,
}
|
||||
|
||||
impl fmt::Debug for TextSize {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{}", self.raw)
|
||||
}
|
||||
}
|
||||
|
||||
impl TextSize {
|
||||
/// Creates a new `TextSize` at the given `offset`.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// # use rustpython_parser_vendored::text_size::*;
|
||||
/// assert_eq!(TextSize::from(4), TextSize::new(4));
|
||||
/// ```
|
||||
pub const fn new(offset: u32) -> Self {
|
||||
Self { raw: offset }
|
||||
}
|
||||
|
||||
/// The text size of some primitive text-like object.
|
||||
///
|
||||
/// Accepts `char`, `&str`, and `&String`.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// # use rustpython_parser_vendored::text_size::*;
|
||||
/// let char_size = TextSize::of('🦀');
|
||||
/// assert_eq!(char_size, TextSize::from(4));
|
||||
///
|
||||
/// let str_size = TextSize::of("rust-analyzer");
|
||||
/// assert_eq!(str_size, TextSize::from(13));
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn of<T: TextLen>(text: T) -> TextSize {
|
||||
text.text_len()
|
||||
}
|
||||
|
||||
/// Returns current raw `offset` as u32.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// # use rustpython_parser_vendored::text_size::*;
|
||||
/// assert_eq!(TextSize::from(4).to_u32(), 4);
|
||||
/// ```
|
||||
pub fn to_u32(&self) -> u32 {
|
||||
self.raw
|
||||
}
|
||||
|
||||
/// Returns current raw `offset` as usize.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```rust
|
||||
/// # use rustpython_parser_vendored::text_size::*;
|
||||
/// assert_eq!(TextSize::from(4).to_usize(), 4);
|
||||
/// ```
|
||||
pub fn to_usize(&self) -> usize {
|
||||
self.raw as usize
|
||||
}
|
||||
}
|
||||
|
||||
/// Methods to act like a primitive integer type, where reasonably applicable.
|
||||
// Last updated for parity with Rust 1.42.0.
|
||||
impl TextSize {
|
||||
/// Checked addition. Returns `None` if overflow occurred.
|
||||
#[inline]
|
||||
pub fn checked_add(self, rhs: TextSize) -> Option<TextSize> {
|
||||
self.raw.checked_add(rhs.raw).map(|raw| TextSize { raw })
|
||||
}
|
||||
|
||||
/// Checked subtraction. Returns `None` if overflow occurred.
|
||||
#[inline]
|
||||
pub fn checked_sub(self, rhs: TextSize) -> Option<TextSize> {
|
||||
self.raw.checked_sub(rhs.raw).map(|raw| TextSize { raw })
|
||||
}
|
||||
}
|
||||
|
||||
impl From<u32> for TextSize {
|
||||
#[inline]
|
||||
fn from(raw: u32) -> Self {
|
||||
TextSize::new(raw)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<TextSize> for u32 {
|
||||
#[inline]
|
||||
fn from(value: TextSize) -> Self {
|
||||
value.to_u32()
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<usize> for TextSize {
|
||||
type Error = TryFromIntError;
|
||||
#[inline]
|
||||
fn try_from(value: usize) -> Result<Self, TryFromIntError> {
|
||||
Ok(u32::try_from(value)?.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<TextSize> for usize {
|
||||
#[inline]
|
||||
fn from(value: TextSize) -> Self {
|
||||
value.to_usize()
|
||||
}
|
||||
}
|
||||
|
||||
// Generates a binary operator for `TextSize` in all its owned/borrowed
// combinations:
//   * `TextSize $op TextSize`, applied directly to the raw `u32` values,
//   * `TextSize $op &TextSize`, which dereferences the right-hand side, and
//   * `&TextSize $op T` for every `T` that `TextSize` already supports,
//     which copies the size out of the reference first.
macro_rules! ops {
    (impl $Trait:ident for TextSize by fn $method:ident = $op:tt) => {
        impl $Trait<TextSize> for TextSize {
            type Output = TextSize;

            #[inline]
            fn $method(self, rhs: TextSize) -> TextSize {
                TextSize { raw: self.raw $op rhs.raw }
            }
        }

        impl $Trait<&TextSize> for TextSize {
            type Output = TextSize;

            #[inline]
            fn $method(self, rhs: &TextSize) -> TextSize {
                self $op *rhs
            }
        }

        impl<T> $Trait<T> for &TextSize
        where
            TextSize: $Trait<T, Output = TextSize>,
        {
            type Output = TextSize;

            #[inline]
            fn $method(self, rhs: T) -> TextSize {
                *self $op rhs
            }
        }
    };
}
|
||||
|
||||
ops!(impl Add for TextSize by fn add = +);
|
||||
ops!(impl Sub for TextSize by fn sub = -);
|
||||
|
||||
impl<A> AddAssign<A> for TextSize
|
||||
where
|
||||
TextSize: Add<A, Output = TextSize>,
|
||||
{
|
||||
#[inline]
|
||||
fn add_assign(&mut self, rhs: A) {
|
||||
*self = *self + rhs;
|
||||
}
|
||||
}
|
||||
|
||||
impl<S> SubAssign<S> for TextSize
|
||||
where
|
||||
TextSize: Sub<S, Output = TextSize>,
|
||||
{
|
||||
#[inline]
|
||||
fn sub_assign(&mut self, rhs: S) {
|
||||
*self = *self - rhs;
|
||||
}
|
||||
}
|
||||
|
||||
impl<A> iter::Sum<A> for TextSize
|
||||
where
|
||||
TextSize: Add<A, Output = TextSize>,
|
||||
{
|
||||
#[inline]
|
||||
fn sum<I: Iterator<Item = A>>(iter: I) -> TextSize {
|
||||
iter.fold(0.into(), Add::add)
|
||||
}
|
||||
}
|
37
vendored/src/text_size/traits.rs
Normal file
37
vendored/src/text_size/traits.rs
Normal file
|
@ -0,0 +1,37 @@
|
|||
use {super::TextSize, std::convert::TryInto};
|
||||
|
||||
use priv_in_pub::Sealed;
|
||||
// Private module holding the public `Sealed` marker trait. The trait itself
// is `pub`, but its module is not, so it can only be named and implemented
// from inside this module tree.
mod priv_in_pub {
    /// Marker supertrait; it has no methods.
    pub trait Sealed {}
}
|
||||
|
||||
/// Primitives with a textual length that can be passed to [`TextSize::of`].
|
||||
pub trait TextLen: Copy + Sealed {
|
||||
/// The textual length of this primitive.
|
||||
fn text_len(self) -> TextSize;
|
||||
}
|
||||
|
||||
impl Sealed for &'_ str {}
|
||||
impl TextLen for &'_ str {
|
||||
#[inline]
|
||||
fn text_len(self) -> TextSize {
|
||||
self.len().try_into().unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
impl Sealed for &'_ String {}
|
||||
impl TextLen for &'_ String {
|
||||
#[inline]
|
||||
fn text_len(self) -> TextSize {
|
||||
self.as_str().text_len()
|
||||
}
|
||||
}
|
||||
|
||||
impl Sealed for char {}
|
||||
impl TextLen for char {
|
||||
#[inline]
|
||||
#[allow(clippy::cast_possible_truncation)]
|
||||
fn text_len(self) -> TextSize {
|
||||
(self.len_utf8() as u32).into()
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue