vendor text_size and source_location (#102)

This commit is contained in:
Jeong, YunWon 2023-08-29 19:52:25 +09:00 committed by GitHub
parent b07966695a
commit d09bce80e6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
29 changed files with 175 additions and 311 deletions

17
vendored/Cargo.toml Normal file
View file

@ -0,0 +1,17 @@
[package]
name = "rustpython-parser-vendored"
description = "RustPython parser vendored third-party crates."
version = "0.3.0"
authors = ["RustPython Team"]
edition = "2021"
repository = "https://github.com/RustPython/Parser/"
license = "MIT"
[dependencies]
serde = { version = "1.0.133", optional = true, default-features = false, features = ["derive"] }
memchr.workspace = true
once_cell.workspace = true
[features]
default = []
location = []

3
vendored/README Normal file
View file

@ -0,0 +1,3 @@
This crate vendors third-party source code that we can't depend on through crates.io.
See the README and LICENSE of each module.

2
vendored/src/lib.rs Normal file
View file

@ -0,0 +1,2 @@
pub mod source_location;
pub mod text_size;

View file

@ -0,0 +1,23 @@
MIT License
Copyright (c) 2022 Charles Marsh
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
end of terms and conditions

View file

@ -0,0 +1,2 @@
Most of this module is imported from `ruff_python_ast::source_code`,
including the source code related to `ruff_python_ast::source_code::SourceLocation`.

View file

@ -0,0 +1,630 @@
use super::SourceLocation;
use crate::text_size::{TextLen, TextRange, TextSize};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::fmt;
use std::fmt::{Debug, Formatter};
use std::num::NonZeroU32;
use std::ops::Deref;
use std::sync::Arc;
/// Index for fast [byte offset](TextSize) to [`SourceLocation`] conversions.
///
/// Cloning a [`LineIndex`] is cheap because it only requires bumping a reference count.
#[derive(Clone)]
pub struct LineIndex {
/// Reference-counted index data, shared between all clones of this index.
inner: Arc<LineIndexInner>,
}
/// The data behind a [`LineIndex`], built once by `LineIndex::from_source_text`.
struct LineIndexInner {
/// Byte offset at which each line starts; the first entry is always offset `0`.
line_starts: Vec<TextSize>,
/// Whether the indexed text was ASCII-only or contained non-ASCII bytes.
kind: IndexKind,
}
impl LineIndex {
    /// Builds the [`LineIndex`] from the source text of a file.
    ///
    /// `\n`, `\r` and `\r\n` each count as a single line break. The index also
    /// remembers whether the text is ASCII-only so that column lookups can skip
    /// counting characters.
    ///
    /// ## Panics
    ///
    /// If the length of `text` does not fit into a `u32`.
    pub fn from_source_text(text: &str) -> Self {
        let mut line_starts: Vec<TextSize> = Vec::with_capacity(text.len() / 88);
        line_starts.push(TextSize::default());

        let bytes = text.as_bytes();
        let mut saw_non_ascii = false;

        assert!(u32::try_from(bytes.len()).is_ok());

        for (position, &byte) in bytes.iter().enumerate() {
            if !byte.is_ascii() {
                saw_non_ascii = true;
            }

            let ends_line = match byte {
                b'\n' => true,
                // Only track one line break for `\r\n`: the `\n` that follows records it.
                b'\r' => bytes.get(position + 1) != Some(&b'\n'),
                _ => false,
            };

            if ends_line {
                // The assertion above guarantees that `position` fits into a `u32`.
                #[allow(clippy::cast_possible_truncation)]
                line_starts.push(TextSize::from(position as u32) + TextSize::from(1));
            }
        }

        let kind = if saw_non_ascii {
            IndexKind::Utf8
        } else {
            IndexKind::Ascii
        };

        Self {
            inner: Arc::new(LineIndexInner { line_starts, kind }),
        }
    }

    /// Returns whether the indexed text was ASCII-only or not.
    fn kind(&self) -> IndexKind {
        self.inner.kind
    }

    /// Returns the row and column index for an offset.
    ///
    /// For non-ASCII text the column counts characters rather than bytes, and a
    /// leading byte order mark on the first line is not counted as a column.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use rustpython_parser_vendored::text_size::TextSize;
    /// # use rustpython_parser_vendored::source_location::{LineIndex, OneIndexed, SourceLocation};
    /// let source = "def a():\n pass";
    /// let index = LineIndex::from_source_text(source);
    ///
    /// assert_eq!(
    /// index.source_location(TextSize::from(0), source),
    /// SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) }
    /// );
    ///
    /// assert_eq!(
    /// index.source_location(TextSize::from(4), source),
    /// SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(4) }
    /// );
    /// assert_eq!(
    /// index.source_location(TextSize::from(13), source),
    /// SourceLocation { row: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(4) }
    /// );
    /// ```
    ///
    /// ## Panics
    ///
    /// If the offset is out of bounds.
    pub fn source_location(&self, offset: TextSize, content: &str) -> SourceLocation {
        let (row, column) = match self.binary_search_line(&offset) {
            // The offset is exactly the start of a line.
            Ok(row) => (row, 0),
            Err(next_row) => {
                // Cannot underflow: the index always contains an entry for offset 0,
                // so an offset that is not a line start sorts after at least one entry.
                let row = next_row - 1;
                let line_start = self.line_starts()[row as usize];

                let column = if self.kind().is_ascii() {
                    u32::from(offset - line_start)
                } else {
                    // Don't count the BOM character as a column.
                    let has_bom =
                        line_start == TextSize::from(0) && content.starts_with('\u{feff}');
                    let counted_from = if has_bom {
                        '\u{feff}'.text_len()
                    } else {
                        line_start
                    };

                    let range = TextRange::new(counted_from, offset);
                    content[range].chars().count().try_into().unwrap()
                };

                (row, column)
            }
        };

        SourceLocation {
            row: OneIndexed::from_zero_indexed(row),
            column: OneIndexed::from_zero_indexed(column),
        }
    }

    /// Return the number of lines in the source code.
    pub(crate) fn line_count(&self) -> usize {
        let starts = self.line_starts();
        starts.len()
    }

    /// Returns the row number for a given offset.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use rustpython_parser_vendored::text_size::TextSize;
    /// # use rustpython_parser_vendored::source_location::{LineIndex, OneIndexed, SourceLocation};
    /// let source = "def a():\n pass";
    /// let index = LineIndex::from_source_text(source);
    ///
    /// assert_eq!(index.line_index(TextSize::from(0)), OneIndexed::from_zero_indexed(0));
    /// assert_eq!(index.line_index(TextSize::from(4)), OneIndexed::from_zero_indexed(0));
    /// assert_eq!(index.line_index(TextSize::from(13)), OneIndexed::from_zero_indexed(1));
    /// ```
    ///
    /// ## Panics
    ///
    /// If the offset is out of bounds.
    pub fn line_index(&self, offset: TextSize) -> OneIndexed {
        // A hit means the offset is a line start. A miss yields the index of the
        // following line, which is at least 1 because offset 0 is always indexed.
        let row = self
            .binary_search_line(&offset)
            .unwrap_or_else(|next_row| next_row - 1);

        OneIndexed::from_zero_indexed(row)
    }

    /// Returns the [byte offset](TextSize) for the `line` with the given index.
    ///
    /// The line directly after the last indexed line maps to the end of `contents`.
    pub(crate) fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize {
        let starts = self.line_starts();
        let row_index = line.to_zero_indexed_usize();

        if row_index != starts.len() {
            starts[row_index]
        } else {
            // Start-of-line position after the last line.
            contents.text_len()
        }
    }

    /// Returns the [byte offset](TextSize) of the `line`'s end.
    /// The offset is the end of the line, up to and including the newline character ending the line (if any).
    pub(crate) fn line_end(&self, line: OneIndexed, contents: &str) -> TextSize {
        let next_row = line.to_zero_indexed_usize().saturating_add(1);

        // The end of a line is the start of the next one; the last line ends
        // where the text ends.
        self.line_starts()
            .get(next_row)
            .copied()
            .unwrap_or_else(|| contents.text_len())
    }

    /// Returns the [`TextRange`] of the `line` with the given index.
    /// The start points to the first character's [byte offset](TextSize), the end up to, and including
    /// the newline character ending the line (if any).
    pub(crate) fn line_range(&self, line: OneIndexed, contents: &str) -> TextRange {
        let past_last_line = line.to_zero_indexed_usize() == self.line_starts().len();

        if past_last_line {
            return TextRange::empty(contents.text_len());
        }

        let start = self.line_start(line, contents);
        let end = self.line_start(line.saturating_add(1), contents);
        TextRange::new(start, end)
    }

    /// Returns the [byte offsets](TextSize) for every line
    pub fn line_starts(&self) -> &[TextSize] {
        self.inner.line_starts.as_slice()
    }

    /// Binary-searches the line starts for `offset`, converting the resulting
    /// indices to `u32`.
    #[allow(clippy::trivially_copy_pass_by_ref)] // to keep same interface as `[T]::binary_search`
    fn binary_search_line(&self, offset: &TextSize) -> Result<u32, u32> {
        // The conversions always succeed as long as `TextSize` is a `u32`.
        self.line_starts()
            .binary_search(offset)
            .map(|index| u32::try_from(index).unwrap())
            .map_err(|index| u32::try_from(index).unwrap())
    }
}
impl Deref for LineIndex {
    type Target = [TextSize];

    /// Dereferences to the slice of line start offsets.
    fn deref(&self) -> &Self::Target {
        self.inner.line_starts.as_slice()
    }
}
impl Debug for LineIndex {
    /// Formats the index as the list of its line start offsets.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let mut list = f.debug_list();
        list.entries(self.line_starts());
        list.finish()
    }
}
/// Records whether the indexed document consists of ASCII bytes only.
#[derive(Debug, Clone, Copy)]
enum IndexKind {
    /// Optimized index for an ASCII only document
    Ascii,
    /// Index for UTF8 documents
    Utf8,
}

impl IndexKind {
    /// Returns `true` for [`IndexKind::Ascii`] and `false` for [`IndexKind::Utf8`].
    const fn is_ascii(self) -> bool {
        match self {
            IndexKind::Ascii => true,
            IndexKind::Utf8 => false,
        }
    }
}
/// Type-safe wrapper for a value whose logical range starts at `1`, for
/// instance the line or column numbers in a file
///
/// Internally this is represented as a [`NonZeroU32`], this enables some
/// memory optimizations
///
/// The derived ordering and equality compare the wrapped one-based value.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct OneIndexed(NonZeroU32);
/// Converts `value` to a `u32` in a `const` context.
///
/// Returns `Err` carrying the unchanged `value` when it exceeds `u32::MAX`.
#[allow(clippy::cast_possible_truncation)] // manually checked
const fn try_to_u32(value: usize) -> Result<u32, usize> {
    if value > u32::MAX as usize {
        return Err(value);
    }

    // The guard above rules out truncation.
    Ok(value as u32)
}
impl OneIndexed {
// SAFETY: These constants are being initialized with non-zero values
/// The smallest value that can be represented by this integer type.
pub const MIN: Self = unwrap(Self::new(1));
/// The largest value that can be represented by this integer type
pub const MAX: Self = unwrap(Self::new(u32::MAX));
const ONE: NonZeroU32 = unwrap(NonZeroU32::new(1));
/// Creates a non-zero if the given value is not zero.
pub const fn new(value: u32) -> Option<Self> {
match NonZeroU32::new(value) {
Some(value) => Some(Self(value)),
None => None,
}
}
/// Construct a new [`OneIndexed`] from a zero-indexed value
pub const fn from_zero_indexed(value: u32) -> Self {
Self(Self::ONE.saturating_add(value))
}
/// Construct a new [`OneIndexed`] from a zero-indexed usize value
pub const fn try_from_zero_indexed(value: usize) -> Result<Self, usize> {
match try_to_u32(value) {
Ok(value) => Ok(Self(Self::ONE.saturating_add(value))),
Err(value) => Err(value),
}
}
/// Returns the value as a primitive type.
pub const fn get(self) -> u32 {
self.0.get()
}
/// Return the usize value for this [`OneIndexed`]
pub const fn to_usize(self) -> usize {
self.get() as _
}
/// Return the zero-indexed primitive value for this [`OneIndexed`]
pub const fn to_zero_indexed(self) -> u32 {
self.0.get() - 1
}
/// Return the zero-indexed usize value for this [`OneIndexed`]
pub const fn to_zero_indexed_usize(self) -> usize {
self.to_zero_indexed() as _
}
/// Saturating integer addition. Computes `self + rhs`, saturating at
/// the numeric bounds instead of overflowing.
#[must_use]
pub const fn saturating_add(self, rhs: u32) -> Self {
match NonZeroU32::new(self.0.get().saturating_add(rhs)) {
Some(value) => Self(value),
None => Self::MAX,
}
}
/// Saturating integer subtraction. Computes `self - rhs`, saturating
/// at the numeric bounds instead of overflowing.
#[must_use]
pub const fn saturating_sub(self, rhs: u32) -> Self {
match NonZeroU32::new(self.0.get().saturating_sub(rhs)) {
Some(value) => Self(value),
None => Self::MIN,
}
}
}
impl std::fmt::Display for OneIndexed {
    /// Writes the one-based value using the `Debug` formatting of `u32`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let value: u32 = self.0.get();
        std::fmt::Debug::fmt(&value, f)
    }
}
/// A const `Option::unwrap` without nightly features:
/// [Tracking issue](https://github.com/rust-lang/rust/issues/67441)
///
/// Panics with "unwrapping None" if `option` is `None`.
const fn unwrap<T: Copy>(option: Option<T>) -> T {
    if let Some(value) = option {
        value
    } else {
        panic!("unwrapping None")
    }
}
// Unit tests for `LineIndex`: line start detection and offset to row/column
// conversion for ASCII and non-ASCII sources with `\n`, `\r` and `\r\n` endings.
#[cfg(test)]
mod tests {
use crate::source_location::line_index::LineIndex;
use crate::source_location::{OneIndexed, SourceLocation};
use crate::text_size::TextSize;
// Line starts for ASCII text: offset 0 is always present, and every `\n`
// (including a trailing one) adds the offset that follows it.
#[test]
fn ascii_index() {
let index = LineIndex::from_source_text("");
assert_eq!(index.line_starts(), &[TextSize::from(0)]);
let index = LineIndex::from_source_text("x = 1");
assert_eq!(index.line_starts(), &[TextSize::from(0)]);
let index = LineIndex::from_source_text("x = 1\n");
assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(6)]);
let index = LineIndex::from_source_text("x = 1\ny = 2\nz = x + y\n");
assert_eq!(
index.line_starts(),
&[
TextSize::from(0),
TextSize::from(6),
TextSize::from(12),
TextSize::from(22)
]
);
}
// Row/column lookups in ASCII text, where the column equals the byte distance
// from the line start.
#[test]
fn ascii_source_location() {
let contents = "x = 1\ny = 2";
let index = LineIndex::from_source_text(contents);
// First row.
let loc = index.source_location(TextSize::from(2), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(2)
}
);
// Second row.
let loc = index.source_location(TextSize::from(6), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
let loc = index.source_location(TextSize::from(11), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(5)
}
);
}
// A lone `\r` is treated as a line break.
#[test]
fn ascii_carriage_return() {
let contents = "x = 4\ry = 3";
let index = LineIndex::from_source_text(contents);
assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(6)]);
assert_eq!(
index.source_location(TextSize::from(4), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(4)
}
);
assert_eq!(
index.source_location(TextSize::from(6), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
assert_eq!(
index.source_location(TextSize::from(7), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1)
}
);
}
// `\r\n` counts as a single line break; the next line starts after the `\n`.
#[test]
fn ascii_carriage_return_newline() {
let contents = "x = 4\r\ny = 3";
let index = LineIndex::from_source_text(contents);
assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(7)]);
assert_eq!(
index.source_location(TextSize::from(4), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(4)
}
);
assert_eq!(
index.source_location(TextSize::from(7), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
assert_eq!(
index.source_location(TextSize::from(8), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1)
}
);
}
// Line starts are byte offsets: the emoji used here occupies 4 bytes in UTF-8.
#[test]
fn utf8_index() {
let index = LineIndex::from_source_text("x = '🫣'");
assert_eq!(index.line_count(), 1);
assert_eq!(index.line_starts(), &[TextSize::from(0)]);
let index = LineIndex::from_source_text("x = '🫣'\n");
assert_eq!(index.line_count(), 2);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(11)]
);
let index = LineIndex::from_source_text("x = '🫣'\ny = 2\nz = x + y\n");
assert_eq!(index.line_count(), 4);
assert_eq!(
index.line_starts(),
&[
TextSize::from(0),
TextSize::from(11),
TextSize::from(17),
TextSize::from(27)
]
);
let index = LineIndex::from_source_text("# 🫣\nclass Foo:\n \"\"\".\"\"\"");
assert_eq!(index.line_count(), 3);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(7), TextSize::from(18)]
);
}
// In non-ASCII text, columns count characters while offsets count bytes
// (byte offset 9 maps to column 6 here).
#[test]
fn utf8_carriage_return() {
let contents = "x = '🫣'\ry = 3";
let index = LineIndex::from_source_text(contents);
assert_eq!(index.line_count(), 2);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(11)]
);
// Second '
assert_eq!(
index.source_location(TextSize::from(9), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(6)
}
);
assert_eq!(
index.source_location(TextSize::from(11), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
assert_eq!(
index.source_location(TextSize::from(12), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1)
}
);
}
// Same as above, but with a `\r\n` line break after the non-ASCII line.
#[test]
fn utf8_carriage_return_newline() {
let contents = "x = '🫣'\r\ny = 3";
let index = LineIndex::from_source_text(contents);
assert_eq!(index.line_count(), 2);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(12)]
);
// Second '
assert_eq!(
index.source_location(TextSize::from(9), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(6)
}
);
assert_eq!(
index.source_location(TextSize::from(12), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
assert_eq!(
index.source_location(TextSize::from(13), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1)
}
);
}
// Byte offsets before, after and past a 3-byte character (the snowman),
// including the one-past-the-end offset of the text.
#[test]
fn utf8_byte_offset() {
let contents = "x = '☃'\ny = 2";
let index = LineIndex::from_source_text(contents);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(10)]
);
// First row.
let loc = index.source_location(TextSize::from(0), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(0)
}
);
let loc = index.source_location(TextSize::from(5), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(5)
}
);
let loc = index.source_location(TextSize::from(8), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(6)
}
);
// Second row.
let loc = index.source_location(TextSize::from(10), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
// One-past-the-end.
let loc = index.source_location(TextSize::from(15), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(5)
}
);
}
}

View file

@ -0,0 +1,225 @@
mod line_index;
// mod locator;
pub mod newlines;
pub use self::line_index::{LineIndex, OneIndexed};
use crate::text_size::{TextRange, TextSize};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::fmt::{Debug, Formatter};
use std::sync::Arc;
/// Gives access to the source code of a file and allows mapping between [`TextSize`] and [`SourceLocation`].
///
/// This is a borrowed view: it holds references to the text and to a [`LineIndex`]
/// and owns neither of them.
#[derive(Debug)]
pub struct SourceCode<'src, 'index> {
/// The source text.
text: &'src str,
/// The line index used for offset lookups; presumably built from `text` (not checked here).
index: &'index LineIndex,
}
impl<'src, 'index> SourceCode<'src, 'index> {
/// Creates a view over `content` that uses `index` for its line lookups.
///
/// NOTE(review): nothing here verifies that `index` was built from `content`;
/// callers are presumably expected to pass a matching pair.
pub fn new(content: &'src str, index: &'index LineIndex) -> Self {
Self {
text: content,
index,
}
}
/// Computes the one indexed row and column numbers for `offset`.
#[inline]
pub fn source_location(&self, offset: TextSize) -> SourceLocation {
self.index.source_location(offset, self.text)
}
/// Returns the one indexed row number of the line containing `offset`.
#[inline]
pub fn line_index(&self, offset: TextSize) -> OneIndexed {
self.index.line_index(offset)
}
/// Take the source code up to the given [`TextSize`].
#[inline]
pub fn up_to(&self, offset: TextSize) -> &'src str {
&self.text[TextRange::up_to(offset)]
}
/// Take the source code after the given [`TextSize`].
#[inline]
pub fn after(&self, offset: TextSize) -> &'src str {
&self.text[usize::from(offset)..]
}
/// Take the source code between the given [`TextRange`].
pub fn slice(&self, range: TextRange) -> &'src str {
&self.text[range]
}
/// Returns the byte offset at which `line` starts.
pub fn line_start(&self, line: OneIndexed) -> TextSize {
self.index.line_start(line, self.text)
}
/// Returns the byte offset at which `line` ends, including its line terminator (if any).
pub fn line_end(&self, line: OneIndexed) -> TextSize {
self.index.line_end(line, self.text)
}
/// Returns the byte range of `line`, including its line terminator (if any).
pub fn line_range(&self, line: OneIndexed) -> TextRange {
self.index.line_range(line, self.text)
}
/// Returns the source text of the line with the given index
#[inline]
pub fn line_text(&self, index: OneIndexed) -> &'src str {
let range = self.index.line_range(index, self.text);
&self.text[range]
}
/// Returns the source text
pub fn text(&self) -> &'src str {
self.text
}
/// Returns the number of lines
#[inline]
pub fn line_count(&self) -> usize {
self.index.line_count()
}
}
impl PartialEq<Self> for SourceCode<'_, '_> {
    /// Two views are equal when their source texts are equal; the line index
    /// does not take part in the comparison.
    fn eq(&self, other: &Self) -> bool {
        let (lhs, rhs): (&str, &str) = (self.text, other.text);
        lhs == rhs
    }
}

impl Eq for SourceCode<'_, '_> {}
/// A Builder for constructing a [`SourceFile`]
pub struct SourceFileBuilder {
/// Name of the file.
name: Box<str>,
/// Source text of the file.
code: Box<str>,
/// Line index supplied by the caller, if any.
index: Option<LineIndex>,
}
impl SourceFileBuilder {
    /// Creates a new builder for a file named `name`.
    ///
    /// The builder starts without a line index.
    pub fn new<Name: Into<Box<str>>, Code: Into<Box<str>>>(name: Name, code: Code) -> Self {
        let name: Box<str> = name.into();
        let code: Box<str> = code.into();

        Self {
            name,
            code,
            index: None,
        }
    }

    /// Returns the builder with its line index set to `index`.
    #[must_use]
    pub fn line_index(self, index: LineIndex) -> Self {
        Self {
            index: Some(index),
            ..self
        }
    }

    /// Sets the line index of the builder in place.
    pub fn set_line_index(&mut self, index: LineIndex) {
        self.index = Some(index);
    }

    /// Consumes `self` and returns the [`SourceFile`].
    pub fn finish(self) -> SourceFile {
        let Self { name, code, index } = self;

        // Seed the cell when an index was supplied; otherwise leave it empty.
        let line_index = match index {
            Some(index) => once_cell::sync::OnceCell::with_value(index),
            None => once_cell::sync::OnceCell::new(),
        };

        SourceFile {
            inner: Arc::new(SourceFileInner {
                name,
                code,
                line_index,
            }),
        }
    }
}
/// A source file that is identified by its name. Stores the source code and a [`LineIndex`]
/// that is either supplied through the builder or created when it is first needed.
///
/// Cloning a [`SourceFile`] is cheap, because it only requires bumping a reference count.
#[derive(Clone, Eq, PartialEq)]
pub struct SourceFile {
/// Reference-counted file data, shared between all clones.
inner: Arc<SourceFileInner>,
}
impl Debug for SourceFile {
    /// Formats the file as a struct with its `name` and `code`; the line index is omitted.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut repr = f.debug_struct("SourceFile");
        repr.field("name", &self.name());
        repr.field("code", &self.source_text());
        repr.finish()
    }
}
impl SourceFile {
/// Returns the name of the source file (filename).
#[inline]
pub fn name(&self) -> &str {
&self.inner.name
}
#[inline]
pub fn slice(&self, range: TextRange) -> &str {
&self.source_text()[range]
}
pub fn to_source_code(&self) -> SourceCode {
SourceCode {
text: self.source_text(),
index: self.index(),
}
}
fn index(&self) -> &LineIndex {
self.inner
.line_index
.get_or_init(|| LineIndex::from_source_text(self.source_text()))
}
/// Returns `Some` with the source text if set, or `None`.
#[inline]
pub fn source_text(&self) -> &str {
&self.inner.code
}
}
/// The data shared by all clones of a [`SourceFile`].
struct SourceFileInner {
/// Name of the file.
name: Box<str>,
/// Source text of the file.
code: Box<str>,
/// Line index of `code`; filled by the builder or on first use.
line_index: once_cell::sync::OnceCell<LineIndex>,
}
impl PartialEq for SourceFileInner {
    /// Compares name and code; the cached line index is ignored.
    fn eq(&self, other: &Self) -> bool {
        if self.name != other.name {
            return false;
        }

        self.code == other.code
    }
}

impl Eq for SourceFileInner {}
/// A position in a source file, expressed as a one-indexed row and column.
///
/// The derived ordering compares `row` first and then `column`.
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct SourceLocation {
/// One-indexed row (line) number.
pub row: OneIndexed,
/// One-indexed column number.
pub column: OneIndexed,
}
impl Default for SourceLocation {
    /// The default location is the first column of the first row.
    fn default() -> Self {
        let first = OneIndexed::MIN;

        Self {
            row: first,
            column: first,
        }
    }
}
impl Debug for SourceLocation {
    /// Formats the location with `row` and `column` shown as plain one-based integers.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { row, column } = *self;

        let mut repr = f.debug_struct("SourceLocation");
        repr.field("row", &row.get());
        repr.field("column", &column.get());
        repr.finish()
    }
}

View file

@ -0,0 +1,446 @@
use crate::text_size::{TextLen, TextRange, TextSize};
use memchr::{memchr2, memrchr2};
use std::iter::FusedIterator;
use std::ops::Deref;
/// Extension trait for [`str`] that provides a [`UniversalNewlineIterator`].
pub trait StrExt {
/// Returns an iterator over the lines of `self` that accepts LF, CRLF and CR line endings.
fn universal_newlines(&self) -> UniversalNewlineIterator<'_>;
}
impl StrExt for str {
fn universal_newlines(&self) -> UniversalNewlineIterator<'_> {
UniversalNewlineIterator::from(self)
}
}
/// Like [`str#lines`], but accommodates LF, CRLF, and CR line endings,
/// the latter of which are not supported by [`str#lines`].
///
/// Unlike [`str#lines`], the yielded [`Line`]s keep their line terminator and
/// carry the offset at which they start.
///
/// ## Examples
///
/// ```rust
/// # use rustpython_parser_vendored::text_size::TextSize;
/// # use rustpython_parser_vendored::source_location::newlines::{Line, UniversalNewlineIterator};
/// let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop");
///
/// assert_eq!(lines.next_back(), Some(Line::new("bop", TextSize::from(14))));
/// assert_eq!(lines.next(), Some(Line::new("foo\n", TextSize::from(0))));
/// assert_eq!(lines.next_back(), Some(Line::new("baz\r", TextSize::from(10))));
/// assert_eq!(lines.next(), Some(Line::new("bar\n", TextSize::from(4))));
/// assert_eq!(lines.next_back(), Some(Line::new("\r\n", TextSize::from(8))));
/// assert_eq!(lines.next(), None);
/// ```
pub struct UniversalNewlineIterator<'a> {
/// The text that has not been yielded yet; shrinks from both ends.
text: &'a str,
/// Offset assigned to the next line yielded from the front.
offset: TextSize,
/// Offset just past the end of the remaining text; used for lines yielded from the back.
offset_back: TextSize,
}
impl<'a> UniversalNewlineIterator<'a> {
    /// Creates an iterator over the lines of `text` whose reported offsets start at `offset`.
    pub fn with_offset(text: &'a str, offset: TextSize) -> UniversalNewlineIterator<'a> {
        let offset_back = offset + text.text_len();

        Self {
            text,
            offset,
            offset_back,
        }
    }

    /// Creates an iterator over the lines of `text` whose reported offsets start at zero.
    pub fn from(text: &'a str) -> UniversalNewlineIterator<'a> {
        let start = TextSize::default();
        Self::with_offset(text, start)
    }
}
/// Finds the next newline character. Returns its position and the [`LineEnding`].
///
/// The position is the byte index of the first `\n` or `\r` in `text`. For a
/// `\r\n` sequence it is the index of the `\r` and the ending is
/// [`LineEnding::CrLf`]. Returns `None` if `text` contains no line break.
#[inline]
pub fn find_newline(text: &str) -> Option<(usize, LineEnding)> {
    let bytes = text.as_bytes();
    let position = memchr2(b'\n', b'\r', bytes)?;

    // Plain indexing instead of `get_unchecked`: the position comes from a
    // search over `bytes`, so the bounds check always passes, and memory safety
    // no longer relies on `unsafe` code here.
    let line_ending = match bytes[position] {
        // Explicit branch for `\n` as this is the most likely path
        b'\n' => LineEnding::Lf,
        // '\r\n'
        b'\r' if bytes.get(position.saturating_add(1)) == Some(&b'\n') => LineEnding::CrLf,
        // '\r'
        _ => LineEnding::Cr,
    };

    Some((position, line_ending))
}
impl<'a> Iterator for UniversalNewlineIterator<'a> {
    type Item = Line<'a>;

    /// Yields the next line from the front, including its line terminator.
    #[inline]
    fn next(&mut self) -> Option<Line<'a>> {
        if self.text.is_empty() {
            return None;
        }

        let offset = self.offset;

        let text = match find_newline(self.text) {
            Some((newline_position, line_ending)) => {
                // Split directly after the line terminator.
                let split_at = newline_position + line_ending.len();
                let (line_text, remainder) = self.text.split_at(split_at);

                self.text = remainder;
                self.offset += line_text.text_len();
                line_text
            }
            // Last line: hand out everything that is left.
            None => std::mem::take(&mut self.text),
        };

        Some(Line { offset, text })
    }

    /// Returns the last line by taking it from the back instead of
    /// walking over every line from the front.
    fn last(mut self) -> Option<Self::Item> {
        DoubleEndedIterator::next_back(&mut self)
    }
}
impl DoubleEndedIterator for UniversalNewlineIterator<'_> {
    /// Yields the next line from the back, including its line terminator.
    #[inline]
    fn next_back(&mut self) -> Option<Self::Item> {
        if self.text.is_empty() {
            return None;
        }

        let bytes = self.text.as_bytes();

        // Trim any trailing newlines, so that the search below finds the
        // terminator of the line before the last one.
        let haystack = if bytes.ends_with(b"\r\n") {
            &bytes[..bytes.len() - 2]
        } else if matches!(bytes.last(), Some(b'\n' | b'\r')) {
            &bytes[..bytes.len() - 1]
        } else {
            bytes
        };

        // Find the end of the previous line. The previous line is the text up to, but not including
        // the newline character.
        match memrchr2(b'\n', b'\r', haystack) {
            // '\n' or '\r' or '\r\n'
            Some(line_end) => {
                let (remainder, line) = self.text.split_at(line_end + 1);
                self.text = remainder;
                self.offset_back -= line.text_len();

                Some(Line {
                    text: line,
                    offset: self.offset_back,
                })
            }
            // Only one line is left: yield all of the remaining text.
            None => {
                let offset = self.offset_back - self.text.text_len();

                Some(Line {
                    text: std::mem::take(&mut self.text),
                    offset,
                })
            }
        }
    }
}
// Once the remaining text is empty, `next` and `next_back` both return `None`,
// and neither of them ever refills the text.
impl FusedIterator for UniversalNewlineIterator<'_> {}
/// Like [`UniversalNewlineIterator`], but includes a trailing newline as an empty line.
pub struct NewlineWithTrailingNewline<'a> {
/// The empty line to yield after all other lines; `Some` only if the input ends with `\r` or `\n`.
trailing: Option<Line<'a>>,
/// Iterator over the regular lines of the input.
underlying: UniversalNewlineIterator<'a>,
}
impl<'a> NewlineWithTrailingNewline<'a> {
    /// Creates an iterator over the lines of `input` whose reported offsets start at zero.
    pub fn from(input: &'a str) -> NewlineWithTrailingNewline<'a> {
        let start = TextSize::default();
        Self::with_offset(input, start)
    }

    /// Creates an iterator over the lines of `input` whose reported offsets start at `offset`.
    ///
    /// If `input` ends with `\r` or `\n`, an additional empty line positioned at
    /// the end of the input is yielded last.
    pub fn with_offset(input: &'a str, offset: TextSize) -> Self {
        let underlying = UniversalNewlineIterator::with_offset(input, offset);

        let trailing = input.ends_with(['\r', '\n']).then(|| Line {
            text: "",
            offset: offset + input.text_len(),
        });

        Self {
            trailing,
            underlying,
        }
    }
}
impl<'a> Iterator for NewlineWithTrailingNewline<'a> {
    type Item = Line<'a>;

    /// Yields the regular lines first and then, at most once, the trailing empty line.
    #[inline]
    fn next(&mut self) -> Option<Line<'a>> {
        match self.underlying.next() {
            Some(line) => Some(line),
            None => self.trailing.take(),
        }
    }
}
/// A single line of text together with the offset at which it starts.
///
/// The derived equality compares both the full text (terminator included) and the offset.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Line<'a> {
/// The text of the line, including its line terminator (if any).
text: &'a str,
/// The offset at which the line starts.
offset: TextSize,
}
impl<'a> Line<'a> {
pub fn new(text: &'a str, offset: TextSize) -> Self {
Self { text, offset }
}
#[inline]
pub const fn start(&self) -> TextSize {
self.offset
}
/// Returns the byte offset where the line ends, including its terminating new line character.
#[inline]
pub fn full_end(&self) -> TextSize {
self.offset + self.full_text_len()
}
/// Returns the byte offset where the line ends, excluding its new line character
#[inline]
pub fn end(&self) -> TextSize {
self.offset + self.as_str().text_len()
}
/// Returns the range of the line, including its terminating new line character.
#[inline]
pub fn full_range(&self) -> TextRange {
TextRange::at(self.offset, self.text.text_len())
}
/// Returns the range of the line, excluding its terminating new line character
#[inline]
pub fn range(&self) -> TextRange {
TextRange::new(self.start(), self.end())
}
/// Returns the text of the line, excluding the terminating new line character.
#[inline]
pub fn as_str(&self) -> &'a str {
let mut bytes = self.text.bytes().rev();
let newline_len = match bytes.next() {
Some(b'\n') => {
if bytes.next() == Some(b'\r') {
2
} else {
1
}
}
Some(b'\r') => 1,
_ => 0,
};
&self.text[..self.text.len() - newline_len]
}
/// Returns the line's text, including the terminating new line character.
#[inline]
pub fn as_full_str(&self) -> &'a str {
self.text
}
#[inline]
pub fn full_text_len(&self) -> TextSize {
self.text.text_len()
}
}
impl Deref for Line<'_> {
type Target = str;
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
impl PartialEq<&str> for Line<'_> {
    /// Compares the line's text, without its line terminator, to `other`.
    fn eq(&self, other: &&str) -> bool {
        let expected: &str = other;
        self.as_str() == expected
    }
}
impl PartialEq<Line<'_>> for &str {
    /// Compares `self` to the line's text without its line terminator.
    fn eq(&self, other: &Line<'_>) -> bool {
        let actual: &str = self;
        actual == other.as_str()
    }
}
/// The line ending style used in Python source code.
/// See <https://docs.python.org/3/reference/lexical_analysis.html#physical-lines>
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum LineEnding {
    /// Line feed.
    Lf,
    /// Carriage return.
    Cr,
    /// Carriage return followed by a line feed.
    CrLf,
}

impl Default for LineEnding {
    /// Returns [`LineEnding::CrLf`] when compiled for Windows and
    /// [`LineEnding::Lf`] otherwise.
    fn default() -> Self {
        if !cfg!(windows) {
            return LineEnding::Lf;
        }

        LineEnding::CrLf
    }
}
impl LineEnding {
    /// Returns the characters that make up this line ending.
    pub const fn as_str(&self) -> &'static str {
        match *self {
            LineEnding::Cr => "\r",
            LineEnding::CrLf => "\r\n",
            LineEnding::Lf => "\n",
        }
    }

    /// Returns the length of this line ending in bytes.
    #[allow(clippy::len_without_is_empty)]
    pub const fn len(&self) -> usize {
        self.as_str().len()
    }

    /// Returns the length of this line ending in bytes as a [`TextSize`].
    pub const fn text_len(&self) -> TextSize {
        let width: u32 = match *self {
            LineEnding::CrLf => 2,
            LineEnding::Lf | LineEnding::Cr => 1,
        };

        TextSize::new(width)
    }
}
impl Deref for LineEnding {
type Target = str;
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
// Unit tests for `UniversalNewlineIterator`: forward, backward and mixed
// iteration over text with LF, CR and CRLF line endings.
#[cfg(test)]
mod tests {
use super::Line;
use super::UniversalNewlineIterator;
use crate::text_size::TextSize;
// An empty input yields no lines in either direction.
#[test]
fn universal_newlines_empty_str() {
let lines: Vec<_> = UniversalNewlineIterator::from("").collect();
assert_eq!(lines, Vec::<Line>::new());
let lines: Vec<_> = UniversalNewlineIterator::from("").rev().collect();
assert_eq!(lines, Vec::<Line>::new());
}
// Forward iteration keeps each line's terminator; a trailing newline does
// not produce an additional empty line.
#[test]
fn universal_newlines_forward() {
let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop").collect();
assert_eq!(
lines,
vec![
Line::new("foo\n", TextSize::from(0)),
Line::new("bar\n", TextSize::from(4)),
Line::new("\r\n", TextSize::from(8)),
Line::new("baz\r", TextSize::from(10)),
Line::new("bop", TextSize::from(14)),
]
);
let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop\n").collect();
assert_eq!(
lines,
vec![
Line::new("foo\n", TextSize::from(0)),
Line::new("bar\n", TextSize::from(4)),
Line::new("\r\n", TextSize::from(8)),
Line::new("baz\r", TextSize::from(10)),
Line::new("bop\n", TextSize::from(14)),
]
);
let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop\n\n").collect();
assert_eq!(
lines,
vec![
Line::new("foo\n", TextSize::from(0)),
Line::new("bar\n", TextSize::from(4)),
Line::new("\r\n", TextSize::from(8)),
Line::new("baz\r", TextSize::from(10)),
Line::new("bop\n", TextSize::from(14)),
Line::new("\n", TextSize::from(18)),
]
);
}
// Reverse iteration yields the same lines as forward iteration, last line first.
#[test]
fn universal_newlines_backwards() {
let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop")
.rev()
.collect();
assert_eq!(
lines,
vec![
Line::new("bop", TextSize::from(14)),
Line::new("baz\r", TextSize::from(10)),
Line::new("\r\n", TextSize::from(8)),
Line::new("bar\n", TextSize::from(4)),
Line::new("foo\n", TextSize::from(0)),
]
);
// NOTE(review): the lines are mapped to `&str` below, so the assertion goes
// through `impl PartialEq<Line<'_>> for &str`, which compares only the text
// without its line terminator. The offsets and terminators in the expected
// values are therefore not checked here.
let lines: Vec<_> = UniversalNewlineIterator::from("foo\nbar\n\nbaz\rbop\n")
.rev()
.map(|line| line.as_str())
.collect();
assert_eq!(
lines,
vec![
Line::new("bop\n", TextSize::from(13)),
Line::new("baz\r", TextSize::from(9)),
Line::new("\n", TextSize::from(8)),
Line::new("bar\n", TextSize::from(4)),
Line::new("foo\n", TextSize::from(0)),
]
);
}
// Alternating between `next` and `next_back` consumes every line exactly once.
#[test]
fn universal_newlines_mixed() {
let mut lines = UniversalNewlineIterator::from("foo\nbar\n\r\nbaz\rbop");
assert_eq!(
lines.next_back(),
Some(Line::new("bop", TextSize::from(14)))
);
assert_eq!(lines.next(), Some(Line::new("foo\n", TextSize::from(0))));
assert_eq!(
lines.next_back(),
Some(Line::new("baz\r", TextSize::from(10)))
);
assert_eq!(lines.next(), Some(Line::new("bar\n", TextSize::from(4))));
assert_eq!(
lines.next_back(),
Some(Line::new("\r\n", TextSize::from(8)))
);
assert_eq!(lines.next(), None);
}
}

View file

@ -0,0 +1,53 @@
MIT License
Copyright (c) 2022 Charles Marsh
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
end of terms and conditions
The externally maintained libraries from which parts of the Software is derived
are:
- rust-analyzer/text-size, licensed under the MIT license:
"""
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
"""

View file

@ -0,0 +1,2 @@
This module is imported from `ruff_text_size`,
which is a fork of `text-size`.

View file

@ -0,0 +1,34 @@
//! Newtypes for working with text sizes/ranges in a more type-safe manner.
//!
//! This library can help with two things:
//! * Reducing storage requirements for offsets and ranges, under the
//! assumption that 32 bits is enough.
//! * Providing standard vocabulary types for applications where text ranges
//! are pervasive.
//!
//! However, you should not use this library simply because you work with
//! strings. In the overwhelming majority of cases, using `usize` and
//! `std::ops::Range<usize>` is better. In particular, if you are publishing a
//! library, using only std types in the interface would make it more
//! interoperable. Similarly, if you are writing something like a lexer, which
//! produces, but does not *store* text ranges, then sticking to `usize` would
//! be better.
//!
//! Minimal Supported Rust Version: latest stable.
#![forbid(unsafe_code)]
#![warn(missing_debug_implementations, missing_docs)]
mod range;
mod size;
mod traits;
#[cfg(feature = "schemars")]
mod schemars_impls;
#[cfg(feature = "serde")]
mod serde_impls;
pub use self::{range::TextRange, size::TextSize, traits::TextLen};
#[cfg(target_pointer_width = "16")]
compile_error!("text-size assumes usize >= u32 and does not work on 16-bit targets");

View file

@ -0,0 +1,544 @@
use cmp::Ordering;
use {
super::TextSize,
std::{
cmp, fmt,
ops::{Add, AddAssign, Bound, Index, IndexMut, Range, RangeBounds, Sub, SubAssign},
},
};
/// A range in text, represented as a pair of [`TextSize`][struct@TextSize].
///
/// The range is half-open (`start..end`): `start` is included, `end` is not.
///
/// It is a logic error for `start` to be greater than `end`.
#[derive(Default, Copy, Clone, Eq, PartialEq, Hash)]
pub struct TextRange {
// Invariant: start <= end
/// Offset of the first position covered by the range (inclusive).
start: TextSize,
/// Offset just past the last position covered by the range (exclusive).
end: TextSize,
}
impl fmt::Debug for TextRange {
    /// Writes the range as `start..end`, using the raw `u32` offsets.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (lo, hi) = (self.start().raw, self.end().raw);
        write!(f, "{lo}..{hi}")
    }
}
impl TextRange {
/// Creates a new `TextRange` with the given `start` and `end` (`start..end`).
///
/// # Panics
///
/// Panics if `end < start`.
///
/// # Examples
///
/// ```rust
/// # use rustpython_parser_vendored::text_size::*;
/// let start = TextSize::from(5);
/// let end = TextSize::from(10);
/// let range = TextRange::new(start, end);
///
/// assert_eq!(range.start(), start);
/// assert_eq!(range.end(), end);
/// assert_eq!(range.len(), end - start);
/// ```
#[inline]
pub const fn new(start: TextSize, end: TextSize) -> TextRange {
assert!(start.raw <= end.raw);
TextRange { start, end }
}
/// Create a new `TextRange` with the given `offset` and `len` (`offset..offset + len`).
///
/// # Examples
///
/// ```rust
/// # use rustpython_parser_vendored::text_size::*;
/// let text = "0123456789";
///
/// let offset = TextSize::from(2);
/// let length = TextSize::from(5);
/// let range = TextRange::at(offset, length);
///
/// assert_eq!(range, TextRange::new(offset, offset + length));
/// assert_eq!(&text[range], "23456")
/// ```
#[inline]
pub fn at(offset: TextSize, len: TextSize) -> TextRange {
TextRange::new(offset, offset + len)
}
/// Create a zero-length range at the specified offset (`offset..offset`).
///
/// # Examples
///
/// ```rust
/// # use rustpython_parser_vendored::text_size::*;
/// let point: TextSize;
/// # point = TextSize::from(3);
/// let range = TextRange::empty(point);
/// assert!(range.is_empty());
/// assert_eq!(range, TextRange::new(point, point));
/// ```
#[inline]
pub fn empty(offset: TextSize) -> TextRange {
TextRange {
start: offset,
end: offset,
}
}
/// Create a range up to the given end (`..end`).
///
/// # Examples
///
/// ```rust
/// # use rustpython_parser_vendored::text_size::*;
/// let point: TextSize;
/// # point = TextSize::from(12);
/// let range = TextRange::up_to(point);
///
/// assert_eq!(range.len(), point);
/// assert_eq!(range, TextRange::new(0.into(), point));
/// assert_eq!(range, TextRange::at(0.into(), point));
/// ```
#[inline]
pub fn up_to(end: TextSize) -> TextRange {
TextRange {
start: 0.into(),
end,
}
}
}
/// Identity methods.
impl TextRange {
/// The start point of this range.
#[inline]
pub const fn start(self) -> TextSize {
self.start
}
/// The end point of this range.
#[inline]
pub const fn end(self) -> TextSize {
self.end
}
/// The size of this range.
#[inline]
pub const fn len(self) -> TextSize {
// HACK for const fn: math on primitives only
TextSize {
raw: self.end().raw - self.start().raw,
}
}
/// Check if this range is empty.
#[inline]
pub const fn is_empty(self) -> bool {
// HACK for const fn: math on primitives only
self.start().raw == self.end().raw
}
}
/// Manipulation methods.
impl TextRange {
    /// Returns `true` when `offset` lies within this range.
    ///
    /// The end index is considered excluded.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use rustpython_parser_vendored::text_size::*;
    /// let (start, end): (TextSize, TextSize);
    /// # start = 10.into(); end = 20.into();
    /// let range = TextRange::new(start, end);
    /// assert!(range.contains(start));
    /// assert!(!range.contains(end));
    /// ```
    #[inline]
    pub fn contains(self, offset: TextSize) -> bool {
        (self.start()..self.end()).contains(&offset)
    }

    /// Returns `true` when `offset` lies within this range.
    ///
    /// The end index is considered included.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use rustpython_parser_vendored::text_size::*;
    /// let (start, end): (TextSize, TextSize);
    /// # start = 10.into(); end = 20.into();
    /// let range = TextRange::new(start, end);
    /// assert!(range.contains_inclusive(start));
    /// assert!(range.contains_inclusive(end));
    /// ```
    #[inline]
    pub fn contains_inclusive(self, offset: TextSize) -> bool {
        (self.start()..=self.end()).contains(&offset)
    }

    /// Returns `true` when `other` lies entirely within this range.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use rustpython_parser_vendored::text_size::*;
    /// let larger = TextRange::new(0.into(), 20.into());
    /// let smaller = TextRange::new(5.into(), 15.into());
    /// assert!(larger.contains_range(smaller));
    /// assert!(!smaller.contains_range(larger));
    ///
    /// // a range always contains itself
    /// assert!(larger.contains_range(larger));
    /// assert!(smaller.contains_range(smaller));
    /// ```
    #[inline]
    pub fn contains_range(self, other: TextRange) -> bool {
        let starts_inside = other.start() >= self.start();
        let ends_inside = self.end() >= other.end();
        starts_inside && ends_inside
    }

    /// Returns the part shared by both ranges, or `None` if they are disjoint.
    /// Ranges that merely touch share an empty range.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use rustpython_parser_vendored::text_size::*;
    /// assert_eq!(
    ///     TextRange::intersect(
    ///         TextRange::new(0.into(), 10.into()),
    ///         TextRange::new(5.into(), 15.into()),
    ///     ),
    ///     Some(TextRange::new(5.into(), 10.into())),
    /// );
    /// ```
    #[inline]
    pub fn intersect(self, other: TextRange) -> Option<TextRange> {
        let start = cmp::max(self.start(), other.start());
        let end = cmp::min(self.end(), other.end());
        if start <= end {
            Some(TextRange::new(start, end))
        } else {
            None
        }
    }

    /// Returns the smallest range that spans both this range and `other`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use rustpython_parser_vendored::text_size::*;
    /// assert_eq!(
    ///     TextRange::cover(
    ///         TextRange::new(0.into(), 5.into()),
    ///         TextRange::new(15.into(), 20.into()),
    ///     ),
    ///     TextRange::new(0.into(), 20.into()),
    /// );
    /// ```
    #[inline]
    #[must_use]
    pub fn cover(self, other: TextRange) -> TextRange {
        TextRange::new(
            cmp::min(self.start(), other.start()),
            cmp::max(self.end(), other.end()),
        )
    }

    /// Returns the smallest range that spans both this range and `offset`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use rustpython_parser_vendored::text_size::*;
    /// assert_eq!(
    ///     TextRange::empty(0.into()).cover_offset(20.into()),
    ///     TextRange::new(0.into(), 20.into()),
    /// )
    /// ```
    #[inline]
    #[must_use]
    pub fn cover_offset(self, offset: TextSize) -> TextRange {
        let point = TextRange::empty(offset);
        self.cover(point)
    }

    /// Shifts both endpoints forward by `offset`, or returns `None` on overflow.
    ///
    /// Note that this is not appropriate for changing where a `TextRange` is
    /// within some string; rather, it is for changing the reference anchor
    /// that the `TextRange` is measured against.
    ///
    /// The unchecked version (`Add::add`) will _always_ panic on overflow,
    /// in contrast to primitive integers, which check in debug mode only.
    #[inline]
    pub fn checked_add(self, offset: TextSize) -> Option<TextRange> {
        let start = self.start.checked_add(offset)?;
        let end = self.end.checked_add(offset)?;
        Some(TextRange { start, end })
    }

    /// Shifts both endpoints backward by `offset`, or returns `None` on
    /// overflow.
    ///
    /// Note that this is not appropriate for changing where a `TextRange` is
    /// within some string; rather, it is for changing the reference anchor
    /// that the `TextRange` is measured against.
    ///
    /// The unchecked version (`Sub::sub`) will _always_ panic on overflow,
    /// in contrast to primitive integers, which check in debug mode only.
    #[inline]
    pub fn checked_sub(self, offset: TextSize) -> Option<TextRange> {
        let start = self.start.checked_sub(offset)?;
        let end = self.end.checked_sub(offset)?;
        Some(TextRange { start, end })
    }

    /// Orders two ranges by position, treating overlapping ranges as equal.
    ///
    /// This is useful when, for example, binary searching an array of disjoint
    /// ranges.
    ///
    /// # Examples
    ///
    /// ```
    /// # use rustpython_parser_vendored::text_size::*;
    /// # use std::cmp::Ordering;
    ///
    /// let a = TextRange::new(0.into(), 3.into());
    /// let b = TextRange::new(4.into(), 5.into());
    /// assert_eq!(a.ordering(b), Ordering::Less);
    ///
    /// let a = TextRange::new(0.into(), 3.into());
    /// let b = TextRange::new(3.into(), 5.into());
    /// assert_eq!(a.ordering(b), Ordering::Less);
    ///
    /// let a = TextRange::new(0.into(), 3.into());
    /// let b = TextRange::new(2.into(), 5.into());
    /// assert_eq!(a.ordering(b), Ordering::Equal);
    ///
    /// let a = TextRange::new(0.into(), 3.into());
    /// let b = TextRange::new(2.into(), 2.into());
    /// assert_eq!(a.ordering(b), Ordering::Equal);
    ///
    /// let a = TextRange::new(2.into(), 3.into());
    /// let b = TextRange::new(2.into(), 2.into());
    /// assert_eq!(a.ordering(b), Ordering::Greater);
    /// ```
    #[inline]
    pub fn ordering(self, other: TextRange) -> Ordering {
        // The "entirely before" test runs first, so it wins whenever both
        // tests would hold.
        if self.end() <= other.start() {
            return Ordering::Less;
        }
        if other.end() <= self.start() {
            return Ordering::Greater;
        }
        Ordering::Equal
    }

    /// Returns a copy of this range whose start is moved back by `amount`.
    ///
    /// ## Panics
    /// If `start - amount` is less than zero.
    ///
    /// ## Examples
    ///
    /// ```
    /// use rustpython_parser_vendored::text_size::{TextRange, TextSize};
    ///
    /// let range = TextRange::new(TextSize::from(5), TextSize::from(10));
    /// assert_eq!(range.sub_start(TextSize::from(2)), TextRange::new(TextSize::from(3), TextSize::from(10)));
    /// ```
    #[inline]
    #[must_use]
    pub fn sub_start(&self, amount: TextSize) -> TextRange {
        let start = self.start() - amount;
        TextRange::new(start, self.end())
    }

    /// Returns a copy of this range whose start is moved forward by `amount`.
    ///
    /// ## Panics
    /// If `start + amount > end`
    ///
    /// ## Examples
    ///
    /// ```
    /// use rustpython_parser_vendored::text_size::{TextRange, TextSize};
    ///
    /// let range = TextRange::new(TextSize::from(5), TextSize::from(10));
    /// assert_eq!(range.add_start(TextSize::from(3)), TextRange::new(TextSize::from(8), TextSize::from(10)));
    /// ```
    #[inline]
    #[must_use]
    pub fn add_start(&self, amount: TextSize) -> TextRange {
        let start = self.start() + amount;
        TextRange::new(start, self.end())
    }

    /// Returns a copy of this range whose end is moved back by `amount`.
    ///
    /// ## Panics
    /// If `end - amount < 0` or `end - amount < start`
    ///
    /// ## Examples
    ///
    /// ```
    /// use rustpython_parser_vendored::text_size::{TextRange, TextSize};
    ///
    /// let range = TextRange::new(TextSize::from(5), TextSize::from(10));
    /// assert_eq!(range.sub_end(TextSize::from(2)), TextRange::new(TextSize::from(5), TextSize::from(8)));
    /// ```
    #[inline]
    #[must_use]
    pub fn sub_end(&self, amount: TextSize) -> TextRange {
        let end = self.end() - amount;
        TextRange::new(self.start(), end)
    }

    /// Returns a copy of this range whose end is moved forward by `amount`.
    ///
    /// ## Panics
    /// If `end + amount > u32::MAX`
    ///
    /// ## Examples
    ///
    /// ```
    /// use rustpython_parser_vendored::text_size::{TextRange, TextSize};
    ///
    /// let range = TextRange::new(TextSize::from(5), TextSize::from(10));
    /// assert_eq!(range.add_end(TextSize::from(2)), TextRange::new(TextSize::from(5), TextSize::from(12)));
    /// ```
    #[inline]
    #[must_use]
    pub fn add_end(&self, amount: TextSize) -> TextRange {
        let end = self.end() + amount;
        TextRange::new(self.start(), end)
    }
}
impl Index<TextRange> for str {
    type Output = str;

    /// Slices the string by the byte range that `index` describes.
    #[inline]
    fn index(&self, index: TextRange) -> &str {
        let bytes: Range<usize> = index.into();
        &self[bytes]
    }
}
impl Index<TextRange> for String {
    type Output = str;

    /// Slices the string by the byte range that `index` describes.
    #[inline]
    fn index(&self, index: TextRange) -> &str {
        let bytes: Range<usize> = index.into();
        &self[bytes]
    }
}
impl IndexMut<TextRange> for str {
    /// Mutably slices the string by the byte range that `index` describes.
    #[inline]
    fn index_mut(&mut self, index: TextRange) -> &mut str {
        let bytes: Range<usize> = index.into();
        &mut self[bytes]
    }
}
impl IndexMut<TextRange> for String {
    /// Mutably slices the string by the byte range that `index` describes.
    #[inline]
    fn index_mut(&mut self, index: TextRange) -> &mut str {
        let bytes: Range<usize> = index.into();
        &mut self[bytes]
    }
}
// Lets a `TextRange` be used wherever a `RangeBounds<TextSize>` is accepted,
// as the half-open range `start..end`.
impl RangeBounds<TextSize> for TextRange {
// The start offset belongs to the range.
fn start_bound(&self) -> Bound<&TextSize> {
Bound::Included(&self.start)
}
// The end offset does not belong to the range.
fn end_bound(&self) -> Bound<&TextSize> {
Bound::Excluded(&self.end)
}
}
impl From<Range<TextSize>> for TextRange {
    /// Converts `start..end` into a `TextRange`.
    ///
    /// Goes through [`TextRange::new`], so it panics if `end < start`.
    #[inline]
    fn from(r: Range<TextSize>) -> Self {
        let Range { start, end } = r;
        TextRange::new(start, end)
    }
}
impl<T> From<TextRange> for Range<T>
where
    T: From<TextSize>,
{
    /// Converts a `TextRange` into a standard `start..end` range over any
    /// type that can be built from a [`TextSize`].
    #[inline]
    fn from(r: TextRange) -> Self {
        let start = T::from(r.start());
        let end = T::from(r.end());
        Range { start, end }
    }
}
// Generates the by-reference forms of a binary operator for `TextRange`.
//
// `$Op` is the operator trait, `$f` its method name and `$op` the operator
// token. Both generated impls forward to an existing `TextRange: $Op<..>` impl.
macro_rules! ops {
(impl $Op:ident for TextRange by fn $f:ident = $op:tt) => {
// `TextRange $op &TextSize`: dereference the right-hand side.
impl $Op<&TextSize> for TextRange {
type Output = TextRange;
#[inline]
fn $f(self, other: &TextSize) -> TextRange {
self $op *other
}
}
// `&TextRange $op T` for every `T` the by-value operator accepts:
// copy the left-hand side out of the reference.
impl<T> $Op<T> for &TextRange
where
TextRange: $Op<T, Output=TextRange>,
{
type Output = TextRange;
#[inline]
fn $f(self, other: T) -> TextRange {
*self $op other
}
}
};
}
impl Add<TextSize> for TextRange {
    type Output = TextRange;

    /// Shifts both endpoints forward by `offset`.
    ///
    /// Panics if either endpoint overflows.
    #[inline]
    fn add(self, offset: TextSize) -> TextRange {
        let shifted = self.checked_add(offset);
        shifted.expect("TextRange +offset overflowed")
    }
}
impl Sub<TextSize> for TextRange {
    type Output = TextRange;

    /// Shifts both endpoints backward by `offset`.
    ///
    /// Panics if either endpoint overflows.
    #[inline]
    fn sub(self, offset: TextSize) -> TextRange {
        let shifted = self.checked_sub(offset);
        shifted.expect("TextRange -offset overflowed")
    }
}
// Derive the by-reference operator forms (`TextRange + &TextSize` and
// `&TextRange + T`, likewise for `-`) from the by-value `Add`/`Sub` impls.
ops!(impl Add for TextRange by fn add = +);
ops!(impl Sub for TextRange by fn sub = -);
impl<A> AddAssign<A> for TextRange
where
    TextRange: Add<A, Output = TextRange>,
{
    /// Implements `+=` for any right-hand side that `+` accepts.
    #[inline]
    fn add_assign(&mut self, rhs: A) {
        let current = *self;
        *self = Add::add(current, rhs);
    }
}
impl<S> SubAssign<S> for TextRange
where
    TextRange: Sub<S, Output = TextRange>,
{
    /// Implements `-=` for any right-hand side that `-` accepts.
    #[inline]
    fn sub_assign(&mut self, rhs: S) {
        let current = *self;
        *self = Sub::sub(current, rhs);
    }
}

View file

@ -0,0 +1,33 @@
//! This module implements the [`JsonSchema`] trait from the `schemars` crate for
//! [`TextSize`] and [`TextRange`] if the `schemars` feature is enabled. This trait
//! exposes meta-information on how a given type is serialized and deserialized
//! using `serde`, and is currently used to generate autocomplete information
//! for the `rome.json` configuration file and TypeScript types for the node.js
//! bindings to the Workspace API
use crate::{TextRange, TextSize};
use schemars::{gen::SchemaGenerator, schema::Schema, JsonSchema};
impl JsonSchema for TextSize {
fn schema_name() -> String {
String::from("TextSize")
}
fn json_schema(gen: &mut SchemaGenerator) -> Schema {
// TextSize is represented as a raw u32, see serde_impls.rs for the
// actual implementation
<u32>::json_schema(gen)
}
}
impl JsonSchema for TextRange {
fn schema_name() -> String {
String::from("TextRange")
}
fn json_schema(gen: &mut SchemaGenerator) -> Schema {
// TextSize is represented as (TextSize, TextSize), see serde_impls.rs
// for the actual implementation
<(TextSize, TextSize)>::json_schema(gen)
}
}

View file

@ -0,0 +1,47 @@
use {
super::{TextRange, TextSize},
serde::{de, Deserialize, Deserializer, Serialize, Serializer},
};
impl Serialize for TextSize {
    /// Serializes the size as its raw `u32` value.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        serializer.serialize_u32(self.raw)
    }
}
impl<'de> Deserialize<'de> for TextSize {
    /// Reads a `u32` and wraps it in a `TextSize`.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let raw = u32::deserialize(deserializer)?;
        Ok(TextSize::from(raw))
    }
}
impl Serialize for TextRange {
    /// Serializes the range as the tuple `(start, end)`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let endpoints = (self.start(), self.end());
        endpoints.serialize(serializer)
    }
}
impl<'de> Deserialize<'de> for TextRange {
#[allow(clippy::nonminimal_bool)]
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let (start, end) = Deserialize::deserialize(deserializer)?;
if !(start <= end) {
return Err(de::Error::custom(format!(
"invalid range: {start:?}..{end:?}"
)));
}
Ok(TextRange::new(start, end))
}
}

View file

@ -0,0 +1,197 @@
use {
super::TextLen,
std::{
convert::TryFrom,
fmt, iter,
num::TryFromIntError,
ops::{Add, AddAssign, Sub, SubAssign},
u32,
},
};
/// A measure of text length. Also, equivalently, an index into text.
///
/// This is a UTF-8 bytes offset stored as `u32`, but
/// most clients should treat it as an opaque measure.
///
/// For cases that need to escape `TextSize` and return to working directly
/// with primitive integers, `TextSize` can be converted losslessly to/from
/// `u32` via [`From`] conversions as well as losslessly be converted [`Into`]
/// `usize`. The `usize -> TextSize` direction can be done via [`TryFrom`].
///
/// These escape hatches are primarily required for unit testing and when
/// converting from UTF-8 size to another coordinate space, such as UTF-16.
#[derive(Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct TextSize {
/// The wrapped value. Crate-visible so that sibling modules can do
/// arithmetic on it directly, e.g. inside `const fn`s.
pub(crate) raw: u32,
}
impl fmt::Debug for TextSize {
    /// Writes the size as its bare `u32` value.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let raw = self.raw;
        write!(f, "{raw}")
    }
}
impl TextSize {
    /// Creates a new `TextSize` at the given `offset`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use rustpython_parser_vendored::text_size::*;
    /// assert_eq!(TextSize::from(4), TextSize::new(4));
    /// ```
    #[inline]
    pub const fn new(offset: u32) -> Self {
        Self { raw: offset }
    }

    /// The text size of some primitive text-like object.
    ///
    /// Accepts `char`, `&str`, and `&String`.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use rustpython_parser_vendored::text_size::*;
    /// let char_size = TextSize::of('🦀');
    /// assert_eq!(char_size, TextSize::from(4));
    ///
    /// let str_size = TextSize::of("rust-analyzer");
    /// assert_eq!(str_size, TextSize::from(13));
    /// ```
    #[inline]
    pub fn of<T: TextLen>(text: T) -> TextSize {
        text.text_len()
    }

    /// Returns current raw `offset` as u32.
    ///
    /// This is a `const fn`, so it can be used in constant expressions.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use rustpython_parser_vendored::text_size::*;
    /// assert_eq!(TextSize::from(4).to_u32(), 4);
    ///
    /// const RAW: u32 = TextSize::new(4).to_u32();
    /// assert_eq!(RAW, 4);
    /// ```
    #[inline]
    pub const fn to_u32(&self) -> u32 {
        self.raw
    }

    /// Returns current raw `offset` as usize.
    ///
    /// This is a `const fn`, so it can be used in constant expressions.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use rustpython_parser_vendored::text_size::*;
    /// assert_eq!(TextSize::from(4).to_usize(), 4);
    ///
    /// const RAW: usize = TextSize::new(4).to_usize();
    /// assert_eq!(RAW, 4);
    /// ```
    #[inline]
    pub const fn to_usize(&self) -> usize {
        // Widening cast, lossless wherever `usize` is at least 32 bits wide.
        self.raw as usize
    }
}
/// Methods to act like a primitive integer type, where reasonably applicable.
// Last updated for parity with Rust 1.42.0.
impl TextSize {
    /// Adds `rhs` to this size, returning `None` if the sum overflows.
    #[inline]
    pub fn checked_add(self, rhs: TextSize) -> Option<TextSize> {
        let raw = self.raw.checked_add(rhs.raw)?;
        Some(TextSize { raw })
    }

    /// Subtracts `rhs` from this size, returning `None` if the difference
    /// overflows.
    #[inline]
    pub fn checked_sub(self, rhs: TextSize) -> Option<TextSize> {
        let raw = self.raw.checked_sub(rhs.raw)?;
        Some(TextSize { raw })
    }
}
impl From<u32> for TextSize {
#[inline]
fn from(raw: u32) -> Self {
TextSize::new(raw)
}
}
impl From<TextSize> for u32 {
#[inline]
fn from(value: TextSize) -> Self {
value.to_u32()
}
}
impl TryFrom<usize> for TextSize {
type Error = TryFromIntError;
#[inline]
fn try_from(value: usize) -> Result<Self, TryFromIntError> {
Ok(u32::try_from(value)?.into())
}
}
impl From<TextSize> for usize {
    /// Widens the raw `u32` offset to `usize`.
    #[inline]
    fn from(value: TextSize) -> Self {
        value.raw as usize
    }
}
// Generates a binary operator for `TextSize` in by-value and by-reference forms.
//
// `$Op` is the operator trait, `$f` its method name and `$op` the operator
// token.
macro_rules! ops {
(impl $Op:ident for TextSize by fn $f:ident = $op:tt) => {
// `TextSize $op TextSize`: applies the operator to the raw `u32` values,
// so overflow behaves as it does for that primitive operator.
impl $Op<TextSize> for TextSize {
type Output = TextSize;
#[inline]
fn $f(self, other: TextSize) -> TextSize {
TextSize { raw: self.raw $op other.raw }
}
}
// `TextSize $op &TextSize`: dereference the right-hand side.
impl $Op<&TextSize> for TextSize {
type Output = TextSize;
#[inline]
fn $f(self, other: &TextSize) -> TextSize {
self $op *other
}
}
// `&TextSize $op T` for every `T` the by-value operator accepts:
// copy the left-hand side out of the reference.
impl<T> $Op<T> for &TextSize
where
TextSize: $Op<T, Output=TextSize>,
{
type Output = TextSize;
#[inline]
fn $f(self, other: T) -> TextSize {
*self $op other
}
}
};
}
// Instantiate `+` and `-` for `TextSize`, by value and by reference.
ops!(impl Add for TextSize by fn add = +);
ops!(impl Sub for TextSize by fn sub = -);
impl<A> AddAssign<A> for TextSize
where
    TextSize: Add<A, Output = TextSize>,
{
    /// Implements `+=` for any right-hand side that `+` accepts.
    #[inline]
    fn add_assign(&mut self, rhs: A) {
        let current = *self;
        *self = Add::add(current, rhs);
    }
}
impl<S> SubAssign<S> for TextSize
where
    TextSize: Sub<S, Output = TextSize>,
{
    /// Implements `-=` for any right-hand side that `-` accepts.
    #[inline]
    fn sub_assign(&mut self, rhs: S) {
        let current = *self;
        *self = Sub::sub(current, rhs);
    }
}
impl<A> iter::Sum<A> for TextSize
where
TextSize: Add<A, Output = TextSize>,
{
#[inline]
fn sum<I: Iterator<Item = A>>(iter: I) -> TextSize {
iter.fold(0.into(), Add::add)
}
}

View file

@ -0,0 +1,37 @@
use {super::TextSize, std::convert::TryInto};
use priv_in_pub::Sealed;
// Private module holding the public `Sealed` marker trait. `TextLen` requires
// `Sealed` as a supertrait; because this module is not `pub`, code outside the
// parent module cannot name `Sealed` to implement it.
mod priv_in_pub {
pub trait Sealed {}
}
/// Primitives with a textual length that can be passed to [`TextSize::of`].
///
/// Implemented in this module for `&str`, `&String` and `char`. The `Sealed`
/// supertrait lives in a private module.
pub trait TextLen: Copy + Sealed {
/// The textual length of this primitive.
fn text_len(self) -> TextSize;
}
impl Sealed for &'_ str {}

impl TextLen for &'_ str {
    /// Returns the length of the string in bytes.
    ///
    /// Panics if the length does not fit in a `u32`.
    #[inline]
    fn text_len(self) -> TextSize {
        let converted = TryInto::<TextSize>::try_into(self.len());
        converted.unwrap()
    }
}
impl Sealed for &'_ String {}
impl TextLen for &'_ String {
#[inline]
fn text_len(self) -> TextSize {
self.as_str().text_len()
}
}
impl Sealed for char {}
impl TextLen for char {
#[inline]
#[allow(clippy::cast_possible_truncation)]
fn text_len(self) -> TextSize {
(self.len_utf8() as u32).into()
}
}