Replace row/column based Location with byte-offsets. (#3931)

This commit is contained in:
Micha Reiser 2023-04-26 20:11:02 +02:00 committed by GitHub
parent ee91598835
commit cab65b25da
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
418 changed files with 6203 additions and 7040 deletions

View file

@ -3,10 +3,11 @@
use std::ops::Deref;
use rustpython_parser::ast::{
Alias, Arg, Arguments, Boolop, Cmpop, Comprehension, Constant, ConversionFlag, Excepthandler,
Alias, Arg, Arguments, Boolop, Cmpop, Comprehension, Constant, Excepthandler,
ExcepthandlerKind, Expr, ExprKind, MatchCase, Operator, Pattern, PatternKind, Stmt, StmtKind,
Suite, Withitem,
};
use rustpython_parser::ConversionFlag;
use ruff_rustpython::vendor::{bytes, str};

View file

@ -1,98 +1,135 @@
//! Struct used to index source code, to enable efficient lookup of tokens that
//! are omitted from the AST (e.g., commented lines).
use rustpython_parser::ast::Location;
use crate::source_code::Locator;
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Tok;
use crate::types::Range;
pub struct Indexer {
commented_lines: Vec<usize>,
continuation_lines: Vec<usize>,
string_ranges: Vec<Range>,
/// Stores the ranges of comments sorted by [`TextRange::start`] in increasing order. No two ranges are overlapping.
comment_ranges: Vec<TextRange>,
/// Stores the start offset of continuation lines.
continuation_lines: Vec<TextSize>,
/// The range of all triple quoted strings in the source document. The ranges are sorted by their
/// [`TextRange::start`] position in increasing order. No two ranges are overlapping.
triple_quoted_string_ranges: Vec<TextRange>,
}
impl Indexer {
/// Return a slice of all lines that include a comment.
pub fn commented_lines(&self) -> &[usize] {
&self.commented_lines
}
pub fn from_tokens(tokens: &[LexResult], locator: &Locator) -> Self {
assert!(TextSize::try_from(locator.contents().len()).is_ok());
/// Return a slice of all lines that end with a continuation (backslash).
pub fn continuation_lines(&self) -> &[usize] {
&self.continuation_lines
}
/// Return a slice of all ranges that include a triple-quoted string.
pub fn string_ranges(&self) -> &[Range] {
&self.string_ranges
}
}
impl From<&[LexResult]> for Indexer {
fn from(lxr: &[LexResult]) -> Self {
let mut commented_lines = Vec::new();
let mut continuation_lines = Vec::new();
let mut string_ranges = Vec::new();
let mut prev: Option<(&Location, &Tok, &Location)> = None;
for (start, tok, end) in lxr.iter().flatten() {
// Token, end
let mut prev_end = TextSize::default();
let mut prev_token: Option<&Tok> = None;
let mut line_start = TextSize::default();
for (tok, range) in tokens.iter().flatten() {
let trivia = &locator.contents()[TextRange::new(prev_end, range.start())];
// Get the trivia between the previous and the current token and detect any newlines.
// This is necessary because `RustPython` doesn't emit `[Tok::Newline]` tokens
// between any two tokens that form a continuation nor multiple newlines in a row.
// That's why we have to extract the newlines "manually".
for (index, text) in trivia.match_indices(['\n', '\r']) {
if text == "\r" && trivia.as_bytes().get(index + 1) == Some(&b'\n') {
continue;
}
// Newlines after a comment or new-line never form a continuation.
if !matches!(
prev_token,
Some(Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(..)) | None
) {
continuation_lines.push(line_start);
}
// SAFETY: Safe because of the len assertion at the top of the function.
#[allow(clippy::cast_possible_truncation)]
{
line_start = prev_end + TextSize::new((index + 1) as u32);
}
}
match tok {
Tok::Comment(..) => commented_lines.push(start.row()),
Tok::Comment(..) => {
commented_lines.push(*range);
}
Tok::Newline | Tok::NonLogicalNewline => {
line_start = range.end();
}
Tok::String {
triple_quoted: true,
..
} => string_ranges.push(Range::new(*start, *end)),
_ => (),
} => string_ranges.push(*range),
_ => {}
}
if let Some((.., prev_tok, prev_end)) = prev {
if !matches!(
prev_tok,
Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(..)
) {
for line in prev_end.row()..start.row() {
continuation_lines.push(line);
}
}
}
prev = Some((start, tok, end));
prev_token = Some(tok);
prev_end = range.end();
}
Self {
commented_lines,
comment_ranges: commented_lines,
continuation_lines,
string_ranges,
triple_quoted_string_ranges: string_ranges,
}
}
/// Returns the byte offset ranges of comments
pub fn comment_ranges(&self) -> &[TextRange] {
    // Hand out a borrowed slice rather than exposing the backing `Vec`.
    self.comment_ranges.as_slice()
}
/// Returns the line start positions of continuations (backslash).
pub fn continuation_line_starts(&self) -> &[TextSize] {
    // The field stores the start offset of each continuation line.
    self.continuation_lines.as_slice()
}
/// Return a slice of all ranges that include a triple-quoted string. The ranges are sorted by
/// [`TextRange::start`] in increasing order. No two ranges are overlapping.
pub fn triple_quoted_string_ranges(&self) -> &[TextRange] {
    // Borrow the collected ranges as a slice.
    self.triple_quoted_string_ranges.as_slice()
}
/// Returns `true` if the start of `offset`'s line is one of the recorded
/// continuation line starts.
pub fn is_continuation(&self, offset: TextSize, locator: &Locator) -> bool {
    // Resolve the line start first, then look it up by binary search.
    self.continuation_lines
        .binary_search(&locator.line_start(offset))
        .is_ok()
}
}
#[cfg(test)]
mod tests {
use rustpython_parser::ast::Location;
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser::lexer::LexResult;
use rustpython_parser::{lexer, Mode};
use crate::source_code::Indexer;
use crate::types::Range;
use crate::source_code::{Indexer, Locator};
#[test]
fn continuation() {
let contents = r#"x = 1"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
assert_eq!(indexer.continuation_lines(), Vec::<usize>::new().as_slice());
let indexer = Indexer::from_tokens(&lxr, &Locator::new(contents));
assert_eq!(indexer.continuation_line_starts(), &[]);
let contents = r#"
# Hello, world!
# Hello, world!
x = 1
y = 2
"#
"#
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
assert_eq!(indexer.continuation_lines(), Vec::<usize>::new().as_slice());
let indexer = Indexer::from_tokens(&lxr, &Locator::new(contents));
assert_eq!(indexer.continuation_line_starts(), &[]);
let contents = r#"
x = \
@ -111,8 +148,20 @@ if True:
"#
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
assert_eq!(indexer.continuation_lines(), [1, 5, 6, 11]);
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(
indexer.continuation_line_starts(),
[
// row 1
TextSize::from(0),
// row 5
TextSize::from(22),
// row 6
TextSize::from(32),
// row 11
TextSize::from(71),
]
);
let contents = r#"
x = 1; import sys
@ -131,16 +180,24 @@ import os
"#
.trim();
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
assert_eq!(indexer.continuation_lines(), [9, 12]);
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(
indexer.continuation_line_starts(),
[
// row 9
TextSize::from(84),
// row 12
TextSize::from(116)
]
);
}
#[test]
fn string_ranges() {
let contents = r#""this is a single-quoted string""#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
assert_eq!(indexer.string_ranges(), &vec![]);
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(indexer.triple_quoted_string_ranges(), []);
let contents = r#"
"""
@ -148,10 +205,10 @@ import os
"""
"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(
indexer.string_ranges(),
&vec![Range::new(Location::new(2, 12), Location::new(4, 15))]
indexer.triple_quoted_string_ranges(),
[TextRange::new(TextSize::from(13), TextSize::from(71))]
);
let contents = r#"
@ -160,10 +217,10 @@ import os
"""
"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(
indexer.string_ranges(),
&vec![Range::new(Location::new(2, 12), Location::new(4, 15))]
indexer.triple_quoted_string_ranges(),
[TextRange::new(TextSize::from(13), TextSize::from(107))]
);
let contents = r#"
@ -177,12 +234,12 @@ import os
"""
"#;
let lxr: Vec<LexResult> = lexer::lex(contents, Mode::Module).collect();
let indexer: Indexer = lxr.as_slice().into();
let indexer = Indexer::from_tokens(lxr.as_slice(), &Locator::new(contents));
assert_eq!(
indexer.string_ranges(),
&vec![
Range::new(Location::new(2, 12), Location::new(5, 15)),
Range::new(Location::new(6, 12), Location::new(9, 15))
indexer.triple_quoted_string_ranges(),
&[
TextRange::new(TextSize::from(13), TextSize::from(85)),
TextRange::new(TextSize::from(98), TextSize::from(161))
]
);
}

View file

@ -1,12 +1,14 @@
use crate::source_code::SourceLocation;
use ruff_text_size::{TextLen, TextRange, TextSize};
use rustpython_parser::ast::Location;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::fmt;
use std::fmt::{Debug, Formatter};
use std::num::NonZeroUsize;
use std::ops::Deref;
use std::sync::Arc;
/// Index for fast [`Location`] to [byte offset](TextSize) conversions.
/// Index for fast [byte offset](TextSize) to [`SourceLocation`] conversions.
///
/// Cloning a [`LineIndex`] is cheap because it only requires bumping a reference count.
#[derive(Clone)]
@ -58,28 +60,63 @@ impl LineIndex {
self.inner.kind
}
/// Converts a [`Location`] to it's [byte offset](TextSize) in the source code.
pub fn location_offset(&self, location: Location, contents: &str) -> TextSize {
let line_index = OneIndexed::new(location.row()).unwrap();
let line_range = self.line_range(line_index, contents);
/// Returns the row and column index for an offset.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::TextSize;
/// # use ruff_python_ast::source_code::{LineIndex, OneIndexed, SourceLocation};
/// let source = "def a():\n pass";
/// let index = LineIndex::from_source_text(source);
///
/// assert_eq!(
/// index.source_location(TextSize::from(0), source),
/// SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(0) }
/// );
///
/// assert_eq!(
/// index.source_location(TextSize::from(4), source),
/// SourceLocation { row: OneIndexed::from_zero_indexed(0), column: OneIndexed::from_zero_indexed(4) }
/// );
/// assert_eq!(
/// index.source_location(TextSize::from(13), source),
/// SourceLocation { row: OneIndexed::from_zero_indexed(1), column: OneIndexed::from_zero_indexed(4) }
/// );
/// ```
///
/// ## Panics
///
/// If the offset is out of bounds.
pub fn source_location(&self, offset: TextSize, content: &str) -> SourceLocation {
match self.line_starts().binary_search(&offset) {
// Offset is at the start of a line
Ok(row) => SourceLocation {
row: OneIndexed::from_zero_indexed(row),
column: OneIndexed::from_zero_indexed(0),
},
Err(next_row) => {
// SAFETY: Safe because the index always contains an entry for the offset 0
let row = next_row - 1;
let mut line_start = self.line_starts()[row];
let column_offset = match self.kind() {
IndexKind::Ascii => TextSize::try_from(location.column()).unwrap(),
IndexKind::Utf8 => {
let line = &contents[line_range];
let column = if self.kind().is_ascii() {
usize::from(offset) - usize::from(line_start)
} else {
// Don't count the BOM character as a column.
if line_start == TextSize::from(0) && content.starts_with('\u{feff}') {
line_start = '\u{feff}'.text_len();
}
// Skip the bom character
let bom_len =
usize::from(line_index.to_zero_indexed() == 0 && line.starts_with('\u{feff}'));
content[TextRange::new(line_start, offset)].chars().count()
};
match line.char_indices().nth(location.column() + bom_len) {
Some((offset, _)) => TextSize::try_from(offset).unwrap(),
None => line_range.len(),
SourceLocation {
row: OneIndexed::from_zero_indexed(row),
column: OneIndexed::from_zero_indexed(column),
}
}
};
line_range.start() + column_offset
}
}
/// Return the number of lines in the source code.
@ -87,6 +124,35 @@ impl LineIndex {
self.line_starts().len()
}
/// Returns the row number for a given offset.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::TextSize;
/// # use ruff_python_ast::source_code::{LineIndex, OneIndexed, SourceLocation};
/// let source = "def a():\n pass";
/// let index = LineIndex::from_source_text(source);
///
/// assert_eq!(index.line_index(TextSize::from(0)), OneIndexed::from_zero_indexed(0));
/// assert_eq!(index.line_index(TextSize::from(4)), OneIndexed::from_zero_indexed(0));
/// assert_eq!(index.line_index(TextSize::from(13)), OneIndexed::from_zero_indexed(1));
/// ```
///
/// ## Panics
///
/// If the offset is out of bounds.
pub fn line_index(&self, offset: TextSize) -> OneIndexed {
    // `Ok` means `offset` is exactly the start of a line. `Err` carries the index of the
    // *following* line, so step back by one. The subtraction cannot underflow because the
    // index always contains an entry for the offset 0.
    let zero_indexed_row = self
        .line_starts()
        .binary_search(&offset)
        .unwrap_or_else(|next_row| next_row - 1);

    OneIndexed::from_zero_indexed(zero_indexed_row)
}
/// Returns the [byte offset](TextSize) for the `line` with the given index.
pub(crate) fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize {
let row_index = line.to_zero_indexed();
@ -159,12 +225,19 @@ enum IndexKind {
Utf8,
}
impl IndexKind {
const fn is_ascii(self) -> bool {
matches!(self, IndexKind::Ascii)
}
}
/// Type-safe wrapper for a value whose logical range starts at `1`, for
/// instance the line or column numbers in a file.
///
/// Internally this is represented as a [`NonZeroUsize`], which enables some
/// memory optimizations.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct OneIndexed(NonZeroUsize);
impl OneIndexed {
@ -238,8 +311,8 @@ const fn unwrap<T: Copy>(option: Option<T>) -> T {
#[cfg(test)]
mod tests {
use crate::source_code::line_index::LineIndex;
use crate::source_code::{OneIndexed, SourceLocation};
use ruff_text_size::TextSize;
use rustpython_parser::ast::Location;
#[test]
fn ascii_index() {
@ -265,21 +338,38 @@ mod tests {
}
#[test]
fn ascii_byte_offset() {
fn ascii_source_location() {
let contents = "x = 1\ny = 2";
let index = LineIndex::from_source_text(contents);
// First row.
let loc = index.location_offset(Location::new(1, 0), contents);
assert_eq!(loc, TextSize::from(0));
let loc = index.source_location(TextSize::from(2), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(2)
}
);
// Second row.
let loc = index.location_offset(Location::new(2, 0), contents);
assert_eq!(loc, TextSize::from(6));
let loc = index.source_location(TextSize::from(6), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
// One-past-the-end.
let loc = index.location_offset(Location::new(3, 0), contents);
assert_eq!(loc, TextSize::from(11));
let loc = index.source_location(TextSize::from(11), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(5)
}
);
}
#[test]
@ -289,16 +379,25 @@ mod tests {
assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(6)]);
assert_eq!(
index.location_offset(Location::new(1, 4), contents),
TextSize::from(4)
index.source_location(TextSize::from(4), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(4)
}
);
assert_eq!(
index.location_offset(Location::new(2, 0), contents),
TextSize::from(6)
index.source_location(TextSize::from(6), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
assert_eq!(
index.location_offset(Location::new(2, 1), contents),
TextSize::from(7)
index.source_location(TextSize::from(7), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1)
}
);
}
@ -309,16 +408,25 @@ mod tests {
assert_eq!(index.line_starts(), &[TextSize::from(0), TextSize::from(7)]);
assert_eq!(
index.location_offset(Location::new(1, 4), contents),
TextSize::from(4)
index.source_location(TextSize::from(4), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(4)
}
);
assert_eq!(
index.location_offset(Location::new(2, 0), contents),
TextSize::from(7)
index.source_location(TextSize::from(7), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
assert_eq!(
index.location_offset(Location::new(2, 1), contents),
TextSize::from(8)
index.source_location(TextSize::from(8), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1)
}
);
}
@ -367,16 +475,25 @@ mod tests {
// Second '
assert_eq!(
index.location_offset(Location::new(1, 6), contents),
TextSize::from(9)
index.source_location(TextSize::from(9), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(6)
}
);
assert_eq!(
index.location_offset(Location::new(2, 0), contents),
TextSize::from(11)
index.source_location(TextSize::from(11), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
assert_eq!(
index.location_offset(Location::new(2, 1), contents),
TextSize::from(12)
index.source_location(TextSize::from(12), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1)
}
);
}
@ -392,16 +509,25 @@ mod tests {
// Second '
assert_eq!(
index.location_offset(Location::new(1, 6), contents),
TextSize::from(9)
index.source_location(TextSize::from(9), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(6)
}
);
assert_eq!(
index.location_offset(Location::new(2, 0), contents),
TextSize::from(12)
index.source_location(TextSize::from(12), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
assert_eq!(
index.location_offset(Location::new(2, 1), contents),
TextSize::from(13)
index.source_location(TextSize::from(13), contents),
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(1)
}
);
}
@ -415,23 +541,51 @@ mod tests {
);
// First row.
let loc = index.location_offset(Location::new(1, 0), contents);
assert_eq!(loc, TextSize::from(0));
let loc = index.source_location(TextSize::from(0), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(0)
}
);
let loc = index.location_offset(Location::new(1, 5), contents);
assert_eq!(loc, TextSize::from(5));
assert_eq!(&"x = '☃'\ny = 2"[usize::from(loc)..], "☃'\ny = 2");
let loc = index.source_location(TextSize::from(5), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(5)
}
);
let loc = index.location_offset(Location::new(1, 6), contents);
assert_eq!(loc, TextSize::from(8));
assert_eq!(&"x = '☃'\ny = 2"[usize::from(loc)..], "'\ny = 2");
let loc = index.source_location(TextSize::from(8), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(0),
column: OneIndexed::from_zero_indexed(6)
}
);
// Second row.
let loc = index.location_offset(Location::new(2, 0), contents);
assert_eq!(loc, TextSize::from(10));
let loc = index.source_location(TextSize::from(10), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(0)
}
);
// One-past-the-end.
let loc = index.location_offset(Location::new(3, 0), contents);
assert_eq!(loc, TextSize::from(15));
let loc = index.source_location(TextSize::from(15), contents);
assert_eq!(
loc,
SourceLocation {
row: OneIndexed::from_zero_indexed(1),
column: OneIndexed::from_zero_indexed(5)
}
);
}
}

View file

@ -1,61 +1,399 @@
//! Struct used to efficiently slice source code at byte offsets.
use crate::source_code::line_index::LineIndex;
use crate::source_code::SourceCode;
use crate::source_code::{LineIndex, OneIndexed, SourceCode, SourceLocation};
use once_cell::unsync::OnceCell;
use ruff_text_size::TextSize;
use rustpython_parser::ast::Location;
use crate::types::Range;
use ruff_text_size::{TextLen, TextRange, TextSize};
use std::ops::Add;
pub struct Locator<'a> {
contents: &'a str,
line_index: OnceCell<LineIndex>,
index: OnceCell<LineIndex>,
}
impl<'a> Locator<'a> {
pub const fn new(contents: &'a str) -> Self {
Self {
contents,
line_index: OnceCell::new(),
index: OnceCell::new(),
}
}
fn get_or_init_index(&self) -> &LineIndex {
self.line_index
/// Computes the one-indexed line of `offset` by looking it up in the line index.
#[deprecated(
    note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead."
)]
pub fn compute_line_index(&self, offset: TextSize) -> OneIndexed {
    let index = self.to_index();
    index.line_index(offset)
}
/// Computes the `SourceLocation` of `offset` by delegating to `to_source_code`.
#[deprecated(
    note = "This is expensive, avoid using outside of the diagnostic phase. Prefer the other `Locator` methods instead."
)]
pub fn compute_source_location(&self, offset: TextSize) -> SourceLocation {
    let source_code = self.to_source_code();
    source_code.source_location(offset)
}
fn to_index(&self) -> &LineIndex {
self.index
.get_or_init(|| LineIndex::from_source_text(self.contents))
}
#[inline]
pub fn to_source_code(&self) -> SourceCode<'a, '_> {
pub fn line_index(&self) -> Option<&LineIndex> {
self.index.get()
}
pub fn to_source_code(&self) -> SourceCode {
SourceCode {
index: self.get_or_init_index(),
index: self.to_index(),
text: self.contents,
}
}
/// Take the source code up to the given [`Location`].
#[inline]
pub fn up_to(&self, location: Location) -> &'a str {
self.to_source_code().up_to(location)
/// Computes the start position of the line of `offset`.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::TextSize;
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\rthird line");
///
/// assert_eq!(locator.line_start(TextSize::from(0)), TextSize::from(0));
/// assert_eq!(locator.line_start(TextSize::from(4)), TextSize::from(0));
///
/// assert_eq!(locator.line_start(TextSize::from(14)), TextSize::from(11));
/// assert_eq!(locator.line_start(TextSize::from(28)), TextSize::from(23));
/// ```
///
/// ## Panics
/// If `offset` is out of bounds.
pub fn line_start(&self, offset: TextSize) -> TextSize {
    let preceding = &self.contents[TextRange::up_to(offset)];

    match preceding.rfind(['\n', '\r']) {
        // The line begins one byte after the closest newline character before `offset`.
        // The conversion cannot fail because the byte index is smaller than `offset`.
        Some(newline) => TextSize::try_from(newline)
            .unwrap()
            .add(TextSize::from(1)),
        // No newline precedes `offset`, so it is on the first line.
        None => TextSize::default(),
    }
}
/// Take the source code after the given [`Location`].
#[inline]
pub fn after(&self, location: Location) -> &'a str {
self.to_source_code().after(location)
/// Returns `true` if `offset` is zero or the text before `offset` ends with `\n` or `\r`.
pub fn is_at_start_of_line(&self, offset: TextSize) -> bool {
    // The very first byte always starts a line.
    if offset == TextSize::from(0) {
        return true;
    }

    let preceding = &self.contents[TextRange::up_to(offset)];
    preceding.ends_with(['\n', '\r'])
}
/// Take the source code between the given [`Range`].
#[inline]
pub fn slice<R: Into<Range>>(&self, range: R) -> &'a str {
self.to_source_code().slice(range)
/// Computes the offset that is right after the newline character that ends `offset`'s line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.full_line_end(TextSize::from(3)), TextSize::from(11));
/// assert_eq!(locator.full_line_end(TextSize::from(14)), TextSize::from(24));
/// assert_eq!(locator.full_line_end(TextSize::from(28)), TextSize::from(34));
/// ```
///
/// ## Panics
///
/// If `offset` is past the end of the content.
pub fn full_line_end(&self, offset: TextSize) -> TextSize {
    let rest = &self.contents[usize::from(offset)..];

    match rest.find(['\n', '\r']) {
        Some(newline) => {
            // A `\r\n` pair is one two-byte line ending; a lone `\r` or `\n` is one byte.
            let terminator_len = if rest[newline..].starts_with("\r\n") {
                2
            } else {
                1
            };
            let relative_offset = TextSize::try_from(newline + terminator_len).unwrap();
            offset.add(relative_offset)
        }
        // No newline after `offset`: the line runs to the end of the contents.
        None => self.contents.text_len(),
    }
}
/// Return the byte offset of the given [`Location`].
/// Computes the offset that is right before the newline character that ends `offset`'s line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.line_end(TextSize::from(3)), TextSize::from(10));
/// assert_eq!(locator.line_end(TextSize::from(14)), TextSize::from(22));
/// assert_eq!(locator.line_end(TextSize::from(28)), TextSize::from(34));
/// ```
///
/// ## Panics
///
/// If `offset` is past the end of the content.
pub fn line_end(&self, offset: TextSize) -> TextSize {
    let rest = &self.contents[usize::from(offset)..];

    match rest.find(['\n', '\r']) {
        // Stop right before the first newline character at or after `offset`.
        Some(newline) => offset + TextSize::try_from(newline).unwrap(),
        // No newline after `offset`: the line runs to the end of the contents.
        None => self.contents.text_len(),
    }
}
/// Computes the range of this `offset`'s line.
///
/// The range starts at the beginning of the line and goes up to, and including, the new line character
/// at the end of the line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.full_line_range(TextSize::from(3)), TextRange::new(TextSize::from(0), TextSize::from(11)));
/// assert_eq!(locator.full_line_range(TextSize::from(14)), TextRange::new(TextSize::from(11), TextSize::from(24)));
/// assert_eq!(locator.full_line_range(TextSize::from(28)), TextRange::new(TextSize::from(24), TextSize::from(34)));
/// ```
///
/// ## Panics
/// If `offset` is out of bounds.
pub fn full_line_range(&self, offset: TextSize) -> TextRange {
    let start = self.line_start(offset);
    // The end lies after the line's newline character(s).
    let end = self.full_line_end(offset);
    TextRange::new(start, end)
}
/// Computes the range of this `offset`'s line ending before the newline character.
///
/// The range starts at the beginning of the line and goes up to, but excluding, the new line character
/// at the end of the line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.line_range(TextSize::from(3)), TextRange::new(TextSize::from(0), TextSize::from(10)));
/// assert_eq!(locator.line_range(TextSize::from(14)), TextRange::new(TextSize::from(11), TextSize::from(22)));
/// assert_eq!(locator.line_range(TextSize::from(28)), TextRange::new(TextSize::from(24), TextSize::from(34)));
/// ```
///
/// ## Panics
/// If `offset` is out of bounds.
pub fn line_range(&self, offset: TextSize) -> TextRange {
    let start = self.line_start(offset);
    // The end lies before the line's newline character.
    let end = self.line_end(offset);
    TextRange::new(start, end)
}
/// Returns the text of the `offset`'s line.
///
/// The line includes the newline characters at the end of the line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.full_line(TextSize::from(3)), "First line\n");
/// assert_eq!(locator.full_line(TextSize::from(14)), "second line\r\n");
/// assert_eq!(locator.full_line(TextSize::from(28)), "third line");
/// ```
///
/// ## Panics
/// If `offset` is out of bounds.
pub fn full_line(&self, offset: TextSize) -> &'a str {
    let range = self.full_line_range(offset);
    &self.contents[range]
}
/// Returns the text of the `offset`'s line.
///
/// Excludes the newline characters at the end of the line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(locator.line(TextSize::from(3)), "First line");
/// assert_eq!(locator.line(TextSize::from(14)), "second line");
/// assert_eq!(locator.line(TextSize::from(28)), "third line");
/// ```
///
/// ## Panics
/// If `offset` is out of bounds.
pub fn line(&self, offset: TextSize) -> &'a str {
    let range = self.line_range(offset);
    &self.contents[range]
}
/// Computes the range of all lines that this `range` covers.
///
/// The range starts at the beginning of the line at `range.start()` and goes up to, and including, the new line character
/// at the end of `range.end()`'s line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(
/// locator.full_lines_range(TextRange::new(TextSize::from(3), TextSize::from(5))),
/// TextRange::new(TextSize::from(0), TextSize::from(11))
/// );
/// assert_eq!(
/// locator.full_lines_range(TextRange::new(TextSize::from(3), TextSize::from(14))),
/// TextRange::new(TextSize::from(0), TextSize::from(24))
/// );
/// ```
///
/// ## Panics
/// If the start or end of `range` is out of bounds.
pub fn full_lines_range(&self, range: TextRange) -> TextRange {
    // Widen the start to its line start and the end to after its line's newline.
    let first_line_start = self.line_start(range.start());
    let last_line_end = self.full_line_end(range.end());
    TextRange::new(first_line_start, last_line_end)
}
/// Computes the range of all lines that this `range` covers.
///
/// The range starts at the beginning of the line at `range.start()` and goes up to, but excluding, the new line character
/// at the end of `range.end()`'s line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(
/// locator.lines_range(TextRange::new(TextSize::from(3), TextSize::from(5))),
/// TextRange::new(TextSize::from(0), TextSize::from(10))
/// );
/// assert_eq!(
/// locator.lines_range(TextRange::new(TextSize::from(3), TextSize::from(14))),
/// TextRange::new(TextSize::from(0), TextSize::from(22))
/// );
/// ```
///
/// ## Panics
/// If the start or end of `range` is out of bounds.
pub fn lines_range(&self, range: TextRange) -> TextRange {
    // Widen the start to its line start and the end to just before its line's newline.
    let first_line_start = self.line_start(range.start());
    let last_line_end = self.line_end(range.end());
    TextRange::new(first_line_start, last_line_end)
}
/// Returns true if the text of `range` contains any line break.
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert!(
/// !locator.contains_line_break(TextRange::new(TextSize::from(3), TextSize::from(5))),
/// );
/// assert!(
/// locator.contains_line_break(TextRange::new(TextSize::from(3), TextSize::from(14))),
/// );
/// ```
///
/// ## Panics
/// If the `range` is out of bounds.
pub fn contains_line_break(&self, range: TextRange) -> bool {
    // Either `\n` or `\r` counts as a line break.
    self.contents[range].contains(['\n', '\r'])
}
/// Returns the text of all lines that include `range`.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(
/// locator.lines(TextRange::new(TextSize::from(3), TextSize::from(5))),
/// "First line"
/// );
/// assert_eq!(
/// locator.lines(TextRange::new(TextSize::from(3), TextSize::from(14))),
/// "First line\nsecond line"
/// );
/// ```
///
/// ## Panics
/// If the start or end of `range` is out of bounds.
pub fn lines(&self, range: TextRange) -> &'a str {
    let covered = self.lines_range(range);
    &self.contents[covered]
}
/// Returns the text of every line touched by `range`, including the newline
/// characters that terminate the last line.
///
/// ## Examples
///
/// ```
/// # use ruff_text_size::{TextRange, TextSize};
/// # use ruff_python_ast::source_code::Locator;
///
/// let locator = Locator::new("First line\nsecond line\r\nthird line");
///
/// assert_eq!(
///     locator.full_lines(TextRange::new(TextSize::from(3), TextSize::from(5))),
///     "First line\n"
/// );
/// assert_eq!(
///     locator.full_lines(TextRange::new(TextSize::from(3), TextSize::from(14))),
///     "First line\nsecond line\r\n"
/// );
/// ```
///
/// ## Panics
/// If the start or end of `range` is out of bounds.
pub fn full_lines(&self, range: TextRange) -> &'a str {
    let covered = self.full_lines_range(range);
    &self.contents[covered]
}
/// Take the source code up to the given [`TextSize`].
#[inline]
pub fn offset(&self, location: Location) -> TextSize {
self.to_source_code().offset(location)
pub fn up_to(&self, offset: TextSize) -> &'a str {
&self.contents[TextRange::up_to(offset)]
}
/// Returns everything in the source code from byte offset `offset` to the end.
#[inline]
pub fn after(&self, offset: TextSize) -> &'a str {
    let start = usize::from(offset);
    &self.contents[start..]
}
/// Returns the part of the source code covered by the given [`TextRange`].
#[inline]
pub fn slice(&self, range: TextRange) -> &'a str {
    let contents: &'a str = self.contents;
    &contents[range]
}
/// Return the underlying source code.
@ -63,17 +401,15 @@ impl<'a> Locator<'a> {
self.contents
}
/// Returns how many lines the source code has, as reported by the line index
/// (which is created on first use).
pub fn count_lines(&self) -> usize {
    self.get_or_init_index().line_count()
}
/// Returns the length of the source code in bytes.
pub const fn len(&self) -> usize {
    str::len(self.contents)
}
/// Return the length of the source code as a [`TextSize`].
pub fn text_len(&self) -> TextSize {
self.contents.text_len()
}
/// Return `true` if the source code is empty.
pub const fn is_empty(&self) -> bool {
self.contents.is_empty()

View file

@ -5,17 +5,17 @@ mod locator;
mod stylist;
pub use crate::source_code::line_index::{LineIndex, OneIndexed};
use crate::types::Range;
pub use generator::Generator;
pub use indexer::Indexer;
pub use locator::Locator;
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser as parser;
use rustpython_parser::ast::Location;
use rustpython_parser::{lexer, Mode, ParseError};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
use std::fmt::{Debug, Formatter};
use std::sync::Arc;
pub use stylist::{LineEnding, Stylist};
/// Run round-trip source code generation on a given Python code.
@ -29,7 +29,7 @@ pub fn round_trip(code: &str, source_path: &str) -> Result<String, ParseError> {
Ok(generator.generate())
}
/// Gives access to the source code of a file and allows mapping between [`TextSize`] and [`SourceLocation`].
#[derive(Debug)]
pub struct SourceCode<'src, 'index> {
text: &'src str,
@ -44,37 +44,34 @@ impl<'src, 'index> SourceCode<'src, 'index> {
}
}
/// Take the source code up to the given [`Location`].
pub fn up_to(&self, location: Location) -> &'src str {
let offset = self.index.location_offset(location, self.text);
/// Computes the one indexed row and column numbers for `offset`.
#[inline]
pub fn source_location(&self, offset: TextSize) -> SourceLocation {
self.index.source_location(offset, self.text)
}
/// Returns the [`OneIndexed`] line index that the underlying line index reports for `offset`.
#[inline]
pub fn line_index(&self, offset: TextSize) -> OneIndexed {
self.index.line_index(offset)
}
/// Returns the prefix of the source code that ends at the given [`TextSize`].
#[inline]
pub fn up_to(&self, offset: TextSize) -> &'src str {
    let prefix = TextRange::up_to(offset);
    &self.text[prefix]
}
/// Take the source code after the given [`Location`].
pub fn after(&self, location: Location) -> &'src str {
let offset = self.index.location_offset(location, self.text);
/// Take the source code after the given [`TextSize`].
#[inline]
pub fn after(&self, offset: TextSize) -> &'src str {
&self.text[usize::from(offset)..]
}
/// Take the source code between the given [`Range`].
pub fn slice<R: Into<Range>>(&self, range: R) -> &'src str {
let range = self.text_range(range);
/// Take the source code between the given [`TextRange`].
pub fn slice(&self, range: TextRange) -> &'src str {
&self.text[range]
}
/// Maps a row/column based range (anything convertible into [`Range`]) to the
/// equivalent byte offset [`TextRange`].
pub fn text_range<R: Into<Range>>(&self, range: R) -> TextRange {
    let range: Range = range.into();
    TextRange::new(
        self.index.location_offset(range.location, self.text),
        self.index.location_offset(range.end_location, self.text),
    )
}
/// Return the byte offset of the given [`Location`].
///
/// Delegates to the line index, which resolves `location` against this source's text.
pub fn offset(&self, location: Location) -> TextSize {
self.index.location_offset(location, self.text)
}
/// Returns the start offset of the one-indexed `line`, as reported by the underlying line index.
pub fn line_start(&self, line: OneIndexed) -> TextSize {
self.index.line_start(line, self.text)
}
@ -87,20 +84,6 @@ impl<'src, 'index> SourceCode<'src, 'index> {
self.index.line_range(line, self.text)
}
/// Returns the text of all lines spanning from the row of `range.location`
/// to the row of `range.end_location`.
///
/// Panics if either row is rejected by [`OneIndexed::new`].
pub fn lines(&self, range: Range) -> &'src str {
    // Looks up the byte range of the line for a given row.
    let row_range = |row| {
        self.index
            .line_range(OneIndexed::new(row).unwrap(), self.text)
    };
    let first = row_range(range.location.row());
    let last = row_range(range.end_location.row());
    &self.text[TextRange::new(first.start(), last.end())]
}
/// Returns the source text of the line with the given index
#[inline]
pub fn line_text(&self, index: OneIndexed) -> &'src str {
@ -131,69 +114,43 @@ impl Eq for SourceCode<'_, '_> {}
/// A Builder for constructing a [`SourceFile`]
pub struct SourceFileBuilder {
name: Box<str>,
code: Option<FileSourceCode>,
code: Box<str>,
index: Option<LineIndex>,
}
impl SourceFileBuilder {
/// Creates a new builder for a file named `name`.
pub fn new(name: &str) -> Self {
pub fn new<Name: Into<Box<str>>, Code: Into<Box<str>>>(name: Name, code: Code) -> Self {
Self {
name: Box::from(name),
code: None,
name: name.into(),
code: code.into(),
index: None,
}
}
/// Creates a new builder for a file named `name`, taking ownership of the `String`.
///
/// The builder starts without any source code.
pub fn from_string(name: String) -> Self {
Self {
name: Box::from(name),
code: None,
}
}
/// Consumes `self` and returns a builder for a file with the source text and the [`LineIndex`] copied
/// from `source`.
#[must_use]
pub fn source_code(mut self, source: &SourceCode) -> Self {
self.set_source_code(source);
pub fn line_index(mut self, index: LineIndex) -> Self {
self.index = Some(index);
self
}
/// Copies the source text and [`LineIndex`] from `source`.
pub fn set_source_code(&mut self, source: &SourceCode) {
// Store owned copies so the builder does not keep borrowing from `source`.
self.code = Some(FileSourceCode {
text: Box::from(source.text()),
index: source.index.clone(),
});
}
/// Consumes `self` and returns a builder for a file with the source text `text`. Builds the [`LineIndex`] from `text`.
#[must_use]
pub fn source_text(self, text: &str) -> Self {
// The index built here is temporary: it is only handed to `source_code` for the duration of this call.
self.source_code(&SourceCode::new(text, &LineIndex::from_source_text(text)))
}
/// Consumes `self` and returns a builder for a file with the owned source text `text`.
///
/// Forwards `text` to `set_source_text_string`.
#[must_use]
pub fn source_text_string(mut self, text: String) -> Self {
self.set_source_text_string(text);
self
}
/// Copies the source text `text` and builds the [`LineIndex`] from `text`.
pub fn set_source_text_string(&mut self, text: String) {
self.code = Some(FileSourceCode {
index: LineIndex::from_source_text(&text),
text: Box::from(text),
});
pub fn set_line_index(&mut self, index: LineIndex) {
self.index = Some(index);
}
/// Consumes the builder and produces the [`SourceFile`].
///
/// The line index cell is pre-filled when an index was provided and left empty otherwise.
pub fn finish(self) -> SourceFile {
    let line_index = self.index.map_or_else(
        once_cell::sync::OnceCell::new,
        once_cell::sync::OnceCell::with_value,
    );

    let inner = SourceFileInner {
        name: self.name,
        code: self.code,
        line_index,
    };

    SourceFile {
        inner: Arc::new(inner),
    }
}
@ -211,7 +168,7 @@ impl Debug for SourceFile {
/// Formats the file as a `SourceFile` struct showing its name and source text.
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
    f.debug_struct("SourceFile")
        .field("name", &self.name())
        .field("code", &self.source_text())
        .finish()
}
}
@ -223,38 +180,57 @@ impl SourceFile {
&self.inner.name
}
/// Returns `Some` with the source code if set, or `None`.
#[inline]
pub fn source_code(&self) -> Option<SourceCode> {
self.inner.code.as_ref().map(|code| SourceCode {
text: &code.text,
index: &code.index,
})
pub fn slice(&self, range: TextRange) -> &str {
&self.source_text()[range]
}
/// Returns a [`SourceCode`] that borrows this file's source text and its line index.
pub fn to_source_code(&self) -> SourceCode {
SourceCode {
text: self.source_text(),
index: self.index(),
}
}
/// Returns the file's [`LineIndex`], building it from the source text on first access
/// if the cell has not been initialized yet.
fn index(&self) -> &LineIndex {
self.inner
.line_index
.get_or_init(|| LineIndex::from_source_text(self.source_text()))
}
/// Returns `Some` with the source text if set, or `None`.
#[inline]
pub fn source_text(&self) -> Option<&str> {
self.inner.code.as_ref().map(|code| &*code.text)
pub fn source_text(&self) -> &str {
&self.inner.code
}
}
#[derive(Eq, PartialEq)]
struct SourceFileInner {
name: Box<str>,
code: Option<FileSourceCode>,
code: Box<str>,
line_index: once_cell::sync::OnceCell<LineIndex>,
}
struct FileSourceCode {
text: Box<str>,
index: LineIndex,
}
impl PartialEq for FileSourceCode {
impl PartialEq for SourceFileInner {
fn eq(&self, other: &Self) -> bool {
// It should be safe to assume that the index for two source files are identical
self.text == other.text
self.name == other.name && self.code == other.code
}
}
impl Eq for FileSourceCode {}
impl Eq for SourceFileInner {}
/// A position in a source file, expressed as a one-indexed row and column.
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct SourceLocation {
/// The one-indexed row.
pub row: OneIndexed,
/// The one-indexed column.
pub column: OneIndexed,
}
impl Debug for SourceLocation {
    /// Prints `row` and `column` as their plain numeric values rather than as
    /// nested `OneIndexed` wrappers.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { row, column } = self;
        let mut debug = f.debug_struct("SourceLocation");
        debug.field("row", &row.get());
        debug.field("column", &column.get());
        debug.finish()
    }
}

View file

@ -4,7 +4,6 @@ use std::fmt;
use std::ops::Deref;
use once_cell::unsync::OnceCell;
use rustpython_parser::ast::Location;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::Tok;
@ -12,48 +11,21 @@ use ruff_rustpython::vendor;
use crate::source_code::Locator;
use crate::str::leading_quote;
use crate::types::Range;
pub struct Stylist<'a> {
locator: &'a Locator<'a>,
indentation: OnceCell<Indentation>,
indent_end: Option<Location>,
quote: OnceCell<Quote>,
quote_range: Option<Range>,
indentation: Indentation,
quote: Quote,
line_ending: OnceCell<LineEnding>,
}
impl<'a> Stylist<'a> {
/// Returns the indentation style stored in this `Stylist`.
pub fn indentation(&'a self) -> &'a Indentation {
    &self.indentation
}
/// Returns the quote style stored in this `Stylist`.
pub fn quote(&'a self) -> Quote {
    self.quote
}
pub fn line_ending(&'a self) -> LineEnding {
@ -63,33 +35,60 @@ impl<'a> Stylist<'a> {
}
/// Creates a `Stylist` from the lexed `tokens` of the source held by `locator`.
///
/// Indentation and quote style are computed eagerly from the tokens;
/// the line-ending cell starts out empty.
pub fn from_tokens(tokens: &[LexResult], locator: &'a Locator<'a>) -> Self {
    let indentation = detect_indention(tokens, locator);

    Self {
        locator,
        indentation,
        quote: detect_quote(tokens, locator),
        line_ending: OnceCell::default(),
    }
}
}
/// Determines the quote style from the first string token that is not triple quoted.
///
/// Falls back to [`Quote::default`] when there is no such token or when
/// `leading_quote` finds no quote prefix in it.
fn detect_quote(tokens: &[LexResult], locator: &Locator) -> Quote {
    let first_string = tokens
        .iter()
        .flatten()
        .find(|(tok, _)| {
            matches!(
                tok,
                Tok::String {
                    triple_quoted: false,
                    ..
                }
            )
        })
        .map(|(_, range)| *range);

    match first_string.and_then(|range| leading_quote(locator.slice(range))) {
        Some(prefix) if prefix.contains('\'') => Quote::Single,
        Some(prefix) if prefix.contains('"') => Quote::Double,
        Some(_) => unreachable!("Expected string to start with a valid quote prefix"),
        None => Quote::default(),
    }
}
/// Determines the indentation from the source text of the first `Indent` token.
///
/// Falls back to [`Indentation::default`] when the tokens contain no `Indent`.
fn detect_indention(tokens: &[LexResult], locator: &Locator) -> Indentation {
    tokens
        .iter()
        .flatten()
        .find(|(tok, _)| matches!(tok, Tok::Indent))
        .map_or_else(Indentation::default, |(_, range)| {
            Indentation(locator.slice(*range).to_string())
        })
}
/// The quotation style used in Python source code.
#[derive(Debug, Default, PartialEq, Eq, Copy, Clone)]
pub enum Quote {
@ -198,17 +197,18 @@ impl Deref for LineEnding {
/// Detect the line ending style of the given contents.
fn detect_line_ending(contents: &str) -> Option<LineEnding> {
if let Some(position) = contents.find('\n') {
let position = position.saturating_sub(1);
return if let Some('\r') = contents.chars().nth(position) {
if let Some(position) = contents.find(['\n', '\r']) {
let bytes = contents.as_bytes();
if bytes[position] == b'\n' {
Some(LineEnding::Lf)
} else if bytes.get(position.saturating_add(1)) == Some(&b'\n') {
Some(LineEnding::CrLf)
} else {
Some(LineEnding::Lf)
};
} else if contents.find('\r').is_some() {
return Some(LineEnding::Cr);
Some(LineEnding::Cr)
}
} else {
None
}
None
}
#[cfg(test)]