Store source code on message (#3897)

This commit is contained in:
Micha Reiser 2023-04-11 09:57:36 +02:00 committed by GitHub
parent 76c47a9a43
commit 381203c084
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 537 additions and 217 deletions

View file

@ -897,7 +897,7 @@ pub fn match_trailing_comment<T>(located: &Located<T>, locator: &Locator) -> Opt
/// Return the number of trailing empty lines following a statement.
pub fn count_trailing_lines(stmt: &Stmt, locator: &Locator) -> usize {
let suffix = locator.skip(Location::new(stmt.end_location.unwrap().row() + 1, 0));
let suffix = locator.after(Location::new(stmt.end_location.unwrap().row() + 1, 0));
suffix
.lines()
.take_while(|line| line.trim().is_empty())
@ -906,7 +906,7 @@ pub fn count_trailing_lines(stmt: &Stmt, locator: &Locator) -> usize {
/// Return the range of the first parenthesis pair after a given [`Location`].
pub fn match_parens(start: Location, locator: &Locator) -> Option<Range> {
let contents = locator.skip(start);
let contents = locator.after(start);
let mut fix_start = None;
let mut fix_end = None;
let mut count: usize = 0;

View file

@ -83,12 +83,12 @@ impl LineIndex {
}
/// Return the number of lines in the source code.
pub(crate) fn lines_count(&self) -> usize {
pub(crate) fn line_count(&self) -> usize {
self.line_starts().len()
}
/// Returns the [byte offset](TextSize) for the `line` with the given index.
fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize {
pub(crate) fn line_start(&self, line: OneIndexed, contents: &str) -> TextSize {
let row_index = line.to_zero_indexed();
let starts = self.line_starts();
@ -103,7 +103,7 @@ impl LineIndex {
/// Returns the [`TextRange`] of the `line` with the given index.
/// The start points to the first character's [byte offset](TextSize), the end up to, and including
/// the newline character ending the line (if any).
fn line_range(&self, line: OneIndexed, contents: &str) -> TextRange {
pub(crate) fn line_range(&self, line: OneIndexed, contents: &str) -> TextRange {
let starts = self.line_starts();
if starts.len() == line.to_zero_indexed() {
@ -175,6 +175,11 @@ impl OneIndexed {
Self(ONE.saturating_add(value))
}
/// Returns the one-based value as a primitive `usize`.
///
/// No offset is applied: the wrapped value is returned as stored. Use
/// [`OneIndexed::to_zero_indexed`] when a zero-based index is needed.
pub const fn get(self) -> usize {
self.0.get()
}
/// Return the zero-indexed primitive value for this [`OneIndexed`]
pub const fn to_zero_indexed(self) -> usize {
self.0.get() - 1
@ -306,18 +311,18 @@ mod tests {
#[test]
fn utf8_index() {
let index = LineIndex::from_source_text("x = '🫣'");
assert_eq!(index.lines_count(), 1);
assert_eq!(index.line_count(), 1);
assert_eq!(index.line_starts(), &[TextSize::from(0)]);
let index = LineIndex::from_source_text("x = '🫣'\n");
assert_eq!(index.lines_count(), 2);
assert_eq!(index.line_count(), 2);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(11)]
);
let index = LineIndex::from_source_text("x = '🫣'\ny = 2\nz = x + y\n");
assert_eq!(index.lines_count(), 4);
assert_eq!(index.line_count(), 4);
assert_eq!(
index.line_starts(),
&[
@ -329,7 +334,7 @@ mod tests {
);
let index = LineIndex::from_source_text("# 🫣\nclass Foo:\n \"\"\".\"\"\"");
assert_eq!(index.lines_count(), 3);
assert_eq!(index.line_count(), 3);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(7), TextSize::from(18)]
@ -340,7 +345,7 @@ mod tests {
fn utf8_carriage_return() {
let contents = "x = '🫣'\ry = 3";
let index = LineIndex::from_source_text(contents);
assert_eq!(index.lines_count(), 2);
assert_eq!(index.line_count(), 2);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(11)]
@ -365,7 +370,7 @@ mod tests {
fn utf8_carriage_return_newline() {
let contents = "x = '🫣'\r\ny = 3";
let index = LineIndex::from_source_text(contents);
assert_eq!(index.lines_count(), 2);
assert_eq!(index.line_count(), 2);
assert_eq!(
index.line_starts(),
&[TextSize::from(0), TextSize::from(12)]

View file

@ -1,57 +1,65 @@
//! Struct used to efficiently slice source code at (row, column) Locations.
use crate::source_code::line_index::LineIndex;
use crate::source_code::{SourceCode, SourceCodeBuf};
use once_cell::unsync::OnceCell;
use ruff_text_size::{TextRange, TextSize};
use ruff_text_size::TextSize;
use rustpython_parser::ast::Location;
use crate::types::Range;
pub struct Locator<'a> {
contents: &'a str,
index: OnceCell<LineIndex>,
line_index: OnceCell<LineIndex>,
}
impl<'a> Locator<'a> {
pub const fn new(contents: &'a str) -> Self {
Self {
contents,
index: OnceCell::new(),
line_index: OnceCell::new(),
}
}
fn get_or_init_index(&self) -> &LineIndex {
self.index
self.line_index
.get_or_init(|| LineIndex::from_source_text(self.contents))
}
fn source_code(&self) -> SourceCode<'a, '_> {
SourceCode {
index: self.get_or_init_index(),
text: self.contents,
}
}
/// Converts the borrowed contents and line index into an owned [`SourceCodeBuf`].
#[inline]
pub fn to_source_code_buf(&self) -> SourceCodeBuf {
    let view = self.source_code();
    view.to_owned()
}
/// Take the source code up to the given [`Location`].
pub fn take(&self, location: Location) -> &'a str {
let index = self.get_or_init_index();
let offset = index.location_offset(location, self.contents);
&self.contents[TextRange::up_to(offset)]
#[inline]
pub fn up_to(&self, location: Location) -> &'a str {
self.source_code().up_to(location)
}
/// Take the source code after the given [`Location`].
pub fn skip(&self, location: Location) -> &'a str {
let index = self.get_or_init_index();
let offset = index.location_offset(location, self.contents);
&self.contents[usize::from(offset)..]
#[inline]
pub fn after(&self, location: Location) -> &'a str {
self.source_code().after(location)
}
/// Take the source code between the given [`Range`].
#[inline]
pub fn slice<R: Into<Range>>(&self, range: R) -> &'a str {
let index = self.get_or_init_index();
let range = range.into();
let start = index.location_offset(range.location, self.contents);
let end = index.location_offset(range.end_location, self.contents);
&self.contents[TextRange::new(start, end)]
self.source_code().slice(range)
}
/// Return the byte offset of the given [`Location`].
#[inline]
pub fn offset(&self, location: Location) -> TextSize {
let index = self.get_or_init_index();
index.location_offset(location, self.contents)
self.source_code().offset(location)
}
/// Return the underlying source code.
@ -62,7 +70,7 @@ impl<'a> Locator<'a> {
/// Return the number of lines in the source code.
pub fn count_lines(&self) -> usize {
let index = self.get_or_init_index();
index.lines_count()
index.line_count()
}
/// Return the number of bytes in the source code.

View file

@ -5,12 +5,16 @@ mod locator;
mod stylist;
pub use crate::source_code::line_index::{LineIndex, OneIndexed};
use crate::types::Range;
pub use generator::Generator;
pub use indexer::Indexer;
pub use locator::Locator;
use ruff_text_size::{TextRange, TextSize};
use rustpython_parser as parser;
use rustpython_parser::ast::Location;
use rustpython_parser::{lexer, Mode, ParseError};
use std::sync::Arc;
pub use stylist::{LineEnding, Stylist};
/// Run round-trip source code generation on a given Python code.
@ -23,3 +27,214 @@ pub fn round_trip(code: &str, source_path: &str) -> Result<String, ParseError> {
generator.unparse_suite(&python_ast);
Ok(generator.generate())
}
/// Gives access to the source code of a file and allows mapping between [`Location`] and byte offsets.
///
/// This is a borrowed view: it only holds references to the text and to the [`LineIndex`].
#[derive(Debug)]
pub struct SourceCode<'src, 'index> {
/// The source text that locations and byte offsets refer to.
text: &'src str,
/// The line index used to translate locations into byte offsets within `text`.
index: &'index LineIndex,
}
impl<'src, 'index> SourceCode<'src, 'index> {
pub fn new(content: &'src str, index: &'index LineIndex) -> Self {
Self {
text: content,
index,
}
}
/// Take the source code up to the given [`Location`].
pub fn up_to(&self, location: Location) -> &'src str {
let offset = self.index.location_offset(location, self.text);
&self.text[TextRange::up_to(offset)]
}
/// Take the source code after the given [`Location`].
pub fn after(&self, location: Location) -> &'src str {
let offset = self.index.location_offset(location, self.text);
&self.text[usize::from(offset)..]
}
/// Take the source code between the given [`Range`].
pub fn slice<R: Into<Range>>(&self, range: R) -> &'src str {
let range = self.text_range(range);
&self.text[range]
}
/// Converts a [`Location`] range to a byte offset range
pub fn text_range<R: Into<Range>>(&self, range: R) -> TextRange {
let range = range.into();
let start = self.index.location_offset(range.location, self.text);
let end = self.index.location_offset(range.end_location, self.text);
TextRange::new(start, end)
}
/// Return the byte offset of the given [`Location`].
pub fn offset(&self, location: Location) -> TextSize {
self.index.location_offset(location, self.text)
}
pub fn line_start(&self, line: OneIndexed) -> TextSize {
self.index.line_start(line, self.text)
}
pub fn line_range(&self, line: OneIndexed) -> TextRange {
self.index.line_range(line, self.text)
}
/// Returns a string with the lines spawning between location and end location.
pub fn lines(&self, range: Range) -> &'src str {
let start_line = self
.index
.line_range(OneIndexed::new(range.location.row()).unwrap(), self.text);
let end_line = self.index.line_range(
OneIndexed::new(range.end_location.row()).unwrap(),
self.text,
);
&self.text[TextRange::new(start_line.start(), end_line.end())]
}
/// Returns the source text of the line with the given index
#[inline]
pub fn line_text(&self, index: OneIndexed) -> &'src str {
let range = self.index.line_range(index, self.text);
&self.text[range]
}
pub fn text(&self) -> &'src str {
self.text
}
#[inline]
pub fn line_count(&self) -> usize {
self.index.line_count()
}
pub fn to_source_code_buf(&self) -> SourceCodeBuf {
self.to_owned()
}
pub fn to_owned(&self) -> SourceCodeBuf {
SourceCodeBuf::new(self.text, self.index.clone())
}
}
/// Two views are equal when their source text is equal; the line index is not compared.
impl PartialEq<Self> for SourceCode<'_, '_> {
    fn eq(&self, rhs: &Self) -> bool {
        self.text.eq(rhs.text)
    }
}
// Equality is plain string equality on the text, which is a full equivalence relation.
impl Eq for SourceCode<'_, '_> {}
/// A borrowed view equals an owned buffer when both hold the same source text.
impl PartialEq<SourceCodeBuf> for SourceCode<'_, '_> {
    fn eq(&self, buf: &SourceCodeBuf) -> bool {
        let owned_text: &str = &buf.text;
        self.text == owned_text
    }
}
/// Gives access to the source code of a file and allows mapping between [`Location`] and byte offsets.
///
/// This is the owned counterpart to [`SourceCode`]. Cloning only requires bumping reference counters.
#[derive(Clone, Debug)]
pub struct SourceCodeBuf {
/// The source text; clones share the same allocation through the [`Arc`].
text: Arc<str>,
/// The line index used to translate locations into byte offsets within `text`.
// NOTE(review): the "only bumping reference counters" claim above holds for this field only
// if cloning a `LineIndex` is itself a reference-count bump — confirm against `LineIndex`.
index: LineIndex,
}
impl SourceCodeBuf {
pub fn new(content: &str, index: LineIndex) -> Self {
Self {
text: Arc::from(content),
index,
}
}
/// Creates the [`LineIndex`] for `text` and returns the [`SourceCodeBuf`].
pub fn from_content(text: &str) -> Self {
Self::new(text, LineIndex::from_source_text(text))
}
#[inline]
fn as_source_code(&self) -> SourceCode {
SourceCode {
text: &self.text,
index: &self.index,
}
}
/// Take the source code up to the given [`Location`].
pub fn up_to(&self, location: Location) -> &str {
self.as_source_code().up_to(location)
}
/// Take the source code after the given [`Location`].
pub fn after(&self, location: Location) -> &str {
self.as_source_code().after(location)
}
/// Take the source code between the given [`Range`].
#[inline]
pub fn slice<R: Into<Range>>(&self, range: R) -> &str {
self.as_source_code().slice(range)
}
/// Converts a [`Location`] range to a byte offset range
#[inline]
pub fn text_range<R: Into<Range>>(&self, range: R) -> TextRange {
self.as_source_code().text_range(range)
}
#[inline]
pub fn line_range(&self, line: OneIndexed) -> TextRange {
self.as_source_code().line_range(line)
}
/// Return the byte offset of the given [`Location`].
#[inline]
pub fn offset(&self, location: Location) -> TextSize {
self.as_source_code().offset(location)
}
#[inline]
pub fn line_start(&self, line: OneIndexed) -> TextSize {
self.as_source_code().line_start(line)
}
#[inline]
pub fn lines(&self, range: Range) -> &str {
self.as_source_code().lines(range)
}
/// Returns the source text of the line with the given index
#[inline]
pub fn line_text(&self, index: OneIndexed) -> &str {
self.as_source_code().line_text(index)
}
#[inline]
pub fn line_count(&self) -> usize {
self.index.line_count()
}
pub fn text(&self) -> &str {
&self.text
}
}
/// Buffers compare equal when their source text is equal.
impl PartialEq<Self> for SourceCodeBuf {
    fn eq(&self, rhs: &Self) -> bool {
        // Only the text is compared: the same source text should have the same index.
        self.text.eq(&rhs.text)
    }
}
/// An owned buffer equals a borrowed view when both hold the same source text.
impl PartialEq<SourceCode<'_, '_>> for SourceCodeBuf {
    fn eq(&self, view: &SourceCode<'_, '_>) -> bool {
        let own_text: &str = &self.text;
        own_text == view.text
    }
}
// Equality is plain string equality on the text, which is a full equivalence relation.
impl Eq for SourceCodeBuf {}