mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-04 10:48:32 +00:00
red-knot: source_text
, line_index
, and parsed_module
queries (#11822)
This commit is contained in:
parent
efbf7b14b5
commit
d4dd96d1f4
9 changed files with 194 additions and 16 deletions
4
Cargo.lock
generated
4
Cargo.lock
generated
|
@ -2029,6 +2029,10 @@ dependencies = [
|
|||
"countme",
|
||||
"dashmap",
|
||||
"filetime",
|
||||
"ruff_python_ast",
|
||||
"ruff_python_parser",
|
||||
"ruff_source_file",
|
||||
"ruff_text_size",
|
||||
"rustc-hash",
|
||||
"salsa-2022",
|
||||
"tracing",
|
||||
|
|
|
@ -11,6 +11,11 @@ repository = { workspace = true }
|
|||
license = { workspace = true }
|
||||
|
||||
[dependencies]
|
||||
ruff_python_ast = { workspace = true }
|
||||
ruff_python_parser = { workspace = true }
|
||||
ruff_source_file = { workspace = true }
|
||||
ruff_text_size = { workspace = true }
|
||||
|
||||
camino = { workspace = true }
|
||||
countme = { workspace = true }
|
||||
dashmap = { workspace = true }
|
||||
|
|
|
@ -48,6 +48,31 @@ impl FileSystemPath {
|
|||
unsafe { &*(path as *const Utf8Path as *const FileSystemPath) }
|
||||
}
|
||||
|
||||
/// Extracts the file extension, if possible.
|
||||
///
|
||||
/// The extension is:
|
||||
///
|
||||
/// * [`None`], if there is no file name;
|
||||
/// * [`None`], if there is no embedded `.`;
|
||||
/// * [`None`], if the file name begins with `.` and has no other `.`s within;
|
||||
/// * Otherwise, the portion of the file name after the final `.`
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use ruff_db::file_system::FileSystemPath;
|
||||
///
|
||||
/// assert_eq!("rs", FileSystemPath::new("foo.rs").extension().unwrap());
|
||||
/// assert_eq!("gz", FileSystemPath::new("foo.tar.gz").extension().unwrap());
|
||||
/// ```
|
||||
///
|
||||
/// See [`Path::extension`] for more details.
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub fn extension(&self) -> Option<&str> {
|
||||
self.0.extension()
|
||||
}
|
||||
|
||||
/// Converts the path to an owned [`FileSystemPathBuf`].
|
||||
pub fn to_path_buf(&self) -> FileSystemPathBuf {
|
||||
FileSystemPathBuf(self.0.to_path_buf())
|
||||
|
@ -251,9 +276,10 @@ impl FileType {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::file_system::FileRevision;
|
||||
use filetime::FileTime;
|
||||
|
||||
use crate::file_system::FileRevision;
|
||||
|
||||
#[test]
|
||||
fn revision_from_file_time() {
|
||||
let file_time = FileTime::now();
|
||||
|
|
|
@ -4,15 +4,19 @@ use rustc_hash::FxHasher;
|
|||
use salsa::DbWithJar;
|
||||
|
||||
use crate::file_system::{FileSystem, FileSystemPath};
|
||||
use crate::parsed::parsed_module;
|
||||
use crate::source::{line_index, source_text};
|
||||
use crate::vfs::{VendoredPath, Vfs, VfsFile};
|
||||
|
||||
pub mod file_system;
|
||||
pub mod parsed;
|
||||
pub mod source;
|
||||
pub mod vfs;
|
||||
|
||||
pub(crate) type FxDashMap<K, V> = dashmap::DashMap<K, V, BuildHasherDefault<FxHasher>>;
|
||||
|
||||
#[salsa::jar(db=Db)]
|
||||
pub struct Jar(VfsFile);
|
||||
pub struct Jar(VfsFile, source_text, line_index, parsed_module);
|
||||
|
||||
/// Database that gives access to the virtual filesystem, source code, and parsed AST.
|
||||
pub trait Db: DbWithJar<Jar> {
|
||||
|
|
126
crates/ruff_db/src/parsed.rs
Normal file
126
crates/ruff_db/src/parsed.rs
Normal file
|
@ -0,0 +1,126 @@
|
|||
use std::fmt::Formatter;
|
||||
use std::ops::Deref;
|
||||
use std::sync::Arc;
|
||||
|
||||
use ruff_python_ast::{ModModule, PySourceType};
|
||||
use ruff_python_parser::{parse_unchecked_source, Parsed};
|
||||
|
||||
use crate::source::source_text;
|
||||
use crate::vfs::{VfsFile, VfsPath};
|
||||
use crate::Db;
|
||||
|
||||
/// Returns the parsed AST of `file`, including its token stream.
|
||||
///
|
||||
/// The query uses Ruff's error-resilient parser. That means that the parser always succeeds to produce a
|
||||
/// AST even if the file contains syntax errors. The parse errors
|
||||
/// are then accessible through [`Parsed::errors`].
|
||||
///
|
||||
/// The query is only cached when the [`source_text()`] hasn't changed. This is because
|
||||
/// comparing two ASTs is a non-trivial operation and every offset change is directly
|
||||
/// reflected in the changed AST offsets.
|
||||
/// The other reason is that Ruff's AST doesn't implement `Eq` which Sala requires
|
||||
/// for determining if a query result is unchanged.
|
||||
#[salsa::tracked(return_ref, no_eq)]
|
||||
pub fn parsed_module(db: &dyn Db, file: VfsFile) -> ParsedModule {
|
||||
let source = source_text(db, file);
|
||||
let path = file.path(db);
|
||||
|
||||
let ty = match path {
|
||||
VfsPath::FileSystem(path) => path
|
||||
.extension()
|
||||
.map_or(PySourceType::Python, PySourceType::from_extension),
|
||||
VfsPath::Vendored(_) => PySourceType::Stub,
|
||||
};
|
||||
|
||||
ParsedModule {
|
||||
inner: Arc::new(parse_unchecked_source(&source, ty)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Cheap cloneable wrapper around the parsed module.
|
||||
#[derive(Clone, PartialEq)]
|
||||
pub struct ParsedModule {
|
||||
inner: Arc<Parsed<ModModule>>,
|
||||
}
|
||||
|
||||
impl ParsedModule {
|
||||
/// Consumes `self` and returns the Arc storing the parsed module.
|
||||
pub fn into_arc(self) -> Arc<Parsed<ModModule>> {
|
||||
self.inner
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for ParsedModule {
|
||||
type Target = Parsed<ModModule>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.inner
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for ParsedModule {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_tuple("ParsedModule").field(&self.inner).finish()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::file_system::FileSystemPath;
|
||||
use crate::parsed::parsed_module;
|
||||
use crate::tests::TestDb;
|
||||
use crate::vfs::VendoredPath;
|
||||
use crate::Db;
|
||||
|
||||
#[test]
|
||||
fn python_file() {
|
||||
let mut db = TestDb::new();
|
||||
let path = FileSystemPath::new("test.py");
|
||||
|
||||
db.file_system_mut().write_file(path, "x = 10".to_string());
|
||||
|
||||
let file = db.file(path);
|
||||
|
||||
let parsed = parsed_module(&db, file);
|
||||
|
||||
assert!(parsed.is_valid());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn python_ipynb_file() {
|
||||
let mut db = TestDb::new();
|
||||
let path = FileSystemPath::new("test.ipynb");
|
||||
|
||||
db.file_system_mut()
|
||||
.write_file(path, "%timeit a = b".to_string());
|
||||
|
||||
let file = db.file(path);
|
||||
|
||||
let parsed = parsed_module(&db, file);
|
||||
|
||||
assert!(parsed.is_valid());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn vendored_file() {
|
||||
let mut db = TestDb::new();
|
||||
db.vfs_mut().stub_vendored([(
|
||||
"path.pyi",
|
||||
r#"
|
||||
import sys
|
||||
|
||||
if sys.platform == "win32":
|
||||
from ntpath import *
|
||||
from ntpath import __all__ as __all__
|
||||
else:
|
||||
from posixpath import *
|
||||
from posixpath import __all__ as __all__"#,
|
||||
)]);
|
||||
|
||||
let file = db.vendored_file(VendoredPath::new("path.pyi")).unwrap();
|
||||
|
||||
let parsed = parsed_module(&db, file);
|
||||
|
||||
assert!(parsed.is_valid());
|
||||
}
|
||||
}
|
|
@ -96,11 +96,10 @@ mod tests {
|
|||
// Change the file permission only
|
||||
file.set_permissions(&mut db).to(Some(0o777));
|
||||
|
||||
db.events().lock().unwrap().clear();
|
||||
db.clear_salsa_events();
|
||||
assert_eq!(&*source_text(&db, file), "x = 10");
|
||||
|
||||
let events = db.events();
|
||||
let events = events.lock().unwrap();
|
||||
let events = db.take_salsa_events();
|
||||
|
||||
assert!(!events
|
||||
.iter()
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
use std::ffi::OsStr;
|
||||
use std::path::Path;
|
||||
|
||||
pub use expression::*;
|
||||
|
@ -80,13 +81,25 @@ pub enum PySourceType {
|
|||
Ipynb,
|
||||
}
|
||||
|
||||
impl<P: AsRef<Path>> From<P> for PySourceType {
|
||||
fn from(path: P) -> Self {
|
||||
match path.as_ref().extension() {
|
||||
Some(ext) if ext == "py" => PySourceType::Python,
|
||||
Some(ext) if ext == "pyi" => PySourceType::Stub,
|
||||
Some(ext) if ext == "ipynb" => PySourceType::Ipynb,
|
||||
_ => PySourceType::Python,
|
||||
impl PySourceType {
|
||||
/// Infers the source type from the file extension.
|
||||
///
|
||||
/// Falls back to `Python` if the extension is not recognized.
|
||||
pub fn from_extension(extension: &str) -> Self {
|
||||
match extension {
|
||||
"py" => Self::Python,
|
||||
"pyi" => Self::Stub,
|
||||
"ipynb" => Self::Ipynb,
|
||||
_ => Self::Python,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<P: AsRef<Path>> From<P> for PySourceType {
|
||||
fn from(path: P) -> Self {
|
||||
path.as_ref()
|
||||
.extension()
|
||||
.and_then(OsStr::to_str)
|
||||
.map_or(Self::Python, Self::from_extension)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -234,7 +234,7 @@ pub fn parse_unchecked_source(source: &str, source_type: PySourceType) -> Parsed
|
|||
}
|
||||
|
||||
/// Represents the parsed source code.
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct Parsed<T> {
|
||||
syntax: T,
|
||||
tokens: Tokens,
|
||||
|
@ -361,7 +361,7 @@ impl Parsed<ModExpression> {
|
|||
}
|
||||
|
||||
/// Tokens represents a vector of lexed [`Token`].
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Tokens {
|
||||
raw: Vec<Token>,
|
||||
|
||||
|
|
|
@ -14,11 +14,12 @@ use crate::SourceLocation;
|
|||
/// Index for fast [byte offset](TextSize) to [`SourceLocation`] conversions.
|
||||
///
|
||||
/// Cloning a [`LineIndex`] is cheap because it only requires bumping a reference count.
|
||||
#[derive(Clone)]
|
||||
#[derive(Clone, Eq, PartialEq)]
|
||||
pub struct LineIndex {
|
||||
inner: Arc<LineIndexInner>,
|
||||
}
|
||||
|
||||
#[derive(Eq, PartialEq)]
|
||||
struct LineIndexInner {
|
||||
line_starts: Vec<TextSize>,
|
||||
kind: IndexKind,
|
||||
|
@ -268,7 +269,7 @@ impl Debug for LineIndex {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
|
||||
enum IndexKind {
|
||||
/// Optimized index for an ASCII only document
|
||||
Ascii,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue