red-knot: source_text, line_index, and parsed_module queries (#11822)

This commit is contained in:
Micha Reiser 2024-06-13 08:37:02 +01:00 committed by GitHub
parent efbf7b14b5
commit d4dd96d1f4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 194 additions and 16 deletions

4
Cargo.lock generated
View file

@ -2029,6 +2029,10 @@ dependencies = [
"countme",
"dashmap",
"filetime",
"ruff_python_ast",
"ruff_python_parser",
"ruff_source_file",
"ruff_text_size",
"rustc-hash",
"salsa-2022",
"tracing",

View file

@ -11,6 +11,11 @@ repository = { workspace = true }
license = { workspace = true }
[dependencies]
ruff_python_ast = { workspace = true }
ruff_python_parser = { workspace = true }
ruff_source_file = { workspace = true }
ruff_text_size = { workspace = true }
camino = { workspace = true }
countme = { workspace = true }
dashmap = { workspace = true }

View file

@ -48,6 +48,31 @@ impl FileSystemPath {
unsafe { &*(path as *const Utf8Path as *const FileSystemPath) }
}
/// Extracts the file extension as a string slice, if possible.
///
/// The extension is:
///
/// * [`None`], if there is no file name;
/// * [`None`], if there is no embedded `.`;
/// * [`None`], if the file name begins with `.` and has no other `.`s within;
/// * Otherwise, the portion of the file name after the final `.`
///
/// The lookup is delegated unchanged to the wrapped path's own `extension` method.
///
/// # Examples
///
/// ```
/// use ruff_db::file_system::FileSystemPath;
///
/// assert_eq!("rs", FileSystemPath::new("foo.rs").extension().unwrap());
/// assert_eq!("gz", FileSystemPath::new("foo.tar.gz").extension().unwrap());
/// ```
///
/// See [`Path::extension`] for more details.
#[inline]
#[must_use]
pub fn extension(&self) -> Option<&str> {
self.0.extension()
}
/// Converts the path to an owned [`FileSystemPathBuf`].
pub fn to_path_buf(&self) -> FileSystemPathBuf {
FileSystemPathBuf(self.0.to_path_buf())
@ -251,9 +276,10 @@ impl FileType {
#[cfg(test)]
mod tests {
use crate::file_system::FileRevision;
use filetime::FileTime;
use crate::file_system::FileRevision;
#[test]
fn revision_from_file_time() {
let file_time = FileTime::now();

View file

@ -4,15 +4,19 @@ use rustc_hash::FxHasher;
use salsa::DbWithJar;
use crate::file_system::{FileSystem, FileSystemPath};
use crate::parsed::parsed_module;
use crate::source::{line_index, source_text};
use crate::vfs::{VendoredPath, Vfs, VfsFile};
pub mod file_system;
pub mod parsed;
pub mod source;
pub mod vfs;
/// Crate-internal alias for a [`dashmap::DashMap`] whose keys are hashed with [`FxHasher`].
pub(crate) type FxDashMap<K, V> = dashmap::DashMap<K, V, BuildHasherDefault<FxHasher>>;
#[salsa::jar(db=Db)]
pub struct Jar(VfsFile);
pub struct Jar(VfsFile, source_text, line_index, parsed_module);
/// Database that gives access to the virtual filesystem, source code, and parsed AST.
pub trait Db: DbWithJar<Jar> {

View file

@ -0,0 +1,126 @@
use std::fmt::Formatter;
use std::ops::Deref;
use std::sync::Arc;
use ruff_python_ast::{ModModule, PySourceType};
use ruff_python_parser::{parse_unchecked_source, Parsed};
use crate::source::source_text;
use crate::vfs::{VfsFile, VfsPath};
use crate::Db;
/// Parses `file` and returns its AST together with the token stream.
///
/// Parsing goes through Ruff's error-resilient parser, so an AST is always produced, even
/// for sources that contain syntax errors. Any parse errors can be inspected afterwards
/// through [`Parsed::errors`].
///
/// The cached result is reused only for as long as [`source_text()`] is unchanged, for two
/// reasons:
///
/// * Comparing two ASTs is a non-trivial operation, and every offset change is directly
///   reflected in the AST's offsets anyway.
/// * Ruff's AST doesn't implement `Eq`, which Salsa requires for determining whether a
///   query result is unchanged.
#[salsa::tracked(return_ref, no_eq)]
pub fn parsed_module(db: &dyn Db, file: VfsFile) -> ParsedModule {
    let source = source_text(db, file);

    let source_type = match file.path(db) {
        // Files on disk: infer the flavour from the extension, defaulting to plain Python
        // when the path has no extension at all.
        VfsPath::FileSystem(fs_path) => match fs_path.extension() {
            Some(extension) => PySourceType::from_extension(extension),
            None => PySourceType::Python,
        },
        // Vendored files are always parsed as stubs.
        VfsPath::Vendored(_) => PySourceType::Stub,
    };

    let parsed = parse_unchecked_source(&source, source_type);

    ParsedModule {
        inner: Arc::new(parsed),
    }
}
/// Cheap cloneable wrapper around the parsed module.
///
/// The parse result is stored behind an [`Arc`], so cloning the wrapper clones the
/// `Arc` handle rather than the parsed module itself.
#[derive(Clone, PartialEq)]
pub struct ParsedModule {
// Shared handle to the parser output (`Parsed<ModModule>`) for the module.
inner: Arc<Parsed<ModModule>>,
}
impl ParsedModule {
    /// Unwraps `self`, handing back the shared [`Arc`] that holds the parsed module.
    pub fn into_arc(self) -> Arc<Parsed<ModModule>> {
        let Self { inner } = self;
        inner
    }
}
/// Lets a [`ParsedModule`] be used wherever a `&Parsed<ModModule>` is expected.
impl Deref for ParsedModule {
    type Target = Parsed<ModModule>;

    fn deref(&self) -> &Self::Target {
        // Borrow the value stored inside the `Arc`.
        self.inner.as_ref()
    }
}
/// Formats the wrapper as a tuple struct named `ParsedModule` containing the inner value.
impl std::fmt::Debug for ParsedModule {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut tuple = f.debug_tuple("ParsedModule");
        tuple.field(&self.inner);
        tuple.finish()
    }
}
// Tests for `parsed_module`: one per kind of input path (`.py`, `.ipynb`, vendored `.pyi`).
#[cfg(test)]
mod tests {
use crate::file_system::FileSystemPath;
use crate::parsed::parsed_module;
use crate::tests::TestDb;
use crate::vfs::VendoredPath;
use crate::Db;
// A file with a `.py` extension containing a simple assignment parses as valid.
#[test]
fn python_file() {
let mut db = TestDb::new();
let path = FileSystemPath::new("test.py");
db.file_system_mut().write_file(path, "x = 10".to_string());
let file = db.file(path);
let parsed = parsed_module(&db, file);
assert!(parsed.is_valid());
}
// A file with an `.ipynb` extension whose content is a `%timeit` line parses as valid.
#[test]
fn python_ipynb_file() {
let mut db = TestDb::new();
let path = FileSystemPath::new("test.ipynb");
db.file_system_mut()
.write_file(path, "%timeit a = b".to_string());
let file = db.file(path);
let parsed = parsed_module(&db, file);
assert!(parsed.is_valid());
}
// A vendored `.pyi` file registered through `stub_vendored` parses as valid.
#[test]
fn vendored_file() {
let mut db = TestDb::new();
db.vfs_mut().stub_vendored([(
"path.pyi",
r#"
import sys
if sys.platform == "win32":
from ntpath import *
from ntpath import __all__ as __all__
else:
from posixpath import *
from posixpath import __all__ as __all__"#,
)]);
let file = db.vendored_file(VendoredPath::new("path.pyi")).unwrap();
let parsed = parsed_module(&db, file);
assert!(parsed.is_valid());
}
}

View file

@ -96,11 +96,10 @@ mod tests {
// Change the file permission only
file.set_permissions(&mut db).to(Some(0o777));
db.events().lock().unwrap().clear();
db.clear_salsa_events();
assert_eq!(&*source_text(&db, file), "x = 10");
let events = db.events();
let events = events.lock().unwrap();
let events = db.take_salsa_events();
assert!(!events
.iter()

View file

@ -1,3 +1,4 @@
use std::ffi::OsStr;
use std::path::Path;
pub use expression::*;
@ -80,13 +81,25 @@ pub enum PySourceType {
Ipynb,
}
impl<P: AsRef<Path>> From<P> for PySourceType {
fn from(path: P) -> Self {
match path.as_ref().extension() {
Some(ext) if ext == "py" => PySourceType::Python,
Some(ext) if ext == "pyi" => PySourceType::Stub,
Some(ext) if ext == "ipynb" => PySourceType::Ipynb,
_ => PySourceType::Python,
impl PySourceType {
    /// Maps a file extension (without the leading `.`) to the matching source type.
    ///
    /// `"pyi"` yields [`Self::Stub`] and `"ipynb"` yields [`Self::Ipynb`]. Every other
    /// extension, including `"py"` and anything unrecognized, yields [`Self::Python`].
    /// The comparison is exact, so it is case-sensitive.
    pub fn from_extension(extension: &str) -> Self {
        if extension == "pyi" {
            return Self::Stub;
        }

        if extension == "ipynb" {
            return Self::Ipynb;
        }

        // `py` and every unknown extension are treated as regular Python source.
        Self::Python
    }
}
/// Infers the source type from a path's extension.
///
/// Paths without an extension, or whose extension is not valid UTF-8, are treated as
/// [`PySourceType::Python`]; otherwise the decision is made by
/// [`PySourceType::from_extension`].
impl<P: AsRef<Path>> From<P> for PySourceType {
    fn from(path: P) -> Self {
        let extension = path.as_ref().extension().and_then(OsStr::to_str);

        match extension {
            Some(extension) => Self::from_extension(extension),
            None => Self::Python,
        }
    }
}

View file

@ -234,7 +234,7 @@ pub fn parse_unchecked_source(source: &str, source_type: PySourceType) -> Parsed
}
/// Represents the parsed source code.
#[derive(Debug, Clone)]
#[derive(Debug, PartialEq, Clone)]
pub struct Parsed<T> {
syntax: T,
tokens: Tokens,
@ -361,7 +361,7 @@ impl Parsed<ModExpression> {
}
/// Tokens represents a vector of lexed [`Token`].
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Tokens {
raw: Vec<Token>,

View file

@ -14,11 +14,12 @@ use crate::SourceLocation;
/// Index for fast [byte offset](TextSize) to [`SourceLocation`] conversions.
///
/// Cloning a [`LineIndex`] is cheap because it only requires bumping a reference count.
#[derive(Clone)]
#[derive(Clone, Eq, PartialEq)]
pub struct LineIndex {
inner: Arc<LineIndexInner>,
}
#[derive(Eq, PartialEq)]
struct LineIndexInner {
line_starts: Vec<TextSize>,
kind: IndexKind,
@ -268,7 +269,7 @@ impl Debug for LineIndex {
}
}
#[derive(Debug, Clone, Copy)]
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
enum IndexKind {
/// Optimized index for an ASCII only document
Ascii,