Use an empty vendored file system in Ruff (#13436)

## Summary

This PR removes the typeshed stubs from the vendored file system
shipped with ruff
and instead ships an empty "typeshed".

Making the typeshed files optional required extracting the typeshed files
into a new `ruff_vendored` crate. I do like this even if all our builds
always include typeshed because it means `red_knot_python_semantic`
contains less code that needs compiling.

This also allows us to use deflate because the compression algorithm
doesn't matter for an archive containing a single, empty file.

## Test Plan

`cargo test`

I verified with ` cargo tree -f "{p} {f}" -p <package> ` that:

* red_knot_wasm: enables `deflate` compression
* red_knot: enables `zstd` compression
* `ruff`: uses stored


I'm not quite sure how to build the binary that maturin builds but
comparing the release artifact size with `strip = true` shows a `1.5MB`
size reduction

---------

Co-authored-by: Charlie Marsh <charlie.r.marsh@gmail.com>
This commit is contained in:
Micha Reiser 2024-09-21 18:31:42 +02:00 committed by GitHub
parent 8921fbb54c
commit 653c09001a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
602 changed files with 162 additions and 130 deletions

View file

@ -37,13 +37,13 @@ jobs:
- name: Sync typeshed
id: sync
run: |
rm -rf ruff/crates/red_knot_python_semantic/vendor/typeshed
mkdir ruff/crates/red_knot_python_semantic/vendor/typeshed
cp typeshed/README.md ruff/crates/red_knot_python_semantic/vendor/typeshed
cp typeshed/LICENSE ruff/crates/red_knot_python_semantic/vendor/typeshed
cp -r typeshed/stdlib ruff/crates/red_knot_python_semantic/vendor/typeshed/stdlib
rm -rf ruff/crates/red_knot_python_semantic/vendor/typeshed/stdlib/@tests
git -C typeshed rev-parse HEAD > ruff/crates/red_knot_python_semantic/vendor/typeshed/source_commit.txt
rm -rf ruff/crates/ruff_vendored/vendor/typeshed
mkdir ruff/crates/ruff_vendored/vendor/typeshed
cp typeshed/README.md ruff/crates/ruff_vendored/vendor/typeshed
cp typeshed/LICENSE ruff/crates/ruff_vendored/vendor/typeshed
cp -r typeshed/stdlib ruff/crates/ruff_vendored/vendor/typeshed/stdlib
rm -rf ruff/crates/ruff_vendored/vendor/typeshed/stdlib/@tests
git -C typeshed rev-parse HEAD > ruff/crates/ruff_vendored/vendor/typeshed/source_commit.txt
- name: Commit the changes
id: commit
if: ${{ steps.sync.outcome == 'success' }}

View file

@ -2,7 +2,7 @@ fail_fast: true
exclude: |
(?x)^(
crates/red_knot_python_semantic/vendor/.*|
crates/ruff_vendored/vendor/.*|
crates/red_knot_workspace/resources/.*|
crates/ruff_linter/resources/.*|
crates/ruff_linter/src/rules/.*/snapshots/.*|

19
Cargo.lock generated
View file

@ -2083,9 +2083,7 @@ dependencies = [
"countme",
"hashbrown",
"insta",
"once_cell",
"ordermap",
"path-slash",
"ruff_db",
"ruff_index",
"ruff_python_ast",
@ -2094,6 +2092,7 @@ dependencies = [
"ruff_python_stdlib",
"ruff_source_file",
"ruff_text_size",
"ruff_vendored",
"rustc-hash 2.0.0",
"salsa",
"smallvec",
@ -2102,8 +2101,6 @@ dependencies = [
"test-case",
"thiserror",
"tracing",
"walkdir",
"zip",
]
[[package]]
@ -2159,6 +2156,7 @@ dependencies = [
"ruff_db",
"ruff_python_ast",
"ruff_text_size",
"ruff_vendored",
"rustc-hash 2.0.0",
"salsa",
"tempfile",
@ -2450,6 +2448,7 @@ version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"once_cell",
"red_knot_python_semantic",
"ruff_cache",
"ruff_db",
@ -2460,6 +2459,7 @@ dependencies = [
"salsa",
"schemars",
"serde",
"zip",
]
[[package]]
@ -2790,6 +2790,17 @@ dependencies = [
"static_assertions",
]
[[package]]
name = "ruff_vendored"
version = "0.0.0"
dependencies = [
"once_cell",
"path-slash",
"ruff_db",
"walkdir",
"zip",
]
[[package]]
name = "ruff_wasm"
version = "0.6.6"

View file

@ -14,7 +14,7 @@ license = "MIT"
[workspace.dependencies]
ruff = { path = "crates/ruff" }
ruff_cache = { path = "crates/ruff_cache" }
ruff_db = { path = "crates/ruff_db" }
ruff_db = { path = "crates/ruff_db", default-features = false }
ruff_diagnostics = { path = "crates/ruff_diagnostics" }
ruff_formatter = { path = "crates/ruff_formatter" }
ruff_graph = { path = "crates/ruff_graph" }
@ -34,11 +34,12 @@ ruff_python_trivia = { path = "crates/ruff_python_trivia" }
ruff_server = { path = "crates/ruff_server" }
ruff_source_file = { path = "crates/ruff_source_file" }
ruff_text_size = { path = "crates/ruff_text_size" }
ruff_vendored = { path = "crates/ruff_vendored" }
ruff_workspace = { path = "crates/ruff_workspace" }
red_knot_python_semantic = { path = "crates/red_knot_python_semantic" }
red_knot_server = { path = "crates/red_knot_server" }
red_knot_workspace = { path = "crates/red_knot_workspace" }
red_knot_workspace = { path = "crates/red_knot_workspace", default-features = false }
aho-corasick = { version = "1.1.3" }
annotate-snippets = { version = "0.9.2", features = ["color"] }

View file

@ -13,9 +13,8 @@ license.workspace = true
[dependencies]
red_knot_python_semantic = { workspace = true }
red_knot_workspace = { workspace = true }
red_knot_workspace = { workspace = true, features = ["zstd"] }
red_knot_server = { workspace = true }
ruff_db = { workspace = true, features = ["os", "cache"] }
anyhow = { workspace = true }

View file

@ -24,7 +24,6 @@ bitflags = { workspace = true }
camino = { workspace = true }
compact_str = { workspace = true }
countme = { workspace = true }
once_cell = { workspace = true }
ordermap = { workspace = true }
salsa = { workspace = true }
thiserror = { workspace = true }
@ -35,20 +34,14 @@ smallvec = { workspace = true }
static_assertions = { workspace = true }
test-case = { workspace = true }
[build-dependencies]
path-slash = { workspace = true }
walkdir = { workspace = true }
zip = { workspace = true, features = ["zstd", "deflate"] }
[dev-dependencies]
ruff_db = { workspace = true, features = ["os", "testing"] }
ruff_python_parser = { workspace = true }
ruff_vendored = { workspace = true }
anyhow = { workspace = true }
insta = { workspace = true }
tempfile = { workspace = true }
walkdir = { workspace = true }
zip = { workspace = true }
[lints]
workspace = true

View file

@ -11,7 +11,6 @@ pub trait Db: SourceDb + Upcast<dyn SourceDb> {
pub(crate) mod tests {
use std::sync::Arc;
use crate::module_resolver::vendored_typeshed_stubs;
use ruff_db::files::{File, Files};
use ruff_db::system::{DbWithTestSystem, System, TestSystem};
use ruff_db::vendored::VendoredFileSystem;
@ -33,7 +32,7 @@ pub(crate) mod tests {
Self {
storage: salsa::Storage::default(),
system: TestSystem::default(),
vendored: vendored_typeshed_stubs().clone(),
vendored: ruff_vendored::file_system().clone(),
events: std::sync::Arc::default(),
files: Files::default(),
}

View file

@ -4,9 +4,7 @@ use rustc_hash::FxHasher;
pub use db::Db;
pub use module_name::ModuleName;
pub use module_resolver::{
resolve_module, system_module_search_paths, vendored_typeshed_stubs, Module,
};
pub use module_resolver::{resolve_module, system_module_search_paths, Module};
pub use program::{Program, ProgramSettings, SearchPathSettings, SitePackages};
pub use python_version::PythonVersion;
pub use semantic_model::{HasTy, SemanticModel};

View file

@ -4,7 +4,6 @@ pub use module::Module;
pub use resolver::resolve_module;
pub(crate) use resolver::{file_to_module, SearchPaths};
use ruff_db::system::SystemPath;
pub use typeshed::vendored_typeshed_stubs;
use crate::module_resolver::resolver::search_paths;
use crate::Db;

View file

@ -390,7 +390,8 @@ mod tests {
fn typeshed_versions_consistent_with_vendored_stubs() {
let db = TestDb::new();
let vendored_typeshed_versions = vendored_typeshed_versions(&db);
let vendored_typeshed_dir = Path::new("vendor/typeshed").canonicalize().unwrap();
let vendored_typeshed_dir =
Path::new(env!("CARGO_MANIFEST_DIR")).join("../ruff_vendored/vendor/typeshed");
let mut empty_iterator = true;

View file

@ -1,8 +0,0 @@
pub use self::vendored::vendored_typeshed_stubs;
pub(super) use self::versions::{
typeshed_versions, vendored_typeshed_versions, TypeshedVersions, TypeshedVersionsParseError,
TypeshedVersionsQueryResult,
};
mod vendored;
mod versions;

View file

@ -701,12 +701,12 @@ enum CallOutcome<'db> {
impl<'db> CallOutcome<'db> {
/// Create a new `CallOutcome::Callable` with given return type.
fn callable(return_ty: Type<'db>) -> CallOutcome {
fn callable(return_ty: Type<'db>) -> CallOutcome<'db> {
CallOutcome::Callable { return_ty }
}
/// Create a new `CallOutcome::NotCallable` with given not-callable type.
fn not_callable(not_callable_ty: Type<'db>) -> CallOutcome {
fn not_callable(not_callable_ty: Type<'db>) -> CallOutcome<'db> {
CallOutcome::NotCallable { not_callable_ty }
}
@ -719,7 +719,10 @@ impl<'db> CallOutcome<'db> {
}
/// Create a new `CallOutcome::Union` with given wrapped outcomes.
fn union(called_ty: Type<'db>, outcomes: impl Into<Box<[CallOutcome<'db>]>>) -> CallOutcome {
fn union(
called_ty: Type<'db>,
outcomes: impl Into<Box<[CallOutcome<'db>]>>,
) -> CallOutcome<'db> {
CallOutcome::Union {
called_ty,
outcomes: outcomes.into(),

View file

@ -20,9 +20,9 @@ default = ["console_error_panic_hook"]
[dependencies]
red_knot_python_semantic = { workspace = true }
red_knot_workspace = { workspace = true }
red_knot_workspace = { workspace = true, default-features = false, features = ["deflate"] }
ruff_db = { workspace = true }
ruff_db = { workspace = true, features = [] }
ruff_notebook = { workspace = true }
console_error_panic_hook = { workspace = true, optional = true }

View file

@ -18,6 +18,7 @@ ruff_cache = { workspace = true }
ruff_db = { workspace = true, features = ["os", "cache"] }
ruff_python_ast = { workspace = true }
ruff_text_size = { workspace = true }
ruff_vendored = { workspace = true }
anyhow = { workspace = true }
crossbeam = { workspace = true }
@ -31,5 +32,10 @@ tracing = { workspace = true }
ruff_db = { workspace = true, features = ["testing"] }
tempfile = { workspace = true }
[features]
default = ["zstd"]
zstd = ["ruff_vendored/zstd"]
deflate = ["ruff_vendored/deflate"]
[lints]
workspace = true

View file

@ -4,7 +4,7 @@ use std::sync::Arc;
use salsa::plumbing::ZalsaDatabase;
use salsa::{Cancelled, Event};
use red_knot_python_semantic::{vendored_typeshed_stubs, Db as SemanticDb, Program};
use red_knot_python_semantic::{Db as SemanticDb, Program};
use ruff_db::files::{File, Files};
use ruff_db::system::System;
use ruff_db::vendored::VendoredFileSystem;
@ -124,7 +124,7 @@ impl SemanticDb for RootDatabase {
#[salsa::db]
impl SourceDb for RootDatabase {
fn vendored(&self) -> &VendoredFileSystem {
vendored_typeshed_stubs()
ruff_vendored::file_system()
}
fn system(&self) -> &dyn System {
@ -161,7 +161,7 @@ pub(crate) mod tests {
use salsa::Event;
use red_knot_python_semantic::{vendored_typeshed_stubs, Db as SemanticDb};
use red_knot_python_semantic::Db as SemanticDb;
use ruff_db::files::Files;
use ruff_db::system::{DbWithTestSystem, System, TestSystem};
use ruff_db::vendored::VendoredFileSystem;
@ -183,7 +183,7 @@ pub(crate) mod tests {
Self {
storage: salsa::Storage::default(),
system: TestSystem::default(),
vendored: vendored_typeshed_stubs().clone(),
vendored: ruff_vendored::file_system().clone(),
files: Files::default(),
events: Arc::default(),
}

View file

@ -14,7 +14,7 @@ default-run = "ruff"
[dependencies]
ruff_cache = { workspace = true }
ruff_db = { workspace = true }
ruff_db = { workspace = true, default-features = false, features = ["os"] }
ruff_diagnostics = { workspace = true }
ruff_graph = { workspace = true, features = ["serde", "clap"] }
ruff_linter = { workspace = true, features = ["clap"] }

View file

@ -33,19 +33,17 @@ tracing = { workspace = true }
tracing-subscriber = { workspace = true, optional = true }
tracing-tree = { workspace = true, optional = true }
rustc-hash = { workspace = true }
[target.'cfg(not(target_arch="wasm32"))'.dependencies]
zip = { workspace = true, features = ["zstd"] }
zip = { workspace = true }
[target.'cfg(target_arch="wasm32")'.dependencies]
web-time = { version = "1.1.0" }
zip = { workspace = true, features = ["deflate"] }
[dev-dependencies]
insta = { workspace = true }
tempfile = { workspace = true }
[features]
default = ["os"]
cache = ["ruff_cache"]
os = ["ignore"]
serde = ["dep:serde", "camino/serde1"]

View file

@ -503,7 +503,8 @@ mod tests {
use crate::files::{system_path_to_file, vendored_path_to_file, FileError};
use crate::system::DbWithTestSystem;
use crate::tests::TestDb;
use crate::vendored::tests::VendoredFileSystemBuilder;
use crate::vendored::VendoredFileSystemBuilder;
use zip::CompressionMethod;
#[test]
fn system_existing_file() -> crate::system::Result<()> {
@ -548,7 +549,7 @@ mod tests {
fn stubbed_vendored_file() -> crate::system::Result<()> {
let mut db = TestDb::new();
let mut vendored_builder = VendoredFileSystemBuilder::new();
let mut vendored_builder = VendoredFileSystemBuilder::new(CompressionMethod::Stored);
vendored_builder
.add_file("test.pyi", "def foo() -> str")
.unwrap();

View file

@ -79,8 +79,9 @@ mod tests {
use crate::parsed::parsed_module;
use crate::system::{DbWithTestSystem, SystemPath, SystemVirtualPath};
use crate::tests::TestDb;
use crate::vendored::{tests::VendoredFileSystemBuilder, VendoredPath};
use crate::vendored::{VendoredFileSystemBuilder, VendoredPath};
use crate::Db;
use zip::CompressionMethod;
#[test]
fn python_file() -> crate::system::Result<()> {
@ -150,7 +151,7 @@ mod tests {
fn vendored_file() {
let mut db = TestDb::new();
let mut vendored_builder = VendoredFileSystemBuilder::new();
let mut vendored_builder = VendoredFileSystemBuilder::new(CompressionMethod::Stored);
vendored_builder
.add_file(
"path.pyi",

View file

@ -1,12 +1,13 @@
use std::borrow::Cow;
use std::collections::BTreeMap;
use std::fmt::{self, Debug};
use std::io::{self, Read};
use std::io::{self, Read, Write};
use std::sync::{Arc, Mutex, MutexGuard};
use zip::{read::ZipFile, ZipArchive, ZipWriter};
use crate::file_revision::FileRevision;
use zip::result::ZipResult;
use zip::write::FileOptions;
use zip::{read::ZipFile, CompressionMethod, ZipArchive, ZipWriter};
pub use self::path::{VendoredPath, VendoredPathBuf};
@ -177,7 +178,6 @@ struct ZipFileDebugInfo {
crc32_hash: u32,
compressed_size: u64,
uncompressed_size: u64,
compression_method: zip::CompressionMethod,
kind: FileType,
}
@ -187,7 +187,6 @@ impl<'a> From<ZipFile<'a>> for ZipFileDebugInfo {
crc32_hash: value.crc32(),
compressed_size: value.compressed_size(),
uncompressed_size: value.size(),
compression_method: value.compression(),
kind: if value.is_dir() {
FileType::Directory
} else {
@ -341,36 +340,18 @@ impl<'a> From<&'a VendoredPath> for NormalizedVendoredPath<'a> {
}
}
#[cfg(test)]
pub(crate) mod tests {
use std::io::Write;
use insta::assert_snapshot;
use zip::result::ZipResult;
use zip::write::FileOptions;
use zip::{CompressionMethod, ZipWriter};
use super::*;
const FUNCTOOLS_CONTENTS: &str = "def update_wrapper(): ...";
const ASYNCIO_TASKS_CONTENTS: &str = "class Task: ...";
pub struct VendoredFileSystemBuilder {
writer: ZipWriter<io::Cursor<Vec<u8>>>,
}
impl Default for VendoredFileSystemBuilder {
fn default() -> Self {
Self::new()
}
compression_method: CompressionMethod,
}
impl VendoredFileSystemBuilder {
pub fn new() -> Self {
pub fn new(compression_method: CompressionMethod) -> Self {
let buffer = io::Cursor::new(Vec::new());
Self {
writer: ZipWriter::new(buffer),
compression_method,
}
}
@ -380,13 +361,13 @@ pub(crate) mod tests {
content: &str,
) -> std::io::Result<()> {
self.writer
.start_file(path.as_ref().as_str(), Self::options())?;
.start_file(path.as_ref().as_str(), self.options())?;
self.writer.write_all(content.as_bytes())
}
pub fn add_directory(&mut self, path: impl AsRef<VendoredPath>) -> ZipResult<()> {
self.writer
.add_directory(path.as_ref().as_str(), Self::options())
.add_directory(path.as_ref().as_str(), self.options())
}
pub fn finish(mut self) -> Result<VendoredFileSystem> {
@ -395,15 +376,25 @@ pub(crate) mod tests {
VendoredFileSystem::new(buffer.into_inner())
}
fn options() -> FileOptions {
fn options(&self) -> FileOptions {
FileOptions::default()
.compression_method(CompressionMethod::Zstd)
.compression_method(self.compression_method)
.unix_permissions(0o644)
}
}
#[cfg(test)]
pub(crate) mod tests {
use insta::assert_snapshot;
use super::*;
const FUNCTOOLS_CONTENTS: &str = "def update_wrapper(): ...";
const ASYNCIO_TASKS_CONTENTS: &str = "class Task: ...";
fn mock_typeshed() -> VendoredFileSystem {
let mut builder = VendoredFileSystemBuilder::new();
let mut builder = VendoredFileSystemBuilder::new(CompressionMethod::Stored);
builder.add_directory("stdlib/").unwrap();
builder
@ -441,28 +432,24 @@ pub(crate) mod tests {
crc32_hash: 0,
compressed_size: 0,
uncompressed_size: 0,
compression_method: Stored,
kind: Directory,
},
"stdlib/asyncio/": ZipFileDebugInfo {
crc32_hash: 0,
compressed_size: 0,
uncompressed_size: 0,
compression_method: Stored,
kind: Directory,
},
"stdlib/asyncio/tasks.pyi": ZipFileDebugInfo {
crc32_hash: 2826547428,
compressed_size: 24,
compressed_size: 15,
uncompressed_size: 15,
compression_method: Zstd,
kind: File,
},
"stdlib/functools.pyi": ZipFileDebugInfo {
crc32_hash: 1099005079,
compressed_size: 34,
compressed_size: 25,
uncompressed_size: 25,
compression_method: Zstd,
kind: File,
},
},

View file

@ -20,9 +20,11 @@ ruff_python_parser = { workspace = true }
anyhow = { workspace = true }
clap = { workspace = true, optional = true }
once_cell = { workspace = true }
salsa = { workspace = true }
schemars = { workspace = true, optional = true }
serde = { workspace = true, optional = true }
zip = { workspace = true, features = [] }
[lints]
workspace = true

View file

@ -1,12 +1,19 @@
use anyhow::Result;
use red_knot_python_semantic::{
vendored_typeshed_stubs, Db, Program, ProgramSettings, PythonVersion, SearchPathSettings,
};
use zip::CompressionMethod;
use red_knot_python_semantic::{Db, Program, ProgramSettings, PythonVersion, SearchPathSettings};
use ruff_db::files::{File, Files};
use ruff_db::system::{OsSystem, System, SystemPathBuf};
use ruff_db::vendored::VendoredFileSystem;
use ruff_db::vendored::{VendoredFileSystem, VendoredFileSystemBuilder};
use ruff_db::{Db as SourceDb, Upcast};
static EMPTY_VENDORED: once_cell::sync::Lazy<VendoredFileSystem> =
once_cell::sync::Lazy::new(|| {
let mut builder = VendoredFileSystemBuilder::new(CompressionMethod::Stored);
builder.add_file("stdlib/VERSIONS", "\n").unwrap();
builder.finish().unwrap()
});
#[salsa::db]
#[derive(Default)]
pub struct ModuleDb {
@ -70,7 +77,7 @@ impl Upcast<dyn SourceDb> for ModuleDb {
#[salsa::db]
impl SourceDb for ModuleDb {
fn vendored(&self) -> &VendoredFileSystem {
vendored_typeshed_stubs()
&EMPTY_VENDORED
}
fn system(&self) -> &dyn System {

View file

@ -0,0 +1,32 @@
[package]
name = "ruff_vendored"
version = "0.0.0"
publish = false
authors = { workspace = true }
edition = { workspace = true }
rust-version = { workspace = true }
homepage = { workspace = true }
documentation = { workspace = true }
repository = { workspace = true }
license = { workspace = true }
[dependencies]
ruff_db = { workspace = true }
once_cell = { workspace = true }
zip = { workspace = true }
[build-dependencies]
path-slash = { workspace = true }
walkdir = { workspace = true }
zip = { workspace = true, features = ["zstd", "deflate"] }
[dev-dependencies]
walkdir = { workspace = true }
[features]
zstd = ["zip/zstd"]
deflate = ["zip/deflate"]
[lints]
workspace = true

View file

@ -30,10 +30,12 @@ fn zip_dir(directory_path: &str, writer: File) -> ZipResult<File> {
// We can't use `#[cfg(...)]` here because the target-arch in a build script is the
// architecture of the system running the build script and not the architecture of the build-target.
// That's why we use the `TARGET` environment variable here.
let method = if std::env::var("TARGET").unwrap().contains("wasm32") {
let method = if cfg!(feature = "zstd") {
CompressionMethod::Zstd
} else if cfg!(feature = "deflate") {
CompressionMethod::Deflated
} else {
CompressionMethod::Zstd
CompressionMethod::Stored
};
let options = FileOptions::default()

View file

@ -6,7 +6,7 @@ use ruff_db::vendored::VendoredFileSystem;
// Luckily this crate will fail to build if this file isn't available at build time.
static TYPESHED_ZIP_BYTES: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/zipped_typeshed.zip"));
pub fn vendored_typeshed_stubs() -> &'static VendoredFileSystem {
pub fn file_system() -> &'static VendoredFileSystem {
static VENDORED_TYPESHED_STUBS: Lazy<VendoredFileSystem> =
Lazy::new(|| VendoredFileSystem::new_static(TYPESHED_ZIP_BYTES).unwrap());
&VENDORED_TYPESHED_STUBS
@ -42,7 +42,7 @@ mod tests {
#[test]
fn typeshed_vfs_consistent_with_vendored_stubs() {
let vendored_typeshed_dir = Path::new("vendor/typeshed").canonicalize().unwrap();
let vendored_typeshed_stubs = vendored_typeshed_stubs();
let vendored_typeshed_stubs = file_system();
let mut empty_iterator = true;
for entry in walkdir::WalkDir::new(&vendored_typeshed_dir).min_depth(1) {

Some files were not shown because too many files have changed in this diff Show more