tinymist/crates/tinymist-vfs/src/lib.rs
2025-02-20 00:47:16 +08:00

672 lines
20 KiB
Rust

//! upstream of following files <https://github.com/rust-lang/rust-analyzer/tree/master/crates/vfs>
//! ::path_interner.rs -> path_interner.rs
#![allow(missing_docs)]
/// Provides ProxyAccessModel that makes access to JavaScript objects for
/// browser compilation.
#[cfg(feature = "browser")]
pub mod browser;
/// Provides SystemAccessModel that makes access to the local file system for
/// system compilation.
#[cfg(feature = "system")]
pub mod system;
/// Provides dummy access model.
///
/// Note: we can still perform compilation with the dummy access model, since
/// [`Vfs`] will make an overlay access model over the provided dummy access
/// model.
pub mod dummy;
/// Provides snapshot models
pub mod snapshot;
pub use snapshot::*;
use tinymist_std::hash::{FxDashMap, FxHashMap};
/// Provides notify access model which retrieves file system events and changes
/// from some notify backend.
pub mod notify;
pub use notify::{FilesystemEvent, MemoryEvent};
/// Provides overlay access model which allows to shadow the underlying access
/// model with memory contents.
pub mod overlay;
/// Provides resolve access model.
pub mod resolve;
/// Provides trace access model which traces the underlying access model.
pub mod trace;
mod utils;
mod path_mapper;
pub use path_mapper::{PathResolution, RootResolver, WorkspaceResolution, WorkspaceResolver};
use core::fmt;
use std::num::NonZeroUsize;
use std::sync::OnceLock;
use std::{path::Path, sync::Arc};
use parking_lot::{Mutex, RwLock};
use rpds::RedBlackTreeMapSync;
use typst::diag::{FileError, FileResult};
use typst::foundations::Dict;
use typst::syntax::Source;
use typst::utils::LazyHash;
use crate::notify::NotifyAccessModel;
use crate::overlay::OverlayAccessModel;
use crate::resolve::ResolveAccessModel;
pub use tinymist_std::time::Time;
pub use tinymist_std::ImmutPath;
pub use typst::foundations::Bytes;
pub use typst::syntax::FileId as TypstFileId;
/// Handle to a file in [`Vfs`].
///
/// This is a plain alias of [`TypstFileId`], so both names can be used
/// interchangeably.
pub type FileId = TypstFileId;
/// Immutable prehashed reference to dictionary.
///
/// The dictionary is wrapped in [`LazyHash`] and shared through an [`Arc`].
pub type ImmutDict = Arc<LazyHash<Dict>>;
/// A trait for accessing underlying file system by path.
///
/// This trait is simplified by [`Vfs`] and requires a minimal method set for
/// typst compilation.
pub trait PathAccessModel {
/// Clear the cache of the access model.
///
/// This is called when the vfs is reset. See [`Vfs`]'s reset method for
/// more information.
///
/// The default implementation does nothing.
fn reset(&mut self) {}
/// Return the content of the file entry located at the path `src`.
fn content(&self, src: &Path) -> FileResult<Bytes>;
}
/// A trait for accessing underlying file system by file id.
///
/// This trait is simplified by [`Vfs`] and requires a minimal method set for
/// typst compilation.
pub trait AccessModel {
/// Clear the cache of the access model.
///
/// This is called when the vfs is reset. See [`Vfs`]'s reset method for
/// more information.
///
/// The default implementation does nothing.
fn reset(&mut self) {}
/// Return the content of the file entry identified by `src`.
///
/// The first element of the returned pair is an optional path associated
/// with the id; when it is present, [`Vfs`] records it so that the path can
/// later be mapped back to the file id. The second element is the content
/// of the file or the error that occurred while reading it.
// NOTE(review): presumably the path is the one `src` was resolved to —
// confirm against the implementors.
fn content(&self, src: TypstFileId) -> (Option<ImmutPath>, FileResult<Bytes>);
}
/// A handle to an access model `M` that is stored behind an
/// `Arc<RwLock<_>>`.
///
/// Cloning the handle clones the [`Arc`], so all clones refer to the same
/// inner access model.
#[derive(Clone)]
pub struct SharedAccessModel<M> {
/// The shared, lock-protected inner access model.
pub inner: Arc<RwLock<M>>,
}
impl<M> SharedAccessModel<M> {
    /// Wraps `inner` into a freshly allocated `Arc<RwLock<_>>`.
    pub fn new(inner: M) -> Self {
        let inner = Arc::new(RwLock::new(inner));
        Self { inner }
    }
}
impl<M> PathAccessModel for SharedAccessModel<M>
where
    M: PathAccessModel,
{
    /// Resets the inner access model while holding the write lock.
    #[inline]
    fn reset(&mut self) {
        let mut model = self.inner.write();
        model.reset();
    }

    /// Reads the content at `src` from the inner access model while holding
    /// the read lock.
    fn content(&self, src: &Path) -> FileResult<Bytes> {
        let model = self.inner.read();
        model.content(src)
    }
}
/// The path-keyed layers of the vfs: an overlay access model on top of a
/// notify access model on top of the shared, user-provided access model.
///
/// We add notify access model here since notify access model doesn't introduce
/// overheads by our observation.
type VfsPathAccessModel<M> = OverlayAccessModel<ImmutPath, NotifyAccessModel<SharedAccessModel<M>>>;
/// The complete, file-id-keyed access model of the vfs: an overlay access
/// model on top of a resolve access model that wraps the path-keyed layers.
type VfsAccessModel<M> = OverlayAccessModel<TypstFileId, ResolveAccessModel<VfsPathAccessModel<M>>>;
/// A provider of file paths and file contents that are addressed by file id.
pub trait FsProvider {
/// Arbitrary one of file path corresponding to the given `id`.
fn file_path(&self, id: TypstFileId) -> FileResult<PathResolution>;
/// Reads the raw bytes of the file identified by `id`.
fn read(&self, id: TypstFileId) -> FileResult<Bytes>;
/// Reads the file identified by `id` as a typst [`Source`].
fn read_source(&self, id: TypstFileId) -> FileResult<Source>;
}
/// A cached source result together with the revision used to age it out.
struct SourceEntry {
/// The revision recorded for this entry. [`SourceCache::evict`] compares it
/// with the current revision to decide whether the entry is dropped.
last_accessed_rev: NonZeroUsize,
/// The cached result of creating a [`Source`], which may be an error.
source: FileResult<Source>,
}
/// The part of a [`SourceCache`] that belongs to a single file id.
#[derive(Default)]
struct SourceIdShard {
/// The revision recorded for the whole shard. [`SourceCache::evict`]
/// compares it with the current revision to decide whether the shard is
/// dropped as a whole.
last_accessed_rev: usize,
/// A recently created source of this file id. [`Vfs::source`] reuses it as
/// the base that is updated in place when a new content has to be parsed.
recent_source: Option<Source>,
/// The cached source results, keyed by the file content they were created
/// from.
sources: FxHashMap<Bytes, SourceEntry>,
}
/// A cache of created sources.
///
/// The entries live behind an [`Arc`], so clones of the cache share the same
/// entries.
#[derive(Default, Clone)]
pub struct SourceCache {
/// The cache entries for each file id.
cache_entries: Arc<FxDashMap<TypstFileId, SourceIdShard>>,
}
impl SourceCache {
    /// Drops everything whose recorded revision lags more than `threshold`
    /// revisions behind `curr`.
    ///
    /// A shard that is too old is removed as a whole. Otherwise the shard is
    /// kept and only its outdated entries are removed.
    pub fn evict(&self, curr: NonZeroUsize, threshold: usize) {
        let now = curr.get();
        // A revision that is newer than `now` counts as an age of zero.
        let expired = |rev: usize| now.saturating_sub(rev) > threshold;

        self.cache_entries.retain(|_, shard| {
            let keep_shard = !expired(shard.last_accessed_rev);
            if keep_shard {
                shard
                    .sources
                    .retain(|_, entry| !expired(entry.last_accessed_rev.get()));
            }
            keep_shard
        });
    }
}
/// A virtual file system harnessing over the given access model, specific for
/// `reflexo_world::CompilerWorld`. With vfs, we can minimize the
/// implementation overhead for [`AccessModel`] trait.
pub struct Vfs<M: PathAccessModel + Sized> {
/// The cache of created sources. It is shared with the instances created by
/// [`Vfs::snapshot`] and [`Vfs::fork`].
source_cache: SourceCache,
// The slots for all the files during a single lifecycle.
// pub slots: Arc<Mutex<FxHashMap<TypstFileId, SourceCache>>>,
/// The per-file-id entries (bytes and source) of the current revision.
managed: Arc<Mutex<EntryMap>>,
/// The mapping from a path to the file ids whose content was read from it.
paths: Arc<Mutex<PathMap>>,
/// The current revision. It starts at 2 and is advanced when a
/// [`RevisingVfs`] that changed the view is dropped.
revision: NonZeroUsize,
// access_model: TraceAccessModel<VfsAccessModel<M>>,
/// The wrapped access model.
access_model: VfsAccessModel<M>,
}
impl<M: PathAccessModel + Sized> fmt::Debug for Vfs<M> {
    /// Prints the revision together with the number of managed entries and
    /// the number of known paths.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let managed_len = self.managed.lock().entries.size();
        let paths_len = self.paths.lock().paths.len();

        f.debug_struct("Vfs")
            .field("revision", &self.revision)
            .field("managed", &managed_len)
            .field("paths", &paths_len)
            .finish()
    }
}
impl<M: PathAccessModel + Clone + Sized> Vfs<M> {
    /// Returns the current revision of the vfs.
    pub fn revision(&self) -> NonZeroUsize {
        self.revision
    }

    /// Creates a snapshot of the vfs.
    ///
    /// The snapshot shares the source cache, the managed entries, and the
    /// path map with `self` (only the `Arc`s are cloned), keeps the same
    /// revision, and holds a clone of the access model.
    pub fn snapshot(&self) -> Self {
        Self {
            source_cache: self.source_cache.clone(),
            managed: self.managed.clone(),
            paths: self.paths.clone(),
            revision: self.revision,
            access_model: self.access_model.clone(),
        }
    }

    /// Creates a fork of the vfs.
    ///
    /// The fork shares the source cache with `self` and holds a clone of the
    /// access model, but starts with empty managed entries, an empty path
    /// map, and the initial revision.
    pub fn fork(&self) -> Self {
        Self {
            source_cache: self.source_cache.clone(),
            managed: Arc::new(Mutex::new(EntryMap::default())),
            paths: Arc::new(Mutex::new(PathMap::default())),
            revision: NonZeroUsize::new(2).expect("initial revision is 2"),
            access_model: self.access_model.clone(),
        }
    }

    /// Checks whether none of `file_ids` has changed after the revision
    /// `rev`.
    ///
    /// Returns `false` as soon as a file id has no managed entry or its entry
    /// was changed at a revision greater than `rev`; returns `true`
    /// otherwise (including for an empty `file_ids`).
    pub fn is_clean_compile(&self, rev: usize, file_ids: &[FileId]) -> bool {
        let m = self.managed.lock();
        for id in file_ids {
            // This is a read-only query, so look the entry up immutably
            // instead of requesting mutable access to the persistent map.
            let Some(entry) = m.entries.get(id) else {
                log::info!("Vfs(dirty, {id:?}): file id not found");
                return false;
            };
            if entry.changed_at > rev {
                log::info!("Vfs(dirty, {id:?}): rev {rev:?} => {:?}", entry.changed_at);
                return false;
            }
            log::info!(
                "Vfs(clean, {id:?}, rev={rev}, changed_at={})",
                entry.changed_at
            );
        }
        true
    }
}
impl<M: PathAccessModel + Sized> Vfs<M> {
/// Create a new `Vfs` with a given `access_model`.
///
/// Retrieving an [`AccessModel`], it will further wrap the access model
/// with [`OverlayAccessModel`] and [`NotifyAccessModel`]. This means that
/// you don't need to implement:
/// + overlay: allowing to shadow the underlying access model with memory
/// contents, which is useful for a limited execution environment and
/// instrumenting or overriding source files or packages.
/// + notify: regards problems of synchronizing with the file system when
/// the vfs is watching the file system.
///
/// See [`AccessModel`] for more information.
pub fn new(resolver: Arc<dyn RootResolver + Send + Sync>, access_model: M) -> Self {
let access_model = SharedAccessModel::new(access_model);
let access_model = NotifyAccessModel::new(access_model);
let access_model = OverlayAccessModel::new(access_model);
let access_model = ResolveAccessModel {
resolver,
inner: access_model,
};
let access_model = OverlayAccessModel::new(access_model);
// If you want to trace the access model, uncomment the following line
// let access_model = TraceAccessModel::new(access_model);
Self {
source_cache: SourceCache::default(),
managed: Arc::default(),
paths: Arc::default(),
revision: NonZeroUsize::new(2).expect("initial revision is 2"),
access_model,
}
}
/// Reset all state.
pub fn reset_all(&mut self) {
self.reset_access_model();
self.reset_mapping();
self.take_source_cache();
}
/// Reset access model.
pub fn reset_access_model(&mut self) {
self.access_model.reset();
}
/// Reset all possible caches.
pub fn reset_mapping(&mut self) {
self.revise().reset_cache();
}
/// Clear the cache that is not touched for a long time.
pub fn evict(&mut self, threshold: usize) {
let mut m = self.managed.lock();
let rev = self.revision.get();
for (id, entry) in m.entries.clone().iter() {
let entry_rev = entry.bytes.get().map(|e| e.1).unwrap_or_default();
if entry_rev + threshold < rev {
m.entries.remove_mut(id);
}
}
}
pub fn take_source_cache(&mut self) -> SourceCache {
std::mem::take(&mut self.source_cache)
}
pub fn clone_source_cache(&self) -> SourceCache {
self.source_cache.clone()
}
/// Resolve the real path for a file id.
pub fn file_path(&self, id: TypstFileId) -> Result<PathResolution, FileError> {
self.access_model.inner.resolver.path_for_id(id)
}
/// Get paths to all the shadowing paths in [`OverlayAccessModel`].
pub fn shadow_paths(&self) -> Vec<ImmutPath> {
self.access_model.inner.inner.file_paths()
}
/// Get paths to all the shadowing file ids in [`OverlayAccessModel`].
///
/// The in memory untitled files can have no path so
/// they only have file ids.
pub fn shadow_ids(&self) -> Vec<TypstFileId> {
self.access_model.file_paths()
}
/// Returns the overall memory usage for the stored files.
pub fn memory_usage(&self) -> usize {
0
}
pub fn revise(&mut self) -> RevisingVfs<M> {
let managed = self.managed.lock().clone();
let paths = self.paths.lock().clone();
let goal_revision = self.revision.checked_add(1).expect("revision overflowed");
RevisingVfs {
managed,
paths,
inner: self,
goal_revision,
view_changed: false,
}
}
pub fn display(&self) -> DisplayVfs<M> {
DisplayVfs { inner: self }
}
/// Reads a file.
pub fn read(&self, fid: TypstFileId) -> FileResult<Bytes> {
let bytes = self.managed.lock().slot(fid, |entry| entry.bytes.clone());
self.read_content(&bytes, fid).clone()
}
/// Reads a source.
pub fn source(&self, file_id: TypstFileId) -> FileResult<Source> {
let (bytes, source) = self
.managed
.lock()
.slot(file_id, |entry| (entry.bytes.clone(), entry.source.clone()));
let source = source.get_or_init(|| {
let content = self
.read_content(&bytes, file_id)
.as_ref()
.map_err(Clone::clone)?;
let mut cache_entry = self.source_cache.cache_entries.entry(file_id).or_default();
if let Some(source) = cache_entry.sources.get(content) {
return source.source.clone();
}
let source = (|| {
let prev = cache_entry.recent_source.clone();
let content = from_utf8_or_bom(content).map_err(|_| FileError::InvalidUtf8)?;
let next = match prev {
Some(mut prev) => {
prev.replace(content);
prev
}
None => Source::new(file_id, content.to_owned()),
};
let should_update = cache_entry.recent_source.is_none()
|| cache_entry.last_accessed_rev < self.revision.get();
if should_update {
cache_entry.recent_source = Some(next.clone());
}
Ok(next)
})();
let entry = cache_entry
.sources
.entry(content.clone())
.or_insert_with(|| SourceEntry {
last_accessed_rev: self.revision,
source: source.clone(),
});
if entry.last_accessed_rev < self.revision {
entry.last_accessed_rev = self.revision;
}
source
});
source.clone()
}
/// Reads and caches content of a file.
fn read_content<'a>(&self, bytes: &'a BytesQuery, fid: TypstFileId) -> &'a FileResult<Bytes> {
&bytes
.get_or_init(|| {
let (path, content) = self.access_model.content(fid);
if let Some(path) = path.as_ref() {
self.paths.lock().insert(path, fid);
}
(path, self.revision.get(), content)
})
.2
}
}
/// A wrapper around a [`Vfs`] whose `Debug` output lists the managed entries
/// and the known paths instead of only counting them.
pub struct DisplayVfs<'a, M: PathAccessModel + Sized> {
    /// The vfs to display.
    inner: &'a Vfs<M>,
}

impl<M: PathAccessModel + Sized> fmt::Debug for DisplayVfs<'_, M> {
    /// Prints the revision, the managed entries, and the path map of the
    /// wrapped vfs.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let vfs = self.inner;
        // Both guards stay alive until the output has been written.
        let managed = vfs.managed.lock();
        let paths = vfs.paths.lock();

        f.debug_struct("Vfs")
            .field("revision", &vfs.revision)
            .field("managed", &managed.display())
            .field("paths", &paths.display())
            .finish()
    }
}
/// A pending revision of a [`Vfs`], created by [`Vfs::revise`].
///
/// Changes are collected in working copies of the managed entries and the
/// path map. When the value is dropped and the view was changed, the copies
/// replace the ones in the vfs and the vfs moves on to `goal_revision`.
pub struct RevisingVfs<'a, M: PathAccessModel + Sized> {
/// The vfs that is being revised.
inner: &'a mut Vfs<M>,
/// The working copy of the managed entries.
managed: EntryMap,
/// The working copy of the path map.
paths: PathMap,
/// The revision the vfs gets when the changes are committed.
goal_revision: NonZeroUsize,
/// Whether anything was changed, i.e. whether to commit on drop.
view_changed: bool,
}
impl<M: PathAccessModel + Sized> Drop for RevisingVfs<'_, M> {
    /// Commits the working copies and the goal revision to the vfs, but only
    /// if the view was changed.
    fn drop(&mut self) {
        if !self.view_changed {
            return;
        }

        let managed = std::mem::take(&mut self.managed);
        let paths = std::mem::take(&mut self.paths);

        self.inner.managed = Arc::new(Mutex::new(managed));
        self.inner.paths = Arc::new(Mutex::new(paths));
        self.inner.revision = self.goal_revision;
    }
}
impl<M: PathAccessModel + Sized> RevisingVfs<'_, M> {
    /// Gives mutable access to the vfs that is being revised.
    pub fn vfs(&mut self) -> &mut Vfs<M> {
        self.inner
    }

    /// Shorthand for the access model of the vfs that is being revised.
    fn am(&mut self) -> &mut VfsAccessModel<M> {
        &mut self.inner.access_model
    }

    /// Invalidates every file id that is recorded for `path` and forgets the
    /// path. Does nothing if the path is unknown.
    fn invalidate_path(&mut self, path: &Path) {
        let Some(fids) = self.paths.remove(path) else {
            return;
        };

        self.view_changed = true;
        for fid in fids {
            self.invalidate_file_id(fid);
        }
    }

    /// Marks `file_id` as changed at the goal revision and drops its cached
    /// bytes and source.
    fn invalidate_file_id(&mut self, file_id: TypstFileId) {
        self.view_changed = true;

        let changed_at = self.goal_revision.get();
        self.managed.slot(file_id, |entry| {
            entry.changed_at = changed_at;
            entry.bytes = Arc::default();
            entry.source = Arc::default();
        });
    }

    /// Reset the shadowing files in [`OverlayAccessModel`].
    pub fn reset_shadow(&mut self) {
        let shadowed_paths = self.am().inner.inner.file_paths();
        for path in shadowed_paths {
            self.invalidate_path(&path);
        }

        let shadowed_ids = self.am().file_paths();
        for fid in shadowed_ids {
            self.invalidate_file_id(fid);
        }

        self.am().clear_shadow();
        self.am().inner.inner.clear_shadow();
    }

    /// Reset all caches. This can happen when:
    /// - package paths are reconfigured.
    /// - The root of the workspace is switched.
    pub fn reset_cache(&mut self) {
        self.view_changed = true;
        self.managed = EntryMap::default();
        self.paths = PathMap::default();
    }

    /// Add a shadowing file to the [`OverlayAccessModel`].
    ///
    /// This currently always returns `Ok(())`.
    pub fn map_shadow(&mut self, path: &Path, snap: FileSnapshot) -> FileResult<()> {
        // The view changes even if the path was not known before.
        self.view_changed = true;
        self.invalidate_path(path);

        let path_overlay = &mut self.am().inner.inner;
        path_overlay.add_file(path, snap, |c| c.into());

        Ok(())
    }

    /// Remove a shadowing file from the [`OverlayAccessModel`].
    ///
    /// This currently always returns `Ok(())`.
    pub fn unmap_shadow(&mut self, path: &Path) -> FileResult<()> {
        // The view changes even if the path was not known before.
        self.view_changed = true;
        self.invalidate_path(path);

        let path_overlay = &mut self.am().inner.inner;
        path_overlay.remove_file(path);

        Ok(())
    }

    /// Add a shadowing file to the [`OverlayAccessModel`] by file id.
    ///
    /// This currently always returns `Ok(())`.
    pub fn map_shadow_by_id(&mut self, file_id: TypstFileId, snap: FileSnapshot) -> FileResult<()> {
        self.view_changed = true;
        self.invalidate_file_id(file_id);

        let id_overlay = self.am();
        id_overlay.add_file(&file_id, snap, |c| *c);

        Ok(())
    }

    /// Remove a shadowing file from the [`OverlayAccessModel`] by file id.
    pub fn remove_shadow_by_id(&mut self, file_id: TypstFileId) {
        self.view_changed = true;
        self.invalidate_file_id(file_id);

        let id_overlay = self.am();
        id_overlay.remove_file(&file_id);
    }

    /// Let the vfs notify the access model with a filesystem event.
    ///
    /// Only the first component produced by splitting the event is used.
    ///
    /// See [`NotifyAccessModel`] for more information.
    pub fn notify_fs_event(&mut self, event: FilesystemEvent) {
        let changes = event.split().0;
        self.notify_fs_changes(changes);
    }

    /// Let the vfs notify the access model with a filesystem changes.
    ///
    /// All removed and inserted paths are invalidated before the change set
    /// is passed on to the notify access model.
    ///
    /// See [`NotifyAccessModel`] for more information.
    pub fn notify_fs_changes(&mut self, event: FileChangeSet) {
        for removed in &event.removes {
            self.invalidate_path(removed);
        }
        for (inserted, _) in &event.inserts {
            self.invalidate_path(inserted);
        }

        let notify_model = &mut self.am().inner.inner.inner;
        notify_model.notify(event);
    }
}
/// The lazily initialized result of reading a file: the path reported by the
/// access model (if any), the revision at which the content was read, and the
/// content itself or the error that occurred.
type BytesQuery = Arc<OnceLock<(Option<ImmutPath>, usize, FileResult<Bytes>)>>;
/// The state the vfs keeps for a single file id.
#[derive(Debug, Clone, Default)]
struct VfsEntry {
/// The revision at which the entry was invalidated last; 0 for an entry that
/// has never been invalidated.
changed_at: usize,
/// The lazily read content of the file.
bytes: BytesQuery,
/// The lazily created source of the file.
source: Arc<OnceLock<FileResult<Source>>>,
}
/// The map from file ids to the state the vfs keeps for them.
#[derive(Debug, Clone, Default)]
struct EntryMap {
/// The entries, stored in a persistent red-black tree map.
entries: RedBlackTreeMapSync<TypstFileId, VfsEntry>,
}
impl EntryMap {
    /// Runs `f` on the entry of `path` and returns its result.
    ///
    /// If there is no entry yet, `f` runs on a default entry, which is
    /// inserted into the map afterwards.
    #[inline(always)]
    fn slot<T>(&mut self, path: TypstFileId, f: impl FnOnce(&mut VfsEntry) -> T) -> T {
        match self.entries.get_mut(&path) {
            Some(existing) => f(existing),
            None => {
                let mut fresh = VfsEntry::default();
                let out = f(&mut fresh);
                self.entries.insert_mut(path, fresh);
                out
            }
        }
    }

    /// Returns a wrapper whose `Debug` output lists the entries of the map.
    fn display(&self) -> DisplayEntryMap {
        DisplayEntryMap { map: self }
    }
}
/// A wrapper around an [`EntryMap`] whose `Debug` output is a map from file
/// ids to their entries.
pub struct DisplayEntryMap<'a> {
    /// The map to display.
    map: &'a EntryMap,
}

impl fmt::Debug for DisplayEntryMap<'_> {
    /// Prints every entry of the wrapped map as a key-value pair.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut out = f.debug_map();
        for (id, entry) in self.map.entries.iter() {
            out.entry(id, entry);
        }
        out.finish()
    }
}
/// The map from paths to the file ids that are associated with them.
#[derive(Debug, Clone, Default)]
struct PathMap {
/// The file ids recorded for each path.
paths: FxHashMap<ImmutPath, Vec<TypstFileId>>,
}
impl PathMap {
    /// Records that `fid` is associated with `path`.
    ///
    /// A file id is recorded at most once per path. The same pair can be
    /// inserted repeatedly, because invalidating an entry by file id does not
    /// remove the file id from this map, and the next read inserts it again.
    /// Without the check the list would grow with duplicates.
    fn insert(&mut self, path: &ImmutPath, fid: TypstFileId) {
        match self.paths.get_mut(path) {
            Some(fids) => {
                if !fids.contains(&fid) {
                    fids.push(fid);
                }
            }
            None => {
                self.paths.insert(path.clone(), vec![fid]);
            }
        }
    }

    /// Forgets `path` and returns the file ids that were recorded for it, or
    /// `None` if the path is unknown.
    fn remove(&mut self, path: &Path) -> Option<Vec<TypstFileId>> {
        self.paths.remove(path)
    }

    /// Returns a wrapper whose `Debug` output lists the entries of the map.
    fn display(&self) -> DisplayPathMap {
        DisplayPathMap { map: self }
    }
}
/// A wrapper around a [`PathMap`] whose `Debug` output is a map from paths to
/// the file ids recorded for them.
pub struct DisplayPathMap<'a> {
    /// The map to display.
    map: &'a PathMap,
}

impl fmt::Debug for DisplayPathMap<'_> {
    /// Prints every entry of the wrapped map as a key-value pair.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut out = f.debug_map();
        for (path, fids) in self.map.paths.iter() {
            out.entry(path, fids);
        }
        out.finish()
    }
}
/// Interprets `buf` as UTF-8 text, skipping a leading UTF-8 byte order mark
/// (`EF BB BF`) if there is one.
///
/// Returns an error if the remaining bytes are not valid UTF-8.
fn from_utf8_or_bom(buf: &[u8]) -> FileResult<&str> {
    const UTF8_BOM: &[u8] = b"\xef\xbb\xbf";

    // Without a byte order mark, the whole buffer is assumed to be UTF-8.
    let text = buf.strip_prefix(UTF8_BOM).unwrap_or(buf);
    Ok(std::str::from_utf8(text)?)
}
#[cfg(test)]
mod tests {
    use super::dummy::DummyAccessModel;
    use super::Vfs;

    /// Compiles only if `T` is both `Send` and `Sync`.
    fn assert_send_sync<T: Send + Sync>() {}

    /// The vfs must be usable from and shareable between threads.
    #[test]
    fn test_vfs_send_sync() {
        assert_send_sync::<Vfs<DummyAccessModel>>();
    }
}