Hash file contents to verify whether file actually changed

This commit is contained in:
Lukas Wirth 2024-05-14 11:55:12 +02:00
parent 793396c624
commit 1ca97ba896
4 changed files with 75 additions and 94 deletions

View file

@ -29,6 +29,7 @@
//! //!
//! In general, any item in the `ItemTree` stores its `AstId`, which allows mapping it back to its //! In general, any item in the `ItemTree` stores its `AstId`, which allows mapping it back to its
//! surface syntax. //! surface syntax.
#![allow(unexpected_cfgs)]
mod lower; mod lower;
mod pretty; mod pretty;
@ -467,13 +468,12 @@ macro_rules! mod_items {
pub enum GenericModItem { pub enum GenericModItem {
$( $(
$( $(
#[cfg_attr(FALSE, $generic_params)] #[cfg_attr(ignore_fragment, $generic_params)]
$typ(FileItemTreeId<$typ>), $typ(FileItemTreeId<$typ>),
)? )?
)+ )+
} }
#[allow(unexpected_cfgs)]
impl From<GenericModItem> for ModItem { impl From<GenericModItem> for ModItem {
fn from(id: GenericModItem) -> ModItem { fn from(id: GenericModItem) -> ModItem {
match id { match id {
@ -494,7 +494,6 @@ macro_rules! mod_items {
} }
$( $(
#[allow(unexpected_cfgs)]
impl From<FileItemTreeId<$typ>> for ModItem { impl From<FileItemTreeId<$typ>> for ModItem {
fn from(id: FileItemTreeId<$typ>) -> ModItem { fn from(id: FileItemTreeId<$typ>) -> ModItem {
ModItem::$typ(id) ModItem::$typ(id)

View file

@ -361,8 +361,8 @@ fn load_crate_graph(
} }
} }
let changes = vfs.take_changes(); let changes = vfs.take_changes();
for file in changes { for (_, file) in changes {
if let vfs::Change::Create(v) | vfs::Change::Modify(v) = file.change { if let vfs::Change::Create(v, _) | vfs::Change::Modify(v, _) = file.change {
if let Ok(text) = String::from_utf8(v) { if let Ok(text) = String::from_utf8(v) {
analysis_change.change_file(file.file_id, Some(text)) analysis_change.change_file(file.file_id, Some(text))
} }

View file

@ -3,7 +3,7 @@
//! //!
//! Each tick provides an immutable snapshot of the state as `WorldSnapshot`. //! Each tick provides an immutable snapshot of the state as `WorldSnapshot`.
use std::{collections::hash_map::Entry, time::Instant}; use std::time::Instant;
use crossbeam_channel::{unbounded, Receiver, Sender}; use crossbeam_channel::{unbounded, Receiver, Sender};
use flycheck::FlycheckHandle; use flycheck::FlycheckHandle;
@ -25,7 +25,7 @@ use project_model::{
use rustc_hash::{FxHashMap, FxHashSet}; use rustc_hash::{FxHashMap, FxHashSet};
use tracing::{span, Level}; use tracing::{span, Level};
use triomphe::Arc; use triomphe::Arc;
use vfs::{AnchoredPathBuf, ChangedFile, Vfs}; use vfs::{AnchoredPathBuf, Vfs};
use crate::{ use crate::{
config::{Config, ConfigError}, config::{Config, ConfigError},
@ -254,7 +254,6 @@ impl GlobalState {
pub(crate) fn process_changes(&mut self) -> bool { pub(crate) fn process_changes(&mut self) -> bool {
let _p = span!(Level::INFO, "GlobalState::process_changes").entered(); let _p = span!(Level::INFO, "GlobalState::process_changes").entered();
let mut file_changes = FxHashMap::<_, ChangedFile>::default();
let (change, modified_rust_files, workspace_structure_change) = { let (change, modified_rust_files, workspace_structure_change) = {
let mut change = ChangeWithProcMacros::new(); let mut change = ChangeWithProcMacros::new();
let mut guard = self.vfs.write(); let mut guard = self.vfs.write();
@ -266,44 +265,13 @@ impl GlobalState {
// downgrade to read lock to allow more readers while we are normalizing text // downgrade to read lock to allow more readers while we are normalizing text
let guard = RwLockWriteGuard::downgrade_to_upgradable(guard); let guard = RwLockWriteGuard::downgrade_to_upgradable(guard);
let vfs: &Vfs = &guard.0; let vfs: &Vfs = &guard.0;
// We need to fix up the changed events a bit. If we have a create or modify for a file
// id that is followed by a delete we actually skip observing the file text from the
// earlier event, to avoid problems later on.
for changed_file in changed_files {
use vfs::Change::*;
match file_changes.entry(changed_file.file_id) {
Entry::Occupied(mut o) => {
let change = o.get_mut();
match (&mut change.change, changed_file.change) {
// latter `Delete` wins
(change, Delete) => *change = Delete,
// merge `Create` with `Create` or `Modify`
(Create(prev), Create(new) | Modify(new)) => *prev = new,
// collapse identical `Modify`es
(Modify(prev), Modify(new)) => *prev = new,
// equivalent to `Modify`
(change @ Delete, Create(new)) => {
*change = Modify(new);
}
// shouldn't occur, but collapse into `Create`
(change @ Delete, Modify(new)) => {
stdx::never!();
*change = Create(new);
}
// shouldn't occur, but keep the Create
(prev @ Modify(_), new @ Create(_)) => *prev = new,
}
}
Entry::Vacant(v) => _ = v.insert(changed_file),
}
}
let mut workspace_structure_change = None; let mut workspace_structure_change = None;
// A file was added or deleted // A file was added or deleted
let mut has_structure_changes = false; let mut has_structure_changes = false;
let mut bytes = vec![]; let mut bytes = vec![];
let mut modified_rust_files = vec![]; let mut modified_rust_files = vec![];
for file in file_changes.into_values() { for file in changed_files.into_values() {
let vfs_path = vfs.file_path(file.file_id); let vfs_path = vfs.file_path(file.file_id);
if let Some(path) = vfs_path.as_path() { if let Some(path) = vfs_path.as_path() {
has_structure_changes |= file.is_created_or_deleted(); has_structure_changes |= file.is_created_or_deleted();
@ -326,7 +294,8 @@ impl GlobalState {
self.diagnostics.clear_native_for(file.file_id); self.diagnostics.clear_native_for(file.file_id);
} }
let text = if let vfs::Change::Create(v) | vfs::Change::Modify(v) = file.change { let text =
if let vfs::Change::Create(v, _) | vfs::Change::Modify(v, _) = file.change {
String::from_utf8(v).ok().map(|text| { String::from_utf8(v).ok().map(|text| {
// FIXME: Consider doing normalization in the `vfs` instead? That allows // FIXME: Consider doing normalization in the `vfs` instead? That allows
// getting rid of some locking // getting rid of some locking

View file

@ -46,7 +46,7 @@ pub mod loader;
mod path_interner; mod path_interner;
mod vfs_path; mod vfs_path;
use std::{fmt, mem}; use std::{fmt, hash::BuildHasherDefault, mem};
use crate::path_interner::PathInterner; use crate::path_interner::PathInterner;
@ -54,6 +54,7 @@ pub use crate::{
anchored_path::{AnchoredPath, AnchoredPathBuf}, anchored_path::{AnchoredPath, AnchoredPathBuf},
vfs_path::VfsPath, vfs_path::VfsPath,
}; };
use indexmap::{map::Entry, IndexMap};
pub use paths::{AbsPath, AbsPathBuf}; pub use paths::{AbsPath, AbsPathBuf};
use rustc_hash::FxHasher; use rustc_hash::FxHasher;
@ -95,19 +96,12 @@ impl nohash_hasher::IsEnabled for FileId {}
pub struct Vfs { pub struct Vfs {
interner: PathInterner, interner: PathInterner,
data: Vec<FileState>, data: Vec<FileState>,
// FIXME: This should be a HashMap<FileId, ChangeFile> changes: IndexMap<FileId, ChangedFile, BuildHasherDefault<FxHasher>>,
// right now we do a nasty deduplication in GlobalState::process_changes that would be a lot
// easier to handle here on insertion.
changes: Vec<ChangedFile>,
// The above FIXME would then also get rid of this probably
created_this_cycle: Vec<FileId>,
} }
#[derive(Copy, Clone, Debug, PartialEq, PartialOrd)] #[derive(Copy, Clone, Debug, PartialEq, PartialOrd)]
pub enum FileState { pub enum FileState {
/// The file has been created this cycle. /// The file exists with the given content hash.
Created,
/// The file exists.
Exists(u64), Exists(u64),
/// The file is deleted. /// The file is deleted.
Deleted, Deleted,
@ -131,12 +125,12 @@ impl ChangedFile {
/// Returns `true` if the change is [`Create`](ChangeKind::Create) or /// Returns `true` if the change is [`Create`](ChangeKind::Create) or
/// [`Delete`](Change::Delete). /// [`Delete`](Change::Delete).
pub fn is_created_or_deleted(&self) -> bool { pub fn is_created_or_deleted(&self) -> bool {
matches!(self.change, Change::Create(_) | Change::Delete) matches!(self.change, Change::Create(_, _) | Change::Delete)
} }
/// Returns `true` if the change is [`Create`](ChangeKind::Create). /// Returns `true` if the change is [`Create`](ChangeKind::Create).
pub fn is_created(&self) -> bool { pub fn is_created(&self) -> bool {
matches!(self.change, Change::Create(_)) matches!(self.change, Change::Create(_, _))
} }
/// Returns `true` if the change is [`Modify`](ChangeKind::Modify). /// Returns `true` if the change is [`Modify`](ChangeKind::Modify).
@ -146,7 +140,7 @@ impl ChangedFile {
pub fn kind(&self) -> ChangeKind { pub fn kind(&self) -> ChangeKind {
match self.change { match self.change {
Change::Create(_) => ChangeKind::Create, Change::Create(_, _) => ChangeKind::Create,
Change::Modify(_, _) => ChangeKind::Modify, Change::Modify(_, _) => ChangeKind::Modify,
Change::Delete => ChangeKind::Delete, Change::Delete => ChangeKind::Delete,
} }
@ -157,7 +151,7 @@ impl ChangedFile {
#[derive(Eq, PartialEq, Debug)] #[derive(Eq, PartialEq, Debug)]
pub enum Change { pub enum Change {
/// The file was (re-)created /// The file was (re-)created
Create(Vec<u8>), Create(Vec<u8>, u64),
/// The file was modified /// The file was modified
Modify(Vec<u8>, u64), Modify(Vec<u8>, u64),
/// The file was deleted /// The file was deleted
@ -178,9 +172,7 @@ pub enum ChangeKind {
impl Vfs { impl Vfs {
/// Id of the given path if it exists in the `Vfs` and is not deleted. /// Id of the given path if it exists in the `Vfs` and is not deleted.
pub fn file_id(&self, path: &VfsPath) -> Option<FileId> { pub fn file_id(&self, path: &VfsPath) -> Option<FileId> {
self.interner self.interner.get(path).filter(|&it| matches!(self.get(it), FileState::Exists(_)))
.get(path)
.filter(|&it| matches!(self.get(it), FileState::Exists(_) | FileState::Created))
} }
/// File path corresponding to the given `file_id`. /// File path corresponding to the given `file_id`.
@ -198,9 +190,7 @@ impl Vfs {
pub fn iter(&self) -> impl Iterator<Item = (FileId, &VfsPath)> + '_ { pub fn iter(&self) -> impl Iterator<Item = (FileId, &VfsPath)> + '_ {
(0..self.data.len()) (0..self.data.len())
.map(|it| FileId(it as u32)) .map(|it| FileId(it as u32))
.filter(move |&file_id| { .filter(move |&file_id| matches!(self.get(file_id), FileState::Exists(_)))
matches!(self.get(file_id), FileState::Exists(_) | FileState::Created)
})
.map(move |file_id| { .map(move |file_id| {
let path = self.interner.lookup(file_id); let path = self.interner.lookup(file_id);
(file_id, path) (file_id, path)
@ -219,12 +209,11 @@ impl Vfs {
let state = self.get(file_id); let state = self.get(file_id);
let change_kind = match (state, contents) { let change_kind = match (state, contents) {
(FileState::Deleted, None) => return false, (FileState::Deleted, None) => return false,
(FileState::Deleted, Some(v)) => Change::Create(v), (FileState::Deleted, Some(v)) => {
(FileState::Exists(_) | FileState::Created, None) => Change::Delete,
(FileState::Created, Some(v)) => {
let hash = hash_once::<FxHasher>(&*v); let hash = hash_once::<FxHasher>(&*v);
Change::Modify(v, hash) Change::Create(v, hash)
} }
(FileState::Exists(_), None) => Change::Delete,
(FileState::Exists(hash), Some(v)) => { (FileState::Exists(hash), Some(v)) => {
let new_hash = hash_once::<FxHasher>(&*v); let new_hash = hash_once::<FxHasher>(&*v);
if new_hash == hash { if new_hash == hash {
@ -233,37 +222,61 @@ impl Vfs {
Change::Modify(v, new_hash) Change::Modify(v, new_hash)
} }
}; };
let mut set_data = |change_kind| {
self.data[file_id.0 as usize] = match change_kind { self.data[file_id.0 as usize] = match change_kind {
Change::Create(_) => { &Change::Create(_, hash) | &Change::Modify(_, hash) => FileState::Exists(hash),
self.created_this_cycle.push(file_id);
FileState::Created
}
// If the file got created this cycle, make sure we keep it that way even
// if a modify comes in
Change::Modify(_, _) if matches!(state, FileState::Created) => FileState::Created,
Change::Modify(_, hash) => FileState::Exists(hash),
Change::Delete => FileState::Deleted, Change::Delete => FileState::Deleted,
}; };
};
let changed_file = ChangedFile { file_id, change: change_kind }; let changed_file = ChangedFile { file_id, change: change_kind };
self.changes.push(changed_file); match self.changes.entry(file_id) {
// two changes to the same file in one cycle, merge them appropriately
Entry::Occupied(mut o) => {
use Change::*;
match (&mut o.get_mut().change, changed_file.change) {
// newer `Delete` wins
(change, Delete) => *change = Delete,
// merge `Create` with `Create` or `Modify`
(Create(prev, old_hash), Create(new, new_hash) | Modify(new, new_hash)) => {
*prev = new;
*old_hash = new_hash;
}
// collapse identical `Modify`es
(Modify(prev, old_hash), Modify(new, new_hash)) => {
*prev = new;
*old_hash = new_hash;
}
// equivalent to `Modify`
(change @ Delete, Create(new, new_hash)) => {
*change = Modify(new, new_hash);
}
// shouldn't occur, but collapse into `Create`
(change @ Delete, Modify(new, new_hash)) => {
stdx::never!();
*change = Create(new, new_hash);
}
// shouldn't occur, but keep the Create
(prev @ Modify(_, _), new @ Create(_, _)) => *prev = new,
}
set_data(&o.get().change);
}
Entry::Vacant(v) => set_data(&v.insert(changed_file).change),
};
true true
} }
/// Drain and returns all the changes in the `Vfs`. /// Drain and returns all the changes in the `Vfs`.
pub fn take_changes(&mut self) -> Vec<ChangedFile> { pub fn take_changes(&mut self) -> IndexMap<FileId, ChangedFile, BuildHasherDefault<FxHasher>> {
let _p = span!(Level::INFO, "Vfs::take_changes").entered();
for file_id in self.created_this_cycle.drain(..) {
if self.data[file_id.0 as usize] == FileState::Created {
// downgrade the file from `Created` to `Exists` as the cycle is done
self.data[file_id.0 as usize] = FileState::Exists(todo!());
}
}
mem::take(&mut self.changes) mem::take(&mut self.changes)
} }
/// Provides a panic-less way to verify file_id validity. /// Provides a panic-less way to verify file_id validity.
pub fn exists(&self, file_id: FileId) -> bool { pub fn exists(&self, file_id: FileId) -> bool {
matches!(self.get(file_id), FileState::Exists(_) | FileState::Created) matches!(self.get(file_id), FileState::Exists(_))
} }
/// Returns the id associated with `path` /// Returns the id associated with `path`