Refactor Vfs error handling

2025-12-23 09:19:49 +00:00 · 2023-02-10 09:00:44 +08:00 · 2023-02-10 09:00:44 +08:00 · 5179eb45dc
commit 5179eb45dc
parent 4dfaf037da
3 changed files with 74 additions and 71 deletions
--- a/crates/nil/src/lib.rs
+++ b/crates/nil/src/lib.rs
@ -6,7 +6,7 @@ mod semantic_tokens;
 mod server;
 mod vfs;

-use anyhow::{anyhow, Result};
+use anyhow::Result;
 use ide::VfsPath;
 use lsp_server::{Connection, ErrorCode};
 use lsp_types::{InitializeParams, Url};
@ -15,6 +15,17 @@ use std::fmt;
 pub(crate) use server::{Server, StateSnapshot};
 pub(crate) use vfs::{LineMap, Vfs};

+/// The file length limit. Files larger than this will be rejected from all interactions.
+/// The hard limit is `u32::MAX` due to following conditions.
+/// - The parser and the `rowan` library uses `u32` based indices.
+/// - `vfs::LineMap` uses `u32` based indices.
+///
+/// Since large files can cause significant performance issues, also to
+/// be away from off-by-n errors, here's an arbitrary chosen limit: 128MiB.
+///
+/// If you have any real world usages for files larger than this, please file an issue.
+pub const MAX_FILE_LEN: usize = 128 << 20;
+
 #[derive(Debug)]
 pub(crate) struct LspError {
    code: ErrorCode,
@ -31,20 +42,20 @@ impl fmt::Display for LspError {
 impl std::error::Error for LspError {}

 pub(crate) trait UrlExt: Sized {
-    fn to_vfs_path(&self) -> Result<VfsPath>;
+    fn to_vfs_path(&self) -> VfsPath;
    fn from_vfs_path(path: &VfsPath) -> Self;
 }

 impl UrlExt for Url {
-    fn to_vfs_path(&self) -> Result<VfsPath> {
+    fn to_vfs_path(&self) -> VfsPath {
        // `Url::to_file_path` doesn't do schema check.
        if self.scheme() == "file" {
-            let path = self
-                .to_file_path()
-                .map_err(|()| anyhow!("Invalid file URI: {self}"))?;
-            return Ok(path.into());
+            if let Ok(path) = self.to_file_path() {
+                return path.into();
+            }
+            tracing::warn!("Ignore invalid file URI: {self}");
        }
-        Ok(VfsPath::Virtual(self.as_str().to_owned()))
+        VfsPath::Virtual(self.as_str().to_owned())
    }

    fn from_vfs_path(vpath: &VfsPath) -> Self {
--- a/crates/nil/src/server.rs
+++ b/crates/nil/src/server.rs
@ -1,5 +1,5 @@
 use crate::config::{Config, CONFIG_KEY};
-use crate::{convert, handler, LspError, Vfs};
+use crate::{convert, handler, LspError, UrlExt, Vfs, MAX_FILE_LEN};
 use anyhow::{anyhow, bail, Context, Result};
 use crossbeam_channel::{Receiver, Sender};
 use ide::{Analysis, AnalysisHost, Cancelled, FlakeInfo, VfsPath};
@ -289,9 +289,19 @@ impl Server {
                Ok(())
            })?
            .on_sync_mut::<notif::DidOpenTextDocument>(|st, params| {
+                // Ignore the open event for unsupported files, thus all following interactions
+                // will error due to unopened files.
+                let len = params.text_document.text.len();
+                if len > MAX_FILE_LEN {
+                    st.show_message(
+                        MessageType::WARNING,
+                        "Disable LSP functionalities for too large file ({len} > {MAX_FILE_LEN})",
+                    );
+                    return Ok(());
+                }
                let uri = &params.text_document.uri;
+                st.set_vfs_file_content(uri, params.text_document.text);
                st.opened_files.insert(uri.clone(), FileData::default());
-                st.set_vfs_file_content(uri, params.text_document.text)?;
                Ok(())
            })?
            .on_sync_mut::<notif::DidCloseTextDocument>(|st, params| {
@ -301,23 +311,27 @@ impl Server {
            })?
            .on_sync_mut::<notif::DidChangeTextDocument>(|st, params| {
                let mut vfs = st.vfs.write().unwrap();
+                let uri = &params.text_document.uri;
                // Ignore files not maintained in Vfs.
-                let Ok(file) = vfs.file_for_uri(&params.text_document.uri) else { return Ok(()) };
+                let Ok(file) = vfs.file_for_uri(uri) else { return Ok(()) };
                for change in params.content_changes {
-                    let del_range = match change.range {
-                        None => None,
-                        Some(range) => match convert::from_range(&vfs, file, range) {
-                            Ok((_, range)) => Some(range),
-                            Err(err) => {
-                                tracing::error!(
-                                    "File out of sync! Invalid change range {range:?}: {err}. Change: {change:?}",
-                                );
-                                continue;
-                            }
-                        },
-                    };
-                    if let Err(err) = vfs.change_file_content(file, del_range, &change.text) {
-                        tracing::error!("File is out of sync! Failed to apply change: {err}. Change: {change:?}");
+                    let ret = (|| {
+                        let del_range = match change.range {
+                            None => None,
+                            Some(range) => Some(convert::from_range(&vfs, file, range).ok()?.1),
+                        };
+                        vfs.change_file_content(file, del_range, &change.text)
+                            .ok()?;
+                        Some(())
+                    })();
+                    if ret.is_none() {
+                        tracing::error!(
+                            "File is out of sync! Failed to apply change for {uri}: {change:?}"
+                        );
+
+                        // Clear file states to minimize pollution of the broken state.
+                        st.opened_files.remove(uri);
+                        // TODO: Remove the file from Vfs.
                    }
                }
                drop(vfs);
@ -369,7 +383,7 @@ impl Server {
                    // prefer the managed one. It contains more recent unsaved changes.
                    Ok(file) => file,
                    // Otherwise, cache the file content from disk.
-                    Err(_) => vfs.set_path_content(flake_vpath, flake_src)?,
+                    Err(_) => vfs.set_path_content(flake_vpath, flake_src),
                }
            };

@ -535,10 +549,10 @@ impl Server {
        }
    }

-    fn set_vfs_file_content(&mut self, uri: &Url, text: String) -> Result<()> {
-        self.vfs.write().unwrap().set_uri_content(uri, text)?;
+    fn set_vfs_file_content(&mut self, uri: &Url, text: String) {
+        let vpath = uri.to_vfs_path();
+        self.vfs.write().unwrap().set_path_content(vpath, text);
        self.apply_vfs_change();
-        Ok(())
    }

    fn apply_vfs_change(&mut self) {
--- a/crates/nil/src/vfs.rs
+++ b/crates/nil/src/vfs.rs
@ -44,39 +44,25 @@ impl Vfs {
        });
    }

-    pub fn set_uri_content(&mut self, uri: &Url, text: String) -> Result<()> {
-        let vpath = uri.to_vfs_path()?;
-        self.set_path_content(vpath, text)?;
-        Ok(())
-    }
-
-    pub fn set_path_content(&mut self, path: VfsPath, text: String) -> Result<FileId> {
-        // For invalid files (currently, too large), we store them as empty files in database,
-        // but remove them from `local_file_set`. Thus any interactions on them would fail.
-        let (text, line_map, is_valid) = LineMap::normalize(text)
-            .map(|(text, line_map)| (text, line_map, true))
-            .unwrap_or_default();
+    pub fn set_path_content(&mut self, path: VfsPath, text: String) -> FileId {
+        let (text, line_map) = LineMap::normalize(text);
        let text = <Arc<str>>::from(text);
        let line_map = Arc::new(line_map);
        match self.local_file_set.file_for_path(&path) {
            Some(file) => {
                self.files[file.0 as usize] = (text.clone(), line_map);
                self.change.change_file(file, text);
-                if !is_valid {
-                    self.local_file_set.remove_file(file);
-                    self.root_changed = true;
-                }
-                Ok(file)
+                self.local_file_set.remove_file(file);
+                self.root_changed = true;
+                file
            }
            None => {
-                // FIXME: Somehow get rid of this validity check from Vfs.
-                ensure!(is_valid, "File is not valid");
                let file = FileId(u32::try_from(self.files.len()).expect("Length overflow"));
                self.local_file_set.insert(file, path);
                self.root_changed = true;
                self.files.push((text.clone(), line_map));
                self.change.change_file(file, text);
-                Ok(file)
+                file
            }
        }
    }
@ -105,7 +91,7 @@ impl Vfs {
            }
        };
        // This is not quite efficient, but we already do many O(n) traversals.
-        let (new_text, line_map) = LineMap::normalize(new_text).context("File too large")?;
+        let (new_text, line_map) = LineMap::normalize(new_text);
        let new_text = <Arc<str>>::from(new_text);
        log::trace!("File {:?} content changed: {:?}", file, new_text);
        self.files[file.0 as usize] = (new_text.clone(), Arc::new(line_map));
@ -120,7 +106,7 @@ impl Vfs {
    }

    pub fn file_for_uri(&self, uri: &Url) -> Result<FileId> {
-        self.file_for_path(&uri.to_vfs_path()?)
+        self.file_for_path(&uri.to_vfs_path())
    }

    pub fn uri_for_file(&self, file: FileId) -> Url {
@ -165,20 +151,12 @@ enum CodeUnitsDiff {
    Two = 2,
 }

-impl Default for LineMap {
-    fn default() -> Self {
-        Self::normalize(String::new()).unwrap().1
-    }
-}
-
 impl LineMap {
-    fn normalize(text: String) -> Option<(String, Self)> {
-        // Too large for `TextSize`.
-        if text.len() > u32::MAX as usize {
-            return None;
-        }
+    fn normalize(mut text: String) -> (String, Self) {
+        // Must be valid for `TextSize`.
+        u32::try_from(text.len()).expect("Text too long");

-        let text = text.replace('\r', "");
+        text.retain(|c| c != '\r');
        let bytes = text.as_bytes();

        let mut line_starts = Some(0)
@ -215,7 +193,7 @@ impl LineMap {
            line_starts,
            char_diffs,
        };
-        Some((text, this))
+        (text, this)
    }

    pub fn last_line(&self) -> u32 {
@ -273,7 +251,7 @@ mod tests {
    #[test]
    fn line_map_ascii() {
        let s = "hello\nworld\nend";
-        let (norm, map) = LineMap::normalize(s.into()).unwrap();
+        let (norm, map) = LineMap::normalize(s.into());
        assert_eq!(norm, s);
        assert_eq!(&map.line_starts, &[0, 6, 12, 15]);

@ -299,7 +277,7 @@ mod tests {
        // ℝ  | U+0211D | E2 84 9D    | 211D
        // 💣 | U+1F4A3 | F0 9F 92 A3 | D83D DCA3
        let s = "_A_ß_ℝ_💣_";
-        let (norm, map) = LineMap::normalize(s.into()).unwrap();
+        let (norm, map) = LineMap::normalize(s.into());
        assert_eq!(norm, s);
        assert_eq!(&map.line_starts, &[0, 15]);
        assert_eq!(
@ -333,20 +311,20 @@ mod tests {

    #[test]
    fn last_line() {
-        let (_, map) = LineMap::normalize("".into()).unwrap();
+        let (_, map) = LineMap::normalize("".into());
        assert_eq!(map.last_line(), 0);
-        let (_, map) = LineMap::normalize("\n".into()).unwrap();
+        let (_, map) = LineMap::normalize("\n".into());
        assert_eq!(map.last_line(), 1);
-        let (_, map) = LineMap::normalize("foo\nbar".into()).unwrap();
+        let (_, map) = LineMap::normalize("foo\nbar".into());
        assert_eq!(map.last_line(), 1);
-        let (_, map) = LineMap::normalize("foo\nbar\n".into()).unwrap();
+        let (_, map) = LineMap::normalize("foo\nbar\n".into());
        assert_eq!(map.last_line(), 2);
    }

    #[test]
    fn line_end_col() {
        // See comments in `line_map_unicode`.
-        let (_, map) = LineMap::normalize("hello\nAßℝ💣\n\nend".into()).unwrap();
+        let (_, map) = LineMap::normalize("hello\nAßℝ💣\n\nend".into());
        assert_eq!(map.end_col_for_line(0), 5);
        assert_eq!(map.end_col_for_line(1), 5);
        assert_eq!(map.end_col_for_line(2), 0);