mirror of https://github.com/latex-lsp/texlab.git
synced 2025-07-16 01:25:25 +00:00

commit cb13ad3978: Split tex and aux indices into separate structs
parent 2af2e3524f
14 changed files with 1032 additions and 12 deletions
crates/base-db/src/config.rs (new file, 233 lines)
@@ -0,0 +1,233 @@
use std::time::Duration;

use regex::Regex;
use rustc_hash::FxHashSet;

#[derive(Debug)]
pub struct Config {
    pub root_dir: Option<String>,
    pub build: BuildConfig,
    pub diagnostics: DiagnosticsConfig,
    pub formatting: FormattingConfig,
    pub synctex: Option<SynctexConfig>,
    pub symbols: SymbolConfig,
    pub syntax: SyntaxConfig,
}

#[derive(Debug)]
pub struct BuildConfig {
    pub program: String,
    pub args: Vec<String>,
    pub on_save: bool,
    pub forward_search_after: bool,
    pub output_dir: String,
}

#[derive(Debug)]
pub struct DiagnosticsConfig {
    pub allowed_patterns: Vec<Regex>,
    pub ignored_patterns: Vec<Regex>,
    pub chktex: ChktexConfig,
    pub delay: Duration,
}

#[derive(Debug)]
pub struct ChktexConfig {
    pub on_open: bool,
    pub on_save: bool,
    pub on_edit: bool,
}

#[derive(Debug)]
pub struct SynctexConfig {
    pub program: String,
    pub args: Vec<String>,
}

#[derive(Debug)]
pub struct FormattingConfig {
    pub tex_formatter: Formatter,
    pub bib_formatter: Formatter,
    pub latex_indent: LatexIndentConfig,
    pub line_length: usize,
}

#[derive(Debug)]
pub enum Formatter {
    Null,
    Server,
    LatexIndent,
}

#[derive(Debug)]
pub struct LatexIndentConfig {
    pub local: Option<String>,
    pub modify_line_breaks: bool,
}

#[derive(Debug)]
pub struct SymbolConfig {
    pub allowed_patterns: Vec<Regex>,
    pub ignored_patterns: Vec<Regex>,
}

#[derive(Debug)]
pub struct SyntaxConfig {
    pub math_environments: FxHashSet<String>,
    pub enum_environments: FxHashSet<String>,
    pub verbatim_environments: FxHashSet<String>,
}

impl Default for Config {
    fn default() -> Self {
        Self {
            root_dir: None,
            build: BuildConfig::default(),
            diagnostics: DiagnosticsConfig::default(),
            formatting: FormattingConfig::default(),
            synctex: None,
            symbols: SymbolConfig::default(),
            syntax: SyntaxConfig::default(),
        }
    }
}

impl Default for BuildConfig {
    fn default() -> Self {
        Self {
            program: String::from("latexmk"),
            args: ["-pdf", "-interaction=nonstopmode", "-synctex=1", "%f"]
                .into_iter()
                .map(String::from)
                .collect(),
            on_save: false,
            forward_search_after: false,
            output_dir: String::from("."),
        }
    }
}

impl Default for DiagnosticsConfig {
    fn default() -> Self {
        Self {
            allowed_patterns: Vec::new(),
            ignored_patterns: Vec::new(),
            delay: Duration::from_millis(300),
            chktex: ChktexConfig::default(),
        }
    }
}

impl Default for ChktexConfig {
    fn default() -> Self {
        Self {
            on_open: false,
            on_save: false,
            on_edit: false,
        }
    }
}

impl Default for FormattingConfig {
    fn default() -> Self {
        Self {
            tex_formatter: Formatter::LatexIndent,
            bib_formatter: Formatter::Server,
            line_length: 80,
            latex_indent: LatexIndentConfig::default(),
        }
    }
}

impl Default for LatexIndentConfig {
    fn default() -> Self {
        Self {
            local: None,
            modify_line_breaks: false,
        }
    }
}

impl Default for SymbolConfig {
    fn default() -> Self {
        Self {
            allowed_patterns: Vec::new(),
            ignored_patterns: Vec::new(),
        }
    }
}

impl Default for SyntaxConfig {
    fn default() -> Self {
        let math_environments = DEFAULT_MATH_ENVIRONMENTS
            .iter()
            .copied()
            .map(String::from)
            .collect();

        let enum_environments = ["enumerate", "itemize", "description"]
            .into_iter()
            .map(String::from)
            .collect();

        let verbatim_environments = ["pycode", "minted", "asy", "lstlisting", "verbatim"]
            .into_iter()
            .map(String::from)
            .collect();

        Self {
            math_environments,
            enum_environments,
            verbatim_environments,
        }
    }
}

static DEFAULT_MATH_ENVIRONMENTS: &[&str] = &[
    "align",
    "align*",
    "alignat",
    "alignat*",
    "aligned",
    "aligned*",
    "alignedat",
    "alignedat*",
    "array",
    "array*",
    "Bmatrix",
    "Bmatrix*",
    "bmatrix",
    "bmatrix*",
    "cases",
    "cases*",
    "CD",
    "CD*",
    "eqnarray",
    "eqnarray*",
    "equation",
    "equation*",
    "IEEEeqnarray",
    "IEEEeqnarray*",
    "subequations",
    "subequations*",
    "gather",
    "gather*",
    "gathered",
    "gathered*",
    "matrix",
    "matrix*",
    "multline",
    "multline*",
    "pmatrix",
    "pmatrix*",
    "smallmatrix",
    "smallmatrix*",
    "split",
    "split*",
    "subarray",
    "subarray*",
    "Vmatrix",
    "Vmatrix*",
    "vmatrix",
    "vmatrix*",
];
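Every sub-config provides a `Default` impl and `Config::default()` composes them, so a caller can start from the defaults and override individual fields. A minimal sketch (not part of this diff; only the names defined above are real):

fn example_config() {
    // Hypothetical caller code; all field names come from the structs above.
    let mut config = Config::default();
    config.build.on_save = true;
    config.build.output_dir = String::from("out");
    config.formatting.line_length = 100;
    assert_eq!(config.build.program, "latexmk"); // other defaults are kept
}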
crates/base-db/src/document.rs (new file, 97 lines)
@@ -0,0 +1,97 @@
use std::path::PathBuf;

use rowan::TextSize;
use syntax::latex;
use url::Url;

use crate::{line_index::LineIndex, semantics, Language};

#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub enum Owner {
    Client,
    Server,
}

#[derive(Debug)]
pub struct Document {
    pub uri: Url,
    pub dir: Url,
    pub path: Option<PathBuf>,
    pub text: String,
    pub line_index: LineIndex,
    pub owner: Owner,
    pub cursor: TextSize,
    pub chktex: Vec<()>,
    pub data: DocumentData,
}

impl Document {
    pub fn parse(uri: Url, text: String, language: Language, owner: Owner) -> Self {
        let dir = uri.join(".").unwrap();

        let path = if uri.scheme() == "file" {
            uri.to_file_path().ok()
        } else {
            None
        };

        let line_index = LineIndex::new(&text);

        let cursor = TextSize::from(0);
        let chktex = Vec::new();
        let data = match language {
            Language::Tex => {
                let green = parser::parse_latex(&text);
                let mut semantics = semantics::tex::Semantics::default();
                semantics.process_root(&latex::SyntaxNode::new_root(green.clone()));
                DocumentData::Tex(TexDocumentData { green, semantics })
            }
            Language::Bib => {
                let green = parser::parse_bibtex(&text);
                DocumentData::Bib(BibDocumentData { green })
            }
            Language::Log => DocumentData::Log,
            Language::Root => DocumentData::Root,
            Language::Tectonic => DocumentData::Tectonic,
        };

        Self {
            uri,
            dir,
            path,
            text,
            line_index,
            owner,
            cursor,
            chktex,
            data,
        }
    }
}

#[derive(Debug)]
pub enum DocumentData {
    Tex(TexDocumentData),
    Bib(BibDocumentData),
    Aux(AuxDocumentData),
    Log,
    Root,
    Tectonic,
}

#[derive(Debug)]
pub struct TexDocumentData {
    pub green: rowan::GreenNode,
    pub semantics: semantics::tex::Semantics,
}

#[derive(Debug)]
pub struct BibDocumentData {
    pub green: rowan::GreenNode,
}

#[derive(Debug)]
pub struct AuxDocumentData {
    pub green: rowan::GreenNode,
    pub semantics: semantics::aux::Semantics,
}
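`Document::parse` dispatches on `Language` to build the matching `DocumentData` variant; note that the `Aux` variant exists in the enum but is not yet produced by the match above. A hypothetical caller (the URI and source text are illustrative):

fn example_parse() {
    // Hypothetical usage; `Url` is the `url` crate import used above.
    let uri = Url::parse("file:///home/user/main.tex").unwrap();
    let text = String::from(r"\documentclass{article}");
    let document = Document::parse(uri, text, Language::Tex, Owner::Client);
    assert!(matches!(document.data, DocumentData::Tex(_)));
}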
crates/base-db/src/language.rs (new file, 40 lines)
@@ -0,0 +1,40 @@
use std::path::Path;

#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub enum Language {
    Tex,
    Bib,
    Log,
    Root,
    Tectonic,
}

impl Language {
    pub fn from_path(path: &Path) -> Option<Self> {
        let name = path.file_name()?;
        if name.eq_ignore_ascii_case(".texlabroot") || name.eq_ignore_ascii_case("texlabroot") {
            return Some(Self::Root);
        }

        if name.eq_ignore_ascii_case("Tectonic.toml") {
            return Some(Self::Tectonic);
        }

        let extname = path.extension()?.to_str()?;
        match extname.to_lowercase().as_str() {
            "tex" | "sty" | "cls" | "def" | "lco" | "aux" | "rnw" => Some(Self::Tex),
            "bib" | "bibtex" => Some(Self::Bib),
            "log" => Some(Self::Log),
            _ => None,
        }
    }

    pub fn from_id(id: &str) -> Option<Self> {
        match id {
            "tex" | "latex" => Some(Self::Tex),
            "bib" | "bibtex" => Some(Self::Bib),
            "texlabroot" => Some(Self::Root),
            _ => None,
        }
    }
}
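Language detection tries special file names first and only then falls back to the extension. A few hypothetical checks mirroring the match arms above:

fn example_language_detection() {
    use std::path::Path;

    assert_eq!(Language::from_path(Path::new("main.tex")), Some(Language::Tex));
    assert_eq!(Language::from_path(Path::new("refs.bib")), Some(Language::Bib));
    assert_eq!(Language::from_path(Path::new("Tectonic.toml")), Some(Language::Tectonic));
    assert_eq!(Language::from_id("latex"), Some(Language::Tex));
}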
crates/base-db/src/lib.rs (new file, 8 lines)
@@ -0,0 +1,8 @@
mod config;
mod document;
mod language;
mod line_index;
pub mod semantics;
mod workspace;

pub use self::{config::*, document::*, language::Language, workspace::*};
crates/base-db/src/line_index.rs (new file, 217 lines)
@@ -0,0 +1,217 @@
// The following code has been copied from rust-analyzer.

//! `LineIndex` maps flat `TextSize` offsets into `(Line, Column)`
//! representation.
use std::iter;

use rowan::{TextRange, TextSize};
use rustc_hash::FxHashMap;

#[derive(Clone, Debug, PartialEq, Eq)]
pub struct LineIndex {
    /// Offset of the beginning of each line, zero-based
    pub(crate) newlines: Vec<TextSize>,
    /// List of non-ASCII characters on each line
    pub(crate) utf16_lines: FxHashMap<u32, Vec<Utf16Char>>,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct LineColUtf16 {
    /// Zero-based
    pub line: u32,
    /// Zero-based
    pub col: u32,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct LineCol {
    /// Zero-based
    pub line: u32,
    /// Zero-based utf8 offset
    pub col: u32,
}

#[derive(Clone, Debug, Hash, PartialEq, Eq)]
pub(crate) struct Utf16Char {
    /// Start offset of a character inside a line, zero-based
    pub(crate) start: TextSize,
    /// End offset of a character inside a line, zero-based
    pub(crate) end: TextSize,
}

impl Utf16Char {
    /// Returns the length in 8-bit UTF-8 code units.
    fn len(&self) -> TextSize {
        self.end - self.start
    }

    /// Returns the length in 16-bit UTF-16 code units.
    fn len_utf16(&self) -> usize {
        if self.len() == TextSize::from(4) {
            2
        } else {
            1
        }
    }
}

impl LineIndex {
    pub fn new(text: &str) -> LineIndex {
        let mut utf16_lines = FxHashMap::default();
        let mut utf16_chars = Vec::new();

        let mut newlines = vec![0.into()];
        let mut curr_row = 0.into();
        let mut curr_col = 0.into();
        let mut line = 0;
        for c in text.chars() {
            let c_len = TextSize::of(c);
            curr_row += c_len;
            if c == '\n' {
                newlines.push(curr_row);

                // Save any utf-16 characters seen in the previous line
                if !utf16_chars.is_empty() {
                    utf16_lines.insert(line, utf16_chars);
                    utf16_chars = Vec::new();
                }

                // Prepare for processing the next line
                curr_col = 0.into();
                line += 1;
                continue;
            }

            if !c.is_ascii() {
                utf16_chars.push(Utf16Char {
                    start: curr_col,
                    end: curr_col + c_len,
                });
            }

            curr_col += c_len;
        }

        // Save any utf-16 characters seen in the last line
        if !utf16_chars.is_empty() {
            utf16_lines.insert(line, utf16_chars);
        }

        LineIndex {
            newlines,
            utf16_lines,
        }
    }

    pub fn line_col(&self, offset: TextSize) -> LineCol {
        let line = partition_point(&self.newlines, |&it| it <= offset) - 1;
        let line_start_offset = self.newlines[line];
        let col = offset - line_start_offset;
        LineCol {
            line: line as u32,
            col: col.into(),
        }
    }

    pub fn offset(&self, line_col: LineCol) -> TextSize {
        self.newlines[line_col.line as usize] + TextSize::from(line_col.col)
    }

    pub fn to_utf16(&self, line_col: LineCol) -> LineColUtf16 {
        let col = self.utf8_to_utf16_col(line_col.line, line_col.col.into());
        LineColUtf16 {
            line: line_col.line,
            col: col as u32,
        }
    }

    pub fn to_utf8(&self, line_col: LineColUtf16) -> LineCol {
        let col = self.utf16_to_utf8_col(line_col.line, line_col.col);
        LineCol {
            line: line_col.line,
            col: col.into(),
        }
    }

    pub fn lines(&self, range: TextRange) -> impl Iterator<Item = TextRange> + '_ {
        let lo = partition_point(&self.newlines, |&it| it < range.start());
        let hi = partition_point(&self.newlines, |&it| it <= range.end());
        let all = iter::once(range.start())
            .chain(self.newlines[lo..hi].iter().copied())
            .chain(iter::once(range.end()));

        all.clone()
            .zip(all.skip(1))
            .map(|(lo, hi)| TextRange::new(lo, hi))
            .filter(|it| !it.is_empty())
    }

    fn utf8_to_utf16_col(&self, line: u32, col: TextSize) -> usize {
        let mut res: usize = col.into();
        if let Some(utf16_chars) = self.utf16_lines.get(&line) {
            for c in utf16_chars {
                if c.end <= col {
                    res -= usize::from(c.len()) - c.len_utf16();
                } else {
                    // From here on, all utf16 characters come *after* the character we are mapping,
                    // so we don't need to take them into account
                    break;
                }
            }
        }
        res
    }

    fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize {
        if let Some(utf16_chars) = self.utf16_lines.get(&line) {
            for c in utf16_chars {
                if col > u32::from(c.start) {
                    col += u32::from(c.len()) - c.len_utf16() as u32;
                } else {
                    // From here on, all utf16 characters come *after* the character we are mapping,
                    // so we don't need to take them into account
                    break;
                }
            }
        }

        col.into()
    }
}

/// Returns `idx` such that:
///
/// ```text
/// ∀ x in slice[..idx]: pred(x)
/// && ∀ x in slice[idx..]: !pred(x)
/// ```
///
/// https://github.com/rust-lang/rust/issues/73831
fn partition_point<T, P>(slice: &[T], mut pred: P) -> usize
where
    P: FnMut(&T) -> bool,
{
    let mut left = 0;
    let mut right = slice.len();

    while left != right {
        let mid = left + (right - left) / 2;
        // SAFETY:
        // When left < right, left <= mid < right.
        // Therefore left always increases and right always decreases,
        // and either of them is selected.
        // In both cases left <= right is satisfied.
        // Therefore if left < right in a step,
        // left <= right is satisfied in the next step.
        // Therefore as long as left != right, 0 <= left < right <= len is satisfied
        // and in this case 0 <= mid < len is satisfied too.
        let value = unsafe { slice.get_unchecked(mid) };
        if pred(value) {
            left = mid + 1;
        } else {
            right = mid;
        }
    }

    left
}
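`LineIndex` exists because LSP clients use UTF-16 columns by default while the server stores UTF-8 byte offsets; `to_utf16` and `to_utf8` convert between the two. A small sketch of the round trip (not part of the diff; offsets computed by hand for this string):

fn example_line_index() {
    // 'λ' is two UTF-8 bytes but one UTF-16 code unit, so the same
    // position maps to different columns in the two encodings.
    let index = LineIndex::new("fn main() {}\nλ = 1;\n");
    let pos = index.line_col(TextSize::from(16)); // byte offset of '='
    assert_eq!((pos.line, pos.col), (1, 3)); // UTF-8 column
    let utf16 = index.to_utf16(pos);
    assert_eq!((utf16.line, utf16.col), (1, 2)); // UTF-16 column
}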
crates/base-db/src/semantics.rs (new file, 17 lines)
@@ -0,0 +1,17 @@
pub mod aux;
pub mod tex;

#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub struct Span {
    pub text: String,
    pub range: rowan::TextRange,
}

impl From<&syntax::latex::Key> for Span {
    fn from(key: &syntax::latex::Key) -> Self {
        Span {
            text: key.to_string(),
            range: syntax::latex::small_range(key),
        }
    }
}
crates/base-db/src/semantics/aux.rs (new file, 39 lines)
@@ -0,0 +1,39 @@
use rowan::ast::AstNode;
use rustc_hash::FxHashMap;
use syntax::latex;

#[derive(Debug)]
pub struct Semantics {
    pub label_numbers: FxHashMap<String, String>,
}

impl Semantics {
    pub fn process_root(&mut self, root: &latex::SyntaxNode) {
        for node in root.descendants() {
            self.process_node(&node);
        }
    }

    fn process_node(&mut self, node: &latex::SyntaxNode) {
        if let Some(label_number) = latex::LabelNumber::cast(node.clone()) {
            self.process_label_number(&label_number);
        }
    }

    fn process_label_number(&mut self, label_number: &latex::LabelNumber) {
        let Some(name) = label_number
            .name()
            .and_then(|group| group.key())
            .map(|key| key.to_string()) else { return };

        let Some(text) = label_number
            .text()
            .map(|node| node.syntax().descendants())
            .into_iter()
            .flatten()
            .find(|node| node.kind() == latex::TEXT || node.kind() == latex::MIXED_GROUP)
            .map(|node| node.text().to_string()) else { return };

        self.label_numbers.insert(name, text);
    }
}
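The aux `Semantics` records the number that LaTeX assigned to each label, e.g. a `\newlabel{eq:euler}{{1.2}{4}}` entry maps `eq:euler` to `1.2`. A hypothetical driver, assuming `parser::parse_latex` also handles `.aux` content (plausible, since language.rs maps the `aux` extension to `Language::Tex`):

fn example_aux_semantics() {
    // Hypothetical sketch; the exact parse of \newlabel is an assumption.
    let green = parser::parse_latex(r"\newlabel{eq:euler}{{1.2}{4}}");
    let mut semantics = Semantics { label_numbers: FxHashMap::default() };
    semantics.process_root(&latex::SyntaxNode::new_root(green));
    assert_eq!(semantics.label_numbers.get("eq:euler").map(String::as_str), Some("1.2"));
}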
crates/base-db/src/semantics/tex.rs (new file, 255 lines)
@@ -0,0 +1,255 @@
use rowan::ast::AstNode;
use rustc_hash::FxHashSet;
use syntax::latex::{self, HasCurly};
use text_size::TextRange;

use super::Span;

#[derive(Debug, Default)]
pub struct Semantics {
    pub links: Vec<Link>,
    pub labels: Vec<Label>,
    pub commands: FxHashSet<String>,
    pub environments: FxHashSet<String>,
    pub theorem_definitions: Vec<TheoremDefinition>,
    pub graphics_paths: FxHashSet<String>,
}

impl Semantics {
    pub fn process_root(&mut self, root: &latex::SyntaxNode) {
        for node in root.descendants_with_tokens() {
            match node {
                latex::SyntaxElement::Node(node) => {
                    self.process_node(&node);
                }
                latex::SyntaxElement::Token(token) => {
                    if token.kind() == latex::COMMAND_NAME {
                        self.commands.insert(String::from(token.text()));
                    }
                }
            };
        }
    }

    fn process_node(&mut self, node: &latex::SyntaxNode) {
        if let Some(include) = latex::Include::cast(node.clone()) {
            self.process_include(include);
        } else if let Some(import) = latex::Import::cast(node.clone()) {
            self.process_import(import);
        } else if let Some(label) = latex::LabelDefinition::cast(node.clone()) {
            self.process_label_definition(label);
        } else if let Some(label) = latex::LabelReference::cast(node.clone()) {
            self.process_label_reference(label);
        } else if let Some(label) = latex::LabelReferenceRange::cast(node.clone()) {
            self.process_label_reference_range(label);
        } else if let Some(environment) = latex::Environment::cast(node.clone()) {
            self.process_environment(environment);
        } else if let Some(theorem_def) = latex::TheoremDefinition::cast(node.clone()) {
            self.process_theorem_definition(theorem_def);
        }
    }

    fn process_include(&mut self, include: latex::Include) {
        let Some(list) = include.path_list() else { return };

        for path in list.keys() {
            let kind = match include.syntax().kind() {
                latex::PACKAGE_INCLUDE => LinkKind::Sty,
                latex::CLASS_INCLUDE => LinkKind::Cls,
                latex::LATEX_INCLUDE => LinkKind::Tex,
                latex::BIBLATEX_INCLUDE => LinkKind::Bib,
                latex::BIBTEX_INCLUDE => LinkKind::Bib,
                _ => continue,
            };

            self.links.push(Link {
                kind,
                path: Span::from(&path),
                base_dir: None,
            });
        }
    }

    fn process_import(&mut self, import: latex::Import) {
        let Some(mut base_dir) = import
            .directory()
            .and_then(|dir| dir.key())
            .map(|key| key.to_string()) else { return };

        if !base_dir.ends_with('/') {
            base_dir.push('/');
        }

        let Some(path) = import.file().and_then(|path| path.key()) else { return };

        self.links.push(Link {
            kind: LinkKind::Tex,
            path: Span::from(&path),
            base_dir: Some(base_dir),
        });
    }

    fn process_label_definition(&mut self, label: latex::LabelDefinition) {
        let Some(name) = label.name().and_then(|group| group.key()) else { return };

        let mut objects = Vec::new();
        for node in label.syntax().ancestors() {
            if let Some(section) = latex::Section::cast(node.clone()) {
                let Some(text) = section.name().and_then(|group| group.content_text()) else { continue };
                let range = latex::small_range(&section);
                let prefix = String::from(match section.syntax().kind() {
                    latex::PART => "Part",
                    latex::CHAPTER => "Chapter",
                    latex::SECTION => "Section",
                    latex::SUBSECTION => "Subsection",
                    latex::SUBSUBSECTION => "Subsubsection",
                    latex::PARAGRAPH => "Paragraph",
                    latex::SUBPARAGRAPH => "Subparagraph",
                    _ => unreachable!(),
                });

                let kind = LabelObject::Section { prefix, text };
                objects.push(LabelTarget {
                    object: kind,
                    range,
                });
            } else if let Some(environment) = latex::Environment::cast(node.clone()) {
                let Some(name) = environment.begin()
                    .and_then(|begin| begin.name())
                    .and_then(|group| group.key())
                    .map(|key| key.to_string()) else { continue };

                let range = latex::small_range(&environment);
                let kind = LabelObject::Environment { name };
                objects.push(LabelTarget {
                    object: kind,
                    range,
                });
            } else if let Some(enum_item) = latex::EnumItem::cast(node.clone()) {
                let range = latex::small_range(&enum_item);
                let kind = LabelObject::EnumItem;
                objects.push(LabelTarget {
                    object: kind,
                    range,
                });
            }
        }

        self.labels.push(Label {
            kind: LabelKind::Definition,
            name: Span::from(&name),
            targets: objects,
        });
    }

    fn process_label_reference(&mut self, label: latex::LabelReference) {
        let Some(name_list) = label.name_list() else { return };

        for name in name_list.keys() {
            self.labels.push(Label {
                kind: LabelKind::Reference,
                name: Span::from(&name),
                targets: Vec::new(),
            });
        }
    }

    fn process_label_reference_range(&mut self, label: latex::LabelReferenceRange) {
        if let Some(from) = label.from().and_then(|group| group.key()) {
            self.labels.push(Label {
                kind: LabelKind::ReferenceRange,
                name: Span::from(&from),
                targets: Vec::new(),
            });
        }

        if let Some(to) = label.to().and_then(|group| group.key()) {
            self.labels.push(Label {
                kind: LabelKind::ReferenceRange,
                name: Span::from(&to),
                targets: Vec::new(),
            });
        }
    }

    fn process_environment(&mut self, environment: latex::Environment) {
        let Some(name) = environment
            .begin()
            .and_then(|begin| begin.name())
            .and_then(|group| group.key()) else { return };

        self.environments.insert(String::from(name.syntax().text()));
    }

    fn process_theorem_definition(&mut self, theorem_def: latex::TheoremDefinition) {
        let Some(name) = theorem_def.name().and_then(|name| name.key()) else { return };

        let Some(description) = theorem_def
            .description()
            .and_then(|group| group.content_text()) else { return };

        self.theorem_definitions.push(TheoremDefinition {
            name: Span::from(&name),
            description,
        });
    }
}

#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub enum LinkKind {
    Sty,
    Cls,
    Tex,
    Bib,
}

impl LinkKind {
    pub fn extensions(self) -> &'static [&'static str] {
        match self {
            Self::Sty => &["sty"],
            Self::Cls => &["cls"],
            Self::Tex => &["tex"],
            Self::Bib => &["bib"],
        }
    }
}

#[derive(Debug)]
pub struct Link {
    pub kind: LinkKind,
    pub path: Span,
    pub base_dir: Option<String>,
}

#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone, Copy, Hash)]
pub enum LabelKind {
    Definition,
    Reference,
    ReferenceRange,
}

#[derive(Debug)]
pub struct Label {
    pub kind: LabelKind,
    pub name: Span,
    pub targets: Vec<LabelTarget>,
}

#[derive(Debug)]
pub struct LabelTarget {
    pub object: LabelObject,
    pub range: TextRange,
}

#[derive(Debug)]
pub enum LabelObject {
    Section { prefix: String, text: String },
    EnumItem,
    Environment { name: String },
}

#[derive(Debug)]
pub struct TheoremDefinition {
    pub name: Span,
    pub description: String,
}
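`process_root` makes a single pass over all nodes and tokens, filling every index (links, labels, commands, environments) at once. A hypothetical end-to-end sketch, assuming the parser recognizes the commands involved:

fn example_tex_semantics() {
    // Hypothetical snippet; assertions reflect the processing above.
    let text = r"\usepackage{amsmath} \begin{align}\label{eq:1}\end{align}";
    let green = parser::parse_latex(text);
    let mut semantics = Semantics::default();
    semantics.process_root(&latex::SyntaxNode::new_root(green));
    assert_eq!(semantics.links[0].kind, LinkKind::Sty);
    assert!(semantics.environments.contains("align"));
    assert_eq!(semantics.labels[0].kind, LabelKind::Definition);
}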
crates/base-db/src/workspace.rs (new file, 75 lines)
@@ -0,0 +1,75 @@
use std::{borrow::Cow, path::Path};

use rustc_hash::FxHashMap;
use url::Url;

use crate::{Config, Document, DocumentData, Language, Owner};

#[derive(Debug)]
pub struct Workspace {
    documents: FxHashMap<Url, Document>,
    config: Config,
    root_dirs: Vec<Url>,
}

impl Workspace {
    pub fn lookup(&self, uri: &Url) -> Option<&Document> {
        self.documents.get(uri)
    }

    pub fn open(&mut self, uri: Url, text: String, language: Language, owner: Owner) {
        log::debug!("Opening document {uri}...");
        let document = Document::parse(uri, text, language, owner);
        self.documents.insert(document.uri.clone(), document);
    }

    pub fn load(&mut self, path: &Path, language: Language, owner: Owner) -> std::io::Result<()> {
        log::debug!("Loading document {} from disk...", path.display());
        let uri = Url::from_file_path(path).unwrap();
        let data = std::fs::read(path)?;
        let text = match String::from_utf8_lossy(&data) {
            Cow::Borrowed(_) => unsafe { String::from_utf8_unchecked(data) },
            Cow::Owned(text) => text,
        };

        Ok(self.open(uri, text, language, owner))
    }

    pub fn watch(&mut self, watcher: &mut dyn notify::Watcher) {
        self.documents
            .values()
            .filter(|document| document.uri.scheme() == "file")
            .flat_map(|document| {
                let dir1 = self.output_dir(&self.current_dir(&document.dir));
                let dir2 = &document.dir;
                [dir1.to_file_path(), dir2.to_file_path()]
            })
            .flatten()
            .for_each(|path| {
                let _ = watcher.watch(&path, notify::RecursiveMode::NonRecursive);
            });
    }

    pub fn current_dir(&self, base_dir: &Url) -> Url {
        let root_dir = self.config.root_dir.as_deref();
        if let Some(dir) = root_dir.and_then(|path| base_dir.join(path).ok()) {
            return dir;
        }

        self.documents
            .values()
            .filter(|doc| matches!(doc.data, DocumentData::Root | DocumentData::Tectonic))
            .flat_map(|doc| doc.uri.join("."))
            .find(|root_dir| base_dir.as_str().starts_with(root_dir.as_str()))
            .unwrap_or_else(|| base_dir.clone())
    }

    pub fn output_dir(&self, base_dir: &Url) -> Url {
        let mut path = self.config.build.output_dir.clone();
        if !path.ends_with('/') {
            path.push('/');
        }

        base_dir.join(&path).unwrap_or_else(|_| base_dir.clone())
    }
}
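`current_dir` resolves the project root in order of preference (the explicit `root_dir` setting, then a document carrying a `.texlabroot` or `Tectonic.toml` marker, then the given base directory), and `output_dir` joins the configured build directory onto it. A hypothetical helper, since `Workspace` construction is not part of this hunk:

fn open_main(workspace: &mut Workspace) -> std::io::Result<()> {
    // Hypothetical usage; the path is illustrative and a Workspace
    // constructor is assumed to live elsewhere in the commit.
    workspace.load(Path::new("/home/user/main.tex"), Language::Tex, Owner::Client)?;
    let uri = Url::from_file_path("/home/user/main.tex").unwrap();
    assert!(workspace.lookup(&uri).is_some());
    Ok(())
}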