refactor: bib worker (#1585)

* refactor: bib worker

* fix: stupid slash
This commit is contained in:
Myriad-Dreamin 2025-03-30 21:47:06 +08:00 committed by GitHub
parent 7b74506dcc
commit 53ceba2801
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 137 additions and 142 deletions

View file

@ -1,14 +1,81 @@
use std::ffi::OsStr;
use typst::foundations::Bytes;
use yaml_rust2::{parser::Event, parser::MarkedEventReceiver, scanner::Marker};
use super::prelude::*;
/// Builds the shared [`BibInfo`] for a set of bibliography files.
///
/// Every `(file id, content)` pair is handed to [`BibWorker::analyze_path`];
/// a file that cannot be analyzed simply contributes no entries. The result
/// is always `Some`, even when no entry was collected.
pub(crate) fn bib_info(files: EcoVec<(TypstFileId, Bytes)>) -> Option<Arc<BibInfo>> {
    let mut worker = BibWorker {
        info: BibInfo::default(),
    };

    // There may be several bib/yaml files, all of them feed the same worker.
    files.iter().for_each(|(file_id, content)| {
        // A per-file failure is deliberately ignored.
        worker.analyze_path(*file_id, content.clone());
    });

    let BibWorker { info } = worker;
    let info = Arc::new(info);
    crate::log_debug_ct!("bib analysis: {files:?} -> {info:?}");
    Some(info)
}
/// The bibliography information collected from the analyzed bibliography
/// files.
#[derive(Debug, Default)]
pub struct BibInfo {
/// The bibliography entries, keyed by entry name (the BibLaTeX entry key or
/// the key recorded by the YAML loader).
pub entries: indexmap::IndexMap<String, BibEntry>,
}
/// The location of a single bibliography entry.
///
/// For YAML files the ranges are resolved to UTF-8 byte offsets; for `.bib`
/// files they are taken from the `biblatex` spans as-is (presumably byte
/// offsets as well — TODO confirm).
#[derive(Debug, Clone)]
pub struct BibEntry {
/// The file that the entry was found in.
pub file_id: TypstFileId,
/// The range of the entry's name (its key) within the file.
pub name_range: Range<usize>,
/// The range of the whole entry within the file.
pub range: Range<usize>,
}
/// Accumulates the entries of every analyzed bibliography file into a single
/// [`BibInfo`].
struct BibWorker {
/// The bibliography information gathered so far.
info: BibInfo,
}
impl BibWorker {
fn analyze_path(&mut self, file_id: TypstFileId, content: Bytes) -> Option<()> {
let file_extension = file_id.vpath().as_rooted_path().extension()?.to_str()?;
let content = std::str::from_utf8(&content).ok()?;
match file_extension.to_lowercase().as_str() {
"yml" | "yaml" => self.yaml_bib(file_id, content),
"bib" => {
let bibliography = biblatex::RawBibliography::parse(content).ok()?;
self.tex_bib(file_id, bibliography)
}
_ => return None,
};
Some(())
}
fn yaml_bib(&mut self, file_id: TypstFileId, content: &str) {
let yaml = YamlBib::from_content(content, file_id);
self.info.entries.extend(yaml.entries);
}
fn tex_bib(&mut self, file_id: TypstFileId, bibliography: biblatex::RawBibliography) {
for entry in bibliography.entries {
let name = entry.v.key;
let entry = BibEntry {
file_id,
name_range: name.span,
range: entry.span,
};
self.info.entries.insert(name.v.to_owned(), entry);
}
}
}
#[derive(Debug, Clone)]
struct BibSpanned<T> {
value: T,
span: Range<usize>,
range: Range<usize>,
}
#[derive(Default)]
@ -24,17 +91,15 @@ impl MarkedEventReceiver for YamlBibLoader {
match event {
Event::MappingStart(..) => {
if self.depth == 1 {
crate::log_debug_ct!("mapping start: {:?} {:?}", self.key, mark.index());
self.start = self.key.take();
}
self.depth += 1;
}
Event::Scalar(s, ..) => {
crate::log_debug_ct!("scalar: {:?} {:?}", s, mark.index());
if self.depth == 1 {
self.key = Some(BibSpanned {
value: s.to_owned(),
span: mark.index()..mark.index() + s.chars().count(),
range: mark.index()..mark.index() + s.chars().count(),
});
}
}
@ -46,9 +111,8 @@ impl MarkedEventReceiver for YamlBibLoader {
let Some(start) = start else {
return;
};
let span = start.span.start..end;
let span = start.range.start..end;
self.content.push((start, span));
crate::log_debug_ct!("mapping end: {:?} {:?}", self.key, mark.index());
}
}
_ => {}
@ -56,6 +120,7 @@ impl MarkedEventReceiver for YamlBibLoader {
}
}
/// The entries extracted from a YAML bibliography file.
#[derive(Debug)]
struct YamlBib {
/// Pairs of entry name and entry location.
entries: Vec<(String, BibEntry)>,
}
@ -66,136 +131,69 @@ impl YamlBib {
let mut loader = YamlBibLoader::default();
parser.load(&mut loader, true).ok();
let mut span_mapper = Vec::from_iter(
loader
.content
.iter()
.flat_map(|(name, span)| [name.span.start, name.span.end, span.start, span.end])
.map(|offset| (offset, None)),
);
span_mapper.sort_by_key(|(offset, _)| *offset);
span_mapper.dedup_by_key(|(offset, _)| *offset);
let mut span_cursor = 0;
let mut byte_offset = 0;
for (off, ch) in content.chars().chain(Some('\0')).enumerate() {
if span_cursor < span_mapper.len() {
let (span, w) = &mut span_mapper[span_cursor];
if off == *span {
*w = Some(byte_offset);
span_cursor += 1;
// Resolves char offsets because yaml2 only provides char indices
let mut char_offsets = loader
.content
.iter()
.flat_map(|(name, span)| [name.range.start, name.range.end, span.start, span.end])
.map(|offset| (offset, None))
.collect::<Vec<_>>();
char_offsets.sort_by_key(|(offset, _)| *offset);
char_offsets.dedup_by_key(|(offset, _)| *offset);
let mut cursor = 0;
let mut utf8_offset = 0;
for (ch_idx, ch_offset) in content.chars().chain(Some('\0')).enumerate() {
if cursor < char_offsets.len() {
let (idx, offset) = &mut char_offsets[cursor];
if ch_idx == *idx {
*offset = Some(utf8_offset);
cursor += 1;
}
}
byte_offset += ch.len_utf8();
utf8_offset += ch_offset.len_utf8();
}
let span_map = HashMap::<usize, usize>::from_iter(
span_mapper
.into_iter()
.filter_map(|(span, offset)| offset.map(|offset| (span, offset))),
);
let map_span = |span: Range<usize>| {
let start = span_map.get(&span.start).copied()?;
let end = span_map.get(&span.end).copied()?;
// Maps a char index to its UTF-8 byte offset
let char_map = char_offsets
.into_iter()
.filter_map(|(start, end)| end.map(|end| (start, end)))
.collect::<HashMap<_, _>>();
let map_range = |range: Range<usize>| {
// The valid utf8 lower bound at the range.start
let start = char_map.get(&range.start).copied()?;
// The valid utf8 upper bound at the range.end
let end = char_map.get(&range.end).copied()?;
Some(start..end)
};
let entries = loader
.content
.into_iter()
.filter_map(|(name, span)| {
let name_span = map_span(name.span)?;
let span = map_span(span)?;
let entry = BibEntry {
file_id,
name_span: name_span.clone(),
span: span.clone(),
};
Some((name.value, entry))
})
.collect();
Self { entries }
}
}
/// The location of a single bibliography entry.
#[derive(Debug, Clone)]
pub struct BibEntry {
/// The file that the entry was found in.
pub file_id: TypstFileId,
/// The span of the entry's name (its key) within the file.
pub name_span: Range<usize>,
/// The span of the whole entry within the file.
pub span: Range<usize>,
}
/// The bibliography information collected from the analyzed bibliography
/// files.
#[derive(Default)]
pub struct BibInfo {
/// The bibliography entries.
pub entries: indexmap::IndexMap<String, BibEntry>,
}
/// Analyzes the given bibliography files and returns the collected
/// [`BibInfo`].
///
/// Each `(file id, content)` pair is passed to [`BibWorker::analyze_path`];
/// files that cannot be analyzed contribute nothing. The result is always
/// `Some`.
pub(crate) fn analyze_bib(paths: EcoVec<(TypstFileId, Bytes)>) -> Option<Arc<BibInfo>> {
    let mut worker = BibWorker {
        info: BibInfo::default(),
    };

    // Several bib/yaml files may be given; all of them feed the same worker.
    for (file_id, bytes) in paths.iter() {
        // A per-file failure is deliberately ignored.
        worker.analyze_path(*file_id, bytes.clone());
    }

    let BibWorker { info } = worker;
    crate::log_debug_ct!(
        "bib analysis: {paths:?} -> {entries:?}",
        entries = info.entries
    );
    Some(Arc::new(info))
}
/// Accumulates the entries of every analyzed bibliography file into a single
/// [`BibInfo`].
struct BibWorker {
/// The bibliography information gathered so far.
info: BibInfo,
}
impl BibWorker {
fn analyze_path(&mut self, path: TypstFileId, content: Bytes) -> Option<()> {
let content = std::str::from_utf8(&content).ok()?;
let ext = path
.vpath()
.as_rootless_path()
.extension()
.and_then(OsStr::to_str)
.unwrap_or_default();
match ext.to_lowercase().as_str() {
"yml" | "yaml" => {
let yaml = YamlBib::from_content(content, path);
self.info.entries.extend(yaml.entries);
}
"bib" => {
let bibliography = biblatex::RawBibliography::parse(content).ok()?;
for entry in bibliography.entries {
let name = entry.v.key;
let span = entry.span;
self.info.entries.insert(
name.v.to_owned(),
BibEntry {
file_id: path,
name_span: name.span,
span,
},
);
}
}
_ => return None,
let to_entry = |(name, range): (BibSpanned<String>, Range<usize>)| {
let name_range = map_range(name.range)?;
let range = map_range(range)?;
let entry = BibEntry {
file_id,
name_range,
range,
};
Some((name.value, entry))
};
Some(())
let entries = loader.content.into_iter().filter_map(to_entry).collect();
Self { entries }
}
}
#[cfg(test)]
mod tests {
use core::fmt;
use std::path::Path;
use typst::syntax::{FileId, VirtualPath};
// Path separators differ between Windows and Linux, so the debug output is
// normalized to forward slashes to keep the snapshots platform independent.
fn bib_snap(snap: &impl fmt::Debug) -> String {
    let rendered = format!("{snap:?}");
    rendered
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
#[test]
fn yaml_bib_test() {
let content = r#"
@ -206,13 +204,13 @@ Euclid2:
type: article
title: '{Elements, {V}ols.\ 2--13}'
"#;
let yaml = super::YamlBib::from_content(
let bib = super::YamlBib::from_content(
content,
FileId::new_fake(VirtualPath::new(Path::new("test.yml"))),
);
assert_eq!(yaml.entries.len(), 2);
assert_eq!(yaml.entries[0].0, "Euclid");
assert_eq!(yaml.entries[1].0, "Euclid2");
assert_eq!(bib.entries.len(), 2);
insta::assert_snapshot!(bib_snap(&bib.entries[0]), @r###"("Euclid", BibEntry { file_id: /test.yml, name_range: 1..7, range: 1..63 })"###);
insta::assert_snapshot!(bib_snap(&bib.entries[1]), @r###"("Euclid2", BibEntry { file_id: /test.yml, name_range: 63..70, range: 63..126 })"###);
}
#[test]
@ -223,9 +221,7 @@ Euclid:
title: '{Elements, {V}ols.\ 1--13}'
Euclid3
"#;
super::YamlBib::from_content(
content,
FileId::new_fake(VirtualPath::new(Path::new("test.yml"))),
);
let file_id = FileId::new_fake(VirtualPath::new(Path::new("test.yml")));
super::YamlBib::from_content(content, file_id);
}
}

View file

@ -155,7 +155,7 @@ fn bib_definition(
crate::log_debug_ct!("find_bib_definition: {key} => {entry:?}");
// todo: rename with regard to string format: yaml-key/bib etc.
let decl = Decl::bib_entry(key.into(), entry.file_id, entry.span.clone());
let decl = Decl::bib_entry(key.into(), entry.file_id, entry.range.clone());
Some(Definition::new(decl.into(), None))
}

View file

@ -12,7 +12,7 @@ use tinymist_project::LspWorld;
use tinymist_std::debug_loc::DataSource;
use tinymist_std::hash::{hash128, FxDashMap};
use tinymist_std::typst::TypstDocument;
use tinymist_world::vfs::{PathResolution, WorkspaceResolver};
use tinymist_world::vfs::{FileId, PathResolution, WorkspaceResolver};
use tinymist_world::{EntryReader, DETACHED_ENTRY};
use typst::diag::{eco_format, At, FileError, FileResult, SourceResult, StrResult};
use typst::foundations::{Bytes, Module, Styles};
@ -24,7 +24,7 @@ use typst_shim::eval::{eval_compat, Eval};
use crate::adt::revision::{RevisionLock, RevisionManager, RevisionManagerLike, RevisionSlot};
use crate::analysis::prelude::*;
use crate::analysis::{
analyze_bib, analyze_expr_, analyze_import_, analyze_signature, definition, post_type_check,
analyze_expr_, analyze_import_, analyze_signature, bib_info, definition, post_type_check,
AllocStats, AnalysisStats, BibInfo, CompletionFeat, Definition, PathPreference, QueryStatGuard,
SemanticTokenCache, SemanticTokenContext, SemanticTokens, Signature, SignatureTarget, Ty,
TypeInfo,
@ -878,7 +878,8 @@ impl SharedContext {
let w = &self.world;
let w = (w as &dyn World).track();
bib_info(w, span, bib_paths.collect())
let fid = span.id()?;
analyze_bib(w, bib_paths.collect(), fid)
}
/// Describe the item under the cursor.
@ -1260,21 +1261,19 @@ fn ceil_char_boundary(text: &str, mut cursor: usize) -> usize {
}
#[comemo::memoize]
fn bib_info(
w: Tracked<dyn World + '_>,
span: Span,
fn analyze_bib(
world: Tracked<dyn World + '_>,
bib_paths: EcoVec<EcoString>,
elem_fid: FileId,
) -> Option<Arc<BibInfo>> {
let id = span.id()?;
let files = bib_paths
.iter()
.flat_map(|s| {
let id = resolve_id_by_path(w.deref(), id, s)?;
Some((id, w.file(id).ok()?))
.flat_map(|bib_path| {
let bib_fid = resolve_id_by_path(world.deref(), elem_fid, bib_path)?;
Some((bib_fid, world.file(bib_fid).ok()?))
})
.collect::<EcoVec<_>>();
analyze_bib(files)
bib_info(files)
}
#[comemo::memoize]