mirror of
https://github.com/Myriad-Dreamin/tinymist.git
synced 2025-08-03 09:52:27 +00:00
refactor: bib worker (#1585)
* refactor: bib worker * fix: stupid slash
This commit is contained in:
parent
7b74506dcc
commit
53ceba2801
3 changed files with 137 additions and 142 deletions
|
@ -1,14 +1,81 @@
|
|||
use std::ffi::OsStr;
|
||||
|
||||
use typst::foundations::Bytes;
|
||||
use yaml_rust2::{parser::Event, parser::MarkedEventReceiver, scanner::Marker};
|
||||
|
||||
use super::prelude::*;
|
||||
|
||||
pub(crate) fn bib_info(files: EcoVec<(TypstFileId, Bytes)>) -> Option<Arc<BibInfo>> {
|
||||
let mut worker = BibWorker {
|
||||
info: BibInfo::default(),
|
||||
};
|
||||
|
||||
// We might have multiple bib/yaml files
|
||||
for (file_id, content) in files.clone() {
|
||||
worker.analyze_path(file_id, content);
|
||||
}
|
||||
|
||||
let info = Arc::new(worker.info);
|
||||
|
||||
crate::log_debug_ct!("bib analysis: {files:?} -> {info:?}");
|
||||
Some(info)
|
||||
}
|
||||
|
||||
/// The bibliography information.
|
||||
#[derive(Debug, Default)]
|
||||
pub struct BibInfo {
|
||||
/// The bibliography entries.
|
||||
pub entries: indexmap::IndexMap<String, BibEntry>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct BibEntry {
|
||||
pub file_id: TypstFileId,
|
||||
pub name_range: Range<usize>,
|
||||
pub range: Range<usize>,
|
||||
}
|
||||
|
||||
struct BibWorker {
|
||||
info: BibInfo,
|
||||
}
|
||||
|
||||
impl BibWorker {
|
||||
fn analyze_path(&mut self, file_id: TypstFileId, content: Bytes) -> Option<()> {
|
||||
let file_extension = file_id.vpath().as_rooted_path().extension()?.to_str()?;
|
||||
let content = std::str::from_utf8(&content).ok()?;
|
||||
|
||||
match file_extension.to_lowercase().as_str() {
|
||||
"yml" | "yaml" => self.yaml_bib(file_id, content),
|
||||
"bib" => {
|
||||
let bibliography = biblatex::RawBibliography::parse(content).ok()?;
|
||||
self.tex_bib(file_id, bibliography)
|
||||
}
|
||||
_ => return None,
|
||||
};
|
||||
|
||||
Some(())
|
||||
}
|
||||
|
||||
fn yaml_bib(&mut self, file_id: TypstFileId, content: &str) {
|
||||
let yaml = YamlBib::from_content(content, file_id);
|
||||
self.info.entries.extend(yaml.entries);
|
||||
}
|
||||
|
||||
fn tex_bib(&mut self, file_id: TypstFileId, bibliography: biblatex::RawBibliography) {
|
||||
for entry in bibliography.entries {
|
||||
let name = entry.v.key;
|
||||
let entry = BibEntry {
|
||||
file_id,
|
||||
name_range: name.span,
|
||||
range: entry.span,
|
||||
};
|
||||
self.info.entries.insert(name.v.to_owned(), entry);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct BibSpanned<T> {
|
||||
value: T,
|
||||
span: Range<usize>,
|
||||
range: Range<usize>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
|
@ -24,17 +91,15 @@ impl MarkedEventReceiver for YamlBibLoader {
|
|||
match event {
|
||||
Event::MappingStart(..) => {
|
||||
if self.depth == 1 {
|
||||
crate::log_debug_ct!("mapping start: {:?} {:?}", self.key, mark.index());
|
||||
self.start = self.key.take();
|
||||
}
|
||||
self.depth += 1;
|
||||
}
|
||||
Event::Scalar(s, ..) => {
|
||||
crate::log_debug_ct!("scalar: {:?} {:?}", s, mark.index());
|
||||
if self.depth == 1 {
|
||||
self.key = Some(BibSpanned {
|
||||
value: s.to_owned(),
|
||||
span: mark.index()..mark.index() + s.chars().count(),
|
||||
range: mark.index()..mark.index() + s.chars().count(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -46,9 +111,8 @@ impl MarkedEventReceiver for YamlBibLoader {
|
|||
let Some(start) = start else {
|
||||
return;
|
||||
};
|
||||
let span = start.span.start..end;
|
||||
let span = start.range.start..end;
|
||||
self.content.push((start, span));
|
||||
crate::log_debug_ct!("mapping end: {:?} {:?}", self.key, mark.index());
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
|
@ -56,6 +120,7 @@ impl MarkedEventReceiver for YamlBibLoader {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct YamlBib {
|
||||
entries: Vec<(String, BibEntry)>,
|
||||
}
|
||||
|
@ -66,136 +131,69 @@ impl YamlBib {
|
|||
let mut loader = YamlBibLoader::default();
|
||||
parser.load(&mut loader, true).ok();
|
||||
|
||||
let mut span_mapper = Vec::from_iter(
|
||||
loader
|
||||
.content
|
||||
.iter()
|
||||
.flat_map(|(name, span)| [name.span.start, name.span.end, span.start, span.end])
|
||||
.map(|offset| (offset, None)),
|
||||
);
|
||||
span_mapper.sort_by_key(|(offset, _)| *offset);
|
||||
span_mapper.dedup_by_key(|(offset, _)| *offset);
|
||||
let mut span_cursor = 0;
|
||||
let mut byte_offset = 0;
|
||||
for (off, ch) in content.chars().chain(Some('\0')).enumerate() {
|
||||
if span_cursor < span_mapper.len() {
|
||||
let (span, w) = &mut span_mapper[span_cursor];
|
||||
if off == *span {
|
||||
*w = Some(byte_offset);
|
||||
span_cursor += 1;
|
||||
// Resolves char offsets because yaml2 only provides char indices
|
||||
let mut char_offsets = loader
|
||||
.content
|
||||
.iter()
|
||||
.flat_map(|(name, span)| [name.range.start, name.range.end, span.start, span.end])
|
||||
.map(|offset| (offset, None))
|
||||
.collect::<Vec<_>>();
|
||||
char_offsets.sort_by_key(|(offset, _)| *offset);
|
||||
char_offsets.dedup_by_key(|(offset, _)| *offset);
|
||||
let mut cursor = 0;
|
||||
let mut utf8_offset = 0;
|
||||
for (ch_idx, ch_offset) in content.chars().chain(Some('\0')).enumerate() {
|
||||
if cursor < char_offsets.len() {
|
||||
let (idx, offset) = &mut char_offsets[cursor];
|
||||
if ch_idx == *idx {
|
||||
*offset = Some(utf8_offset);
|
||||
cursor += 1;
|
||||
}
|
||||
}
|
||||
byte_offset += ch.len_utf8();
|
||||
utf8_offset += ch_offset.len_utf8();
|
||||
}
|
||||
|
||||
let span_map = HashMap::<usize, usize>::from_iter(
|
||||
span_mapper
|
||||
.into_iter()
|
||||
.filter_map(|(span, offset)| offset.map(|offset| (span, offset))),
|
||||
);
|
||||
let map_span = |span: Range<usize>| {
|
||||
let start = span_map.get(&span.start).copied()?;
|
||||
let end = span_map.get(&span.end).copied()?;
|
||||
// Maps a char index to its UTF-8 offset
|
||||
let char_map = char_offsets
|
||||
.into_iter()
|
||||
.filter_map(|(start, end)| end.map(|end| (start, end)))
|
||||
.collect::<HashMap<_, _>>();
|
||||
let map_range = |range: Range<usize>| {
|
||||
// The valid utf8 lower bound at the range.start
|
||||
let start = char_map.get(&range.start).copied()?;
|
||||
// The valid utf8 upper bound at the range.end
|
||||
let end = char_map.get(&range.end).copied()?;
|
||||
Some(start..end)
|
||||
};
|
||||
|
||||
let entries = loader
|
||||
.content
|
||||
.into_iter()
|
||||
.filter_map(|(name, span)| {
|
||||
let name_span = map_span(name.span)?;
|
||||
let span = map_span(span)?;
|
||||
let entry = BibEntry {
|
||||
file_id,
|
||||
name_span: name_span.clone(),
|
||||
span: span.clone(),
|
||||
};
|
||||
Some((name.value, entry))
|
||||
})
|
||||
.collect();
|
||||
|
||||
Self { entries }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct BibEntry {
|
||||
pub file_id: TypstFileId,
|
||||
pub name_span: Range<usize>,
|
||||
pub span: Range<usize>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct BibInfo {
|
||||
/// The bibliography entries.
|
||||
pub entries: indexmap::IndexMap<String, BibEntry>,
|
||||
}
|
||||
|
||||
pub(crate) fn analyze_bib(paths: EcoVec<(TypstFileId, Bytes)>) -> Option<Arc<BibInfo>> {
|
||||
let mut worker = BibWorker {
|
||||
info: BibInfo::default(),
|
||||
};
|
||||
|
||||
// We might have multiple bib/yaml files
|
||||
for (path, content) in paths.clone() {
|
||||
worker.analyze_path(path, content);
|
||||
}
|
||||
|
||||
crate::log_debug_ct!(
|
||||
"bib analysis: {paths:?} -> {entries:?}",
|
||||
entries = worker.info.entries
|
||||
);
|
||||
Some(Arc::new(worker.info))
|
||||
}
|
||||
|
||||
struct BibWorker {
|
||||
info: BibInfo,
|
||||
}
|
||||
|
||||
impl BibWorker {
|
||||
fn analyze_path(&mut self, path: TypstFileId, content: Bytes) -> Option<()> {
|
||||
let content = std::str::from_utf8(&content).ok()?;
|
||||
|
||||
let ext = path
|
||||
.vpath()
|
||||
.as_rootless_path()
|
||||
.extension()
|
||||
.and_then(OsStr::to_str)
|
||||
.unwrap_or_default();
|
||||
|
||||
match ext.to_lowercase().as_str() {
|
||||
"yml" | "yaml" => {
|
||||
let yaml = YamlBib::from_content(content, path);
|
||||
self.info.entries.extend(yaml.entries);
|
||||
}
|
||||
"bib" => {
|
||||
let bibliography = biblatex::RawBibliography::parse(content).ok()?;
|
||||
for entry in bibliography.entries {
|
||||
let name = entry.v.key;
|
||||
let span = entry.span;
|
||||
self.info.entries.insert(
|
||||
name.v.to_owned(),
|
||||
BibEntry {
|
||||
file_id: path,
|
||||
name_span: name.span,
|
||||
span,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
_ => return None,
|
||||
let to_entry = |(name, range): (BibSpanned<String>, Range<usize>)| {
|
||||
let name_range = map_range(name.range)?;
|
||||
let range = map_range(range)?;
|
||||
let entry = BibEntry {
|
||||
file_id,
|
||||
name_range,
|
||||
range,
|
||||
};
|
||||
Some((name.value, entry))
|
||||
};
|
||||
|
||||
Some(())
|
||||
let entries = loader.content.into_iter().filter_map(to_entry).collect();
|
||||
Self { entries }
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use core::fmt;
|
||||
use std::path::Path;
|
||||
|
||||
use typst::syntax::{FileId, VirtualPath};
|
||||
|
||||
// Renders `snap` with its `Debug` formatting and turns every backslash into a
// forward slash. This is a workaround for path separators differing between
// Windows and Linux, so that the snapshots are the same on both.
fn bib_snap(snap: &impl fmt::Debug) -> String {
    let rendered = format!("{:?}", snap);
    rendered
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}
|
||||
|
||||
#[test]
|
||||
fn yaml_bib_test() {
|
||||
let content = r#"
|
||||
|
@ -206,13 +204,13 @@ Euclid2:
|
|||
type: article
|
||||
title: '{Elements, {V}ols.\ 2--13}'
|
||||
"#;
|
||||
let yaml = super::YamlBib::from_content(
|
||||
let bib = super::YamlBib::from_content(
|
||||
content,
|
||||
FileId::new_fake(VirtualPath::new(Path::new("test.yml"))),
|
||||
);
|
||||
assert_eq!(yaml.entries.len(), 2);
|
||||
assert_eq!(yaml.entries[0].0, "Euclid");
|
||||
assert_eq!(yaml.entries[1].0, "Euclid2");
|
||||
assert_eq!(bib.entries.len(), 2);
|
||||
insta::assert_snapshot!(bib_snap(&bib.entries[0]), @r###"("Euclid", BibEntry { file_id: /test.yml, name_range: 1..7, range: 1..63 })"###);
|
||||
insta::assert_snapshot!(bib_snap(&bib.entries[1]), @r###"("Euclid2", BibEntry { file_id: /test.yml, name_range: 63..70, range: 63..126 })"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -223,9 +221,7 @@ Euclid:
|
|||
title: '{Elements, {V}ols.\ 1--13}'
|
||||
Euclid3
|
||||
"#;
|
||||
super::YamlBib::from_content(
|
||||
content,
|
||||
FileId::new_fake(VirtualPath::new(Path::new("test.yml"))),
|
||||
);
|
||||
let file_id = FileId::new_fake(VirtualPath::new(Path::new("test.yml")));
|
||||
super::YamlBib::from_content(content, file_id);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -155,7 +155,7 @@ fn bib_definition(
|
|||
crate::log_debug_ct!("find_bib_definition: {key} => {entry:?}");
|
||||
|
||||
// todo: rename with regard to string format: yaml-key/bib etc.
|
||||
let decl = Decl::bib_entry(key.into(), entry.file_id, entry.span.clone());
|
||||
let decl = Decl::bib_entry(key.into(), entry.file_id, entry.range.clone());
|
||||
Some(Definition::new(decl.into(), None))
|
||||
}
|
||||
|
||||
|
|
|
@ -12,7 +12,7 @@ use tinymist_project::LspWorld;
|
|||
use tinymist_std::debug_loc::DataSource;
|
||||
use tinymist_std::hash::{hash128, FxDashMap};
|
||||
use tinymist_std::typst::TypstDocument;
|
||||
use tinymist_world::vfs::{PathResolution, WorkspaceResolver};
|
||||
use tinymist_world::vfs::{FileId, PathResolution, WorkspaceResolver};
|
||||
use tinymist_world::{EntryReader, DETACHED_ENTRY};
|
||||
use typst::diag::{eco_format, At, FileError, FileResult, SourceResult, StrResult};
|
||||
use typst::foundations::{Bytes, Module, Styles};
|
||||
|
@ -24,7 +24,7 @@ use typst_shim::eval::{eval_compat, Eval};
|
|||
use crate::adt::revision::{RevisionLock, RevisionManager, RevisionManagerLike, RevisionSlot};
|
||||
use crate::analysis::prelude::*;
|
||||
use crate::analysis::{
|
||||
analyze_bib, analyze_expr_, analyze_import_, analyze_signature, definition, post_type_check,
|
||||
analyze_expr_, analyze_import_, analyze_signature, bib_info, definition, post_type_check,
|
||||
AllocStats, AnalysisStats, BibInfo, CompletionFeat, Definition, PathPreference, QueryStatGuard,
|
||||
SemanticTokenCache, SemanticTokenContext, SemanticTokens, Signature, SignatureTarget, Ty,
|
||||
TypeInfo,
|
||||
|
@ -878,7 +878,8 @@ impl SharedContext {
|
|||
let w = &self.world;
|
||||
let w = (w as &dyn World).track();
|
||||
|
||||
bib_info(w, span, bib_paths.collect())
|
||||
let fid = span.id()?;
|
||||
analyze_bib(w, bib_paths.collect(), fid)
|
||||
}
|
||||
|
||||
/// Describe the item under the cursor.
|
||||
|
@ -1260,21 +1261,19 @@ fn ceil_char_boundary(text: &str, mut cursor: usize) -> usize {
|
|||
}
|
||||
|
||||
#[comemo::memoize]
|
||||
fn bib_info(
|
||||
w: Tracked<dyn World + '_>,
|
||||
span: Span,
|
||||
fn analyze_bib(
|
||||
world: Tracked<dyn World + '_>,
|
||||
bib_paths: EcoVec<EcoString>,
|
||||
elem_fid: FileId,
|
||||
) -> Option<Arc<BibInfo>> {
|
||||
let id = span.id()?;
|
||||
|
||||
let files = bib_paths
|
||||
.iter()
|
||||
.flat_map(|s| {
|
||||
let id = resolve_id_by_path(w.deref(), id, s)?;
|
||||
Some((id, w.file(id).ok()?))
|
||||
.flat_map(|bib_path| {
|
||||
let bib_fid = resolve_id_by_path(world.deref(), elem_fid, bib_path)?;
|
||||
Some((bib_fid, world.file(bib_fid).ok()?))
|
||||
})
|
||||
.collect::<EcoVec<_>>();
|
||||
analyze_bib(files)
|
||||
bib_info(files)
|
||||
}
|
||||
|
||||
#[comemo::memoize]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue