feat: lock and snapshot {analysis,token} caches on main thread (#806)

* refactor: hide lock in `query_snapshot`

* refactor: hide lock in `query_snapshot`

* test: update snapshot

* dev: update comments

* dev: update snapshot
Myriad-Dreamin 2024-11-14 01:40:27 +08:00 committed by GitHub
parent 61f2273e2e
commit fc3099a27c
14 changed files with 543 additions and 346 deletions
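
At a glance, the change splits lock acquisition (main thread) from snapshot construction (any thread). A minimal wiring sketch, using the `Analysis`, `LspWorld`, and `CompilerQueryRequest` names from this diff; the `route_query` helper and its calling convention are hypothetical:

fn route_query(analysis: &Analysis, world: LspWorld, req: CompilerQueryRequest) {
    // Main thread: lock the estimated revision up front; for semantic token
    // requests this also pins the per-file token cache.
    let lock = analysis.lock_revision(Some(&req));

    // Worker side: materialize the snapshot against the locked revision.
    // The lock is consumed by the guard and released when the guard drops.
    let _guard = analysis.snapshot_(world, lock);
}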

View file

@@ -1,2 +1,3 @@
pub mod interner;
pub mod revision;
pub mod snapshot_map;

View file

@@ -0,0 +1,136 @@
use std::{
collections::HashMap,
num::NonZeroUsize,
sync::{Arc, OnceLock},
};
pub struct RevisionLock {
estimated: usize,
used: OnceLock<usize>,
}
impl RevisionLock {
pub fn access(&self, revision: NonZeroUsize) {
self.used
.set(revision.get())
.unwrap_or_else(|_| panic!("revision {revision} is already determined"))
}
}
pub struct RevisionSlot<T> {
pub revision: usize,
pub data: T,
}
impl<T> std::ops::Deref for RevisionSlot<T> {
type Target = T;
fn deref(&self) -> &Self::Target {
&self.data
}
}
impl<T> std::ops::DerefMut for RevisionSlot<T> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.data
}
}
pub struct RevisionManager<T> {
estimated: usize,
locked: HashMap<usize, usize>,
slots: Vec<Arc<RevisionSlot<T>>>,
}
impl<T> Default for RevisionManager<T> {
fn default() -> Self {
Self {
estimated: 0,
locked: Default::default(),
slots: Default::default(),
}
}
}
impl<T> RevisionManager<T> {
pub fn clear(&mut self) {
self.slots.clear();
}
/// Lock the given revision in the *main thread*.
#[must_use]
pub fn lock(&mut self, used: NonZeroUsize) -> RevisionLock {
let l = self.lock_estimated();
l.access(used);
l
}
/// Lock an estimated revision in the *main thread*; the concrete revision is supplied later via `RevisionLock::access`.
#[must_use]
pub fn lock_estimated(&mut self) -> RevisionLock {
let estimated = self.estimated;
*self.locked.entry(estimated).or_default() += 1;
RevisionLock {
estimated,
used: OnceLock::new(),
}
}
/// Find or create the revision slot for the given revision number, deriving its data from the closest earlier slot.
pub fn find_revision(
&mut self,
revision: NonZeroUsize,
f: impl FnOnce(Option<&Arc<RevisionSlot<T>>>) -> T,
) -> Arc<RevisionSlot<T>> {
let slot_base = self
.slots
.iter()
.filter(|e| e.revision <= revision.get())
.reduce(|a, b| if a.revision > b.revision { a } else { b });
if let Some(slot) = slot_base {
if slot.revision == revision.get() {
return slot.clone();
}
}
let slot = Arc::new(RevisionSlot {
revision: revision.get(),
data: f(slot_base),
});
self.slots.push(slot.clone());
self.estimated = revision.get().max(self.estimated);
slot
}
pub fn unlock(&mut self, rev: &mut RevisionLock) -> Option<usize> {
let rev = rev.estimated;
let revision_cnt = self
.locked
.entry(rev)
.or_insert_with(|| panic!("revision {rev} is not locked"));
*revision_cnt -= 1;
if *revision_cnt != 0 {
return None;
}
self.locked.remove(&rev);
let existing = self.locked.keys().min().copied();
existing.or_else(||
// if there is no locked revision, we only keep the latest revision
self.slots
.iter()
.map(|e| e.revision)
.max())
}
}
pub trait RevisionManagerLike {
fn gc(&mut self, min_rev: usize);
}
impl<T> RevisionManagerLike for RevisionManager<T> {
fn gc(&mut self, min_rev: usize) {
self.slots.retain(|r| r.revision >= min_rev);
}
}
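
For orientation, a hedged usage sketch of the primitives above, assuming the items of this module (including the `RevisionManagerLike` trait) are in scope; the `Vec<u32>` payload and revision numbers are made up:

use std::num::NonZeroUsize;

fn demo(manager: &mut RevisionManager<Vec<u32>>) {
    // Main thread: reserve the currently estimated revision.
    let mut lock = manager.lock_estimated();

    // Once the concrete revision is known, pin the lock to it.
    let rev = NonZeroUsize::new(3).unwrap();
    lock.access(rev);

    // Find or create the slot for that revision; the closure derives new
    // data from the closest earlier slot, if any.
    let slot = manager.find_revision(rev, |prev| {
        prev.map(|p| p.data.clone()).unwrap_or_default()
    });
    assert_eq!(slot.revision, 3);

    // Unlocking returns the smallest revision that must be kept, which is
    // then fed into gc() to drop older slots.
    if let Some(min_rev) = manager.unlock(&mut lock) {
        manager.gc(min_rev);
    }
}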

View file

@@ -22,6 +22,7 @@ use typst::model::Document;
use typst::syntax::package::PackageManifest;
use typst::syntax::{package::PackageSpec, Span, VirtualPath};
use crate::adt::revision::{RevisionLock, RevisionManager, RevisionManagerLike, RevisionSlot};
use crate::analysis::prelude::*;
use crate::analysis::{
analyze_bib, analyze_expr_, analyze_import_, analyze_signature, definition, post_type_check,
@@ -36,8 +37,9 @@ use crate::syntax::{
};
use crate::upstream::{tooltip_, Tooltip};
use crate::{
lsp_to_typst, typst_to_lsp, ColorTheme, LspPosition, LspRange, LspWorldExt, PositionEncoding,
SemanticTokenContext, TypstRange, VersionedDocument,
lsp_to_typst, typst_to_lsp, ColorTheme, CompilerQueryRequest, LspPosition, LspRange,
LspWorldExt, PositionEncoding, SemanticTokenCache, SemanticTokenContext, TypstRange,
VersionedDocument,
};
use super::TypeEnv;
@@ -47,18 +49,22 @@ use super::TypeEnv;
pub struct Analysis {
/// The position encoding for the workspace.
pub position_encoding: PositionEncoding,
/// Whether to allow overlapping semantic tokens.
pub allow_overlapping_token: bool,
/// Whether to allow multiline semantic tokens.
pub allow_multiline_token: bool,
/// The editor's color theme.
pub color_theme: ColorTheme,
/// The periscope provider.
pub periscope: Option<Arc<dyn PeriscopeProvider + Send + Sync>>,
/// The semantic token context.
pub tokens_ctx: Arc<SemanticTokenContext>,
/// The global worker resources for analysis.
pub workers: Arc<AnalysisGlobalWorkers>,
/// The semantic token cache.
pub tokens_caches: Arc<Mutex<SemanticTokenCache>>,
/// The global caches for analysis.
pub caches: AnalysisGlobalCaches,
/// The global cache grid for analysis.
pub cache_grid: Arc<Mutex<AnalysisGlobalCacheGrid>>,
/// The revisioned cache for analysis.
pub analysis_rev_cache: Arc<Mutex<AnalysisRevCache>>,
/// The statistics about the analyzers.
pub stats: Arc<AnalysisStats>,
}
@@ -66,10 +72,21 @@ pub struct Analysis {
impl Analysis {
/// Get a snapshot of the analysis data.
pub fn snapshot(&self, world: LspWorld) -> LocalContextGuard {
self.snapshot_(world, self.lock_revision(None))
}
/// Get a snapshot of the analysis data.
pub fn snapshot_(&self, world: LspWorld, mut lg: AnalysisRevLock) -> LocalContextGuard {
let lifetime = self.caches.lifetime.fetch_add(1, Ordering::SeqCst);
let slot = self.cache_grid.lock().find_revision(world.revision());
let slot = self
.analysis_rev_cache
.lock()
.find_revision(world.revision(), &lg);
let tokens = lg.tokens.take();
LocalContextGuard {
rev_lock: lg,
local: LocalContext {
tokens,
caches: AnalysisCaches::default(),
shared: Arc::new(SharedContext {
slot,
@@ -83,22 +100,36 @@ impl Analysis {
/// Lock the revision in the *main thread*.
#[must_use]
pub fn lock_revision(&self) -> RevisionLock {
let mut grid = self.cache_grid.lock();
let revision = grid.revision;
*grid.locked_revisions.entry(revision).or_default() += 1;
RevisionLock {
grid: self.cache_grid.clone(),
revision,
pub fn lock_revision(&self, q: Option<&CompilerQueryRequest>) -> AnalysisRevLock {
let mut grid = self.analysis_rev_cache.lock();
AnalysisRevLock {
tokens: match q {
Some(CompilerQueryRequest::SemanticTokensFull(f)) => Some(
SemanticTokenCache::acquire(self.tokens_caches.clone(), &f.path, None),
),
Some(CompilerQueryRequest::SemanticTokensDelta(f)) => {
Some(SemanticTokenCache::acquire(
self.tokens_caches.clone(),
&f.path,
Some(&f.previous_result_id),
))
}
_ => None,
},
inner: grid.manager.lock_estimated(),
grid: self.analysis_rev_cache.clone(),
}
}
/// Clear all cached resources.
pub fn clear_cache(&self) {
self.caches.signatures.clear();
self.caches.def_signatures.clear();
self.caches.static_signatures.clear();
self.caches.terms.clear();
self.cache_grid.lock().clear();
self.tokens_caches.lock().clear();
self.analysis_rev_cache.lock().clear();
}
/// Report the statistics of the analysis.
@@ -140,6 +171,8 @@ pub struct AnalysisGlobalWorkers {
pub struct LocalContextGuard {
/// Constructed local context
pub local: LocalContext,
/// The revision lock
pub rev_lock: AnalysisRevLock,
}
impl Deref for LocalContextGuard {
@@ -206,6 +239,8 @@ impl LocalContextGuard {
/// The local context for analyzers.
pub struct LocalContext {
/// The created semantic token context.
pub(crate) tokens: Option<SemanticTokenContext>,
/// Local caches for analysis.
pub caches: AnalysisCaches,
/// The shared context
@@ -387,8 +422,8 @@ pub struct SharedContext {
pub world: LspWorld,
/// The analysis data
pub analysis: Analysis,
/// The using revision slot
slot: Arc<RevisionSlot>,
/// The using analysis revision slot
slot: Arc<RevisionSlot<AnalysisRevSlot>>,
}
impl SharedContext {
@@ -1047,20 +1082,14 @@ pub struct ModuleAnalysisCache {
/// The grid cache for all level of analysis results of a module.
#[derive(Default)]
pub struct AnalysisGlobalCacheGrid {
revision: usize,
default_slot: RevisionSlot,
revisions: Vec<Arc<RevisionSlot>>,
locked_revisions: HashMap<usize, usize>,
pub struct AnalysisRevCache {
default_slot: AnalysisRevSlot,
manager: RevisionManager<AnalysisRevSlot>,
}
impl AnalysisGlobalCacheGrid {
fn clear(&mut self) {
self.revisions.clear();
}
impl RevisionManagerLike for AnalysisRevCache {
fn gc(&mut self, rev: usize) {
self.revisions.retain(|r| r.revision >= rev);
self.manager.gc(rev);
self.default_slot
.expr_stage
.global
@@ -1072,79 +1101,64 @@ impl AnalysisGlobalCacheGrid {
.lock()
.retain(|_, r| r.0 + 60 >= rev);
}
}
impl AnalysisRevCache {
fn clear(&mut self) {
self.manager.clear();
self.default_slot = Default::default();
}
/// Find the last revision slot by revision number.
fn find_revision(&mut self, revision: NonZeroUsize) -> Arc<RevisionSlot> {
let slot_base = self
.revisions
.iter()
.filter(|e| e.revision <= revision.get())
.reduce(|a, b| if a.revision > b.revision { a } else { b });
if let Some(slot) = slot_base {
if slot.revision == revision.get() {
return slot.clone();
}
}
let mut slot = slot_base
.map(|e| RevisionSlot {
revision: e.revision,
expr_stage: e.expr_stage.crawl(revision.get()),
type_check: e.type_check.crawl(revision.get()),
})
.unwrap_or_else(|| self.default_slot.clone());
slot.revision = revision.get();
let slot = Arc::new(slot);
self.revisions.push(slot.clone());
self.revision = revision.get().max(self.revision);
slot
fn find_revision(
&mut self,
revision: NonZeroUsize,
lg: &AnalysisRevLock,
) -> Arc<RevisionSlot<AnalysisRevSlot>> {
lg.inner.access(revision);
self.manager.find_revision(revision, |slot_base| {
slot_base
.map(|e| AnalysisRevSlot {
revision: e.revision,
expr_stage: e.data.expr_stage.crawl(revision.get()),
type_check: e.data.type_check.crawl(revision.get()),
})
.unwrap_or_else(|| self.default_slot.clone())
})
}
}
/// A lock for a revision.
pub struct RevisionLock {
grid: Arc<Mutex<AnalysisGlobalCacheGrid>>,
revision: usize,
pub struct AnalysisRevLock {
inner: RevisionLock,
tokens: Option<SemanticTokenContext>,
grid: Arc<Mutex<AnalysisRevCache>>,
}
impl Drop for RevisionLock {
impl Drop for AnalysisRevLock {
fn drop(&mut self) {
let mut grid = self.grid.lock();
let revision_cnt = grid
.locked_revisions
.entry(self.revision)
.or_insert_with(|| panic!("revision {} is not locked", self.revision));
*revision_cnt -= 1;
if *revision_cnt != 0 {
return;
}
let mut mu = self.grid.lock();
let gc_revision = mu.manager.unlock(&mut self.inner);
grid.locked_revisions.remove(&self.revision);
if grid.revision <= self.revision {
return;
if let Some(gc_revision) = gc_revision {
let grid = self.grid.clone();
rayon::spawn(move || {
grid.lock().gc(gc_revision);
});
}
let existing = grid.locked_revisions.keys().min().copied();
let gc_revision = existing.unwrap_or(self.revision);
let grid = self.grid.clone();
rayon::spawn(move || {
grid.lock().gc(gc_revision);
});
}
}
#[derive(Default, Clone)]
struct RevisionSlot {
struct AnalysisRevSlot {
revision: usize,
expr_stage: IncrCacheMap<u128, Arc<ExprInfo>>,
type_check: IncrCacheMap<u128, Arc<TypeScheme>>,
}
impl Drop for RevisionSlot {
impl Drop for AnalysisRevSlot {
fn drop(&mut self) {
log::info!("revision {} is dropped", self.revision)
log::info!("analysis revision {} is dropped", self.revision)
}
}
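
The guard returned by `snapshot`/`snapshot_` owns the `AnalysisRevLock`, so the revisioned caches above stay pinned for the lifetime of a query and are pruned off-thread once the last lock is released. A hedged sketch; the `handle_request` wrapper is illustrative:

fn handle_request(analysis: &Analysis, world: LspWorld) {
    // `snapshot` locks the current revision internally (lock_revision(None)).
    let guard = analysis.snapshot(world);
    // ... answer the request through `guard` ...
    // Dropping the guard drops its rev_lock; if no other query still holds
    // that revision, a gc job is spawned on rayon to prune old cache slots.
    drop(guard);
}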

View file

@@ -1,75 +1 @@
use lsp_types::{SemanticToken, SemanticTokensEdit};
#[derive(Debug)]
struct CachedTokens {
tokens: Vec<SemanticToken>,
id: u64,
}
#[derive(Default, Debug)]
pub struct CacheInner {
last_sent: Option<CachedTokens>,
next_id: u64,
}
impl CacheInner {
pub fn try_take_result(&mut self, id: &str) -> Option<Vec<SemanticToken>> {
let id = id.parse::<u64>().ok()?;
match self.last_sent.take() {
Some(cached) if cached.id == id => Some(cached.tokens),
Some(cached) => {
// replace after taking
self.last_sent = Some(cached);
None
}
None => None,
}
}
pub fn cache_result(&mut self, tokens: Vec<SemanticToken>) -> String {
let id = self.get_next_id();
let cached = CachedTokens { tokens, id };
self.last_sent = Some(cached);
id.to_string()
}
fn get_next_id(&mut self) -> u64 {
let id = self.next_id;
self.next_id += 1;
id
}
}
pub fn token_delta(from: &[SemanticToken], to: &[SemanticToken]) -> Vec<SemanticTokensEdit> {
// Taken from `rust-analyzer`'s algorithm
// https://github.com/rust-lang/rust-analyzer/blob/master/crates/rust-analyzer/src/semantic_tokens.rs#L219
let start = from
.iter()
.zip(to.iter())
.take_while(|(x, y)| x == y)
.count();
let (_, from) = from.split_at(start);
let (_, to) = to.split_at(start);
let dist_from_end = from
.iter()
.rev()
.zip(to.iter().rev())
.take_while(|(x, y)| x == y)
.count();
let (from, _) = from.split_at(from.len() - dist_from_end);
let (to, _) = to.split_at(to.len() - dist_from_end);
if from.is_empty() && to.is_empty() {
vec![]
} else {
vec![SemanticTokensEdit {
start: 5 * start as u32,
delete_count: 5 * from.len() as u32,
data: Some(to.into()),
}]
}
}

View file

@@ -1,95 +1,146 @@
use std::{ops::Range, sync::Arc};
use std::{
num::NonZeroUsize,
ops::Range,
path::Path,
sync::{Arc, OnceLock},
};
use lsp_types::{SemanticToken, SemanticTokensEdit};
use parking_lot::RwLock;
use hashbrown::HashMap;
use lsp_types::SemanticToken;
use parking_lot::Mutex;
use reflexo::ImmutPath;
use typst::syntax::{ast, LinkedNode, Source, SyntaxKind};
use crate::{
adt::revision::{RevisionLock, RevisionManager, RevisionManagerLike, RevisionSlot},
syntax::{Expr, ExprInfo},
ty::Ty,
LocalContext, LspPosition, PositionEncoding,
};
use self::delta::token_delta;
use self::modifier_set::ModifierSet;
use self::delta::CacheInner as TokenCacheInner;
mod delta;
mod modifier_set;
mod typst_tokens;
pub use self::typst_tokens::{Modifier, TokenType};
/// A semantic token context providing incremental semantic tokens rendering.
/// A shared semantic tokens object.
pub type SemanticTokens = Arc<Vec<SemanticToken>>;
/// A shared semantic tokens cache.
#[derive(Default)]
pub struct SemanticTokenContext {
cache: RwLock<TokenCacheInner>,
/// Whether to allow overlapping tokens.
pub allow_overlapping_token: bool,
/// Whether to allow multiline tokens.
pub allow_multiline_token: bool,
pub struct SemanticTokenCache {
next_id: usize,
// todo: clear cache after didClose
manager: HashMap<ImmutPath, RevisionManager<OnceLock<SemanticTokens>>>,
}
impl SemanticTokenCache {
pub(crate) fn clear(&mut self) {
self.next_id = 0;
self.manager.clear();
}
/// Lock the token cache with an optional previous id in the *main thread*.
pub(crate) fn acquire(
cache: Arc<Mutex<Self>>,
p: &Path,
prev: Option<&str>,
) -> SemanticTokenContext {
let that = cache.clone();
let mut that = that.lock();
that.next_id += 1;
let prev = prev.and_then(|id| {
id.parse::<NonZeroUsize>()
.inspect_err(|_| {
log::warn!("invalid previous id: {id}");
})
.ok()
});
let next = NonZeroUsize::new(that.next_id).expect("id overflow");
let path = ImmutPath::from(p);
let manager = that.manager.entry(path.clone()).or_default();
let _rev_lock = manager.lock(prev.unwrap_or(next));
let prev = prev.and_then(|prev| {
manager
.find_revision(prev, |_| OnceLock::new())
.data
.get()
.cloned()
});
let next = manager.find_revision(next, |_| OnceLock::new());
SemanticTokenContext {
_rev_lock,
cache,
path,
prev,
next,
}
}
}
/// A semantic token context providing incremental semantic tokens rendering.
pub(crate) struct SemanticTokenContext {
_rev_lock: RevisionLock,
cache: Arc<Mutex<SemanticTokenCache>>,
path: ImmutPath,
prev: Option<SemanticTokens>,
next: Arc<RevisionSlot<OnceLock<SemanticTokens>>>,
}
impl SemanticTokenContext {
/// Create a new semantic token context.
pub fn new(allow_overlapping_token: bool, allow_multiline_token: bool) -> Self {
Self {
cache: RwLock::new(TokenCacheInner::default()),
allow_overlapping_token,
allow_multiline_token,
pub fn previous(&self) -> Option<&[SemanticToken]> {
self.prev.as_ref().map(|cached| cached.as_slice())
}
pub fn cache_result(&self, cached: SemanticTokens) -> String {
let id = self.next.revision;
self.next
.data
.set(cached)
.unwrap_or_else(|_| panic!("unexpected slot overwrite {id}"));
id.to_string()
}
}
impl Drop for SemanticTokenContext {
fn drop(&mut self) {
let mut cache = self.cache.lock();
let manager = cache.manager.get_mut(&self.path);
if let Some(manager) = manager {
let min_rev = manager.unlock(&mut self._rev_lock);
if let Some(min_rev) = min_rev {
manager.gc(min_rev);
}
}
}
}
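
Taken together, a round trip of the new token cache might look like the sketch below, assuming the items above are in scope; the `tokens_round_trip` helper, its arguments, and `fresh` (freshly computed tokens) are illustrative:

use std::{path::Path, sync::Arc};
use parking_lot::Mutex;

fn tokens_round_trip(
    caches: Arc<Mutex<SemanticTokenCache>>,
    path: &Path,
    prev_id: Option<&str>,
    fresh: SemanticTokens,
) -> String {
    // Main thread: pin the previous (if any) and the next revision slots.
    let ctx = SemanticTokenCache::acquire(caches, path, prev_id);

    // Worker: the previously sent tokens, if still cached, feed the delta.
    let _old = ctx.previous();

    // Store the fresh tokens in the next slot; the returned id goes back to
    // the client as `result_id` and is accepted on the next delta request.
    ctx.cache_result(fresh)
    // Dropping `ctx` unlocks both revisions and prunes stale slots per file.
}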
/// Get the semantic tokens for a source.
pub(crate) fn semantic_tokens_full(
pub(crate) fn get_semantic_tokens(
ctx: &mut LocalContext,
source: &Source,
ei: Arc<ExprInfo>,
) -> (Vec<SemanticToken>, String) {
) -> (SemanticTokens, Option<String>) {
let root = LinkedNode::new(source.root());
let mut tokenizer = Tokenizer::new(
source.clone(),
ei,
ctx.analysis.tokens_ctx.allow_multiline_token,
ctx.analysis.allow_multiline_token,
ctx.analysis.position_encoding,
);
tokenizer.tokenize_tree(&root, ModifierSet::empty());
let output = tokenizer.output;
let output = SemanticTokens::new(tokenizer.output);
let result_id = ctx
.analysis
.tokens_ctx
.cache
.write()
.cache_result(output.clone());
let result_id = ctx.tokens.as_ref().map(|t| t.cache_result(output.clone()));
(output, result_id)
}
/// Get the semantic tokens delta for a source.
pub(crate) fn semantic_tokens_delta(
ctx: &mut LocalContext,
source: &Source,
ei: Arc<ExprInfo>,
result_id: &str,
) -> (Result<Vec<SemanticTokensEdit>, Vec<SemanticToken>>, String) {
let cached = ctx
.analysis
.tokens_ctx
.cache
.write()
.try_take_result(result_id);
// this call will overwrite the cache, so we need to read from the cache first
let (tokens, result_id) = semantic_tokens_full(ctx, source, ei);
match cached {
Some(cached) => (Ok(token_delta(&cached, &tokens)), result_id),
None => (Err(tokens), result_id),
}
}
pub(crate) struct Tokenizer {
curr_pos: LspPosition,
pos_offset: usize,

View file

@@ -1,4 +1,6 @@
use crate::{prelude::*, semantic_tokens_delta};
use lsp_types::{SemanticToken, SemanticTokensEdit};
use crate::{get_semantic_tokens, prelude::*};
/// The [`textDocument/semanticTokens/full/delta`] request is sent from the
/// client to the server to resolve the semantic tokens of a given file,
@@ -28,24 +30,63 @@ impl SemanticRequest for SemanticTokensDeltaRequest {
fn request(self, ctx: &mut LocalContext) -> Option<Self::Response> {
let source = ctx.source_by_path(&self.path).ok()?;
let ei = ctx.expr_stage(&source);
let (tokens, result_id) = get_semantic_tokens(ctx, &source, ei);
let (tokens, result_id) = semantic_tokens_delta(ctx, &source, ei, &self.previous_result_id);
let (tokens, result_id) = match ctx.tokens.as_ref().and_then(|t| t.previous()) {
Some(cached) => (Ok(token_delta(cached, &tokens)), result_id),
None => {
log::warn!(
"No previous tokens found for delta computation in {}, prev_id: {:?}",
self.path.display(),
self.previous_result_id
);
(Err(tokens), result_id)
}
};
match tokens {
Ok(edits) => Some(
SemanticTokensDelta {
result_id: Some(result_id),
edits,
}
.into(),
),
Ok(edits) => Some(SemanticTokensDelta { result_id, edits }.into()),
Err(tokens) => Some(
SemanticTokens {
result_id: Some(result_id),
data: tokens,
result_id,
data: tokens.as_ref().clone(),
}
.into(),
),
}
}
}
fn token_delta(from: &[SemanticToken], to: &[SemanticToken]) -> Vec<SemanticTokensEdit> {
// Taken from `rust-analyzer`'s algorithm
// https://github.com/rust-lang/rust-analyzer/blob/master/crates/rust-analyzer/src/semantic_tokens.rs#L219
let start = from
.iter()
.zip(to.iter())
.take_while(|(x, y)| x == y)
.count();
let (_, from) = from.split_at(start);
let (_, to) = to.split_at(start);
let dist_from_end = from
.iter()
.rev()
.zip(to.iter().rev())
.take_while(|(x, y)| x == y)
.count();
let (from, _) = from.split_at(from.len() - dist_from_end);
let (to, _) = to.split_at(to.len() - dist_from_end);
if from.is_empty() && to.is_empty() {
vec![]
} else {
vec![SemanticTokensEdit {
start: 5 * start as u32,
delete_count: 5 * from.len() as u32,
data: Some(to.into()),
}]
}
}
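
As a concrete check of the scaling above: each LSP semantic token is flattened into 5 u32s, so `start` and `delete_count` count u32s, not tokens. A hedged example with made-up token values (`delta_example` is a hypothetical test helper):

fn delta_example() {
    let tok = |delta_line: u32| SemanticToken {
        delta_line,
        delta_start: 0,
        length: 1,
        token_type: 0,
        token_modifiers_bitset: 0,
    };
    // Only the middle token differs.
    let from = [tok(0), tok(1), tok(2)];
    let to = [tok(0), tok(9), tok(2)];
    let edits = token_delta(&from, &to);
    // One edit: skip the 5 u32s of the unchanged prefix token, delete the
    // 5 u32s of the old middle token, and insert the replacement.
    assert_eq!(edits.len(), 1);
    assert_eq!((edits[0].start, edits[0].delete_count), (5, 5));
}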

View file

@@ -1,4 +1,4 @@
use crate::{prelude::*, semantic_tokens_full};
use crate::{get_semantic_tokens, prelude::*};
/// The [`textDocument/semanticTokens/full`] request is sent from the client to
/// the server to resolve the semantic tokens of a given file.
@@ -29,12 +29,12 @@ impl SemanticRequest for SemanticTokensFullRequest {
fn request(self, ctx: &mut LocalContext) -> Option<Self::Response> {
let source = ctx.source_by_path(&self.path).ok()?;
let ei = ctx.expr_stage(&source);
let (tokens, result_id) = semantic_tokens_full(ctx, &source, ei);
let (tokens, result_id) = get_semantic_tokens(ctx, &source, ei);
Some(
SemanticTokens {
result_id: Some(result_id),
data: tokens,
result_id,
data: tokens.as_ref().clone(),
}
.into(),
)