dev: refactor semantic_tokens crate (#809)

* dev: move semantic_tokens crate

* dev: remove bad comment
Myriad-Dreamin 2024-11-14 02:39:26 +08:00 committed by GitHub
parent fc3099a27c
commit 3469e215eb
14 changed files with 299 additions and 315 deletions


@@ -14,6 +14,8 @@ pub mod definition;
pub use definition::*;
pub mod signature;
pub use signature::*;
+pub mod semantic_tokens;
+pub use semantic_tokens::*;
mod post_tyck;
mod tyck;
pub(crate) use crate::ty::*;


@@ -26,8 +26,9 @@ use crate::adt::revision::{RevisionLock, RevisionManager, RevisionManagerLike, R
use crate::analysis::prelude::*;
use crate::analysis::{
analyze_bib, analyze_expr_, analyze_import_, analyze_signature, definition, post_type_check,
-AllocStats, AnalysisStats, BibInfo, Definition, PathPreference, QueryStatGuard, Signature,
-SignatureTarget, Ty, TypeScheme,
+AllocStats, AnalysisStats, BibInfo, Definition, PathPreference, QueryStatGuard,
+SemanticTokenCache, SemanticTokenContext, SemanticTokens, Signature, SignatureTarget, Ty,
+TypeScheme,
};
use crate::docs::{DefDocs, TidyModuleDocs};
use crate::syntax::{
@@ -38,8 +39,7 @@ use crate::syntax::{
use crate::upstream::{tooltip_, Tooltip};
use crate::{
lsp_to_typst, typst_to_lsp, ColorTheme, CompilerQueryRequest, LspPosition, LspRange,
-LspWorldExt, PositionEncoding, SemanticTokenCache, SemanticTokenContext, TypstRange,
-VersionedDocument,
+LspWorldExt, PositionEncoding, TypstRange, VersionedDocument,
};
use super::TypeEnv;
@@ -371,6 +371,20 @@ impl LocalContext {
self.analyze_import(mod_import_node.source().to_untyped()).1
}
+pub(crate) fn cached_tokens(&mut self, source: &Source) -> (SemanticTokens, Option<String>) {
+let tokens = crate::analysis::semantic_tokens::get_semantic_tokens(self, source);
+let result_id = self.tokens.as_ref().map(|t| {
+let id = t.next.revision;
+t.next
+.data
+.set(tokens.clone())
+.unwrap_or_else(|_| panic!("unexpected slot overwrite {id}"));
+id.to_string()
+});
+(tokens, result_id)
+}
/// Get the expression information of a source file.
pub(crate) fn expr_stage_by_id(&mut self, fid: TypstFileId) -> Option<Arc<ExprInfo>> {
Some(self.expr_stage(&self.source_by_id(fid).ok()?))
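The `cached_tokens` helper added above folds the old `SemanticTokenContext::cache_result` logic into `LocalContext`: freshly computed tokens are published into the current revision's `OnceLock` slot, and the revision number doubles as the LSP `result_id`. A minimal sketch of that slot pattern, with a hypothetical `RevisionSlot` standing in for the crate's type:

use std::sync::{Arc, OnceLock};

// Hypothetical stand-in for the crate's revision slot type (illustration only).
struct RevisionSlot<T> {
    revision: usize,
    data: T,
}

// Publish `tokens` for this revision exactly once; return the revision as the result id.
fn cache_result(slot: &RevisionSlot<OnceLock<Arc<Vec<u32>>>>, tokens: Arc<Vec<u32>>) -> String {
    let id = slot.revision;
    // A second write for the same revision is a logic error, hence the panic.
    slot.data
        .set(tokens)
        .unwrap_or_else(|_| panic!("unexpected slot overwrite {id}"));
    id.to_string()
}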


@@ -1,3 +1,5 @@
+//! Semantic tokens (highlighting) support for LSP.
use std::{
num::NonZeroUsize,
ops::Range,
@@ -7,8 +9,10 @@ use std::{
use hashbrown::HashMap;
use lsp_types::SemanticToken;
+use lsp_types::{SemanticTokenModifier, SemanticTokenType};
use parking_lot::Mutex;
use reflexo::ImmutPath;
+use strum::EnumIter;
use typst::syntax::{ast, LinkedNode, Source, SyntaxKind};
use crate::{
@@ -18,16 +22,21 @@
LocalContext, LspPosition, PositionEncoding,
};
-use self::modifier_set::ModifierSet;
mod delta;
-mod modifier_set;
-mod typst_tokens;
-pub use self::typst_tokens::{Modifier, TokenType};
/// A shared semantic tokens object.
pub type SemanticTokens = Arc<Vec<SemanticToken>>;
+/// Get the semantic tokens for a source.
+pub(crate) fn get_semantic_tokens(ctx: &mut LocalContext, source: &Source) -> SemanticTokens {
+let mut tokenizer = Tokenizer::new(
+source.clone(),
+ctx.expr_stage(source),
+ctx.analysis.allow_multiline_token,
+ctx.analysis.position_encoding,
+);
+tokenizer.tokenize_tree(&LinkedNode::new(source.root()), ModifierSet::empty());
+SemanticTokens::new(tokenizer.output)
+}
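The relocated `get_semantic_tokens` drives a `Tokenizer` over the linked syntax tree starting from an empty `ModifierSet`. A toy sketch of the kind of top-down walk `tokenize_tree` performs (simplified stand-in types; the real tokenizer also consults `ExprInfo` and the position encoding):

// Toy syntax node; the real code walks typst's `LinkedNode`.
struct Node {
    kind: &'static str,
    children: Vec<Node>,
}

// Modifiers are inherited downwards: everything under a `strong` node keeps the
// strong bit, so nested styling falls out of the recursion.
fn walk(node: &Node, inherited: u32, out: &mut Vec<(&'static str, u32)>) {
    let modifiers = inherited
        | match node.kind {
            "strong" => 1 << 0,
            "emph" => 1 << 1,
            _ => 0,
        };
    if node.children.is_empty() {
        // Leaves become tokens carrying the accumulated modifier bitset.
        out.push((node.kind, modifiers));
    }
    for child in &node.children {
        walk(child, modifiers, out);
    }
}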
/// A shared semantic tokens cache.
#[derive(Default)]
pub struct SemanticTokenCache {
@@ -88,23 +97,8 @@ pub(crate) struct SemanticTokenContext {
_rev_lock: RevisionLock,
cache: Arc<Mutex<SemanticTokenCache>>,
path: ImmutPath,
-prev: Option<SemanticTokens>,
-next: Arc<RevisionSlot<OnceLock<SemanticTokens>>>,
-}
-impl SemanticTokenContext {
-pub fn previous(&self) -> Option<&[SemanticToken]> {
-self.prev.as_ref().map(|cached| cached.as_slice())
-}
-pub fn cache_result(&self, cached: SemanticTokens) -> String {
-let id = self.next.revision;
-self.next
-.data
-.set(cached)
-.unwrap_or_else(|_| panic!("unexpected slot overwrite {id}"));
-id.to_string()
-}
+pub prev: Option<SemanticTokens>,
+pub next: Arc<RevisionSlot<OnceLock<SemanticTokens>>>,
}
impl Drop for SemanticTokenContext {
@@ -120,25 +114,194 @@ impl Drop for SemanticTokenContext {
}
}
-/// Get the semantic tokens for a source.
-pub(crate) fn get_semantic_tokens(
-ctx: &mut LocalContext,
-source: &Source,
-ei: Arc<ExprInfo>,
-) -> (SemanticTokens, Option<String>) {
-let root = LinkedNode::new(source.root());
+const BOOL: SemanticTokenType = SemanticTokenType::new("bool");
+const PUNCTUATION: SemanticTokenType = SemanticTokenType::new("punct");
+const ESCAPE: SemanticTokenType = SemanticTokenType::new("escape");
+const LINK: SemanticTokenType = SemanticTokenType::new("link");
+const RAW: SemanticTokenType = SemanticTokenType::new("raw");
+const LABEL: SemanticTokenType = SemanticTokenType::new("label");
+const REF: SemanticTokenType = SemanticTokenType::new("ref");
+const HEADING: SemanticTokenType = SemanticTokenType::new("heading");
+const LIST_MARKER: SemanticTokenType = SemanticTokenType::new("marker");
+const LIST_TERM: SemanticTokenType = SemanticTokenType::new("term");
+const DELIMITER: SemanticTokenType = SemanticTokenType::new("delim");
+const INTERPOLATED: SemanticTokenType = SemanticTokenType::new("pol");
+const ERROR: SemanticTokenType = SemanticTokenType::new("error");
+const TEXT: SemanticTokenType = SemanticTokenType::new("text");
-let mut tokenizer = Tokenizer::new(
-source.clone(),
-ei,
-ctx.analysis.allow_multiline_token,
-ctx.analysis.position_encoding,
-);
-tokenizer.tokenize_tree(&root, ModifierSet::empty());
-let output = SemanticTokens::new(tokenizer.output);
+/// Very similar to `typst_ide::Tag`, but with convenience traits, and
+/// extensible because we want to further customize highlighting
+#[derive(Clone, Copy, Eq, PartialEq, EnumIter, Default)]
+#[repr(u32)]
+pub enum TokenType {
+// Standard LSP types
+/// A comment token.
+Comment,
+/// A string token.
+String,
+/// A keyword token.
+Keyword,
+/// An operator token.
+Operator,
+/// A number token.
+Number,
+/// A function token.
+Function,
+/// A decorator token.
+Decorator,
+/// A type token.
+Type,
+/// A namespace token.
+Namespace,
+// Custom types
+/// A boolean token.
+Bool,
+/// A punctuation token.
+Punctuation,
+/// An escape token.
+Escape,
+/// A link token.
+Link,
+/// A raw token.
+Raw,
+/// A label token.
+Label,
+/// A markup reference token.
+Ref,
+/// A heading token.
+Heading,
+/// A list marker token.
+ListMarker,
+/// A list term token.
+ListTerm,
+/// A delimiter token.
+Delimiter,
+/// An interpolated token.
+Interpolated,
+/// An error token.
+Error,
+/// Any text in markup without a more specific token type, possibly styled.
+///
+/// We perform styling (like bold and italics) via modifiers. That means
+/// everything that should receive styling needs to be a token so we can
+/// apply a modifier to it. This token type is mostly for that, since
+/// text should usually not be specially styled.
+Text,
+/// A token that is not recognized by the lexer
+#[default]
+None,
+}
-let result_id = ctx.tokens.as_ref().map(|t| t.cache_result(output.clone()));
-(output, result_id)
+impl From<TokenType> for SemanticTokenType {
+fn from(token_type: TokenType) -> Self {
+use TokenType::*;
+match token_type {
+Comment => Self::COMMENT,
+String => Self::STRING,
+Keyword => Self::KEYWORD,
+Operator => Self::OPERATOR,
+Number => Self::NUMBER,
+Function => Self::FUNCTION,
+Decorator => Self::DECORATOR,
+Type => Self::TYPE,
+Namespace => Self::NAMESPACE,
+Bool => BOOL,
+Punctuation => PUNCTUATION,
+Escape => ESCAPE,
+Link => LINK,
+Raw => RAW,
+Label => LABEL,
+Ref => REF,
+Heading => HEADING,
+ListMarker => LIST_MARKER,
+ListTerm => LIST_TERM,
+Delimiter => DELIMITER,
+Interpolated => INTERPOLATED,
+Error => ERROR,
+Text => TEXT,
+None => unreachable!(),
+}
+}
+}
+const STRONG: SemanticTokenModifier = SemanticTokenModifier::new("strong");
+const EMPH: SemanticTokenModifier = SemanticTokenModifier::new("emph");
+const MATH: SemanticTokenModifier = SemanticTokenModifier::new("math");
+/// A modifier to some semantic token.
+#[derive(Clone, Copy, EnumIter)]
+#[repr(u8)]
+pub enum Modifier {
+/// Strong modifier.
+Strong,
+/// Emphasis modifier.
+Emph,
+/// Math modifier.
+Math,
+/// Read-only modifier.
+ReadOnly,
+/// Static modifier.
+Static,
+/// Default library modifier.
+DefaultLibrary,
+}
+impl Modifier {
+/// Get the index of the modifier.
+pub const fn index(self) -> u8 {
+self as u8
+}
+/// Get the bitmask of the modifier.
+pub const fn bitmask(self) -> u32 {
+0b1 << self.index()
+}
+}
+impl From<Modifier> for SemanticTokenModifier {
+fn from(modifier: Modifier) -> Self {
+use Modifier::*;
+match modifier {
+Strong => STRONG,
+Emph => EMPH,
+Math => MATH,
+ReadOnly => Self::READONLY,
+Static => Self::STATIC,
+DefaultLibrary => Self::DEFAULT_LIBRARY,
+}
+}
+}
+#[derive(Default, Clone, Copy)]
+pub(crate) struct ModifierSet(u32);
+impl ModifierSet {
+pub fn empty() -> Self {
+Self::default()
+}
+pub fn new(modifiers: &[Modifier]) -> Self {
+let bits = modifiers
+.iter()
+.copied()
+.map(Modifier::bitmask)
+.fold(0, |bits, mask| bits | mask);
+Self(bits)
+}
+pub fn bitset(self) -> u32 {
+self.0
+}
+}
+impl std::ops::BitOr for ModifierSet {
+type Output = Self;
+fn bitor(self, rhs: Self) -> Self::Output {
+Self(self.0 | rhs.0)
+}
+}
pub(crate) struct Tokenizer {
@@ -549,3 +712,17 @@ fn token_from_hashtag(
.as_ref()
.and_then(|e| token_from_node(ei, e, modifier))
}
+#[cfg(test)]
+mod tests {
+use strum::IntoEnumIterator;
+use super::*;
+#[test]
+fn ensure_not_too_many_modifiers() {
+// Because modifiers are encoded in a 32-bit bitmask, we can't have more than 32
+// modifiers
+assert!(Modifier::iter().len() <= 32);
+}
+}
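The inlined `Modifier`/`ModifierSet` pair implements the standard LSP modifier encoding: each modifier owns one bit, bit positions follow the legend's declaration order (guaranteed here by `#[repr(u8)]` and `Modifier::index`), and a token's `token_modifiers_bitset` is the OR of its modifiers' masks, which is why the test above caps the enum at 32 variants. A self-contained illustration:

#[derive(Clone, Copy)]
enum Mod {
    Strong, // bit 0
    Emph,   // bit 1
    Math,   // bit 2
}

fn bitmask(m: Mod) -> u32 {
    1 << (m as u32)
}

fn main() {
    // A token that is both strong and math sets bits 0 and 2.
    let set = bitmask(Mod::Strong) | bitmask(Mod::Math);
    assert_eq!(set, 0b101);
}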


@@ -60,8 +60,6 @@ mod rename;
pub use rename::*;
mod selection_range;
pub use selection_range::*;
-mod semantic_tokens;
-pub use semantic_tokens::*;
mod semantic_tokens_full;
pub use semantic_tokens_full::*;
mod semantic_tokens_delta;
@@ -79,8 +77,6 @@ pub use references::*;
mod lsp_typst_boundary;
pub use lsp_typst_boundary::*;
-mod lsp_features;
-pub use lsp_features::*;
mod prelude;


@@ -1,49 +0,0 @@
// todo: remove this
#![allow(missing_docs)]
use lsp_types::{
Registration, SemanticTokensFullOptions, SemanticTokensLegend, SemanticTokensOptions,
Unregistration,
};
use strum::IntoEnumIterator;
use crate::{Modifier, TokenType};
fn get_legend() -> SemanticTokensLegend {
SemanticTokensLegend {
token_types: TokenType::iter()
.filter(|e| *e != TokenType::None)
.map(Into::into)
.collect(),
token_modifiers: Modifier::iter().map(Into::into).collect(),
}
}
const SEMANTIC_TOKENS_REGISTRATION_ID: &str = "semantic_tokens";
const SEMANTIC_TOKENS_METHOD_ID: &str = "textDocument/semanticTokens";
pub fn get_semantic_tokens_registration(options: SemanticTokensOptions) -> Registration {
Registration {
id: SEMANTIC_TOKENS_REGISTRATION_ID.to_owned(),
method: SEMANTIC_TOKENS_METHOD_ID.to_owned(),
register_options: Some(
serde_json::to_value(options)
.expect("semantic tokens options should be representable as JSON value"),
),
}
}
pub fn get_semantic_tokens_unregistration() -> Unregistration {
Unregistration {
id: SEMANTIC_TOKENS_REGISTRATION_ID.to_owned(),
method: SEMANTIC_TOKENS_METHOD_ID.to_owned(),
}
}
pub fn get_semantic_tokens_options() -> SemanticTokensOptions {
SemanticTokensOptions {
legend: get_legend(),
full: Some(SemanticTokensFullOptions::Delta { delta: Some(true) }),
..Default::default()
}
}


@@ -1,33 +0,0 @@
use std::ops;
use super::typst_tokens::Modifier;
#[derive(Default, Clone, Copy)]
pub(crate) struct ModifierSet(u32);
impl ModifierSet {
pub fn empty() -> Self {
Self::default()
}
pub fn new(modifiers: &[Modifier]) -> Self {
let bits = modifiers
.iter()
.copied()
.map(Modifier::bitmask)
.fold(0, |bits, mask| bits | mask);
Self(bits)
}
pub fn bitset(self) -> u32 {
self.0
}
}
impl ops::BitOr for ModifierSet {
type Output = Self;
fn bitor(self, rhs: Self) -> Self::Output {
Self(self.0 | rhs.0)
}
}


@@ -1,150 +0,0 @@
//! Types for tokens used for Typst syntax
// todo: remove this
#![allow(missing_docs)]
use lsp_types::{SemanticTokenModifier, SemanticTokenType};
use strum::EnumIter;
const BOOL: SemanticTokenType = SemanticTokenType::new("bool");
const PUNCTUATION: SemanticTokenType = SemanticTokenType::new("punct");
const ESCAPE: SemanticTokenType = SemanticTokenType::new("escape");
const LINK: SemanticTokenType = SemanticTokenType::new("link");
const RAW: SemanticTokenType = SemanticTokenType::new("raw");
const LABEL: SemanticTokenType = SemanticTokenType::new("label");
const REF: SemanticTokenType = SemanticTokenType::new("ref");
const HEADING: SemanticTokenType = SemanticTokenType::new("heading");
const LIST_MARKER: SemanticTokenType = SemanticTokenType::new("marker");
const LIST_TERM: SemanticTokenType = SemanticTokenType::new("term");
const DELIMITER: SemanticTokenType = SemanticTokenType::new("delim");
const INTERPOLATED: SemanticTokenType = SemanticTokenType::new("pol");
const ERROR: SemanticTokenType = SemanticTokenType::new("error");
const TEXT: SemanticTokenType = SemanticTokenType::new("text");
/// Very similar to `typst_ide::Tag`, but with convenience traits, and
/// extensible because we want to further customize highlighting
#[derive(Clone, Copy, Eq, PartialEq, EnumIter, Default)]
#[repr(u32)]
pub enum TokenType {
// Standard LSP types
Comment,
String,
Keyword,
Operator,
Number,
Function,
Decorator,
Type,
Namespace,
// Custom types
Bool,
Punctuation,
Escape,
Link,
Raw,
Label,
Ref,
Heading,
ListMarker,
ListTerm,
Delimiter,
Interpolated,
Error,
/// Any text in markup without a more specific token type, possibly styled.
///
/// We perform styling (like bold and italics) via modifiers. That means
/// everything that should receive styling needs to be a token so we can
/// apply a modifier to it. This token type is mostly for that, since
/// text should usually not be specially styled.
Text,
/// A token that is not recognized by the lexer
#[default]
None,
}
impl From<TokenType> for SemanticTokenType {
fn from(token_type: TokenType) -> Self {
use TokenType::*;
match token_type {
Comment => Self::COMMENT,
String => Self::STRING,
Keyword => Self::KEYWORD,
Operator => Self::OPERATOR,
Number => Self::NUMBER,
Function => Self::FUNCTION,
Decorator => Self::DECORATOR,
Type => Self::TYPE,
Namespace => Self::NAMESPACE,
Bool => BOOL,
Punctuation => PUNCTUATION,
Escape => ESCAPE,
Link => LINK,
Raw => RAW,
Label => LABEL,
Ref => REF,
Heading => HEADING,
ListMarker => LIST_MARKER,
ListTerm => LIST_TERM,
Delimiter => DELIMITER,
Interpolated => INTERPOLATED,
Error => ERROR,
Text => TEXT,
None => unreachable!(),
}
}
}
const STRONG: SemanticTokenModifier = SemanticTokenModifier::new("strong");
const EMPH: SemanticTokenModifier = SemanticTokenModifier::new("emph");
const MATH: SemanticTokenModifier = SemanticTokenModifier::new("math");
#[derive(Clone, Copy, EnumIter)]
#[repr(u8)]
pub enum Modifier {
Strong,
Emph,
Math,
ReadOnly,
Static,
DefaultLibrary,
}
impl Modifier {
pub const fn index(self) -> u8 {
self as u8
}
pub const fn bitmask(self) -> u32 {
0b1 << self.index()
}
}
impl From<Modifier> for SemanticTokenModifier {
fn from(modifier: Modifier) -> Self {
use Modifier::*;
match modifier {
Strong => STRONG,
Emph => EMPH,
Math => MATH,
ReadOnly => Self::READONLY,
Static => Self::STATIC,
DefaultLibrary => Self::DEFAULT_LIBRARY,
}
}
}
#[cfg(test)]
mod test {
use strum::IntoEnumIterator;
use super::*;
#[test]
fn ensure_not_too_many_modifiers() {
// Because modifiers are encoded in a 32-bit bitmask, we can't have more than 32
// modifiers
assert!(Modifier::iter().len() <= 32);
}
}


@@ -1,6 +1,6 @@
use lsp_types::{SemanticToken, SemanticTokensEdit};
use crate::{get_semantic_tokens, prelude::*};
use crate::prelude::*;
/// The [`textDocument/semanticTokens/full/delta`] request is sent from the
/// client to the server to resolve the semantic tokens of a given file,
@@ -29,31 +29,25 @@ impl SemanticRequest for SemanticTokensDeltaRequest {
/// document.
fn request(self, ctx: &mut LocalContext) -> Option<Self::Response> {
let source = ctx.source_by_path(&self.path).ok()?;
-let ei = ctx.expr_stage(&source);
-let (tokens, result_id) = get_semantic_tokens(ctx, &source, ei);
+let (tokens, result_id) = ctx.cached_tokens(&source);
-let (tokens, result_id) = match ctx.tokens.as_ref().and_then(|t| t.previous()) {
-Some(cached) => (Ok(token_delta(cached, &tokens)), result_id),
+Some(match ctx.tokens.as_ref().and_then(|t| t.prev.as_ref()) {
+Some(cached) => SemanticTokensFullDeltaResult::TokensDelta(SemanticTokensDelta {
+result_id,
+edits: token_delta(cached, &tokens),
+}),
None => {
log::warn!(
"No previous tokens found for delta computation in {}, prev_id: {:?}",
self.path.display(),
self.previous_result_id
);
-(Err(tokens), result_id)
-}
-};
-match tokens {
-Ok(edits) => Some(SemanticTokensDelta { result_id, edits }.into()),
-Err(tokens) => Some(
-SemanticTokens {
+SemanticTokensFullDeltaResult::Tokens(SemanticTokens {
result_id,
data: tokens.as_ref().clone(),
-}
-.into(),
-),
-}
+})
+}
+})
}
}
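`token_delta` (from the `delta` module, not shown in this diff) computes the `SemanticTokensEdit` list sent in the delta response. A sketch of the usual prefix/suffix strategy for such a function, an assumption about the approach rather than a quote of the implementation: keep the longest common prefix and suffix, and emit one edit replacing the middle.

// Compute (start, delete_count, replacement) over flat token words;
// lsp_types::SemanticTokensEdit carries the same three pieces of information.
fn simple_delta(prev: &[u32], next: &[u32]) -> (u32, u32, Vec<u32>) {
    let prefix = prev.iter().zip(next).take_while(|(a, b)| a == b).count();
    let suffix = prev[prefix..]
        .iter()
        .rev()
        .zip(next[prefix..].iter().rev())
        .take_while(|(a, b)| a == b)
        .count();
    (
        prefix as u32,
        (prev.len() - prefix - suffix) as u32,
        next[prefix..next.len() - suffix].to_vec(),
    )
}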


@@ -1,4 +1,4 @@
use crate::{get_semantic_tokens, prelude::*};
use crate::prelude::*;
/// The [`textDocument/semanticTokens/full`] request is sent from the client to
/// the server to resolve the semantic tokens of a given file.
@@ -28,16 +28,12 @@ impl SemanticRequest for SemanticTokensFullRequest {
/// Handles the request to compute the semantic tokens for a given document.
fn request(self, ctx: &mut LocalContext) -> Option<Self::Response> {
let source = ctx.source_by_path(&self.path).ok()?;
-let ei = ctx.expr_stage(&source);
-let (tokens, result_id) = get_semantic_tokens(ctx, &source, ei);
+let (tokens, result_id) = ctx.cached_tokens(&source);
-Some(
-SemanticTokens {
-result_id,
-data: tokens.as_ref().clone(),
-}
-.into(),
-)
+Some(SemanticTokensResult::Tokens(SemanticTokens {
+result_id,
+data: tokens.as_ref().clone(),
+}))
}
}
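A note on the `data` payload both endpoints return: per the LSP specification, each `SemanticToken` is position-encoded relative to the previous token (`delta_line`, and `delta_start` within the same line). The crate's `Tokenizer` produces this encoding directly; a minimal encoder from absolute positions, for illustration:

use lsp_types::SemanticToken;

// Input tuples are (line, start, length, token_type, modifiers) in absolute coordinates.
fn encode(absolute: &[(u32, u32, u32, u32, u32)]) -> Vec<SemanticToken> {
    let (mut prev_line, mut prev_start) = (0, 0);
    absolute
        .iter()
        .map(|&(line, start, length, token_type, modifiers)| {
            let delta_line = line - prev_line;
            // `delta_start` is line-relative only when the token stays on the same line.
            let delta_start = if delta_line == 0 { start - prev_start } else { start };
            prev_line = line;
            prev_start = start;
            SemanticToken {
                delta_line,
                delta_start,
                length,
                token_type,
                token_modifiers_bitset: modifiers,
            }
        })
        .collect()
}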


@@ -35,6 +35,7 @@ serde_json.workspace = true
serde_yaml.workspace = true
parking_lot.workspace = true
paste.workspace = true
+strum.workspace = true
clap.workspace = true
clap_builder.workspace = true


@@ -12,8 +12,10 @@ use reflexo_typst::world::EntryState;
use reflexo_typst::{ImmutPath, TypstDict};
use serde::{Deserialize, Serialize};
use serde_json::{json, Map, Value as JsonValue};
+use strum::IntoEnumIterator;
use task::FormatUserConfig;
-use tinymist_query::{get_semantic_tokens_options, PositionEncoding};
+use tinymist_query::analysis::{Modifier, TokenType};
+use tinymist_query::PositionEncoding;
use tinymist_render::PeriscopeArgs;
use typst::foundations::IntoValue;
use typst::syntax::{FileId, VirtualPath};
@@ -150,7 +152,9 @@ impl Initializer for SuperInit {
// registration
let semantic_tokens_provider = match service.config.semantic_tokens {
SemanticTokensMode::Enable if !const_config.tokens_dynamic_registration => {
-Some(get_semantic_tokens_options().into())
+Some(SemanticTokensServerCapabilities::SemanticTokensOptions(
+get_semantic_tokens_options(),
+))
}
_ => None,
};
@@ -839,6 +843,20 @@ pub enum SemanticTokensMode {
Enable,
}
+pub(crate) fn get_semantic_tokens_options() -> SemanticTokensOptions {
+SemanticTokensOptions {
+legend: SemanticTokensLegend {
+token_types: TokenType::iter()
+.filter(|e| *e != TokenType::None)
+.map(Into::into)
+.collect(),
+token_modifiers: Modifier::iter().map(Into::into).collect(),
+},
+full: Some(SemanticTokensFullOptions::Delta { delta: Some(true) }),
+..Default::default()
+}
+}
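The legend built here is what gives the numeric `token_type` indices and modifier bits their meaning on the client side, so the iteration order of `TokenType::iter()` and `Modifier::iter()` must match the values the tokenizer emits. A small sketch of what the advertised options serialize to (abbreviated legend; the real arrays come from the enums above):

use lsp_types::{
    SemanticTokenModifier, SemanticTokenType, SemanticTokensFullOptions, SemanticTokensLegend,
    SemanticTokensOptions,
};

fn main() {
    let options = SemanticTokensOptions {
        legend: SemanticTokensLegend {
            token_types: vec![SemanticTokenType::new("comment"), SemanticTokenType::new("string")],
            token_modifiers: vec![SemanticTokenModifier::new("strong")],
        },
        full: Some(SemanticTokensFullOptions::Delta { delta: Some(true) }),
        ..Default::default()
    };
    // Prints roughly:
    // {"legend":{"tokenTypes":["comment","string"],"tokenModifiers":["strong"]},"full":{"delta":true}}
    println!("{}", serde_json::to_string(&options).unwrap());
}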
/// Additional options for compilation.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct CompileExtraOpts {


@@ -22,10 +22,7 @@ use serde::{Deserialize, Serialize};
use serde_json::{Map, Value as JsonValue};
use sync_lsp::*;
use task::{CacheTask, ExportUserConfig, FormatTask, FormatUserConfig, UserActionTask};
-use tinymist_query::{
-get_semantic_tokens_options, get_semantic_tokens_registration,
-get_semantic_tokens_unregistration, PageSelection,
-};
+use tinymist_query::PageSelection;
use tinymist_query::{
lsp_to_typst, CompilerQueryRequest, CompilerQueryResponse, FoldRequestFeature, OnExportRequest,
PositionEncoding, SyntaxRequest,
@@ -324,6 +321,27 @@ impl LanguageState {
return Ok(());
}
+const SEMANTIC_TOKENS_REGISTRATION_ID: &str = "semantic_tokens";
+const SEMANTIC_TOKENS_METHOD_ID: &str = "textDocument/semanticTokens";
+pub fn get_semantic_tokens_registration(options: SemanticTokensOptions) -> Registration {
+Registration {
+id: SEMANTIC_TOKENS_REGISTRATION_ID.to_owned(),
+method: SEMANTIC_TOKENS_METHOD_ID.to_owned(),
+register_options: Some(
+serde_json::to_value(options)
+.expect("semantic tokens options should be representable as JSON value"),
+),
+}
+}
+pub fn get_semantic_tokens_unregistration() -> Unregistration {
+Unregistration {
+id: SEMANTIC_TOKENS_REGISTRATION_ID.to_owned(),
+method: SEMANTIC_TOKENS_METHOD_ID.to_owned(),
+}
+}
match (enable, self.sema_tokens_registered) {
(true, false) => {
trace!("registering semantic tokens");