[ty] First cut at semantic token provider (#19108)

This PR implements a basic semantic token provider for ty's language
server. This allows for more accurate semantic highlighting / coloring
within editors that support this LSP functionality.

Here are screenshots showing how code appears in VS Code using the
"rainbow" theme both before and after this change.


![461737617-15630625-d4a9-4ec5-9886-77b00eb7a41a](https://github.com/user-attachments/assets/f963b55b-3195-41d1-ba38-ac2e7508d5f5)


![461737624-d6dcf5f0-7b9b-47de-a410-e202c63e2058](https://github.com/user-attachments/assets/111ca2c5-bb4f-4c8a-a0b5-6c1b2b6f246b)

The token types and modifier tags in this implementation largely mirror
those used in Microsoft's default language server for Python.

The implementation supports two LSP interfaces. The first provides
semantic tokens for an entire document, and the second returns semantic
tokens for a requested range within a document.

The PR includes unit tests. It also includes comments that document
known limitations and areas for future improvement.

---------

Co-authored-by: UnboundVariable <unbound@gmail.com>
This commit is contained in:
UnboundVariable 2025-07-07 15:34:47 -07:00 committed by GitHub
parent 4dd2c03144
commit 278f93022a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 2221 additions and 3 deletions

View file

@ -6,9 +6,10 @@ use crate::session::{AllOptions, ClientOptions, Session};
use lsp_server::Connection;
use lsp_types::{
ClientCapabilities, DiagnosticOptions, DiagnosticServerCapabilities, HoverProviderCapability,
InlayHintOptions, InlayHintServerCapabilities, MessageType, ServerCapabilities,
InlayHintOptions, InlayHintServerCapabilities, MessageType, SemanticTokensLegend,
SemanticTokensOptions, SemanticTokensServerCapabilities, ServerCapabilities,
TextDocumentSyncCapability, TextDocumentSyncKind, TextDocumentSyncOptions,
TypeDefinitionProviderCapability, Url,
TypeDefinitionProviderCapability, Url, WorkDoneProgressOptions,
};
use std::num::NonZeroUsize;
use std::panic::PanicHookInfo;
@ -188,6 +189,23 @@ impl Server {
inlay_hint_provider: Some(lsp_types::OneOf::Right(
InlayHintServerCapabilities::Options(InlayHintOptions::default()),
)),
semantic_tokens_provider: Some(
SemanticTokensServerCapabilities::SemanticTokensOptions(SemanticTokensOptions {
work_done_progress_options: WorkDoneProgressOptions::default(),
legend: SemanticTokensLegend {
token_types: ty_ide::SemanticTokenType::all()
.iter()
.map(|token_type| token_type.as_lsp_concept().into())
.collect(),
token_modifiers: ty_ide::SemanticTokenModifier::all_names()
.iter()
.map(|&s| s.into())
.collect(),
},
range: Some(true),
full: Some(lsp_types::SemanticTokensFullOptions::Bool(true)),
}),
),
completion_provider: Some(lsp_types::CompletionOptions {
trigger_characters: Some(vec!['.'.to_string()]),
..Default::default()

View file

@ -11,6 +11,7 @@ use std::panic::{AssertUnwindSafe, UnwindSafe};
mod diagnostics;
mod notifications;
mod requests;
mod semantic_tokens;
mod traits;
use self::traits::{NotificationHandler, RequestHandler};
@ -49,6 +50,14 @@ pub(super) fn request(req: server::Request) -> Task {
requests::InlayHintRequestHandler::METHOD => background_document_request_task::<
requests::InlayHintRequestHandler,
>(req, BackgroundSchedule::Worker),
requests::SemanticTokensRequestHandler::METHOD => background_document_request_task::<
requests::SemanticTokensRequestHandler,
>(req, BackgroundSchedule::Worker),
requests::SemanticTokensRangeRequestHandler::METHOD => background_document_request_task::<
requests::SemanticTokensRangeRequestHandler,
>(
req, BackgroundSchedule::Worker
),
requests::CompletionRequestHandler::METHOD => background_document_request_task::<
requests::CompletionRequestHandler,
>(

View file

@ -3,6 +3,8 @@ mod diagnostic;
mod goto_type_definition;
mod hover;
mod inlay_hints;
mod semantic_tokens;
mod semantic_tokens_range;
mod shutdown;
mod workspace_diagnostic;
@ -11,5 +13,7 @@ pub(super) use diagnostic::DocumentDiagnosticRequestHandler;
pub(super) use goto_type_definition::GotoTypeDefinitionRequestHandler;
pub(super) use hover::HoverRequestHandler;
pub(super) use inlay_hints::InlayHintRequestHandler;
pub(super) use semantic_tokens::SemanticTokensRequestHandler;
pub(super) use semantic_tokens_range::SemanticTokensRangeRequestHandler;
pub(super) use shutdown::ShutdownHandler;
pub(super) use workspace_diagnostic::WorkspaceDiagnosticRequestHandler;

View file

@ -0,0 +1,55 @@
use std::borrow::Cow;
use crate::DocumentSnapshot;
use crate::server::api::semantic_tokens::generate_semantic_tokens;
use crate::server::api::traits::{
BackgroundDocumentRequestHandler, RequestHandler, RetriableRequestHandler,
};
use crate::session::client::Client;
use lsp_types::{SemanticTokens, SemanticTokensParams, SemanticTokensResult, Url};
use ty_project::ProjectDatabase;
pub(crate) struct SemanticTokensRequestHandler;
impl RequestHandler for SemanticTokensRequestHandler {
    // Associates this handler with the `textDocument/semanticTokens/full` LSP method.
    type RequestType = lsp_types::request::SemanticTokensFullRequest;
}
impl BackgroundDocumentRequestHandler for SemanticTokensRequestHandler {
    fn document_url(params: &SemanticTokensParams) -> Cow<Url> {
        Cow::Borrowed(&params.text_document.uri)
    }

    /// Computes the full set of semantic tokens for the requested document.
    /// Returns `Ok(None)` when language services are disabled or the document
    /// cannot be resolved to a file.
    fn run_with_snapshot(
        db: &ProjectDatabase,
        snapshot: DocumentSnapshot,
        _client: &Client,
        params: SemanticTokensParams,
    ) -> crate::server::Result<Option<SemanticTokensResult>> {
        // Honor the client setting that turns off all language services.
        if snapshot.client_settings().is_language_services_disabled() {
            return Ok(None);
        }

        let file = match snapshot.file(db) {
            Some(file) => file,
            None => {
                tracing::debug!("Failed to resolve file for {:?}", params);
                return Ok(None);
            }
        };

        let multiline_support = snapshot
            .resolved_client_capabilities()
            .semantic_tokens_multiline_support;

        // `None` range means "tokenize the whole document".
        let data = generate_semantic_tokens(db, file, None, snapshot.encoding(), multiline_support);

        let tokens = SemanticTokens {
            result_id: None,
            data,
        };
        Ok(Some(SemanticTokensResult::Tokens(tokens)))
    }
}
impl RetriableRequestHandler for SemanticTokensRequestHandler {}

View file

@ -0,0 +1,65 @@
use std::borrow::Cow;
use crate::DocumentSnapshot;
use crate::document::RangeExt;
use crate::server::api::semantic_tokens::generate_semantic_tokens;
use crate::server::api::traits::{
BackgroundDocumentRequestHandler, RequestHandler, RetriableRequestHandler,
};
use crate::session::client::Client;
use lsp_types::{SemanticTokens, SemanticTokensRangeParams, SemanticTokensRangeResult, Url};
use ruff_db::source::{line_index, source_text};
use ty_project::ProjectDatabase;
pub(crate) struct SemanticTokensRangeRequestHandler;
impl RequestHandler for SemanticTokensRangeRequestHandler {
    // Associates this handler with the `textDocument/semanticTokens/range` LSP method.
    type RequestType = lsp_types::request::SemanticTokensRangeRequest;
}
impl BackgroundDocumentRequestHandler for SemanticTokensRangeRequestHandler {
    fn document_url(params: &SemanticTokensRangeParams) -> Cow<Url> {
        Cow::Borrowed(&params.text_document.uri)
    }

    /// Computes semantic tokens for the LSP range given in `params`.
    /// Returns `Ok(None)` when language services are disabled or the document
    /// cannot be resolved to a file.
    fn run_with_snapshot(
        db: &ProjectDatabase,
        snapshot: DocumentSnapshot,
        _client: &Client,
        params: SemanticTokensRangeParams,
    ) -> crate::server::Result<Option<SemanticTokensRangeResult>> {
        // Honor the client setting that turns off all language services.
        if snapshot.client_settings().is_language_services_disabled() {
            return Ok(None);
        }

        let file = match snapshot.file(db) {
            Some(file) => file,
            None => {
                tracing::debug!("Failed to resolve file for {:?}", params);
                return Ok(None);
            }
        };

        // Convert LSP range to text offsets
        let source = source_text(db, file);
        let line_index = line_index(db, file);
        let requested_range =
            params
                .range
                .to_text_range(&source, &line_index, snapshot.encoding());

        let multiline_support = snapshot
            .resolved_client_capabilities()
            .semantic_tokens_multiline_support;
        let data = generate_semantic_tokens(
            db,
            file,
            Some(requested_range),
            snapshot.encoding(),
            multiline_support,
        );

        let tokens = SemanticTokens {
            result_id: None,
            data,
        };
        Ok(Some(SemanticTokensRangeResult::Tokens(tokens)))
    }
}
impl RetriableRequestHandler for SemanticTokensRangeRequestHandler {}

View file

@ -0,0 +1,97 @@
use lsp_types::SemanticToken;
use ruff_db::source::{line_index, source_text};
use ruff_text_size::{Ranged, TextRange};
use ty_ide::semantic_tokens;
use ty_project::ProjectDatabase;
use crate::document::{PositionEncoding, ToRangeExt};
/// Common logic for generating semantic tokens, either for full document or a specific range.
/// If no range is provided, the entire file is processed.
///
/// Produces tokens in the LSP wire format: each token's `delta_line`/`delta_start`
/// is relative to the previous token, and `length` is measured in the negotiated
/// position `encoding`. When the client did not advertise multiline token support,
/// a token that spans lines is clamped to the end of its first line.
pub(crate) fn generate_semantic_tokens(
    db: &ProjectDatabase,
    file: ruff_db::files::File,
    range: Option<TextRange>,
    encoding: PositionEncoding,
    multiline_token_support: bool,
) -> Vec<SemanticToken> {
    let source = source_text(db, file);
    let line_index = line_index(db, file);
    let semantic_token_data = semantic_tokens(db, file, range);

    // Convert semantic tokens to LSP format
    let mut lsp_tokens = Vec::new();
    // Position of the previously emitted token; the LSP delta encoding is
    // computed against these.
    let mut prev_line = 0u32;
    let mut prev_start = 0u32;

    // NOTE(review): the delta encoding below assumes `semantic_tokens` yields
    // tokens in ascending source order — confirm; otherwise `line - prev_line`
    // or `character - prev_start` can underflow.
    for token in &*semantic_token_data {
        let lsp_range = token.range().to_lsp_range(&source, &line_index, encoding);
        let line = lsp_range.start.line;
        let character = lsp_range.start.character;

        // Calculate length in the negotiated encoding
        let length = if !multiline_token_support && lsp_range.start.line != lsp_range.end.line {
            // Token spans multiple lines but client doesn't support it
            // Clamp to the end of the current line
            if let Some(line_text) = source.lines().nth(lsp_range.start.line as usize) {
                // Length of the whole start line, measured in code units of the
                // negotiated encoding (bytes for UTF-8, UTF-16 units, or chars).
                let line_length_in_encoding = match encoding {
                    PositionEncoding::UTF8 => line_text.len().try_into().unwrap_or(u32::MAX),
                    PositionEncoding::UTF16 => line_text
                        .encode_utf16()
                        .count()
                        .try_into()
                        .unwrap_or(u32::MAX),
                    PositionEncoding::UTF32 => {
                        line_text.chars().count().try_into().unwrap_or(u32::MAX)
                    }
                };
                // Remaining length from the token's start column to end of line.
                line_length_in_encoding.saturating_sub(lsp_range.start.character)
            } else {
                // Start line out of bounds; fall back to a zero-length token.
                0
            }
        } else {
            // Either client supports multiline tokens or this is a single-line token
            // Use the difference between start and end character positions
            if lsp_range.start.line == lsp_range.end.line {
                lsp_range.end.character - lsp_range.start.character
            } else {
                // Multiline token and client supports it - calculate full token length
                let token_text = &source[token.range()];
                match encoding {
                    PositionEncoding::UTF8 => token_text.len().try_into().unwrap_or(u32::MAX),
                    PositionEncoding::UTF16 => token_text
                        .encode_utf16()
                        .count()
                        .try_into()
                        .unwrap_or(u32::MAX),
                    PositionEncoding::UTF32 => {
                        token_text.chars().count().try_into().unwrap_or(u32::MAX)
                    }
                }
            }
        };

        // Indices into the legend advertised in the server capabilities.
        let token_type = token.token_type as u32;
        let token_modifiers = token.modifiers.bits();

        // LSP semantic tokens are encoded as deltas
        let delta_line = line - prev_line;
        // `delta_start` is relative to the previous token's start only when both
        // tokens are on the same line; on a new line it is an absolute column.
        let delta_start = if delta_line == 0 {
            character - prev_start
        } else {
            character
        };

        lsp_tokens.push(SemanticToken {
            delta_line,
            delta_start,
            length,
            token_type,
            token_modifiers_bitset: token_modifiers,
        });

        prev_line = line;
        prev_start = character;
    }

    lsp_tokens
}

View file

@ -19,6 +19,9 @@ pub(crate) struct ResolvedClientCapabilities {
/// `true`, if the first markup kind in `textDocument.hover.contentFormat` is `Markdown`
pub(crate) hover_prefer_markdown: bool,
/// Whether the client supports multiline semantic tokens
pub(crate) semantic_tokens_multiline_support: bool,
}
impl ResolvedClientCapabilities {
@ -85,6 +88,13 @@ impl ResolvedClientCapabilities {
})
.unwrap_or_default();
let semantic_tokens_multiline_support = client_capabilities
.text_document
.as_ref()
.and_then(|doc| doc.semantic_tokens.as_ref())
.and_then(|semantic_tokens| semantic_tokens.multiline_token_support)
.unwrap_or(false);
Self {
code_action_deferred_edit_resolution: code_action_data_support
&& code_action_edit_resolution,
@ -95,6 +105,7 @@ impl ResolvedClientCapabilities {
pull_diagnostics,
type_definition_link_support: declaration_link_support,
hover_prefer_markdown,
semantic_tokens_multiline_support,
}
}
}