[ty] First cut at semantic token provider (#19108)

This PR implements a basic semantic token provider for ty's language
server, enabling more accurate semantic highlighting in editors that
support this LSP feature.

Here are screenshots showing how code appears in VS Code with the
"rainbow" theme, before and after this change.


![461737617-15630625-d4a9-4ec5-9886-77b00eb7a41a](https://github.com/user-attachments/assets/f963b55b-3195-41d1-ba38-ac2e7508d5f5)


![461737624-d6dcf5f0-7b9b-47de-a410-e202c63e2058](https://github.com/user-attachments/assets/111ca2c5-bb4f-4c8a-a0b5-6c1b2b6f246b)

The token types and modifier tags in this implementation largely mirror
those used in Microsoft's default language server for Python.

The implementation supports two LSP requests:
`textDocument/semanticTokens/full`, which provides semantic tokens for an
entire document, and `textDocument/semanticTokens/range`, which returns
semantic tokens for a requested range within a document.
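
Both requests return tokens in the LSP's flat, delta-encoded wire format:
five `u32`s per token. A minimal illustration (the token type index here is
hypothetical; real indices come from the legend the server registers):

```rust
fn main() {
    // Two one-character tokens on the same line: `x` at column 0 and `y` at
    // column 4. Each token is five u32s: delta_line, delta_start, length,
    // token_type (an index into the legend), and a modifier bitset.
    // delta_start is relative to the previous token while on the same line,
    // and absolute otherwise.
    let data: Vec<u32> = vec![
        0, 0, 1, 2, 0, // `x`
        0, 4, 1, 2, 0, // `y`
    ];
    assert_eq!(data.len() % 5, 0);
}
```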

The PR includes unit tests, along with comments that document known
limitations and areas for future improvement.

---------

Co-authored-by: UnboundVariable <unbound@gmail.com>
13 changed files with 2221 additions and 3 deletions

Cargo.lock

@@ -4172,6 +4172,7 @@ dependencies = [
 name = "ty_ide"
 version = "0.0.0"
 dependencies = [
+ "bitflags 2.9.1",
 "insta",
 "ruff_db",
 "ruff_python_ast",


@@ -11,6 +11,7 @@ repository = { workspace = true }
 license = { workspace = true }
 [dependencies]
+bitflags = { workspace = true }
 ruff_db = { workspace = true }
 ruff_python_ast = { workspace = true }
 ruff_python_parser = { workspace = true }


@@ -5,6 +5,7 @@ mod goto;
 mod hover;
 mod inlay_hints;
 mod markup;
+mod semantic_tokens;
 pub use completion::completion;
 pub use db::Db;
@@ -12,6 +13,9 @@ pub use goto::goto_type_definition;
 pub use hover::hover;
 pub use inlay_hints::inlay_hints;
 pub use markup::MarkupKind;
+pub use semantic_tokens::{
+    SemanticToken, SemanticTokenModifier, SemanticTokenType, SemanticTokens, semantic_tokens,
+};
 use ruff_db::files::{File, FileRange};
 use ruff_text_size::{Ranged, TextRange};

File diff suppressed because it is too large.


@@ -46,7 +46,7 @@ use crate::types::generics::{
     GenericContext, PartialSpecialization, Specialization, walk_generic_context,
     walk_partial_specialization, walk_specialization,
 };
-pub use crate::types::ide_support::all_members;
+pub use crate::types::ide_support::{all_members, definition_kind_for_name};
 use crate::types::infer::infer_unpack_types;
 use crate::types::mro::{Mro, MroError, MroIterator};
 pub(crate) use crate::types::narrow::infer_narrowing_constraint;


@@ -1,10 +1,13 @@
 use crate::place::{Place, imported_symbol, place_from_bindings, place_from_declarations};
+use crate::semantic_index::definition::DefinitionKind;
 use crate::semantic_index::place::ScopeId;
 use crate::semantic_index::{
     attribute_scopes, global_scope, imported_modules, place_table, semantic_index, use_def_map,
 };
 use crate::types::{ClassBase, ClassLiteral, KnownClass, KnownInstanceType, Type};
 use crate::{Db, NameKind};
+use ruff_db::files::File;
+use ruff_python_ast as ast;
 use ruff_python_ast::name::Name;
 use rustc_hash::FxHashSet;
@@ -241,3 +244,37 @@ impl AllMembers {
 pub fn all_members<'db>(db: &'db dyn Db, ty: Type<'db>) -> FxHashSet<Name> {
     AllMembers::of(db, ty).members
 }
+
+/// Get the primary definition kind for a name expression within a specific file.
+/// Returns the first definition kind that is reachable for this name in its scope.
+/// This is useful for IDE features like semantic tokens.
+pub fn definition_kind_for_name<'db>(
+    db: &'db dyn Db,
+    file: File,
+    name: &ast::ExprName,
+) -> Option<DefinitionKind<'db>> {
+    let index = semantic_index(db, file);
+    let name_str = name.id.as_str();
+
+    // Get the scope for this name expression
+    let file_scope = index.try_expression_scope_id(&ast::Expr::Name(name.clone()))?;
+
+    // Get the place table for this scope
+    let place_table = index.place_table(file_scope);
+
+    // Look up the place by name
+    let place_id = place_table.place_id_by_name(name_str)?;
+
+    // Get the use-def map and look up reachable declarations for this place
+    let use_def_map = index.use_def_map(file_scope);
+    let declarations = use_def_map.all_reachable_declarations(place_id);
+
+    // Find the first declaration backed by a definition and return its kind
+    for declaration in declarations {
+        if let Some(def) = declaration.declaration.definition() {
+            return Some(def.kind(db).clone());
+        }
+    }
+
+    None
+}


@@ -6,9 +6,10 @@ use crate::session::{AllOptions, ClientOptions, Session};
 use lsp_server::Connection;
 use lsp_types::{
     ClientCapabilities, DiagnosticOptions, DiagnosticServerCapabilities, HoverProviderCapability,
-    InlayHintOptions, InlayHintServerCapabilities, MessageType, ServerCapabilities,
+    InlayHintOptions, InlayHintServerCapabilities, MessageType, SemanticTokensLegend,
+    SemanticTokensOptions, SemanticTokensServerCapabilities, ServerCapabilities,
     TextDocumentSyncCapability, TextDocumentSyncKind, TextDocumentSyncOptions,
-    TypeDefinitionProviderCapability, Url,
+    TypeDefinitionProviderCapability, Url, WorkDoneProgressOptions,
 };
 use std::num::NonZeroUsize;
 use std::panic::PanicHookInfo;
@@ -188,6 +189,23 @@ impl Server {
 inlay_hint_provider: Some(lsp_types::OneOf::Right(
     InlayHintServerCapabilities::Options(InlayHintOptions::default()),
 )),
+semantic_tokens_provider: Some(
+    SemanticTokensServerCapabilities::SemanticTokensOptions(SemanticTokensOptions {
+        work_done_progress_options: WorkDoneProgressOptions::default(),
+        legend: SemanticTokensLegend {
+            token_types: ty_ide::SemanticTokenType::all()
+                .iter()
+                .map(|token_type| token_type.as_lsp_concept().into())
+                .collect(),
+            token_modifiers: ty_ide::SemanticTokenModifier::all_names()
+                .iter()
+                .map(|&s| s.into())
+                .collect(),
+        },
+        range: Some(true),
+        full: Some(lsp_types::SemanticTokensFullOptions::Bool(true)),
+    }),
+),
 completion_provider: Some(lsp_types::CompletionOptions {
     trigger_characters: Some(vec!['.'.to_string()]),
     ..Default::default()
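
The legend registered above is what gives meaning to the numeric values in
each emitted token: a token's `token_type` is an index into
`legend.token_types`, and each bit in its modifier bitset selects an entry of
`legend.token_modifiers`. A small sketch of that decoding, using a
hypothetical legend in place of the real one:

```rust
fn main() {
    // Hypothetical legend, standing in for the token types and modifiers
    // the server registers.
    let token_types = ["class", "function", "variable"];
    let token_modifiers = ["definition", "readonly"];

    // A token encoded with token_type = 1 and modifier bitset 0b01 decodes
    // as a "function" carrying the "definition" modifier.
    let (token_type, bitset) = (1usize, 0b01u32);
    assert_eq!(token_types[token_type], "function");
    assert_ne!(bitset & (1 << 0), 0);
    assert_eq!(token_modifiers[0], "definition");
}
```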


@@ -11,6 +11,7 @@ use std::panic::{AssertUnwindSafe, UnwindSafe};
 mod diagnostics;
 mod notifications;
 mod requests;
+mod semantic_tokens;
 mod traits;
 use self::traits::{NotificationHandler, RequestHandler};
@@ -49,6 +50,14 @@ pub(super) fn request(req: server::Request) -> Task {
 requests::InlayHintRequestHandler::METHOD => background_document_request_task::<
     requests::InlayHintRequestHandler,
 >(req, BackgroundSchedule::Worker),
+requests::SemanticTokensRequestHandler::METHOD => background_document_request_task::<
+    requests::SemanticTokensRequestHandler,
+>(req, BackgroundSchedule::Worker),
+requests::SemanticTokensRangeRequestHandler::METHOD => background_document_request_task::<
+    requests::SemanticTokensRangeRequestHandler,
+>(
+    req, BackgroundSchedule::Worker
+),
 requests::CompletionRequestHandler::METHOD => background_document_request_task::<
     requests::CompletionRequestHandler,
 >(


@@ -3,6 +3,8 @@ mod diagnostic;
 mod goto_type_definition;
 mod hover;
 mod inlay_hints;
+mod semantic_tokens;
+mod semantic_tokens_range;
 mod shutdown;
 mod workspace_diagnostic;
@@ -11,5 +13,7 @@ pub(super) use diagnostic::DocumentDiagnosticRequestHandler;
 pub(super) use goto_type_definition::GotoTypeDefinitionRequestHandler;
 pub(super) use hover::HoverRequestHandler;
 pub(super) use inlay_hints::InlayHintRequestHandler;
+pub(super) use semantic_tokens::SemanticTokensRequestHandler;
+pub(super) use semantic_tokens_range::SemanticTokensRangeRequestHandler;
 pub(super) use shutdown::ShutdownHandler;
 pub(super) use workspace_diagnostic::WorkspaceDiagnosticRequestHandler;


@@ -0,0 +1,55 @@ (new file)
use std::borrow::Cow;

use crate::DocumentSnapshot;
use crate::server::api::semantic_tokens::generate_semantic_tokens;
use crate::server::api::traits::{
    BackgroundDocumentRequestHandler, RequestHandler, RetriableRequestHandler,
};
use crate::session::client::Client;
use lsp_types::{SemanticTokens, SemanticTokensParams, SemanticTokensResult, Url};
use ty_project::ProjectDatabase;

pub(crate) struct SemanticTokensRequestHandler;

impl RequestHandler for SemanticTokensRequestHandler {
    type RequestType = lsp_types::request::SemanticTokensFullRequest;
}

impl BackgroundDocumentRequestHandler for SemanticTokensRequestHandler {
    fn document_url(params: &SemanticTokensParams) -> Cow<Url> {
        Cow::Borrowed(&params.text_document.uri)
    }

    fn run_with_snapshot(
        db: &ProjectDatabase,
        snapshot: DocumentSnapshot,
        _client: &Client,
        params: SemanticTokensParams,
    ) -> crate::server::Result<Option<SemanticTokensResult>> {
        if snapshot.client_settings().is_language_services_disabled() {
            return Ok(None);
        }

        let Some(file) = snapshot.file(db) else {
            tracing::debug!("Failed to resolve file for {:?}", params);
            return Ok(None);
        };

        let lsp_tokens = generate_semantic_tokens(
            db,
            file,
            None,
            snapshot.encoding(),
            snapshot
                .resolved_client_capabilities()
                .semantic_tokens_multiline_support,
        );

        Ok(Some(SemanticTokensResult::Tokens(SemanticTokens {
            result_id: None,
            data: lsp_tokens,
        })))
    }
}

impl RetriableRequestHandler for SemanticTokensRequestHandler {}


@@ -0,0 +1,65 @@ (new file)
use std::borrow::Cow;

use crate::DocumentSnapshot;
use crate::document::RangeExt;
use crate::server::api::semantic_tokens::generate_semantic_tokens;
use crate::server::api::traits::{
    BackgroundDocumentRequestHandler, RequestHandler, RetriableRequestHandler,
};
use crate::session::client::Client;
use lsp_types::{SemanticTokens, SemanticTokensRangeParams, SemanticTokensRangeResult, Url};
use ruff_db::source::{line_index, source_text};
use ty_project::ProjectDatabase;

pub(crate) struct SemanticTokensRangeRequestHandler;

impl RequestHandler for SemanticTokensRangeRequestHandler {
    type RequestType = lsp_types::request::SemanticTokensRangeRequest;
}

impl BackgroundDocumentRequestHandler for SemanticTokensRangeRequestHandler {
    fn document_url(params: &SemanticTokensRangeParams) -> Cow<Url> {
        Cow::Borrowed(&params.text_document.uri)
    }

    fn run_with_snapshot(
        db: &ProjectDatabase,
        snapshot: DocumentSnapshot,
        _client: &Client,
        params: SemanticTokensRangeParams,
    ) -> crate::server::Result<Option<SemanticTokensRangeResult>> {
        if snapshot.client_settings().is_language_services_disabled() {
            return Ok(None);
        }

        let Some(file) = snapshot.file(db) else {
            tracing::debug!("Failed to resolve file for {:?}", params);
            return Ok(None);
        };

        let source = source_text(db, file);
        let line_index = line_index(db, file);

        // Convert the LSP range to text offsets
        let requested_range = params
            .range
            .to_text_range(&source, &line_index, snapshot.encoding());

        let lsp_tokens = generate_semantic_tokens(
            db,
            file,
            Some(requested_range),
            snapshot.encoding(),
            snapshot
                .resolved_client_capabilities()
                .semantic_tokens_multiline_support,
        );

        Ok(Some(SemanticTokensRangeResult::Tokens(SemanticTokens {
            result_id: None,
            data: lsp_tokens,
        })))
    }
}

impl RetriableRequestHandler for SemanticTokensRangeRequestHandler {}


@@ -0,0 +1,97 @@ (new file)
use lsp_types::SemanticToken;
use ruff_db::source::{line_index, source_text};
use ruff_text_size::{Ranged, TextRange};
use ty_ide::semantic_tokens;
use ty_project::ProjectDatabase;

use crate::document::{PositionEncoding, ToRangeExt};

/// Common logic for generating semantic tokens, either for a full document or
/// a specific range. If no range is provided, the entire file is processed.
pub(crate) fn generate_semantic_tokens(
    db: &ProjectDatabase,
    file: ruff_db::files::File,
    range: Option<TextRange>,
    encoding: PositionEncoding,
    multiline_token_support: bool,
) -> Vec<SemanticToken> {
    let source = source_text(db, file);
    let line_index = line_index(db, file);
    let semantic_token_data = semantic_tokens(db, file, range);

    // Convert semantic tokens to LSP format
    let mut lsp_tokens = Vec::new();
    let mut prev_line = 0u32;
    let mut prev_start = 0u32;

    for token in &*semantic_token_data {
        let lsp_range = token.range().to_lsp_range(&source, &line_index, encoding);
        let line = lsp_range.start.line;
        let character = lsp_range.start.character;

        // Calculate the token length in the negotiated encoding
        let length = if !multiline_token_support && lsp_range.start.line != lsp_range.end.line {
            // The token spans multiple lines but the client doesn't support
            // that, so clamp it to the end of its first line
            if let Some(line_text) = source.lines().nth(lsp_range.start.line as usize) {
                let line_length_in_encoding = match encoding {
                    PositionEncoding::UTF8 => line_text.len().try_into().unwrap_or(u32::MAX),
                    PositionEncoding::UTF16 => line_text
                        .encode_utf16()
                        .count()
                        .try_into()
                        .unwrap_or(u32::MAX),
                    PositionEncoding::UTF32 => {
                        line_text.chars().count().try_into().unwrap_or(u32::MAX)
                    }
                };
                line_length_in_encoding.saturating_sub(lsp_range.start.character)
            } else {
                0
            }
        } else if lsp_range.start.line == lsp_range.end.line {
            // Single-line token: use the difference between the start and end
            // character positions
            lsp_range.end.character - lsp_range.start.character
        } else {
            // Multiline token and the client supports it: calculate the full
            // token length
            let token_text = &source[token.range()];
            match encoding {
                PositionEncoding::UTF8 => token_text.len().try_into().unwrap_or(u32::MAX),
                PositionEncoding::UTF16 => token_text
                    .encode_utf16()
                    .count()
                    .try_into()
                    .unwrap_or(u32::MAX),
                PositionEncoding::UTF32 => {
                    token_text.chars().count().try_into().unwrap_or(u32::MAX)
                }
            }
        };

        let token_type = token.token_type as u32;
        let token_modifiers = token.modifiers.bits();

        // LSP semantic tokens are encoded as deltas from the previous token
        let delta_line = line - prev_line;
        let delta_start = if delta_line == 0 {
            character - prev_start
        } else {
            character
        };

        lsp_tokens.push(SemanticToken {
            delta_line,
            delta_start,
            length,
            token_type,
            token_modifiers_bitset: token_modifiers,
        });

        prev_line = line;
        prev_start = character;
    }

    lsp_tokens
}
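
To make the delta encoding concrete, here is a self-contained sketch (with
hypothetical token positions) of the arithmetic the loop above performs:

```rust
fn main() {
    // Hypothetical tokens as (line, start_col, end_col), already sorted.
    let tokens = [(3u32, 4u32, 9u32), (3, 12, 15), (5, 0, 2)];
    let (mut prev_line, mut prev_start) = (0u32, 0u32);
    for (line, start, end) in tokens {
        let delta_line = line - prev_line;
        // The start column is a delta only while we stay on the same line.
        let delta_start = if delta_line == 0 { start - prev_start } else { start };
        println!("({delta_line}, {delta_start}, {})", end - start);
        (prev_line, prev_start) = (line, start);
    }
    // Prints (3, 4, 5), then (0, 8, 3), then (2, 0, 2).
}
```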


@@ -19,6 +19,9 @@ pub(crate) struct ResolvedClientCapabilities {
     /// `true`, if the first markup kind in `textDocument.hover.contentFormat` is `Markdown`
     pub(crate) hover_prefer_markdown: bool,
+    /// Whether the client supports multiline semantic tokens
+    pub(crate) semantic_tokens_multiline_support: bool,
 }
 impl ResolvedClientCapabilities {
@@ -85,6 +88,13 @@ impl ResolvedClientCapabilities {
         })
         .unwrap_or_default();
+        let semantic_tokens_multiline_support = client_capabilities
+            .text_document
+            .as_ref()
+            .and_then(|doc| doc.semantic_tokens.as_ref())
+            .and_then(|semantic_tokens| semantic_tokens.multiline_token_support)
+            .unwrap_or(false);
         Self {
             code_action_deferred_edit_resolution: code_action_data_support
                 && code_action_edit_resolution,
@@ -95,6 +105,7 @@ impl ResolvedClientCapabilities {
             pull_diagnostics,
             type_definition_link_support: declaration_link_support,
             hover_prefer_markdown,
+            semantic_tokens_multiline_support,
         }
     }
 }
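
For context, clients opt into multiline tokens via the
`textDocument.semanticTokens.multilineTokenSupport` capability. A minimal
sketch of a client advertising it, built with `lsp_types` (assuming the usual
`Default` impls on these capability structs):

```rust
use lsp_types::{
    ClientCapabilities, SemanticTokensClientCapabilities, TextDocumentClientCapabilities,
};

fn main() {
    // A client that advertises multiline semantic token support; everything
    // else stays at its default.
    let caps = ClientCapabilities {
        text_document: Some(TextDocumentClientCapabilities {
            semantic_tokens: Some(SemanticTokensClientCapabilities {
                multiline_token_support: Some(true),
                ..Default::default()
            }),
            ..Default::default()
        }),
        ..Default::default()
    };
    assert!(caps.text_document.is_some());
}
```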