[ty] First cut at semantic token provider (#19108)

This PR implements a basic semantic token provider for ty's language
server. This allows for more accurate semantic highlighting / coloring
within editors that support this LSP functionality.

Here are screenshots showing how code appears in VS Code using the
"rainbow" theme both before and after this change.


![461737617-15630625-d4a9-4ec5-9886-77b00eb7a41a](https://github.com/user-attachments/assets/f963b55b-3195-41d1-ba38-ac2e7508d5f5)


![461737624-d6dcf5f0-7b9b-47de-a410-e202c63e2058](https://github.com/user-attachments/assets/111ca2c5-bb4f-4c8a-a0b5-6c1b2b6f246b)

The token types and modifier tags in this implementation largely mirror
those used in Microsoft's default language server for Python.

The implementation supports two LSP interfaces. The first provides
semantic tokens for an entire document, and the second returns semantic
tokens for a requested range within a document.

The PR includes unit tests. It also includes comments that document
known limitations and areas for future improvement.

---------

Co-authored-by: UnboundVariable <unbound@gmail.com>
This commit is contained in:
UnboundVariable 2025-07-07 15:34:47 -07:00 committed by GitHub
parent 4dd2c03144
commit 278f93022a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 2221 additions and 3 deletions

View file

@ -6,9 +6,10 @@ use crate::session::{AllOptions, ClientOptions, Session};
use lsp_server::Connection;
use lsp_types::{
ClientCapabilities, DiagnosticOptions, DiagnosticServerCapabilities, HoverProviderCapability,
InlayHintOptions, InlayHintServerCapabilities, MessageType, ServerCapabilities,
InlayHintOptions, InlayHintServerCapabilities, MessageType, SemanticTokensLegend,
SemanticTokensOptions, SemanticTokensServerCapabilities, ServerCapabilities,
TextDocumentSyncCapability, TextDocumentSyncKind, TextDocumentSyncOptions,
TypeDefinitionProviderCapability, Url,
TypeDefinitionProviderCapability, Url, WorkDoneProgressOptions,
};
use std::num::NonZeroUsize;
use std::panic::PanicHookInfo;
@ -188,6 +189,23 @@ impl Server {
inlay_hint_provider: Some(lsp_types::OneOf::Right(
InlayHintServerCapabilities::Options(InlayHintOptions::default()),
)),
semantic_tokens_provider: Some(
SemanticTokensServerCapabilities::SemanticTokensOptions(SemanticTokensOptions {
work_done_progress_options: WorkDoneProgressOptions::default(),
legend: SemanticTokensLegend {
token_types: ty_ide::SemanticTokenType::all()
.iter()
.map(|token_type| token_type.as_lsp_concept().into())
.collect(),
token_modifiers: ty_ide::SemanticTokenModifier::all_names()
.iter()
.map(|&s| s.into())
.collect(),
},
range: Some(true),
full: Some(lsp_types::SemanticTokensFullOptions::Bool(true)),
}),
),
completion_provider: Some(lsp_types::CompletionOptions {
trigger_characters: Some(vec!['.'.to_string()]),
..Default::default()

View file

@ -11,6 +11,7 @@ use std::panic::{AssertUnwindSafe, UnwindSafe};
mod diagnostics;
mod notifications;
mod requests;
mod semantic_tokens;
mod traits;
use self::traits::{NotificationHandler, RequestHandler};
@ -49,6 +50,14 @@ pub(super) fn request(req: server::Request) -> Task {
requests::InlayHintRequestHandler::METHOD => background_document_request_task::<
requests::InlayHintRequestHandler,
>(req, BackgroundSchedule::Worker),
requests::SemanticTokensRequestHandler::METHOD => background_document_request_task::<
requests::SemanticTokensRequestHandler,
>(req, BackgroundSchedule::Worker),
requests::SemanticTokensRangeRequestHandler::METHOD => background_document_request_task::<
requests::SemanticTokensRangeRequestHandler,
>(
req, BackgroundSchedule::Worker
),
requests::CompletionRequestHandler::METHOD => background_document_request_task::<
requests::CompletionRequestHandler,
>(

View file

@ -3,6 +3,8 @@ mod diagnostic;
mod goto_type_definition;
mod hover;
mod inlay_hints;
mod semantic_tokens;
mod semantic_tokens_range;
mod shutdown;
mod workspace_diagnostic;
@ -11,5 +13,7 @@ pub(super) use diagnostic::DocumentDiagnosticRequestHandler;
pub(super) use goto_type_definition::GotoTypeDefinitionRequestHandler;
pub(super) use hover::HoverRequestHandler;
pub(super) use inlay_hints::InlayHintRequestHandler;
pub(super) use semantic_tokens::SemanticTokensRequestHandler;
pub(super) use semantic_tokens_range::SemanticTokensRangeRequestHandler;
pub(super) use shutdown::ShutdownHandler;
pub(super) use workspace_diagnostic::WorkspaceDiagnosticRequestHandler;

View file

@ -0,0 +1,55 @@
use std::borrow::Cow;
use crate::DocumentSnapshot;
use crate::server::api::semantic_tokens::generate_semantic_tokens;
use crate::server::api::traits::{
BackgroundDocumentRequestHandler, RequestHandler, RetriableRequestHandler,
};
use crate::session::client::Client;
use lsp_types::{SemanticTokens, SemanticTokensParams, SemanticTokensResult, Url};
use ty_project::ProjectDatabase;
pub(crate) struct SemanticTokensRequestHandler;
impl RequestHandler for SemanticTokensRequestHandler {
    // Associates this handler with the `textDocument/semanticTokens/full` LSP method.
    type RequestType = lsp_types::request::SemanticTokensFullRequest;
}
impl BackgroundDocumentRequestHandler for SemanticTokensRequestHandler {
    fn document_url(params: &SemanticTokensParams) -> Cow<Url> {
        Cow::Borrowed(&params.text_document.uri)
    }

    /// Computes the full set of semantic tokens for the requested document.
    /// Returns `Ok(None)` when language services are disabled or the document
    /// cannot be resolved to a file.
    fn run_with_snapshot(
        db: &ProjectDatabase,
        snapshot: DocumentSnapshot,
        _client: &Client,
        params: SemanticTokensParams,
    ) -> crate::server::Result<Option<SemanticTokensResult>> {
        // Honor the client setting that turns off all language services.
        if snapshot.client_settings().is_language_services_disabled() {
            return Ok(None);
        }

        let file = match snapshot.file(db) {
            Some(file) => file,
            None => {
                tracing::debug!("Failed to resolve file for {:?}", params);
                return Ok(None);
            }
        };

        let multiline_support = snapshot
            .resolved_client_capabilities()
            .semantic_tokens_multiline_support;

        // `None` range means "tokenize the whole document".
        let data = generate_semantic_tokens(db, file, None, snapshot.encoding(), multiline_support);

        let tokens = SemanticTokens {
            result_id: None,
            data,
        };
        Ok(Some(SemanticTokensResult::Tokens(tokens)))
    }
}
impl RetriableRequestHandler for SemanticTokensRequestHandler {}

View file

@ -0,0 +1,65 @@
use std::borrow::Cow;
use crate::DocumentSnapshot;
use crate::document::RangeExt;
use crate::server::api::semantic_tokens::generate_semantic_tokens;
use crate::server::api::traits::{
BackgroundDocumentRequestHandler, RequestHandler, RetriableRequestHandler,
};
use crate::session::client::Client;
use lsp_types::{SemanticTokens, SemanticTokensRangeParams, SemanticTokensRangeResult, Url};
use ruff_db::source::{line_index, source_text};
use ty_project::ProjectDatabase;
pub(crate) struct SemanticTokensRangeRequestHandler;
impl RequestHandler for SemanticTokensRangeRequestHandler {
    // Associates this handler with the `textDocument/semanticTokens/range` LSP method.
    type RequestType = lsp_types::request::SemanticTokensRangeRequest;
}
impl BackgroundDocumentRequestHandler for SemanticTokensRangeRequestHandler {
    fn document_url(params: &SemanticTokensRangeParams) -> Cow<Url> {
        Cow::Borrowed(&params.text_document.uri)
    }

    /// Computes semantic tokens for the LSP range given in `params`.
    /// Returns `Ok(None)` when language services are disabled or the document
    /// cannot be resolved to a file.
    fn run_with_snapshot(
        db: &ProjectDatabase,
        snapshot: DocumentSnapshot,
        _client: &Client,
        params: SemanticTokensRangeParams,
    ) -> crate::server::Result<Option<SemanticTokensRangeResult>> {
        // Honor the client setting that turns off all language services.
        if snapshot.client_settings().is_language_services_disabled() {
            return Ok(None);
        }

        let file = match snapshot.file(db) {
            Some(file) => file,
            None => {
                tracing::debug!("Failed to resolve file for {:?}", params);
                return Ok(None);
            }
        };

        // Convert LSP range to text offsets
        let source = source_text(db, file);
        let line_index = line_index(db, file);
        let requested_range =
            params
                .range
                .to_text_range(&source, &line_index, snapshot.encoding());

        let multiline_support = snapshot
            .resolved_client_capabilities()
            .semantic_tokens_multiline_support;
        let data = generate_semantic_tokens(
            db,
            file,
            Some(requested_range),
            snapshot.encoding(),
            multiline_support,
        );

        let tokens = SemanticTokens {
            result_id: None,
            data,
        };
        Ok(Some(SemanticTokensRangeResult::Tokens(tokens)))
    }
}
impl RetriableRequestHandler for SemanticTokensRangeRequestHandler {}

View file

@ -0,0 +1,97 @@
use lsp_types::SemanticToken;
use ruff_db::source::{line_index, source_text};
use ruff_text_size::{Ranged, TextRange};
use ty_ide::semantic_tokens;
use ty_project::ProjectDatabase;
use crate::document::{PositionEncoding, ToRangeExt};
/// Common logic for generating semantic tokens, either for full document or a specific range.
/// If no range is provided, the entire file is processed.
///
/// Produces tokens in the LSP wire format: each token's `delta_line`/`delta_start`
/// is relative to the previous token, and `length` is measured in the negotiated
/// position `encoding`. When the client did not advertise multiline token support,
/// a token that spans lines is clamped to the end of its first line.
pub(crate) fn generate_semantic_tokens(
    db: &ProjectDatabase,
    file: ruff_db::files::File,
    range: Option<TextRange>,
    encoding: PositionEncoding,
    multiline_token_support: bool,
) -> Vec<SemanticToken> {
    let source = source_text(db, file);
    let line_index = line_index(db, file);
    let semantic_token_data = semantic_tokens(db, file, range);

    // Convert semantic tokens to LSP format
    let mut lsp_tokens = Vec::new();
    // Position of the previously emitted token; the LSP delta encoding is
    // computed against these.
    let mut prev_line = 0u32;
    let mut prev_start = 0u32;

    // NOTE(review): the delta encoding below assumes `semantic_tokens` yields
    // tokens in ascending source order — confirm; otherwise `line - prev_line`
    // or `character - prev_start` can underflow.
    for token in &*semantic_token_data {
        let lsp_range = token.range().to_lsp_range(&source, &line_index, encoding);
        let line = lsp_range.start.line;
        let character = lsp_range.start.character;

        // Calculate length in the negotiated encoding
        let length = if !multiline_token_support && lsp_range.start.line != lsp_range.end.line {
            // Token spans multiple lines but client doesn't support it
            // Clamp to the end of the current line
            if let Some(line_text) = source.lines().nth(lsp_range.start.line as usize) {
                // Length of the whole start line, measured in code units of the
                // negotiated encoding (bytes for UTF-8, UTF-16 units, or chars).
                let line_length_in_encoding = match encoding {
                    PositionEncoding::UTF8 => line_text.len().try_into().unwrap_or(u32::MAX),
                    PositionEncoding::UTF16 => line_text
                        .encode_utf16()
                        .count()
                        .try_into()
                        .unwrap_or(u32::MAX),
                    PositionEncoding::UTF32 => {
                        line_text.chars().count().try_into().unwrap_or(u32::MAX)
                    }
                };
                // Remaining length from the token's start column to end of line.
                line_length_in_encoding.saturating_sub(lsp_range.start.character)
            } else {
                // Start line out of bounds; fall back to a zero-length token.
                0
            }
        } else {
            // Either client supports multiline tokens or this is a single-line token
            // Use the difference between start and end character positions
            if lsp_range.start.line == lsp_range.end.line {
                lsp_range.end.character - lsp_range.start.character
            } else {
                // Multiline token and client supports it - calculate full token length
                let token_text = &source[token.range()];
                match encoding {
                    PositionEncoding::UTF8 => token_text.len().try_into().unwrap_or(u32::MAX),
                    PositionEncoding::UTF16 => token_text
                        .encode_utf16()
                        .count()
                        .try_into()
                        .unwrap_or(u32::MAX),
                    PositionEncoding::UTF32 => {
                        token_text.chars().count().try_into().unwrap_or(u32::MAX)
                    }
                }
            }
        };

        // Indices into the legend advertised in the server capabilities.
        let token_type = token.token_type as u32;
        let token_modifiers = token.modifiers.bits();

        // LSP semantic tokens are encoded as deltas
        let delta_line = line - prev_line;
        // `delta_start` is relative to the previous token's start only when both
        // tokens are on the same line; on a new line it is an absolute column.
        let delta_start = if delta_line == 0 {
            character - prev_start
        } else {
            character
        };

        lsp_tokens.push(SemanticToken {
            delta_line,
            delta_start,
            length,
            token_type,
            token_modifiers_bitset: token_modifiers,
        });

        prev_line = line;
        prev_start = character;
    }

    lsp_tokens
}

View file

@ -19,6 +19,9 @@ pub(crate) struct ResolvedClientCapabilities {
/// `true`, if the first markup kind in `textDocument.hover.contentFormat` is `Markdown`
pub(crate) hover_prefer_markdown: bool,
/// Whether the client supports multiline semantic tokens
pub(crate) semantic_tokens_multiline_support: bool,
}
impl ResolvedClientCapabilities {
@ -85,6 +88,13 @@ impl ResolvedClientCapabilities {
})
.unwrap_or_default();
let semantic_tokens_multiline_support = client_capabilities
.text_document
.as_ref()
.and_then(|doc| doc.semantic_tokens.as_ref())
.and_then(|semantic_tokens| semantic_tokens.multiline_token_support)
.unwrap_or(false);
Self {
code_action_deferred_edit_resolution: code_action_data_support
&& code_action_edit_resolution,
@ -95,6 +105,7 @@ impl ResolvedClientCapabilities {
pull_diagnostics,
type_definition_link_support: declaration_link_support,
hover_prefer_markdown,
semantic_tokens_multiline_support,
}
}
}