Mirror of https://github.com/Myriad-Dreamin/tinymist.git, synced 2025-07-24 05:05:00 +00:00
fix: pass to_multiline_tokens2 checking by copilot (#639)

* test: generate and pass sema_tokens sanity checking by copilot
* test: pass sema_tokens sanity checking
* dev: update snapshot
Parent: d4492e0436
Commit: b0f5398412
10 changed files with 166 additions and 52 deletions
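Note for reading the snapshot diffs below: each {"data":[...]} blob is an LSP SemanticTokens payload, a flat array with five integers per token (delta line, delta start character, length, token type, modifier bitset), where the deltas are relative to the previous token. A minimal decoding sketch, not part of this commit and for orientation only:

// Decode an LSP semantic-token "data" array into absolute
// (line, start, length, token_type) tuples; modifiers (t[4]) are ignored here.
fn decode(data: &[u32]) -> Vec<(u32, u32, u32, u32)> {
    let (mut line, mut start) = (0u32, 0u32);
    data.chunks_exact(5)
        .map(|t| {
            line += t[0];
            // The start delta is relative to the previous token only when the
            // token stays on the same line.
            start = if t[0] == 0 { start + t[1] } else { t[1] };
            (line, start, t[2], t[3])
        })
        .collect()
}

fn main() {
    // The two tokens of the new tinymist_issue_638.typ snapshot below.
    println!("{:?}", decode(&[0, 0, 9, 20, 0, 0, 9, 1, 20, 0]));
    // => [(0, 0, 9, 20), (0, 9, 1, 20)]
}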
@@ -3,4 +3,4 @@ source: crates/tinymist-query/src/semantic_tokens_full.rs
 expression: "serde_json::to_string(&result).unwrap()"
 input_file: crates/tinymist-query/src/fixtures/semantic_tokens/content-block.typ
 ---
-{"data":[0,0,1,8,0,0,1,1,8,0,0,1,1,8,0,0,1,1,20,0,1,0,0,20,0,0,0,1,8,0,0,1,1,8,0,0,1,1,20,0,0,1,1,8,0,0,1,1,20,0,1,0,0,20,0,0,0,1,8,0,0,1,1,8,0,0,1,1,8,0,0,1,1,8,0,0,1,1,20,0,0,1,1,8,0,0,1,1,8,0,0,1,1,8,0,0,1,1,8,0,0,1,1,8,0,0,1,3,20,0,0,3,1,8,0,0,1,1,8,0,0,1,1,8,0,0,1,1,20,0,1,0,0,20,0]}
+{"data":[0,0,1,8,0,0,1,1,8,0,0,1,1,8,0,0,1,1,20,0,1,0,1,8,0,0,1,1,8,0,0,1,1,20,0,0,1,1,8,0,0,1,1,20,0,1,0,1,8,0,0,1,1,8,0,0,1,1,8,0,0,1,1,8,0,0,1,1,20,0,0,1,1,8,0,0,1,1,8,0,0,1,1,8,0,0,1,1,8,0,0,1,1,8,0,0,1,3,20,0,0,3,1,8,0,0,1,1,8,0,0,1,1,8,0,0,1,1,20,0]}
@@ -3,4 +3,4 @@ source: crates/tinymist-query/src/semantic_tokens_full.rs
 expression: "serde_json::to_string(&result).unwrap()"
 input_file: crates/tinymist-query/src/fixtures/semantic_tokens/fn2.typ
 ---
-{"data":[0,0,1,2,0,0,1,3,2,0,0,3,1,20,0,0,1,2,5,0,0,2,1,8,0,0,1,1,8,0,0,1,1,20,0,0,1,1,3,0,0,1,1,20,0,0,1,1,8,0,0,1,1,20,0,1,0,2,20,0,0,2,3,5,0,0,3,1,8,0,0,1,7,1,0,0,7,1,8,0,0,1,1,20,0,0,1,4,18,0,0,4,1,8,0,0,1,1,20,0,0,1,5,1,0,0,5,1,8,0,0,1,1,20,0,1,0,2,20,0,0,2,3,20,0,0,0,3,11,0,0,3,3,20,0,0,0,3,11,0,0,3,1,20,0,1,0,2,20,0,0,2,1,20,0,0,0,1,11,0,0,1,1,20,0,1,0,2,20,0,0,2,1,20,0,0,0,1,11,0,0,1,1,20,0,1,0,2,20,0,0,2,3,20,0,0,0,3,11,0,0,3,1,20,0,1,0,2,20,0,0,2,1,8,0,0,1,1,20,0,0,1,1,8,0,0,1,1,20,0,1,0,0,20,0,0,0,1,8,0]}
+{"data":[0,0,1,2,0,0,1,3,2,0,0,3,1,20,0,0,1,2,5,0,0,2,1,8,0,0,1,1,8,0,0,1,1,20,0,0,1,1,3,0,0,1,1,20,0,0,1,1,8,0,0,1,1,20,0,1,0,2,20,0,0,2,3,5,0,0,3,1,8,0,0,1,7,1,0,0,7,1,8,0,0,1,1,20,0,0,1,4,18,0,0,4,1,8,0,0,1,1,20,0,0,1,5,1,0,0,5,1,8,0,0,1,1,20,0,1,0,2,20,0,0,2,3,20,0,0,3,3,20,0,0,3,1,20,0,1,0,2,20,0,0,2,1,20,0,0,1,1,20,0,1,0,2,20,0,0,2,1,20,0,0,1,1,20,0,1,0,2,20,0,0,2,3,20,0,0,3,1,20,0,1,0,2,20,0,0,2,1,8,0,0,1,1,20,0,0,1,1,8,0,0,1,1,20,0,1,0,1,8,0]}
@@ -3,4 +3,4 @@ source: crates/tinymist-query/src/semantic_tokens_full.rs
 expression: "serde_json::to_string(&result).unwrap()"
 input_file: crates/tinymist-query/src/fixtures/semantic_tokens/heading.typ
 ---
-{"data":[0,0,3,14,0,0,0,3,14,0,0,3,1,20,0,0,0,1,14,0,0,1,1,5,0,0,0,1,14,0,0,1,4,5,0,0,0,4,14,0,0,4,1,8,0,0,0,1,14,0,0,1,17,1,0,0,0,17,14,0,0,17,1,8,0,0,0,1,14,0]}
+{"data":[0,0,3,14,0,0,3,1,20,0,0,1,1,5,0,0,1,4,5,0,0,4,1,8,0,0,1,17,1,0,0,17,1,8,0]}
@@ -3,4 +3,4 @@ source: crates/tinymist-query/src/semantic_tokens_full.rs
 expression: "serde_json::to_string(&result).unwrap()"
 input_file: crates/tinymist-query/src/fixtures/semantic_tokens/tinymist_issue_601.typ
 ---
-{"data":[0,0,1,19,4,0,1,3,5,4,0,3,1,8,4,0,1,1,20,4,0,1,1,3,4,0,1,1,20,4,0,1,1,20,4,1,0,0,20,4,0,0,1,20,4,0,1,1,20,4,1,0,1,20,4,1,0,0,20,4,0,0,0,20,0]}
+{"data":[0,0,1,19,4,0,1,3,5,4,0,3,1,8,4,0,1,1,20,4,0,1,1,3,4,0,1,1,20,4,0,1,1,20,4,1,0,1,20,4,0,1,1,20,4,1,0,1,20,4]}
@@ -0,0 +1,6 @@
+---
+source: crates/tinymist-query/src/semantic_tokens_full.rs
+expression: "serde_json::to_string(&result).unwrap()"
+input_file: crates/tinymist-query/src/fixtures/semantic_tokens/tinymist_issue_638.typ
+---
+{"data":[0,0,9,20,0,0,9,1,20,0]}
@@ -3,4 +3,4 @@ source: crates/tinymist-query/src/semantic_tokens_full.rs
 expression: "serde_json::to_string(&result).unwrap()"
 input_file: crates/tinymist-query/src/fixtures/semantic_tokens/typst_lsp_issue_264.typ
 ---
-{"data":[0,0,3,0,0,1,0,4,0,0,1,0,3,0,0,1,0,2,0,0,0,2,1,20,0,1,0,1,20,0,1,0,0,20,0,0,0,3,20,0,0,0,3,11,0,0,3,4,20,0,0,0,4,11,0,0,4,1,20,0,1,0,0,20,0,0,0,17,20,0,0,0,17,11,0,0,17,1,20,0,1,0,0,20,0,0,0,3,20,0,0,0,3,11,0]}
+{"data":[0,0,3,0,0,1,0,4,0,0,1,0,3,0,0,1,0,2,0,0,0,2,1,20,0,1,0,1,20,0,1,0,3,20,0,0,3,4,20,0,0,4,1,20,0,1,0,17,20,0,0,17,1,20,0,1,0,3,20,0]}
@@ -0,0 +1 @@
+Bob’s car
@@ -85,7 +85,7 @@ struct Tokenizer {
     allow_multiline_token: bool,
 
-    token: Token,
+    token: Option<Token>,
 }
 
 impl Tokenizer {
@@ -98,7 +98,7 @@ impl Tokenizer {
             allow_multiline_token,
             encoding,
 
-            token: Token::default(),
+            token: None,
         }
     }
 
@@ -113,33 +113,35 @@ impl Tokenizer {
             .map(|token_type| Token::new(token_type, modifiers, range.clone()));
 
         // Push start
-        if !self.token.range.is_empty() && self.token.range.start < range.start {
-            let end = self.token.range.end.min(range.start);
-            self.push(Token {
-                token_type: self.token.token_type,
-                modifiers: self.token.modifiers,
-                range: self.token.range.start..end,
-            });
-            self.token.range.start = end;
+        if let Some(prev_token) = self.token.as_mut() {
+            if !prev_token.range.is_empty() && prev_token.range.start < range.start {
+                let end = prev_token.range.end.min(range.start);
+                let sliced = Token {
+                    token_type: prev_token.token_type,
+                    modifiers: prev_token.modifiers,
+                    range: prev_token.range.start..end,
+                };
+                // Slice the previous token
+                prev_token.range.start = end;
+                self.push(sliced);
+            }
         }
 
         if !is_leaf {
-            if let Some(token) = token.as_mut() {
-                std::mem::swap(&mut self.token, token);
-            }
+            std::mem::swap(&mut self.token, &mut token);
             for child in root.children() {
                 self.tokenize_tree(&child, modifiers);
            }
-            if let Some(token) = token.as_mut() {
-                std::mem::swap(&mut self.token, token);
-            }
+            std::mem::swap(&mut self.token, &mut token);
         }
 
         // Push end
         if let Some(token) = token.clone() {
             if !token.range.is_empty() {
                 // Slice the previous token
                 if let Some(prev_token) = self.token.as_mut() {
                     prev_token.range.start = token.range.end;
                 }
                 self.push(token);
             }
         }
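The rewritten "Push start" block above slices a pending parent token when a nested child token begins inside it: the part of the parent that lies before the child is emitted immediately, and the remainder stays pending until after the child. A standalone sketch of that slicing step, illustrative only and using a plain Range<usize> in place of the crate's Token type:

use std::ops::Range;

// Sketch (illustrative only): emit the part of a pending token that lies
// before `child_start`, and keep the remainder pending, mirroring the
// "slice the previous token" step above.
fn slice_pending(pending: &mut Option<Range<usize>>, child_start: usize, out: &mut Vec<Range<usize>>) {
    if let Some(prev) = pending.as_mut() {
        if !prev.is_empty() && prev.start < child_start {
            let end = prev.end.min(child_start);
            out.push(prev.start..end);
            prev.start = end; // the rest is emitted after the child
        }
    }
}

fn main() {
    let mut pending = Some(0..10);
    let mut out = Vec::new();
    slice_pending(&mut pending, 4, &mut out);
    assert_eq!(out, vec![0..4]);
    assert_eq!(pending, Some(4..10));
}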
@@ -160,16 +162,16 @@ impl Tokenizer {
         }
 
         // This might be a bug of typst, that `end > len` is possible
-        let utf8_end = (range.end).min(self.source.text().len());
+        let source_len = self.source.text().len();
+        let utf8_end = (range.end).min(source_len);
         self.pos_offset = utf8_start;
-        if utf8_end < range.start || range.start > self.source.text().len() {
+        if utf8_end <= utf8_start || utf8_start > source_len {
             return;
         }
 
         let position = typst_to_lsp::offset_to_position(utf8_start, self.encoding, &self.source);
 
         let delta = self.curr_pos.delta(&position);
-        self.curr_pos = position;
 
         let encode_length = |s, t| {
             match self.encoding {
@@ -191,6 +193,7 @@ impl Tokenizer {
                 token_type: token_type as u32,
                 token_modifiers_bitset: modifiers.bitset(),
             });
+            self.curr_pos = position;
         } else {
             let final_line = self
                 .source
@@ -199,38 +202,51 @@ impl Tokenizer {
             let next_offset = self
                 .source
                 .line_to_byte((self.curr_pos.line + 1) as usize)
-                .unwrap_or(self.source.text().len());
-            self.output.push(SemanticToken {
-                delta_line: delta.delta_line,
-                delta_start: delta.delta_start,
-                length: encode_length(utf8_start, utf8_end.min(next_offset)) as u32,
-                token_type: token_type as u32,
-                token_modifiers_bitset: modifiers.bitset(),
-            });
-            let mut utf8_cursor = next_offset;
-            if self.curr_pos.line < final_line {
-                for line in self.curr_pos.line + 1..=final_line {
-                    let next_offset = if line == final_line {
-                        utf8_end
-                    } else {
-                        self.source
-                            .line_to_byte((line + 1) as usize)
-                            .unwrap_or(self.source.text().len())
-                    };
+                .unwrap_or(source_len);
+            let inline_length = encode_length(utf8_start, utf8_end.min(next_offset)) as u32;
+            if inline_length != 0 {
+                self.output.push(SemanticToken {
+                    delta_line: delta.delta_line,
+                    delta_start: delta.delta_start,
+                    length: inline_length,
+                    token_type: token_type as u32,
+                    token_modifiers_bitset: modifiers.bitset(),
+                });
+                self.curr_pos = position;
+            }
+            if self.curr_pos.line >= final_line {
+                return;
+            }
+
+            let mut utf8_cursor = next_offset;
+            let mut delta_line = 0;
+            for line in self.curr_pos.line + 1..=final_line {
+                let next_offset = if line == final_line {
+                    utf8_end
+                } else {
+                    self.source
+                        .line_to_byte((line + 1) as usize)
+                        .unwrap_or(source_len)
+                };
 
+                if utf8_cursor < next_offset {
+                    let inline_length = encode_length(utf8_cursor, next_offset) as u32;
                     self.output.push(SemanticToken {
-                        delta_line: 1,
+                        delta_line: delta_line + 1,
                         delta_start: 0,
-                        length: encode_length(utf8_cursor, next_offset) as u32,
+                        length: inline_length,
                         token_type: token_type as u32,
                         token_modifiers_bitset: modifiers.bitset(),
                     });
-                    self.pos_offset = utf8_cursor;
-                    utf8_cursor = next_offset;
+                    delta_line = 0;
+                    self.curr_pos.character = 0;
+                } else {
+                    delta_line += 1;
+                }
-                self.curr_pos.line = final_line;
-                self.curr_pos.character = 0;
+                self.pos_offset = utf8_cursor;
+                utf8_cursor = next_offset;
             }
+            self.curr_pos.line = final_line - delta_line;
         }
 
 pub trait PositionExt {
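The reworked else-branch above is what turns a token spanning several lines into one SemanticToken per line, skipping empty segments along the way. A rough standalone sketch of the same splitting idea over a plain &str (a hypothetical helper, not the crate's API):

// Sketch (illustrative only): split a byte range covering several lines of
// `text` into one (line, start_col, len) entry per non-empty line segment,
// roughly what the multiline branch above emits as SemanticTokens.
// Segments of non-final lines run up to the start of the next line.
fn split_by_lines(text: &str, start: usize, end: usize) -> Vec<(usize, usize, usize)> {
    let mut line_starts = vec![0usize];
    line_starts.extend(text.match_indices('\n').map(|(i, _)| i + 1));

    let mut out = Vec::new();
    for (line, &ls) in line_starts.iter().enumerate() {
        let le = line_starts.get(line + 1).copied().unwrap_or(text.len());
        let (s, e) = (start.max(ls), end.min(le));
        if s < e {
            out.push((line, s - ls, e - s));
        }
    }
    out
}

fn main() {
    // A token covering bytes 2..8, i.e. "bb\nccc", inside "a\nbb\nccc\n".
    assert_eq!(split_by_lines("a\nbb\nccc\n", 2, 8), vec![(1, 0, 3), (2, 0, 3)]);
}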
@@ -46,6 +46,88 @@ mod tests {
     use super::*;
     use crate::tests::*;
 
+    /// This is converted by Copilot from TypeScript `to_multiline_tokens2`.
+    /// <https://github.com/microsoft/vscode/blob/2acc0e52cbc7434c415f221d5c34ee1bbdd6cd71/src/vs/editor/common/services/semanticTokensProviderStyling.ts#L147>
+    fn check_tokens(tokens: &SemanticTokens) {
+        const DESIRED_TOKENS_PER_AREA: usize = 400;
+        const DESIRED_MAX_AREAS: usize = 1024;
+
+        let src_data = &tokens.data;
+        let token_count = src_data.len();
+        let tokens_per_area = std::cmp::max(
+            (token_count as f64 / DESIRED_MAX_AREAS as f64).ceil() as usize,
+            DESIRED_TOKENS_PER_AREA,
+        );
+
+        let mut token_index = 0;
+        let mut last_line_number = 1;
+        let mut last_start_character = 0;
+
+        while token_index < token_count {
+            let token_start_index = token_index;
+            let mut token_end_index =
+                std::cmp::min(token_start_index + tokens_per_area, token_count);
+
+            // Keep tokens on the same line in the same area...
+            if token_end_index < token_count {
+                let mut small_token_end_index = token_end_index;
+                while small_token_end_index - 1 > token_start_index
+                    && src_data[small_token_end_index].delta_line == 0
+                {
+                    small_token_end_index -= 1;
+                }
+
+                if small_token_end_index - 1 == token_start_index {
+                    // there are so many tokens on this line that our area would be empty, we must
+                    // now go right
+                    let mut big_token_end_index = token_end_index;
+                    while big_token_end_index + 1 < token_count
+                        && src_data[big_token_end_index].delta_line == 0
+                    {
+                        big_token_end_index += 1;
+                    }
+                    token_end_index = big_token_end_index;
+                } else {
+                    token_end_index = small_token_end_index;
+                }
+            }
+
+            let mut prev_line_number = 0;
+            let mut prev_end_character = 0;
+
+            while token_index < token_end_index {
+                let delta_line = src_data[token_index].delta_line;
+                let delta_character = src_data[token_index].delta_start;
+                let length = src_data[token_index].length;
+                let line_number = last_line_number + delta_line;
+                let start_character = if delta_line == 0 {
+                    last_start_character + delta_character
+                } else {
+                    delta_character
+                };
+                // delta_character
+                let end_character = start_character + length;
+
+                if end_character <= start_character {
+                    // this token is invalid (most likely a negative length casted to uint32)
+                    panic!(
+                        "Invalid length for semantic token at line {line_number}, character {start_character}, end: {end_character}"
+                    );
+                } else if prev_line_number == line_number && prev_end_character > start_character {
+                    // this token overlaps with the previous token
+                    panic!("Overlapping semantic tokens at line {line_number}, character {start_character}, previous line {prev_line_number}, previous end {prev_end_character}");
+                } else {
+                    prev_line_number = line_number;
+                    prev_end_character = end_character;
+                }
+
+                last_line_number = line_number;
+                last_start_character = start_character;
+                token_index += 1;
+            }
+        }
+    }
+
     #[test]
     fn test() {
         snapshot_testing("semantic_tokens", &|ctx, path| {
@@ -60,6 +142,15 @@ mod tests {
                 tokens.result_id.take();
             }
 
+            match &result {
+                SemanticTokensResult::Tokens(tokens) => {
+                    check_tokens(tokens);
+                }
+                SemanticTokensResult::Partial(_) => {
+                    panic!("Unexpected partial result");
+                }
+            }
+
             assert_snapshot!(serde_json::to_string(&result).unwrap());
         });
     }
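The check_tokens helper added above mirrors the validation that VS Code performs in to_multiline_tokens2: it walks the delta-encoded stream, panics on tokens whose end does not lie strictly after their start (a zero or negative length), and panics on tokens that begin before the previous token on the same line has ended (an overlap). The old snapshots tripped exactly these rules; for example, the removed heading.typ data begins with the entries 0,0,3,14,0 and 0,0,3,14,0, that is, two length-3 tokens both starting at line 0, character 0, which the overlap check rejects.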