[ty] Fix completions at end of file (#20993)

Micha Reiser 2025-10-21 11:24:31 +02:00 committed by GitHub
parent 523fc418ed
commit 69ce064569
2 changed files with 69 additions and 72 deletions

View file

@@ -489,6 +489,23 @@ impl Tokens {
         TokenIterWithContext::new(&self.raw)
     }

+    /// Performs a binary search to find the index of the **first** token that starts at the given `offset`.
+    ///
+    /// Unlike `binary_search_by_key`, this method ensures that if multiple tokens start at the same offset,
+    /// it returns the index of the first one. Multiple tokens can start at the same offset in cases where
+    /// zero-length tokens are involved (like `Dedent` or `Newline` at the end of the file).
+    pub fn binary_search_by_start(&self, offset: TextSize) -> Result<usize, usize> {
+        let partition_point = self.partition_point(|token| token.start() < offset);
+
+        let after = &self[partition_point..];
+
+        if after.first().is_some_and(|first| first.start() == offset) {
+            Ok(partition_point)
+        } else {
+            Err(partition_point)
+        }
+    }
+
     /// Returns a slice of [`Token`] that are within the given `range`.
     ///
     /// The start and end offset of the given range should be either:
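A note on the approach: `slice::partition_point` returns the index of the first element for which the predicate turns false, so when several elements share a key it always lands on the leftmost one, while a plain binary search may return any of them. A minimal, self-contained sketch of that contract, with plain integers standing in for token start offsets (not the actual `Tokens` API):

```rust
/// Finds the index of the *first* element equal to `key`, mirroring the
/// `Ok`/`Err` contract of `binary_search_by_start` above.
fn search_first(starts: &[u32], key: u32) -> Result<usize, usize> {
    // First index where `start < key` stops holding, i.e. the first element
    // greater than or equal to `key`; with duplicates, always the leftmost.
    let point = starts.partition_point(|&start| start < key);
    if starts.get(point) == Some(&key) {
        Ok(point)
    } else {
        Err(point)
    }
}

fn main() {
    // Three zero-length "tokens" all starting at offset 25, as at EOF.
    let starts = [0, 21, 24, 25, 25, 25];
    assert_eq!(search_first(&starts, 25), Ok(3)); // always the first of the three
    assert_eq!(search_first(&starts, 23), Err(2)); // insertion point in the gap
}
```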
@@ -532,30 +549,7 @@ impl Tokens {
     pub fn in_range(&self, range: TextRange) -> &[Token] {
         let tokens_after_start = self.after(range.start());

-        match tokens_after_start.binary_search_by_key(&range.end(), Ranged::end) {
-            Ok(idx) => {
-                // If we found the token with the end offset, that token should be included in the
-                // return slice.
-                &tokens_after_start[..=idx]
-            }
-            Err(idx) => {
-                if let Some(token) = tokens_after_start.get(idx) {
-                    // If it's equal to the start offset, then it's at a token boundary which is
-                    // valid. If it's less than the start offset, then it's in the gap between the
-                    // tokens which is valid as well.
-                    assert!(
-                        range.end() <= token.start(),
-                        "End offset {:?} is inside a token range {:?}",
-                        range.end(),
-                        token.range()
-                    );
-                }
-
-                // This index is where the token with the offset _could_ be, so that token should
-                // be excluded from the return slice.
-                &tokens_after_start[..idx]
-            }
-        }
+        Self::before_impl(tokens_after_start, range.end())
     }

     /// Searches the token(s) at `offset`.
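With `before_impl` factored out, `in_range` is now the composition of the two halves: the suffix of tokens ending after `range.start()`, then the prefix of that suffix starting before `range.end()`. A rough standalone model of that composition over assumed `(start, end)` pairs, not the real token types:

```rust
fn in_range(tokens: &[(u32, u32)], start: u32, end: u32) -> &[(u32, u32)] {
    // Suffix of tokens ending after `start` (what `after` computes)...
    let after = &tokens[tokens.partition_point(|&(_, t_end)| t_end <= start)..];
    // ...then the prefix of that suffix starting before `end` (`before_impl`).
    &after[..after.partition_point(|&(t_start, _)| t_start < end)]
}

fn main() {
    // Tokens with a gap between 7 and 9 and a zero-length token at 11 (EOF).
    let tokens = [(0, 4), (4, 7), (9, 11), (11, 11)];
    assert_eq!(in_range(&tokens, 0, 4), [(0, 4)]);
    assert_eq!(in_range(&tokens, 4, 11), [(4, 7), (9, 11)]);
}
```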
@@ -597,7 +591,7 @@ impl Tokens {
     /// assert_eq!(collect_tokens(TextSize::new(57)), vec![]);
     /// ```
     pub fn at_offset(&self, offset: TextSize) -> TokenAt {
-        match self.binary_search_by_key(&offset, ruff_text_size::Ranged::start) {
+        match self.binary_search_by_start(offset) {
            // The token at `index` starts exactly at `offset`.
            // ```python
            // object.attribute
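For context on why several tokens can start at the same offset here: at the end of a file such as `def f(msg: str):\n    msg.`, the lexer emits zero-length `Newline` and `Dedent` tokens right where the `Dot` ends, and `at_offset` at the cursor needs the first of them so it can also see the token ending there. A sketch with illustrative offsets (not an exact lexer trace):

```rust
fn main() {
    // (kind, start, end): the tail of the token stream for "…msg.".
    let tokens = [
        ("Name", 21, 24),    // msg
        ("Dot", 24, 25),     // .
        ("Newline", 25, 25), // zero-length
        ("Dedent", 25, 25),  // zero-length
    ];

    // The cursor sits at offset 25. `at_offset` needs the *first* token
    // starting there so it can also report the `Dot` ending at the cursor.
    let first = tokens.partition_point(|&(_, start, _)| start < 25);
    assert_eq!(tokens[first].0, "Newline");
    assert_eq!(tokens[first - 1].0, "Dot");
}
```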
@@ -649,28 +643,25 @@ impl Tokens {
     /// If the given offset is inside a token range at any point
     /// other than the start of the range.
     pub fn before(&self, offset: TextSize) -> &[Token] {
-        match self.binary_search_by(|token| token.start().cmp(&offset)) {
-            Ok(idx) => &self[..idx],
-            Err(idx) => {
-                // We can't use `saturating_sub` here because a file could contain a BOM header, in
-                // which case the token starts at offset 3 for UTF-8 encoded file content.
-                if idx > 0 {
-                    if let Some(prev) = self.get(idx - 1) {
-                        // If it's equal to the end offset, then it's at a token boundary which is
-                        // valid. If it's greater than the end offset, then it's in the gap between
-                        // the tokens which is valid as well.
-                        assert!(
-                            offset >= prev.end(),
-                            "Offset {:?} is inside a token range {:?}",
-                            offset,
-                            prev.range()
-                        );
-                    }
-                }
-                &self[..idx]
-            }
-        }
+        Self::before_impl(&self.raw, offset)
+    }
+
+    fn before_impl(tokens: &[Token], offset: TextSize) -> &[Token] {
+        let partition_point = tokens.partition_point(|token| token.start() < offset);
+
+        let before = &tokens[..partition_point];
+
+        if let Some(last) = before.last() {
+            // If it's equal to the end offset, then it's at a token boundary which is
+            // valid. If it's greater than the end offset, then it's in the gap between
+            // the tokens which is valid as well.
+            assert!(
+                offset >= last.end(),
+                "Offset {:?} is inside a token range {:?}",
+                offset,
+                last.range()
+            );
+        }
+
+        before
     }

     /// Returns a slice of tokens after the given [`TextSize`] offset.
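The assert in `before_impl` still rejects offsets that fall strictly inside a token, while boundaries and gaps remain valid. A small model of that guard, using `(start, end)` tuples laid out like the `TEST_CASE_WITH_GAP` fixture in the tests:

```rust
fn before(tokens: &[(u32, u32)], offset: u32) -> &[(u32, u32)] {
    let point = tokens.partition_point(|&(start, _)| start < offset);
    let before = &tokens[..point];
    if let Some(&(_, last_end)) = before.last() {
        // A boundary (offset == last_end) or a gap (offset > last_end) is
        // valid; anything smaller means the offset is inside the last token.
        assert!(offset >= last_end, "Offset {offset} is inside a token");
    }
    before
}

fn main() {
    let tokens = [(0, 4), (4, 7), (9, 11)];
    assert_eq!(before(&tokens, 4), [(0, 4)]); // on a token boundary
    assert_eq!(before(&tokens, 8), [(0, 4), (4, 7)]); // in the 7..9 gap
    // `before(&tokens, 6)` would panic: offset 6 is inside the 4..7 token.
}
```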
@@ -684,28 +675,21 @@ impl Tokens {
     /// If the given offset is inside a token range at any point
     /// other than the start of the range.
     pub fn after(&self, offset: TextSize) -> &[Token] {
-        match self.binary_search_by(|token| token.start().cmp(&offset)) {
-            Ok(idx) => &self[idx..],
-            Err(idx) => {
-                // We can't use `saturating_sub` here because a file could contain a BOM header, in
-                // which case the token starts at offset 3 for UTF-8 encoded file content.
-                if idx > 0 {
-                    if let Some(prev) = self.get(idx - 1) {
-                        // If it's equal to the end offset, then it's at a token boundary which is
-                        // valid. If it's greater than the end offset, then it's in the gap between
-                        // the tokens which is valid as well.
-                        assert!(
-                            offset >= prev.end(),
-                            "Offset {:?} is inside a token range {:?}",
-                            offset,
-                            prev.range()
-                        );
-                    }
-                }
-                &self[idx..]
-            }
-        }
+        let partition_point = self.partition_point(|token| token.end() <= offset);
+
+        let after = &self[partition_point..];
+
+        if let Some(first) = after.first() {
+            // If it's equal to the end offset, then it's at a token boundary which is
+            // valid. If it's greater than the end offset, then it's in the gap between
+            // the tokens which is valid as well.
+            assert!(
+                offset <= first.start(),
+                "Offset {:?} is inside a token range {:?}",
+                offset,
+                first.range()
+            );
+        }
+
+        after
     }
 }
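One observable consequence of the two predicates: `after` drops tokens with `end() <= offset` and `before_impl` keeps tokens with `start() < offset`, so a zero-length token sitting exactly at `offset` appears on neither side. A sketch of that edge case, again over assumed `(start, end)` tuples rather than the real token types:

```rust
fn main() {
    let tokens: [(u32, u32); 4] = [(21, 24), (24, 25), (25, 25), (25, 25)];
    let offset = 25;

    // `after`: skip every token ending at or before `offset`; the zero-length
    // tokens end at 25, so they are skipped as well.
    let after = &tokens[tokens.partition_point(|&(_, end)| end <= offset)..];
    assert!(after.is_empty());

    // `before`: keep every token starting before `offset`; the zero-length
    // tokens start at 25, so they are excluded here too.
    let before = &tokens[..tokens.partition_point(|&(start, _)| start < offset)];
    assert_eq!(before, [(21, 24), (24, 25)]);
}
```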
@@ -1099,7 +1083,7 @@ mod tests {
     }

     #[test]
-    #[should_panic(expected = "End offset 6 is inside a token range 4..7")]
+    #[should_panic(expected = "Offset 6 is inside a token range 4..7")]
     fn tokens_in_range_end_offset_inside_token() {
         let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
         tokens.in_range(TextRange::new(0.into(), 6.into()));

View file

@@ -608,7 +608,7 @@ struct ScopedTarget<'t> {
     node: ast::AnyNodeRef<'t>,
 }

-/// Returns a slice of tokens that all start before or at the given
+/// Returns a slice of tokens that all start before the given
/// [`TextSize`] offset.
///
/// If the given offset is between two tokens, the returned slice will end just
@@ -620,11 +620,9 @@ struct ScopedTarget<'t> {
 /// range (including if it's at the very beginning), then that token will be
 /// included in the slice returned.
 fn tokens_start_before(tokens: &Tokens, offset: TextSize) -> &[Token] {
-    let idx = match tokens.binary_search_by(|token| token.start().cmp(&offset)) {
-        Ok(idx) => idx,
-        Err(idx) => idx,
-    };
-    &tokens[..idx]
+    let partition_point = tokens.partition_point(|token| token.start() < offset);
+
+    &tokens[..partition_point]
 }

 /// Returns a suffix of `tokens` corresponding to the `kinds` given.
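This call site is where the original bug showed up: with the cursor at EOF, several zero-length tokens start exactly at the cursor offset, and `binary_search` returning an arbitrary one of them made the slice boundary unstable. A minimal comparison of the old and new cut points (start offsets only, illustrative values):

```rust
fn main() {
    // Token start offsets; indices 3..=5 are zero-length tokens at EOF (25).
    let starts: [u32; 6] = [0, 21, 24, 25, 25, 25];
    let offset = 25;

    // Old behavior: any of indices 3, 4, or 5 is a valid binary-search answer,
    // so whether some zero-length tokens leaked into the returned slice
    // depended on the search path.
    let unstable = starts.binary_search(&offset).unwrap_or_else(|i| i);
    assert!((3..=5).contains(&unstable));

    // New behavior: the cut always lands before the first token starting at
    // `offset`, so the zero-length tokens are excluded deterministically.
    assert_eq!(starts.partition_point(|&start| start < offset), 3);
}
```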
@@ -1453,6 +1451,21 @@ def frob(): ...
     ");
 }

+/// Regression test for <https://github.com/astral-sh/ty/issues/1392>
+///
+/// This test ensures completions work when the cursor is at the
+/// start of a zero-length token.
+#[test]
+fn completion_at_eof() {
+    let test = cursor_test("def f(msg: str):\n    msg.<CURSOR>");
+
+    test.assert_completions_include("upper");
+    test.assert_completions_include("capitalize");
+
+    let test = cursor_test("def f(msg: str):\n    msg.u<CURSOR>");
+    test.assert_completions_include("upper");
+    test.assert_completions_do_not_include("capitalize");
+}
+
 #[test]
 fn list_comprehension1() {
     let test = cursor_test(