ty_ide: improve completions by using scopes

Previously, completions were based on just returning every identifier
parsed in the current Python file. In this commit, we change it to
identify an expression under the cursor and then return all symbols
available to the scope containing that expression.

This is still returning too much, and also, in some cases, not enough.
Namely, it doesn't really take the specific context into account other
than scope. But this does improve on the status quo. For example:

    def foo(): ...
    def bar():
        def fast(): ...
    def foofoo(): ...

    f<CURSOR>

When asking for completions here, the LSP will no longer include `fast`
as a possible completion in this context.

Ref https://github.com/astral-sh/ty/issues/86
This commit is contained in:
Andrew Gallant 2025-05-22 08:56:41 -04:00 committed by Andrew Gallant
parent a827b16ebd
commit 33ed502edb
3 changed files with 893 additions and 22 deletions

View file

@ -111,6 +111,13 @@ impl From<Name> for compact_str::CompactString {
} }
} }
/// Converts a [`Name`] into an owned [`String`] by copying its text.
impl From<Name> for String {
    #[inline]
    fn from(name: Name) -> Self {
        String::from(name.as_str())
    }
}
impl FromIterator<char> for Name { impl FromIterator<char> for Name {
fn from_iter<I: IntoIterator<Item = char>>(iter: I) -> Self { fn from_iter<I: IntoIterator<Item = char>>(iter: I) -> Self {
Self(iter.into_iter().collect()) Self(iter.into_iter().collect())

View file

@ -1,40 +1,877 @@
use ruff_db::files::File; use ruff_db::files::File;
use ruff_db::parsed::parsed_module; use ruff_db::parsed::{ParsedModule, parsed_module};
use ruff_python_ast::visitor::source_order::SourceOrderVisitor; use ruff_python_parser::TokenAt;
use ruff_python_ast::{AnyNodeRef, Identifier}; use ruff_text_size::{Ranged, TextRange, TextSize};
use ruff_text_size::TextSize;
use crate::Db; use crate::Db;
use crate::find_node::{CoveringNode, covering_node};
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Completion { pub struct Completion {
pub label: String, pub label: String,
} }
/// Returns the completion candidates for the cursor at `offset` in `file`.
///
/// In this side-by-side diff each changed line shows the removed text
/// followed by the added text. The added version resolves a target node with
/// `find_target` (returning no completions when there is none), asks
/// `SemanticModel::completions` for the names visible from that node, and
/// returns them sorted and de-duplicated as `Completion` labels. The removed
/// version ignored the offset and returned every identifier found in the
/// parsed file.
pub fn completion(db: &dyn Db, file: File, _offset: TextSize) -> Vec<Completion> { pub fn completion(db: &dyn Db, file: File, offset: TextSize) -> Vec<Completion> {
let parsed = parsed_module(db.upcast(), file); let parsed = parsed_module(db.upcast(), file);
identifiers(parsed.syntax().into())
let Some(target) = find_target(parsed, offset) else {
return vec![];
};
let model = ty_python_semantic::SemanticModel::new(db.upcast(), file);
let mut completions = model.completions(target.node());
completions.sort();
completions.dedup();
completions
.into_iter() .into_iter()
.map(|label| Completion { label }) .map(|name| Completion { label: name.into() })
.collect() .collect()
} }
/// Picks the AST node that completions at `offset` are resolved against.
///
/// (Side-by-side diff: the left-hand text on these lines belongs to the
/// removed `identifiers` helper; the description below is for the added
/// `find_target`.)
///
/// When `at_offset` reports `TokenAt::None`, the node covering the empty
/// range at `offset` is returned. Otherwise the offset is moved to the end
/// of the token for `TokenAt::Single`, or to the start of the second token
/// for `TokenAt::Between`. The last token before that adjusted offset is
/// then looked up and the node covering its range is returned; `None` is
/// returned when no token precedes the adjusted offset.
fn identifiers(node: AnyNodeRef) -> Vec<String> { fn find_target(parsed: &ParsedModule, offset: TextSize) -> Option<CoveringNode> {
struct Visitor { let offset = match parsed.tokens().at_offset(offset) {
identifiers: Vec<String>, TokenAt::None => {
return Some(covering_node(
parsed.syntax().into(),
TextRange::empty(offset),
));
} }
TokenAt::Single(tok) => tok.end(),
impl<'a> SourceOrderVisitor<'a> for Visitor { TokenAt::Between(_, tok) => tok.start(),
fn visit_identifier(&mut self, id: &'a Identifier) {
self.identifiers.push(id.id.as_str().to_string());
}
}
let mut visitor = Visitor {
identifiers: vec![],
}; };
node.visit_source_order(&mut visitor); let before = parsed.tokens().before(offset);
visitor.identifiers.sort(); let last = before.last()?;
visitor.identifiers.dedup(); let covering_node = covering_node(parsed.syntax().into(), last.range());
visitor.identifiers Some(covering_node)
}
#[cfg(test)]
mod tests {
use insta::assert_snapshot;
use crate::completion;
use crate::tests::{CursorTest, cursor_test};
// At time of writing (2025-05-22), the tests below show some of the
// naivete of our completions. That is, we don't even take what has been
// typed into account. We just kind of return all possible completions
// regardless of what has been typed and rely on the client to do filtering
// based on prefixes and what not.
//
// In the future, we might consider using "text edits,"[1] which will let
// us have more control over which completions are shown to the end user.
// But that will require us to at least do some kind of filtering based on
// what has been typed.
//
// [1]: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#textDocument_completion
#[test]
fn empty() {
let test = cursor_test(
"\
<CURSOR>
",
);
assert_snapshot!(test.completions(), @"<No completions found>");
}
#[test]
fn imports1() {
let test = cursor_test(
"\
import re
<CURSOR>
",
);
assert_snapshot!(test.completions(), @"re");
}
#[test]
fn imports2() {
let test = cursor_test(
"\
from os import path
<CURSOR>
",
);
assert_snapshot!(test.completions(), @"path");
}
// N.B. We don't currently explore module APIs. This
// is still just emitting symbols from the detected scope.
#[test]
fn module_api() {
let test = cursor_test(
"\
import re
re.<CURSOR>
",
);
assert_snapshot!(test.completions(), @"re");
}
#[test]
fn one_function_prefix() {
let test = cursor_test(
"\
def foo(): ...
f<CURSOR>
",
);
assert_snapshot!(test.completions(), @r"
f
foo
");
}
#[test]
fn one_function_not_prefix() {
let test = cursor_test(
"\
def foo(): ...
g<CURSOR>
",
);
assert_snapshot!(test.completions(), @r"
foo
g
");
}
#[test]
fn one_function_blank() {
let test = cursor_test(
"\
def foo(): ...
<CURSOR>
",
);
assert_snapshot!(test.completions(), @r"
foo
");
}
#[test]
fn nested_function_prefix() {
let test = cursor_test(
"\
def foo():
def foofoo(): ...
f<CURSOR>
",
);
assert_snapshot!(test.completions(), @r"
f
foo
");
}
#[test]
fn nested_function_blank() {
let test = cursor_test(
"\
def foo():
def foofoo(): ...
<CURSOR>
",
);
assert_snapshot!(test.completions(), @r"
foo
");
}
#[test]
fn nested_function_not_in_global_scope_prefix() {
let test = cursor_test(
"\
def foo():
def foofoo(): ...
f<CURSOR>
",
);
assert_snapshot!(test.completions(), @r"
f
foo
foofoo
");
}
#[test]
fn nested_function_not_in_global_scope_blank() {
let test = cursor_test(
"\
def foo():
def foofoo(): ...
<CURSOR>
",
);
// FIXME: Should include `foofoo`.
//
// `foofoo` isn't included at present (2025-05-22). The problem
// here is that the AST for `def foo():` doesn't encompass the
// trailing indentation. So when the cursor position is in that
// trailing indentation, we can't (easily) get a handle to the
// right scope. And even if we could, the AST expressions for
// `def foo():` and `def foofoo(): ...` end at precisely the
// same point. So there is no AST we can hold after the end of
// `foofoo` but before the end of `foo`. So at the moment, it's
// not totally clear how to get the right scope.
//
// If we didn't want to change the ranges on the AST nodes,
// another approach here would be to get the inner most scope,
// and explore its ancestors until we get to a level that
// matches the current cursor's indentation. This seems fraught
// however. It's not clear to me that we can always assume a
// correspondence between scopes and indentation level.
assert_snapshot!(test.completions(), @r"
foo
");
}
#[test]
fn double_nested_function_not_in_global_scope_prefix1() {
let test = cursor_test(
"\
def foo():
def foofoo():
def foofoofoo(): ...
f<CURSOR>
",
);
assert_snapshot!(test.completions(), @r"
f
foo
foofoo
");
}
#[test]
fn double_nested_function_not_in_global_scope_prefix2() {
let test = cursor_test(
"\
def foo():
def foofoo():
def foofoofoo(): ...
f<CURSOR>",
);
assert_snapshot!(test.completions(), @r"
f
foo
foofoo
");
}
#[test]
fn double_nested_function_not_in_global_scope_prefix3() {
let test = cursor_test(
"\
def foo():
def foofoo():
def foofoofoo(): ...
f<CURSOR>
def frob(): ...
",
);
assert_snapshot!(test.completions(), @r"
f
foo
foofoo
frob
");
}
#[test]
fn double_nested_function_not_in_global_scope_prefix4() {
let test = cursor_test(
"\
def foo():
def foofoo():
def foofoofoo(): ...
f<CURSOR>
def frob(): ...
",
);
assert_snapshot!(test.completions(), @r"
f
foo
frob
");
}
#[test]
fn double_nested_function_not_in_global_scope_prefix5() {
let test = cursor_test(
"\
def foo():
def foofoo():
def foofoofoo(): ...
f<CURSOR>
def frob(): ...
",
);
assert_snapshot!(test.completions(), @r"
f
foo
foofoo
foofoofoo
frob
");
}
#[test]
fn double_nested_function_not_in_global_scope_blank1() {
let test = cursor_test(
"\
def foo():
def foofoo():
def foofoofoo(): ...
<CURSOR>
",
);
// FIXME: Should include `foofoo` (but not `foofoofoo`).
//
// The tests below fail for the same reason that
// `nested_function_not_in_global_scope_blank` fails: there is no
// space in the AST ranges after the end of `foofoofoo` but before
// the end of `foofoo`. So either the AST needs to be tweaked to
// account for the indented whitespace, or some other technique
// needs to be used to get the scope containing `foofoo` but not
// `foofoofoo`.
assert_snapshot!(test.completions(), @r"
foo
");
}
#[test]
fn double_nested_function_not_in_global_scope_blank2() {
let test = cursor_test(
" \
def foo():
def foofoo():
def foofoofoo(): ...
<CURSOR>",
);
// FIXME: Should include `foofoo` (but not `foofoofoo`).
assert_snapshot!(test.completions(), @r"
foo
");
}
#[test]
fn double_nested_function_not_in_global_scope_blank3() {
let test = cursor_test(
"\
def foo():
def foofoo():
def foofoofoo(): ...
<CURSOR>
def frob(): ...
",
);
// FIXME: Should include `foofoo` (but not `foofoofoo`).
assert_snapshot!(test.completions(), @r"
foo
frob
");
}
#[test]
fn double_nested_function_not_in_global_scope_blank4() {
let test = cursor_test(
"\
def foo():
def foofoo():
def foofoofoo(): ...
<CURSOR>
def frob(): ...
",
);
// FIXME: Should include `foofoo` (but not `foofoofoo`).
assert_snapshot!(test.completions(), @r"
foo
frob
");
}
#[test]
fn double_nested_function_not_in_global_scope_blank5() {
let test = cursor_test(
"\
def foo():
def foofoo():
def foofoofoo(): ...
<CURSOR>
def frob(): ...
",
);
// FIXME: Should include `foofoo` (but not `foofoofoo`).
assert_snapshot!(test.completions(), @r"
foo
frob
");
}
#[test]
fn list_comprehension1() {
let test = cursor_test(
"\
[<CURSOR> for bar in [1, 2, 3]]
",
);
// It's not totally clear why `for` shows up in the
// symbol tables of the detected scopes here. My guess
// is that there's perhaps some sub-optimal behavior
// here because the list comprehension as written is not
// valid.
assert_snapshot!(test.completions(), @r"
bar
for
");
}
#[test]
fn list_comprehension2() {
let test = cursor_test(
"\
[f<CURSOR> for foo in [1, 2, 3]]
",
);
assert_snapshot!(test.completions(), @r"
f
foo
");
}
#[test]
fn lambda_prefix1() {
let test = cursor_test(
"\
(lambda foo: (1 + f<CURSOR> + 2))(2)
",
);
assert_snapshot!(test.completions(), @r"
f
foo
");
}
#[test]
fn lambda_prefix2() {
let test = cursor_test(
"\
(lambda foo: f<CURSOR> + 1)(2)
",
);
assert_snapshot!(test.completions(), @r"
f
foo
");
}
#[test]
fn lambda_prefix3() {
let test = cursor_test(
"\
(lambda foo: (f<CURSOR> + 1))(2)
",
);
assert_snapshot!(test.completions(), @r"
f
foo
");
}
#[test]
fn lambda_prefix4() {
let test = cursor_test(
"\
(lambda foo: 1 + f<CURSOR>)(2)
",
);
assert_snapshot!(test.completions(), @r"
f
foo
");
}
#[test]
fn lambda_blank1() {
let test = cursor_test(
"\
(lambda foo: 1 + <CURSOR> + 2)(2)
",
);
assert_snapshot!(test.completions(), @"foo");
}
// Cursor in the whitespace directly after the `:` of a lambda, before `+ 1`.
#[test]
fn lambda_blank2() {
let test = cursor_test(
"\
(lambda foo: <CURSOR> + 1)(2)
",
);
// FIXME: Should include `foo`.
//
// These fail for similar reasons as above: the body of the
// lambda doesn't include the position of <CURSOR> because
// <CURSOR> is inside leading or trailing whitespace. (Even
// when enclosed in parentheses. Specifically, parentheses
// aren't part of the node's range unless it's relevant e.g.,
// tuples.)
//
// The `lambda_blank1` test works because there are expressions
// on either side of <CURSOR>.
assert_snapshot!(test.completions(), @"<No completions found>");
}
#[test]
fn lambda_blank3() {
let test = cursor_test(
"\
(lambda foo: (<CURSOR> + 1))(2)
",
);
// FIXME: Should include `foo`.
assert_snapshot!(test.completions(), @"<No completions found>");
}
#[test]
fn lambda_blank4() {
let test = cursor_test(
"\
(lambda foo: 1 + <CURSOR>)(2)
",
);
// FIXME: Should include `foo`.
assert_snapshot!(test.completions(), @"<No completions found>");
}
#[test]
fn class_prefix1() {
let test = cursor_test(
"\
class Foo:
bar = 1
quux = b<CURSOR>
frob = 3
",
);
assert_snapshot!(test.completions(), @r"
Foo
b
bar
frob
quux
");
}
#[test]
fn class_prefix2() {
let test = cursor_test(
"\
class Foo:
bar = 1
quux = b<CURSOR>
",
);
assert_snapshot!(test.completions(), @r"
Foo
b
bar
quux
");
}
#[test]
fn class_blank1() {
let test = cursor_test(
"\
class Foo:
bar = 1
quux = <CURSOR>
frob = 3
",
);
// FIXME: Should include `bar`, `quux` and `frob`.
// (Unclear if `Foo` should be included, but a false
// positive isn't the end of the world.)
//
// These don't work for similar reasons as other
// tests above with the <CURSOR> inside of whitespace.
assert_snapshot!(test.completions(), @r"
Foo
");
}
#[test]
fn class_blank2() {
let test = cursor_test(
"\
class Foo:
bar = 1
quux = <CURSOR>
frob = 3
",
);
// FIXME: Should include `bar`, `quux` and `frob`.
// (Unclear if `Foo` should be included, but a false
// positive isn't the end of the world.)
assert_snapshot!(test.completions(), @r"
Foo
");
}
#[test]
fn class_super1() {
let test = cursor_test(
"\
class Bar: ...
class Foo(<CURSOR>):
bar = 1
",
);
assert_snapshot!(test.completions(), @r"
Bar
Foo
");
}
#[test]
fn class_super2() {
let test = cursor_test(
"\
class Foo(<CURSOR>):
bar = 1
class Bar: ...
",
);
assert_snapshot!(test.completions(), @r"
Bar
Foo
");
}
#[test]
fn class_super3() {
let test = cursor_test(
"\
class Foo(<CURSOR>
bar = 1
class Bar: ...
",
);
assert_snapshot!(test.completions(), @r"
Bar
Foo
");
}
#[test]
fn class_super4() {
let test = cursor_test(
"\
class Bar: ...
class Foo(<CURSOR>",
);
assert_snapshot!(test.completions(), @r"
Bar
Foo
");
}
// We don't yet take function parameters into account.
#[test]
fn call_prefix1() {
let test = cursor_test(
"\
def bar(okay=None): ...
foo = 1
bar(o<CURSOR>
",
);
assert_snapshot!(test.completions(), @r"
bar
foo
o
");
}
#[test]
fn call_blank1() {
let test = cursor_test(
"\
def bar(okay=None): ...
foo = 1
bar(<CURSOR>
",
);
assert_snapshot!(test.completions(), @r"
bar
foo
");
}
// `foo` is defined both at module level and as a method of `C`; the
// snapshot below lists it only once.
#[test]
fn duplicate1() {
let test = cursor_test(
"\
def foo(): ...
class C:
def foo(self): ...
def bar(self):
f<CURSOR>
",
);
assert_snapshot!(test.completions(), @r"
C
bar
f
foo
self
");
}
#[test]
fn instance_methods_are_not_regular_functions1() {
let test = cursor_test(
"\
class C:
def foo(self): ...
<CURSOR>
",
);
assert_snapshot!(test.completions(), @"C");
}
#[test]
fn instance_methods_are_not_regular_functions2() {
let test = cursor_test(
"\
class C:
def foo(self): ...
def bar(self):
f<CURSOR>
",
);
// FIXME: Should NOT include `foo` here, since
// that is only a method that can be called on
// `self`.
assert_snapshot!(test.completions(), @r"
C
bar
f
foo
self
");
}
// The text before the cursor is the keyword `class`, which is also a prefix
// of `classy_variable_name`. The snapshot contains only the variable; no
// entry is produced for `class` itself.
#[test]
fn identifier_keyword_clash1() {
let test = cursor_test(
"\
classy_variable_name = 1
class<CURSOR>
",
);
assert_snapshot!(test.completions(), @"classy_variable_name");
}
// The cursor sits inside an unterminated f-string replacement field
// (`f"{some`). The snapshot still lists `some_symbol`, along with `print`
// and the partially typed `some`.
#[test]
fn identifier_keyword_clash2() {
let test = cursor_test(
"\
some_symbol = 1
print(f\"{some<CURSOR>
",
);
assert_snapshot!(test.completions(), @r"
print
some
some_symbol
");
}
impl CursorTest {
    /// Runs `completion` at this test's cursor and renders the result for a
    /// snapshot: one label per line, or a fixed placeholder when nothing was
    /// returned.
    fn completions(&self) -> String {
        let found = completion(&self.db, self.file, self.cursor_offset);
        if found.is_empty() {
            "<No completions found>".to_string()
        } else {
            let labels: Vec<String> = found.into_iter().map(|item| item.label).collect();
            labels.join("\n")
        }
    }
}
} }

View file

@ -1,7 +1,7 @@
use ruff_db::files::{File, FilePath}; use ruff_db::files::{File, FilePath};
use ruff_db::source::line_index; use ruff_db::source::line_index;
use ruff_python_ast as ast; use ruff_python_ast as ast;
use ruff_python_ast::{Expr, ExprRef}; use ruff_python_ast::{Expr, ExprRef, name::Name};
use ruff_source_file::LineIndex; use ruff_source_file::LineIndex;
use crate::Db; use crate::Db;
@ -9,6 +9,7 @@ use crate::module_name::ModuleName;
use crate::module_resolver::{Module, resolve_module}; use crate::module_resolver::{Module, resolve_module};
use crate::semantic_index::ast_ids::HasScopedExpressionId; use crate::semantic_index::ast_ids::HasScopedExpressionId;
use crate::semantic_index::semantic_index; use crate::semantic_index::semantic_index;
use crate::semantic_index::symbol::FileScopeId;
use crate::types::{Type, binding_type, infer_scope_types}; use crate::types::{Type, binding_type, infer_scope_types};
pub struct SemanticModel<'db> { pub struct SemanticModel<'db> {
@ -38,6 +39,32 @@ impl<'db> SemanticModel<'db> {
pub fn resolve_module(&self, module_name: &ModuleName) -> Option<Module> { pub fn resolve_module(&self, module_name: &ModuleName) -> Option<Module> {
resolve_module(self.db, module_name) resolve_module(self.db, module_name)
} }
/// Collects the names of the symbols reachable from the scope that encloses
/// `node`.
///
/// The starting scope is looked up from `node` when it is an identifier or
/// an expression. For any other kind of node the global scope of this
/// model's `File` is used instead. The names from every scope yielded by
/// `ancestor_scopes` for that starting scope are appended in visiting order;
/// no sorting or de-duplication is done here.
pub fn completions(&self, node: ast::AnyNodeRef<'_>) -> Vec<Name> {
    let index = semantic_index(self.db, self.file);

    let start_scope = if let ast::AnyNodeRef::Identifier(identifier) = node {
        index.expression_scope_id(identifier)
    } else if let Some(expr) = node.as_expr_ref() {
        index.expression_scope_id(expr)
    } else {
        // We couldn't identify a specific expression that we're in, so
        // fall back to the global scope.
        FileScopeId::global()
    };

    let mut names = Vec::new();
    for (scope_id, _) in index.ancestor_scopes(start_scope) {
        let table = index.symbol_table(scope_id);
        for symbol in table.symbols() {
            names.push(symbol.name().clone());
        }
    }
    names
}
} }
pub trait HasType { pub trait HasType {