[ty] Implement global handling and load-before-global-declaration syntax error (#17637)

Summary
--

This PR resolves both the typing-related and syntax error TODOs added in
#17563 by tracking a set of `global` bindings for each scope. As
discussed below, we avoid the additional AST traversal from ruff by
collecting `Name`s from `global` statements while building the semantic
index and emit a syntax error if the `Name` is already bound in the
current scope at the point of the `global` statement. This has the
downside of separating the error from the `SemanticSyntaxChecker`, but I
plan to explore using this approach in the `SemanticSyntaxChecker`
itself as a follow-up. It seems like this may be a better approach for
ruff as well.

Test Plan
--

Updated all of the related mdtests to remove the TODOs (and add quotes I
forgot on the messages).

There is one remaining TODO, but it requires `nonlocal` support, which
isn't even incorporated into the `SemanticSyntaxChecker` yet.

---------

Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com>
Co-authored-by: Carl Meyer <carl@astral.sh>
This commit is contained in:
Brent Westbrook 2025-05-08 10:30:04 -04:00 committed by GitHub
parent 67cd94ed64
commit 57bf7dfbd9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 174 additions and 48 deletions

View file

@ -12,7 +12,7 @@ use ruff_python_ast::name::Name;
use ruff_python_ast::visitor::{walk_expr, walk_pattern, walk_stmt, Visitor};
use ruff_python_ast::{self as ast, PySourceType, PythonVersion};
use ruff_python_parser::semantic_errors::{
SemanticSyntaxChecker, SemanticSyntaxContext, SemanticSyntaxError,
SemanticSyntaxChecker, SemanticSyntaxContext, SemanticSyntaxError, SemanticSyntaxErrorKind,
};
use ruff_text_size::TextRange;
@ -106,6 +106,7 @@ pub(super) struct SemanticIndexBuilder<'db> {
use_def_maps: IndexVec<FileScopeId, UseDefMapBuilder<'db>>,
scopes_by_node: FxHashMap<NodeWithScopeKey, FileScopeId>,
scopes_by_expression: FxHashMap<ExpressionNodeKey, FileScopeId>,
globals_by_scope: FxHashMap<FileScopeId, FxHashSet<ScopedSymbolId>>,
definitions_by_node: FxHashMap<DefinitionNodeKey, Definitions<'db>>,
expressions_by_node: FxHashMap<ExpressionNodeKey, Expression<'db>>,
imported_modules: FxHashSet<ModuleName>,
@ -144,6 +145,7 @@ impl<'db> SemanticIndexBuilder<'db> {
scopes_by_node: FxHashMap::default(),
definitions_by_node: FxHashMap::default(),
expressions_by_node: FxHashMap::default(),
globals_by_scope: FxHashMap::default(),
imported_modules: FxHashSet::default(),
generator_functions: FxHashSet::default(),
@ -1085,6 +1087,7 @@ impl<'db> SemanticIndexBuilder<'db> {
self.scopes_by_node.shrink_to_fit();
self.generator_functions.shrink_to_fit();
self.eager_snapshots.shrink_to_fit();
self.globals_by_scope.shrink_to_fit();
SemanticIndex {
symbol_tables,
@ -1093,6 +1096,7 @@ impl<'db> SemanticIndexBuilder<'db> {
definitions_by_node: self.definitions_by_node,
expressions_by_node: self.expressions_by_node,
scope_ids_by_scope: self.scope_ids_by_scope,
globals_by_scope: self.globals_by_scope,
ast_ids,
scopes_by_expression: self.scopes_by_expression,
scopes_by_node: self.scopes_by_node,
@ -1898,7 +1902,38 @@ where
// Everything in the current block after a terminal statement is unreachable.
self.mark_unreachable();
}
ast::Stmt::Global(ast::StmtGlobal { range: _, names }) => {
for name in names {
let symbol_id = self.add_symbol(name.id.clone());
let symbol_table = self.current_symbol_table();
let symbol = symbol_table.symbol(symbol_id);
if symbol.is_bound() || symbol.is_declared() || symbol.is_used() {
self.report_semantic_error(SemanticSyntaxError {
kind: SemanticSyntaxErrorKind::LoadBeforeGlobalDeclaration {
name: name.to_string(),
start: name.range.start(),
},
range: name.range,
python_version: self.python_version,
});
}
let scope_id = self.current_scope();
self.globals_by_scope
.entry(scope_id)
.or_default()
.insert(symbol_id);
}
walk_stmt(self, stmt);
}
ast::Stmt::Delete(ast::StmtDelete { targets, range: _ }) => {
for target in targets {
if let ast::Expr::Name(ast::ExprName { id, .. }) = target {
let symbol_id = self.add_symbol(id.clone());
self.current_symbol_table().mark_symbol_used(symbol_id);
}
}
walk_stmt(self, stmt);
}
_ => {
walk_stmt(self, stmt);
}
@ -2387,7 +2422,8 @@ impl SemanticSyntaxContext for SemanticIndexBuilder<'_> {
self.source_text().as_str()
}
// TODO(brent) handle looking up `global` bindings
// We handle the one syntax error that relies on this method (`LoadBeforeGlobalDeclaration`)
// directly in `visit_stmt`, so this just returns a placeholder value.
fn global(&self, _name: &str) -> Option<TextRange> {
None
}