[ty] Implement global handling and load-before-global-declaration syntax error (#17637)

Summary
--

This PR resolves both the typing-related and syntax error TODOs added in
#17563 by tracking a set of `global` bindings for each scope. As
discussed below, we avoid the additional AST traversal that ruff performs by
collecting `Name`s from `global` statements while building the semantic
index, and we emit a syntax error if the `Name` is already bound, declared,
or used in the current scope at the point of the `global` statement. This has the
downside of separating the error from the `SemanticSyntaxChecker`, but I
plan to explore using this approach in the `SemanticSyntaxChecker`
itself as a follow-up. It seems like this may be a better approach for
ruff as well.

Test Plan
--

Updated all of the related mdtests to remove the TODOs (and add quotes I
forgot on the messages).

There is one remaining TODO, but it requires `nonlocal` support, which
isn't even incorporated into the `SemanticSyntaxChecker` yet.

---------

Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com>
Co-authored-by: Carl Meyer <carl@astral.sh>
This commit is contained in:
Brent Westbrook 2025-05-08 10:30:04 -04:00 committed by GitHub
parent 67cd94ed64
commit 57bf7dfbd9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 174 additions and 48 deletions

View file

@ -176,6 +176,9 @@ pub(crate) struct SemanticIndex<'db> {
/// Map from the file-local [`FileScopeId`] to the salsa-ingredient [`ScopeId`].
scope_ids_by_scope: IndexVec<FileScopeId, ScopeId<'db>>,
/// Map from the file-local [`FileScopeId`] to the set of explicit-global symbols it contains.
globals_by_scope: FxHashMap<FileScopeId, FxHashSet<ScopedSymbolId>>,
/// Use-def map for each scope in this file.
use_def_maps: IndexVec<FileScopeId, Arc<UseDefMap<'db>>>,
@ -255,6 +258,16 @@ impl<'db> SemanticIndex<'db> {
self.scope_ids_by_scope.iter().copied()
}
/// Returns `true` if `symbol` is recorded as an explicit-global symbol of `scope`.
///
/// A scope that has no entry in `globals_by_scope` has no such symbols, so the
/// result is `false` for it.
pub(crate) fn symbol_is_global_in_scope(
    &self,
    symbol: ScopedSymbolId,
    scope: FileScopeId,
) -> bool {
    match self.globals_by_scope.get(&scope) {
        Some(explicit_globals) => explicit_globals.contains(&symbol),
        None => false,
    }
}
/// Returns the id of the parent scope.
pub(crate) fn parent_scope_id(&self, scope_id: FileScopeId) -> Option<FileScopeId> {
let scope = self.scope(scope_id);

View file

@ -12,7 +12,7 @@ use ruff_python_ast::name::Name;
use ruff_python_ast::visitor::{walk_expr, walk_pattern, walk_stmt, Visitor};
use ruff_python_ast::{self as ast, PySourceType, PythonVersion};
use ruff_python_parser::semantic_errors::{
SemanticSyntaxChecker, SemanticSyntaxContext, SemanticSyntaxError,
SemanticSyntaxChecker, SemanticSyntaxContext, SemanticSyntaxError, SemanticSyntaxErrorKind,
};
use ruff_text_size::TextRange;
@ -106,6 +106,7 @@ pub(super) struct SemanticIndexBuilder<'db> {
use_def_maps: IndexVec<FileScopeId, UseDefMapBuilder<'db>>,
scopes_by_node: FxHashMap<NodeWithScopeKey, FileScopeId>,
scopes_by_expression: FxHashMap<ExpressionNodeKey, FileScopeId>,
globals_by_scope: FxHashMap<FileScopeId, FxHashSet<ScopedSymbolId>>,
definitions_by_node: FxHashMap<DefinitionNodeKey, Definitions<'db>>,
expressions_by_node: FxHashMap<ExpressionNodeKey, Expression<'db>>,
imported_modules: FxHashSet<ModuleName>,
@ -144,6 +145,7 @@ impl<'db> SemanticIndexBuilder<'db> {
scopes_by_node: FxHashMap::default(),
definitions_by_node: FxHashMap::default(),
expressions_by_node: FxHashMap::default(),
globals_by_scope: FxHashMap::default(),
imported_modules: FxHashSet::default(),
generator_functions: FxHashSet::default(),
@ -1085,6 +1087,7 @@ impl<'db> SemanticIndexBuilder<'db> {
self.scopes_by_node.shrink_to_fit();
self.generator_functions.shrink_to_fit();
self.eager_snapshots.shrink_to_fit();
self.globals_by_scope.shrink_to_fit();
SemanticIndex {
symbol_tables,
@ -1093,6 +1096,7 @@ impl<'db> SemanticIndexBuilder<'db> {
definitions_by_node: self.definitions_by_node,
expressions_by_node: self.expressions_by_node,
scope_ids_by_scope: self.scope_ids_by_scope,
globals_by_scope: self.globals_by_scope,
ast_ids,
scopes_by_expression: self.scopes_by_expression,
scopes_by_node: self.scopes_by_node,
@ -1898,7 +1902,38 @@ where
// Everything in the current block after a terminal statement is unreachable.
self.mark_unreachable();
}
// `global` statement: record every listed name as an explicit global of the current scope,
// reporting a syntax error for names that this scope has already touched.
ast::Stmt::Global(ast::StmtGlobal { range: _, names }) => {
for name in names {
let symbol_id = self.add_symbol(name.id.clone());
let symbol_table = self.current_symbol_table();
let symbol = symbol_table.symbol(symbol_id);
// The symbol is already bound, declared, or used in this scope at the point of the
// `global` statement, so report `LoadBeforeGlobalDeclaration`. The error is anchored
// at the individual name (`name.range`), not at the whole statement.
if symbol.is_bound() || symbol.is_declared() || symbol.is_used() {
self.report_semantic_error(SemanticSyntaxError {
kind: SemanticSyntaxErrorKind::LoadBeforeGlobalDeclaration {
name: name.to_string(),
start: name.range.start(),
},
range: name.range,
python_version: self.python_version,
});
}
// The symbol is registered as an explicit global whether or not the error above was
// reported.
let scope_id = self.current_scope();
self.globals_by_scope
.entry(scope_id)
.or_default()
.insert(symbol_id);
}
walk_stmt(self, stmt);
}
// `del` statement: every target that is a plain name is marked as used in the current
// scope; targets of any other expression kind are not handled in this loop.
// NOTE(review): presumably this is what lets the `is_used()` check in the `global` handling
// flag a `del x` that precedes `global x` -- confirm against `mark_symbol_used`.
ast::Stmt::Delete(ast::StmtDelete { targets, range: _ }) => {
for target in targets {
if let ast::Expr::Name(ast::ExprName { id, .. }) = target {
let symbol_id = self.add_symbol(id.clone());
self.current_symbol_table().mark_symbol_used(symbol_id);
}
}
walk_stmt(self, stmt);
}
_ => {
walk_stmt(self, stmt);
}
@ -2387,7 +2422,8 @@ impl SemanticSyntaxContext for SemanticIndexBuilder<'_> {
self.source_text().as_str()
}
// TODO(brent) handle looking up `global` bindings
// The only syntax error that depends on this method is `LoadBeforeGlobalDeclaration`, and
// that error is reported directly from `visit_stmt`. This implementation is therefore just a
// placeholder: it ignores `_name` and always returns `None`.
fn global(&self, _name: &str) -> Option<TextRange> {
None
}

View file

@ -56,7 +56,7 @@ use crate::semantic_index::definition::{
use crate::semantic_index::expression::{Expression, ExpressionKind};
use crate::semantic_index::narrowing_constraints::ConstraintKey;
use crate::semantic_index::symbol::{
FileScopeId, NodeWithScopeKind, NodeWithScopeRef, ScopeId, ScopeKind,
FileScopeId, NodeWithScopeKind, NodeWithScopeRef, ScopeId, ScopeKind, ScopedSymbolId,
};
use crate::semantic_index::{semantic_index, EagerSnapshotResult, SemanticIndex};
use crate::symbol::{
@ -1387,9 +1387,29 @@ impl<'db> TypeInferenceBuilder<'db> {
.kind(self.db())
.category(self.context.in_stub())
.is_binding());
let use_def = self.index.use_def_map(binding.file_scope(self.db()));
let declarations = use_def.declarations_at_binding(binding);
let file_scope_id = binding.file_scope(self.db());
let symbol_table = self.index.symbol_table(file_scope_id);
let use_def = self.index.use_def_map(file_scope_id);
let mut bound_ty = ty;
let symbol_id = binding.symbol(self.db());
let global_use_def_map = self.index.use_def_map(FileScopeId::global());
let declarations = if self.skip_non_global_scopes(file_scope_id, symbol_id) {
let symbol_name = symbol_table.symbol(symbol_id).name();
match self
.index
.symbol_table(FileScopeId::global())
.symbol_id_by_name(symbol_name)
{
Some(id) => global_use_def_map.public_declarations(id),
// This case is a syntax error (load before global declaration) but ignore that here
None => use_def.declarations_at_binding(binding),
}
} else {
use_def.declarations_at_binding(binding)
};
let declared_ty = symbol_from_declarations(self.db(), declarations)
.map(|SymbolAndQualifiers { symbol, .. }| {
symbol.ignore_possibly_unbound().unwrap_or(Type::unknown())
@ -1415,6 +1435,19 @@ impl<'db> TypeInferenceBuilder<'db> {
self.types.bindings.insert(binding, bound_ty);
}
/// Reports whether a lookup of `symbol_id` from `file_scope_id` should bypass the intervening
/// local scopes and be resolved in the global scope.
///
/// This holds only when `file_scope_id` is not the global scope itself and the semantic index
/// has `symbol_id` recorded as an explicit global of that scope.
fn skip_non_global_scopes(
    &self,
    file_scope_id: FileScopeId,
    symbol_id: ScopedSymbolId,
) -> bool {
    // Nothing to skip when we are already in the global scope.
    if file_scope_id.is_global() {
        return false;
    }
    self.index.symbol_is_global_in_scope(symbol_id, file_scope_id)
}
fn add_declaration(
&mut self,
node: AnyNodeRef,
@ -5256,6 +5289,20 @@ impl<'db> TypeInferenceBuilder<'db> {
}
};
let current_file = self.file();
let skip_non_global_scopes = symbol_table
.symbol_id_by_name(symbol_name)
.is_some_and(|symbol_id| self.skip_non_global_scopes(file_scope_id, symbol_id));
if skip_non_global_scopes {
return symbol(
db,
FileScopeId::global().to_scope_id(db, current_file),
symbol_name,
);
}
// If it's a function-like scope and there is one or more binding in this scope (but
// none of those bindings are visible from where we are in the control flow), we cannot
// fallback to any bindings in enclosing scopes. As such, we can immediately short-circuit
@ -5273,8 +5320,6 @@ impl<'db> TypeInferenceBuilder<'db> {
constraint_keys.push((file_scope_id, ConstraintKey::UseId(use_id)));
}
let current_file = self.file();
// Walk up parent scopes looking for a possible enclosing scope that may have a
// definition of this name visible to us (would be `LOAD_DEREF` at runtime.)
// Note that we skip the scope containing the use that we are resolving, since we