[ty] Implement global handling and load-before-global-declaration syntax error (#17637)

Summary
--

This PR resolves both the typing-related and syntax error TODOs added in
#17563 by tracking a set of `global` bindings for each scope. As
discussed below, we avoid the additional AST traversal from ruff by
collecting `Name`s from `global` statements while building the semantic
index and emit a syntax error if the `Name` is already bound, declared,
or used in the current scope at the point of the `global` statement. This has the
downside of separating the error from the `SemanticSyntaxChecker`, but I
plan to explore using this approach in the `SemanticSyntaxChecker`
itself as a follow-up. It seems like this may be a better approach for
ruff as well.

Test Plan
--

Updated all of the related mdtests to remove the TODOs (and add quotes I
forgot on the messages).

There is one remaining TODO, but it requires `nonlocal` support, which
isn't even incorporated into the `SemanticSyntaxChecker` yet.

---------

Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com>
Co-authored-by: Carl Meyer <carl@astral.sh>
This commit is contained in:
Brent Westbrook 2025-05-08 10:30:04 -04:00 committed by GitHub
parent 67cd94ed64
commit 57bf7dfbd9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 174 additions and 48 deletions

View file

@ -252,12 +252,8 @@ def _():
## Load before `global` declaration ## Load before `global` declaration
This should be an error, but it's not yet.
TODO implement `SemanticSyntaxContext::global`
```py ```py
def f(): def f():
x = 1 x = 1
global x global x # error: [invalid-syntax] "name `x` is used prior to global declaration"
``` ```

View file

@ -32,8 +32,14 @@ def f():
y = "" y = ""
global x global x
# TODO: error: [invalid-assignment] "Object of type `Literal[""]` is not assignable to `int`" # error: [invalid-assignment] "Object of type `Literal[""]` is not assignable to `int`"
x = "" x = ""
global z
# error: [invalid-assignment] "Object of type `Literal[""]` is not assignable to `int`"
z = ""
z: int
``` ```
## Nested intervening scope ## Nested intervening scope
@ -48,8 +54,7 @@ def outer():
def inner(): def inner():
global x global x
# TODO: revealed: int reveal_type(x) # revealed: int
reveal_type(x) # revealed: str
``` ```
## Narrowing ## Narrowing
@ -87,8 +92,7 @@ def f():
```py ```py
def f(): def f():
global x global x
# TODO this should also not be an error y = x
y = x # error: [unresolved-reference] "Name `x` used when not defined"
x = 1 # No error. x = 1 # No error.
x = 2 x = 2
@ -99,79 +103,111 @@ x = 2
Using a name prior to its `global` declaration in the same scope is a syntax error. Using a name prior to its `global` declaration in the same scope is a syntax error.
```py ```py
x = 1
def f(): def f():
print(x) # TODO: error: [invalid-syntax] name `x` is used prior to global declaration print(x)
global x global x # error: [invalid-syntax] "name `x` is used prior to global declaration"
print(x) print(x)
def f(): def f():
global x global x
print(x) # TODO: error: [invalid-syntax] name `x` is used prior to global declaration print(x)
global x global x # error: [invalid-syntax] "name `x` is used prior to global declaration"
print(x) print(x)
def f(): def f():
print(x) # TODO: error: [invalid-syntax] name `x` is used prior to global declaration print(x)
global x, y global x, y # error: [invalid-syntax] "name `x` is used prior to global declaration"
print(x) print(x)
def f(): def f():
global x, y global x, y
print(x) # TODO: error: [invalid-syntax] name `x` is used prior to global declaration print(x)
global x, y global x, y # error: [invalid-syntax] "name `x` is used prior to global declaration"
print(x) print(x)
def f(): def f():
x = 1 # TODO: error: [invalid-syntax] name `x` is used prior to global declaration x = 1
global x global x # error: [invalid-syntax] "name `x` is used prior to global declaration"
x = 1 x = 1
def f(): def f():
global x global x
x = 1 # TODO: error: [invalid-syntax] name `x` is used prior to global declaration x = 1
global x global x # error: [invalid-syntax] "name `x` is used prior to global declaration"
x = 1 x = 1
def f(): def f():
del x # TODO: error: [invalid-syntax] name `x` is used prior to global declaration del x
global x, y global x, y # error: [invalid-syntax] "name `x` is used prior to global declaration"
del x del x
def f(): def f():
global x, y global x, y
del x # TODO: error: [invalid-syntax] name `x` is used prior to global declaration del x
global x, y global x, y # error: [invalid-syntax] "name `x` is used prior to global declaration"
del x del x
def f(): def f():
del x # TODO: error: [invalid-syntax] name `x` is used prior to global declaration del x
global x global x # error: [invalid-syntax] "name `x` is used prior to global declaration"
del x del x
def f(): def f():
global x global x
del x # TODO: error: [invalid-syntax] name `x` is used prior to global declaration del x
global x global x # error: [invalid-syntax] "name `x` is used prior to global declaration"
del x del x
def f(): def f():
del x # TODO: error: [invalid-syntax] name `x` is used prior to global declaration del x
global x, y global x, y # error: [invalid-syntax] "name `x` is used prior to global declaration"
del x del x
def f(): def f():
global x, y global x, y
del x # TODO: error: [invalid-syntax] name `x` is used prior to global declaration del x
global x, y global x, y # error: [invalid-syntax] "name `x` is used prior to global declaration"
del x del x
def f(): def f():
print(f"{x=}") # TODO: error: [invalid-syntax] name `x` is used prior to global declaration print(f"{x=}")
global x global x # error: [invalid-syntax] "name `x` is used prior to global declaration"
# still an error in module scope # still an error in module scope
x = None # TODO: error: [invalid-syntax] name `x` is used prior to global declaration x = None
global x global x # error: [invalid-syntax] "name `x` is used prior to global declaration"
```
## Local bindings override preceding `global` bindings
```py
x = 42
def f():
global x
reveal_type(x) # revealed: Unknown | Literal[42]
x = "56"
reveal_type(x) # revealed: Literal["56"]
```
## Local assignment prevents falling back to the outer scope
```py
x = 42
def f():
# error: [unresolved-reference] "Name `x` used when not defined"
reveal_type(x) # revealed: Unknown
x = "56"
reveal_type(x) # revealed: Literal["56"]
```
## Annotating a `global` binding is a syntax error
```py
x: int = 1
def f():
global x
x: str = "foo" # TODO: error: [invalid-syntax] "annotated name 'x' can't be global"
``` ```

View file

@ -176,6 +176,9 @@ pub(crate) struct SemanticIndex<'db> {
/// Map from the file-local [`FileScopeId`] to the salsa-ingredient [`ScopeId`]. /// Map from the file-local [`FileScopeId`] to the salsa-ingredient [`ScopeId`].
scope_ids_by_scope: IndexVec<FileScopeId, ScopeId<'db>>, scope_ids_by_scope: IndexVec<FileScopeId, ScopeId<'db>>,
/// Map from the file-local [`FileScopeId`] to the set of explicit-global symbols it contains.
globals_by_scope: FxHashMap<FileScopeId, FxHashSet<ScopedSymbolId>>,
/// Use-def map for each scope in this file. /// Use-def map for each scope in this file.
use_def_maps: IndexVec<FileScopeId, Arc<UseDefMap<'db>>>, use_def_maps: IndexVec<FileScopeId, Arc<UseDefMap<'db>>>,
@ -255,6 +258,16 @@ impl<'db> SemanticIndex<'db> {
self.scope_ids_by_scope.iter().copied() self.scope_ids_by_scope.iter().copied()
} }
/// Returns `true` if `symbol` is recorded as an explicit-global symbol of `scope`.
///
/// A scope with no entry in `globals_by_scope` has no explicit globals, so the answer is
/// `false` for every symbol in it.
pub(crate) fn symbol_is_global_in_scope(
    &self,
    symbol: ScopedSymbolId,
    scope: FileScopeId,
) -> bool {
    match self.globals_by_scope.get(&scope) {
        Some(explicit_globals) => explicit_globals.contains(&symbol),
        None => false,
    }
}
/// Returns the id of the parent scope. /// Returns the id of the parent scope.
pub(crate) fn parent_scope_id(&self, scope_id: FileScopeId) -> Option<FileScopeId> { pub(crate) fn parent_scope_id(&self, scope_id: FileScopeId) -> Option<FileScopeId> {
let scope = self.scope(scope_id); let scope = self.scope(scope_id);

View file

@ -12,7 +12,7 @@ use ruff_python_ast::name::Name;
use ruff_python_ast::visitor::{walk_expr, walk_pattern, walk_stmt, Visitor}; use ruff_python_ast::visitor::{walk_expr, walk_pattern, walk_stmt, Visitor};
use ruff_python_ast::{self as ast, PySourceType, PythonVersion}; use ruff_python_ast::{self as ast, PySourceType, PythonVersion};
use ruff_python_parser::semantic_errors::{ use ruff_python_parser::semantic_errors::{
SemanticSyntaxChecker, SemanticSyntaxContext, SemanticSyntaxError, SemanticSyntaxChecker, SemanticSyntaxContext, SemanticSyntaxError, SemanticSyntaxErrorKind,
}; };
use ruff_text_size::TextRange; use ruff_text_size::TextRange;
@ -106,6 +106,7 @@ pub(super) struct SemanticIndexBuilder<'db> {
use_def_maps: IndexVec<FileScopeId, UseDefMapBuilder<'db>>, use_def_maps: IndexVec<FileScopeId, UseDefMapBuilder<'db>>,
scopes_by_node: FxHashMap<NodeWithScopeKey, FileScopeId>, scopes_by_node: FxHashMap<NodeWithScopeKey, FileScopeId>,
scopes_by_expression: FxHashMap<ExpressionNodeKey, FileScopeId>, scopes_by_expression: FxHashMap<ExpressionNodeKey, FileScopeId>,
globals_by_scope: FxHashMap<FileScopeId, FxHashSet<ScopedSymbolId>>,
definitions_by_node: FxHashMap<DefinitionNodeKey, Definitions<'db>>, definitions_by_node: FxHashMap<DefinitionNodeKey, Definitions<'db>>,
expressions_by_node: FxHashMap<ExpressionNodeKey, Expression<'db>>, expressions_by_node: FxHashMap<ExpressionNodeKey, Expression<'db>>,
imported_modules: FxHashSet<ModuleName>, imported_modules: FxHashSet<ModuleName>,
@ -144,6 +145,7 @@ impl<'db> SemanticIndexBuilder<'db> {
scopes_by_node: FxHashMap::default(), scopes_by_node: FxHashMap::default(),
definitions_by_node: FxHashMap::default(), definitions_by_node: FxHashMap::default(),
expressions_by_node: FxHashMap::default(), expressions_by_node: FxHashMap::default(),
globals_by_scope: FxHashMap::default(),
imported_modules: FxHashSet::default(), imported_modules: FxHashSet::default(),
generator_functions: FxHashSet::default(), generator_functions: FxHashSet::default(),
@ -1085,6 +1087,7 @@ impl<'db> SemanticIndexBuilder<'db> {
self.scopes_by_node.shrink_to_fit(); self.scopes_by_node.shrink_to_fit();
self.generator_functions.shrink_to_fit(); self.generator_functions.shrink_to_fit();
self.eager_snapshots.shrink_to_fit(); self.eager_snapshots.shrink_to_fit();
self.globals_by_scope.shrink_to_fit();
SemanticIndex { SemanticIndex {
symbol_tables, symbol_tables,
@ -1093,6 +1096,7 @@ impl<'db> SemanticIndexBuilder<'db> {
definitions_by_node: self.definitions_by_node, definitions_by_node: self.definitions_by_node,
expressions_by_node: self.expressions_by_node, expressions_by_node: self.expressions_by_node,
scope_ids_by_scope: self.scope_ids_by_scope, scope_ids_by_scope: self.scope_ids_by_scope,
globals_by_scope: self.globals_by_scope,
ast_ids, ast_ids,
scopes_by_expression: self.scopes_by_expression, scopes_by_expression: self.scopes_by_expression,
scopes_by_node: self.scopes_by_node, scopes_by_node: self.scopes_by_node,
@ -1898,7 +1902,38 @@ where
// Everything in the current block after a terminal statement is unreachable. // Everything in the current block after a terminal statement is unreachable.
self.mark_unreachable(); self.mark_unreachable();
} }
// Handle `global a, b, ...`: validate each listed name and record it as an explicit global
// of the current scope.
ast::Stmt::Global(ast::StmtGlobal { range: _, names }) => {
for name in names {
let symbol_id = self.add_symbol(name.id.clone());
let symbol_table = self.current_symbol_table();
let symbol = symbol_table.symbol(symbol_id);
// Any binding, declaration, or use of the name recorded in this scope before we
// reach the `global` statement is a syntax error. The diagnostic is attached to the
// name inside the `global` statement itself.
if symbol.is_bound() || symbol.is_declared() || symbol.is_used() {
self.report_semantic_error(SemanticSyntaxError {
kind: SemanticSyntaxErrorKind::LoadBeforeGlobalDeclaration {
name: name.to_string(),
start: name.range.start(),
},
range: name.range,
python_version: self.python_version,
});
}
// Record the symbol as explicitly global for the current scope; this happens whether
// or not an error was reported above.
let scope_id = self.current_scope();
self.globals_by_scope
.entry(scope_id)
.or_default()
.insert(symbol_id);
}
walk_stmt(self, stmt);
}
// Handle `del ...`: mark every plain-name target as used in the current scope before
// walking the statement. The `Stmt::Global` arm treats an already-used symbol as an error,
// so this is what lets `del x` followed by `global x` be reported.
ast::Stmt::Delete(ast::StmtDelete { targets, range: _ }) => {
for target in targets {
// Only bare names are handled here; other target expressions are left to
// `walk_stmt` below.
if let ast::Expr::Name(ast::ExprName { id, .. }) = target {
let symbol_id = self.add_symbol(id.clone());
self.current_symbol_table().mark_symbol_used(symbol_id);
}
}
walk_stmt(self, stmt);
}
_ => { _ => {
walk_stmt(self, stmt); walk_stmt(self, stmt);
} }
@ -2387,7 +2422,8 @@ impl SemanticSyntaxContext for SemanticIndexBuilder<'_> {
self.source_text().as_str() self.source_text().as_str()
} }
// TODO(brent) handle looking up `global` bindings // We handle the one syntax error that relies on this method (`LoadBeforeGlobalDeclaration`)
// directly in `visit_stmt`, so this just returns a placeholder value.
fn global(&self, _name: &str) -> Option<TextRange> { fn global(&self, _name: &str) -> Option<TextRange> {
None None
} }

View file

@ -56,7 +56,7 @@ use crate::semantic_index::definition::{
use crate::semantic_index::expression::{Expression, ExpressionKind}; use crate::semantic_index::expression::{Expression, ExpressionKind};
use crate::semantic_index::narrowing_constraints::ConstraintKey; use crate::semantic_index::narrowing_constraints::ConstraintKey;
use crate::semantic_index::symbol::{ use crate::semantic_index::symbol::{
FileScopeId, NodeWithScopeKind, NodeWithScopeRef, ScopeId, ScopeKind, FileScopeId, NodeWithScopeKind, NodeWithScopeRef, ScopeId, ScopeKind, ScopedSymbolId,
}; };
use crate::semantic_index::{semantic_index, EagerSnapshotResult, SemanticIndex}; use crate::semantic_index::{semantic_index, EagerSnapshotResult, SemanticIndex};
use crate::symbol::{ use crate::symbol::{
@ -1387,9 +1387,29 @@ impl<'db> TypeInferenceBuilder<'db> {
.kind(self.db()) .kind(self.db())
.category(self.context.in_stub()) .category(self.context.in_stub())
.is_binding()); .is_binding());
let use_def = self.index.use_def_map(binding.file_scope(self.db()));
let declarations = use_def.declarations_at_binding(binding); let file_scope_id = binding.file_scope(self.db());
let symbol_table = self.index.symbol_table(file_scope_id);
let use_def = self.index.use_def_map(file_scope_id);
let mut bound_ty = ty; let mut bound_ty = ty;
let symbol_id = binding.symbol(self.db());
let global_use_def_map = self.index.use_def_map(FileScopeId::global());
let declarations = if self.skip_non_global_scopes(file_scope_id, symbol_id) {
let symbol_name = symbol_table.symbol(symbol_id).name();
match self
.index
.symbol_table(FileScopeId::global())
.symbol_id_by_name(symbol_name)
{
Some(id) => global_use_def_map.public_declarations(id),
// This case is a syntax error (load before global declaration) but ignore that here
None => use_def.declarations_at_binding(binding),
}
} else {
use_def.declarations_at_binding(binding)
};
let declared_ty = symbol_from_declarations(self.db(), declarations) let declared_ty = symbol_from_declarations(self.db(), declarations)
.map(|SymbolAndQualifiers { symbol, .. }| { .map(|SymbolAndQualifiers { symbol, .. }| {
symbol.ignore_possibly_unbound().unwrap_or(Type::unknown()) symbol.ignore_possibly_unbound().unwrap_or(Type::unknown())
@ -1415,6 +1435,19 @@ impl<'db> TypeInferenceBuilder<'db> {
self.types.bindings.insert(binding, bound_ty); self.types.bindings.insert(binding, bound_ty);
} }
/// Reports whether `symbol_id` should be resolved directly in the global scope, bypassing
/// any intervening local scopes.
///
/// This is the case only when `file_scope_id` is not itself the global scope and the
/// semantic index records `symbol_id` as an explicit global of that scope.
fn skip_non_global_scopes(
    &self,
    file_scope_id: FileScopeId,
    symbol_id: ScopedSymbolId,
) -> bool {
    // The global scope never needs to skip anything, so the index is not consulted.
    if file_scope_id.is_global() {
        return false;
    }

    self.index
        .symbol_is_global_in_scope(symbol_id, file_scope_id)
}
fn add_declaration( fn add_declaration(
&mut self, &mut self,
node: AnyNodeRef, node: AnyNodeRef,
@ -5256,6 +5289,20 @@ impl<'db> TypeInferenceBuilder<'db> {
} }
}; };
let current_file = self.file();
let skip_non_global_scopes = symbol_table
.symbol_id_by_name(symbol_name)
.is_some_and(|symbol_id| self.skip_non_global_scopes(file_scope_id, symbol_id));
if skip_non_global_scopes {
return symbol(
db,
FileScopeId::global().to_scope_id(db, current_file),
symbol_name,
);
}
// If it's a function-like scope and there is one or more binding in this scope (but // If it's a function-like scope and there is one or more binding in this scope (but
// none of those bindings are visible from where we are in the control flow), we cannot // none of those bindings are visible from where we are in the control flow), we cannot
// fallback to any bindings in enclosing scopes. As such, we can immediately short-circuit // fallback to any bindings in enclosing scopes. As such, we can immediately short-circuit
@ -5273,8 +5320,6 @@ impl<'db> TypeInferenceBuilder<'db> {
constraint_keys.push((file_scope_id, ConstraintKey::UseId(use_id))); constraint_keys.push((file_scope_id, ConstraintKey::UseId(use_id)));
} }
let current_file = self.file();
// Walk up parent scopes looking for a possible enclosing scope that may have a // Walk up parent scopes looking for a possible enclosing scope that may have a
// definition of this name visible to us (would be `LOAD_DEREF` at runtime.) // definition of this name visible to us (would be `LOAD_DEREF` at runtime.)
// Note that we skip the scope containing the use that we are resolving, since we // Note that we skip the scope containing the use that we are resolving, since we