Add a method to Checker for cached parsing of stringified type annotations (#13158)

This commit is contained in:
Alex Waygood 2024-09-02 13:44:20 +01:00 committed by GitHub
parent ea0246c51a
commit b7c7b4b387
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 186 additions and 114 deletions

View file

@ -26,10 +26,12 @@
//! represents the lint-rule analysis phase. In the future, these steps may be separated into
//! distinct passes over the AST.
use std::cell::RefCell;
use std::path::Path;
use itertools::Itertools;
use log::debug;
use rustc_hash::FxHashMap;
use ruff_diagnostics::{Diagnostic, IsolationLevel};
use ruff_notebook::{CellOffsets, NotebookIndex};
@ -40,13 +42,13 @@ use ruff_python_ast::str::Quote;
use ruff_python_ast::visitor::{walk_except_handler, walk_pattern, Visitor};
use ruff_python_ast::{
self as ast, AnyParameterRef, Comprehension, ElifElseClause, ExceptHandler, Expr, ExprContext,
FStringElement, Keyword, MatchCase, ModExpression, ModModule, Parameter, Parameters, Pattern,
Stmt, Suite, UnaryOp,
FStringElement, Keyword, MatchCase, ModModule, Parameter, Parameters, Pattern, Stmt, Suite,
UnaryOp,
};
use ruff_python_ast::{helpers, str, visitor, PySourceType};
use ruff_python_codegen::{Generator, Stylist};
use ruff_python_index::Indexer;
use ruff_python_parser::typing::{parse_type_annotation, AnnotationKind};
use ruff_python_parser::typing::{parse_type_annotation, AnnotationKind, ParsedAnnotation};
use ruff_python_parser::{Parsed, Tokens};
use ruff_python_semantic::all::{DunderAllDefinition, DunderAllFlags};
use ruff_python_semantic::analyze::{imports, typing};
@ -177,8 +179,10 @@ impl ExpectedDocstringKind {
pub(crate) struct Checker<'a> {
/// The [`Parsed`] output for the source code.
parsed: &'a Parsed<ModModule>,
/// An internal cache for parsed string annotations
parsed_annotations_cache: ParsedAnnotationsCache<'a>,
/// The [`Parsed`] output for the type annotation the checker is currently in.
parsed_type_annotation: Option<&'a Parsed<ModExpression>>,
parsed_type_annotation: Option<&'a ParsedAnnotation>,
/// The [`Path`] to the file under analysis.
path: &'a Path,
/// The [`Path`] to the package containing the current file.
@ -229,6 +233,7 @@ impl<'a> Checker<'a> {
#[allow(clippy::too_many_arguments)]
pub(crate) fn new(
parsed: &'a Parsed<ModModule>,
parsed_annotations_arena: &'a typed_arena::Arena<ParsedAnnotation>,
settings: &'a LinterSettings,
noqa_line_for: &'a NoqaMapping,
noqa: flags::Noqa,
@ -245,6 +250,7 @@ impl<'a> Checker<'a> {
Checker {
parsed,
parsed_type_annotation: None,
parsed_annotations_cache: ParsedAnnotationsCache::new(parsed_annotations_arena),
settings,
noqa_line_for,
noqa,
@ -333,8 +339,8 @@ impl<'a> Checker<'a> {
/// Returns the [`Tokens`] for the parsed type annotation if the checker is in a typing context
/// or the parsed source code.
pub(crate) fn tokens(&self) -> &'a Tokens {
if let Some(parsed_type_annotation) = self.parsed_type_annotation {
parsed_type_annotation.tokens()
if let Some(type_annotation) = self.parsed_type_annotation {
type_annotation.parsed().tokens()
} else {
self.parsed.tokens()
}
@ -405,6 +411,19 @@ impl<'a> Checker<'a> {
.map(|node_id| IsolationLevel::Group(node_id.into()))
.unwrap_or_default()
}
/// Parse a stringified type annotation as an AST expression,
/// e.g. `"List[str]"` in `x: "List[str]"`
///
/// This method is a wrapper around [`ruff_python_parser::typing::parse_type_annotation`]
/// that adds caching.
pub(crate) fn parse_type_annotation(
&self,
annotation: &ast::ExprStringLiteral,
) -> Option<&'a ParsedAnnotation> {
self.parsed_annotations_cache
.lookup_or_parse(annotation, self.locator.contents())
}
}
impl<'a> Visitor<'a> for Checker<'a> {
@ -2168,18 +2187,13 @@ impl<'a> Checker<'a> {
///
/// class Bar: pass
/// ```
fn visit_deferred_string_type_definitions(
&mut self,
allocator: &'a typed_arena::Arena<Parsed<ModExpression>>,
) {
fn visit_deferred_string_type_definitions(&mut self) {
let snapshot = self.semantic.snapshot();
while !self.visit.string_type_definitions.is_empty() {
let type_definitions = std::mem::take(&mut self.visit.string_type_definitions);
for (string_expr, snapshot) in type_definitions {
if let Ok((parsed_annotation, kind)) =
parse_type_annotation(string_expr, self.locator.contents())
{
let parsed_annotation = allocator.alloc(parsed_annotation);
let annotation_parse_result = self.parse_type_annotation(string_expr);
if let Some(parsed_annotation) = annotation_parse_result {
self.parsed_type_annotation = Some(parsed_annotation);
let annotation = string_expr.value.to_str();
@ -2198,7 +2212,7 @@ impl<'a> Checker<'a> {
}
}
let type_definition_flag = match kind {
let type_definition_flag = match parsed_annotation.kind() {
AnnotationKind::Simple => SemanticModelFlags::SIMPLE_STRING_TYPE_DEFINITION,
AnnotationKind::Complex => {
SemanticModelFlags::COMPLEX_STRING_TYPE_DEFINITION
@ -2207,7 +2221,7 @@ impl<'a> Checker<'a> {
self.semantic.flags |=
SemanticModelFlags::TYPE_DEFINITION | type_definition_flag;
self.visit_expr(parsed_annotation.expr());
self.visit_expr(parsed_annotation.expression());
self.parsed_type_annotation = None;
} else {
if self.enabled(Rule::ForwardAnnotationSyntaxError) {
@ -2220,6 +2234,35 @@ impl<'a> Checker<'a> {
}
}
}
// If we're parsing string annotations inside string annotations
// (which is the only reason we might enter a second iteration of this loop),
// the cache is no longer valid. We must invalidate it to avoid an infinite loop.
//
// For example, consider the following annotation:
// ```python
// x: "list['str']"
// ```
//
// The first time we visit the AST, we see `"list['str']"`
// and identify it as a stringified annotation.
// We store it in `self.visit.string_type_definitions` to be analyzed later.
//
// After the entire tree has been visited, we look through
// `self.visit.string_type_definitions` and find `"list['str']"`.
// We parse it, and it becomes `list['str']`.
// After parsing it, we call `self.visit_expr()` on the `list['str']` node,
// and that `visit_expr` call is going to find `'str'` inside that node and
// identify it as a string type definition, appending it to
// `self.visit.string_type_definitions`, ensuring that there will be one
// more iteration of this loop.
//
// Unfortunately, the `TextRange` of `'str'`
// here will be *relative to the parsed `list['str']` node* rather than
// *relative to the original module*, meaning the cache
// (which uses `TextSize` as the key) becomes invalid on the second
// iteration of this loop.
self.parsed_annotations_cache.clear();
}
self.semantic.restore(snapshot);
}
@ -2283,14 +2326,14 @@ impl<'a> Checker<'a> {
/// After initial traversal of the source tree has been completed,
/// recursively visit all AST nodes that were deferred on the first pass.
/// This includes lambdas, functions, type parameters, and type annotations.
fn visit_deferred(&mut self, allocator: &'a typed_arena::Arena<Parsed<ModExpression>>) {
fn visit_deferred(&mut self) {
while !self.visit.is_empty() {
self.visit_deferred_class_bases();
self.visit_deferred_functions();
self.visit_deferred_type_param_definitions();
self.visit_deferred_lambdas();
self.visit_deferred_future_type_definitions();
self.visit_deferred_string_type_definitions(allocator);
self.visit_deferred_string_type_definitions();
}
}
@ -2358,6 +2401,42 @@ impl<'a> Checker<'a> {
}
}
struct ParsedAnnotationsCache<'a> {
arena: &'a typed_arena::Arena<ParsedAnnotation>,
by_offset: RefCell<FxHashMap<TextSize, Option<&'a ParsedAnnotation>>>,
}
impl<'a> ParsedAnnotationsCache<'a> {
fn new(arena: &'a typed_arena::Arena<ParsedAnnotation>) -> Self {
Self {
arena,
by_offset: RefCell::default(),
}
}
fn lookup_or_parse(
&self,
annotation: &ast::ExprStringLiteral,
source: &str,
) -> Option<&'a ParsedAnnotation> {
*self
.by_offset
.borrow_mut()
.entry(annotation.start())
.or_insert_with(|| {
if let Ok(annotation) = parse_type_annotation(annotation, source) {
Some(self.arena.alloc(annotation))
} else {
None
}
})
}
fn clear(&self) {
self.by_offset.borrow_mut().clear();
}
}
#[allow(clippy::too_many_arguments)]
pub(crate) fn check_ast(
parsed: &Parsed<ModModule>,
@ -2393,8 +2472,10 @@ pub(crate) fn check_ast(
python_ast: parsed.suite(),
};
let allocator = typed_arena::Arena::new();
let mut checker = Checker::new(
parsed,
&allocator,
settings,
noqa_line_for,
noqa,
@ -2417,8 +2498,7 @@ pub(crate) fn check_ast(
// Visit any deferred syntax nodes. Take care to visit in order, such that we avoid adding
// new deferred nodes after visiting nodes of that kind. For example, visiting a deferred
// function can add a deferred lambda, but the opposite is not true.
let allocator = typed_arena::Arena::new();
checker.visit_deferred(&allocator);
checker.visit_deferred();
checker.visit_exports();
// Check docstrings, bindings, and unresolved references.

View file

@ -4,7 +4,6 @@ use ruff_python_ast::helpers::ReturnStatementVisitor;
use ruff_python_ast::identifier::Identifier;
use ruff_python_ast::visitor::Visitor;
use ruff_python_ast::{self as ast, Expr, ParameterWithDefault, Stmt};
use ruff_python_parser::typing::parse_type_annotation;
use ruff_python_semantic::analyze::visibility;
use ruff_python_semantic::Definition;
use ruff_python_stdlib::typing::simple_magic_return_type;
@ -514,13 +513,10 @@ fn check_dynamically_typed<F>(
{
if let Expr::StringLiteral(string_expr) = annotation {
// Quoted annotations
if let Ok((parsed_annotation, _)) =
parse_type_annotation(string_expr, checker.locator().contents())
{
if let Some(parsed_annotation) = checker.parse_type_annotation(string_expr) {
if type_hint_resolves_to_any(
parsed_annotation.expr(),
checker.semantic(),
checker.locator(),
parsed_annotation.expression(),
checker,
checker.settings.target_version.minor(),
) {
diagnostics.push(Diagnostic::new(
@ -530,12 +526,7 @@ fn check_dynamically_typed<F>(
}
}
} else {
if type_hint_resolves_to_any(
annotation,
checker.semantic(),
checker.locator(),
checker.settings.target_version.minor(),
) {
if type_hint_resolves_to_any(annotation, checker, checker.settings.target_version.minor()) {
diagnostics.push(Diagnostic::new(
AnyType { name: func() },
annotation.range(),

View file

@ -7,7 +7,6 @@ use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::name::Name;
use ruff_python_ast::{self as ast, Expr, Operator, ParameterWithDefault, Parameters};
use ruff_python_parser::typing::parse_type_annotation;
use ruff_text_size::{Ranged, TextRange};
use crate::checkers::ast::Checker;
@ -180,13 +179,10 @@ pub(crate) fn implicit_optional(checker: &mut Checker, parameters: &Parameters)
if let Expr::StringLiteral(string_expr) = annotation.as_ref() {
// Quoted annotation.
if let Ok((parsed_annotation, kind)) =
parse_type_annotation(string_expr, checker.locator().contents())
{
if let Some(parsed_annotation) = checker.parse_type_annotation(string_expr) {
let Some(expr) = type_hint_explicitly_allows_none(
parsed_annotation.expr(),
checker.semantic(),
checker.locator(),
parsed_annotation.expression(),
checker,
checker.settings.target_version.minor(),
) else {
continue;
@ -195,7 +191,7 @@ pub(crate) fn implicit_optional(checker: &mut Checker, parameters: &Parameters)
let mut diagnostic =
Diagnostic::new(ImplicitOptional { conversion_type }, expr.range());
if kind.is_simple() {
if parsed_annotation.kind().is_simple() {
diagnostic.try_set_fix(|| generate_fix(checker, conversion_type, expr));
}
checker.diagnostics.push(diagnostic);
@ -204,8 +200,7 @@ pub(crate) fn implicit_optional(checker: &mut Checker, parameters: &Parameters)
// Unquoted annotation.
let Some(expr) = type_hint_explicitly_allows_none(
annotation,
checker.semantic(),
checker.locator(),
checker,
checker.settings.target_version.minor(),
) else {
continue;

View file

@ -2,10 +2,9 @@ use itertools::Either::{Left, Right};
use ruff_python_ast::name::QualifiedName;
use ruff_python_ast::{self as ast, Expr, Operator};
use ruff_python_parser::typing::parse_type_annotation;
use ruff_python_semantic::SemanticModel;
use ruff_python_stdlib::sys::is_known_standard_library;
use ruff_source_file::Locator;
use crate::checkers::ast::Checker;
/// Returns `true` if the given qualified name is a known type.
///
@ -40,7 +39,7 @@ enum TypingTarget<'a> {
Object,
/// Forward reference to a type e.g., `"List[str]"`.
ForwardReference(Expr),
ForwardReference(&'a Expr),
/// A `typing.Union` type e.g., `Union[int, str]`.
Union(&'a Expr),
@ -71,12 +70,8 @@ enum TypingTarget<'a> {
}
impl<'a> TypingTarget<'a> {
fn try_from_expr(
expr: &'a Expr,
semantic: &SemanticModel,
locator: &Locator,
minor_version: u8,
) -> Option<Self> {
fn try_from_expr(expr: &'a Expr, checker: &'a Checker, minor_version: u8) -> Option<Self> {
let semantic = checker.semantic();
match expr {
Expr::Subscript(ast::ExprSubscript { value, slice, .. }) => {
semantic.resolve_qualified_name(value).map_or(
@ -112,14 +107,11 @@ impl<'a> TypingTarget<'a> {
..
}) => Some(TypingTarget::PEP604Union(left, right)),
Expr::NoneLiteral(_) => Some(TypingTarget::None),
Expr::StringLiteral(string_expr) => parse_type_annotation(
string_expr,
locator.contents(),
)
.map_or(None, |(parsed_annotation, _)| {
Some(TypingTarget::ForwardReference(
parsed_annotation.into_expr(),
))
Expr::StringLiteral(string_expr) => checker
.parse_type_annotation(string_expr)
.as_ref()
.map(|parsed_annotation| {
TypingTarget::ForwardReference(parsed_annotation.expression())
}),
_ => semantic.resolve_qualified_name(expr).map_or(
// If we can't resolve the call path, it must be defined in the
@ -149,12 +141,7 @@ impl<'a> TypingTarget<'a> {
}
/// Check if the [`TypingTarget`] explicitly allows `None`.
fn contains_none(
&self,
semantic: &SemanticModel,
locator: &Locator,
minor_version: u8,
) -> bool {
fn contains_none(&self, checker: &Checker, minor_version: u8) -> bool {
match self {
TypingTarget::None
| TypingTarget::Optional(_)
@ -166,43 +153,43 @@ impl<'a> TypingTarget<'a> {
TypingTarget::Literal(slice) => resolve_slice_value(slice).any(|element| {
// Literal can only contain `None`, a literal value, other `Literal`
// or an enum value.
match TypingTarget::try_from_expr(element, semantic, locator, minor_version) {
match TypingTarget::try_from_expr(element, checker, minor_version) {
None | Some(TypingTarget::None) => true,
Some(new_target @ TypingTarget::Literal(_)) => {
new_target.contains_none(semantic, locator, minor_version)
new_target.contains_none(checker, minor_version)
}
_ => false,
}
}),
TypingTarget::Union(slice) => resolve_slice_value(slice).any(|element| {
TypingTarget::try_from_expr(element, semantic, locator, minor_version)
TypingTarget::try_from_expr(element, checker, minor_version)
.map_or(true, |new_target| {
new_target.contains_none(semantic, locator, minor_version)
new_target.contains_none(checker, minor_version)
})
}),
TypingTarget::PEP604Union(left, right) => [left, right].iter().any(|element| {
TypingTarget::try_from_expr(element, semantic, locator, minor_version)
TypingTarget::try_from_expr(element, checker, minor_version)
.map_or(true, |new_target| {
new_target.contains_none(semantic, locator, minor_version)
new_target.contains_none(checker, minor_version)
})
}),
TypingTarget::Annotated(expr) => {
TypingTarget::try_from_expr(expr, semantic, locator, minor_version)
TypingTarget::try_from_expr(expr, checker, minor_version)
.map_or(true, |new_target| {
new_target.contains_none(semantic, locator, minor_version)
new_target.contains_none(checker, minor_version)
})
}
TypingTarget::ForwardReference(expr) => {
TypingTarget::try_from_expr(expr, semantic, locator, minor_version)
TypingTarget::try_from_expr(expr, checker, minor_version)
.map_or(true, |new_target| {
new_target.contains_none(semantic, locator, minor_version)
new_target.contains_none(checker, minor_version)
})
}
}
}
/// Check if the [`TypingTarget`] explicitly allows `Any`.
fn contains_any(&self, semantic: &SemanticModel, locator: &Locator, minor_version: u8) -> bool {
fn contains_any(&self, checker: &Checker, minor_version: u8) -> bool {
match self {
TypingTarget::Any => true,
// `Literal` cannot contain `Any` as it's a dynamic value.
@ -213,27 +200,27 @@ impl<'a> TypingTarget<'a> {
| TypingTarget::Known
| TypingTarget::Unknown => false,
TypingTarget::Union(slice) => resolve_slice_value(slice).any(|element| {
TypingTarget::try_from_expr(element, semantic, locator, minor_version)
TypingTarget::try_from_expr(element, checker, minor_version)
.map_or(true, |new_target| {
new_target.contains_any(semantic, locator, minor_version)
new_target.contains_any(checker, minor_version)
})
}),
TypingTarget::PEP604Union(left, right) => [left, right].iter().any(|element| {
TypingTarget::try_from_expr(element, semantic, locator, minor_version)
TypingTarget::try_from_expr(element, checker, minor_version)
.map_or(true, |new_target| {
new_target.contains_any(semantic, locator, minor_version)
new_target.contains_any(checker, minor_version)
})
}),
TypingTarget::Annotated(expr) | TypingTarget::Optional(expr) => {
TypingTarget::try_from_expr(expr, semantic, locator, minor_version)
TypingTarget::try_from_expr(expr, checker, minor_version)
.map_or(true, |new_target| {
new_target.contains_any(semantic, locator, minor_version)
new_target.contains_any(checker, minor_version)
})
}
TypingTarget::ForwardReference(expr) => {
TypingTarget::try_from_expr(expr, semantic, locator, minor_version)
TypingTarget::try_from_expr(expr, checker, minor_version)
.map_or(true, |new_target| {
new_target.contains_any(semantic, locator, minor_version)
new_target.contains_any(checker, minor_version)
})
}
}
@ -249,11 +236,10 @@ impl<'a> TypingTarget<'a> {
/// This function assumes that the annotation is a valid typing annotation expression.
pub(crate) fn type_hint_explicitly_allows_none<'a>(
annotation: &'a Expr,
semantic: &SemanticModel,
locator: &Locator,
checker: &'a Checker,
minor_version: u8,
) -> Option<&'a Expr> {
match TypingTarget::try_from_expr(annotation, semantic, locator, minor_version) {
match TypingTarget::try_from_expr(annotation, checker, minor_version) {
None |
// Short circuit on top level `None`, `Any` or `Optional`
Some(TypingTarget::None | TypingTarget::Optional(_) | TypingTarget::Any) => None,
@ -262,10 +248,10 @@ pub(crate) fn type_hint_explicitly_allows_none<'a>(
// is found nested inside another type, then the outer type should
// be returned.
Some(TypingTarget::Annotated(expr)) => {
type_hint_explicitly_allows_none(expr, semantic, locator, minor_version)
type_hint_explicitly_allows_none(expr, checker, minor_version)
}
Some(target) => {
if target.contains_none(semantic, locator, minor_version) {
if target.contains_none(checker, minor_version) {
None
} else {
Some(annotation)
@ -279,20 +265,19 @@ pub(crate) fn type_hint_explicitly_allows_none<'a>(
/// This function assumes that the annotation is a valid typing annotation expression.
pub(crate) fn type_hint_resolves_to_any(
annotation: &Expr,
semantic: &SemanticModel,
locator: &Locator,
checker: &Checker,
minor_version: u8,
) -> bool {
match TypingTarget::try_from_expr(annotation, semantic, locator, minor_version) {
match TypingTarget::try_from_expr(annotation, checker, minor_version) {
None |
// Short circuit on top level `Any`
Some(TypingTarget::Any) => true,
// Top-level `Annotated` node should check if the inner type resolves
// to `Any`.
Some(TypingTarget::Annotated(expr)) => {
type_hint_resolves_to_any(expr, semantic, locator, minor_version)
type_hint_resolves_to_any(expr, checker, minor_version)
}
Some(target) => target.contains_any(semantic, locator, minor_version),
Some(target) => target.contains_any(checker, minor_version),
}
}

View file

@ -2,11 +2,33 @@
use ruff_python_ast::relocate::relocate_expr;
use ruff_python_ast::str::raw_contents;
use ruff_python_ast::{ExprStringLiteral, ModExpression, StringFlags, StringLiteral};
use ruff_python_ast::{Expr, ExprStringLiteral, ModExpression, StringFlags, StringLiteral};
use ruff_text_size::Ranged;
use crate::{parse_expression, parse_expression_range, ParseError, Parsed};
type AnnotationParseResult = Result<ParsedAnnotation, ParseError>;
#[derive(Debug)]
pub struct ParsedAnnotation {
parsed: Parsed<ModExpression>,
kind: AnnotationKind,
}
impl ParsedAnnotation {
pub fn parsed(&self) -> &Parsed<ModExpression> {
&self.parsed
}
pub fn expression(&self) -> &Expr {
self.parsed.expr()
}
pub fn kind(&self) -> AnnotationKind {
self.kind
}
}
#[derive(Copy, Clone, Debug)]
pub enum AnnotationKind {
/// The annotation is defined as part a simple string literal,
@ -34,7 +56,7 @@ impl AnnotationKind {
pub fn parse_type_annotation(
string_expr: &ExprStringLiteral,
source: &str,
) -> Result<(Parsed<ModExpression>, AnnotationKind), ParseError> {
) -> AnnotationParseResult {
let expr_text = &source[string_expr.range()];
if let [string_literal] = string_expr.value.as_slice() {
@ -58,23 +80,22 @@ pub fn parse_type_annotation(
fn parse_simple_type_annotation(
string_literal: &StringLiteral,
source: &str,
) -> Result<(Parsed<ModExpression>, AnnotationKind), ParseError> {
Ok((
parse_expression_range(
source,
string_literal
) -> AnnotationParseResult {
let range_excluding_quotes = string_literal
.range()
.add_start(string_literal.flags.opener_len())
.sub_end(string_literal.flags.closer_len()),
)?,
AnnotationKind::Simple,
))
.sub_end(string_literal.flags.closer_len());
Ok(ParsedAnnotation {
parsed: parse_expression_range(source, range_excluding_quotes)?,
kind: AnnotationKind::Simple,
})
}
fn parse_complex_type_annotation(
string_expr: &ExprStringLiteral,
) -> Result<(Parsed<ModExpression>, AnnotationKind), ParseError> {
fn parse_complex_type_annotation(string_expr: &ExprStringLiteral) -> AnnotationParseResult {
let mut parsed = parse_expression(string_expr.value.to_str())?;
relocate_expr(parsed.expr_mut(), string_expr.range());
Ok((parsed, AnnotationKind::Complex))
Ok(ParsedAnnotation {
parsed,
kind: AnnotationKind::Complex,
})
}