Remove parser dependency from ruff-python-ast (#6096)

This commit is contained in:
Micha Reiser 2023-07-26 17:47:22 +02:00 committed by GitHub
parent 99127243f4
commit 2cf00fee96
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
658 changed files with 1714 additions and 1546 deletions

View file

@ -0,0 +1,240 @@
use rustpython_ast::text_size::TextSize;
use rustpython_ast::{CmpOp, Expr, Mod, ModModule, Ranged, Suite};
use rustpython_parser as parser;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::text_size::TextRange;
use rustpython_parser::{lexer, Mode, ParseError, Tok};
pub mod token_kind;
pub mod typing;
/// Collect tokens up to and including the first error.
///
/// The returned vector therefore contains only `Ok` results, except
/// possibly a single trailing `Err` at which lexing stopped.
pub fn tokenize(contents: &str) -> Vec<LexResult> {
    // `scan` carries a "have we already emitted an error?" flag: once an
    // error has been yielded, the next call returns `None`, which terminates
    // collection. This keeps the error itself in the output, matching the
    // contract above.
    lexer::lex(contents, Mode::Module)
        .scan(false, |seen_error, result| {
            if *seen_error {
                None
            } else {
                *seen_error = result.is_err();
                Some(result)
            }
        })
        .collect()
}
/// Parse a full Python program from its tokens.
///
/// Returns the module body as a [`Suite`], or a [`ParseError`] if the token
/// stream does not form a valid module.
pub fn parse_program_tokens(
    lxr: Vec<LexResult>,
    source_path: &str,
) -> Result<Suite, ParseError> {
    // `Mode::Module` guarantees the parser produces a `Mod::Module`; any
    // other variant is unreachable.
    parser::parse_tokens(lxr, Mode::Module, source_path).map(|top| match top {
        Mod::Module(ModModule { body, .. }) => body,
        _ => unreachable!("Mode::Module always produces a Mod::Module"),
    })
}
/// Return the [`TextRange`] of the first `Tok::Colon` token within `range`,
/// or `None` if no colon is lexed there.
///
/// Token ranges are absolute (relative to `source`), since lexing starts at
/// `range.start()`.
pub fn first_colon_range(range: TextRange, source: &str) -> Option<TextRange> {
    let contents = &source[range];
    // `flatten` discards lex errors; we only care about successfully lexed
    // tokens.
    for (tok, tok_range) in lexer::lex_starts_at(contents, Mode::Module, range.start()).flatten() {
        if tok.is_colon() {
            return Some(tok_range);
        }
    }
    None
}
/// Extract all [`CmpOp`] operators from an expression snippet, with appropriate
/// ranges.
///
/// `RustPython` doesn't include line and column information on [`CmpOp`] nodes.
/// `CPython` doesn't either. This method iterates over the token stream and
/// re-identifies [`CmpOp`] nodes, annotating them with valid ranges.
pub fn locate_cmp_ops(expr: &Expr, source: &str) -> Vec<LocatedCmpOp> {
// If `Expr` is a multi-line expression, we need to parenthesize it to
// ensure that it's lexed correctly.
let contents = &source[expr.range()];
let parenthesized_contents = format!("({contents})");
let mut tok_iter = lexer::lex(&parenthesized_contents, Mode::Expression)
.flatten()
.skip(1)
.map(|(tok, range)| (tok, range - TextSize::from(1)))
.filter(|(tok, _)| !matches!(tok, Tok::NonLogicalNewline | Tok::Comment(_)))
.peekable();
let mut ops: Vec<LocatedCmpOp> = vec![];
let mut count = 0u32;
loop {
let Some((tok, range)) = tok_iter.next() else {
break;
};
if matches!(tok, Tok::Lpar) {
count = count.saturating_add(1);
continue;
} else if matches!(tok, Tok::Rpar) {
count = count.saturating_sub(1);
continue;
}
if count == 0 {
match tok {
Tok::Not => {
if let Some((_, next_range)) =
tok_iter.next_if(|(tok, _)| matches!(tok, Tok::In))
{
ops.push(LocatedCmpOp::new(
TextRange::new(range.start(), next_range.end()),
CmpOp::NotIn,
));
}
}
Tok::In => {
ops.push(LocatedCmpOp::new(range, CmpOp::In));
}
Tok::Is => {
let op = if let Some((_, next_range)) =
tok_iter.next_if(|(tok, _)| matches!(tok, Tok::Not))
{
LocatedCmpOp::new(
TextRange::new(range.start(), next_range.end()),
CmpOp::IsNot,
)
} else {
LocatedCmpOp::new(range, CmpOp::Is)
};
ops.push(op);
}
Tok::NotEqual => {
ops.push(LocatedCmpOp::new(range, CmpOp::NotEq));
}
Tok::EqEqual => {
ops.push(LocatedCmpOp::new(range, CmpOp::Eq));
}
Tok::GreaterEqual => {
ops.push(LocatedCmpOp::new(range, CmpOp::GtE));
}
Tok::Greater => {
ops.push(LocatedCmpOp::new(range, CmpOp::Gt));
}
Tok::LessEqual => {
ops.push(LocatedCmpOp::new(range, CmpOp::LtE));
}
Tok::Less => {
ops.push(LocatedCmpOp::new(range, CmpOp::Lt));
}
_ => {}
}
}
}
ops
}
/// A [`CmpOp`] annotated with the [`TextRange`] at which it occurs in the
/// source, as recovered by [`locate_cmp_ops`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LocatedCmpOp {
    /// The range of the operator token(s) — for two-word operators such as
    /// `is not`, the range spans both words.
    pub range: TextRange,
    /// The comparison operator itself.
    pub op: CmpOp,
}

impl LocatedCmpOp {
    /// Construct a [`LocatedCmpOp`] from anything convertible into a
    /// [`TextRange`] (e.g. a `Range<TextSize>`).
    fn new<T: Into<TextRange>>(range: T, op: CmpOp) -> Self {
        Self {
            range: range.into(),
            op,
        }
    }
}
#[cfg(test)]
mod tests {
    use crate::{first_colon_range, locate_cmp_ops, LocatedCmpOp};
    use anyhow::Result;
    use ruff_text_size::TextSize;
    use rustpython_ast::text_size::{TextLen, TextRange};
    use rustpython_ast::CmpOp;
    use rustpython_ast::Expr;
    use rustpython_parser::Parse;

    #[test]
    fn extract_first_colon_range() {
        let contents = "with a: pass";
        let range = first_colon_range(
            TextRange::new(TextSize::from(0), contents.text_len()),
            contents,
        )
        .unwrap();
        assert_eq!(&contents[range], ":");
        assert_eq!(range, TextRange::new(TextSize::from(6), TextSize::from(7)));
    }

    #[test]
    fn extract_cmp_op_location() -> Result<()> {
        // Assert that `contents` contains exactly one comparison operator,
        // `op`, spanning byte offsets `start..end`.
        fn check(contents: &str, start: u32, end: u32, op: CmpOp) -> Result<()> {
            let expr = Expr::parse(contents, "<filename>")?;
            assert_eq!(
                locate_cmp_ops(&expr, contents),
                vec![LocatedCmpOp::new(
                    TextSize::from(start)..TextSize::from(end),
                    op
                )]
            );
            Ok(())
        }

        check("x == 1", 2, 4, CmpOp::Eq)?;
        check("x != 1", 2, 4, CmpOp::NotEq)?;
        check("x is 1", 2, 4, CmpOp::Is)?;
        check("x is not 1", 2, 8, CmpOp::IsNot)?;
        check("x in 1", 2, 4, CmpOp::In)?;
        check("x not in 1", 2, 8, CmpOp::NotIn)?;
        // Nested comparisons inside parentheses are ignored: only the
        // top-level `!=` is reported.
        check("x != (1 is not 2)", 2, 4, CmpOp::NotEq)?;
        Ok(())
    }
}

View file

@ -0,0 +1,448 @@
use rustpython_parser::Tok;
// TODO move to ruff_python_parser?
/// A lightweight, payload-free mirror of [`Tok`](rustpython_parser::Tok).
///
/// Unlike `Tok`, every variant here is `Copy` and carries no owned data
/// (names, strings, numbers are dropped), which makes `TokenKind` cheap to
/// store and compare. Convert from a `Tok` via [`TokenKind::from_token`] or
/// the `From<&Tok>` impl.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub enum TokenKind {
    /// Token value for a name, commonly known as an identifier.
    Name,
    /// Token value for an integer.
    Int,
    /// Token value for a floating point number.
    Float,
    /// Token value for a complex number.
    Complex,
    /// Token value for a string.
    String,
    /// Token value for a Jupyter magic command.
    MagicCommand,
    /// Token value for a comment. These are filtered out of the token stream prior to parsing.
    Comment,
    /// Token value for a newline.
    Newline,
    /// Token value for a newline that is not a logical line break. These are filtered out of
    /// the token stream prior to parsing.
    NonLogicalNewline,
    /// Token value for an indent.
    Indent,
    /// Token value for a dedent.
    Dedent,
    /// Token value marking the end of the input.
    EndOfFile,
    /// Token value for a left parenthesis `(`.
    Lpar,
    /// Token value for a right parenthesis `)`.
    Rpar,
    /// Token value for a left square bracket `[`.
    Lsqb,
    /// Token value for a right square bracket `]`.
    Rsqb,
    /// Token value for a colon `:`.
    Colon,
    /// Token value for a comma `,`.
    Comma,
    /// Token value for a semicolon `;`.
    Semi,
    /// Token value for plus `+`.
    Plus,
    /// Token value for minus `-`.
    Minus,
    /// Token value for star `*`.
    Star,
    /// Token value for slash `/`.
    Slash,
    /// Token value for vertical bar `|`.
    Vbar,
    /// Token value for ampersand `&`.
    Amper,
    /// Token value for less than `<`.
    Less,
    /// Token value for greater than `>`.
    Greater,
    /// Token value for equal `=`.
    Equal,
    /// Token value for dot `.`.
    Dot,
    /// Token value for percent `%`.
    Percent,
    /// Token value for left bracket `{`.
    Lbrace,
    /// Token value for right bracket `}`.
    Rbrace,
    /// Token value for double equal `==`.
    EqEqual,
    /// Token value for not equal `!=`.
    NotEqual,
    /// Token value for less than or equal `<=`.
    LessEqual,
    /// Token value for greater than or equal `>=`.
    GreaterEqual,
    /// Token value for tilde `~`.
    Tilde,
    /// Token value for caret `^`.
    CircumFlex,
    /// Token value for left shift `<<`.
    LeftShift,
    /// Token value for right shift `>>`.
    RightShift,
    /// Token value for double star `**`.
    DoubleStar,
    /// Token value for double star equal `**=`.
    DoubleStarEqual,
    /// Token value for plus equal `+=`.
    PlusEqual,
    /// Token value for minus equal `-=`.
    MinusEqual,
    /// Token value for star equal `*=`.
    StarEqual,
    /// Token value for slash equal `/=`.
    SlashEqual,
    /// Token value for percent equal `%=`.
    PercentEqual,
    /// Token value for ampersand equal `&=`.
    AmperEqual,
    /// Token value for vertical bar equal `|=`.
    VbarEqual,
    /// Token value for caret equal `^=`.
    CircumflexEqual,
    /// Token value for left shift equal `<<=`.
    LeftShiftEqual,
    /// Token value for right shift equal `>>=`.
    RightShiftEqual,
    /// Token value for double slash `//`.
    DoubleSlash,
    /// Token value for double slash equal `//=`.
    DoubleSlashEqual,
    /// Token value for colon equal `:=`.
    ColonEqual,
    /// Token value for at `@`.
    At,
    /// Token value for at equal `@=`.
    AtEqual,
    /// Token value for arrow `->`.
    Rarrow,
    /// Token value for ellipsis `...`.
    Ellipsis,
    // Self documenting.
    // Keywords (alphabetically):
    False,
    None,
    True,
    And,
    As,
    Assert,
    Async,
    Await,
    Break,
    Class,
    Continue,
    Def,
    Del,
    Elif,
    Else,
    Except,
    Finally,
    For,
    From,
    Global,
    If,
    Import,
    In,
    Is,
    Lambda,
    Nonlocal,
    Not,
    Or,
    Pass,
    Raise,
    Return,
    Try,
    While,
    /// Soft keyword `match` (PEP 634): only a keyword at the start of a
    /// `match` statement.
    Match,
    /// Soft keyword `type` (PEP 695): only a keyword at the start of a
    /// `type` alias statement.
    Type,
    /// Soft keyword `case` (PEP 634): only a keyword within a `match` block.
    Case,
    With,
    Yield,
    // RustPython specific.
    StartModule,
    StartInteractive,
    StartExpression,
}
impl TokenKind {
#[inline]
pub const fn is_newline(&self) -> bool {
matches!(self, TokenKind::Newline | TokenKind::NonLogicalNewline)
}
#[inline]
pub const fn is_unary(&self) -> bool {
matches!(self, TokenKind::Plus | TokenKind::Minus)
}
#[inline]
pub const fn is_keyword(&self) -> bool {
matches!(
self,
TokenKind::False
| TokenKind::True
| TokenKind::None
| TokenKind::And
| TokenKind::As
| TokenKind::Assert
| TokenKind::Await
| TokenKind::Break
| TokenKind::Class
| TokenKind::Continue
| TokenKind::Def
| TokenKind::Del
| TokenKind::Elif
| TokenKind::Else
| TokenKind::Except
| TokenKind::Finally
| TokenKind::For
| TokenKind::From
| TokenKind::Global
| TokenKind::If
| TokenKind::Import
| TokenKind::In
| TokenKind::Is
| TokenKind::Lambda
| TokenKind::Nonlocal
| TokenKind::Not
| TokenKind::Or
| TokenKind::Pass
| TokenKind::Raise
| TokenKind::Return
| TokenKind::Try
| TokenKind::While
| TokenKind::With
| TokenKind::Yield
)
}
#[inline]
pub const fn is_operator(&self) -> bool {
matches!(
self,
TokenKind::Lpar
| TokenKind::Rpar
| TokenKind::Lsqb
| TokenKind::Rsqb
| TokenKind::Comma
| TokenKind::Semi
| TokenKind::Plus
| TokenKind::Minus
| TokenKind::Star
| TokenKind::Slash
| TokenKind::Vbar
| TokenKind::Amper
| TokenKind::Less
| TokenKind::Greater
| TokenKind::Equal
| TokenKind::Dot
| TokenKind::Percent
| TokenKind::Lbrace
| TokenKind::Rbrace
| TokenKind::EqEqual
| TokenKind::NotEqual
| TokenKind::LessEqual
| TokenKind::GreaterEqual
| TokenKind::Tilde
| TokenKind::CircumFlex
| TokenKind::LeftShift
| TokenKind::RightShift
| TokenKind::DoubleStar
| TokenKind::PlusEqual
| TokenKind::MinusEqual
| TokenKind::StarEqual
| TokenKind::SlashEqual
| TokenKind::PercentEqual
| TokenKind::AmperEqual
| TokenKind::VbarEqual
| TokenKind::CircumflexEqual
| TokenKind::LeftShiftEqual
| TokenKind::RightShiftEqual
| TokenKind::DoubleStarEqual
| TokenKind::DoubleSlash
| TokenKind::DoubleSlashEqual
| TokenKind::At
| TokenKind::AtEqual
| TokenKind::Rarrow
| TokenKind::Ellipsis
| TokenKind::ColonEqual
| TokenKind::Colon
| TokenKind::And
| TokenKind::Or
| TokenKind::Not
| TokenKind::In
| TokenKind::Is
)
}
#[inline]
pub const fn is_singleton(&self) -> bool {
matches!(self, TokenKind::False | TokenKind::True | TokenKind::None)
}
#[inline]
pub const fn is_trivia(&self) -> bool {
matches!(
self,
TokenKind::Newline
| TokenKind::Indent
| TokenKind::Dedent
| TokenKind::NonLogicalNewline
| TokenKind::Comment
)
}
#[inline]
pub const fn is_arithmetic(&self) -> bool {
matches!(
self,
TokenKind::DoubleStar
| TokenKind::Star
| TokenKind::Plus
| TokenKind::Minus
| TokenKind::Slash
| TokenKind::DoubleSlash
| TokenKind::At
)
}
#[inline]
pub const fn is_bitwise_or_shift(&self) -> bool {
matches!(
self,
TokenKind::LeftShift
| TokenKind::LeftShiftEqual
| TokenKind::RightShift
| TokenKind::RightShiftEqual
| TokenKind::Amper
| TokenKind::AmperEqual
| TokenKind::Vbar
| TokenKind::VbarEqual
| TokenKind::CircumFlex
| TokenKind::CircumflexEqual
| TokenKind::Tilde
)
}
#[inline]
pub const fn is_soft_keyword(&self) -> bool {
matches!(self, TokenKind::Match | TokenKind::Case)
}
pub const fn from_token(token: &Tok) -> Self {
match token {
Tok::Name { .. } => TokenKind::Name,
Tok::Int { .. } => TokenKind::Int,
Tok::Float { .. } => TokenKind::Float,
Tok::Complex { .. } => TokenKind::Complex,
Tok::String { .. } => TokenKind::String,
Tok::MagicCommand { .. } => TokenKind::MagicCommand,
Tok::Comment(_) => TokenKind::Comment,
Tok::Newline => TokenKind::Newline,
Tok::NonLogicalNewline => TokenKind::NonLogicalNewline,
Tok::Indent => TokenKind::Indent,
Tok::Dedent => TokenKind::Dedent,
Tok::EndOfFile => TokenKind::EndOfFile,
Tok::Lpar => TokenKind::Lpar,
Tok::Rpar => TokenKind::Rpar,
Tok::Lsqb => TokenKind::Lsqb,
Tok::Rsqb => TokenKind::Rsqb,
Tok::Colon => TokenKind::Colon,
Tok::Comma => TokenKind::Comma,
Tok::Semi => TokenKind::Semi,
Tok::Plus => TokenKind::Plus,
Tok::Minus => TokenKind::Minus,
Tok::Star => TokenKind::Star,
Tok::Slash => TokenKind::Slash,
Tok::Vbar => TokenKind::Vbar,
Tok::Amper => TokenKind::Amper,
Tok::Less => TokenKind::Less,
Tok::Greater => TokenKind::Greater,
Tok::Equal => TokenKind::Equal,
Tok::Dot => TokenKind::Dot,
Tok::Percent => TokenKind::Percent,
Tok::Lbrace => TokenKind::Lbrace,
Tok::Rbrace => TokenKind::Rbrace,
Tok::EqEqual => TokenKind::EqEqual,
Tok::NotEqual => TokenKind::NotEqual,
Tok::LessEqual => TokenKind::LessEqual,
Tok::GreaterEqual => TokenKind::GreaterEqual,
Tok::Tilde => TokenKind::Tilde,
Tok::CircumFlex => TokenKind::CircumFlex,
Tok::LeftShift => TokenKind::LeftShift,
Tok::RightShift => TokenKind::RightShift,
Tok::DoubleStar => TokenKind::DoubleStar,
Tok::DoubleStarEqual => TokenKind::DoubleStarEqual,
Tok::PlusEqual => TokenKind::PlusEqual,
Tok::MinusEqual => TokenKind::MinusEqual,
Tok::StarEqual => TokenKind::StarEqual,
Tok::SlashEqual => TokenKind::SlashEqual,
Tok::PercentEqual => TokenKind::PercentEqual,
Tok::AmperEqual => TokenKind::AmperEqual,
Tok::VbarEqual => TokenKind::VbarEqual,
Tok::CircumflexEqual => TokenKind::CircumflexEqual,
Tok::LeftShiftEqual => TokenKind::LeftShiftEqual,
Tok::RightShiftEqual => TokenKind::RightShiftEqual,
Tok::DoubleSlash => TokenKind::DoubleSlash,
Tok::DoubleSlashEqual => TokenKind::DoubleSlashEqual,
Tok::ColonEqual => TokenKind::ColonEqual,
Tok::At => TokenKind::At,
Tok::AtEqual => TokenKind::AtEqual,
Tok::Rarrow => TokenKind::Rarrow,
Tok::Ellipsis => TokenKind::Ellipsis,
Tok::False => TokenKind::False,
Tok::None => TokenKind::None,
Tok::True => TokenKind::True,
Tok::And => TokenKind::And,
Tok::As => TokenKind::As,
Tok::Assert => TokenKind::Assert,
Tok::Async => TokenKind::Async,
Tok::Await => TokenKind::Await,
Tok::Break => TokenKind::Break,
Tok::Class => TokenKind::Class,
Tok::Continue => TokenKind::Continue,
Tok::Def => TokenKind::Def,
Tok::Del => TokenKind::Del,
Tok::Elif => TokenKind::Elif,
Tok::Else => TokenKind::Else,
Tok::Except => TokenKind::Except,
Tok::Finally => TokenKind::Finally,
Tok::For => TokenKind::For,
Tok::From => TokenKind::From,
Tok::Global => TokenKind::Global,
Tok::If => TokenKind::If,
Tok::Import => TokenKind::Import,
Tok::In => TokenKind::In,
Tok::Is => TokenKind::Is,
Tok::Lambda => TokenKind::Lambda,
Tok::Nonlocal => TokenKind::Nonlocal,
Tok::Not => TokenKind::Not,
Tok::Or => TokenKind::Or,
Tok::Pass => TokenKind::Pass,
Tok::Raise => TokenKind::Raise,
Tok::Return => TokenKind::Return,
Tok::Try => TokenKind::Try,
Tok::While => TokenKind::While,
Tok::Match => TokenKind::Match,
Tok::Case => TokenKind::Case,
Tok::Type => TokenKind::Type,
Tok::With => TokenKind::With,
Tok::Yield => TokenKind::Yield,
Tok::StartModule => TokenKind::StartModule,
Tok::StartInteractive => TokenKind::StartInteractive,
Tok::StartExpression => TokenKind::StartExpression,
}
}
}
/// Delegates to [`TokenKind::from_token`], enabling `TokenKind::from(&tok)`
/// and `(&tok).into()` conversions from borrowed tokens.
impl From<&Tok> for TokenKind {
    fn from(value: &Tok) -> Self {
        Self::from_token(value)
    }
}

View file

@ -0,0 +1,48 @@
use anyhow::Result;
use ruff_python_ast::relocate::relocate_expr;
use ruff_python_ast::str;
use ruff_text_size::{TextLen, TextRange};
use rustpython_ast::Expr;
use rustpython_parser::Parse;
/// Whether a string-literal type annotation could be located precisely.
// `Debug`/`PartialEq`/`Eq` added per convention for small public enums;
// `Copy`/`Clone` were already derived, so this is backward compatible.
#[derive(is_macro::Is, Copy, Clone, Debug, PartialEq, Eq)]
pub enum AnnotationKind {
    /// The annotation is defined as part of a simple string literal,
    /// e.g. `x: "List[int]" = []`. Annotations within simple literals
    /// can be accurately located. For example, we can underline specific
    /// expressions within the annotation and apply automatic fixes, which is
    /// not possible for complex string literals.
    Simple,
    /// The annotation is defined as part of a complex string literal, such as
    /// a literal containing an implicit concatenation or escaped characters,
    /// e.g. `x: "List" "[int]" = []`. These are comparatively rare, but valid.
    Complex,
}
/// Parse a type annotation from a string.
///
/// `value` is the contents of the string literal, while `range` is the span
/// of the whole literal within `source`. Returns the parsed expression
/// together with the [`AnnotationKind`] describing how precisely its ranges
/// map back to `source`.
pub fn parse_type_annotation(
    value: &str,
    range: TextRange,
    source: &str,
) -> Result<(Expr, AnnotationKind)> {
    let expression = &source[range];

    // The annotation is considered "simple" if and only if the raw
    // representation (e.g., `List[int]` within `"List[int]"`) exactly matches
    // the parsed representation. This isn't the case, e.g., for implicit
    // concatenations, or for annotations that contain escaped quotes.
    if str::raw_contents(expression) == Some(value) {
        // Parse at an offset just past the opening quote, so that the ranges
        // on the resulting expression point directly into `source`.
        let leading_quote = str::leading_quote(expression).unwrap();
        let start = range.start() + leading_quote.text_len();
        let expr = Expr::parse_starts_at(value, "<filename>", start)?;
        Ok((expr, AnnotationKind::Simple))
    } else {
        // Otherwise, consider this a "complex" annotation: parse it at offset
        // zero and relocate the resulting ranges onto the literal's span.
        let mut parsed = Expr::parse(value, "<filename>")?;
        relocate_expr(&mut parsed, range);
        Ok((parsed, AnnotationKind::Complex))
    }
}