//! This crate can be used to parse Python source code into an Abstract
//! Syntax Tree.
//!
//! ## Overview
//!
//! The process by which source code is parsed into an AST can be broken down
//! into two general stages: [lexical analysis] and [parsing].
//!
//! During lexical analysis, the source code is converted into a stream of lexical
//! tokens that represent the smallest meaningful units of the language. For example,
//! the source code `print("Hello world")` would _roughly_ be converted into the following
//! stream of tokens:
//!
//! ```text
//! Name("print"), LeftParen, String("Hello world"), RightParen
//! ```
//!
//! These tokens are then consumed by the `ruff_python_parser`, which matches them against a set of
//! grammar rules to verify that the source code is syntactically valid and to construct
//! an AST that represents the source code.
//!
//! During parsing, the `ruff_python_parser` consumes the tokens generated by the lexer and constructs
//! a tree representation of the source code. The tree is made up of nodes that represent
//! the different syntactic constructs of the language. If the source code is syntactically
//! invalid, parsing fails and an error is returned. After a successful parse, the AST can
//! be used to perform further analysis on the source code. Continuing with the example
//! above, the AST generated by the `ruff_python_parser` would _roughly_ look something like this:
//!
//! ```text
//! node: Expr {
//!     value: {
//!         node: Call {
//!             func: {
//!                 node: Name {
//!                     id: "print",
//!                     ctx: Load,
//!                 },
//!             },
//!             args: [
//!                 node: Constant {
//!                     value: Str("Hello World"),
//!                     kind: None,
//!                 },
//!             ],
//!             keywords: [],
//!         },
//!     },
//! },
//! ```
//!
//! **Note:** The tokens/ASTs shown above are not the exact tokens/ASTs generated by the
//! `ruff_python_parser`. Refer to the [playground](https://play.ruff.rs) for the correct representation.
//!
//! ## Source code layout
//!
//! The functionality of this crate is split into several modules:
//!
//! - token: This module contains the definition of the tokens that are generated by the lexer.
//! - [lexer]: This module contains the lexer and is responsible for generating the tokens.
//! - parser: This module contains an interface to the [Parsed] and is responsible for generating the AST.
//! - mode: This module contains the definition of the different modes that the `ruff_python_parser` can be in.
//!
//! [lexical analysis]: https://en.wikipedia.org/wiki/Lexical_analysis
//! [parsing]: https://en.wikipedia.org/wiki/Parsing
//! [lexer]: crate::lexer

use std::iter::FusedIterator;
use std::ops::Deref;

pub use crate::error::{
    InterpolatedStringErrorType, LexicalErrorType, ParseError, ParseErrorType,
    UnsupportedSyntaxError, UnsupportedSyntaxErrorKind,
};
pub use crate::parser::ParseOptions;
pub use crate::token::{Token, TokenKind};

use crate::parser::Parser;
use ruff_python_ast::{
    Expr, Mod, ModExpression, ModModule, PySourceType, StringFlags, StringLiteral, Suite,
};
use ruff_python_trivia::CommentRanges;
use ruff_text_size::{Ranged, TextRange, TextSize};

mod error;
pub mod lexer;
mod parser;
pub mod semantic_errors;
mod string;
mod token;
mod token_set;
mod token_source;
pub mod typing;

/// Parse a full Python module usually consisting of multiple lines.
///
/// This is a convenience function that can be used to parse a full Python program without having
/// to specify the [`Mode`] or the location. It is probably what you want to use most of the time.
///
/// # Example
///
/// For example, parsing a simple function definition and a call to that function:
///
/// ```
/// use ruff_python_parser::parse_module;
///
/// let source = r#"
/// def foo():
///     return 42
///
/// print(foo())
/// "#;
///
/// let module = parse_module(source);
/// assert!(module.is_ok());
/// ```
pub fn parse_module(source: &str) -> Result<Parsed<ModModule>, ParseError> {
    Parser::new(source, ParseOptions::from(Mode::Module))
        .parse()
        .try_into_module()
        .unwrap()
        .into_result()
}

/// Parses a single Python expression.
///
/// This convenience function can be used to parse a single expression without having to
/// specify the [`Mode`] or the location.
///
/// # Example
///
/// For example, parsing a single expression denoting the addition of two numbers:
///
/// ```
/// use ruff_python_parser::parse_expression;
///
/// let expr = parse_expression("1 + 2");
/// assert!(expr.is_ok());
/// ```
pub fn parse_expression(source: &str) -> Result<Parsed<ModExpression>, ParseError> {
    Parser::new(source, ParseOptions::from(Mode::Expression))
        .parse()
        .try_into_expression()
        .unwrap()
        .into_result()
}

/// Parses a Python expression for the given range in the source.
///
/// This function allows specifying the range of the expression in the source code; other than
/// that, it behaves exactly like [`parse_expression`].
///
/// # Example
///
/// Parsing one of the numeric literals which is part of an addition expression:
///
/// ```
/// use ruff_python_parser::parse_expression_range;
/// # use ruff_text_size::{TextRange, TextSize};
///
/// let parsed = parse_expression_range("11 + 22 + 33", TextRange::new(TextSize::new(5), TextSize::new(7)));
/// assert!(parsed.is_ok());
/// ```
pub fn parse_expression_range(
    source: &str,
    range: TextRange,
) -> Result<Parsed<ModExpression>, ParseError> {
    let source = &source[..range.end().to_usize()];

    Parser::new_starts_at(source, range.start(), ParseOptions::from(Mode::Expression))
        .parse()
        .try_into_expression()
        .unwrap()
        .into_result()
}

/// Parses a Python expression as if it is parenthesized.
///
/// It behaves similarly to [`parse_expression_range`] but allows what would be valid within
/// parentheses.
///
/// # Example
///
/// Parsing an expression that would be valid within parentheses:
///
/// ```
/// use ruff_python_parser::parse_parenthesized_expression_range;
/// # use ruff_text_size::{TextRange, TextSize};
///
/// let parsed = parse_parenthesized_expression_range("'''\n int | str'''", TextRange::new(TextSize::new(3), TextSize::new(14)));
/// assert!(parsed.is_ok());
/// ```
pub fn parse_parenthesized_expression_range(
    source: &str,
    range: TextRange,
) -> Result<Parsed<ModExpression>, ParseError> {
    let source = &source[..range.end().to_usize()];
    let parsed = Parser::new_starts_at(
        source,
        range.start(),
        ParseOptions::from(Mode::ParenthesizedExpression),
    )
    .parse();
    parsed.try_into_expression().unwrap().into_result()
}

/// Parses a Python expression from a string annotation.
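///
/// The string's quote characters are stripped from the given range before parsing. If the string
/// is triple-quoted, the contents are parsed as if they were parenthesized, so the annotation may
/// span multiple lines.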
///
/// # Example
///
/// Parsing a string annotation:
///
/// ```
/// use ruff_python_parser::parse_string_annotation;
/// use ruff_python_ast::{StringLiteral, StringLiteralFlags, AtomicNodeIndex};
/// use ruff_text_size::{TextRange, TextSize};
///
/// let string = StringLiteral {
///     value: "'''\n int | str'''".to_string().into_boxed_str(),
///     flags: StringLiteralFlags::empty(),
///     range: TextRange::new(TextSize::new(0), TextSize::new(16)),
///     node_index: AtomicNodeIndex::dummy(),
/// };
/// let parsed = parse_string_annotation("'''\n int | str'''", &string);
/// assert!(!parsed.is_ok());
/// ```
pub fn parse_string_annotation(
    source: &str,
    string: &StringLiteral,
) -> Result<Parsed<ModExpression>, ParseError> {
    let range = string
        .range()
        .add_start(string.flags.opener_len())
        .sub_end(string.flags.closer_len());
    let source = &source[..range.end().to_usize()];

    if string.flags.is_triple_quoted() {
        parse_parenthesized_expression_range(source, range)
    } else {
        parse_expression_range(source, range)
    }
}

/// Parse the given Python source code using the specified [`ParseOptions`].
///
/// This function is the most general function to parse Python code. Based on the [`Mode`] supplied
/// via the [`ParseOptions`], it can be used to parse a single expression, a full Python program,
/// an interactive expression or a Python program containing IPython escape commands.
///
/// # Example
///
/// If we want to parse a simple expression, we can use the [`Mode::Expression`] mode during
/// parsing:
///
/// ```
/// use ruff_python_parser::{parse, Mode, ParseOptions};
///
/// let parsed = parse("1 + 2", ParseOptions::from(Mode::Expression));
/// assert!(parsed.is_ok());
/// ```
///
/// Alternatively, we can parse a full Python program consisting of multiple lines:
///
/// ```
/// use ruff_python_parser::{parse, Mode, ParseOptions};
///
/// let source = r#"
/// class Greeter:
///
///     def greet(self):
///         print("Hello, world!")
/// "#;
/// let parsed = parse(source, ParseOptions::from(Mode::Module));
/// assert!(parsed.is_ok());
/// ```
///
/// Additionally, we can parse a Python program containing IPython escapes:
///
/// ```
/// use ruff_python_parser::{parse, Mode, ParseOptions};
///
/// let source = r#"
/// %timeit 1 + 2
/// ?str.replace
/// !ls
/// "#;
/// let parsed = parse(source, ParseOptions::from(Mode::Ipython));
/// assert!(parsed.is_ok());
/// ```
pub fn parse(source: &str, options: ParseOptions) -> Result<Parsed<Mod>, ParseError> {
    parse_unchecked(source, options).into_result()
}

/// Parse the given Python source code using the specified [`ParseOptions`].
///
/// This is the same as the [`parse`] function except that it doesn't check for any [`ParseError`]
/// and returns the [`Parsed`] as is.
pub fn parse_unchecked(source: &str, options: ParseOptions) -> Parsed<Mod> {
    Parser::new(source, options).parse()
}

/// Parse the given Python source code using the specified [`PySourceType`].
pub fn parse_unchecked_source(source: &str, source_type: PySourceType) -> Parsed<ModModule> {
    // SAFETY: Safe because `PySourceType` always parses to a `ModModule`
    Parser::new(source, ParseOptions::from(source_type))
        .parse()
        .try_into_module()
        .unwrap()
}

/// Represents the parsed source code.
#[derive(Debug, PartialEq, Clone)]
pub struct Parsed<T> {
    syntax: T,
    tokens: Tokens,
    errors: Vec<ParseError>,
    unsupported_syntax_errors: Vec<UnsupportedSyntaxError>,
}

impl<T> Parsed<T> {
    /// Returns the syntax node represented by this parsed output.
    pub fn syntax(&self) -> &T {
        &self.syntax
    }

    /// Returns all the tokens for the parsed output.
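    ///
    /// # Example
    ///
    /// A minimal sketch of inspecting the lexed token kinds:
    ///
    /// ```
    /// use ruff_python_parser::{parse_module, TokenKind};
    ///
    /// let parsed = parse_module("def foo(): pass").expect("valid syntax");
    /// let kinds: Vec<_> = parsed.tokens().iter().map(|token| token.kind()).collect();
    /// assert!(kinds.starts_with(&[TokenKind::Def, TokenKind::Name, TokenKind::Lpar]));
    /// ```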
    pub fn tokens(&self) -> &Tokens {
        &self.tokens
    }

    /// Returns a list of syntax errors found during parsing.
    pub fn errors(&self) -> &[ParseError] {
        &self.errors
    }

    /// Returns a list of version-related syntax errors found during parsing.
    pub fn unsupported_syntax_errors(&self) -> &[UnsupportedSyntaxError] {
        &self.unsupported_syntax_errors
    }

    /// Consumes the [`Parsed`] output and returns the contained syntax node.
    pub fn into_syntax(self) -> T {
        self.syntax
    }

    /// Consumes the [`Parsed`] output and returns a list of syntax errors found during parsing.
    pub fn into_errors(self) -> Vec<ParseError> {
        self.errors
    }

    /// Returns `true` if the parsed source code is valid i.e., it has no [`ParseError`]s.
    ///
    /// Note that this does not include version-related [`UnsupportedSyntaxError`]s.
    ///
    /// See [`Parsed::has_no_syntax_errors`] for a version that takes these into account.
    pub fn has_valid_syntax(&self) -> bool {
        self.errors.is_empty()
    }

    /// Returns `true` if the parsed source code is invalid i.e., it has [`ParseError`]s.
    ///
    /// Note that this does not include version-related [`UnsupportedSyntaxError`]s.
    ///
    /// See [`Parsed::has_no_syntax_errors`] for a version that takes these into account.
    pub fn has_invalid_syntax(&self) -> bool {
        !self.has_valid_syntax()
    }

    /// Returns `true` if the parsed source code does not contain any [`ParseError`]s *or*
    /// [`UnsupportedSyntaxError`]s.
    ///
    /// See [`Parsed::has_valid_syntax`] for a version specific to [`ParseError`]s.
    pub fn has_no_syntax_errors(&self) -> bool {
        self.has_valid_syntax() && self.unsupported_syntax_errors.is_empty()
    }

    /// Returns `true` if the parsed source code contains any [`ParseError`]s *or*
    /// [`UnsupportedSyntaxError`]s.
    ///
    /// See [`Parsed::has_invalid_syntax`] for a version specific to [`ParseError`]s.
    pub fn has_syntax_errors(&self) -> bool {
        !self.has_no_syntax_errors()
    }

    /// Returns the [`Parsed`] output as a [`Result`], returning [`Ok`] if it has no syntax errors,
    /// or [`Err`] containing the [`ParseError`]s found during parsing.
    ///
    /// Note that any [`unsupported_syntax_errors`](Parsed::unsupported_syntax_errors) will not
    /// cause [`Err`] to be returned.
    pub fn as_result(&self) -> Result<&Parsed<T>, &[ParseError]> {
        if self.has_valid_syntax() {
            Ok(self)
        } else {
            Err(&self.errors)
        }
    }

    /// Consumes the [`Parsed`] output and returns a [`Result`] which is [`Ok`] if it has no syntax
    /// errors, or [`Err`] containing the first [`ParseError`] encountered.
    ///
    /// Note that any [`unsupported_syntax_errors`](Parsed::unsupported_syntax_errors) will not
    /// cause [`Err`] to be returned.
    pub(crate) fn into_result(self) -> Result<Parsed<T>, ParseError> {
        if self.has_valid_syntax() {
            Ok(self)
        } else {
            Err(self.into_errors().into_iter().next().unwrap())
        }
    }
}

impl Parsed<Mod> {
    /// Attempts to convert the [`Parsed<Mod>`] into a [`Parsed<ModModule>`].
    ///
    /// This method checks if the `syntax` field of the output is a [`Mod::Module`]. If it is, the
    /// method returns [`Some(Parsed<ModModule>)`] with the contained module. Otherwise, it
    /// returns [`None`].
    ///
    /// [`Some(Parsed<ModModule>)`]: Some
    pub fn try_into_module(self) -> Option<Parsed<ModModule>> {
        match self.syntax {
            Mod::Module(module) => Some(Parsed {
                syntax: module,
                tokens: self.tokens,
                errors: self.errors,
                unsupported_syntax_errors: self.unsupported_syntax_errors,
            }),
            Mod::Expression(_) => None,
        }
    }

    /// Attempts to convert the [`Parsed<Mod>`] into a [`Parsed<ModExpression>`].
    ///
    /// This method checks if the `syntax` field of the output is a [`Mod::Expression`]. If it is,
    /// the method returns [`Some(Parsed<ModExpression>)`] with the contained expression.
    /// Otherwise, it returns [`None`].
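    ///
    /// # Example
    ///
    /// A minimal sketch using [`parse_unchecked`] with [`Mode::Expression`]:
    ///
    /// ```
    /// use ruff_python_parser::{parse_unchecked, Mode, ParseOptions};
    ///
    /// let parsed = parse_unchecked("1 + 2", ParseOptions::from(Mode::Expression));
    /// assert!(parsed.try_into_expression().is_some());
    /// ```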
    ///
    /// [`Some(Parsed<ModExpression>)`]: Some
    pub fn try_into_expression(self) -> Option<Parsed<ModExpression>> {
        match self.syntax {
            Mod::Module(_) => None,
            Mod::Expression(expression) => Some(Parsed {
                syntax: expression,
                tokens: self.tokens,
                errors: self.errors,
                unsupported_syntax_errors: self.unsupported_syntax_errors,
            }),
        }
    }
}

impl Parsed<ModModule> {
    /// Returns the module body contained in this parsed output as a [`Suite`].
    pub fn suite(&self) -> &Suite {
        &self.syntax.body
    }

    /// Consumes the [`Parsed`] output and returns the module body as a [`Suite`].
    pub fn into_suite(self) -> Suite {
        self.syntax.body
    }
}

impl Parsed<ModExpression> {
    /// Returns the expression contained in this parsed output.
    pub fn expr(&self) -> &Expr {
        &self.syntax.body
    }

    /// Returns a mutable reference to the expression contained in this parsed output.
    pub fn expr_mut(&mut self) -> &mut Expr {
        &mut self.syntax.body
    }

    /// Consumes the [`Parsed`] output and returns the contained [`Expr`].
    pub fn into_expr(self) -> Expr {
        *self.syntax.body
    }
}

/// Tokens represents a vector of lexed [`Token`]s.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Tokens {
    raw: Vec<Token>,
}

impl Tokens {
    pub(crate) fn new(tokens: Vec<Token>) -> Tokens {
        Tokens { raw: tokens }
    }

    /// Returns an iterator over all the tokens that provides context.
    pub fn iter_with_context(&self) -> TokenIterWithContext {
        TokenIterWithContext::new(&self.raw)
    }

    /// Returns a slice of [`Token`]s that are within the given `range`.
    ///
    /// The start and end offset of the given range should be either:
    /// 1. Token boundary
    /// 2. Gap between the tokens
    ///
    /// For example, considering the following tokens and their corresponding range:
    ///
    /// | Token               | Range     |
    /// |---------------------|-----------|
    /// | `Def`               | `0..3`    |
    /// | `Name`              | `4..7`    |
    /// | `Lpar`              | `7..8`    |
    /// | `Rpar`              | `8..9`    |
    /// | `Colon`             | `9..10`   |
    /// | `Newline`           | `10..11`  |
    /// | `Comment`           | `15..24`  |
    /// | `NonLogicalNewline` | `24..25`  |
    /// | `Indent`            | `25..29`  |
    /// | `Pass`              | `29..33`  |
    ///
    /// Here, for (1) a token boundary is considered either the start or end offset of any of the
    /// above tokens. For (2), the gap would be any offset between the `Newline` and `Comment`
    /// tokens, which are 12, 13, and 14.
    ///
    /// Examples:
    /// 1) `4..10` would give `Name`, `Lpar`, `Rpar`, `Colon`
    /// 2) `11..25` would give `Comment`, `NonLogicalNewline`
    /// 3) `12..25` would give the same as (2), and offset 12 is in the "gap"
    /// 4) `9..12` would give `Colon`, `Newline`, and offset 12 is in the "gap"
    /// 5) `18..27` would panic because both the start and end offsets are within a token
    ///
    /// ## Note
    ///
    /// The returned slice can contain the [`TokenKind::Unknown`] token if there was a lexical
    /// error encountered within the given range.
    ///
    /// # Panics
    ///
    /// If either the start or end offset of the given range is within a token range.
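    ///
    /// # Example
    ///
    /// A minimal sketch over a small module, where offsets `5..10` cover `x = 1`:
    ///
    /// ```
    /// use ruff_python_parser::{parse_module, TokenKind};
    /// use ruff_text_size::{TextRange, TextSize};
    ///
    /// let parsed = parse_module("pass\nx = 1").expect("valid syntax");
    /// let tokens = parsed.tokens().in_range(TextRange::new(TextSize::new(5), TextSize::new(10)));
    /// assert_eq!(tokens.first().map(|token| token.kind()), Some(TokenKind::Name));
    /// ```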
    pub fn in_range(&self, range: TextRange) -> &[Token] {
        let tokens_after_start = self.after(range.start());

        match tokens_after_start.binary_search_by_key(&range.end(), Ranged::end) {
            Ok(idx) => {
                // If we found the token with the end offset, that token should be included in the
                // return slice.
                &tokens_after_start[..=idx]
            }
            Err(idx) => {
                if let Some(token) = tokens_after_start.get(idx) {
                    // If it's equal to the start offset, then it's at a token boundary which is
                    // valid. If it's less than the start offset, then it's in the gap between the
                    // tokens which is valid as well.
                    assert!(
                        range.end() <= token.start(),
                        "End offset {:?} is inside a token range {:?}",
                        range.end(),
                        token.range()
                    );
                }

                // This index is where the token with the offset _could_ be, so that token should
                // be excluded from the return slice.
                &tokens_after_start[..idx]
            }
        }
    }

    /// Searches for the token(s) at `offset`.
    ///
    /// Returns [`TokenAt::Between`] if `offset` points directly in between two tokens
    /// (the left token ends at `offset` and the right token starts at `offset`).
    ///
    /// ## Examples
    ///
    /// [Playground](https://play.ruff.rs/f3ad0a55-5931-4a13-96c7-b2b8bfdc9a2e?secondary=Tokens)
    ///
    /// ```
    /// # use ruff_python_ast::PySourceType;
    /// # use ruff_python_parser::{Token, TokenAt, TokenKind};
    /// # use ruff_text_size::{Ranged, TextSize};
    ///
    /// let source = r#"
    /// def test(arg):
    ///     arg.call()
    ///     if True:
    ///         pass
    ///     print("true")
    /// "#.trim();
    ///
    /// let parsed = ruff_python_parser::parse_unchecked_source(source, PySourceType::Python);
    /// let tokens = parsed.tokens();
    ///
    /// let collect_tokens = |offset: TextSize| {
    ///     tokens.at_offset(offset).into_iter().map(|t| (t.kind(), &source[t.range()])).collect::<Vec<_>>()
    /// };
    ///
    /// assert_eq!(collect_tokens(TextSize::new(4)), vec![(TokenKind::Name, "test")]);
    /// assert_eq!(collect_tokens(TextSize::new(6)), vec![(TokenKind::Name, "test")]);
    /// // between `arg` and `.`
    /// assert_eq!(collect_tokens(TextSize::new(22)), vec![(TokenKind::Name, "arg"), (TokenKind::Dot, ".")]);
    /// assert_eq!(collect_tokens(TextSize::new(36)), vec![(TokenKind::If, "if")]);
    /// // Before the dedent token
    /// assert_eq!(collect_tokens(TextSize::new(57)), vec![]);
    /// ```
    pub fn at_offset(&self, offset: TextSize) -> TokenAt {
        match self.binary_search_by_key(&offset, ruff_text_size::Ranged::start) {
            // The token at `index` starts exactly at `offset`.
            // ```python
            // object.attribute
            //       ^ OFFSET
            // ```
            Ok(index) => {
                let token = self[index];
                // `token` starts exactly at `offset`. Test if the offset is right between
                // `token` and the previous token (if there's any)
                if let Some(previous) = index.checked_sub(1).map(|idx| self[idx]) {
                    if previous.end() == offset {
                        return TokenAt::Between(previous, token);
                    }
                }

                TokenAt::Single(token)
            }

            // No token found that starts exactly at the given offset. But it's possible that
            // the token starting before `offset` fully encloses `offset` (its end offset is after `offset`).
            // ```python
            // object.attribute
            //            ^ OFFSET
            // # or
            // if True:
            //     print("test")
            //                  ^ OFFSET
            // ```
            Err(index) => {
                if let Some(previous) = index.checked_sub(1).map(|idx| self[idx]) {
                    if previous.range().contains_inclusive(offset) {
                        return TokenAt::Single(previous);
                    }
                }

                TokenAt::None
            }
        }
    }

    /// Returns a slice of tokens before the given [`TextSize`] offset.
    ///
    /// If the given offset is between two tokens, the returned slice will end just before the
    /// following token. In other words, if the offset is between the end of the previous token and
    /// the start of the next token, the returned slice will end just before the next token.
    ///
    /// # Panics
    ///
    /// If the given offset is inside a token range at any point
    /// other than the start of the range.
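    ///
    /// # Example
    ///
    /// A minimal sketch over a small module, where offset 4 is the end of `pass`, i.e. a token
    /// boundary:
    ///
    /// ```
    /// use ruff_python_parser::{parse_module, TokenKind};
    /// use ruff_text_size::TextSize;
    ///
    /// let parsed = parse_module("pass\nx = 1").expect("valid syntax");
    /// let before = parsed.tokens().before(TextSize::new(4));
    /// assert_eq!(before.last().map(|token| token.kind()), Some(TokenKind::Pass));
    /// ```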
    pub fn before(&self, offset: TextSize) -> &[Token] {
        match self.binary_search_by(|token| token.start().cmp(&offset)) {
            Ok(idx) => &self[..idx],
            Err(idx) => {
                // We can't use `saturating_sub` here because a file could contain a BOM header, in
                // which case the token starts at offset 3 for UTF-8 encoded file content.
                if idx > 0 {
                    if let Some(prev) = self.get(idx - 1) {
                        // If it's equal to the end offset, then it's at a token boundary which is
                        // valid. If it's greater than the end offset, then it's in the gap between
                        // the tokens which is valid as well.
                        assert!(
                            offset >= prev.end(),
                            "Offset {:?} is inside a token range {:?}",
                            offset,
                            prev.range()
                        );
                    }
                }

                &self[..idx]
            }
        }
    }

    /// Returns a slice of tokens after the given [`TextSize`] offset.
    ///
    /// If the given offset is between two tokens, the returned slice will start from the following
    /// token. In other words, if the offset is between the end of the previous token and the start
    /// of the next token, the returned slice will start from the next token.
    ///
    /// # Panics
    ///
    /// If the given offset is inside a token range at any point
    /// other than the start of the range.
    pub fn after(&self, offset: TextSize) -> &[Token] {
        match self.binary_search_by(|token| token.start().cmp(&offset)) {
            Ok(idx) => &self[idx..],
            Err(idx) => {
                // We can't use `saturating_sub` here because a file could contain a BOM header, in
                // which case the token starts at offset 3 for UTF-8 encoded file content.
                if idx > 0 {
                    if let Some(prev) = self.get(idx - 1) {
                        // If it's equal to the end offset, then it's at a token boundary which is
                        // valid. If it's greater than the end offset, then it's in the gap between
                        // the tokens which is valid as well.
                        assert!(
                            offset >= prev.end(),
                            "Offset {:?} is inside a token range {:?}",
                            offset,
                            prev.range()
                        );
                    }
                }

                &self[idx..]
            }
        }
    }
}

impl<'a> IntoIterator for &'a Tokens {
    type Item = &'a Token;
    type IntoIter = std::slice::Iter<'a, Token>;

    fn into_iter(self) -> Self::IntoIter {
        self.iter()
    }
}

impl Deref for Tokens {
    type Target = [Token];

    fn deref(&self) -> &Self::Target {
        &self.raw
    }
}

/// A token that encloses a given offset or ends exactly at it.
#[derive(Debug, Clone)]
pub enum TokenAt {
    /// There's no token at the given offset
    None,

    /// There's a single token at the given offset.
    Single(Token),

    /// The offset falls exactly between two tokens. E.g. `CURSOR` in `call<CURSOR>(arguments)` is
    /// positioned exactly between the `call` and `(` tokens.
    Between(Token, Token),
}

impl Iterator for TokenAt {
    type Item = Token;

    fn next(&mut self) -> Option<Self::Item> {
        match *self {
            TokenAt::None => None,
            TokenAt::Single(token) => {
                *self = TokenAt::None;
                Some(token)
            }
            TokenAt::Between(first, second) => {
                *self = TokenAt::Single(second);
                Some(first)
            }
        }
    }
}

impl FusedIterator for TokenAt {}

impl From<&Tokens> for CommentRanges {
    fn from(tokens: &Tokens) -> Self {
        let mut ranges = vec![];
        for token in tokens {
            if token.kind() == TokenKind::Comment {
                ranges.push(token.range());
            }
        }
        CommentRanges::new(ranges)
    }
}

/// An iterator over the [`Token`]s with context.
///
/// This struct is created by the [`iter_with_context`] method on [`Tokens`]. Refer to its
/// documentation for more details.
///
/// [`iter_with_context`]: Tokens::iter_with_context
#[derive(Debug, Clone)]
pub struct TokenIterWithContext<'a> {
    inner: std::slice::Iter<'a, Token>,
    nesting: u32,
}

impl<'a> TokenIterWithContext<'a> {
    fn new(tokens: &'a [Token]) -> TokenIterWithContext<'a> {
        TokenIterWithContext {
            inner: tokens.iter(),
            nesting: 0,
        }
    }

    /// Return the nesting level the iterator is currently in.
    pub const fn nesting(&self) -> u32 {
        self.nesting
    }

    /// Returns `true` if the iterator is within a parenthesized context.
    pub const fn in_parenthesized_context(&self) -> bool {
        self.nesting > 0
    }

    /// Returns the next [`Token`] in the iterator without consuming it.
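    ///
    /// # Example
    ///
    /// A minimal sketch; peeking does not advance the iterator:
    ///
    /// ```
    /// use ruff_python_parser::parse_module;
    ///
    /// let parsed = parse_module("pass").expect("valid syntax");
    /// let iter = parsed.tokens().iter_with_context();
    /// assert_eq!(iter.peek(), iter.peek());
    /// ```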
    pub fn peek(&self) -> Option<&'a Token> {
        self.clone().next()
    }
}

impl<'a> Iterator for TokenIterWithContext<'a> {
    type Item = &'a Token;

    fn next(&mut self) -> Option<Self::Item> {
        let token = self.inner.next()?;

        match token.kind() {
            TokenKind::Lpar | TokenKind::Lbrace | TokenKind::Lsqb => self.nesting += 1,
            TokenKind::Rpar | TokenKind::Rbrace | TokenKind::Rsqb => {
                self.nesting = self.nesting.saturating_sub(1);
            }
            // This mimics the behavior of re-lexing which reduces the nesting level on the lexer.
            // We don't need to reduce it by 1 because unlike the lexer we see the final token
            // after recovering from every unclosed parenthesis.
            TokenKind::Newline if self.nesting > 0 => {
                self.nesting = 0;
            }
            _ => {}
        }

        Some(token)
    }
}

impl FusedIterator for TokenIterWithContext<'_> {}

/// Controls the different modes by which a source file can be parsed.
///
/// The mode argument specifies in what way code must be parsed.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub enum Mode {
    /// The code consists of a sequence of statements.
    Module,

    /// The code consists of a single expression.
    Expression,

    /// The code consists of a single expression and is parsed as if it were parenthesized;
    /// the parentheses themselves aren't required. This allows a valid multiline expression
    /// without the need for parentheses and is specifically useful for parsing string annotations.
    ParenthesizedExpression,

    /// The code consists of a sequence of statements which can include the
    /// escape commands that are part of IPython syntax.
    ///
    /// ## Supported escape commands:
    ///
    /// - [Magic command system] which is limited to [line magics] and can start
    ///   with `?` or `??`.
    /// - [Dynamic object information] which can start with `?` or `??`.
    /// - [System shell access] which can start with `!` or `!!`.
    /// - [Automatic parentheses and quotes] which can start with `/`, `;`, or `,`.
    ///
    /// [Magic command system]: https://ipython.readthedocs.io/en/stable/interactive/reference.html#magic-command-system
    /// [line magics]: https://ipython.readthedocs.io/en/stable/interactive/magics.html#line-magics
    /// [Dynamic object information]: https://ipython.readthedocs.io/en/stable/interactive/reference.html#dynamic-object-information
    /// [System shell access]: https://ipython.readthedocs.io/en/stable/interactive/reference.html#system-shell-access
    /// [Automatic parentheses and quotes]: https://ipython.readthedocs.io/en/stable/interactive/reference.html#automatic-parentheses-and-quotes
    Ipython,
}

impl std::str::FromStr for Mode {
    type Err = ModeParseError;

    fn from_str(s: &str) -> Result<Self, ModeParseError> {
        match s {
            "exec" | "single" => Ok(Mode::Module),
            "eval" => Ok(Mode::Expression),
            "ipython" => Ok(Mode::Ipython),
            _ => Err(ModeParseError),
        }
    }
}

/// A type that can be represented as [Mode].
pub trait AsMode {
    fn as_mode(&self) -> Mode;
}

impl AsMode for PySourceType {
    fn as_mode(&self) -> Mode {
        match self {
            PySourceType::Python | PySourceType::Stub => Mode::Module,
            PySourceType::Ipynb => Mode::Ipython,
        }
    }
}

/// Returned when a given mode is not valid.
#[derive(Debug)]
pub struct ModeParseError;

impl std::fmt::Display for ModeParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(f, r#"mode must be "exec", "eval", "ipython", or "single""#)
    }
}

#[cfg(test)]
mod tests {
    use std::ops::Range;

    use crate::token::TokenFlags;

    use super::*;

    /// Test case containing a "gap" between two tokens.
    ///
    /// Code:
    const TEST_CASE_WITH_GAP: [(TokenKind, Range<u32>); 10] = [
        (TokenKind::Def, 0..3),
        (TokenKind::Name, 4..7),
        (TokenKind::Lpar, 7..8),
        (TokenKind::Rpar, 8..9),
        (TokenKind::Colon, 9..10),
        (TokenKind::Newline, 10..11),
        // Gap               ||..||
        (TokenKind::Comment, 15..24),
        (TokenKind::NonLogicalNewline, 24..25),
        (TokenKind::Indent, 25..29),
        (TokenKind::Pass, 29..33),
        // No newline at the end to keep the token set full of unique tokens
    ];

    /// Helper function to create [`Tokens`] from an iterator of (kind, range).
    fn new_tokens(tokens: impl Iterator<Item = (TokenKind, Range<u32>)>) -> Tokens {
        Tokens::new(
            tokens
                .map(|(kind, range)| {
                    Token::new(
                        kind,
                        TextRange::new(TextSize::new(range.start), TextSize::new(range.end)),
                        TokenFlags::empty(),
                    )
                })
                .collect(),
        )
    }

    #[test]
    fn tokens_after_offset_at_token_start() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let after = tokens.after(TextSize::new(8));
        assert_eq!(after.len(), 7);
        assert_eq!(after.first().unwrap().kind(), TokenKind::Rpar);
    }

    #[test]
    fn tokens_after_offset_at_token_end() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let after = tokens.after(TextSize::new(11));
        assert_eq!(after.len(), 4);
        assert_eq!(after.first().unwrap().kind(), TokenKind::Comment);
    }

    #[test]
    fn tokens_after_offset_between_tokens() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let after = tokens.after(TextSize::new(13));
        assert_eq!(after.len(), 4);
        assert_eq!(after.first().unwrap().kind(), TokenKind::Comment);
    }

    #[test]
    fn tokens_after_offset_at_last_token_end() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let after = tokens.after(TextSize::new(33));
        assert_eq!(after.len(), 0);
    }

    #[test]
    #[should_panic(expected = "Offset 5 is inside a token range 4..7")]
    fn tokens_after_offset_inside_token() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        tokens.after(TextSize::new(5));
    }

    #[test]
    fn tokens_before_offset_at_first_token_start() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let before = tokens.before(TextSize::new(0));
        assert_eq!(before.len(), 0);
    }

    #[test]
    fn tokens_before_offset_after_first_token_gap() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let before = tokens.before(TextSize::new(3));
        assert_eq!(before.len(), 1);
        assert_eq!(before.last().unwrap().kind(), TokenKind::Def);
    }

    #[test]
    fn tokens_before_offset_at_second_token_start() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let before = tokens.before(TextSize::new(4));
        assert_eq!(before.len(), 1);
        assert_eq!(before.last().unwrap().kind(), TokenKind::Def);
    }

    #[test]
    fn tokens_before_offset_at_token_start() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let before = tokens.before(TextSize::new(8));
        assert_eq!(before.len(), 3);
        assert_eq!(before.last().unwrap().kind(), TokenKind::Lpar);
    }

    #[test]
    fn tokens_before_offset_at_token_end() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let before = tokens.before(TextSize::new(11));
        assert_eq!(before.len(), 6);
        assert_eq!(before.last().unwrap().kind(), TokenKind::Newline);
    }

    #[test]
    fn tokens_before_offset_between_tokens() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let before = tokens.before(TextSize::new(13));
        assert_eq!(before.len(), 6);
        assert_eq!(before.last().unwrap().kind(), TokenKind::Newline);
    }

    #[test]
    fn tokens_before_offset_at_last_token_end() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let before = tokens.before(TextSize::new(33));
        assert_eq!(before.len(), 10);
        assert_eq!(before.last().unwrap().kind(), TokenKind::Pass);
    }

    #[test]
    #[should_panic(expected = "Offset 5 is inside a token range 4..7")]
    fn tokens_before_offset_inside_token() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        tokens.before(TextSize::new(5));
    }

    #[test]
    fn tokens_in_range_at_token_offset() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let in_range = tokens.in_range(TextRange::new(4.into(), 10.into()));
        assert_eq!(in_range.len(), 4);
        assert_eq!(in_range.first().unwrap().kind(), TokenKind::Name);
        assert_eq!(in_range.last().unwrap().kind(), TokenKind::Colon);
    }

    #[test]
    fn tokens_in_range_start_offset_at_token_end() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let in_range = tokens.in_range(TextRange::new(11.into(), 29.into()));
        assert_eq!(in_range.len(), 3);
        assert_eq!(in_range.first().unwrap().kind(), TokenKind::Comment);
        assert_eq!(in_range.last().unwrap().kind(), TokenKind::Indent);
    }

    #[test]
    fn tokens_in_range_end_offset_at_token_start() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let in_range = tokens.in_range(TextRange::new(8.into(), 15.into()));
        assert_eq!(in_range.len(), 3);
        assert_eq!(in_range.first().unwrap().kind(), TokenKind::Rpar);
        assert_eq!(in_range.last().unwrap().kind(), TokenKind::Newline);
    }

    #[test]
    fn tokens_in_range_start_offset_between_tokens() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let in_range = tokens.in_range(TextRange::new(13.into(), 29.into()));
        assert_eq!(in_range.len(), 3);
        assert_eq!(in_range.first().unwrap().kind(), TokenKind::Comment);
        assert_eq!(in_range.last().unwrap().kind(), TokenKind::Indent);
    }

    #[test]
    fn tokens_in_range_end_offset_between_tokens() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        let in_range = tokens.in_range(TextRange::new(9.into(), 13.into()));
        assert_eq!(in_range.len(), 2);
        assert_eq!(in_range.first().unwrap().kind(), TokenKind::Colon);
        assert_eq!(in_range.last().unwrap().kind(), TokenKind::Newline);
    }

    #[test]
    #[should_panic(expected = "Offset 5 is inside a token range 4..7")]
    fn tokens_in_range_start_offset_inside_token() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        tokens.in_range(TextRange::new(5.into(), 10.into()));
    }

    #[test]
    #[should_panic(expected = "End offset 6 is inside a token range 4..7")]
    fn tokens_in_range_end_offset_inside_token() {
        let tokens = new_tokens(TEST_CASE_WITH_GAP.into_iter());
        tokens.in_range(TextRange::new(0.into(), 6.into()));
    }
}