diff --git a/parser/src/context.rs b/parser/src/context.rs index f10e105..eb1953f 100644 --- a/parser/src/context.rs +++ b/parser/src/context.rs @@ -1,6 +1,6 @@ use rustpython_ast::{Expr, ExprContext, ExprKind}; -pub fn set_context(expr: Expr, ctx: ExprContext) -> Expr { +pub(crate) fn set_context(expr: Expr, ctx: ExprContext) -> Expr { match expr.node { ExprKind::Name { id, .. } => Expr { node: ExprKind::Name { id, ctx }, diff --git a/parser/src/error.rs b/parser/src/error.rs index 4edc02e..dab07f8 100644 --- a/parser/src/error.rs +++ b/parser/src/error.rs @@ -1,40 +1,71 @@ -//! Define internal parse error types -//! The goal is to provide a matching and a safe error API, maksing errors from LALR +//! Error types for the parser. +//! +//! These types are used to represent errors that occur during lexing and parsing and are +//! returned by the `parse_*` functions in the [parser] module and the iterator in the +//! [lexer] implementation. +//! +//! [parser]: crate::parser +//! [lexer]: crate::lexer +// Define internal parse error types. +// The goal is to provide a matching and a safe error API, masking errors from LALR use crate::{ast::Location, token::Tok}; use lalrpop_util::ParseError as LalrpopError; use std::fmt; -/// Represents an error during lexical scanning. +/// Represents an error during lexing. #[derive(Debug, PartialEq)] pub struct LexicalError { + /// The type of error that occurred. pub error: LexicalErrorType, + /// The location of the error. pub location: Location, } impl LexicalError { + /// Creates a new `LexicalError` with the given error type and location. pub fn new(error: LexicalErrorType, location: Location) -> Self { Self { error, location } } } +/// Represents the different types of errors that can occur during lexing. #[derive(Debug, PartialEq)] pub enum LexicalErrorType { + // TODO: Can probably be removed, the places it is used seem to be able + // to use the `UnicodeError` variant instead. + #[doc(hidden)] StringError, + // TODO: Should take a start/end position to report. + /// Decoding of a unicode escape sequence in a string literal failed. UnicodeError, + /// The nesting of brackets/braces/parentheses is not balanced. NestingError, + /// The indentation is not consistent. IndentationError, + /// Inconsistent use of tabs and spaces. TabError, + /// Encountered a tab after a space. TabsAfterSpaces, + /// A non-default argument follows a default argument. DefaultArgumentError, + /// A duplicate argument was found in a function definition. DuplicateArgumentError(String), + /// A positional argument follows a keyword argument. PositionalArgumentError, + /// An iterable argument unpacking `*args` follows keyword argument unpacking `**kwargs`. UnpackedArgumentError, + /// A keyword argument was repeated. DuplicateKeywordArgumentError(String), + /// An unrecognized token was encountered. UnrecognizedToken { tok: char }, + /// An f-string error containing the [`FStringErrorType`]. FStringError(FStringErrorType), + /// An unexpected character was encountered after a line continuation. LineContinuationError, + /// An unexpected end of file was encountered. Eof, + /// An unexpected error occurred. OtherError(String), } @@ -85,13 +116,17 @@ impl fmt::Display for LexicalErrorType { } // TODO: consolidate these with ParseError +/// An error that occurred during parsing of an f-string. #[derive(Debug, PartialEq)] pub struct FStringError { + /// The type of error that occurred. pub error: FStringErrorType, + /// The location of the error. 
pub location: Location, } impl FStringError { + /// Creates a new `FStringError` with the given error type and location. pub fn new(error: FStringErrorType, location: Location) -> Self { Self { error, location } } @@ -106,19 +141,33 @@ impl From<FStringError> for LexicalError { } } +/// Represents the different types of errors that can occur during parsing of an f-string. #[derive(Debug, PartialEq)] pub enum FStringErrorType { + /// Expected a right brace after an opened left brace. UnclosedLbrace, + /// Expected a left brace after an ending right brace. UnopenedRbrace, + /// Expected a right brace after a conversion flag. ExpectedRbrace, + /// An error occurred while parsing an f-string expression. InvalidExpression(Box<ParseErrorType>), + /// An invalid conversion flag was encountered. InvalidConversionFlag, + /// An empty expression was encountered. EmptyExpression, + /// An opening delimiter was not closed properly. MismatchedDelimiter(char, char), + /// Too many nested expressions in an f-string. ExpressionNestedTooDeeply, + /// The f-string expression cannot include the given character. ExpressionCannotInclude(char), + /// A single right brace was encountered. SingleRbrace, + /// A closing delimiter was not opened properly. Unmatched(char), + // TODO: Test this case. + /// Unterminated string. UnterminatedString, } @@ -167,9 +216,10 @@ impl From<FStringError> for LalrpopError<Location, Tok, LexicalError> { } } -/// Represents an error during parsing +/// Represents an error during parsing. pub type ParseError = rustpython_compiler_core::BaseError<ParseErrorType>; +/// Represents the different types of errors that can occur during parsing. #[derive(Debug, PartialEq, thiserror::Error)] pub enum ParseErrorType { /// Parser encountered an unexpected end of input @@ -180,11 +230,12 @@ pub enum ParseErrorType { InvalidToken, /// Parser encountered an unexpected token UnrecognizedToken(Tok, Option<String>), - /// Maps to `User` type from `lalrpop-util` + // Maps to `User` type from `lalrpop-util` + /// Parser encountered an error during lexing. Lexical(LexicalErrorType), } -/// Convert `lalrpop_util::ParseError` to our internal type +// Convert `lalrpop_util::ParseError` to our internal type pub(crate) fn parse_error_from_lalrpop( err: LalrpopError<Location, Tok, LexicalError>, source_path: &str, @@ -258,6 +309,7 @@ impl fmt::Display for ParseErrorType { } impl ParseErrorType { + /// Returns true if the error is an indentation error. pub fn is_indentation_error(&self) -> bool { match self { ParseErrorType::Lexical(LexicalErrorType::IndentationError) => true, @@ -267,6 +319,8 @@ impl ParseErrorType { _ => false, } } + + /// Returns true if the error is a tab error. pub fn is_tab_error(&self) -> bool { matches!( self, diff --git a/parser/src/lib.rs b/parser/src/lib.rs index 72dcae6..5c7c964 100644 --- a/parser/src/lib.rs +++ b/parser/src/lib.rs @@ -1,19 +1,119 @@ -//! This crate can be used to parse python sourcecode into a so -//! called AST (abstract syntax tree). +//! This crate can be used to parse Python source code into an Abstract +//! Syntax Tree. //! -//! The stages involved in this process are lexical analysis and -//! parsing. The lexical analysis splits the sourcecode into -//! tokens, and the parsing transforms those tokens into an AST. +//! ## Overview: //! -//! For example, one could do this: +//! The process by which source code is parsed into an AST can be broken down +//! into two general stages: [lexical analysis] and [parsing]. +//! +//! During lexical analysis, the source code is converted into a stream of lexical +//! tokens that represent the smallest meaningful units of the language.
For example, +//! the source code `print("Hello world")` would _roughly_ be converted into the following +//! stream of tokens: +//! +//! ```text +//! Name("print"), LeftParen, String("Hello world"), RightParen +//! ``` +//! +//! These tokens are then consumed by the parser, which matches them against a set of +//! grammar rules to verify that the source code is syntactically valid and to construct +//! an AST that represents the source code. +//! +//! During parsing, the parser consumes the tokens generated by the lexer and constructs +//! a tree representation of the source code. The tree is made up of nodes that represent +//! the different syntactic constructs of the language. If the source code is syntactically +//! invalid, parsing fails and an error is returned. After a successful parse, the AST can +//! be used to perform further analysis on the source code. Continuing with the example +//! above, the AST generated by the parser would _roughly_ look something like this: +//! +//! ```text +//! node: Expr { +//! value: { +//! node: Call { +//! func: { +//! node: Name { +//! id: "print", +//! ctx: Load, +//! }, +//! }, +//! args: [ +//! node: Constant { +//! value: Str("Hello World"), +//! kind: None, +//! }, +//! ], +//! keywords: [], +//! }, +//! }, +//! }, +//! ``` +//! +//! Note: The tokens/ASTs shown above are not the exact tokens/ASTs generated by the parser. +//! +//! ## Source code layout: +//! +//! The functionality of this crate is split into several modules: +//! +//! - [token]: This module contains the definition of the tokens that are generated by the lexer. +//! - [lexer]: This module contains the lexer and is responsible for generating the tokens. +//! - [parser]: This module contains an interface to the parser and is responsible for generating the AST. +//! - Functions and strings have special parsing requirements that are handled in additional files. +//! - [mode]: This module contains the definition of the different modes that the parser can be in. +//! - [error]: This module contains the definition of the errors that can be returned by the parser. +//! +//! # Examples +//! +//! For example, to get a stream of tokens from a given string, one could do this: //! //! ``` -//! use rustpython_parser::{parser, ast}; +//! use rustpython_parser::lexer::make_tokenizer; //! -//! let python_source = "print('Hello world')"; -//! let python_ast = parser::parse_expression(python_source, "<embedded>").unwrap(); +//! let python_source = r#" +//! def is_odd(i): +//! return bool(i & 1) +//! "#; +//! let mut tokens = make_tokenizer(python_source); +//! assert!(tokens.all(|t| t.is_ok())); +//! ``` +//! +//! These tokens can be directly fed into the parser to generate an AST: //! //! ``` +//! use rustpython_parser::parser::{parse_tokens, Mode}; +//! use rustpython_parser::lexer::make_tokenizer; +//! +//! let python_source = r#" +//! def is_odd(i): +//! return bool(i & 1) +//! "#; +//! let tokens = make_tokenizer(python_source); +//! let ast = parse_tokens(tokens, Mode::Module, "<embedded>"); +//! +//! assert!(ast.is_ok()); +//! ``` +//! +//! Alternatively, you can use one of the other `parse_*` functions to parse a string directly without using a specific +//! mode or tokenizing the source beforehand: +//! +//! ``` +//! use rustpython_parser::parser::parse_program; +//! +//! let python_source = r#" +//! def is_odd(i): +//! return bool(i & 1) +//! "#; +//! let ast = parse_program(python_source, "<embedded>"); +//! +//! assert!(ast.is_ok()); +//! ``` +//! ```
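Beyond `assert!(ast.is_ok())`, the AST returned by `parse_program` can be inspected directly. The following standalone sketch is not part of the diff; it assumes the crate's `ast` re-export and the `Located`/`StmtKind` layout used by this version of `rustpython-parser`, and reuses the `is_odd` snippet from the examples above.

```rust
use rustpython_parser::{ast, parser};

fn main() {
    let source = r#"
def is_odd(i):
    return bool(i & 1)
"#;
    // `parse_program` yields an `ast::Suite`, i.e. a `Vec<ast::Stmt>`
    // (assumption based on the crate's re-exported AST types).
    let body = parser::parse_program(source, "<embedded>").expect("source should parse");
    for stmt in &body {
        // Each statement is a `Located<StmtKind>`; the syntactic variant lives in `.node`.
        if let ast::StmtKind::FunctionDef { name, .. } = &stmt.node {
            println!("found a function definition named `{name}`");
        }
    }
}
```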
+//! +//! [lexical analysis]: https://en.wikipedia.org/wiki/Lexical_analysis +//! [parsing]: https://en.wikipedia.org/wiki/Parsing +//! [token]: crate::token +//! [lexer]: crate::lexer +//! [parser]: crate::parser +//! [mode]: crate::mode +//! [error]: crate::error #![doc(html_logo_url = "https://raw.githubusercontent.com/RustPython/RustPython/main/logo.png")] #![doc(html_root_url = "https://docs.rs/rustpython-parser/")] diff --git a/parser/src/mode.rs b/parser/src/mode.rs index cd84a09..4403fbe 100644 --- a/parser/src/mode.rs +++ b/parser/src/mode.rs @@ -1,9 +1,14 @@ +//! Control over the different modes by which a source file can be parsed. use crate::token::Tok; +/// The mode argument specifies in what way code must be parsed. #[derive(Clone, Copy)] pub enum Mode { + /// The code consists of a sequence of statements. Module, + /// The code consists of a sequence of interactive statements. Interactive, + /// The code consists of a single expression. Expression, } @@ -39,6 +44,7 @@ impl std::str::FromStr for Mode { } } +/// Returned when a given mode is not valid. #[derive(Debug)] pub struct ModeParseError { _priv: (), } diff --git a/parser/src/parser.rs b/parser/src/parser.rs index 38b065a..7f82802 100644 --- a/parser/src/parser.rs +++ b/parser/src/parser.rs @@ -1,9 +1,16 @@ -//! Python parsing. //! -//! Use this module to parse python code into an AST. -//! There are three ways to parse python code. You could -//! parse a whole program, a single statement, or a single -//! expression. +//! Contains the interface to the Python parser. +//! +//! Functions in this module can be used to parse Python code into an [Abstract Syntax Tree] +//! (AST) that is then transformed into bytecode. +//! +//! There are three ways to parse Python code corresponding to the different [`Mode`]s +//! defined in the [`mode`] module. +//! +//! All functions return a [`Result`](std::result::Result) containing the parsed AST or +//! a [`ParseError`] if parsing failed. +//! +//! [Abstract Syntax Tree]: https://en.wikipedia.org/wiki/Abstract_syntax_tree +//! [`Mode`]: crate::mode use crate::lexer::{LexResult, Tok}; pub use crate::mode::Mode; @@ -12,13 +19,26 @@ use ast::Location; use itertools::Itertools; use std::iter; -/* - * Parse python code. - * Grammar may be inspired by antlr grammar for python: - * https://github.com/antlr/grammars-v4/tree/master/python3 - */ - -/// Parse a full python program, containing usually multiple lines. +/// Parse a full Python program usually consisting of multiple lines. +/// +/// This is a convenience function that can be used to parse a full Python program without having to +/// specify the [`Mode`] or the location. It is probably what you want to use most of the time. +/// +/// # Example +/// +/// For example, parsing a simple function definition and a call to that function: +/// +/// ``` +/// use rustpython_parser::parser; +/// let source = r#" +/// def foo(): +/// return 42 +/// +/// print(foo()) +/// "#; +/// let program = parser::parse_program(source, "<embedded>"); +/// assert!(program.is_ok()); +/// ``` pub fn parse_program(source: &str, source_path: &str) -> Result<ast::Suite, ParseError> { parse(source, Mode::Module, source_path).map(|top| match top { ast::Mod::Module { body, ..
} => body, @@ -26,49 +46,44 @@ pub fn parse_program(source: &str, source_path: &str) -> Result").unwrap(); +/// let expr = parser::parse_expression("1 + 2", "<embedded>"); /// -/// assert_eq!( -/// expr, -/// ast::Expr { -/// location: ast::Location::new(1, 0), -/// end_location: Some(ast::Location::new(1, 5)), -/// custom: (), -/// node: ast::ExprKind::BinOp { -/// left: Box::new(ast::Expr { -/// location: ast::Location::new(1, 0), -/// end_location: Some(ast::Location::new(1, 1)), -/// custom: (), -/// node: ast::ExprKind::Constant { -/// value: ast::Constant::Int(1.into()), -/// kind: None, -/// } -/// }), -/// op: ast::Operator::Add, -/// right: Box::new(ast::Expr { -/// location: ast::Location::new(1, 4), -/// end_location: Some(ast::Location::new(1, 5)), -/// custom: (), -/// node: ast::ExprKind::Constant { -/// value: ast::Constant::Int(2.into()), -/// kind: None, -/// } -/// }) -/// } -/// }, -/// ); +/// assert!(expr.is_ok()); /// /// ``` pub fn parse_expression(source: &str, path: &str) -> Result<ast::Expr, ParseError> { parse_expression_located(source, path, Location::new(1, 0)) } +/// Parses a Python expression from a given location. +/// +/// This function allows you to specify the location of the expression in the source code; other than +/// that, it behaves exactly like [`parse_expression`]. +/// +/// # Example +/// +/// Parsing a single expression denoting the addition of two numbers, but this time specifying a different, +/// somewhat silly, location: +/// +/// ``` +/// use rustpython_parser::parser::parse_expression_located; +/// use rustpython_parser::ast::Location; +/// +/// let expr = parse_expression_located("1 + 2", "<embedded>", Location::new(5, 20)); +/// assert!(expr.is_ok()); +/// ``` pub fn parse_expression_located( source: &str, path: &str, @@ -80,12 +95,64 @@ pub fn parse_expression_located( }) } -// Parse a given source code +/// Parse the given Python source code using the specified [`Mode`]. +/// +/// This function is the most general function to parse Python code. Based on the [`Mode`] supplied, +/// it can be used to parse a single expression, a full Python program or an interactive expression. +/// +/// # Example +/// +/// If we want to parse a simple expression, we can use the [`Mode::Expression`] mode during +/// parsing: +/// +/// ``` +/// use rustpython_parser::parser::{parse, Mode}; +/// +/// let expr = parse("1 + 2", Mode::Expression, "<embedded>"); +/// assert!(expr.is_ok()); +/// ``` +/// +/// Alternatively, we can parse a full Python program consisting of multiple lines: +/// +/// ``` +/// use rustpython_parser::parser::{parse, Mode}; +/// +/// let source = r#" +/// class Greeter: +/// +/// def greet(self): +/// print("Hello, world!") +/// "#; +/// let program = parse(source, Mode::Module, "<embedded>"); +/// assert!(program.is_ok()); +/// ``` pub fn parse(source: &str, mode: Mode, source_path: &str) -> Result<ast::Mod, ParseError> { parse_located(source, mode, source_path, Location::new(1, 0)) } -// Parse a given source code from a given location +/// Parse the given Python source code using the specified [`Mode`] and [`Location`]. +/// +/// This function allows you to specify the location of the source code; other than +/// that, it behaves exactly like [`parse`].
+/// +/// # Example +/// +/// ``` +/// use rustpython_parser::parser::{parse_located, Mode}; +/// use rustpython_parser::ast::Location; +/// +/// let source = r#" +/// def fib(i): +/// a, b = 0, 1 +/// for _ in range(i): +/// a, b = b, a + b +/// return a +/// +/// print(fib(42)) +/// "#; +/// let program = parse_located(source, Mode::Module, "<embedded>", Location::new(1, 0)); +/// assert!(program.is_ok()); +/// ``` pub fn parse_located( source: &str, mode: Mode, @@ -96,7 +163,22 @@ pub fn parse_located( parse_tokens(lxr, mode, source_path) } -// Parse a given token iterator. +/// Parse an iterator of [`LexResult`]s using the specified [`Mode`]. +/// +/// This could allow you to perform some preprocessing on the tokens before parsing them. +/// +/// # Example +/// +/// As an example, instead of parsing a string, we can parse a list of tokens after we generate +/// them using the [`lexer::make_tokenizer`] function: +/// +/// ``` +/// use rustpython_parser::parser::{parse_tokens, Mode}; +/// use rustpython_parser::lexer::make_tokenizer; +/// +/// let expr = parse_tokens(make_tokenizer("1 + 2"), Mode::Expression, "<embedded>"); +/// assert!(expr.is_ok()); +/// ``` pub fn parse_tokens( lxr: impl IntoIterator<Item = LexResult>, mode: Mode, @@ -328,4 +410,13 @@ with (0 as a, 1 as b,): pass let parse_ast = parse_expression(r#"{"a": "b", **c, "d": "e"}"#, "").unwrap(); insta::assert_debug_snapshot!(parse_ast); } + + #[test] + fn test_modes() { + let source = "a[0][1][2][3][4]"; + + assert!(parse(&source, Mode::Expression, "").is_ok()); + assert!(parse(&source, Mode::Module, "").is_ok()); + assert!(parse(&source, Mode::Interactive, "").is_ok()); + } } diff --git a/parser/src/token.rs b/parser/src/token.rs index b51b2f4..3fa6ff1 100644 --- a/parser/src/token.rs +++ b/parser/src/token.rs @@ -1,86 +1,154 @@ -//! Different token definitions. -//! Loosely based on token.h from CPython source: +//! Token type for Python source code created by the lexer and consumed by the parser. +//! +//! This module defines the tokens that the lexer recognizes. The tokens are +//! loosely based on the token definitions found in the [CPython source]. +//! +//! [CPython source]: https://github.com/python/cpython/blob/dfc2e065a2e71011017077e549cd2f9bf4944c54/Include/internal/pycore_token.h use num_bigint::BigInt; use std::fmt; -/// Python source code can be tokenized in a sequence of these tokens. +/// The set of tokens that Python source code can be tokenized into. #[derive(Clone, Debug, PartialEq)] pub enum Tok { + /// Token value for a name, commonly known as an identifier. Name { + /// The name value. name: String, }, + /// Token value for an integer. Int { + /// The integer value. value: BigInt, }, + /// Token value for a floating point number. Float { + /// The float value. value: f64, }, + /// Token value for a complex number. Complex { + /// The real part of the complex number. real: f64, + /// The imaginary part of the complex number. imag: f64, }, + /// Token value for a string. String { + /// The string value. value: String, + /// The kind of string. kind: StringKind, + /// Whether the string is triple quoted. triple_quoted: bool, }, - Newline, - NonLogicalNewline, - Indent, - Dedent, - StartModule, - StartInteractive, - StartExpression, - EndOfFile, - Lpar, - Rpar, - Lsqb, - Rsqb, - Colon, - Comma, + /// Token value for a comment. These are filtered out of the token stream prior to parsing. Comment(String), + /// Token value for a newline. + Newline, + /// Token value for a newline that is not a logical line break.
These are filtered out of + /// the token stream prior to parsing. + NonLogicalNewline, + /// Token value for an indent. + Indent, + /// Token value for a dedent. + Dedent, + EndOfFile, + /// Token value for a left parenthesis `(`. + Lpar, + /// Token value for a right parenthesis `)`. + Rpar, + /// Token value for a left square bracket `[`. + Lsqb, + /// Token value for a right square bracket `]`. + Rsqb, + /// Token value for a colon `:`. + Colon, + /// Token value for a comma `,`. + Comma, + /// Token value for a semicolon `;`. Semi, + /// Token value for plus `+`. Plus, + /// Token value for minus `-`. Minus, + /// Token value for star `*`. Star, + /// Token value for slash `/`. Slash, - Vbar, // '|' - Amper, // '&' + /// Token value for vertical bar `|`. + Vbar, + /// Token value for ampersand `&`. + Amper, + /// Token value for less than `<`. Less, + /// Token value for greater than `>`. Greater, + /// Token value for equal `=`. Equal, + /// Token value for dot `.`. Dot, + /// Token value for percent `%`. Percent, + /// Token value for left bracket `{`. Lbrace, + /// Token value for right bracket `}`. Rbrace, + /// Token value for double equal `==`. EqEqual, + /// Token value for not equal `!=`. NotEqual, + /// Token value for less than or equal `<=`. LessEqual, + /// Token value for greater than or equal `>=`. GreaterEqual, + /// Token value for tilde `~`. Tilde, + /// Token value for caret `^`. CircumFlex, + /// Token value for left shift `<<`. LeftShift, + /// Token value for right shift `>>`. RightShift, + /// Token value for double star `**`. DoubleStar, - DoubleStarEqual, // '**=' + /// Token value for double star equal `**=`. + DoubleStarEqual, + /// Token value for plus equal `+=`. PlusEqual, + /// Token value for minus equal `-=`. MinusEqual, + /// Token value for star equal `*=`. StarEqual, + /// Token value for slash equal `/=`. SlashEqual, + /// Token value for percent equal `%=`. PercentEqual, - AmperEqual, // '&=' + /// Token value for ampersand equal `&=`. + AmperEqual, + /// Token value for vertical bar equal `|=`. VbarEqual, - CircumflexEqual, // '^=' + /// Token value for caret equal `^=`. + CircumflexEqual, + /// Token value for left shift equal `<<=`. LeftShiftEqual, + /// Token value for right shift equal `>>=`. RightShiftEqual, - DoubleSlash, // '//' + /// Token value for double slash `//`. + DoubleSlash, + /// Token value for double slash equal `//=`. DoubleSlashEqual, + /// Token value for colon equal `:=`. ColonEqual, + /// Token value for at `@`. At, + /// Token value for at equal `@=`. AtEqual, + /// Token value for arrow `->`. Rarrow, + /// Token value for ellipsis `...`. Ellipsis, + // Self documenting. // Keywords (alphabetically): False, None, @@ -118,6 +186,11 @@ pub enum Tok { While, With, Yield, + + // RustPython specific. + StartModule, + StartInteractive, + StartExpression, } impl fmt::Display for Tok { @@ -231,14 +304,25 @@ impl fmt::Display for Tok { } } +/// The kind of string literal as described in the [String and Bytes literals] +/// section of the Python reference. +/// +/// [String and Bytes literals]: https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals #[derive(PartialEq, Eq, Debug, Clone)] pub enum StringKind { + /// A normal string literal with no prefix. String, + /// A f-string literal, with a `f` or `F` prefix. FString, + /// A byte string literal, with a `b` or `B` prefix. Bytes, + /// A raw string literal, with a `r` or `R` prefix. 
RawString, + /// A raw f-string literal, with a `rf`/`fr` or `rF`/`Fr` or `Rf`/`fR` or `RF`/`FR` prefix. RawFString, + /// A raw byte string literal, with a `rb`/`br` or `rB`/`Br` or `Rb`/`bR` or `RB`/`BR` prefix. RawBytes, + /// A unicode string literal, with a `u` or `U` prefix. Unicode, } @@ -286,25 +370,33 @@ impl fmt::Display for StringKind { } impl StringKind { + /// Returns true if the string is a raw string, i.e. one of + /// [`StringKind::RawString`] or [`StringKind::RawFString`] or [`StringKind::RawBytes`]. pub fn is_raw(&self) -> bool { use StringKind::{RawBytes, RawFString, RawString}; matches!(self, RawString | RawFString | RawBytes) } + /// Returns true if the string is an f-string, i.e. one of + /// [`StringKind::FString`] or [`StringKind::RawFString`]. pub fn is_fstring(&self) -> bool { use StringKind::{FString, RawFString}; matches!(self, FString | RawFString) } + /// Returns true if the string is a byte string, i.e. one of + /// [`StringKind::Bytes`] or [`StringKind::RawBytes`]. pub fn is_bytes(&self) -> bool { use StringKind::{Bytes, RawBytes}; matches!(self, Bytes | RawBytes) } + /// Returns true if the string is a unicode string, i.e. [`StringKind::Unicode`]. pub fn is_unicode(&self) -> bool { matches!(self, StringKind::Unicode) } + /// Returns the number of characters in the prefix. pub fn prefix_len(&self) -> usize { use StringKind::*; match self {
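To round the section off, here is a small usage sketch for the `StringKind` helpers documented above. It is not part of the diff: it assumes the `token` module is public, as the new module docs (`crate::token`) indicate, and simply exercises the predicates and `prefix_len`.

```rust
use rustpython_parser::token::StringKind;

fn main() {
    // A raw f-string satisfies both the `is_raw` and `is_fstring` predicates.
    let kind = StringKind::RawFString;
    assert!(kind.is_raw());
    assert!(kind.is_fstring());
    assert!(!kind.is_bytes());

    // Two prefix characters, e.g. the `rf` in `rf"..."`.
    assert_eq!(kind.prefix_len(), 2);

    // A plain string literal has no prefix at all.
    assert_eq!(StringKind::String.prefix_len(), 0);
}
```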