diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 392f84a..9d9358b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -63,7 +63,7 @@ jobs: - name: install ruff run: python -m pip install ruff - name: run python lint - run: ruff --ignore=E501 ast --show-source + run: ruff check ast - name: spell checker uses: streetsidesoftware/cspell-action@v2 diff --git a/Cargo.toml b/Cargo.toml index 603ad3d..a9811e9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace.package] -version = "0.3.1" +version = "0.4.0" authors = ["RustPython Team"] edition = "2021" rust-version = "1.72.1" @@ -15,12 +15,12 @@ members = [ ] [workspace.dependencies] -rustpython-parser-vendored = { path = "vendored", version = "0.3.1" } -rustpython-ast = { path = "ast", default-features = false, version = "0.3.1" } -rustpython-parser-core = { path = "core", features = [], version = "0.3.1" } -rustpython-literal = { path = "literal", version = "0.3.1" } -rustpython-format = { path = "format", default-features = false, version = "0.3.1" } -rustpython-parser = { path = "parser", default-features = false, version = "0.3.1" } +rustpython-parser-vendored = { path = "vendored", version = "0.4.0" } +rustpython-ast = { path = "ast", default-features = false, version = "0.4.0" } +rustpython-parser-core = { path = "core", features = [], version = "0.4.0" } +rustpython-literal = { path = "literal", version = "0.4.0" } +rustpython-format = { path = "format", default-features = false, version = "0.4.0" } +rustpython-parser = { path = "parser", default-features = false, version = "0.4.0" } anyhow = "1.0.45" bitflags = "2.4.0" @@ -32,7 +32,7 @@ log = "0.4.16" num-complex = "0.4.0" num-bigint = "0.4.3" num-traits = "0.2" -malachite-bigint = "0.2.0" +malachite-bigint = "0.2.3" memchr = "2.5.0" rand = "0.8.5" serde = { version = "1.0.133", default-features = false } diff --git a/ast/src/builtin.rs b/ast/src/builtin.rs index 1a64efb..47dcc7b 100644 --- a/ast/src/builtin.rs +++ b/ast/src/builtin.rs @@ -128,10 +128,10 @@ pub enum Constant { impl Constant { pub fn is_true(self) -> bool { - self.bool().map_or(false, |b| b) + self.bool().is_some_and(|b| b) } pub fn is_false(self) -> bool { - self.bool().map_or(false, |b| !b) + self.bool().is_some_and(|b| !b) } pub fn complex(self) -> Option<(f64, f64)> { match self { diff --git a/ast/src/lib.rs b/ast/src/lib.rs index 4e5d371..143fac1 100644 --- a/ast/src/lib.rs +++ b/ast/src/lib.rs @@ -20,7 +20,7 @@ mod generic; mod impls; mod ranged; #[cfg(feature = "unparse")] -mod unparse; +pub mod unparse; #[cfg(feature = "malachite-bigint")] pub use malachite_bigint as bigint; diff --git a/ast/src/source_locator.rs b/ast/src/source_locator.rs index c0a0f22..a72311d 100644 --- a/ast/src/source_locator.rs +++ b/ast/src/source_locator.rs @@ -156,11 +156,11 @@ impl crate::fold::Fold for LinearLocator<'_> { let context = self.will_map_user(&range); let name = self.fold(name)?; + let type_params = self.fold(type_params)?; let bases = self.fold(bases)?; let keywords = self.fold(keywords)?; let body = self.fold(body)?; let range = self.map_user(range, context)?; - let type_params = self.fold(type_params)?; Ok(crate::StmtClassDef { name, @@ -190,11 +190,11 @@ impl crate::fold::Fold for LinearLocator<'_> { let context = self.will_map_user(&range); let name = self.fold(name)?; + let type_params = self.fold(type_params)?; let args: Box> = self.fold(args)?; let returns = self.fold(returns)?; let body = self.fold(body)?; let type_comment = self.fold(type_comment)?; - let type_params = self.fold(type_params)?; let range = self.map_user(range, context)?; Ok(crate::StmtFunctionDef { name, @@ -225,11 +225,11 @@ impl crate::fold::Fold for LinearLocator<'_> { let context = self.will_map_user(&range); let name = self.fold(name)?; + let type_params = self.fold(type_params)?; let args: Box> = self.fold(args)?; let returns = self.fold(returns)?; let body = self.fold(body)?; let type_comment = self.fold(type_comment)?; - let type_params = self.fold(type_params)?; let range = self.map_user(range, context)?; Ok(crate::StmtAsyncFunctionDef { name, @@ -274,6 +274,34 @@ impl crate::fold::Fold for LinearLocator<'_> { keywords, }) } + + fn fold_pattern_match_mapping( + &mut self, + node: crate::PatternMatchMapping, + ) -> Result, Self::Error> { + let crate::PatternMatchMapping { + keys, + patterns, + rest, + range, + } = node; + let context = self.will_map_user(&range); + + let mut located_keys = Vec::with_capacity(keys.len()); + let mut located_patterns = Vec::with_capacity(patterns.len()); + for (key, value) in keys.into_iter().zip(patterns.into_iter()) { + located_keys.push(self.fold(key)?); + located_patterns.push(self.fold(value)?); + } + let rest = self.fold(rest)?; + let range = self.map_user(range, context)?; + Ok(crate::PatternMatchMapping { + keys: located_keys, + patterns: located_patterns, + rest, + range, + }) + } } struct LinearLookaheadLocator<'a, 'b>(&'b mut LinearLocator<'a>); diff --git a/ast/src/unparse.rs b/ast/src/unparse.rs index 95c0ba5..ae4a2d1 100644 --- a/ast/src/unparse.rs +++ b/ast/src/unparse.rs @@ -379,12 +379,7 @@ impl<'a> Unparser<'a> { } Expr::Subscript(crate::ExprSubscript { value, slice, .. }) => { self.unparse_expr(value, precedence::ATOM)?; - let mut lvl = precedence::TUPLE; - if let Expr::Tuple(crate::ExprTuple { elts, .. }) = slice.as_ref() { - if elts.iter().any(|expr| expr.is_starred_expr()) { - lvl += 1 - } - } + let lvl = precedence::TUPLE; self.p("[")?; self.unparse_expr(slice, lvl)?; self.p("]")?; diff --git a/format/src/format.rs b/format/src/format.rs index fd497b9..bc1bec2 100644 --- a/format/src/format.rs +++ b/format/src/format.rs @@ -863,17 +863,46 @@ impl FormatString { } fn parse_part_in_brackets(text: &str) -> Result { - let parts: Vec<&str> = text.splitn(2, ':').collect(); + let mut chars = text.chars().peekable(); + + let mut left = String::new(); + let mut right = String::new(); + + let mut split = false; + let mut selected = &mut left; + let mut inside_brackets = false; + + while let Some(char) = chars.next() { + if char == '[' { + inside_brackets = true; + + selected.push(char); + + while let Some(next_char) = chars.next() { + selected.push(next_char); + + if next_char == ']' { + inside_brackets = false; + break; + } + if chars.peek().is_none() { + return Err(FormatParseError::MissingRightBracket); + } + } + } else if char == ':' && !split && !inside_brackets { + split = true; + selected = &mut right; + } else { + selected.push(char); + } + } + // before the comma is a keyword or arg index, after the comma is maybe a spec. - let arg_part = parts[0]; + let arg_part: &str = &left; - let format_spec = if parts.len() > 1 { - parts[1].to_owned() - } else { - String::new() - }; + let format_spec = if split { right } else { String::new() }; - // On parts[0] can still be the conversion (!r, !s, !a) + // left can still be the conversion (!r, !s, !a) let parts: Vec<&str> = arg_part.splitn(2, '!').collect(); // before the bang is a keyword or arg index, after the comma is maybe a conversion spec. let arg_part = parts[0]; @@ -1168,6 +1197,34 @@ mod tests { ); } + #[test] + fn test_square_brackets_inside_format() { + assert_eq!( + FormatString::from_str("{[:123]}"), + Ok(FormatString { + format_parts: vec![FormatPart::Field { + field_name: "[:123]".to_owned(), + conversion_spec: None, + format_spec: "".to_owned(), + }], + }), + ); + + assert_eq!(FormatString::from_str("{asdf[:123]asdf}"), { + Ok(FormatString { + format_parts: vec![FormatPart::Field { + field_name: "asdf[:123]asdf".to_owned(), + conversion_spec: None, + format_spec: "".to_owned(), + }], + }) + }); + + assert_eq!(FormatString::from_str("{[1234}"), { + Err(FormatParseError::MissingRightBracket) + }); + } + #[test] fn test_format_parse_escape() { let expected = Ok(FormatString { diff --git a/literal/src/escape.rs b/literal/src/escape.rs index ee4fae5..ba8e3ec 100644 --- a/literal/src/escape.rs +++ b/literal/src/escape.rs @@ -232,7 +232,7 @@ impl UnicodeEscape<'_> { } } -impl<'a> Escape for UnicodeEscape<'a> { +impl Escape for UnicodeEscape<'_> { fn source_len(&self) -> usize { self.source.len() } @@ -254,24 +254,6 @@ impl<'a> Escape for UnicodeEscape<'a> { } } -#[cfg(test)] -mod unicode_escape_tests { - use super::*; - - #[test] - fn changed() { - fn test(s: &str) -> bool { - UnicodeEscape::new_repr(s).changed() - } - assert!(!test("hello")); - assert!(!test("'hello'")); - assert!(!test("\"hello\"")); - - assert!(test("'\"hello")); - assert!(test("hello\n")); - } -} - pub struct AsciiEscape<'a> { source: &'a [u8], layout: EscapeLayout, @@ -391,7 +373,7 @@ impl AsciiEscape<'_> { } } -impl<'a> Escape for AsciiEscape<'a> { +impl Escape for AsciiEscape<'_> { fn source_len(&self) -> usize { self.source.len() } @@ -439,3 +421,21 @@ impl std::fmt::Display for BytesRepr<'_, '_> { self.write(formatter) } } + +#[cfg(test)] +mod unicode_escape_tests { + use super::*; + + #[test] + fn changed() { + fn test(s: &str) -> bool { + UnicodeEscape::new_repr(s).changed() + } + assert!(!test("hello")); + assert!(!test("'hello'")); + assert!(!test("\"hello\"")); + + assert!(test("'\"hello")); + assert!(test("hello\n")); + } +} diff --git a/literal/src/float.rs b/literal/src/float.rs index 5c14fcb..0f10a6a 100644 --- a/literal/src/float.rs +++ b/literal/src/float.rs @@ -6,6 +6,33 @@ pub fn parse_str(literal: &str) -> Option { parse_inner(literal.trim().as_bytes()) } +fn strip_underlines(literal: &[u8]) -> Option> { + let mut prev = b'\0'; + let mut dup = Vec::::new(); + for p in literal { + if *p == b'_' { + // Underscores are only allowed after digits. + if !prev.is_ascii_digit() { + return None; + } + } else { + dup.push(*p); + // Underscores are only allowed before digits. + if prev == b'_' && !p.is_ascii_digit() { + return None; + } + } + prev = *p; + } + + // Underscores are not allowed at the end. + if prev == b'_' { + return None; + } + + Some(dup) +} + pub fn parse_bytes(literal: &[u8]) -> Option { parse_inner(trim_slice(literal, |b| b.is_ascii_whitespace())) } @@ -15,10 +42,10 @@ fn trim_slice(v: &[T], mut trim: impl FnMut(&T) -> bool) -> &[T] { // it.take_while_ref(&mut trim).for_each(drop); // hmm.. `&mut slice::Iter<_>` is not `Clone` // it.by_ref().rev().take_while_ref(&mut trim).for_each(drop); - while it.clone().next().map_or(false, &mut trim) { + while it.clone().next().is_some_and(&mut trim) { it.next(); } - while it.clone().next_back().map_or(false, &mut trim) { + while it.clone().next_back().is_some_and(&mut trim) { it.next_back(); } it.as_slice() @@ -28,11 +55,16 @@ fn parse_inner(literal: &[u8]) -> Option { use lexical_parse_float::{ format::PYTHON3_LITERAL, FromLexicalWithOptions, NumberFormatBuilder, Options, }; + + // Use custom function for underline handling for now. + // For further information see https://github.com/Alexhuszagh/rust-lexical/issues/96. + let stripped = strip_underlines(literal)?; + // lexical-core's format::PYTHON_STRING is inaccurate const PYTHON_STRING: u128 = NumberFormatBuilder::rebuild(PYTHON3_LITERAL) .no_special(false) .build(); - f64::from_lexical_with_options::(literal, &Options::new()).ok() + f64::from_lexical_with_options::(&stripped, &Options::new()).ok() } pub fn is_integer(v: f64) -> bool { diff --git a/parser/src/python.lalrpop b/parser/src/python.lalrpop index e1b8cbc..fe584d8 100644 --- a/parser/src/python.lalrpop +++ b/parser/src/python.lalrpop @@ -370,13 +370,14 @@ MatchStatement: ast::Stmt = { .last() .unwrap() .end(); + let subject_range = (subjects.first().unwrap().start()..subjects.last().unwrap().end()).into(); ast::Stmt::Match( ast::StmtMatch { subject: Box::new(ast::Expr::Tuple( ast::ExprTuple { elts: subjects, ctx: ast::ExprContext::Load, - range: (location..end_location).into() + range: subject_range, }, )), cases, diff --git a/parser/src/python.rs b/parser/src/python.rs index 369499a..ab02fdb 100644 --- a/parser/src/python.rs +++ b/parser/src/python.rs @@ -1,5 +1,5 @@ // auto-generated: "lalrpop 0.20.0" -// sha3: de5ffc51d44962eb297cbbf668fae33652eed69586405ebef2229fc02d183bc8 +// sha3: c2ba3f0f3de013733a18ba664f36f7f587254cc430656e553ceada96d33c409b use crate::{ ast::{self as ast, Ranged, bigint::BigInt}, lexer::{LexicalError, LexicalErrorType}, @@ -30842,13 +30842,14 @@ fn __action82< .last() .unwrap() .end(); + let subject_range = (subjects.first().unwrap().start()..subjects.last().unwrap().end()).into(); ast::Stmt::Match( ast::StmtMatch { subject: Box::new(ast::Expr::Tuple( ast::ExprTuple { elts: subjects, ctx: ast::ExprContext::Load, - range: (location..end_location).into() + range: subject_range, }, )), cases, diff --git a/parser/src/snapshots/rustpython_parser__parser__tests__patma.snap b/parser/src/snapshots/rustpython_parser__parser__tests__patma.snap index 7b3b577..65eed26 100644 --- a/parser/src/snapshots/rustpython_parser__parser__tests__patma.snap +++ b/parser/src/snapshots/rustpython_parser__parser__tests__patma.snap @@ -3783,7 +3783,7 @@ expression: parse_ast range: 2720..2760, subject: Tuple( ExprTuple { - range: 2720..2760, + range: 2726..2730, elts: [ Name( ExprName { diff --git a/parser/src/soft_keywords.rs b/parser/src/soft_keywords.rs index 9abcd39..3f8c24c 100644 --- a/parser/src/soft_keywords.rs +++ b/parser/src/soft_keywords.rs @@ -132,8 +132,8 @@ where } } - self.start_of_line = next.as_ref().map_or(false, |lex_result| { - lex_result.as_ref().map_or(false, |(tok, _)| { + self.start_of_line = next.as_ref().is_some_and(|lex_result| { + lex_result.as_ref().is_ok_and(|(tok, _)| { #[cfg(feature = "full-lexer")] if matches!(tok, Tok::NonLogicalNewline | Tok::Comment { .. }) { return self.start_of_line; diff --git a/vendored/src/text_size/mod.rs b/vendored/src/text_size/mod.rs index 977038a..c6a240f 100644 --- a/vendored/src/text_size/mod.rs +++ b/vendored/src/text_size/mod.rs @@ -23,8 +23,6 @@ mod range; mod size; mod traits; -#[cfg(feature = "schemars")] -mod schemars_impls; #[cfg(feature = "serde")] mod serde_impls; diff --git a/vendored/src/text_size/size.rs b/vendored/src/text_size/size.rs index 27a0f12..147eab2 100644 --- a/vendored/src/text_size/size.rs +++ b/vendored/src/text_size/size.rs @@ -5,7 +5,6 @@ use { fmt, iter, num::TryFromIntError, ops::{Add, AddAssign, Sub, SubAssign}, - u32, }, };