From f4ce4bf98374209add67accee0f3f3056d686b23 Mon Sep 17 00:00:00 2001 From: Joshua Warner Date: Tue, 8 Nov 2022 19:32:14 -0500 Subject: [PATCH] Implement parsing for tuple accessor functions (.1, .2, etc) Step 2 of N toward implementing #4465 --- .../ast/src/lang/core/expr/expr_to_expr2.rs | 4 +- crates/compiler/can/src/def.rs | 2 +- crates/compiler/can/src/expr.rs | 8 +- crates/compiler/can/src/operator.rs | 8 +- crates/compiler/fmt/src/expr.rs | 20 ++++- crates/compiler/fmt/src/spaces.rs | 6 +- crates/compiler/parse/src/ast.rs | 17 +++-- crates/compiler/parse/src/expr.rs | 15 ++-- crates/compiler/parse/src/ident.rs | 74 ++++++++++++++----- crates/compiler/parse/src/pattern.rs | 2 +- .../pass/basic_field.expr.result-ast | 2 +- .../pass/multiple_fields.expr.result-ast | 6 +- .../parenthetical_basic_field.expr.result-ast | 2 +- ...etical_field_qualified_var.expr.result-ast | 2 +- .../pass/qualified_field.expr.result-ast | 6 +- .../tuple_accessor_function.expr.result-ast | 21 ++++++ .../pass/tuple_accessor_function.expr.roc | 1 + .../unary_negation_access.expr.result-ast | 2 +- crates/compiler/parse/tests/test_parse.rs | 1 + 19 files changed, 144 insertions(+), 55 deletions(-) create mode 100644 crates/compiler/parse/tests/snapshots/pass/tuple_accessor_function.expr.result-ast create mode 100644 crates/compiler/parse/tests/snapshots/pass/tuple_accessor_function.expr.roc diff --git a/crates/ast/src/lang/core/expr/expr_to_expr2.rs b/crates/ast/src/lang/core/expr/expr_to_expr2.rs index 62d6a5d6dd..9a551a4109 100644 --- a/crates/ast/src/lang/core/expr/expr_to_expr2.rs +++ b/crates/ast/src/lang/core/expr/expr_to_expr2.rs @@ -278,7 +278,7 @@ pub fn expr_to_expr2<'a>( } } - Access(record_expr, field) => { + RecordAccess(record_expr, field) => { // TODO let region = ZERO; let (record_expr_id, output) = to_expr_id(env, scope, record_expr, region); @@ -295,7 +295,7 @@ pub fn expr_to_expr2<'a>( ) } - AccessorFunction(field) => ( + RecordAccessorFunction(field) => ( Expr2::Accessor { function_var: env.var_store.fresh(), record_var: env.var_store.fresh(), diff --git a/crates/compiler/can/src/def.rs b/crates/compiler/can/src/def.rs index a68ded2c8c..b49b888cac 100644 --- a/crates/compiler/can/src/def.rs +++ b/crates/compiler/can/src/def.rs @@ -2236,7 +2236,7 @@ fn canonicalize_pending_body<'a>( ident: defined_symbol, .. }, - ast::Expr::AccessorFunction(field), + ast::Expr::RecordAccessorFunction(field), ) => { let (loc_can_expr, can_output) = ( Loc::at( diff --git a/crates/compiler/can/src/expr.rs b/crates/compiler/can/src/expr.rs index 27a66d7e99..4b7839f47b 100644 --- a/crates/compiler/can/src/expr.rs +++ b/crates/compiler/can/src/expr.rs @@ -921,7 +921,7 @@ pub fn canonicalize_expr<'a>( (expr, output) } - ast::Expr::Access(record_expr, field) => { + ast::Expr::RecordAccess(record_expr, field) => { let (loc_expr, output) = canonicalize_expr(env, var_store, scope, region, record_expr); ( @@ -935,7 +935,7 @@ pub fn canonicalize_expr<'a>( output, ) } - ast::Expr::AccessorFunction(field) => ( + ast::Expr::RecordAccessorFunction(field) => ( Accessor(AccessorData { name: scope.gen_unique_symbol(), function_var: var_store.fresh(), @@ -947,6 +947,8 @@ pub fn canonicalize_expr<'a>( }), Output::default(), ), + ast::Expr::TupleAccess(_record_expr, _field) => todo!("handle TupleAccess"), + ast::Expr::TupleAccessorFunction(_) => todo!("handle TupleAccessorFunction"), ast::Expr::Tag(tag) => { let variant_var = var_store.fresh(); let ext_var = var_store.fresh(); @@ -2068,7 +2070,7 @@ fn flatten_str_literal<'a>( pub fn is_valid_interpolation(expr: &ast::Expr<'_>) -> bool { match expr { ast::Expr::Var { .. } => true, - ast::Expr::Access(sub_expr, _) => is_valid_interpolation(sub_expr), + ast::Expr::RecordAccess(sub_expr, _) => is_valid_interpolation(sub_expr), _ => false, } } diff --git a/crates/compiler/can/src/operator.rs b/crates/compiler/can/src/operator.rs index 6dba661bc0..596c031923 100644 --- a/crates/compiler/can/src/operator.rs +++ b/crates/compiler/can/src/operator.rs @@ -120,7 +120,8 @@ pub fn desugar_expr<'a>(arena: &'a Bump, loc_expr: &'a Loc>) -> &'a Loc | NonBase10Int { .. } | Str(_) | SingleQuote(_) - | AccessorFunction(_) + | RecordAccessorFunction(_) + | TupleAccessorFunction(_) | Var { .. } | Underscore { .. } | MalformedIdent(_, _) @@ -129,13 +130,14 @@ pub fn desugar_expr<'a>(arena: &'a Bump, loc_expr: &'a Loc>) -> &'a Loc | Tag(_) | OpaqueRef(_) => loc_expr, - Access(sub_expr, paths) => { + TupleAccess(_sub_expr, _paths) => todo!("Handle TupleAccess"), + RecordAccess(sub_expr, paths) => { let region = loc_expr.region; let loc_sub_expr = Loc { region, value: **sub_expr, }; - let value = Access(&desugar_expr(arena, arena.alloc(loc_sub_expr)).value, paths); + let value = RecordAccess(&desugar_expr(arena, arena.alloc(loc_sub_expr)).value, paths); arena.alloc(Loc { region, value }) } diff --git a/crates/compiler/fmt/src/expr.rs b/crates/compiler/fmt/src/expr.rs index ffe6adeade..db6d46617c 100644 --- a/crates/compiler/fmt/src/expr.rs +++ b/crates/compiler/fmt/src/expr.rs @@ -34,8 +34,10 @@ impl<'a> Formattable for Expr<'a> { | Num(..) | NonBase10Int { .. } | SingleQuote(_) - | Access(_, _) - | AccessorFunction(_) + | RecordAccess(_, _) + | RecordAccessorFunction(_) + | TupleAccess(_, _) + | TupleAccessorFunction(_) | Var { .. } | Underscore { .. } | MalformedIdent(_, _) @@ -399,12 +401,22 @@ impl<'a> Formattable for Expr<'a> { sub_expr.format_with_options(buf, Parens::InApply, newlines, indent); } - AccessorFunction(key) => { + RecordAccessorFunction(key) => { buf.indent(indent); buf.push('.'); buf.push_str(key); } - Access(expr, key) => { + RecordAccess(expr, key) => { + expr.format_with_options(buf, Parens::InApply, Newlines::Yes, indent); + buf.push('.'); + buf.push_str(key); + } + TupleAccessorFunction(key) => { + buf.indent(indent); + buf.push('.'); + buf.push_str(key); + } + TupleAccess(expr, key) => { expr.format_with_options(buf, Parens::InApply, Newlines::Yes, indent); buf.push('.'); buf.push_str(key); diff --git a/crates/compiler/fmt/src/spaces.rs b/crates/compiler/fmt/src/spaces.rs index e9ffe94210..648f6e1069 100644 --- a/crates/compiler/fmt/src/spaces.rs +++ b/crates/compiler/fmt/src/spaces.rs @@ -640,8 +640,10 @@ impl<'a> RemoveSpaces<'a> for Expr<'a> { is_negative, }, Expr::Str(a) => Expr::Str(a.remove_spaces(arena)), - Expr::Access(a, b) => Expr::Access(arena.alloc(a.remove_spaces(arena)), b), - Expr::AccessorFunction(a) => Expr::AccessorFunction(a), + Expr::RecordAccess(a, b) => Expr::RecordAccess(arena.alloc(a.remove_spaces(arena)), b), + Expr::RecordAccessorFunction(a) => Expr::RecordAccessorFunction(a), + Expr::TupleAccess(a, b) => Expr::TupleAccess(arena.alloc(a.remove_spaces(arena)), b), + Expr::TupleAccessorFunction(a) => Expr::TupleAccessorFunction(a), Expr::List(a) => Expr::List(a.remove_spaces(arena)), Expr::RecordUpdate { update, fields } => Expr::RecordUpdate { update: arena.alloc(update.remove_spaces(arena)), diff --git a/crates/compiler/parse/src/ast.rs b/crates/compiler/parse/src/ast.rs index 617ff44cbe..0d41a3cf13 100644 --- a/crates/compiler/parse/src/ast.rs +++ b/crates/compiler/parse/src/ast.rs @@ -163,13 +163,19 @@ pub enum Expr<'a> { // String Literals Str(StrLiteral<'a>), // string without escapes in it - /// Look up exactly one field on a record, e.g. (expr).foo. - Access(&'a Expr<'a>, &'a str), - /// e.g. `.foo` - AccessorFunction(&'a str), /// eg 'b' SingleQuote(&'a str), + /// Look up exactly one field on a record, e.g. `x.foo`. + RecordAccess(&'a Expr<'a>, &'a str), + /// e.g. `.foo` + RecordAccessorFunction(&'a str), + + /// Look up exactly one field on a tuple, e.g. `(x, y).1`. + TupleAccess(&'a Expr<'a>, &'a str), + /// e.g. `.1` + TupleAccessorFunction(&'a str), + // Collection Literals List(Collection<'a, &'a Loc>>), @@ -731,7 +737,8 @@ impl<'a> Pattern<'a> { Pattern::Malformed(buf.into_bump_str()) } } - Ident::AccessorFunction(string) => Pattern::Malformed(string), + Ident::RecordAccessorFunction(string) => Pattern::Malformed(string), + Ident::TupleAccessorFunction(string) => Pattern::Malformed(string), Ident::Malformed(string, _problem) => Pattern::Malformed(string), } } diff --git a/crates/compiler/parse/src/expr.rs b/crates/compiler/parse/src/expr.rs index cb90d60ebb..e68d2124ff 100644 --- a/crates/compiler/parse/src/expr.rs +++ b/crates/compiler/parse/src/expr.rs @@ -147,7 +147,7 @@ fn loc_expr_in_parens_etc_help<'a>() -> impl Parser<'a, Loc>, EExpr<'a> // Wrap the previous answer in the new one, so we end up // with a nested Expr. That way, `foo.bar.baz` gets represented // in the AST as if it had been written (foo.bar).baz all along. - value = Expr::Access(arena.alloc(value), field); + value = Expr::RecordAccess(arena.alloc(value), field); } } @@ -1868,8 +1868,10 @@ fn expr_to_pattern_help<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result(arena: &'a Bump, src: Ident<'a>) -> Expr<'a> { // Wrap the previous answer in the new one, so we end up // with a nested Expr. That way, `foo.bar.baz` gets represented // in the AST as if it had been written (foo.bar).baz all along. - answer = Expr::Access(arena.alloc(answer), field); + answer = Expr::RecordAccess(arena.alloc(answer), field); } answer } - Ident::AccessorFunction(string) => Expr::AccessorFunction(string), + Ident::RecordAccessorFunction(string) => Expr::RecordAccessorFunction(string), + Ident::TupleAccessorFunction(string) => Expr::TupleAccessorFunction(string), Ident::Malformed(string, problem) => Expr::MalformedIdent(string, problem), } } @@ -2595,7 +2598,7 @@ fn record_literal_help<'a>() -> impl Parser<'a, Expr<'a>, EExpr<'a>> { // Wrap the previous answer in the new one, so we end up // with a nested Expr. That way, `foo.bar.baz` gets represented // in the AST as if it had been written (foo.bar).baz all along. - value = Expr::Access(arena.alloc(value), field); + value = Expr::RecordAccess(arena.alloc(value), field); } } diff --git a/crates/compiler/parse/src/ident.rs b/crates/compiler/parse/src/ident.rs index 2891a55417..349cac5e9b 100644 --- a/crates/compiler/parse/src/ident.rs +++ b/crates/compiler/parse/src/ident.rs @@ -44,7 +44,9 @@ pub enum Ident<'a> { parts: &'a [&'a str], }, /// .foo { foo: 42 } - AccessorFunction(&'a str), + RecordAccessorFunction(&'a str), + /// .1 (1, 2, 3) + TupleAccessorFunction(&'a str), /// .Foo or foo. or something like foo.Bar Malformed(&'a str, BadIdent), } @@ -69,7 +71,8 @@ impl<'a> Ident<'a> { len - 1 } - AccessorFunction(string) => string.len(), + RecordAccessorFunction(string) => string.len(), + TupleAccessorFunction(string) => string.len(), Malformed(string, _) => string.len(), } } @@ -134,10 +137,7 @@ pub fn uppercase_ident<'a>() -> impl Parser<'a, &'a str, ()> { } pub fn unqualified_ident<'a>() -> impl Parser<'a, &'a str, ()> { - move |_, state: State<'a>, _min_indent: u32| match chomp_part( - |c| c.is_alphabetic(), - state.bytes(), - ) { + move |_, state: State<'a>, _min_indent: u32| match chomp_anycase_part(state.bytes()) { Err(progress) => Err((progress, (), state)), Ok(ident) => { if crate::keyword::KEYWORDS.iter().any(|kw| &ident == kw) { @@ -234,18 +234,35 @@ pub enum BadIdent { BadOpaqueRef(Position), } +fn is_alnum(ch: char) -> bool { + ch.is_alphabetic() || ch.is_ascii_digit() +} + fn chomp_lowercase_part(buffer: &[u8]) -> Result<&str, Progress> { - chomp_part(|c: char| c.is_lowercase(), buffer) + chomp_part(char::is_lowercase, is_alnum, buffer) } fn chomp_uppercase_part(buffer: &[u8]) -> Result<&str, Progress> { - chomp_part(|c: char| c.is_uppercase(), buffer) + chomp_part(char::is_uppercase, is_alnum, buffer) +} + +fn chomp_anycase_part(buffer: &[u8]) -> Result<&str, Progress> { + chomp_part(char::is_alphabetic, is_alnum, buffer) +} + +fn chomp_integer_part(buffer: &[u8]) -> Result<&str, Progress> { + chomp_part( + |ch| char::is_ascii_digit(&ch), + |ch| char::is_ascii_digit(&ch), + buffer, + ) } #[inline(always)] -fn chomp_part(leading_is_good: F, buffer: &[u8]) -> Result<&str, Progress> +fn chomp_part(leading_is_good: F, rest_is_good: G, buffer: &[u8]) -> Result<&str, Progress> where F: Fn(char) -> bool, + G: Fn(char) -> bool, { use encode_unicode::CharExt; @@ -260,7 +277,7 @@ where } while let Ok((ch, width)) = char::from_utf8_slice_start(&buffer[chomped..]) { - if ch.is_alphabetic() || ch.is_ascii_digit() { + if rest_is_good(ch) { chomped += width; } else { // we're done @@ -277,8 +294,13 @@ where } } -/// a `.foo` accessor function -fn chomp_accessor(buffer: &[u8], pos: Position) -> Result<&str, BadIdent> { +pub enum Accessor<'a> { + RecordField(&'a str), + TupleIndex(&'a str), +} + +/// a `.foo` or `.1` accessor function +fn chomp_accessor(buffer: &[u8], pos: Position) -> Result { // assumes the leading `.` has been chomped already use encode_unicode::CharExt; @@ -289,12 +311,25 @@ fn chomp_accessor(buffer: &[u8], pos: Position) -> Result<&str, BadIdent> { if let Ok(('.', _)) = char::from_utf8_slice_start(&buffer[chomped..]) { Err(BadIdent::WeirdAccessor(pos)) } else { - Ok(name) + Ok(Accessor::RecordField(name)) } } Err(_) => { - // we've already made progress with the initial `.` - Err(BadIdent::StrayDot(pos.bump_column(1))) + match chomp_integer_part(buffer) { + Ok(name) => { + let chomped = name.len(); + + if let Ok(('.', _)) = char::from_utf8_slice_start(&buffer[chomped..]) { + Err(BadIdent::WeirdAccessor(pos)) + } else { + Ok(Accessor::TupleIndex(name)) + } + } + Err(_) => { + // we've already made progress with the initial `.` + Err(BadIdent::StrayDot(pos.bump_column(1))) + } + } } } } @@ -335,10 +370,13 @@ fn chomp_identifier_chain<'a>( match char::from_utf8_slice_start(&buffer[chomped..]) { Ok((ch, width)) => match ch { '.' => match chomp_accessor(&buffer[1..], pos) { - Ok(accessor) => { + Ok(Accessor::RecordField(accessor)) => { let bytes_parsed = 1 + accessor.len(); - - return Ok((bytes_parsed as u32, Ident::AccessorFunction(accessor))); + return Ok((bytes_parsed as u32, Ident::RecordAccessorFunction(accessor))); + } + Ok(Accessor::TupleIndex(accessor)) => { + let bytes_parsed = 1 + accessor.len(); + return Ok((bytes_parsed as u32, Ident::TupleAccessorFunction(accessor))); } Err(fail) => return Err((1, fail)), }, diff --git a/crates/compiler/parse/src/pattern.rs b/crates/compiler/parse/src/pattern.rs index 78c6286949..25bca1b944 100644 --- a/crates/compiler/parse/src/pattern.rs +++ b/crates/compiler/parse/src/pattern.rs @@ -340,7 +340,7 @@ fn loc_ident_pattern_help<'a>( )) } } - Ident::AccessorFunction(string) => Ok(( + Ident::RecordAccessorFunction(string) | Ident::TupleAccessorFunction(string) => Ok(( MadeProgress, Loc { region: loc_ident.region, diff --git a/crates/compiler/parse/tests/snapshots/pass/basic_field.expr.result-ast b/crates/compiler/parse/tests/snapshots/pass/basic_field.expr.result-ast index 89beaecfa5..40c572be2e 100644 --- a/crates/compiler/parse/tests/snapshots/pass/basic_field.expr.result-ast +++ b/crates/compiler/parse/tests/snapshots/pass/basic_field.expr.result-ast @@ -1,4 +1,4 @@ -Access( +RecordAccess( Var { module_name: "", ident: "rec", diff --git a/crates/compiler/parse/tests/snapshots/pass/multiple_fields.expr.result-ast b/crates/compiler/parse/tests/snapshots/pass/multiple_fields.expr.result-ast index bb7b50e47a..856d51e722 100644 --- a/crates/compiler/parse/tests/snapshots/pass/multiple_fields.expr.result-ast +++ b/crates/compiler/parse/tests/snapshots/pass/multiple_fields.expr.result-ast @@ -1,6 +1,6 @@ -Access( - Access( - Access( +RecordAccess( + RecordAccess( + RecordAccess( Var { module_name: "", ident: "rec", diff --git a/crates/compiler/parse/tests/snapshots/pass/parenthetical_basic_field.expr.result-ast b/crates/compiler/parse/tests/snapshots/pass/parenthetical_basic_field.expr.result-ast index a2b8bd86ad..4d2a918ff6 100644 --- a/crates/compiler/parse/tests/snapshots/pass/parenthetical_basic_field.expr.result-ast +++ b/crates/compiler/parse/tests/snapshots/pass/parenthetical_basic_field.expr.result-ast @@ -1,4 +1,4 @@ -Access( +RecordAccess( ParensAround( Var { module_name: "", diff --git a/crates/compiler/parse/tests/snapshots/pass/parenthetical_field_qualified_var.expr.result-ast b/crates/compiler/parse/tests/snapshots/pass/parenthetical_field_qualified_var.expr.result-ast index ec387d8d3b..3e7ca8d62c 100644 --- a/crates/compiler/parse/tests/snapshots/pass/parenthetical_field_qualified_var.expr.result-ast +++ b/crates/compiler/parse/tests/snapshots/pass/parenthetical_field_qualified_var.expr.result-ast @@ -1,4 +1,4 @@ -Access( +RecordAccess( ParensAround( Var { module_name: "One.Two", diff --git a/crates/compiler/parse/tests/snapshots/pass/qualified_field.expr.result-ast b/crates/compiler/parse/tests/snapshots/pass/qualified_field.expr.result-ast index d629481dbe..48c6df6e5d 100644 --- a/crates/compiler/parse/tests/snapshots/pass/qualified_field.expr.result-ast +++ b/crates/compiler/parse/tests/snapshots/pass/qualified_field.expr.result-ast @@ -1,6 +1,6 @@ -Access( - Access( - Access( +RecordAccess( + RecordAccess( + RecordAccess( Var { module_name: "One.Two", ident: "rec", diff --git a/crates/compiler/parse/tests/snapshots/pass/tuple_accessor_function.expr.result-ast b/crates/compiler/parse/tests/snapshots/pass/tuple_accessor_function.expr.result-ast new file mode 100644 index 0000000000..a54857c683 --- /dev/null +++ b/crates/compiler/parse/tests/snapshots/pass/tuple_accessor_function.expr.result-ast @@ -0,0 +1,21 @@ +Apply( + @0-2 TupleAccessorFunction( + "1", + ), + [ + @3-12 Tuple( + [ + @4-5 Num( + "1", + ), + @7-8 Num( + "2", + ), + @10-11 Num( + "3", + ), + ], + ), + ], + Space, +) diff --git a/crates/compiler/parse/tests/snapshots/pass/tuple_accessor_function.expr.roc b/crates/compiler/parse/tests/snapshots/pass/tuple_accessor_function.expr.roc new file mode 100644 index 0000000000..1bbe5354cf --- /dev/null +++ b/crates/compiler/parse/tests/snapshots/pass/tuple_accessor_function.expr.roc @@ -0,0 +1 @@ +.1 (1, 2, 3) \ No newline at end of file diff --git a/crates/compiler/parse/tests/snapshots/pass/unary_negation_access.expr.result-ast b/crates/compiler/parse/tests/snapshots/pass/unary_negation_access.expr.result-ast index 8a32af8c8a..4094bd8ff4 100644 --- a/crates/compiler/parse/tests/snapshots/pass/unary_negation_access.expr.result-ast +++ b/crates/compiler/parse/tests/snapshots/pass/unary_negation_access.expr.result-ast @@ -1,5 +1,5 @@ UnaryOp( - @1-11 Access( + @1-11 RecordAccess( Var { module_name: "", ident: "rec1", diff --git a/crates/compiler/parse/tests/test_parse.rs b/crates/compiler/parse/tests/test_parse.rs index 279265373a..2537a96847 100644 --- a/crates/compiler/parse/tests/test_parse.rs +++ b/crates/compiler/parse/tests/test_parse.rs @@ -279,6 +279,7 @@ mod test_parse { pass/underscore_backpassing.expr, pass/underscore_in_assignment_pattern.expr, pass/var_else.expr, + pass/tuple_accessor_function.expr, pass/var_if.expr, pass/var_is.expr, pass/var_minus_two.expr,