diff --git a/Earthfile b/Earthfile index 6b45cabafd..af5c0340c4 100644 --- a/Earthfile +++ b/Earthfile @@ -27,7 +27,7 @@ install-zig-llvm-valgrind-clippy-rustfmt: RUN echo "[build]" > $CARGO_HOME/config.toml RUN echo "rustflags = [\"-C\", \"link-arg=-fuse-ld=lld\", \"-C\", \"target-cpu=native\"]" >> $CARGO_HOME/config.tom # valgrind - RUN apt -y install autotools-dev cmake automake + RUN apt -y install autotools-dev cmake automake libc6-dbg RUN wget https://sourceware.org/pub/valgrind/valgrind-3.16.1.tar.bz2 RUN tar -xf valgrind-3.16.1.tar.bz2 # need to cd every time, every command starts at WORKDIR diff --git a/cli/tests/cli_run.rs b/cli/tests/cli_run.rs index ecaf3e3062..ef11fade5d 100644 --- a/cli/tests/cli_run.rs +++ b/cli/tests/cli_run.rs @@ -96,7 +96,7 @@ mod cli_run { "hello-world", &[], "Hello, World!!!!!!!!!!!!!\n", - false, + true, ); } @@ -108,7 +108,7 @@ mod cli_run { "hello-world", &[], "Hello, World!!!!!!!!!!!!!\n", - false, + true, ); } @@ -120,7 +120,7 @@ mod cli_run { "quicksort", &[], "[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2]\n", - false, + true, ); } @@ -132,28 +132,12 @@ mod cli_run { "quicksort", &["--optimize"], "[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2]\n", - false, - ); - } - - #[test] - #[serial(quicksort)] - // TODO: Stop ignoring this test once we are correctly freeing the RocList even when in dev build. - #[ignore] - fn run_quicksort_valgrind() { - check_output( - &example_file("quicksort", "Quicksort.roc"), - "quicksort", - &[], - "[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2]\n", true, ); } #[test] #[serial(quicksort)] - // TODO: Stop ignoring this test once valgrind supports AVX512. - #[ignore] fn run_quicksort_optimized_valgrind() { check_output( &example_file("quicksort", "Quicksort.roc"), @@ -164,34 +148,6 @@ mod cli_run { ); } - #[test] - #[serial(multi_module)] - // TODO: Stop ignoring this test once we are correctly freeing the RocList even when in dev build. - #[ignore] - fn run_multi_module_valgrind() { - check_output( - &example_file("multi-module", "Quicksort.roc"), - "quicksort", - &[], - "[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2]\n", - true, - ); - } - - #[test] - #[serial(multi_module)] - // TODO: Stop ignoring this test once valgrind supports AVX512. - #[ignore] - fn run_multi_module_optimized_valgrind() { - check_output( - &example_file("multi-module", "Quicksort.roc"), - "quicksort", - &["--optimize"], - "[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2]\n", - true, - ); - } - #[test] #[serial(nqueens)] fn run_nqueens_not_optimized() { @@ -201,7 +157,7 @@ mod cli_run { "nqueens", &[], "4\n", - false, + true, ); } @@ -213,7 +169,7 @@ mod cli_run { "cfold", &[], "11 & 11\n", - false, + true, ); } @@ -237,7 +193,7 @@ mod cli_run { "rbtree-insert", &[], "Node Black 0 {} Empty Empty\n", - false, + true, ); } @@ -249,7 +205,7 @@ mod cli_run { "rbtree-del", &[], "30\n", - false, + true, ); } @@ -272,7 +228,7 @@ mod cli_run { "multi-dep-str", &[], "I am Dep2.str2\n", - false, + true, ); } @@ -284,7 +240,7 @@ mod cli_run { "multi-dep-str", &["--optimize"], "I am Dep2.str2\n", - false, + true, ); } @@ -296,7 +252,7 @@ mod cli_run { "multi-dep-thunk", &[], "I am Dep2.value2\n", - false, + true, ); } @@ -308,7 +264,7 @@ mod cli_run { "multi-dep-thunk", &["--optimize"], "I am Dep2.value2\n", - false, + true, ); } } diff --git a/compiler/gen/src/llvm/build.rs b/compiler/gen/src/llvm/build.rs index cb52cec7e1..15575694e6 100644 --- a/compiler/gen/src/llvm/build.rs +++ b/compiler/gen/src/llvm/build.rs @@ -661,7 +661,7 @@ pub fn build_exp_literal<'a, 'ctx, 'env>( pub fn build_exp_call<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, layout_ids: &mut LayoutIds<'a>, - scope: &Scope<'a, 'ctx>, + scope: &mut Scope<'a, 'ctx>, parent: FunctionValue<'ctx>, layout: &Layout<'a>, call: &roc_mono::ir::Call<'a>, @@ -732,16 +732,29 @@ pub fn build_exp_call<'a, 'ctx, 'env>( } CallType::Foreign { - foreign_symbol: foreign, + foreign_symbol, ret_layout, - } => build_foreign_symbol(env, scope, foreign, arguments, ret_layout), + } => { + // we always initially invoke foreign symbols, but if there is nothing to clean up, + // we emit a normal call + build_foreign_symbol( + env, + layout_ids, + scope, + parent, + foreign_symbol, + arguments, + ret_layout, + ForeignCallOrInvoke::Call, + ) + } } } pub fn build_exp_expr<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, layout_ids: &mut LayoutIds<'a>, - scope: &Scope<'a, 'ctx>, + scope: &mut Scope<'a, 'ctx>, parent: FunctionValue<'ctx>, layout: &Layout<'a>, expr: &roc_mono::ir::Expr<'a>, @@ -1848,8 +1861,6 @@ fn invoke_roc_function<'a, 'ctx, 'env>( { env.builder.position_at_end(pass_block); - // env.builder.build_store(alloca, call_result); - // scope.insert(symbol, (layout, alloca)); scope.insert(symbol, (layout, call_result)); build_exp_stmt(env, layout_ids, scope, parent, pass); @@ -1909,7 +1920,7 @@ pub fn build_exp_stmt<'a, 'ctx, 'env>( for (symbol, expr, layout) in queue { debug_assert!(layout != &Layout::RecursivePointer); - let val = build_exp_expr(env, layout_ids, &scope, parent, layout, &expr); + let val = build_exp_expr(env, layout_ids, scope, parent, layout, &expr); // Make a new scope which includes the binding we just encountered. // This should be done *after* compiling the bound expr, since any @@ -2012,7 +2023,20 @@ pub fn build_exp_stmt<'a, 'ctx, 'env>( CallType::Foreign { ref foreign_symbol, ref ret_layout, - } => build_foreign_symbol(env, scope, foreign_symbol, call.arguments, ret_layout), + } => build_foreign_symbol( + env, + layout_ids, + scope, + parent, + foreign_symbol, + call.arguments, + ret_layout, + ForeignCallOrInvoke::Invoke { + symbol: *symbol, + pass, + fail, + }, + ), CallType::LowLevel { .. } => { unreachable!("lowlevel itself never throws exceptions") @@ -2116,12 +2140,6 @@ pub fn build_exp_stmt<'a, 'ctx, 'env>( context.i64_type().const_zero().into() } - /* - Inc(symbol1, 1, Dec(symbol2, cont)) if symbol1 == symbol2 => { - dbg!(symbol1); - build_exp_stmt(env, layout_ids, scope, parent, cont) - } - */ Refcounting(modify, cont) => { use ModifyRc::*; @@ -4008,70 +4026,159 @@ fn run_low_level<'a, 'ctx, 'env>( } } +enum ForeignCallOrInvoke<'a> { + Call, + Invoke { + symbol: Symbol, + pass: &'a roc_mono::ir::Stmt<'a>, + fail: &'a roc_mono::ir::Stmt<'a>, + }, +} + +fn build_foreign_symbol_return_result<'a, 'ctx, 'env>( + env: &Env<'a, 'ctx, 'env>, + scope: &mut Scope<'a, 'ctx>, + foreign: &roc_module::ident::ForeignSymbol, + arguments: &[Symbol], + return_type: BasicTypeEnum<'ctx>, +) -> (FunctionValue<'ctx>, &'a [BasicValueEnum<'ctx>]) { + let mut arg_vals: Vec = Vec::with_capacity_in(arguments.len(), env.arena); + let mut arg_types = Vec::with_capacity_in(arguments.len() + 1, env.arena); + + for arg in arguments.iter() { + let (value, layout) = load_symbol_and_layout(scope, arg); + arg_vals.push(value); + let arg_type = basic_type_from_layout(env.arena, env.context, layout, env.ptr_bytes); + debug_assert_eq!(arg_type, value.get_type()); + arg_types.push(arg_type); + } + + let function_type = get_fn_type(&return_type, &arg_types); + let function = get_foreign_symbol(env, foreign.clone(), function_type); + + (function, arg_vals.into_bump_slice()) +} + +fn build_foreign_symbol_write_result_into_ptr<'a, 'ctx, 'env>( + env: &Env<'a, 'ctx, 'env>, + scope: &mut Scope<'a, 'ctx>, + foreign: &roc_module::ident::ForeignSymbol, + arguments: &[Symbol], + return_pointer: PointerValue<'ctx>, +) -> (FunctionValue<'ctx>, &'a [BasicValueEnum<'ctx>]) { + let mut arg_vals: Vec = Vec::with_capacity_in(arguments.len(), env.arena); + let mut arg_types = Vec::with_capacity_in(arguments.len() + 1, env.arena); + + arg_vals.push(return_pointer.into()); + arg_types.push(return_pointer.get_type().into()); + + for arg in arguments.iter() { + let (value, layout) = load_symbol_and_layout(scope, arg); + arg_vals.push(value); + let arg_type = basic_type_from_layout(env.arena, env.context, layout, env.ptr_bytes); + debug_assert_eq!(arg_type, value.get_type()); + arg_types.push(arg_type); + } + + let function_type = env.context.void_type().fn_type(&arg_types, false); + let function = get_foreign_symbol(env, foreign.clone(), function_type); + + (function, arg_vals.into_bump_slice()) +} + +#[allow(clippy::too_many_arguments)] fn build_foreign_symbol<'a, 'ctx, 'env>( env: &Env<'a, 'ctx, 'env>, - scope: &Scope<'a, 'ctx>, + layout_ids: &mut LayoutIds<'a>, + scope: &mut Scope<'a, 'ctx>, + parent: FunctionValue<'ctx>, foreign: &roc_module::ident::ForeignSymbol, arguments: &[Symbol], ret_layout: &Layout<'a>, + call_or_invoke: ForeignCallOrInvoke<'a>, ) -> BasicValueEnum<'ctx> { - let mut arg_vals: Vec = Vec::with_capacity_in(arguments.len(), env.arena); - - let mut arg_types = Vec::with_capacity_in(arguments.len() + 1, env.arena); + let ret_type = basic_type_from_layout(env.arena, env.context, ret_layout, env.ptr_bytes); + let return_pointer = env.builder.build_alloca(ret_type, "return_value"); // crude approximation of the C calling convention let pass_result_by_pointer = ret_layout.stack_size(env.ptr_bytes) > 2 * env.ptr_bytes; - if pass_result_by_pointer { - // the return value is too big to pass through a register, so the caller must - // allocate space for it on its stack, and provide a pointer to write the result into - let ret_type = basic_type_from_layout(env.arena, env.context, ret_layout, env.ptr_bytes); - - let ret_ptr_type = get_ptr_type(&ret_type, AddressSpace::Generic); - - let ret_ptr = env.builder.build_alloca(ret_type, "return_value"); - - arg_vals.push(ret_ptr.into()); - arg_types.push(ret_ptr_type.into()); - - for arg in arguments.iter() { - let (value, layout) = load_symbol_and_layout(scope, arg); - arg_vals.push(value); - let arg_type = basic_type_from_layout(env.arena, env.context, layout, env.ptr_bytes); - arg_types.push(arg_type); - } - - let function_type = env.context.void_type().fn_type(&arg_types, false); - let function = get_foreign_symbol(env, foreign.clone(), function_type); - - let call = env.builder.build_call(function, arg_vals.as_slice(), "tmp"); - - // this is a foreign function, use c calling convention - call.set_call_convention(C_CALL_CONV); - - call.try_as_basic_value(); - - env.builder.build_load(ret_ptr, "read_result") + let (function, arguments) = if pass_result_by_pointer { + build_foreign_symbol_write_result_into_ptr(env, scope, foreign, arguments, return_pointer) } else { - for arg in arguments.iter() { - let (value, layout) = load_symbol_and_layout(scope, arg); - arg_vals.push(value); - let arg_type = basic_type_from_layout(env.arena, env.context, layout, env.ptr_bytes); - arg_types.push(arg_type); + build_foreign_symbol_return_result(env, scope, foreign, arguments, ret_type) + }; + + match call_or_invoke { + ForeignCallOrInvoke::Call => { + let call = env.builder.build_call(function, arguments, "tmp"); + + // this is a foreign function, use c calling convention + call.set_call_convention(C_CALL_CONV); + + call.try_as_basic_value(); + + if pass_result_by_pointer { + env.builder.build_load(return_pointer, "read_result") + } else { + call.try_as_basic_value().left().unwrap() + } } + ForeignCallOrInvoke::Invoke { symbol, pass, fail } => { + let pass_block = env.context.append_basic_block(parent, "invoke_pass"); + let fail_block = env.context.append_basic_block(parent, "invoke_fail"); - let ret_type = basic_type_from_layout(env.arena, env.context, ret_layout, env.ptr_bytes); - let function_type = get_fn_type(&ret_type, &arg_types); - let function = get_foreign_symbol(env, foreign.clone(), function_type); + let call = env + .builder + .build_invoke(function, arguments, pass_block, fail_block, "tmp"); - let call = env.builder.build_call(function, arg_vals.as_slice(), "tmp"); + // this is a foreign function, use c calling convention + call.set_call_convention(C_CALL_CONV); - // this is a foreign function, use c calling convention - call.set_call_convention(C_CALL_CONV); + call.try_as_basic_value(); - call.try_as_basic_value() - .left() - .unwrap_or_else(|| panic!("LLVM error: Invalid call by pointer.")) + let call_result = if pass_result_by_pointer { + env.builder.build_load(return_pointer, "read_result") + } else { + call.try_as_basic_value().left().unwrap() + }; + + { + env.builder.position_at_end(pass_block); + + scope.insert(symbol, (ret_layout.clone(), call_result)); + + build_exp_stmt(env, layout_ids, scope, parent, pass); + + scope.remove(&symbol); + } + + { + env.builder.position_at_end(fail_block); + + let landing_pad_type = { + let exception_ptr = + env.context.i8_type().ptr_type(AddressSpace::Generic).into(); + let selector_value = env.context.i32_type().into(); + + env.context + .struct_type(&[exception_ptr, selector_value], false) + }; + + env.builder + .build_catch_all_landing_pad( + &landing_pad_type, + &BasicValueEnum::IntValue(env.context.i8_type().const_zero()), + env.context.i8_type().ptr_type(AddressSpace::Generic), + "invoke_landing_pad", + ) + .into_struct_value(); + + build_exp_stmt(env, layout_ids, scope, parent, fail); + } + + call_result + } } } diff --git a/compiler/gen/src/llvm/build_list.rs b/compiler/gen/src/llvm/build_list.rs index 7ccbcffbd5..48e7e6797f 100644 --- a/compiler/gen/src/llvm/build_list.rs +++ b/compiler/gen/src/llvm/build_list.rs @@ -3,7 +3,10 @@ use crate::llvm::build::{ }; use crate::llvm::compare::generic_eq; use crate::llvm::convert::{basic_type_from_layout, collection, get_ptr_type}; -use crate::llvm::refcounting::{decrement_refcount_layout, increment_refcount_layout}; +use crate::llvm::refcounting::{ + decrement_refcount_layout, increment_refcount_layout, refcount_is_one_comparison, + PointerToRefcount, +}; use inkwell::builder::Builder; use inkwell::context::Context; use inkwell::types::{BasicTypeEnum, PointerType}; @@ -685,13 +688,38 @@ pub fn list_set<'a, 'ctx, 'env>( original_wrapper, load_list_ptr(builder, original_wrapper, ptr_type), ), - InPlace::Clone => clone_nonempty_list( - env, - output_inplace, - list_len, - load_list_ptr(builder, original_wrapper, ptr_type), - elem_layout, - ), + InPlace::Clone => { + let list_ptr = load_list_ptr(builder, original_wrapper, ptr_type); + + let refcount_ptr = PointerToRefcount::from_ptr_to_data(env, list_ptr); + let refcount = refcount_ptr.get_refcount(env); + + let rc_is_one = refcount_is_one_comparison(env, refcount); + + let source_block = env.builder.get_insert_block().unwrap(); + let clone_block = ctx.append_basic_block(parent, "clone"); + let done_block = ctx.append_basic_block(parent, "done"); + + env.builder + .build_conditional_branch(rc_is_one, done_block, clone_block); + + env.builder.position_at_end(clone_block); + + let cloned = + clone_nonempty_list(env, output_inplace, list_len, list_ptr, elem_layout).0; + + env.builder.build_unconditional_branch(done_block); + + env.builder.position_at_end(done_block); + + let list_type = original_wrapper.get_type(); + let merged = env.builder.build_phi(list_type, "writable_list"); + merged.add_incoming(&[(&original_wrapper, source_block), (&cloned, clone_block)]); + + let result = merged.as_basic_value().into_struct_value(); + + (result, load_list_ptr(builder, result, ptr_type)) + } }; // If we got here, we passed the bounds check, so this is an in-bounds GEP diff --git a/compiler/gen/src/llvm/refcounting.rs b/compiler/gen/src/llvm/refcounting.rs index 8fd78f06f2..a7d07c3ca1 100644 --- a/compiler/gen/src/llvm/refcounting.rs +++ b/compiler/gen/src/llvm/refcounting.rs @@ -489,7 +489,7 @@ fn modify_refcount_layout<'a, 'ctx, 'env>( let field_ptr = env .builder - .build_extract_value(wrapper_struct, 1, "increment_closure_data") + .build_extract_value(wrapper_struct, 1, "modify_rc_closure_data") .unwrap(); modify_refcount_layout( diff --git a/compiler/mono/src/borrow.rs b/compiler/mono/src/borrow.rs index 905ba01253..14f6687ed0 100644 --- a/compiler/mono/src/borrow.rs +++ b/compiler/mono/src/borrow.rs @@ -261,6 +261,16 @@ impl<'a> BorrowInfState<'a> { } } + fn own_arg(&mut self, x: Symbol) { + self.own_var(x); + } + + fn own_args(&mut self, xs: &[Symbol]) { + for x in xs.iter() { + self.own_arg(*x); + } + } + /// For each xs[i], if xs[i] is owned, then mark ps[i] as owned. /// We use this action to preserve tail calls. That is, if we have /// a tail call `f xs`, if the i-th parameter is borrowed, but `xs[i]` is owned @@ -308,31 +318,33 @@ impl<'a> BorrowInfState<'a> { } = e; match call_type { - ByName { - name, arg_layouts, .. - } - | ByPointer { - name, arg_layouts, .. - } => { + ByName { name, .. } => { // get the borrow signature of the applied function - let ps = match self.param_map.get_symbol(*name) { - Some(slice) => slice, - None => Vec::from_iter_in( - arg_layouts.iter().cloned().map(|layout| Param { - symbol: Symbol::UNDERSCORE, - borrow: false, - layout, - }), - self.arena, - ) - .into_bump_slice(), - }; + match self.param_map.get_symbol(*name) { + Some(ps) => { + // the return value will be owned + self.own_var(z); + // if the function exects an owned argument (ps), the argument must be owned (args) + self.own_args_using_params(arguments, ps); + } + None => { + // this is really an indirect call, but the function was bound to a symbol + // the return value will be owned + self.own_var(z); + + // if the function exects an owned argument (ps), the argument must be owned (args) + self.own_args(arguments); + } + } + } + + ByPointer { .. } => { // the return value will be owned self.own_var(z); // if the function exects an owned argument (ps), the argument must be owned (args) - self.own_args_using_params(arguments, ps); + self.own_args(arguments); } LowLevel { op } => { diff --git a/compiler/mono/src/inc_dec.rs b/compiler/mono/src/inc_dec.rs index c8fb776ac9..a7413a9c60 100644 --- a/compiler/mono/src/inc_dec.rs +++ b/compiler/mono/src/inc_dec.rs @@ -466,35 +466,46 @@ impl<'a> Context<'a> { &*self.arena.alloc(Stmt::Let(z, v, l, b)) } - ByName { - name, arg_layouts, .. - } - | ByPointer { - name, arg_layouts, .. - } => { + ByName { name, .. } => { // get the borrow signature - let ps = match self.param_map.get_symbol(*name) { - Some(slice) => slice, - None => Vec::from_iter_in( - arg_layouts.iter().cloned().map(|layout| Param { - symbol: Symbol::UNDERSCORE, - borrow: false, - layout, - }), - self.arena, - ) - .into_bump_slice(), - }; + match self.param_map.get_symbol(*name) { + Some(ps) => { + let v = Expr::Call(crate::ir::Call { + call_type, + arguments, + }); + let b = self.add_dec_after_application(arguments, ps, b, b_live_vars); + let b = self.arena.alloc(Stmt::Let(z, v, l, b)); + + self.add_inc_before(arguments, ps, b, b_live_vars) + } + None => { + // an indirect call that was bound to a name + let v = Expr::Call(crate::ir::Call { + call_type, + arguments, + }); + + self.add_inc_before_consume_all( + arguments, + self.arena.alloc(Stmt::Let(z, v, l, b)), + b_live_vars, + ) + } + } + } + ByPointer { .. } => { let v = Expr::Call(crate::ir::Call { call_type, arguments, }); - let b = self.add_dec_after_application(arguments, ps, b, b_live_vars); - let b = self.arena.alloc(Stmt::Let(z, v, l, b)); - - self.add_inc_before(arguments, ps, b, b_live_vars) + self.add_inc_before_consume_all( + arguments, + self.arena.alloc(Stmt::Let(z, v, l, b)), + b_live_vars, + ) } } } diff --git a/compiler/parse/src/blankspace.rs b/compiler/parse/src/blankspace.rs index 9b422a9df6..d3f0b70f55 100644 --- a/compiler/parse/src/blankspace.rs +++ b/compiler/parse/src/blankspace.rs @@ -444,6 +444,9 @@ where let mut state = state; let mut any_newlines = false; + let start_row = original_state.line; + let start_col = original_state.column; + let start_bytes_len = state.bytes.len(); while !state.bytes.is_empty() { @@ -491,7 +494,13 @@ where let progress = Progress::from_lengths(start_bytes_len, state.bytes.len()); state = state - .check_indent_e(arena, min_indent, indent_problem) + .check_indent_e( + arena, + min_indent, + indent_problem, + start_row, + start_col, + ) .map_err(|(fail, _)| { (progress, fail, original_state.clone()) })? @@ -523,7 +532,13 @@ where ); if any_newlines { state = state - .check_indent_e(arena, min_indent, indent_problem) + .check_indent_e( + arena, + min_indent, + indent_problem, + start_row, + start_col, + ) .map_err(|(fail, _)| { (progress, fail, original_state.clone()) })?; @@ -692,7 +707,13 @@ where progress, space_slice, state - .check_indent_e(arena, min_indent, indent_problem) + .check_indent_e( + arena, + min_indent, + indent_problem, + start_row, + start_col, + ) .map_err(|(fail, _)| (progress, fail, original_state))?, )); } @@ -720,7 +741,7 @@ where let progress = Progress::from_lengths(start_bytes_len, state.bytes.len()); if any_newlines { state = state - .check_indent_e(arena, min_indent, indent_problem) + .check_indent_e(arena, min_indent, indent_problem, start_row, start_col) .map_err(|(fail, _)| (progress, fail, original_state))?; } diff --git a/compiler/parse/src/parser.rs b/compiler/parse/src/parser.rs index f96d521ebe..3cce208d30 100644 --- a/compiler/parse/src/parser.rs +++ b/compiler/parse/src/parser.rs @@ -72,12 +72,14 @@ impl<'a> State<'a> { _arena: &'a Bump, min_indent: u16, to_error: TE, + row: Row, + col: Col, ) -> Result where TE: Fn(Row, Col) -> E, { if self.indent_col < min_indent { - Err((to_error(self.line, self.column), self)) + Err((to_error(row, col), self)) } else { Ok(self) } @@ -370,29 +372,33 @@ pub type Col = u16; pub enum Type<'a> { TRecord(TRecord<'a>, Row, Col), TTagUnion(TTagUnion<'a>, Row, Col), + TInParens(TInParens<'a>, Row, Col), + TApply(TApply, Row, Col), + TVariable(TVariable, Row, Col), + TWildcard(Row, Col), /// TStart(Row, Col), - TSpace(Row, Col), + TEnd(Row, Col), + TSpace(BadInputError, Row, Col), + TFunctionArgument(Row, Col), /// TIndentStart(Row, Col), + TIndentEnd(Row, Col), + TAsIndentStart(Row, Col), } #[derive(Debug, Clone, PartialEq, Eq)] pub enum TRecord<'a> { End(Row, Col), Open(Row, Col), - /// + Field(Row, Col), Colon(Row, Col), Optional(Row, Col), Type(&'a Type<'a>, Row, Col), - // TODO REMOVE in favor of Type - Syntax(&'a SyntaxError<'a>, Row, Col), - - /// Space(BadInputError, Row, Col), - /// + IndentOpen(Row, Col), IndentColon(Row, Col), IndentOptional(Row, Col), @@ -403,11 +409,21 @@ pub enum TRecord<'a> { pub enum TTagUnion<'a> { End(Row, Col), Open(Row, Col), - /// + Type(&'a Type<'a>, Row, Col), - // TODO REMOVE in favor of Type - Syntax(&'a SyntaxError<'a>, Row, Col), + Space(BadInputError, Row, Col), + + IndentOpen(Row, Col), + IndentEnd(Row, Col), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum TInParens<'a> { + End(Row, Col), + Open(Row, Col), + /// + Type(&'a Type<'a>, Row, Col), /// Space(BadInputError, Row, Col), @@ -416,6 +432,26 @@ pub enum TTagUnion<'a> { IndentEnd(Row, Col), } +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum TApply { + /// + StartNotUppercase(Row, Col), + End(Row, Col), + Space(BadInputError, Row, Col), + /// + DoubleDot(Row, Col), + TrailingDot(Row, Col), + StartIsNumber(Row, Col), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum TVariable { + /// + StartNotLowercase(Row, Col), + End(Row, Col), + Space(BadInputError, Row, Col), +} + #[derive(Debug, Clone, PartialEq, Eq)] pub enum ContextStack<'a> { Cons(ContextItem, &'a ContextStack<'a>), @@ -529,6 +565,26 @@ where } } +pub fn not_e<'a, P, TE, E, X, Val>(parser: P, to_error: TE) -> impl Parser<'a, (), E> +where + TE: Fn(Row, Col) -> E, + P: Parser<'a, Val, X>, + E: 'a, +{ + move |arena, state: State<'a>| { + let original_state = state.clone(); + + match parser.parse(arena, state) { + Ok((_, _, _)) => Err(( + NoProgress, + to_error(original_state.line, original_state.column), + original_state, + )), + Err((_, _, _)) => Ok((NoProgress, (), original_state)), + } + } +} + pub fn lookahead<'a, Peek, P, PeekVal, Val, Error>( peek: Peek, parser: P, @@ -758,6 +814,27 @@ pub fn peek_utf8_char<'a>(state: &State) -> Result<(char, usize), SyntaxError<'a } } +/// A single UTF-8-encoded char. This will both parse *and* validate that the +/// char is valid UTF-8, but it will *not* advance the state. +pub fn peek_utf8_char_e( + state: &State, + end_of_file: EOF, + to_error: TE, +) -> Result<(char, usize), E> +where + TE: Fn(BadInputError, Row, Col) -> E, + EOF: Fn(Row, Col) -> E, +{ + if !state.bytes.is_empty() { + match char::from_utf8_slice_start(state.bytes) { + Ok((ch, len_utf8)) => Ok((ch, len_utf8)), + Err(_) => Err(to_error(BadInputError::BadUtf8, state.line, state.column)), + } + } else { + Err(end_of_file(state.line, state.column)) + } +} + /// A single UTF-8-encoded char, with an offset. This will both parse *and* /// validate that the char is valid UTF-8, but it will *not* advance the state. pub fn peek_utf8_char_at<'a>( @@ -778,7 +855,10 @@ pub fn peek_utf8_char_at<'a>( } } -pub fn keyword<'a>(keyword: &'static str, min_indent: u16) -> impl Parser<'a, (), SyntaxError<'a>> { +pub fn keyword<'a>( + keyword: &'static str, + _min_indent: u16, +) -> impl Parser<'a, (), SyntaxError<'a>> { move |arena, state: State<'a>| { let initial_state = state.clone(); // first parse the keyword characters @@ -786,18 +866,47 @@ pub fn keyword<'a>(keyword: &'static str, min_indent: u16) -> impl Parser<'a, () // then we must have at least one space character // TODO this is potentially wasteful if there are a lot of spaces - match crate::blankspace::space1(min_indent).parse(arena, after_keyword_state.clone()) { - Err((_, fail, _)) => { - // this is not a keyword, maybe it's `whence` or `iffy` - // anyway, make no progress and return the initial state - // so we can try something else - Err((NoProgress, fail, initial_state)) - } - Ok((_, _, _)) => { + match peek_utf8_char(&after_keyword_state) { + Ok((next, _width)) if next == ' ' || next == '#' || next == '\n' => { // give back the state after parsing the keyword, but before the whitespace // that way we can attach the whitespace to whatever follows Ok((MadeProgress, (), after_keyword_state)) } + _ => { + // this is not a keyword, maybe it's `whence` or `iffy` + // anyway, make no progress and return the initial state + // so we can try something else + Err((NoProgress, SyntaxError::ConditionFailed, initial_state)) + } + } + } +} + +pub fn keyword_e<'a, E>(keyword: &'static str, if_error: E) -> impl Parser<'a, (), E> +where + E: 'a + Clone, +{ + move |arena, state: State<'a>| { + let initial_state = state.clone(); + // first parse the keyword characters + let (_, _, after_keyword_state) = ascii_string(keyword) + .parse(arena, state) + .map_err(|(_, _, state)| (NoProgress, if_error.clone(), state))?; + + // then we must have at least one space character + // TODO this is potentially wasteful if there are a lot of spaces + match peek_utf8_char(&after_keyword_state) { + Ok((next, _width)) if next == ' ' || next == '#' || next == '\n' => { + // give back the state after parsing the keyword, but before the whitespace + // that way we can attach the whitespace to whatever follows + Ok((MadeProgress, (), after_keyword_state)) + } + _ => { + // this is not a keyword, maybe it's `whence` or `iffy` + // anyway, make no progress and return the initial state + // so we can try something else + Err((NoProgress, if_error.clone(), initial_state)) + } } } } @@ -815,20 +924,15 @@ pub fn ascii_string<'a>(keyword: &'static str) -> impl Parser<'a, (), SyntaxErro let len = keyword.len(); // TODO do this comparison in one SIMD instruction (on supported systems) - match state.bytes.get(0..len) { - Some(next_str) => { - if next_str == keyword.as_bytes() { - Ok(( - Progress::MadeProgress, - (), - state.advance_without_indenting(arena, len)?, - )) - } else { - let (_, fail, state) = unexpected(arena, len, Attempting::Keyword, state); - Err((NoProgress, fail, state)) - } - } - _ => Err(unexpected_eof(arena, state, 0)), + if state.bytes.starts_with(keyword.as_bytes()) { + Ok(( + Progress::MadeProgress, + (), + state.advance_without_indenting(arena, len)?, + )) + } else { + let (_, fail, state) = unexpected(arena, len, Attempting::Keyword, state); + Err((NoProgress, fail, state)) } } } @@ -1357,6 +1461,58 @@ where } } +pub fn word2<'a, ToError, E>(word_1: u8, word_2: u8, to_error: ToError) -> impl Parser<'a, (), E> +where + ToError: Fn(Row, Col) -> E, + E: 'a, +{ + debug_assert_ne!(word_1, b'\n'); + debug_assert_ne!(word_2, b'\n'); + + let needle = [word_1, word_2]; + + move |_arena: &'a Bump, state: State<'a>| { + if state.bytes.starts_with(&needle) { + Ok(( + MadeProgress, + (), + State { + bytes: &state.bytes[2..], + column: state.column + 2, + ..state + }, + )) + } else { + Err((NoProgress, to_error(state.line, state.column), state)) + } + } +} + +pub fn check_indent<'a, TE, E>(min_indent: u16, to_problem: TE) -> impl Parser<'a, (), E> +where + TE: Fn(Row, Col) -> E, + E: 'a, +{ + move |_arena, state: State<'a>| { + dbg!(state.indent_col, min_indent); + if state.indent_col < min_indent { + Err((NoProgress, to_problem(state.line, state.column), state)) + } else { + Ok((NoProgress, (), state)) + } + } +} + +#[macro_export] +macro_rules! word1_check_indent { + ($word:expr, $word_problem:expr, $min_indent:expr, $indent_problem:expr) => { + and!( + word1($word, $word_problem), + crate::parser::check_indent($min_indent, $indent_problem) + ) + }; +} + #[allow(dead_code)] fn in_context<'a, AddContext, P1, P2, Start, A, X, Y>( add_context: AddContext, diff --git a/compiler/parse/src/type_annotation.rs b/compiler/parse/src/type_annotation.rs index 652ec7f3a1..6ee9b87b0b 100644 --- a/compiler/parse/src/type_annotation.rs +++ b/compiler/parse/src/type_annotation.rs @@ -1,36 +1,26 @@ -use crate::ast::{AssignedField, Attempting, CommentOrNewline, Tag, TypeAnnotation}; -use crate::blankspace::{space0_around, space0_before, space1, space1_before}; -use crate::expr::{global_tag, private_tag}; +use crate::ast::{AssignedField, Tag, TypeAnnotation}; +use crate::blankspace::{space0_around_e, space0_before_e, space0_e}; use crate::ident::join_module_parts; use crate::keyword; use crate::parser::{ - allocated, ascii_char, ascii_string, not, optional, peek_utf8_char, specialize, specialize_ref, - unexpected, word1, BadInputError, Either, ParseResult, Parser, + allocated, backtrackable, not_e, optional, peek_utf8_char_e, specialize, specialize_ref, word1, + word2, BadInputError, ParseResult, Parser, Progress::{self, *}, - State, SyntaxError, TRecord, TTagUnion, Type, + State, SyntaxError, TApply, TInParens, TRecord, TTagUnion, TVariable, Type, }; use bumpalo::collections::string::String; use bumpalo::collections::vec::Vec; use bumpalo::Bump; -use roc_collections::all::arena_join; use roc_region::all::{Located, Region}; pub fn located<'a>( min_indent: u16, ) -> impl Parser<'a, Located>, SyntaxError<'a>> { - expression(min_indent) + specialize(|x, _, _| SyntaxError::Type(x), expression(min_indent)) } #[inline(always)] -fn tag_union_type<'a>(min_indent: u16) -> impl Parser<'a, TypeAnnotation<'a>, SyntaxError<'a>> { - let f = |x, row, col| SyntaxError::Type(Type::TTagUnion(x, row, col)); - specialize(f, tag_union_type_internal(min_indent)) -} - -#[inline(always)] -fn tag_union_type_internal<'a>( - min_indent: u16, -) -> impl Parser<'a, TypeAnnotation<'a>, TTagUnion<'a>> { +fn tag_union_type<'a>(min_indent: u16) -> impl Parser<'a, TypeAnnotation<'a>, TTagUnion<'a>> { move |arena, state| { let (_, (tags, final_comments), state) = collection_trailing_sep_e!( word1(b'[', TTagUnion::Open), @@ -45,11 +35,9 @@ fn tag_union_type_internal<'a>( .parse(arena, state)?; // This could be an open tag union, e.g. `[ Foo, Bar ]a` - let (_, ext, state) = optional(allocated(specialize_ref( - TTagUnion::Syntax, - term(min_indent), - ))) - .parse(arena, state)?; + let (_, ext, state) = + optional(allocated(specialize_ref(TTagUnion::Type, term(min_indent)))) + .parse(arena, state)?; let result = TypeAnnotation::TagUnion { tags: tags.into_bump_slice(), @@ -61,36 +49,41 @@ fn tag_union_type_internal<'a>( } } -#[allow(clippy::type_complexity)] -pub fn term<'a>(min_indent: u16) -> impl Parser<'a, Located>, SyntaxError<'a>> { +fn term<'a>(min_indent: u16) -> impl Parser<'a, Located>, Type<'a>> { map_with_arena!( and!( one_of!( loc_wildcard(), - loc_parenthetical_type(min_indent), - loc!(record_type(min_indent)), - loc!(tag_union_type(min_indent)), + specialize(Type::TInParens, loc_type_in_parens(min_indent)), + loc!(specialize(Type::TRecord, record_type(min_indent))), + loc!(specialize(Type::TTagUnion, tag_union_type(min_indent))), loc!(applied_type(min_indent)), - loc!(parse_type_variable) + loc!(specialize(Type::TVariable, parse_type_variable)) ), - |a, s| { - optional( - // Inline type annotation, e.g. [ Nil, Cons a (List a) ] as List a + // Inline alias notation, e.g. [ Nil, Cons a (List a) ] as List a + one_of![ + map!( and!( - space1(min_indent), - skip_first!( - crate::parser::keyword(keyword::AS, min_indent), - space1_before(term(min_indent), min_indent) + skip_second!( + backtrackable(space0_e(min_indent, Type::TSpace, Type::TIndentEnd)), + crate::parser::keyword_e(keyword::AS, Type::TEnd(0, 0)) + ), + space0_before_e( + term(min_indent), + min_indent, + Type::TSpace, + Type::TAsIndentStart ) ), - ) - .parse(a, s) - } + Some + ), + |_, state| Ok((NoProgress, None, state)) + ] ), |arena: &'a Bump, (loc_ann, opt_as): ( Located>, - Option<(&'a [CommentOrNewline<'a>], Located>)> + Option<(&'a [_], Located>)> )| { match opt_as { Some((spaces, loc_as)) => { @@ -108,87 +101,195 @@ pub fn term<'a>(min_indent: u16) -> impl Parser<'a, Located>, } /// The `*` type variable, e.g. in (List *) Wildcard, -fn loc_wildcard<'a>() -> impl Parser<'a, Located>, SyntaxError<'a>> { - map!(loc!(ascii_char(b'*')), |loc_val: Located<()>| { +fn loc_wildcard<'a>() -> impl Parser<'a, Located>, Type<'a>> { + map!(loc!(word1(b'*', Type::TWildcard)), |loc_val: Located<()>| { loc_val.map(|_| TypeAnnotation::Wildcard) }) } -fn loc_applied_arg<'a>( - min_indent: u16, -) -> impl Parser<'a, Located>, SyntaxError<'a>> { - skip_first!( - // Once we hit an "as", stop parsing args - // and roll back parsing of preceding spaces - not(and!( - space1(min_indent), - crate::parser::keyword(keyword::AS, min_indent) - )), - space1_before( - one_of!( - loc_wildcard(), - loc_parenthetical_type(min_indent), - loc!(record_type(min_indent)), - loc!(tag_union_type(min_indent)), - loc!(parse_concrete_type), - loc!(parse_type_variable) - ), - min_indent - ) +fn loc_applied_arg<'a>(min_indent: u16) -> impl Parser<'a, Located>, Type<'a>> { + use crate::ast::Spaceable; + + map_with_arena!( + and!( + backtrackable(space0_e(min_indent, Type::TSpace, Type::TIndentStart)), + skip_first!( + // Once we hit an "as", stop parsing args + // and roll back parsing of preceding spaces + not_e( + crate::parser::keyword(keyword::AS, min_indent), + Type::TStart + ), + one_of!( + loc_wildcard(), + specialize(Type::TInParens, loc_type_in_parens(min_indent)), + loc!(specialize(Type::TRecord, record_type(min_indent))), + loc!(specialize(Type::TTagUnion, tag_union_type(min_indent))), + loc!(specialize(Type::TApply, parse_concrete_type)), + loc!(specialize(Type::TVariable, parse_type_variable)) + ) + ) + ), + |arena: &'a Bump, (spaces, argument): (&'a [_], Located>)| { + if spaces.is_empty() { + argument + } else { + let Located { region, value } = argument; + arena.alloc(value).with_spaces_before(spaces, region) + } + } ) } -fn loc_applied_args<'a>( +fn loc_type_in_parens<'a>( min_indent: u16, -) -> impl Parser<'a, Vec<'a, Located>>, SyntaxError<'a>> { - zero_or_more!(loc_applied_arg(min_indent)) -} - -#[inline(always)] -fn loc_parenthetical_type<'a>( - min_indent: u16, -) -> impl Parser<'a, Located>, SyntaxError<'a>> { +) -> impl Parser<'a, Located>, TInParens<'a>> { + // TODO what if the middle parser returns EOF? between!( - ascii_char(b'('), - space0_around( - move |arena, state| expression(min_indent).parse(arena, state), + word1(b'(', TInParens::Open), + space0_around_e( + move |arena, state| specialize_ref(TInParens::Type, expression(min_indent)) + .parse(arena, state), min_indent, + TInParens::Space, + TInParens::IndentEnd, ), - ascii_char(b')') + word1(b')', TInParens::End) ) } #[inline(always)] fn tag_type<'a>(min_indent: u16) -> impl Parser<'a, Tag<'a>, TTagUnion<'a>> { move |arena, state: State<'a>| { - let (_, either_name, state) = specialize_ref( - TTagUnion::Syntax, - either!(loc!(private_tag()), loc!(global_tag())), - ) - .parse(arena, state)?; + let (_, name, state) = loc!(parse_tag_name(TTagUnion::End)).parse(arena, state)?; let (_, args, state) = - specialize_ref(TTagUnion::Syntax, loc_applied_args(min_indent)).parse(arena, state)?; + specialize_ref(TTagUnion::Type, loc_applied_args_e(min_indent)).parse(arena, state)?; - let result = match either_name { - Either::First(name) => Tag::Private { + let result = if name.value.starts_with('@') { + Tag::Private { name, args: args.into_bump_slice(), - }, - Either::Second(name) => Tag::Global { + } + } else { + Tag::Global { name, args: args.into_bump_slice(), - }, + } }; Ok((MadeProgress, result, state)) } } +use crate::parser::{Col, Row}; +fn parse_tag_name<'a, F, E>(to_problem: F) -> impl Parser<'a, &'a str, E> +where + F: Fn(Row, Col) -> E, + E: 'a, +{ + use encode_unicode::CharExt; + + move |arena, mut state: State<'a>| { + let mut buf; + + match char::from_utf8_slice_start(state.bytes) { + Ok((first_letter, bytes_parsed)) => match first_letter { + '@' => { + debug_assert_eq!(bytes_parsed, 1); + + // parsing a private tag name + match char::from_utf8_slice_start(&state.bytes[1..]) { + Ok((second_letter, bytes_parsed_2)) if second_letter.is_uppercase() => { + let total_parsed = bytes_parsed + bytes_parsed_2; + + buf = String::with_capacity_in(total_parsed, arena); + + buf.push('@'); + buf.push(second_letter); + + state = state + .advance_without_indenting(arena, total_parsed) + .map_err(|(progress, _, state)| { + (progress, to_problem(state.line, state.column), state) + })?; + } + _ => { + // important for error messages + state = state + .advance_without_indenting(arena, bytes_parsed) + .map_err(|(progress, _, state)| { + (progress, to_problem(state.line, state.column), state) + })?; + + let row = state.line; + let col = state.column; + return state.fail(arena, MadeProgress, to_problem(row, col)); + } + } + } + + _ if first_letter.is_uppercase() => { + buf = String::with_capacity_in(1, arena); + + buf.push(first_letter); + + state = state + .advance_without_indenting(arena, bytes_parsed) + .map_err(|(progress, _, state)| { + (progress, to_problem(state.line, state.column), state) + })?; + } + + _ => { + let row = state.line; + let col = state.column; + return state.fail(arena, NoProgress, to_problem(row, col)); + } + }, + Err(_) => { + let row = state.line; + let col = state.column; + return state.fail(arena, NoProgress, to_problem(row, col)); + } + }; + + while !state.bytes.is_empty() { + match char::from_utf8_slice_start(state.bytes) { + Ok((ch, bytes_parsed)) => { + // After the first character, only these are allowed: + // + // * Unicode alphabetic chars - you might include `鹏` if that's clear to your readers + // * ASCII digits - e.g. `1` but not `¾`, both of which pass .is_numeric() + // * A ':' indicating the end of the field + if ch.is_alphabetic() || ch.is_ascii_digit() { + buf.push(ch); + + state = state + .advance_without_indenting(arena, bytes_parsed) + .map_err(|(progress, _, state)| { + (progress, to_problem(state.line, state.column), state) + })?; + } else { + // This is the end of the field. We're done! + break; + } + } + Err(_) => { + let row = state.line; + let col = state.column; + return state.fail(arena, MadeProgress, to_problem(row, col)); + } + }; + } + + Ok((MadeProgress, buf.into_bump_str(), state)) + } +} + fn record_type_field<'a>( min_indent: u16, ) -> impl Parser<'a, AssignedField<'a, TypeAnnotation<'a>>, TRecord<'a>> { - use crate::blankspace::{space0_before_e, space0_e}; use crate::ident::lowercase_ident; use crate::parser::Either::*; use AssignedField::*; @@ -216,7 +317,7 @@ fn record_type_field<'a>( )) .parse(arena, state)?; - let val_parser = specialize_ref(TRecord::Syntax, term(min_indent)); + let val_parser = specialize_ref(TRecord::Type, term(min_indent)); match opt_loc_val { Some(First(_)) => { @@ -261,25 +362,16 @@ fn record_type_field<'a>( } #[inline(always)] -fn record_type<'a>(min_indent: u16) -> impl Parser<'a, TypeAnnotation<'a>, SyntaxError<'a>> { - let f = |x, row, col| SyntaxError::Type(Type::TRecord(x, row, col)); - specialize(f, record_type_internal(min_indent)) -} - -#[inline(always)] -fn record_type_internal<'a>(min_indent: u16) -> impl Parser<'a, TypeAnnotation<'a>, TRecord<'a>> { +fn record_type<'a>(min_indent: u16) -> impl Parser<'a, TypeAnnotation<'a>, TRecord<'a>> { use crate::type_annotation::TypeAnnotation::*; - let field_term = move |a, s| match term(min_indent).parse(a, s) { - Ok(t) => Ok(t), - Err((p, error, s)) => Err((p, TRecord::Syntax(a.alloc(error), s.line, s.column), s)), - }; - move |arena, state| { let (_, (fields, final_comments), state) = collection_trailing_sep_e!( + // word1_check_indent!(b'{', TRecord::Open, min_indent, TRecord::IndentOpen), word1(b'{', TRecord::Open), loc!(record_type_field(min_indent)), word1(b',', TRecord::End), + // word1_check_indent!(b'}', TRecord::End, min_indent, TRecord::IndentEnd), word1(b'}', TRecord::End), min_indent, TRecord::Open, @@ -288,6 +380,7 @@ fn record_type_internal<'a>(min_indent: u16) -> impl Parser<'a, TypeAnnotation<' ) .parse(arena, state)?; + let field_term = specialize_ref(TRecord::Type, term(min_indent)); let (_, ext, state) = optional(allocated(field_term)).parse(arena, state)?; let result = Record { @@ -300,13 +393,13 @@ fn record_type_internal<'a>(min_indent: u16) -> impl Parser<'a, TypeAnnotation<' } } -fn applied_type<'a>(min_indent: u16) -> impl Parser<'a, TypeAnnotation<'a>, SyntaxError<'a>> { +fn applied_type<'a>(min_indent: u16) -> impl Parser<'a, TypeAnnotation<'a>, Type<'a>> { map!( and!( - parse_concrete_type, + specialize(Type::TApply, parse_concrete_type), // Optionally parse space-separated arguments for the constructor, // e.g. `Str Float` in `Map Str Float` - loc_applied_args(min_indent) + loc_applied_args_e(min_indent) ), |(ctor, args): (TypeAnnotation<'a>, Vec<'a, Located>>)| { match &ctor { @@ -325,26 +418,56 @@ fn applied_type<'a>(min_indent: u16) -> impl Parser<'a, TypeAnnotation<'a>, Synt ) } -fn expression<'a>( +fn loc_applied_args_e<'a>( min_indent: u16, -) -> impl Parser<'a, Located>, SyntaxError<'a>> { - use crate::blankspace::space0; +) -> impl Parser<'a, Vec<'a, Located>>, Type<'a>> { + zero_or_more!(loc_applied_arg(min_indent)) +} + +fn expression<'a>(min_indent: u16) -> impl Parser<'a, Located>, Type<'a>> { move |arena, state: State<'a>| { - let (p1, first, state) = space0_before(term(min_indent), min_indent).parse(arena, state)?; + let (p1, first, state) = space0_before_e( + term(min_indent), + min_indent, + Type::TSpace, + Type::TIndentStart, + ) + .parse(arena, state)?; + let (p2, rest, state) = zero_or_more!(skip_first!( - ascii_char(b','), - space0_around(term(min_indent), min_indent) + word1(b',', Type::TFunctionArgument), + one_of![ + space0_around_e( + term(min_indent), + min_indent, + Type::TSpace, + Type::TIndentStart + ), + |_, state: State<'a>| Err(( + NoProgress, + Type::TFunctionArgument(state.line, state.column), + state + )) + ] )) .parse(arena, state)?; // TODO this space0 is dropped, so newlines just before the function arrow when there // is only one argument are not seen by the formatter. Can we do better? - let (p3, is_function, state) = - optional(skip_first!(space0(min_indent), ascii_string("->"))).parse(arena, state)?; + let (p3, is_function, state) = optional(skip_first!( + space0_e(min_indent, Type::TSpace, Type::TIndentStart), + word2(b'-', b'>', Type::TStart) + )) + .parse(arena, state)?; if is_function.is_some() { - let (p4, return_type, state) = - space0_before(term(min_indent), min_indent).parse(arena, state)?; + let (p4, return_type, state) = space0_before_e( + term(min_indent), + min_indent, + Type::TSpace, + Type::TIndentStart, + ) + .parse(arena, state)?; // prepare arguments let mut arguments = Vec::with_capacity_in(rest.len() + 1, &arena); @@ -365,10 +488,7 @@ fn expression<'a>( Ok((progress, first, state)) } else { // e.g. `Int,Int` without an arrow and return type - let msg = - "TODO: Decide the correct error to return for 'Invalid function signature'" - .to_string(); - Err((progress, SyntaxError::NotYetImplemented(msg), state)) + panic!() } } } @@ -393,30 +513,27 @@ fn expression<'a>( fn parse_concrete_type<'a>( arena: &'a Bump, mut state: State<'a>, -) -> ParseResult<'a, TypeAnnotation<'a>, SyntaxError<'a>> { +) -> ParseResult<'a, TypeAnnotation<'a>, TApply> { let mut part_buf = String::new_in(arena); // The current "part" (parts are dot-separated.) let mut parts: Vec<&'a str> = Vec::new_in(arena); - let start_bytes_len = state.bytes.len(); - // Qualified types must start with a capitalized letter. - match peek_utf8_char(&state) { + match peek_utf8_char_e(&state, TApply::StartNotUppercase, TApply::Space) { Ok((first_letter, bytes_parsed)) => { if first_letter.is_alphabetic() && first_letter.is_uppercase() { part_buf.push(first_letter); } else { - return Err(unexpected(arena, 0, Attempting::ConcreteType, state)); + let problem = TApply::StartNotUppercase(state.line, state.column + 1); + return Err((NoProgress, problem, state)); } - state = state.advance_without_indenting(arena, bytes_parsed)?; + state = state.advance_without_indenting_e(arena, bytes_parsed, TApply::Space)?; } - Err(reason) => return state.fail(arena, NoProgress, reason), + Err(reason) => return Err((NoProgress, reason, state)), } - let mut next_char = None; - while !state.bytes.is_empty() { - match peek_utf8_char(&state) { + match peek_utf8_char_e(&state, TApply::End, TApply::Space) { Ok((ch, bytes_parsed)) => { // After the first character, only these are allowed: // @@ -426,21 +543,33 @@ fn parse_concrete_type<'a>( if ch.is_alphabetic() { if part_buf.is_empty() && !ch.is_uppercase() { // Each part must begin with a capital letter. - return malformed(Some(ch), arena, state, parts); + return Err(( + MadeProgress, + TApply::StartNotUppercase(state.line, state.column), + state, + )); } part_buf.push(ch); } else if ch.is_ascii_digit() { // Parts may not start with numbers! if part_buf.is_empty() { - return malformed(Some(ch), arena, state, parts); + return Err(( + MadeProgress, + TApply::StartIsNumber(state.line, state.column), + state, + )); } part_buf.push(ch); } else if ch == '.' { // Having two consecutive dots is an error. if part_buf.is_empty() { - return malformed(Some(ch), arena, state, parts); + return Err(( + MadeProgress, + TApply::DoubleDot(state.line, state.column), + state, + )); } parts.push(part_buf.into_bump_str()); @@ -449,17 +578,13 @@ fn parse_concrete_type<'a>( part_buf = String::new_in(arena); } else { // This must be the end of the type. We're done! - next_char = Some(ch); - break; } - state = state.advance_without_indenting(arena, bytes_parsed)?; + state = state.advance_without_indenting_e(arena, bytes_parsed, TApply::Space)?; } Err(reason) => { - let progress = Progress::from_lengths(start_bytes_len, state.bytes.len()); - - return state.fail(arena, progress, reason); + return Err((MadeProgress, reason, state)); } } } @@ -471,14 +596,11 @@ fn parse_concrete_type<'a>( // // If we made it this far and don't have a next_char, then necessarily // we have consumed a '.' char previously. - return malformed(next_char.or(Some('.')), arena, state, parts); - } - - if part_buf.is_empty() { - // We had neither capitalized nor noncapitalized parts, - // yet we made it this far. The only explanation is that this was - // a stray '.' drifting through the cosmos. - return Err(unexpected(arena, 1, Attempting::Identifier, state)); + return Err(( + MadeProgress, + TApply::TrailingDot(state.line, state.column), + state, + )); } let answer = TypeAnnotation::Apply( @@ -487,37 +609,37 @@ fn parse_concrete_type<'a>( &[], ); - let progress = Progress::from_lengths(start_bytes_len, state.bytes.len()); - Ok((progress, answer, state)) + Ok((MadeProgress, answer, state)) } fn parse_type_variable<'a>( arena: &'a Bump, mut state: State<'a>, -) -> ParseResult<'a, TypeAnnotation<'a>, SyntaxError<'a>> { +) -> ParseResult<'a, TypeAnnotation<'a>, TVariable> { let mut buf = String::new_in(arena); let start_bytes_len = state.bytes.len(); - match peek_utf8_char(&state) { + match peek_utf8_char_e(&state, TVariable::StartNotLowercase, TVariable::Space) { Ok((first_letter, bytes_parsed)) => { // Type variables must start with a lowercase letter. if first_letter.is_alphabetic() && first_letter.is_lowercase() { buf.push(first_letter); } else { - return Err(unexpected(arena, 0, Attempting::TypeVariable, state)); + return Err(( + NoProgress, + TVariable::StartNotLowercase(state.line, state.column), + state, + )); } - state = state.advance_without_indenting(arena, bytes_parsed)?; - } - Err(reason) => { - let progress = Progress::from_lengths(start_bytes_len, state.bytes.len()); - return state.fail(arena, progress, reason); + state = state.advance_without_indenting_e(arena, bytes_parsed, TVariable::Space)?; } + Err(reason) => return Err((NoProgress, reason, state)), } while !state.bytes.is_empty() { - match peek_utf8_char(&state) { + match peek_utf8_char_e(&state, TVariable::End, TVariable::Space) { Ok((ch, bytes_parsed)) => { // After the first character, only these are allowed: // @@ -530,11 +652,10 @@ fn parse_type_variable<'a>( break; } - state = state.advance_without_indenting(arena, bytes_parsed)?; + state = state.advance_without_indenting_e(arena, bytes_parsed, TVariable::Space)?; } Err(reason) => { - let progress = Progress::from_lengths(start_bytes_len, state.bytes.len()); - return state.fail(arena, progress, reason); + return state.fail(arena, MadeProgress, reason); } } } @@ -544,45 +665,3 @@ fn parse_type_variable<'a>( let progress = Progress::from_lengths(start_bytes_len, state.bytes.len()); Ok((progress, answer, state)) } - -fn malformed<'a>( - opt_bad_char: Option, - arena: &'a Bump, - mut state: State<'a>, - parts: Vec<&'a str>, -) -> ParseResult<'a, TypeAnnotation<'a>, SyntaxError<'a>> { - // assumption: progress was made to conclude that the annotation is malformed - - // Reconstruct the original string that we've been parsing. - let mut full_string = String::new_in(arena); - - full_string.push_str(arena_join(arena, &mut parts.into_iter(), ".").into_bump_str()); - - if let Some(bad_char) = opt_bad_char { - full_string.push(bad_char); - } - - // Consume the remaining chars in the identifier. - while !state.bytes.is_empty() { - match peek_utf8_char(&state) { - Ok((ch, bytes_parsed)) => { - // We can't use ch.is_alphanumeric() here because that passes for - // things that are "numeric" but not ASCII digits, like `¾` - if ch == '.' || ch.is_alphabetic() || ch.is_ascii_digit() { - full_string.push(ch); - } else { - break; - } - - state = state.advance_without_indenting(arena, bytes_parsed)?; - } - Err(reason) => return state.fail(arena, MadeProgress, reason), - } - } - - Ok(( - MadeProgress, - TypeAnnotation::Malformed(full_string.into_bump_str()), - state, - )) -} diff --git a/compiler/reporting/src/error/parse.rs b/compiler/reporting/src/error/parse.rs index 0fb170c0d9..fc8082c6b6 100644 --- a/compiler/reporting/src/error/parse.rs +++ b/compiler/reporting/src/error/parse.rs @@ -31,7 +31,7 @@ fn note_for_tag_union_type_indent<'a>(alloc: &'a RocDocAllocator<'a>) -> RocDocB fn hint_for_tag_name<'a>(alloc: &'a RocDocAllocator<'a>) -> RocDocBuilder<'a> { alloc.concat(vec![ alloc.hint("Tag names "), - alloc.reflow("Tag names start with an uppercase letter, like "), + alloc.reflow("start with an uppercase letter, like "), alloc.parser_suggestion("Err"), alloc.text(" or "), alloc.parser_suggestion("Green"), @@ -39,6 +39,17 @@ fn hint_for_tag_name<'a>(alloc: &'a RocDocAllocator<'a>) -> RocDocBuilder<'a> { ]) } +fn hint_for_private_tag_name<'a>(alloc: &'a RocDocAllocator<'a>) -> RocDocBuilder<'a> { + alloc.concat(vec![ + alloc.hint("Private tag names "), + alloc.reflow("start with an `@` symbol followed by an uppercase letter, like "), + alloc.parser_suggestion("@UID"), + alloc.text(" or "), + alloc.parser_suggestion("@SecretKey"), + alloc.text("."), + ]) +} + fn to_syntax_report<'a>( alloc: &'a RocDocAllocator<'a>, filename: PathBuf, @@ -102,6 +113,24 @@ fn to_syntax_report<'a>( report(doc) } + SyntaxError::Eof(region) => { + let doc = alloc.stack(vec![alloc.reflow("End of Field"), alloc.region(*region)]); + + Report { + filename, + doc, + title: "PARSE PROBLEM".to_string(), + } + } + SyntaxError::OutdentedTooFar => { + let doc = alloc.stack(vec![alloc.reflow("OutdentedTooFar")]); + + Report { + filename, + doc, + title: "PARSE PROBLEM".to_string(), + } + } Type(typ) => to_type_report(alloc, filename, &typ, 0, 0), _ => todo!("unhandled parse error: {:?}", parse_problem), } @@ -111,8 +140,8 @@ fn to_type_report<'a>( alloc: &'a RocDocAllocator<'a>, filename: PathBuf, parse_problem: &roc_parse::parser::Type<'a>, - _start_row: Row, - _start_col: Col, + start_row: Row, + start_col: Col, ) -> Report<'a> { use roc_parse::parser::Type; @@ -121,6 +150,82 @@ fn to_type_report<'a>( Type::TTagUnion(tag_union, row, col) => { to_ttag_union_report(alloc, filename, &tag_union, *row, *col) } + Type::TInParens(tinparens, row, col) => { + to_tinparens_report(alloc, filename, &tinparens, *row, *col) + } + Type::TApply(tapply, row, col) => to_tapply_report(alloc, filename, &tapply, *row, *col), + + Type::TFunctionArgument(row, col) => match what_is_next(alloc.src_lines, *row, *col) { + Next::Other(Some(',')) => { + let surroundings = Region::from_rows_cols(start_row, start_col, *row, *col); + let region = Region::from_row_col(*row, *col); + + let doc = alloc.stack(vec![ + alloc.reflow(r"I just started parsing a function argument type, but I encounterd two commas in a row:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![alloc.reflow("Try removing one of them.")]), + ]); + + Report { + filename, + doc, + title: "DOUBLE COMMA".to_string(), + } + } + _ => todo!(), + }, + + Type::TStart(row, col) => { + let surroundings = Region::from_rows_cols(start_row, start_col, *row, *col); + let region = Region::from_row_col(*row, *col); + + let doc = alloc.stack(vec![ + alloc.reflow(r"I just started parsing a type, but I got stuck here:"), + alloc.region_with_subregion(surroundings, region), + alloc.note("I may be confused by indentation"), + ]); + + Report { + filename, + doc, + title: "UNFINISHED TYPE".to_string(), + } + } + + Type::TIndentStart(row, col) => { + let surroundings = Region::from_rows_cols(start_row, start_col, *row, *col); + let region = Region::from_row_col(*row, *col); + + let doc = alloc.stack(vec![ + alloc.reflow(r"I just started parsing a type, but I got stuck here:"), + alloc.region_with_subregion(surroundings, region), + alloc.note("I may be confused by indentation"), + ]); + + Report { + filename, + doc, + title: "UNFINISHED TYPE".to_string(), + } + } + + Type::TAsIndentStart(row, col) => { + let surroundings = Region::from_rows_cols(start_row, start_col, *row, *col); + let region = Region::from_row_col(*row, *col); + + let doc = alloc.stack(vec![ + alloc.reflow(r"I just started parsing an inline type alias, but I got stuck here:"), + alloc.region_with_subregion(surroundings, region), + alloc.note("I may be confused by indentation"), + ]); + + Report { + filename, + doc, + title: "UNFINISHED INLINE ALIAS".to_string(), + } + } + _ => todo!("unhandled type parse error: {:?}", &parse_problem), } } @@ -278,8 +383,6 @@ fn to_trecord_report<'a>( TRecord::Type(tipe, row, col) => to_type_report(alloc, filename, tipe, row, col), - TRecord::Syntax(error, row, col) => to_syntax_report(alloc, filename, error, row, col), - TRecord::IndentOpen(row, col) => { let surroundings = Region::from_rows_cols(start_row, start_col, row, col); let region = Region::from_row_col(row, col); @@ -303,7 +406,7 @@ fn to_trecord_report<'a>( } TRecord::IndentEnd(row, col) => { - match next_line_starts_with_close_curly(alloc.src_lines, row - 1) { + match next_line_starts_with_close_curly(alloc.src_lines, row.saturating_sub(1)) { Some((curly_row, curly_col)) => { let surroundings = Region::from_rows_cols(start_row, start_col, curly_row, curly_col); @@ -373,8 +476,6 @@ fn to_ttag_union_report<'a>( ) -> Report<'a> { use roc_parse::parser::TTagUnion; - dbg!(parse_problem); - match *parse_problem { TTagUnion::Open(row, col) => match what_is_next(alloc.src_lines, row, col) { Next::Keyword(keyword) => { @@ -463,6 +564,22 @@ fn to_ttag_union_report<'a>( title: "WEIRD TAG NAME".to_string(), } } + Next::Other(Some('@')) => { + let doc = alloc.stack(vec![ + alloc.reflow( + r"I am partway through parsing a tag union type, but I got stuck here:", + ), + alloc.region_with_subregion(surroundings, region), + alloc.reflow(r"I was expecting to see a private tag name."), + hint_for_private_tag_name(alloc), + ]); + + Report { + filename, + doc, + title: "WEIRD TAG NAME".to_string(), + } + } _ => { let doc = alloc.stack(vec![ alloc.reflow(r"I am partway through parsing a tag union type, but I got stuck here:"), @@ -487,8 +604,6 @@ fn to_ttag_union_report<'a>( TTagUnion::Type(tipe, row, col) => to_type_report(alloc, filename, tipe, row, col), - TTagUnion::Syntax(error, row, col) => to_syntax_report(alloc, filename, error, row, col), - TTagUnion::IndentOpen(row, col) => { let surroundings = Region::from_rows_cols(start_row, start_col, row, col); let region = Region::from_row_col(row, col); @@ -565,6 +680,320 @@ fn to_ttag_union_report<'a>( } } +fn to_tinparens_report<'a>( + alloc: &'a RocDocAllocator<'a>, + filename: PathBuf, + parse_problem: &roc_parse::parser::TInParens<'a>, + start_row: Row, + start_col: Col, +) -> Report<'a> { + use roc_parse::parser::TInParens; + + match *parse_problem { + TInParens::Open(row, col) => { + match what_is_next(alloc.src_lines, row, col) { + Next::Keyword(keyword) => { + let surroundings = Region::from_rows_cols(start_row, start_col, row, col); + let region = to_keyword_region(row, col, keyword); + + let doc = alloc.stack(vec![ + alloc.reflow(r"I just saw an open parenthesis, so I was expecting to see a type next."), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow(r"Something like "), + alloc.parser_suggestion("(List Person)"), + alloc.text(" or "), + alloc.parser_suggestion("(Result I64 Str)"), + ]), + ]); + + Report { + filename, + doc, + title: "UNFINISHED PARENTHESES".to_string(), + } + } + Next::Other(Some(c)) if c.is_alphabetic() => { + debug_assert!(c.is_lowercase()); + + let surroundings = Region::from_rows_cols(start_row, start_col, row, col); + let region = Region::from_row_col(row, col); + + let doc = alloc.stack(vec![ + alloc.reflow( + r"I am partway through parsing a type in parentheses, but I got stuck here:", + ), + alloc.region_with_subregion(surroundings, region), + alloc.reflow(r"I was expecting to see a tag name."), + hint_for_tag_name(alloc), + ]); + + Report { + filename, + doc, + title: "WEIRD TAG NAME".to_string(), + } + } + _ => { + let surroundings = Region::from_rows_cols(start_row, start_col, row, col); + let region = Region::from_row_col(row, col); + + let doc = alloc.stack(vec![ + alloc.reflow( + r"I just started parsing a type in parentheses, but I got stuck here:", + ), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow(r"Tag unions look like "), + alloc.parser_suggestion("[ Many I64, None ],"), + alloc.reflow(" so I was expecting to see a tag name next."), + ]), + ]); + + Report { + filename, + doc, + title: "UNFINISHED PARENTHESES".to_string(), + } + } + } + } + + TInParens::End(row, col) => { + let surroundings = Region::from_rows_cols(start_row, start_col, row, col); + let region = Region::from_row_col(row, col); + + match what_is_next(alloc.src_lines, row, col) { + Next::Other(Some(c)) if c.is_alphabetic() => { + debug_assert!(c.is_lowercase()); + + // TODO hint for tuples? + let doc = alloc.stack(vec![ + alloc.reflow( + r"I am partway through parsing a type in parentheses, but I got stuck here:", + ), + alloc.region_with_subregion(surroundings, region), + alloc.reflow(r"I was expecting to see a tag name."), + hint_for_tag_name(alloc), + ]); + + Report { + filename, + doc, + title: "WEIRD TAG NAME".to_string(), + } + } + _ => { + let doc = alloc.stack(vec![ + alloc.reflow(r"I am partway through parsing a type in parentheses, but I got stuck here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow( + r"I was expecting to see a closing parenthesis before this, so try adding a ", + ), + alloc.parser_suggestion(")"), + alloc.reflow(" and see if that helps?"), + ]), + ]); + + Report { + filename, + doc, + title: "UNFINISHED PARENTHESES".to_string(), + } + } + } + } + + TInParens::Type(tipe, row, col) => to_type_report(alloc, filename, tipe, row, col), + + TInParens::IndentOpen(row, col) => { + let surroundings = Region::from_rows_cols(start_row, start_col, row, col); + let region = Region::from_row_col(row, col); + + let doc = alloc.stack(vec![ + alloc + .reflow(r"I just started parsing a type in parentheses, but I got stuck here:"), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow(r"Tag unions look like "), + alloc.parser_suggestion("[ Many I64, None ],"), + alloc.reflow(" so I was expecting to see a tag name next."), + ]), + note_for_tag_union_type_indent(alloc), + ]); + + Report { + filename, + doc, + title: "UNFINISHED PARENTHESES".to_string(), + } + } + + TInParens::IndentEnd(row, col) => { + match next_line_starts_with_close_square_bracket(alloc.src_lines, row - 1) { + Some((curly_row, curly_col)) => { + let surroundings = + Region::from_rows_cols(start_row, start_col, curly_row, curly_col); + let region = Region::from_row_col(curly_row, curly_col); + + let doc = alloc.stack(vec![ + alloc.reflow( + "I am partway through parsing a type in parentheses, but I got stuck here:", + ), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow("I need this square bracket to be indented more. Try adding more spaces before it!"), + ]), + ]); + + Report { + filename, + doc, + title: "NEED MORE INDENTATION".to_string(), + } + } + None => { + let surroundings = Region::from_rows_cols(start_row, start_col, row, col); + let region = Region::from_row_col(row, col); + + let doc = alloc.stack(vec![ + alloc.reflow( + r"I am partway through parsing a type in parentheses, but I got stuck here:", + ), + alloc.region_with_subregion(surroundings, region), + alloc.concat(vec![ + alloc.reflow("I was expecting to see a closing square "), + alloc.reflow("bracket before this, so try adding a "), + alloc.parser_suggestion("]"), + alloc.reflow(" and see if that helps?"), + ]), + note_for_tag_union_type_indent(alloc), + ]); + + Report { + filename, + doc, + title: "UNFINISHED PARENTHESES".to_string(), + } + } + } + } + + TInParens::Space(error, row, col) => to_space_report(alloc, filename, &error, row, col), + } +} + +fn to_tapply_report<'a>( + alloc: &'a RocDocAllocator<'a>, + filename: PathBuf, + parse_problem: &roc_parse::parser::TApply, + _start_row: Row, + _start_col: Col, +) -> Report<'a> { + use roc_parse::parser::TApply; + + match *parse_problem { + TApply::DoubleDot(row, col) => { + let region = Region::from_row_col(row, col); + + let doc = alloc.stack(vec![ + alloc.reflow(r"I encountered two dots in a row:"), + alloc.region(region), + alloc.concat(vec![alloc.reflow("Try removing one of them.")]), + ]); + + Report { + filename, + doc, + title: "DOUBLE DOT".to_string(), + } + } + TApply::TrailingDot(row, col) => { + let region = Region::from_row_col(row, col); + + let doc = alloc.stack(vec![ + alloc.reflow(r"I encountered a dot with nothing after it:"), + alloc.region(region), + alloc.concat(vec![ + alloc.reflow("Dots are used to refer to a type in a qualified way, like "), + alloc.parser_suggestion("Num.I64"), + alloc.text(" or "), + alloc.parser_suggestion("List.List a"), + alloc.reflow(". Try adding a type name next."), + ]), + ]); + + Report { + filename, + doc, + title: "TRAILING DOT".to_string(), + } + } + TApply::StartIsNumber(row, col) => { + let region = Region::from_row_col(row, col); + + let doc = alloc.stack(vec![ + alloc.reflow(r"I encountered a number at the start of a qualified name segment:"), + alloc.region(region), + alloc.concat(vec![ + alloc.reflow("All parts of a qualified type name must start with an uppercase letter, like "), + alloc.parser_suggestion("Num.I64"), + alloc.text(" or "), + alloc.parser_suggestion("List.List a"), + alloc.text("."), + ]), + ]); + + Report { + filename, + doc, + title: "WEIRD QUALIFIED NAME".to_string(), + } + } + TApply::StartNotUppercase(row, col) => { + let region = Region::from_row_col(row, col); + + let doc = alloc.stack(vec![ + alloc.reflow(r"I encountered a lowercase letter at the start of a qualified name segment:"), + alloc.region(region), + alloc.concat(vec![ + alloc.reflow("All parts of a qualified type name must start with an uppercase letter, like "), + alloc.parser_suggestion("Num.I64"), + alloc.text(" or "), + alloc.parser_suggestion("List.List a"), + alloc.text("."), + ]), + ]); + + Report { + filename, + doc, + title: "WEIRD QUALIFIED NAME".to_string(), + } + } + + TApply::End(row, col) => { + let region = Region::from_row_col(row, col); + + let doc = alloc.stack(vec![ + alloc.reflow( + r"I reached the end of the input file while parsing a qualified type name", + ), + alloc.region(region), + ]); + + Report { + filename, + doc, + title: "END OF FILE".to_string(), + } + } + + TApply::Space(error, row, col) => to_space_report(alloc, filename, &error, row, col), + } +} + fn to_space_report<'a>( alloc: &'a RocDocAllocator<'a>, filename: PathBuf, diff --git a/compiler/reporting/tests/test_reporting.rs b/compiler/reporting/tests/test_reporting.rs index 986a877743..7b50c9e5a8 100644 --- a/compiler/reporting/tests/test_reporting.rs +++ b/compiler/reporting/tests/test_reporting.rs @@ -4027,7 +4027,8 @@ mod test_reporting { } #[test] - fn type_annotation_dubble_colon() { + #[ignore] + fn type_annotation_double_colon() { report_problem_as( indoc!( r#" @@ -4172,8 +4173,7 @@ mod test_reporting { I was expecting to see a tag name. - Hint: Tag names Tag names start with an uppercase letter, like - Err or Green. + Hint: Tag names start with an uppercase letter, like Err or Green. "# ), ) @@ -4198,8 +4198,7 @@ mod test_reporting { I was expecting to see a tag name. - Hint: Tag names Tag names start with an uppercase letter, like - Err or Green. + Hint: Tag names start with an uppercase letter, like Err or Green. "# ), ) @@ -4245,8 +4244,7 @@ mod test_reporting { I am partway through parsing a record type, but I got stuck here: 1│ f : { - 2│ foo : I64, - ^ + ^ I was expecting to see a closing curly brace before this, so try adding a } and see if that helps? @@ -4396,4 +4394,313 @@ mod test_reporting { ), ) } + + #[test] + #[ignore] + fn type_in_parens_start() { + report_problem_as( + indoc!( + r#" + f : ( + "# + ), + indoc!( + r#" + ── UNFINISHED RECORD TYPE ────────────────────────────────────────────────────── + + I am partway through parsing a record type, but I got stuck here: + + 1│ f : { a: Int, + ^ + + I was expecting to see a closing curly brace before this, so try + adding a } and see if that helps? + "# + ), + ) + } + + #[test] + fn type_in_parens_end() { + report_problem_as( + indoc!( + r#" + f : ( I64 + "# + ), + indoc!( + r#" + ── UNFINISHED PARENTHESES ────────────────────────────────────────────────────── + + I am partway through parsing a type in parentheses, but I got stuck + here: + + 1│ f : ( I64 + ^ + + I was expecting to see a closing parenthesis before this, so try + adding a ) and see if that helps? + "# + ), + ) + } + + #[test] + fn type_apply_double_dot() { + report_problem_as( + indoc!( + r#" + f : Foo..Bar + "# + ), + indoc!( + r#" + ── DOUBLE DOT ────────────────────────────────────────────────────────────────── + + I encountered two dots in a row: + + 1│ f : Foo..Bar + ^ + + Try removing one of them. + "# + ), + ) + } + + #[test] + fn type_apply_trailing_dot() { + report_problem_as( + indoc!( + r#" + f : Foo.Bar. + "# + ), + indoc!( + r#" + ── TRAILING DOT ──────────────────────────────────────────────────────────────── + + I encountered a dot with nothing after it: + + 1│ f : Foo.Bar. + ^ + + Dots are used to refer to a type in a qualified way, like + Num.I64 or List.List a. Try adding a type name next. + "# + ), + ) + } + + #[test] + #[ignore] + fn type_apply_stray_dot() { + // TODO good message + report_problem_as( + indoc!( + r#" + f : . + "# + ), + indoc!( + r#" + ── UNFINISHED PARENTHESES ────────────────────────────────────────────────────── + + I am partway through parsing a type in parentheses, but I got stuck + here: + + 1│ f : ( I64 + ^ + + I was expecting to see a closing parenthesis before this, so try + adding a ) and see if that helps? + "# + ), + ) + } + + #[test] + fn type_apply_start_with_number() { + report_problem_as( + indoc!( + r#" + f : Foo.1 + "# + ), + indoc!( + r#" + ── WEIRD QUALIFIED NAME ──────────────────────────────────────────────────────── + + I encountered a number at the start of a qualified name segment: + + 1│ f : Foo.1 + ^ + + All parts of a qualified type name must start with an uppercase + letter, like Num.I64 or List.List a. + "# + ), + ) + } + + #[test] + fn type_apply_start_with_lowercase() { + report_problem_as( + indoc!( + r#" + f : Foo.foo + "# + ), + indoc!( + r#" + ── WEIRD QUALIFIED NAME ──────────────────────────────────────────────────────── + + I encountered a lowercase letter at the start of a qualified name + segment: + + 1│ f : Foo.foo + ^ + + All parts of a qualified type name must start with an uppercase + letter, like Num.I64 or List.List a. + "# + ), + ) + } + + #[test] + fn type_inline_alias() { + report_problem_as( + indoc!( + r#" + f : I64 as + f = 0 + + f + "# + ), + indoc!( + r#" + ── UNFINISHED INLINE ALIAS ───────────────────────────────────────────────────── + + I just started parsing an inline type alias, but I got stuck here: + + 1│ f : I64 as + ^ + + Note: I may be confused by indentation + "# + ), + ) + } + + #[test] + fn type_double_comma() { + report_problem_as( + indoc!( + r#" + f : I64,,I64 -> I64 + f = 0 + + f + "# + ), + indoc!( + r#" + ── DOUBLE COMMA ──────────────────────────────────────────────────────────────── + + I just started parsing a function argument type, but I encounterd two + commas in a row: + + 1│ f : I64,,I64 -> I64 + ^ + + Try removing one of them. + "# + ), + ) + } + + #[test] + fn type_argument_no_arrow() { + report_problem_as( + indoc!( + r#" + f : I64, I64 + f = 0 + + f + "# + ), + indoc!( + r#" + ── UNFINISHED TYPE ───────────────────────────────────────────────────────────── + + I just started parsing a type, but I got stuck here: + + 1│ f : I64, I64 + ^ + + Note: I may be confused by indentation + "# + ), + ) + } + + #[test] + fn type_argument_arrow_then_nothing() { + // TODO could do better by pointing out we're parsing a function type + report_problem_as( + indoc!( + r#" + f : I64, I64 -> + f = 0 + + f + "# + ), + indoc!( + r#" + ── UNFINISHED TYPE ───────────────────────────────────────────────────────────── + + I just started parsing a type, but I got stuck here: + + 1│ f : I64, I64 -> + ^ + + Note: I may be confused by indentation + "# + ), + ) + } + + #[test] + fn invalid_private_tag_name() { + // TODO could do better by pointing out we're parsing a function type + report_problem_as( + indoc!( + r#" + f : [ @Foo Bool, @100 I64 ] + f = 0 + + f + "# + ), + indoc!( + r#" + ── WEIRD TAG NAME ────────────────────────────────────────────────────────────── + + I am partway through parsing a tag union type, but I got stuck here: + + 1│ f : [ @Foo Bool, @100 I64 ] + ^ + + I was expecting to see a private tag name. + + Hint: Private tag names start with a `@` symbol followed by an + uppercase letter, like @UID or @SecretKey. + "# + ), + ) + } } diff --git a/editor/src/keyboard_input.rs b/editor/src/keyboard_input.rs index 0a976a1a0f..457f27d50c 100644 --- a/editor/src/keyboard_input.rs +++ b/editor/src/keyboard_input.rs @@ -38,6 +38,9 @@ pub fn handle_keydown( handle_cut(app_model)? } } + + A => pass_keydown_to_focused(&modifiers, virtual_keycode, app_model), + _ => (), } diff --git a/editor/src/mvc/ed_update.rs b/editor/src/mvc/ed_update.rs index 83cb701aec..27247c11fe 100644 --- a/editor/src/mvc/ed_update.rs +++ b/editor/src/mvc/ed_update.rs @@ -318,6 +318,20 @@ fn del_selection(selection: RawSelection, ed_model: &mut EdModel) -> EdResult<() Ok(()) } +// TODO move this to impl EdModel +pub fn handle_select_all(ed_model: &mut EdModel) { + if ed_model.text_buf.nr_of_chars() > 0 { + let last_pos = ed_model.text_buf.last_position(); + + ed_model.selection_opt = Some(RawSelection { + start_pos: Position { line: 0, column: 0 }, + end_pos: last_pos, + }); + + ed_model.caret_pos = last_pos; + } +} + pub fn handle_new_char(received_char: &char, ed_model: &mut EdModel) -> EdResult<()> { let old_caret_pos = ed_model.caret_pos; @@ -351,7 +365,8 @@ pub fn handle_new_char(received_char: &char, ed_model: &mut EdModel) -> EdResult ed_model.selection_opt = None; } - '\u{3}' // Ctrl + C + '\u{1}' // Ctrl + A + | '\u{3}' // Ctrl + C | '\u{16}' // Ctrl + V | '\u{18}' // Ctrl + X | '\u{e000}'..='\u{f8ff}' // http://www.unicode.org/faq/private_use.html @@ -397,6 +412,12 @@ pub fn handle_key_down( Up => handle_arrow(move_caret_up, modifiers, ed_model), Right => handle_arrow(move_caret_right, modifiers, ed_model), Down => handle_arrow(move_caret_down, modifiers, ed_model), + + A => { + if modifiers.ctrl() { + handle_select_all(ed_model) + } + } _ => {} } } @@ -405,7 +426,7 @@ pub fn handle_key_down( pub mod test_ed_update { use crate::mvc::app_update::test_app_update::mock_app_model; use crate::mvc::ed_model::{Position, RawSelection}; - use crate::mvc::ed_update::handle_new_char; + use crate::mvc::ed_update::{handle_new_char, handle_select_all}; use crate::selection::test_selection::{ all_lines_vec, convert_dsl_to_selection, convert_selection_to_dsl, text_buffer_from_dsl_str, }; @@ -554,4 +575,51 @@ pub mod test_ed_update { Ok(()) } + + fn assert_select_all( + pre_lines_str: &[&str], + expected_post_lines_str: &[&str], + ) -> Result<(), String> { + let (caret_pos, selection_opt, pre_text_buf) = gen_caret_text_buf(pre_lines_str)?; + + let app_model = mock_app_model(pre_text_buf, caret_pos, selection_opt, None); + let mut ed_model = app_model.ed_model_opt.unwrap(); + + handle_select_all(&mut ed_model); + + let mut text_buf_lines = all_lines_vec(&ed_model.text_buf); + let post_lines_str = convert_selection_to_dsl( + ed_model.selection_opt, + ed_model.caret_pos, + &mut text_buf_lines, + )?; + + assert_eq!(post_lines_str, expected_post_lines_str); + + Ok(()) + } + + #[test] + fn select_all() -> Result<(), String> { + assert_select_all(&["|"], &["|"])?; + assert_select_all(&["|a"], &["[a]|"])?; + assert_select_all(&["a|"], &["[a]|"])?; + assert_select_all(&["abc d|ef ghi"], &["[abc def ghi]|"])?; + assert_select_all(&["[a]|"], &["[a]|"])?; + assert_select_all(&["|[a]"], &["[a]|"])?; + assert_select_all(&["|[abc def ghi]"], &["[abc def ghi]|"])?; + assert_select_all(&["a\n", "[b\n", "]|"], &["[a\n", "b\n", "]|"])?; + assert_select_all(&["a\n", "[b]|\n", ""], &["[a\n", "b\n", "]|"])?; + assert_select_all(&["a\n", "|[b\n", "]"], &["[a\n", "b\n", "]|"])?; + assert_select_all( + &["abc\n", "def\n", "gh|i\n", "jkl"], + &["[abc\n", "def\n", "ghi\n", "jkl]|"], + )?; + assert_select_all( + &["|[abc\n", "def\n", "ghi\n", "jkl]"], + &["[abc\n", "def\n", "ghi\n", "jkl]|"], + )?; + + Ok(()) + } } diff --git a/editor/src/text_buffer.rs b/editor/src/text_buffer.rs index f43d0260ff..a0a512883c 100644 --- a/editor/src/text_buffer.rs +++ b/editor/src/text_buffer.rs @@ -116,6 +116,10 @@ impl TextBuffer { self.text_rope.len_lines() } + pub fn nr_of_chars(&self) -> usize { + self.text_rope.len_chars() + } + // expensive function, don't use it if it can be done with a specialized, more efficient function // TODO use pool allocation here pub fn all_lines<'a>(&self, arena: &'a Bump) -> BumpString<'a> { @@ -132,6 +136,16 @@ impl TextBuffer { self.text_rope.line_to_char(pos.line) + pos.column } + fn char_indx_to_pos(&self, char_indx: usize) -> Position { + let line = self.text_rope.char_to_line(char_indx); + + let char_idx_line_start = self.pos_to_char_indx(Position { line, column: 0 }); + + let column = char_indx - char_idx_line_start; + + Position { line, column } + } + fn sel_to_tup(&self, raw_sel: RawSelection) -> EdResult<(usize, usize)> { let valid_sel = validate_selection(raw_sel)?; let start_char_indx = self.pos_to_char_indx(valid_sel.selection.start_pos); @@ -139,6 +153,10 @@ impl TextBuffer { Ok((start_char_indx, end_char_indx)) } + + pub fn last_position(&self) -> Position { + self.char_indx_to_pos(self.nr_of_chars()) + } } pub fn from_path(path: &Path) -> EdResult { diff --git a/examples/closure/.gitignore b/examples/closure/.gitignore deleted file mode 100644 index 3b89e1b9f3..0000000000 --- a/examples/closure/.gitignore +++ /dev/null @@ -1 +0,0 @@ -closure diff --git a/examples/closure/Closure.roc b/examples/closure/Closure.roc deleted file mode 100644 index 03131b7dd1..0000000000 --- a/examples/closure/Closure.roc +++ /dev/null @@ -1,8 +0,0 @@ -app "closure" provides [ makeClosure ] to "./platform/" - -makeClosure : ({} -> Int *) as MyClosure -makeClosure = - x = 42 - y = 42 - - \{} -> x + y diff --git a/examples/closure/platform/Cargo.lock b/examples/closure/platform/Cargo.lock deleted file mode 100644 index c386bb6c4a..0000000000 --- a/examples/closure/platform/Cargo.lock +++ /dev/null @@ -1,23 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -[[package]] -name = "host" -version = "0.1.0" -dependencies = [ - "roc_std 0.1.0", -] - -[[package]] -name = "libc" -version = "0.2.79" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "roc_std" -version = "0.1.0" -dependencies = [ - "libc 0.2.79 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[metadata] -"checksum libc 0.2.79 (registry+https://github.com/rust-lang/crates.io-index)" = "2448f6066e80e3bfc792e9c98bf705b4b0fc6e8ef5b43e5889aff0eaa9c58743" diff --git a/examples/closure/platform/Cargo.toml b/examples/closure/platform/Cargo.toml deleted file mode 100644 index 70f3c1f86c..0000000000 --- a/examples/closure/platform/Cargo.toml +++ /dev/null @@ -1,13 +0,0 @@ -[package] -name = "host" -version = "0.1.0" -authors = ["Richard Feldman "] -edition = "2018" - -[lib] -crate-type = ["staticlib"] - -[dependencies] -roc_std = { path = "../../../roc_std" } - -[workspace] diff --git a/examples/closure/platform/Pkg-Config.roc b/examples/closure/platform/Pkg-Config.roc deleted file mode 100644 index 8e351ef6ba..0000000000 --- a/examples/closure/platform/Pkg-Config.roc +++ /dev/null @@ -1,12 +0,0 @@ -platform examples/closure - requires { main : Effect {} } - exposes [] - packages {} - imports [] - provides [ mainForHost ] - effects Effect - { - putChar : Int -> Effect {}, - putLine : Str -> Effect {}, - getLine : Effect Str - } diff --git a/examples/closure/platform/host.c b/examples/closure/platform/host.c deleted file mode 100644 index 0378c69589..0000000000 --- a/examples/closure/platform/host.c +++ /dev/null @@ -1,7 +0,0 @@ -#include - -extern int rust_main(); - -int main() { - return rust_main(); -} diff --git a/examples/closure/platform/src/lib.rs b/examples/closure/platform/src/lib.rs deleted file mode 100644 index bbaca0733a..0000000000 --- a/examples/closure/platform/src/lib.rs +++ /dev/null @@ -1,95 +0,0 @@ -#![allow(non_snake_case)] - -use roc_std::alloca; -use roc_std::RocCallResult; -use std::alloc::Layout; -use std::time::SystemTime; - -extern "C" { - #[link_name = "roc__makeClosure_1_exposed"] - fn make_closure(output: *mut u8) -> (); - - #[link_name = "roc__makeClosure_1_size"] - fn closure_size() -> i64; - - #[link_name = "roc__makeClosure_1_MyClosure_caller"] - fn call_MyClosure(function_pointer: *const u8, closure_data: *const u8, output: *mut u8) -> (); - - #[link_name = "roc__makeClosure_1_MyClosure_size"] - fn size_MyClosure() -> i64; -} - -unsafe fn call_the_closure(function_pointer: *const u8, closure_data_ptr: *const u8) -> i64 { - let size = size_MyClosure() as usize; - - alloca::with_stack_bytes(size, |buffer| { - let buffer: *mut std::ffi::c_void = buffer; - let buffer: *mut u8 = buffer as *mut u8; - - call_MyClosure( - function_pointer, - closure_data_ptr as *const u8, - buffer as *mut u8, - ); - - let output = &*(buffer as *mut RocCallResult); - - match output.into() { - Ok(v) => v, - Err(e) => panic!("failed with {}", e), - } - }) -} - -#[no_mangle] -pub fn rust_main() -> isize { - println!("Running Roc closure"); - let start_time = SystemTime::now(); - - let size = unsafe { closure_size() } as usize; - let layout = Layout::array::(size).unwrap(); - let answer = unsafe { - let buffer = std::alloc::alloc(layout); - - make_closure(buffer); - - let output = &*(buffer as *mut RocCallResult<()>); - - match output.into() { - Ok(()) => { - let function_pointer = { - // this is a pointer to the location where the function pointer is stored - // we pass just the function pointer - let temp = buffer.offset(8) as *const i64; - - (*temp) as *const u8 - }; - - let closure_data_ptr = buffer.offset(16); - - let result = - call_the_closure(function_pointer as *const u8, closure_data_ptr as *const u8); - - std::alloc::dealloc(buffer, layout); - - result - } - Err(msg) => { - std::alloc::dealloc(buffer, layout); - - panic!("Roc failed with message: {}", msg); - } - } - }; - let end_time = SystemTime::now(); - let duration = end_time.duration_since(start_time).unwrap(); - - println!( - "Roc closure took {:.4} ms to compute this answer: {:?}", - duration.as_secs_f64() * 1000.0, - answer - ); - - // Exit code - 0 -} diff --git a/examples/quicksort/platform/host.zig b/examples/quicksort/platform/host.zig index e13fb26b76..bc5d13ce4a 100644 --- a/examples/quicksort/platform/host.zig +++ b/examples/quicksort/platform/host.zig @@ -22,6 +22,10 @@ pub export fn main() i32 { const stderr = std.io.getStdErr().writer(); var raw_numbers: [NUM_NUMS + 1]i64 = undefined; + + // set refcount to one + raw_numbers[0] = -9223372036854775808; + var numbers = raw_numbers[1..]; for (numbers) |x, i| {