From ecab8fa25a275246973ae274fd74c614a6b2b6e5 Mon Sep 17 00:00:00 2001 From: Ayaz Hafiz Date: Tue, 11 Oct 2022 12:58:05 -0500 Subject: [PATCH 01/16] Make sure self-recursive checks only happen after typechecking Programs like ``` after : ({} -> a), ({} -> b) -> ({} -> b) fx = after (\{} -> {}) \{} -> if Bool.true then fx {} else {} ``` are legal because they always decay to functions, even if they may not look like functions syntactically. Rather than using a syntactic check to check for illegally-recursive functions, we should only perform such checks after we know the types of values. Closes #4291 --- crates/compiler/can/src/def.rs | 113 +++++++--------------- crates/compiler/can/src/module.rs | 2 +- crates/compiler/solve/tests/solve_expr.rs | 35 +++++++ crates/reporting/tests/test_reporting.rs | 4 +- 4 files changed, 74 insertions(+), 80 deletions(-) diff --git a/crates/compiler/can/src/def.rs b/crates/compiler/can/src/def.rs index 0984707718..e86addb94d 100644 --- a/crates/compiler/can/src/def.rs +++ b/crates/compiler/can/src/def.rs @@ -1453,7 +1453,6 @@ impl DefOrdering { #[inline(always)] pub(crate) fn sort_can_defs_new( - env: &mut Env<'_>, scope: &mut Scope, var_store: &mut VarStore, defs: CanDefs, @@ -1524,67 +1523,48 @@ pub(crate) fn sort_can_defs_new( let def = defs.pop().unwrap(); let index = group.first_one().unwrap(); - let bad_recursion_body = if def_ordering.direct_references.get_row_col(index, index) - { - // a definition like `x = x + 1`, which is invalid in roc. - // We need to convert the body of the def to a runtime error. - let symbol = def_ordering.get_symbol(index).unwrap(); + if def_ordering.references.get_row_col(index, index) { + // push the "header" for this group of recursive definitions + let cycle_mark = IllegalCycleMark::new(var_store); + declarations.push_recursive_group(1, cycle_mark); - let entries = vec![make_cycle_entry(symbol, &def)]; + // then push the definition + let (symbol, specializes) = match def.loc_pattern.value { + Pattern::Identifier(symbol) => (symbol, None), - let problem = Problem::RuntimeError(RuntimeError::CircularDef(entries.clone())); - env.problem(problem); + Pattern::AbilityMemberSpecialization { ident, specializes } => { + (ident, Some(specializes)) + } - Some(Expr::RuntimeError(RuntimeError::CircularDef(entries))) - } else { - None - }; + _ => { + internal_error!("destructures cannot participate in a recursive group; it's always a type error") + } + }; - let is_illegally_self_recursive = bad_recursion_body.is_some(); - let set_opt_invalid_recursion_body = |e: &mut Expr| match bad_recursion_body { - Some(err) => *e = err, - None => {} - }; - - if def_ordering.references.get_row_col(index, index) && !is_illegally_self_recursive - { - // this function calls itself, and must be typechecked as a recursive def - match def.loc_pattern.value { - Pattern::Identifier(symbol) => match def.loc_expr.value { - Closure(closure_data) => { - declarations.push_recursive_def( - Loc::at(def.loc_pattern.region, symbol), - Loc::at(def.loc_expr.region, closure_data), - def.expr_var, - def.annotation, - None, - ); - } - e => todo!("{:?}", e), - }, - Pattern::AbilityMemberSpecialization { - ident: symbol, - specializes, - } => match def.loc_expr.value { - Closure(closure_data) => { - declarations.push_recursive_def( - Loc::at(def.loc_pattern.region, symbol), - Loc::at(def.loc_expr.region, closure_data), - def.expr_var, - def.annotation, - Some(specializes), - ); - } - _ => todo!(), - }, - _ => todo!("{:?}", &def.loc_pattern.value), + match def.loc_expr.value { + Closure(closure_data) => { + declarations.push_recursive_def( + Loc::at(def.loc_pattern.region, symbol), + Loc::at(def.loc_expr.region, closure_data), + def.expr_var, + def.annotation, + specializes, + ); + } + _ => { + declarations.push_value_def( + Loc::at(def.loc_pattern.region, symbol), + def.loc_expr, + def.expr_var, + def.annotation, + specializes, + ); + } } } else { match def.loc_pattern.value { Pattern::Identifier(symbol) => match def.loc_expr.value { - Closure(mut closure_data) => { - set_opt_invalid_recursion_body(&mut closure_data.loc_body.value); - + Closure(closure_data) => { declarations.push_function_def( Loc::at(def.loc_pattern.region, symbol), Loc::at(def.loc_expr.region, closure_data), @@ -1594,9 +1574,6 @@ pub(crate) fn sort_can_defs_new( ); } _ => { - let mut def = def; - set_opt_invalid_recursion_body(&mut def.loc_expr.value); - declarations.push_value_def( Loc::at(def.loc_pattern.region, symbol), def.loc_expr, @@ -1610,9 +1587,7 @@ pub(crate) fn sort_can_defs_new( ident: symbol, specializes, } => match def.loc_expr.value { - Closure(mut closure_data) => { - set_opt_invalid_recursion_body(&mut closure_data.loc_body.value); - + Closure(closure_data) => { declarations.push_function_def( Loc::at(def.loc_pattern.region, symbol), Loc::at(def.loc_expr.region, closure_data), @@ -1622,9 +1597,6 @@ pub(crate) fn sort_can_defs_new( ); } _ => { - let mut def = def; - set_opt_invalid_recursion_body(&mut def.loc_expr.value); - declarations.push_value_def( Loc::at(def.loc_pattern.region, symbol), def.loc_expr, @@ -1635,9 +1607,6 @@ pub(crate) fn sort_can_defs_new( } }, _ => { - let mut def = def; - set_opt_invalid_recursion_body(&mut def.loc_expr.value); - declarations.push_destructure_def( def.loc_pattern, def.loc_expr, @@ -1749,17 +1718,7 @@ pub(crate) fn sort_can_defs( Pattern::AbilityMemberSpecialization { .. } ); - let declaration = if def_ordering.direct_references.get_row_col(index, index) { - // a definition like `x = x + 1`, which is invalid in roc - let symbol = def_ordering.get_symbol(index).unwrap(); - - let entries = vec![make_cycle_entry(symbol, &def)]; - - let problem = Problem::RuntimeError(RuntimeError::CircularDef(entries.clone())); - env.problem(problem); - - Declaration::InvalidCycle(entries) - } else if def_ordering.references.get_row_col(index, index) { + let declaration = if def_ordering.references.get_row_col(index, index) { debug_assert!(!is_specialization, "Self-recursive specializations can only be determined during solving - but it was determined for {:?} now, that's a bug!", def); // this function calls itself, and must be typechecked as a recursive def diff --git a/crates/compiler/can/src/module.rs b/crates/compiler/can/src/module.rs index 6435fc65f2..9c56ffec71 100644 --- a/crates/compiler/can/src/module.rs +++ b/crates/compiler/can/src/module.rs @@ -422,7 +422,7 @@ pub fn canonicalize_module_defs<'a>( }; let (mut declarations, mut output) = - crate::def::sort_can_defs_new(&mut env, &mut scope, var_store, defs, new_output); + crate::def::sort_can_defs_new(&mut scope, var_store, defs, new_output); debug_assert!( output.pending_derives.is_empty(), diff --git a/crates/compiler/solve/tests/solve_expr.rs b/crates/compiler/solve/tests/solve_expr.rs index 60658558cf..b43b7def71 100644 --- a/crates/compiler/solve/tests/solve_expr.rs +++ b/crates/compiler/solve/tests/solve_expr.rs @@ -8009,4 +8009,39 @@ mod solve_expr { print_only_under_alias: true ); } + + #[test] + fn self_recursive_function_not_syntactically_a_function() { + infer_eq_without_problem( + indoc!( + r#" + app "test" provides [fx] to "./platform" + + after : ({} -> a), ({} -> b) -> ({} -> b) + + fx = after (\{} -> {}) \{} -> if Bool.true then fx {} else {} + "# + ), + "{} -> {}", + ); + } + + #[test] + fn self_recursive_function_not_syntactically_a_function_nested() { + infer_eq_without_problem( + indoc!( + r#" + app "test" provides [main] to "./platform" + + main = + after : ({} -> a), ({} -> b) -> ({} -> b) + + fx = after (\{} -> {}) \{} -> if Bool.true then fx {} else {} + + fx + "# + ), + "{} -> {}", + ); + } } diff --git a/crates/reporting/tests/test_reporting.rs b/crates/reporting/tests/test_reporting.rs index f26dab8ea9..018f6ab109 100644 --- a/crates/reporting/tests/test_reporting.rs +++ b/crates/reporting/tests/test_reporting.rs @@ -10773,7 +10773,7 @@ All branches in an `if` must have the same type! app "test" imports [] provides [main] to "./platform" main = - if Bool.true then \{} -> {} else main + if Bool.true then {} else main "# ), @r###" @@ -10782,7 +10782,7 @@ All branches in an `if` must have the same type! `main` is defined directly in terms of itself: 3│> main = - 4│> if Bool.true then \{} -> {} else main + 4│> if Bool.true then {} else main Since Roc evaluates values strict, running this program would create an infinite number of `main` values! From 3d34de5fc1e10b06ccd5e1c752c5089d235806d2 Mon Sep 17 00:00:00 2001 From: Ayaz Hafiz Date: Tue, 11 Oct 2022 13:13:50 -0500 Subject: [PATCH 02/16] Remove test caught by reporting tests --- crates/compiler/can/tests/test_can.rs | 31 --------------------------- 1 file changed, 31 deletions(-) diff --git a/crates/compiler/can/tests/test_can.rs b/crates/compiler/can/tests/test_can.rs index a115faf55d..2c531a8ebd 100644 --- a/crates/compiler/can/tests/test_can.rs +++ b/crates/compiler/can/tests/test_can.rs @@ -917,37 +917,6 @@ mod test_can { assert_eq!(is_circular_def, false); } - #[test] - fn invalid_self_recursion() { - let src = indoc!( - r#" - x = x - - x - "# - ); - - let home = test_home(); - let arena = Bump::new(); - let CanExprOut { - loc_expr, - problems, - interns, - .. - } = can_expr_with(&arena, home, src); - - let is_circular_def = matches!(loc_expr.value, RuntimeError(RuntimeError::CircularDef(_))); - - let problem = Problem::RuntimeError(RuntimeError::CircularDef(vec![CycleEntry { - symbol: interns.symbol(home, "x".into()), - symbol_region: Region::new(Position::new(0), Position::new(1)), - expr_region: Region::new(Position::new(4), Position::new(5)), - }])); - - assert_eq!(is_circular_def, true); - assert_eq!(problems, vec![problem]); - } - #[test] fn invalid_mutual_recursion() { let src = indoc!( From f13c57be8b301610784c9dd0a0f4103e5abc821f Mon Sep 17 00:00:00 2001 From: Ayaz Hafiz Date: Mon, 17 Oct 2022 10:02:40 -0500 Subject: [PATCH 03/16] Tighten up infinite loop error --- crates/reporting/src/error/canonicalize.rs | 6 +----- crates/reporting/tests/test_reporting.rs | 8 ++++---- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/crates/reporting/src/error/canonicalize.rs b/crates/reporting/src/error/canonicalize.rs index 22a1864749..d9bc25db44 100644 --- a/crates/reporting/src/error/canonicalize.rs +++ b/crates/reporting/src/error/canonicalize.rs @@ -2035,11 +2035,7 @@ pub fn to_circular_def_doc<'b>( alloc.reflow(" is defined directly in terms of itself:"), ]), alloc.region(lines.convert_region(Region::span_across(symbol_region, expr_region))), - alloc.concat([ - alloc.reflow("Since Roc evaluates values strict, running this program would create an infinite number of "), - alloc.symbol_unqualified(*symbol), - alloc.reflow(" values!"), - ]), + alloc.reflow("Roc evaluates values strictly, so running this program would enter an infinite loop!"), alloc.hint("").append(alloc.concat([ alloc.reflow("Did you mean to define "),alloc.symbol_unqualified(*symbol),alloc.reflow(" as a function?"), ])), diff --git a/crates/reporting/tests/test_reporting.rs b/crates/reporting/tests/test_reporting.rs index 018f6ab109..0d975e0ae1 100644 --- a/crates/reporting/tests/test_reporting.rs +++ b/crates/reporting/tests/test_reporting.rs @@ -2166,8 +2166,8 @@ mod test_reporting { 4│ f = f ^^^^^ - Since Roc evaluates values strict, running this program would create - an infinite number of `f` values! + Roc evaluates values strictly, so running this program would enter an + infinite loop! Hint: Did you mean to define `f` as a function? "### @@ -10784,8 +10784,8 @@ All branches in an `if` must have the same type! 3│> main = 4│> if Bool.true then {} else main - Since Roc evaluates values strict, running this program would create - an infinite number of `main` values! + Roc evaluates values strictly, so running this program would enter an + infinite loop! Hint: Did you mean to define `main` as a function? "### From 7986514d2073296ed48627e2f146b8f25c698869 Mon Sep 17 00:00:00 2001 From: Ayaz Hafiz Date: Mon, 17 Oct 2022 11:36:06 -0500 Subject: [PATCH 04/16] Use runtime representation of values when building structural eq Closes #4348 --- crates/compiler/gen_llvm/src/llvm/compare.rs | 2 ++ crates/compiler/test_gen/src/gen_primitives.rs | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/crates/compiler/gen_llvm/src/llvm/compare.rs b/crates/compiler/gen_llvm/src/llvm/compare.rs index 8b0b84eddc..5011916a93 100644 --- a/crates/compiler/gen_llvm/src/llvm/compare.rs +++ b/crates/compiler/gen_llvm/src/llvm/compare.rs @@ -147,6 +147,8 @@ fn build_eq<'a, 'ctx, 'env>( rhs_layout: &Layout<'a>, when_recursive: WhenRecursive<'a>, ) -> BasicValueEnum<'ctx> { + let lhs_layout = &lhs_layout.runtime_representation(env.layout_interner); + let rhs_layout = &rhs_layout.runtime_representation(env.layout_interner); if lhs_layout != rhs_layout { panic!( "Equality of different layouts; did you have a type mismatch?\n{:?} == {:?}", diff --git a/crates/compiler/test_gen/src/gen_primitives.rs b/crates/compiler/test_gen/src/gen_primitives.rs index 8f8340dcf3..b55fe58495 100644 --- a/crates/compiler/test_gen/src/gen_primitives.rs +++ b/crates/compiler/test_gen/src/gen_primitives.rs @@ -4085,3 +4085,21 @@ fn pattern_match_char() { RocStr ); } + +#[test] +#[cfg(any(feature = "gen-llvm", feature = "gen-wasm"))] +fn issue_4348() { + assert_evals_to!( + indoc!( + r#" + str = "z" + (\_ -> + when str is + "z" -> "okay" + _ -> "") "FAIL" + "# + ), + RocStr::from("okay"), + RocStr + ); +} From 05e8e6de6fdf8716d492804a302becd87fb4a93c Mon Sep 17 00:00:00 2001 From: Ayaz Hafiz Date: Tue, 18 Oct 2022 15:50:20 -0500 Subject: [PATCH 05/16] Disallow typing optional fields when required fields are annotated Closes #4313 --- crates/ast/src/lang/core/types.rs | 19 +----- crates/ast/src/solve_type.rs | 9 +++ crates/compiler/can/src/annotation.rs | 4 +- crates/compiler/mono/src/layout.rs | 6 +- crates/compiler/mono/src/layout_soa.rs | 4 +- crates/compiler/solve/src/solve.rs | 20 ++++-- crates/compiler/types/src/pretty_print.rs | 6 +- crates/compiler/types/src/subs.rs | 2 + crates/compiler/types/src/types.rs | 77 +++++++++++++---------- crates/compiler/unify/src/unify.rs | 16 ++++- crates/glue/src/types.rs | 4 +- crates/reporting/src/error/type.rs | 24 +++---- crates/reporting/tests/test_reporting.rs | 27 ++++++++ 13 files changed, 133 insertions(+), 85 deletions(-) diff --git a/crates/ast/src/lang/core/types.rs b/crates/ast/src/lang/core/types.rs index 5216d43bc2..25ce0cc0bf 100644 --- a/crates/ast/src/lang/core/types.rs +++ b/crates/ast/src/lang/core/types.rs @@ -414,24 +414,7 @@ pub fn to_type2<'a>( for (node_id, (label, field)) in field_types.iter_node_ids().zip(field_types_map) { let poolstr = PoolStr::new(label.as_str(), env.pool); - let rec_field = match field { - RecordField::Optional(_) => { - let field_id = env.pool.add(field.into_inner()); - RecordField::Optional(field_id) - } - RecordField::RigidOptional(_) => { - let field_id = env.pool.add(field.into_inner()); - RecordField::RigidOptional(field_id) - } - RecordField::Demanded(_) => { - let field_id = env.pool.add(field.into_inner()); - RecordField::Demanded(field_id) - } - RecordField::Required(_) => { - let field_id = env.pool.add(field.into_inner()); - RecordField::Required(field_id) - } - }; + let rec_field = field.map_owned(|field| env.pool.add(field)); env.pool[node_id] = (poolstr, rec_field); } diff --git a/crates/ast/src/solve_type.rs b/crates/ast/src/solve_type.rs index fabd83e825..b84ead1cb9 100644 --- a/crates/ast/src/solve_type.rs +++ b/crates/ast/src/solve_type.rs @@ -834,6 +834,15 @@ fn type_to_variable<'a>( cached, mempool.get(*type_id), )), + RigidRequired(type_id) => RigidRequired(type_to_variable( + arena, + mempool, + subs, + rank, + pools, + cached, + mempool.get(*type_id), + )), Optional(type_id) => Optional(type_to_variable( arena, mempool, diff --git a/crates/compiler/can/src/annotation.rs b/crates/compiler/can/src/annotation.rs index 7c5e789f4c..eda2d8d13e 100644 --- a/crates/compiler/can/src/annotation.rs +++ b/crates/compiler/can/src/annotation.rs @@ -1209,7 +1209,7 @@ fn can_assigned_fields<'a>( ); let label = Lowercase::from(field_name.value); - field_types.insert(label.clone(), Required(field_type)); + field_types.insert(label.clone(), RigidRequired(field_type)); break 'inner label; } @@ -1246,7 +1246,7 @@ fn can_assigned_fields<'a>( } }; - field_types.insert(field_name.clone(), Required(field_type)); + field_types.insert(field_name.clone(), RigidRequired(field_type)); break 'inner field_name; } diff --git a/crates/compiler/mono/src/layout.rs b/crates/compiler/mono/src/layout.rs index a184594e01..3b22bd379c 100644 --- a/crates/compiler/mono/src/layout.rs +++ b/crates/compiler/mono/src/layout.rs @@ -3117,7 +3117,9 @@ fn layout_from_flat_type<'a>( for (label, field) in it { match field { - RecordField::Required(field_var) | RecordField::Demanded(field_var) => { + RecordField::Required(field_var) + | RecordField::Demanded(field_var) + | RecordField::RigidRequired(field_var) => { sortables .push((label, cached!(Layout::from_var(env, field_var), criteria))); } @@ -3220,7 +3222,7 @@ fn sort_record_fields_help<'a>( for (label, field) in fields_map { match field { - RecordField::Demanded(v) | RecordField::Required(v) => { + RecordField::Demanded(v) | RecordField::Required(v) | RecordField::RigidRequired(v) => { let Cacheable(layout, _) = Layout::from_var(env, v); sorted_fields.push((label, v, Ok(layout?))); } diff --git a/crates/compiler/mono/src/layout_soa.rs b/crates/compiler/mono/src/layout_soa.rs index 0d2262273e..63c8079078 100644 --- a/crates/compiler/mono/src/layout_soa.rs +++ b/crates/compiler/mono/src/layout_soa.rs @@ -807,7 +807,9 @@ impl Layout { RecordField::Optional(_) | RecordField::RigidOptional(_) => { // do nothing } - RecordField::Required(_) | RecordField::Demanded(_) => { + RecordField::Required(_) + | RecordField::Demanded(_) + | RecordField::RigidRequired(_) => { let var = subs.variables[var_index.index as usize]; let layout = Layout::from_var_help(layouts, subs, var)?; diff --git a/crates/compiler/solve/src/solve.rs b/crates/compiler/solve/src/solve.rs index bdfcb24265..2676d6f25c 100644 --- a/crates/compiler/solve/src/solve.rs +++ b/crates/compiler/solve/src/solve.rs @@ -2427,6 +2427,7 @@ fn type_to_variable<'a>( Optional(t) => Optional(helper!(t)), Required(t) => Required(helper!(t)), Demanded(t) => Demanded(helper!(t)), + RigidRequired(t) => RigidRequired(helper!(t)), RigidOptional(t) => RigidOptional(helper!(t)), } }; @@ -3413,11 +3414,17 @@ fn adjust_rank_content( let var = subs[var_index]; rank = rank.max(adjust_rank(subs, young_mark, visit_mark, group_rank, var)); - // When generalizing annotations with rigid optionals, we want to promote - // them to non-rigid, so that usages at specialized sites don't have to - // exactly include the optional field. - if let RecordField::RigidOptional(()) = subs[field_index] { - subs[field_index] = RecordField::Optional(()); + // When generalizing annotations with rigid optional/required fields, + // we want to promote them to non-rigid, so that usages at + // specialized sites don't have to exactly include the optional/required field. + match subs[field_index] { + RecordField::RigidOptional(()) => { + subs[field_index] = RecordField::Optional(()); + } + RecordField::RigidRequired(()) => { + subs[field_index] = RecordField::Required(()); + } + _ => {} } } @@ -3791,7 +3798,8 @@ fn deep_copy_var_help( let slice = SubsSlice::extend_new( &mut subs.record_fields, field_types.into_iter().map(|f| match f { - RecordField::RigidOptional(()) => internal_error!("RigidOptionals should be generalized to non-rigid by this point"), + RecordField::RigidOptional(()) + | RecordField::RigidRequired(()) => internal_error!("Rigid optional/required should be generalized to non-rigid by this point"), RecordField::Demanded(_) | RecordField::Required(_) diff --git a/crates/compiler/types/src/pretty_print.rs b/crates/compiler/types/src/pretty_print.rs index 0c930cf25f..ffdf9ea618 100644 --- a/crates/compiler/types/src/pretty_print.rs +++ b/crates/compiler/types/src/pretty_print.rs @@ -1104,10 +1104,8 @@ fn write_flat_type<'a>( buf.push_str(label.as_str()); match record_field { - Optional(_) => buf.push_str(" ? "), - Required(_) => buf.push_str(" : "), - Demanded(_) => buf.push_str(" : "), - RigidOptional(_) => buf.push_str(" ? "), + Optional(_) | RigidOptional(_) => buf.push_str(" ? "), + Required(_) | Demanded(_) | RigidRequired(_) => buf.push_str(" : "), }; write_content( diff --git a/crates/compiler/types/src/subs.rs b/crates/compiler/types/src/subs.rs index b433675a2e..a72811eb22 100644 --- a/crates/compiler/types/src/subs.rs +++ b/crates/compiler/types/src/subs.rs @@ -920,6 +920,7 @@ fn subs_fmt_flat_type(this: &FlatType, subs: &Subs, f: &mut fmt::Formatter) -> f RecordField::RigidOptional(_) => "r?", RecordField::Required(_) => ":", RecordField::Demanded(_) => ":", + RecordField::RigidRequired(_) => "r:", }; write!( f, @@ -3831,6 +3832,7 @@ fn flat_type_to_err_type( Required(_) => Required(error_type), Demanded(_) => Demanded(error_type), RigidOptional(_) => RigidOptional(error_type), + RigidRequired(_) => RigidRequired(error_type), }; err_fields.insert(label, err_record_field); diff --git a/crates/compiler/types/src/types.rs b/crates/compiler/types/src/types.rs index 7fba003170..07fb21507d 100644 --- a/crates/compiler/types/src/types.rs +++ b/crates/compiler/types/src/types.rs @@ -27,10 +27,12 @@ const GREEK_LETTERS: &[char] = &[ /// /// - Demanded: only introduced by pattern matches, e.g. { x } -> /// Cannot unify with an Optional field, but can unify with a Required field -/// - Required: introduced by record literals and type annotations. +/// - Required: introduced by record literals /// Can unify with Optional and Demanded /// - Optional: introduced by pattern matches, e.g. { x ? "" } -> /// Can unify with Required, but not with Demanded +/// - RigidRequired: introduced by annotations, e.g. { x : Str} +/// Can only unify with Required and Demanded, to prevent an optional field being typed as Required /// - RigidOptional: introduced by annotations, e.g. { x ? Str} /// Can only unify with Optional, to prevent a required field being typed as Optional #[derive(PartialEq, Eq, Clone, Hash)] @@ -38,6 +40,7 @@ pub enum RecordField { Demanded(T), Required(T), Optional(T), + RigidRequired(T), RigidOptional(T), } @@ -51,6 +54,7 @@ impl fmt::Debug for RecordField { Optional(typ) => write!(f, "Optional({:?})", typ), Required(typ) => write!(f, "Required({:?})", typ), Demanded(typ) => write!(f, "Demanded({:?})", typ), + RigidRequired(typ) => write!(f, "RigidRequired({:?})", typ), RigidOptional(typ) => write!(f, "RigidOptional({:?})", typ), } } @@ -64,6 +68,7 @@ impl RecordField { Optional(t) => t, Required(t) => t, Demanded(t) => t, + RigidRequired(t) => t, RigidOptional(t) => t, } } @@ -75,6 +80,7 @@ impl RecordField { Optional(t) => t, Required(t) => t, Demanded(t) => t, + RigidRequired(t) => t, RigidOptional(t) => t, } } @@ -86,23 +92,43 @@ impl RecordField { Optional(t) => t, Required(t) => t, Demanded(t) => t, + RigidRequired(t) => t, RigidOptional(t) => t, } } - pub fn map(&self, mut f: F) -> RecordField + pub fn map(&self, f: F) -> RecordField where - F: FnMut(&T) -> U, + F: FnOnce(&T) -> U, + { + self.replace(f(self.as_inner())) + } + + pub fn map_owned(self, f: F) -> RecordField + where + F: FnOnce(T) -> U, { use RecordField::*; match self { Optional(t) => Optional(f(t)), Required(t) => Required(f(t)), Demanded(t) => Demanded(f(t)), + RigidRequired(t) => RigidRequired(f(t)), RigidOptional(t) => RigidOptional(f(t)), } } + pub fn replace(&self, u: U) -> RecordField { + use RecordField::*; + match self { + Optional(_) => Optional(u), + Required(_) => Required(u), + Demanded(_) => Demanded(u), + RigidRequired(_) => RigidRequired(u), + RigidOptional(_) => RigidOptional(u), + } + } + pub fn is_optional(&self) -> bool { matches!( self, @@ -119,6 +145,7 @@ impl RecordField { Optional(typ) => typ.substitute(substitutions), Required(typ) => typ.substitute(substitutions), Demanded(typ) => typ.substitute(substitutions), + RigidRequired(typ) => typ.substitute(substitutions), RigidOptional(typ) => typ.substitute(substitutions), } } @@ -135,6 +162,7 @@ impl RecordField { Optional(typ) => typ.substitute_alias(rep_symbol, rep_args, actual), Required(typ) => typ.substitute_alias(rep_symbol, rep_args, actual), Demanded(typ) => typ.substitute_alias(rep_symbol, rep_args, actual), + RigidRequired(typ) => typ.substitute_alias(rep_symbol, rep_args, actual), RigidOptional(typ) => typ.substitute_alias(rep_symbol, rep_args, actual), } } @@ -154,6 +182,7 @@ impl RecordField { Optional(typ) => typ.instantiate_aliases(region, aliases, var_store, introduced), Required(typ) => typ.instantiate_aliases(region, aliases, var_store, introduced), Demanded(typ) => typ.instantiate_aliases(region, aliases, var_store, introduced), + RigidRequired(typ) => typ.instantiate_aliases(region, aliases, var_store, introduced), RigidOptional(typ) => typ.instantiate_aliases(region, aliases, var_store, introduced), } } @@ -165,6 +194,7 @@ impl RecordField { Optional(typ) => typ.contains_symbol(rep_symbol), Required(typ) => typ.contains_symbol(rep_symbol), Demanded(typ) => typ.contains_symbol(rep_symbol), + RigidRequired(typ) => typ.contains_symbol(rep_symbol), RigidOptional(typ) => typ.contains_symbol(rep_symbol), } } @@ -175,6 +205,7 @@ impl RecordField { Optional(typ) => typ.contains_variable(rep_variable), Required(typ) => typ.contains_variable(rep_variable), Demanded(typ) => typ.contains_variable(rep_variable), + RigidRequired(typ) => typ.contains_variable(rep_variable), RigidOptional(typ) => typ.contains_variable(rep_variable), } } @@ -581,12 +612,14 @@ impl fmt::Debug for Type { for (label, field_type) in fields { match field_type { - RecordField::Optional(_) => write!(f, "{:?} ? {:?}", label, field_type)?, - RecordField::Required(_) => write!(f, "{:?} : {:?}", label, field_type)?, - RecordField::Demanded(_) => write!(f, "{:?} : {:?}", label, field_type)?, - RecordField::RigidOptional(_) => { + RecordField::Optional(_) | RecordField::RigidOptional(_) => { write!(f, "{:?} ? {:?}", label, field_type)? } + RecordField::Required(_) + | RecordField::Demanded(_) + | RecordField::RigidRequired(_) => { + write!(f, "{:?} : {:?}", label, field_type)? + } } if any_written_yet { @@ -1636,15 +1669,8 @@ fn variables_help(tipe: &Type, accum: &mut ImSet) { variables_help(ret, accum); } Record(fields, ext) => { - use RecordField::*; - for (_, field) in fields { - match field { - Optional(x) => variables_help(x, accum), - Required(x) => variables_help(x, accum), - Demanded(x) => variables_help(x, accum), - RigidOptional(x) => variables_help(x, accum), - }; + variables_help(field.as_inner(), accum); } if let TypeExtension::Open(ext) = ext { @@ -1778,15 +1804,8 @@ fn variables_help_detailed(tipe: &Type, accum: &mut VariableDetail) { variables_help_detailed(ret, accum); } Record(fields, ext) => { - use RecordField::*; - for (_, field) in fields { - match field { - Optional(x) => variables_help_detailed(x, accum), - Required(x) => variables_help_detailed(x, accum), - Demanded(x) => variables_help_detailed(x, accum), - RigidOptional(x) => variables_help_detailed(x, accum), - }; + variables_help_detailed(field.as_inner(), accum); } if let TypeExtension::Open(ext) = ext { @@ -2366,11 +2385,7 @@ fn write_error_type_help( buf.push_str(" ? "); content } - Required(content) => { - buf.push_str(" : "); - content - } - Demanded(content) => { + Required(content) | Demanded(content) | RigidRequired(content) => { buf.push_str(" : "); content } @@ -2520,11 +2535,7 @@ fn write_debug_error_type_help(error_type: ErrorType, buf: &mut String, parens: buf.push_str(" ? "); content } - Required(content) => { - buf.push_str(" : "); - content - } - Demanded(content) => { + Required(content) | Demanded(content) | RigidRequired(content) => { buf.push_str(" : "); content } diff --git a/crates/compiler/unify/src/unify.rs b/crates/compiler/unify/src/unify.rs index 08bcf62e39..19a4d21d0f 100644 --- a/crates/compiler/unify/src/unify.rs +++ b/crates/compiler/unify/src/unify.rs @@ -1845,6 +1845,7 @@ fn unify_shared_fields( // // Demanded does not unify with Optional // RigidOptional does not unify with Required or Demanded + // RigidRequired does not unify with Optional // Unifying Required with Demanded => Demanded // Unifying Optional with Required => Required // Unifying Optional with RigidOptional => RigidOptional @@ -1861,15 +1862,26 @@ fn unify_shared_fields( (Required(val), Optional(_)) => Required(val), (Optional(val), Required(_)) => Required(val), (Optional(val), Optional(_)) => Optional(val), + + // rigid optional (RigidOptional(val), Optional(_)) | (Optional(_), RigidOptional(val)) => { RigidOptional(val) } - (RigidOptional(_), Demanded(_) | Required(_)) - | (Demanded(_) | Required(_), RigidOptional(_)) => { + (RigidOptional(_), Demanded(_) | Required(_) | RigidRequired(_)) + | (Demanded(_) | Required(_) | RigidRequired(_), RigidOptional(_)) => { // this is an error, but we continue to give better error messages continue; } (RigidOptional(val), RigidOptional(_)) => RigidOptional(val), + + // rigid required + (RigidRequired(_), Optional(_)) | (Optional(_), RigidRequired(_)) => { + // this is an error, but we continue to give better error messages + continue; + } + (RigidRequired(val), Demanded(_) | Required(_)) + | (Demanded(_) | Required(_), RigidRequired(val)) => RigidRequired(val), + (RigidRequired(val), RigidRequired(_)) => RigidRequired(val), }; matching_fields.push((name, actual)); diff --git a/crates/glue/src/types.rs b/crates/glue/src/types.rs index 311fa71857..317461d148 100644 --- a/crates/glue/src/types.rs +++ b/crates/glue/src/types.rs @@ -768,7 +768,9 @@ fn add_type_help<'a>( .expect("something weird in content") .flat_map(|(label, field)| { match field { - RecordField::Required(field_var) | RecordField::Demanded(field_var) => { + RecordField::Required(field_var) + | RecordField::Demanded(field_var) + | RecordField::RigidRequired(field_var) => { Some((label.to_string(), field_var)) } RecordField::Optional(_) | RecordField::RigidOptional(_) => { diff --git a/crates/reporting/src/error/type.rs b/crates/reporting/src/error/type.rs index 3ed8c75082..52cb8bd025 100644 --- a/crates/reporting/src/error/type.rs +++ b/crates/reporting/src/error/type.rs @@ -2317,6 +2317,9 @@ fn to_doc_help<'b>( Parens::Unnecessary, v, )), + RecordField::RigidRequired(v) => RecordField::RigidRequired( + to_doc_help(ctx, alloc, Parens::Unnecessary, v), + ), RecordField::Demanded(v) => RecordField::Demanded(to_doc_help( ctx, alloc, @@ -2794,22 +2797,12 @@ fn diff_record<'b>( left: ( field.clone(), alloc.string(field.as_str().to_string()), - match t1 { - RecordField::Optional(_) => RecordField::Optional(diff.left), - RecordField::RigidOptional(_) => RecordField::RigidOptional(diff.left), - RecordField::Required(_) => RecordField::Required(diff.left), - RecordField::Demanded(_) => RecordField::Demanded(diff.left), - }, + t1.replace(diff.left), ), right: ( field.clone(), alloc.string(field.as_str().to_string()), - match t2 { - RecordField::Optional(_) => RecordField::Optional(diff.right), - RecordField::RigidOptional(_) => RecordField::RigidOptional(diff.right), - RecordField::Required(_) => RecordField::Required(diff.right), - RecordField::Demanded(_) => RecordField::Demanded(diff.right), - }, + t2.replace(diff.right), ), status: { match (&t1, &t2) { @@ -3252,10 +3245,9 @@ mod report_text { let entry_to_doc = |(field_name, field_type): (RocDocBuilder<'b>, RecordField>)| { match field_type { - RecordField::Demanded(field) => { - field_name.append(alloc.text(" : ")).append(field) - } - RecordField::Required(field) => { + RecordField::Demanded(field) + | RecordField::Required(field) + | RecordField::RigidRequired(field) => { field_name.append(alloc.text(" : ")).append(field) } RecordField::Optional(field) | RecordField::RigidOptional(field) => { diff --git a/crates/reporting/tests/test_reporting.rs b/crates/reporting/tests/test_reporting.rs index f26dab8ea9..401b567112 100644 --- a/crates/reporting/tests/test_reporting.rs +++ b/crates/reporting/tests/test_reporting.rs @@ -11457,4 +11457,31 @@ All branches in an `if` must have the same type! Note: `Hash` cannot be generated for functions. "### ); + + test_report!( + demanded_vs_optional_record_field, + indoc!( + r#" + foo : { a : Str } -> Str + foo = \{ a ? "" } -> a + foo + "# + ), + @r###" + ── TYPE MISMATCH ───────────────────────────────────────── /code/proj/Main.roc ─ + + The 1st argument to `foo` is weird: + + 5│ foo = \{ a ? "" } -> a + ^^^^^^^^^^ + + The argument is a pattern that matches record values of type: + + { a ? Str } + + But the annotation on `foo` says the 1st argument should be: + + { a : Str } + "### + ); } From 7bbf1fb9c5fb75f71cce531b7fa7eddf3da1e9cc Mon Sep 17 00:00:00 2001 From: Brendan Hansknecht Date: Fri, 21 Oct 2022 13:57:22 -0700 Subject: [PATCH 06/16] fix examples that are failing to build --- examples/cli/echo.roc | 4 ++-- examples/cli/http-get.roc | 10 +++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/examples/cli/echo.roc b/examples/cli/echo.roc index c44bcbabbf..210d16bd3b 100644 --- a/examples/cli/echo.roc +++ b/examples/cli/echo.roc @@ -6,8 +6,8 @@ app "echo" main : Program main = Program.noArgs mainTask -mainTask : List Str -> Task ExitCode [] [Read [Stdin], Write [Stdout]] -mainTask = \_args -> +mainTask : Task ExitCode [] [Read [Stdin], Write [Stdout]] +mainTask = _ <- Task.await (Stdout.line "🗣 Shout into this cave and hear the echo! 👂👂👂") Task.loop {} (\_ -> Task.map tick Step) |> Program.exit 0 diff --git a/examples/cli/http-get.roc b/examples/cli/http-get.roc index 10d90bed38..f6e5b4059f 100644 --- a/examples/cli/http-get.roc +++ b/examples/cli/http-get.roc @@ -1,10 +1,13 @@ app "http-get" packages { pf: "cli-platform/main.roc" } - imports [pf.Http, pf.Task, pf.Stdin, pf.Stdout] + imports [pf.Http, pf.Task, pf.Stdin, pf.Stdout, pf.Program.{ Program, ExitCode }] provides [main] to pf -main : List Str -> Task.Task {} [] [Read [Stdin], Write [Stdout], Network [Http]] -main = \_args -> +main : Program +main = Program.noArgs mainTask + +mainTask : Task.Task ExitCode [] [Read [Stdin], Write [Stdout], Network [Http]] +mainTask = _ <- Task.await (Stdout.line "Please enter a URL to fetch") url <- Task.await Stdin.line @@ -22,3 +25,4 @@ main = \_args -> |> Task.await Stdout.line output + |> Program.exit 0 From 989784620dd29451ada57b916d3f9f77606b75ad Mon Sep 17 00:00:00 2001 From: Folkert Date: Sat, 22 Oct 2022 01:23:41 +0200 Subject: [PATCH 07/16] use execve on windows! --- Cargo.lock | 7 ----- crates/cli/Cargo.toml | 3 --- crates/cli/src/lib.rs | 61 ++++++++++++++++++++++++++++++++++++------- 3 files changed, 51 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 41058b0864..b17b6f36b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2103,12 +2103,6 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" -[[package]] -name = "memexec" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc62ccb14881da5d1862cda3a9648fb4a4897b2aff0b2557b89da44a5e550b7c" - [[package]] name = "memmap2" version = "0.3.1" @@ -3403,7 +3397,6 @@ dependencies = [ "inkwell 0.1.0", "libc", "libloading", - "memexec", "mimalloc", "once_cell", "parking_lot 0.12.1", diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index 1025251fbd..19f101cecd 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -77,9 +77,6 @@ roc_gen_llvm = {path = "../compiler/gen_llvm"} inkwell = {path = "../vendor/inkwell"} signal-hook = "0.3.14" -[target.'cfg(windows)'.dependencies] -memexec = "0.2.0" - # for now, uses unix/libc functions that windows does not support [target.'cfg(not(windows))'.dependencies] roc_repl_expect = { path = "../repl_expect" } diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index 50dac8eda7..7848aace1f 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -779,6 +779,52 @@ fn make_argv_envp<'a, I: IntoIterator, S: AsRef>( (argv_cstrings, envp_cstrings) } +#[cfg_attr(not(target_family = "windows"), allow(unused))] +fn make_argv_envp_windows<'a, I: IntoIterator, S: AsRef>( + arena: &'a Bump, + executable: &ExecutableFile, + args: I, +) -> ( + bumpalo::collections::Vec<'a, CString>, + bumpalo::collections::Vec<'a, CString>, +) { + use bumpalo::collections::CollectIn; + + let path = executable.as_path(); + let path_cstring = CString::new(path.as_os_str().to_str().unwrap().as_bytes()).unwrap(); + + // argv is an array of pointers to strings passed to the new program + // as its command-line arguments. By convention, the first of these + // strings (i.e., argv[0]) should contain the filename associated + // with the file being executed. The argv array must be terminated + // by a NULL pointer. (Thus, in the new program, argv[argc] will be NULL.) + let it = args + .into_iter() + .map(|x| CString::new(x.as_ref().to_str().unwrap().as_bytes()).unwrap()); + + let argv_cstrings: bumpalo::collections::Vec = + std::iter::once(path_cstring).chain(it).collect_in(arena); + + // envp is an array of pointers to strings, conventionally of the + // form key=value, which are passed as the environment of the new + // program. The envp array must be terminated by a NULL pointer. + let mut buffer = Vec::with_capacity(100); + let envp_cstrings: bumpalo::collections::Vec = std::env::vars_os() + .map(|(k, v)| { + buffer.clear(); + + use std::io::Write; + buffer.write_all(k.to_str().unwrap().as_bytes()).unwrap(); + buffer.write_all(b"=").unwrap(); + buffer.write_all(v.to_str().unwrap().as_bytes()).unwrap(); + + CString::new(buffer.as_slice()).unwrap() + }) + .collect_in(arena); + + (argv_cstrings, envp_cstrings) +} + /// Run on the native OS (not on wasm) #[cfg(target_family = "unix")] fn roc_run_native, S: AsRef>( @@ -874,12 +920,9 @@ impl ExecutableFile { #[cfg(target_family = "windows")] ExecutableFile::OnDisk(_, path) => { - let _ = argv; - let _ = envp; - use memexec::memexec_exe; - let bytes = std::fs::read(path).unwrap(); - memexec_exe(&bytes).unwrap(); - std::process::exit(0); + let path_cstring = CString::new(path.to_str().unwrap()).unwrap(); + + libc::execve(path_cstring.as_ptr().cast(), argv.as_ptr(), envp.as_ptr()) } } } @@ -975,7 +1018,7 @@ fn roc_run_executable_file_path(binary_bytes: &[u8]) -> std::io::Result, S: AsRef>( arena: Bump, // This should be passed an owned value, not a reference, so we can usefully mem::forget it! opt_level: OptLevel, - _args: I, + args: I, binary_bytes: &[u8], _expectations: VecMap, _interns: Interns, @@ -986,9 +1029,7 @@ fn roc_run_native, S: AsRef>( let executable = roc_run_executable_file_path(binary_bytes)?; // TODO forward the arguments - // let (argv_cstrings, envp_cstrings) = make_argv_envp(&arena, &executable, args); - let argv_cstrings = bumpalo::vec![ in &arena; CString::default()]; - let envp_cstrings = bumpalo::vec![ in &arena; CString::default()]; + let (argv_cstrings, envp_cstrings) = make_argv_envp_windows(&arena, &executable, args); let argv: bumpalo::collections::Vec<*const c_char> = argv_cstrings .iter() From c55a3e7b37c4573cb3f97b88638a4f8a521e52d7 Mon Sep 17 00:00:00 2001 From: Folkert Date: Sat, 22 Oct 2022 14:49:59 +0200 Subject: [PATCH 08/16] resize reloc section when required --- crates/linker/src/lib.rs | 21 +++++--- crates/linker/src/pe.rs | 102 ++++++++++++++++++++++++++++++++------- 2 files changed, 99 insertions(+), 24 deletions(-) diff --git a/crates/linker/src/lib.rs b/crates/linker/src/lib.rs index d6fe76da55..a6009ade81 100644 --- a/crates/linker/src/lib.rs +++ b/crates/linker/src/lib.rs @@ -66,7 +66,8 @@ pub fn build_and_preprocess_host( host_input_path.with_file_name("dynhost") }; - generate_dynamic_lib(target, exposed_to_host, exported_closure_types, &dummy_lib); + let dummy_dll_symbols = make_dummy_dll_symbols(exposed_to_host, exported_closure_types); + generate_dynamic_lib(target, &dummy_dll_symbols, &dummy_lib); rebuild_host(opt_level, target, host_input_path, Some(&dummy_lib)); let metadata = host_input_path.with_file_name("metadata"); // let prehost = host_input_path.with_file_name("preprocessedhost"); @@ -77,6 +78,7 @@ pub fn build_and_preprocess_host( &metadata, preprocessed_host_path, &dummy_lib, + &dummy_dll_symbols, false, false, ) @@ -92,12 +94,10 @@ pub fn link_preprocessed_host( surgery(roc_app_bytes, &metadata, binary_path, false, false, target) } -fn generate_dynamic_lib( - target: &Triple, +fn make_dummy_dll_symbols( exposed_to_host: Vec, exported_closure_types: Vec, - dummy_lib_path: &Path, -) { +) -> Vec { let mut custom_names = Vec::new(); for sym in exposed_to_host { @@ -120,8 +120,12 @@ fn generate_dynamic_lib( // so they must be in alphabetical order custom_names.sort_unstable(); - if !dummy_lib_is_up_to_date(target, dummy_lib_path, &custom_names) { - let bytes = crate::generate_dylib::generate(target, &custom_names) + custom_names +} + +fn generate_dynamic_lib(target: &Triple, custom_names: &[String], dummy_lib_path: &Path) { + if !dummy_lib_is_up_to_date(target, dummy_lib_path, custom_names) { + let bytes = crate::generate_dylib::generate(target, custom_names) .unwrap_or_else(|e| internal_error!("{e}")); std::fs::write(dummy_lib_path, &bytes).unwrap_or_else(|e| internal_error!("{e}")) @@ -182,12 +186,14 @@ fn dummy_lib_is_up_to_date( } /// Constructs a `metadata::Metadata` from a host executable binary, and writes it to disk +#[allow(clippy::too_many_arguments)] fn preprocess( target: &Triple, host_exe_path: &Path, metadata_path: &Path, preprocessed_path: &Path, shared_lib: &Path, + dummy_dll_symbols: &[String], verbose: bool, time: bool, ) { @@ -229,6 +235,7 @@ fn preprocess( host_exe_path, metadata_path, preprocessed_path, + dummy_dll_symbols, verbose, time, ) diff --git a/crates/linker/src/pe.rs b/crates/linker/src/pe.rs index 171141c208..80ed93b111 100644 --- a/crates/linker/src/pe.rs +++ b/crates/linker/src/pe.rs @@ -186,12 +186,18 @@ pub(crate) fn preprocess_windows( host_exe_filename: &Path, metadata_filename: &Path, preprocessed_filename: &Path, + dummy_dll_symbols: &[String], _verbose: bool, _time: bool, ) -> object::read::Result<()> { let data = open_mmap(host_exe_filename); let new_sections = [*b".text\0\0\0", *b".rdata\0\0"]; - let mut preprocessed = Preprocessor::preprocess(preprocessed_filename, &data, &new_sections); + let mut preprocessed = Preprocessor::preprocess( + preprocessed_filename, + &data, + dummy_dll_symbols.len(), + &new_sections, + ); // get the metadata from the preprocessed executable before the destructive operations below let md = PeMetadata::from_preprocessed_host(&preprocessed, &new_sections); @@ -560,7 +566,8 @@ impl DynamicRelocationsPe { /// update existing section headers to point to a different (shifted) location in the file struct Preprocessor { header_offset: u64, - additional_length: usize, + additional_header_space: usize, + additional_reloc_space: usize, extra_sections_start: usize, extra_sections_width: usize, section_count_offset: u64, @@ -574,9 +581,17 @@ struct Preprocessor { impl Preprocessor { const SECTION_HEADER_WIDTH: usize = std::mem::size_of::(); - fn preprocess(output_path: &Path, data: &[u8], extra_sections: &[[u8; 8]]) -> MmapMut { - let this = Self::new(data, extra_sections); - let mut result = open_mmap_mut(output_path, data.len() + this.additional_length); + fn preprocess( + output_path: &Path, + data: &[u8], + dummy_dll_symbols: usize, + extra_sections: &[[u8; 8]], + ) -> MmapMut { + let this = Self::new(data, dummy_dll_symbols, extra_sections); + let mut result = open_mmap_mut( + output_path, + data.len() + this.additional_header_space + this.additional_reloc_space, + ); this.copy(&mut result, data); this.fix(&mut result, extra_sections); @@ -584,7 +599,7 @@ impl Preprocessor { result } - fn new(data: &[u8], extra_sections: &[[u8; 8]]) -> Self { + fn new(data: &[u8], dummy_dll_symbols: usize, extra_sections: &[[u8; 8]]) -> Self { use object::read::pe::ImageNtHeaders; let dos_header = pe::ImageDosHeader::parse(data).unwrap_or_else(|e| internal_error!("{e}")); @@ -609,13 +624,16 @@ impl Preprocessor { let extra_alignments = div_ceil(extra_sections_width, file_alignment); let new_headers_size = old_headers_size + extra_alignments * file_alignment; - let additional_length = new_headers_size - old_headers_size; + let additional_header_space = new_headers_size - old_headers_size; + let additional_reloc_space = + Self::additional_reloc_space(data, dummy_dll_symbols, file_alignment); Self { extra_sections_start: section_table_offset as usize + sections.len() as usize * Self::SECTION_HEADER_WIDTH, extra_sections_width, - additional_length, + additional_header_space, + additional_reloc_space, header_offset, // the section count is stored 6 bytes into the header section_count_offset: header_offset + 4 + 2, @@ -627,6 +645,39 @@ impl Preprocessor { } } + fn additional_reloc_space(data: &[u8], extra_symbols: usize, file_alignment: usize) -> usize { + let file = object::read::pe::PeFile64::parse(data).unwrap(); + + let reloc_section = file + .section_table() + .iter() + .find(|h| h.name == *b".reloc\0\0") + .unwrap_or_else(|| internal_error!("host binary does not have a .reloc section")); + + // we use the virtual size here because it is more granular; the `size_of_raw_data` is + // rounded up to the file alignment + let available_space = + reloc_section.size_of_raw_data.get(LE) - reloc_section.virtual_size.get(LE); + + // worst case, each relocation needs its own header + let worst_case = std::mem::size_of::() + std::mem::size_of::(); + + if available_space < (extra_symbols * worst_case) as u32 { + // resize that section + let new_size = next_multiple_of( + reloc_section.virtual_size.get(LE) as usize + (extra_symbols * worst_case), + file_alignment, + ); + + let delta = new_size - reloc_section.size_of_raw_data.get(LE) as usize; + debug_assert_eq!(delta % file_alignment, 0); + + delta + } else { + 0 + } + } + fn copy(&self, result: &mut MmapMut, data: &[u8]) { let extra_sections_start = self.extra_sections_start; @@ -647,7 +698,8 @@ impl Preprocessor { .copy_from_slice(&data[extra_sections_start..][..rest_of_headers]); // copy all of the actual (post-header) data - result[self.new_headers_size..].copy_from_slice(&data[self.old_headers_size..]); + let source = &data[self.old_headers_size..]; + result[self.new_headers_size..][..source.len()].copy_from_slice(source); } fn write_dummy_sections(&self, result: &mut MmapMut, extra_section_names: &[[u8; 8]]) { @@ -656,7 +708,7 @@ impl Preprocessor { // only correct for the first section, but that is OK because it's overwritten later // anyway. But, this value may be used to check whether a previous section overruns into // the app sections. - let pointer_to_raw_data = result.len() - self.additional_length; + let pointer_to_raw_data = result.len() - self.additional_header_space; let previous_section_header = load_struct_inplace::(result, self.extra_sections_start - W); @@ -743,15 +795,25 @@ impl Preprocessor { // now be updated to point to the correct place in the file let shift = self.new_headers_size - self.old_headers_size; let mut offset = self.section_table_offset as usize; - loop { - let header = load_struct_inplace_mut::(result, offset); + let headers = load_structs_inplace_mut::( + result, + offset, + self.new_section_count, + ); + + let mut it = headers.iter_mut().peekable(); + while let Some(header) = it.next() { let old = header.pointer_to_raw_data.get(LE); header.pointer_to_raw_data.set(LE, old + shift as u32); - // stop when we hit the NULL section - if header.name == [0; 8] { - break; + if header.name == *b".reloc\0\0" { + // assumes `.reloc` is the final section + debug_assert_eq!(it.peek().map(|s| s.name).as_ref(), extra_sections.first()); + + let old = header.size_of_raw_data.get(LE); + let new = old + self.additional_reloc_space as u32; + header.size_of_raw_data.set(LE, new); } offset += std::mem::size_of::(); @@ -1557,7 +1619,7 @@ mod test { let sections_len_old = image_headers_old.file_header().number_of_sections.get(LE); - let mmap = Preprocessor::preprocess(output_file, input_data, new_sections); + let mmap = Preprocessor::preprocess(output_file, input_data, 0, new_sections); let image_headers_new = load_struct_inplace::(&mmap, dos_header.nt_headers_offset() as usize); @@ -1667,6 +1729,7 @@ mod test { &dir.join("host.exe"), &dir.join("metadata"), &dir.join("preprocessedhost"), + &names, false, false, ) @@ -1881,7 +1944,12 @@ mod test { let extra_sections = [*b"\0\0\0\0\0\0\0\0", *b"\0\0\0\0\0\0\0\0"]; - Preprocessor::preprocess(&dir.join("app.exe"), host_bytes, extra_sections.as_slice()); + Preprocessor::preprocess( + &dir.join("app.exe"), + host_bytes, + 0, + extra_sections.as_slice(), + ); } #[cfg(windows)] From df7e4eea7e2692adea4656cf725406737be9b64e Mon Sep 17 00:00:00 2001 From: Prajwal S N Date: Mon, 10 Oct 2022 20:51:25 +0530 Subject: [PATCH 09/16] builtin(str): implement Str.graphemes Signed-off-by: Prajwal S N --- crates/compiler/builtins/bitcode/src/main.zig | 1 + crates/compiler/builtins/bitcode/src/str.zig | 39 ++++++++++++++++++- crates/compiler/builtins/roc/Str.roc | 4 ++ crates/compiler/builtins/src/bitcode.rs | 1 + crates/compiler/can/src/builtins.rs | 1 + crates/compiler/gen_llvm/src/llvm/build.rs | 14 +++++++ crates/compiler/gen_wasm/src/low_level.rs | 1 + crates/compiler/module/src/low_level.rs | 2 + crates/compiler/module/src/symbol.rs | 1 + crates/compiler/mono/src/borrow.rs | 6 ++- 10 files changed, 67 insertions(+), 3 deletions(-) diff --git a/crates/compiler/builtins/bitcode/src/main.zig b/crates/compiler/builtins/bitcode/src/main.zig index b7c00d307d..22a82d328a 100644 --- a/crates/compiler/builtins/bitcode/src/main.zig +++ b/crates/compiler/builtins/bitcode/src/main.zig @@ -145,6 +145,7 @@ comptime { exportStrFn(str.strTrimRight, "trim_right"); exportStrFn(str.strCloneTo, "clone_to"); exportStrFn(str.withCapacity, "with_capacity"); + exportStrFn(str.strGraphemes, "str_graphemes"); inline for (INTEGERS) |T| { str.exportFromInt(T, ROC_BUILTINS ++ "." ++ STR ++ ".from_int."); diff --git a/crates/compiler/builtins/bitcode/src/str.zig b/crates/compiler/builtins/bitcode/src/str.zig index 669a0131c6..de77e65cf7 100644 --- a/crates/compiler/builtins/bitcode/src/str.zig +++ b/crates/compiler/builtins/bitcode/src/str.zig @@ -1,5 +1,6 @@ const utils = @import("utils.zig"); const RocList = @import("list.zig").RocList; +const grapheme = @import("helpers/grapheme.zig"); const UpdateMode = utils.UpdateMode; const std = @import("std"); const mem = std.mem; @@ -1212,7 +1213,6 @@ test "countSegments: string equals delimiter" { } // Str.countGraphemeClusters -const grapheme = @import("helpers/grapheme.zig"); pub fn countGraphemeClusters(string: RocStr) callconv(.C) usize { if (string.isEmpty()) { return 0; @@ -1303,6 +1303,43 @@ test "countGraphemeClusters: emojis, ut8, and ascii characters" { try expectEqual(count, 10); } +// Str.graphemes +pub fn strGraphemes(string: RocStr) callconv(.C) RocList { + var list = RocList.allocate(@alignOf(RocStr), countGraphemeClusters(string), @sizeOf(RocStr)); + const graphemes = @ptrCast([*]RocStr, @alignCast(@alignOf(RocStr), list.bytes)); + + const bytes_ptr = string.asU8ptr(); + var bytes = bytes_ptr[0..string.len()]; + var iter = (unicode.Utf8View.init(bytes) catch unreachable).iterator(); + var grapheme_break_state: ?grapheme.BoundClass = null; + var grapheme_break_state_ptr = &grapheme_break_state; + var opt_last_codepoint: ?u21 = null; + + var list_index: usize = 0; + var start_index: usize = 0; + var str_index: usize = 0; + var cur_codepoint_len: usize = 0; + + while (iter.nextCodepoint()) |cur_codepoint| { + cur_codepoint_len = unicode.utf8CodepointSequenceLength(cur_codepoint) catch unreachable; + if (opt_last_codepoint) |last_codepoint| { + var did_break = grapheme.isGraphemeBreak(last_codepoint, cur_codepoint, grapheme_break_state_ptr); + if (did_break) { + graphemes[list_index] = RocStr.init(bytes_ptr + start_index, str_index - start_index + cur_codepoint_len); + list_index += 1; + start_index = str_index + cur_codepoint_len; + grapheme_break_state = null; + } + str_index += cur_codepoint_len; + } + opt_last_codepoint = cur_codepoint; + } + // Append last grapheme + graphemes[list_index] = RocStr.init(bytes_ptr + start_index, str_index - start_index + cur_codepoint_len); + + return list; +} + pub fn countUtf8Bytes(string: RocStr) callconv(.C) usize { return string.len(); } diff --git a/crates/compiler/builtins/roc/Str.roc b/crates/compiler/builtins/roc/Str.roc index ea28bd0bc8..24925a45c2 100644 --- a/crates/compiler/builtins/roc/Str.roc +++ b/crates/compiler/builtins/roc/Str.roc @@ -45,6 +45,7 @@ interface Str walkScalarsUntil, withCapacity, withPrefix, + graphemes, ] imports [ Bool.{ Bool, Eq }, @@ -180,6 +181,9 @@ repeat : Str, Nat -> Str ## expect Str.countGraphemes "üïä" == 4 countGraphemes : Str -> Nat +## Split a string into its constituent grapheme clusters +graphemes : Str -> List Str + ## If the string begins with a [Unicode code point](http://www.unicode.org/glossary/#code_point) ## equal to the given [U32], return `Bool.true`. Otherwise return `Bool.false`. ## diff --git a/crates/compiler/builtins/src/bitcode.rs b/crates/compiler/builtins/src/bitcode.rs index 1c5cbdb6a0..f5fdb7dc05 100644 --- a/crates/compiler/builtins/src/bitcode.rs +++ b/crates/compiler/builtins/src/bitcode.rs @@ -362,6 +362,7 @@ pub const STR_APPEND_SCALAR: &str = "roc_builtins.str.append_scalar"; pub const STR_GET_SCALAR_UNSAFE: &str = "roc_builtins.str.get_scalar_unsafe"; pub const STR_CLONE_TO: &str = "roc_builtins.str.clone_to"; pub const STR_WITH_CAPACITY: &str = "roc_builtins.str.with_capacity"; +pub const STR_GRAPHEMES: &str = "roc_builtins.str.str_graphemes"; pub const LIST_MAP: &str = "roc_builtins.list.map"; pub const LIST_MAP2: &str = "roc_builtins.list.map2"; diff --git a/crates/compiler/can/src/builtins.rs b/crates/compiler/can/src/builtins.rs index d847eb1fe0..05688ed656 100644 --- a/crates/compiler/can/src/builtins.rs +++ b/crates/compiler/can/src/builtins.rs @@ -125,6 +125,7 @@ map_symbol_to_lowlevel_and_arity! { StrToNum; STR_TO_NUM; 1, StrGetCapacity; STR_CAPACITY; 1, StrWithCapacity; STR_WITH_CAPACITY; 1, + StrGraphemes; STR_GRAPHEMES; 1, ListLen; LIST_LEN; 1, ListWithCapacity; LIST_WITH_CAPACITY; 1, diff --git a/crates/compiler/gen_llvm/src/llvm/build.rs b/crates/compiler/gen_llvm/src/llvm/build.rs index 4eb4c89412..73126c90b1 100644 --- a/crates/compiler/gen_llvm/src/llvm/build.rs +++ b/crates/compiler/gen_llvm/src/llvm/build.rs @@ -6049,6 +6049,20 @@ fn run_low_level<'a, 'ctx, 'env>( bitcode::STR_WITH_CAPACITY, ) } + StrGraphemes => { + // Str.graphemes : Str -> List Str + debug_assert_eq!(args.len(), 1); + + let string = load_symbol(scope, &args[0]); + + call_str_bitcode_fn( + env, + &[string], + &[], + BitcodeReturns::List, + bitcode::STR_GRAPHEMES, + ) + } ListLen => { // List.len : List * -> Nat debug_assert_eq!(args.len(), 1); diff --git a/crates/compiler/gen_wasm/src/low_level.rs b/crates/compiler/gen_wasm/src/low_level.rs index 8970c0f9e6..4b98d9943a 100644 --- a/crates/compiler/gen_wasm/src/low_level.rs +++ b/crates/compiler/gen_wasm/src/low_level.rs @@ -304,6 +304,7 @@ impl<'a> LowLevelCall<'a> { self.load_args_and_call_zig(backend, bitcode::STR_SUBSTRING_UNSAFE) } StrWithCapacity => self.load_args_and_call_zig(backend, bitcode::STR_WITH_CAPACITY), + StrGraphemes => self.load_args_and_call_zig(backend, bitcode::STR_GRAPHEMES), // List ListLen => match backend.storage.get(&self.arguments[0]) { diff --git a/crates/compiler/module/src/low_level.rs b/crates/compiler/module/src/low_level.rs index e92d455e05..e29c9816a0 100644 --- a/crates/compiler/module/src/low_level.rs +++ b/crates/compiler/module/src/low_level.rs @@ -31,6 +31,7 @@ pub enum LowLevel { StrGetScalarUnsafe, StrGetCapacity, StrWithCapacity, + StrGraphemes, ListLen, ListWithCapacity, ListReserve, @@ -250,6 +251,7 @@ map_symbol_to_lowlevel! { StrToNum <= STR_TO_NUM, StrGetCapacity <= STR_CAPACITY, StrWithCapacity <= STR_WITH_CAPACITY, + StrGraphemes <= STR_GRAPHEMES, ListLen <= LIST_LEN, ListGetCapacity <= LIST_CAPACITY, ListWithCapacity <= LIST_WITH_CAPACITY, diff --git a/crates/compiler/module/src/symbol.rs b/crates/compiler/module/src/symbol.rs index 337b7af9db..8fa634fc89 100644 --- a/crates/compiler/module/src/symbol.rs +++ b/crates/compiler/module/src/symbol.rs @@ -1318,6 +1318,7 @@ define_builtins! { 52 STR_REPLACE_LAST: "replaceLast" 53 STR_WITH_CAPACITY: "withCapacity" 54 STR_WITH_PREFIX: "withPrefix" + 55 STR_GRAPHEMES: "graphemes" } 6 LIST: "List" => { 0 LIST_LIST: "List" exposed_apply_type=true // the List.List type alias diff --git a/crates/compiler/mono/src/borrow.rs b/crates/compiler/mono/src/borrow.rs index a81139c125..fc37792f57 100644 --- a/crates/compiler/mono/src/borrow.rs +++ b/crates/compiler/mono/src/borrow.rs @@ -879,8 +879,10 @@ pub fn lowlevel_borrow_signature(arena: &Bump, op: LowLevel) -> &[bool] { // - other refcounted arguments are Borrowed match op { Unreachable => arena.alloc_slice_copy(&[irrelevant]), - ListLen | StrIsEmpty | StrToScalars | StrCountGraphemes | StrCountUtf8Bytes - | StrGetCapacity | ListGetCapacity => arena.alloc_slice_copy(&[borrowed]), + ListLen | StrIsEmpty | StrToScalars | StrCountGraphemes | StrGraphemes + | StrCountUtf8Bytes | StrGetCapacity | ListGetCapacity => { + arena.alloc_slice_copy(&[borrowed]) + } ListWithCapacity | StrWithCapacity => arena.alloc_slice_copy(&[irrelevant]), ListReplaceUnsafe => arena.alloc_slice_copy(&[owned, irrelevant, irrelevant]), StrGetUnsafe | ListGetUnsafe => arena.alloc_slice_copy(&[borrowed, irrelevant]), From c2dbed2ff5dc1cd76dc41f6ae896f36fe1b944ae Mon Sep 17 00:00:00 2001 From: Travis Staloch Date: Wed, 19 Oct 2022 09:16:12 -0700 Subject: [PATCH 10/16] str-graphemes: rework and add some zig tests - rework strGraphemes() to use a mutable slice and keep track of just `last_codepoint_len`. - add zig tests for empty string, ascii, utf8, ascii+utf8+emoji --- crates/compiler/build/src/link.rs | 7 +- crates/compiler/builtins/bitcode/src/main.zig | 2 +- crates/compiler/builtins/bitcode/src/str.zig | 134 ++++++++---------- crates/compiler/builtins/src/bitcode.rs | 2 +- 4 files changed, 65 insertions(+), 80 deletions(-) diff --git a/crates/compiler/build/src/link.rs b/crates/compiler/build/src/link.rs index 601a21bd56..385f82d11c 100644 --- a/crates/compiler/build/src/link.rs +++ b/crates/compiler/build/src/link.rs @@ -434,7 +434,12 @@ pub fn build_c_host_native( _ => { command.args(&[ shared_lib_path.to_str().unwrap(), - &bitcode::get_builtins_host_obj_path(), + // This line is commented out because + // @bhansconnect: With the addition of Str.graphemes, always + // linking the built-ins led to a surgical linker bug for + // optimized builds. Disabling until it is needed for dev + // builds. + // &bitcode::get_builtins_host_obj_path(), "-fPIE", "-pie", "-lm", diff --git a/crates/compiler/builtins/bitcode/src/main.zig b/crates/compiler/builtins/bitcode/src/main.zig index 22a82d328a..61a3065e17 100644 --- a/crates/compiler/builtins/bitcode/src/main.zig +++ b/crates/compiler/builtins/bitcode/src/main.zig @@ -145,7 +145,7 @@ comptime { exportStrFn(str.strTrimRight, "trim_right"); exportStrFn(str.strCloneTo, "clone_to"); exportStrFn(str.withCapacity, "with_capacity"); - exportStrFn(str.strGraphemes, "str_graphemes"); + exportStrFn(str.strGraphemes, "graphemes"); inline for (INTEGERS) |T| { str.exportFromInt(T, ROC_BUILTINS ++ "." ++ STR ++ ".from_int."); diff --git a/crates/compiler/builtins/bitcode/src/str.zig b/crates/compiler/builtins/bitcode/src/str.zig index de77e65cf7..8bf25b72b3 100644 --- a/crates/compiler/builtins/bitcode/src/str.zig +++ b/crates/compiler/builtins/bitcode/src/str.zig @@ -1248,96 +1248,76 @@ pub fn countGraphemeClusters(string: RocStr) callconv(.C) usize { return count; } -test "countGraphemeClusters: empty string" { - const count = countGraphemeClusters(RocStr.empty()); - try expectEqual(count, 0); -} - -test "countGraphemeClusters: ascii characters" { - const bytes_arr = "abcd"; - const bytes_len = bytes_arr.len; - const str = RocStr.init(bytes_arr, bytes_len); - defer str.deinit(); - - const count = countGraphemeClusters(str); - try expectEqual(count, 4); -} - -test "countGraphemeClusters: utf8 characters" { - const bytes_arr = "ãxā"; - const bytes_len = bytes_arr.len; - const str = RocStr.init(bytes_arr, bytes_len); - defer str.deinit(); - - const count = countGraphemeClusters(str); - try expectEqual(count, 3); -} - -test "countGraphemeClusters: emojis" { - const bytes_arr = "🤔🤔🤔"; - const bytes_len = bytes_arr.len; - const str = RocStr.init(bytes_arr, bytes_len); - defer str.deinit(); - - const count = countGraphemeClusters(str); - try expectEqual(count, 3); -} - -test "countGraphemeClusters: emojis and ut8 characters" { - const bytes_arr = "🤔å🤔¥🤔ç"; - const bytes_len = bytes_arr.len; - const str = RocStr.init(bytes_arr, bytes_len); - defer str.deinit(); - - const count = countGraphemeClusters(str); - try expectEqual(count, 6); -} - -test "countGraphemeClusters: emojis, ut8, and ascii characters" { - const bytes_arr = "6🤔å🤔e¥🤔çpp"; - const bytes_len = bytes_arr.len; - const str = RocStr.init(bytes_arr, bytes_len); - defer str.deinit(); - - const count = countGraphemeClusters(str); - try expectEqual(count, 10); -} - // Str.graphemes -pub fn strGraphemes(string: RocStr) callconv(.C) RocList { - var list = RocList.allocate(@alignOf(RocStr), countGraphemeClusters(string), @sizeOf(RocStr)); - const graphemes = @ptrCast([*]RocStr, @alignCast(@alignOf(RocStr), list.bytes)); - - const bytes_ptr = string.asU8ptr(); - var bytes = bytes_ptr[0..string.len()]; - var iter = (unicode.Utf8View.init(bytes) catch unreachable).iterator(); - var grapheme_break_state: ?grapheme.BoundClass = null; - var grapheme_break_state_ptr = &grapheme_break_state; +pub fn strGraphemes(roc_str: RocStr) callconv(.C) RocList { + var break_state: ?grapheme.BoundClass = null; var opt_last_codepoint: ?u21 = null; + var index: usize = 0; + var last_codepoint_len: u8 = 0; - var list_index: usize = 0; - var start_index: usize = 0; - var str_index: usize = 0; - var cur_codepoint_len: usize = 0; + var result = RocList.allocate(@alignOf(RocStr), countGraphemeClusters(roc_str), @sizeOf(RocStr)); + const graphemes = result.elements(RocStr) orelse return result; + var slice = roc_str.asSlice(); + var iter = (unicode.Utf8View.init(slice) catch unreachable).iterator(); while (iter.nextCodepoint()) |cur_codepoint| { - cur_codepoint_len = unicode.utf8CodepointSequenceLength(cur_codepoint) catch unreachable; + const cur_codepoint_len = unicode.utf8CodepointSequenceLength(cur_codepoint) catch unreachable; if (opt_last_codepoint) |last_codepoint| { - var did_break = grapheme.isGraphemeBreak(last_codepoint, cur_codepoint, grapheme_break_state_ptr); + var did_break = grapheme.isGraphemeBreak(last_codepoint, cur_codepoint, &break_state); if (did_break) { - graphemes[list_index] = RocStr.init(bytes_ptr + start_index, str_index - start_index + cur_codepoint_len); - list_index += 1; - start_index = str_index + cur_codepoint_len; - grapheme_break_state = null; + graphemes[index] = RocStr.fromSlice(slice[0..last_codepoint_len]); + slice = slice[last_codepoint_len..]; + index += 1; + break_state = null; + last_codepoint_len = 0; } - str_index += cur_codepoint_len; } + last_codepoint_len += cur_codepoint_len; opt_last_codepoint = cur_codepoint; } // Append last grapheme - graphemes[list_index] = RocStr.init(bytes_ptr + start_index, str_index - start_index + cur_codepoint_len); + graphemes[index] = RocStr.fromSlice(slice); + return result; +} - return list; +// these test both countGraphemeClusters() and strGraphemes() +fn graphemesTest(input: []const u8, expected: []const []const u8) !void { + const rocstr = RocStr.fromSlice(input); + defer rocstr.deinit(); + const count = countGraphemeClusters(rocstr); + try expectEqual(expected.len, count); + + const graphemes = strGraphemes(rocstr); + defer graphemes.deinit(u8); + if (input.len == 0) return; // empty string + const elems = graphemes.elements(RocStr) orelse unreachable; + for (expected) |g, i| { + try std.testing.expectEqualStrings(g, elems[i].asSlice()); + } +} + +test "graphemes: empty string" { + try graphemesTest("", &.{}); +} + +test "graphemes: ascii characters" { + try graphemesTest("abcd", &.{ "a", "b", "c", "d" }); +} + +test "graphemes: utf8 characters" { + try graphemesTest("ãxā", &.{ "ã", "x", "ā" }); +} + +test "graphemes: emojis" { + try graphemesTest("🤔🤔🤔", &.{ "🤔", "🤔", "🤔" }); +} + +test "graphemes: emojis and ut8 characters" { + try graphemesTest("🤔å🤔¥🤔ç", &.{ "🤔", "å", "🤔", "¥", "🤔", "ç" }); +} + +test "graphemes: emojis, ut8, and ascii characters" { + try graphemesTest("6🤔å🤔e¥🤔çpp", &.{ "6", "🤔", "å", "🤔", "e", "¥", "🤔", "ç", "p", "p" }); } pub fn countUtf8Bytes(string: RocStr) callconv(.C) usize { diff --git a/crates/compiler/builtins/src/bitcode.rs b/crates/compiler/builtins/src/bitcode.rs index f5fdb7dc05..58145098f5 100644 --- a/crates/compiler/builtins/src/bitcode.rs +++ b/crates/compiler/builtins/src/bitcode.rs @@ -362,7 +362,7 @@ pub const STR_APPEND_SCALAR: &str = "roc_builtins.str.append_scalar"; pub const STR_GET_SCALAR_UNSAFE: &str = "roc_builtins.str.get_scalar_unsafe"; pub const STR_CLONE_TO: &str = "roc_builtins.str.clone_to"; pub const STR_WITH_CAPACITY: &str = "roc_builtins.str.with_capacity"; -pub const STR_GRAPHEMES: &str = "roc_builtins.str.str_graphemes"; +pub const STR_GRAPHEMES: &str = "roc_builtins.str.graphemes"; pub const LIST_MAP: &str = "roc_builtins.list.map"; pub const LIST_MAP2: &str = "roc_builtins.list.map2"; From a627a477092edf0321b93975a70aedd26f283aa2 Mon Sep 17 00:00:00 2001 From: Christopher Duncan Date: Mon, 10 Oct 2022 17:12:24 -0400 Subject: [PATCH 11/16] Add Dict module to the testing CI --- CONTRIBUTING.md | 10 ++++++++++ crates/compiler/builtins/roc/Dict.roc | 13 +++++++++++-- mlc_config.json | 3 +++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 53ea150641..96e079aa8e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -33,6 +33,16 @@ cargo clippy --workspace --tests -- --deny warnings Execute `cargo fmt --all` to fix the formatting. +## Generating Docs + +If you make changes to [Roc's Standard Library](https://www.roc-lang.org/builtins/Str), you can add comments to the code following [the CommonMark Spec](https://spec.commonmark.org/current/) to further explain your intentions. You can view these changes locally with: + +```sh +cargo run docs crates/compiler/builtins/roc +``` + +This command will generate the documentation in the [`generated-docs`](generated-docs) directory. + ## Contribution Tips - If you've never made a pull request on github before, [this](https://www.freecodecamp.org/news/how-to-make-your-first-pull-request-on-github-3/) will be a good place to start. diff --git a/crates/compiler/builtins/roc/Dict.roc b/crates/compiler/builtins/roc/Dict.roc index 7537cb507d..144bfe6085 100644 --- a/crates/compiler/builtins/roc/Dict.roc +++ b/crates/compiler/builtins/roc/Dict.roc @@ -128,6 +128,16 @@ remove = \@Dict list, key -> |> @Dict ## Insert or remove a value in a Dict based on its presence +## +## alterValue : [Present Bool, Missing] -> [Present Bool, Missing] +## alterValue = \possibleValue -> +## when possibleValue is +## Missing -> Present Bool.false +## Present value -> if value then Missing else Present Bool.true +## +## expect Dict.update Dict.empty "a" alterValue == Dict.single "a" Bool.false +## expect Dict.update (Dict.single "a" Bool.false) "a" alterValue == Dict.single "a" Bool.true +## expect Dict.update (Dict.single "a" Bool.true) "a" alterValue == Dict.empty update : Dict k v, k, ([Present v, Missing] -> [Present v, Missing]) -> Dict k v | k has Eq update = \dict, key, alter -> possibleValue = @@ -144,8 +154,7 @@ alterValue : [Present Bool, Missing] -> [Present Bool, Missing] alterValue = \possibleValue -> when possibleValue is Missing -> Present Bool.false - Present value if Bool.not value -> Present Bool.true - Present _ -> Missing + Present value -> if value then Missing else Present Bool.true expect update empty "a" alterValue == single "a" Bool.false expect update (single "a" Bool.false) "a" alterValue == single "a" Bool.true diff --git a/mlc_config.json b/mlc_config.json index 1fdb957353..93ee48f233 100644 --- a/mlc_config.json +++ b/mlc_config.json @@ -1,5 +1,8 @@ { "ignorePatterns": [ + { + "pattern": "generated-docs" + }, { "pattern": "http://127.0.0.1" }, From 6d02332b62f39fb21e0b2d60eba55baa31a4b86c Mon Sep 17 00:00:00 2001 From: Nick Gravgaard Date: Mon, 24 Oct 2022 07:35:37 +0100 Subject: [PATCH 12/16] Change TUI platform to call view on initial model Signed-off-by: Nick Gravgaard --- examples/cli/tui-platform/host.zig | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/examples/cli/tui-platform/host.zig b/examples/cli/tui-platform/host.zig index fea7652bc7..10e281dca7 100644 --- a/examples/cli/tui-platform/host.zig +++ b/examples/cli/tui-platform/host.zig @@ -79,6 +79,16 @@ fn view(input: ConstModel) RocStr { return output; } +fn print_output(viewed: RocStr) void { + const stdout = std.io.getStdOut().writer(); + + for (viewed.asSlice()) |char| { + stdout.print("{c}", .{char}) catch unreachable; + } + + stdout.print("\n", .{}) catch unreachable; +} + const Align = 2 * @alignOf(usize); extern fn malloc(size: usize) callconv(.C) ?*align(Align) anyopaque; extern fn realloc(c_ptr: [*]align(Align) u8, size: usize) callconv(.C) ?*anyopaque; @@ -163,13 +173,15 @@ fn call_the_closure(program: Program) void { _ = program; var allocator = std.heap.page_allocator; - const stdout = std.io.getStdOut().writer(); const stdin = std.io.getStdIn().reader(); var buf: [1000]u8 = undefined; var model = init(&allocator); + const init_viewed = view(model); + print_output(init_viewed); + while (true) { const line = (stdin.readUntilDelimiterOrEof(buf[0..], '\n') catch unreachable) orelse return; @@ -182,11 +194,7 @@ fn call_the_closure(program: Program) void { model = update(&allocator, model, to_append); const viewed = view(model); - for (viewed.asSlice()) |char| { - stdout.print("{c}", .{char}) catch unreachable; - } - - stdout.print("\n", .{}) catch unreachable; + print_output(viewed); } // The closure returns result, nothing interesting to do with it From a8dda54a2b15b3aa64efe6f66bfe1a0bf8454f96 Mon Sep 17 00:00:00 2001 From: Anton-4 <17049058+Anton-4@users.noreply.github.com> Date: Mon, 24 Oct 2022 11:34:33 +0200 Subject: [PATCH 13/16] simple signing workaround Signed-off-by: Anton-4 <17049058+Anton-4@users.noreply.github.com> --- CONTRIBUTING.md | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 96e079aa8e..2dcff7e65e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -51,14 +51,16 @@ This command will generate the documentation in the [`generated-docs`](generated - [Fork](https://github.com/roc-lang/roc/fork) the repo so that you can apply your changes first on your own copy of the roc repo. - It's a good idea to open a draft pull request as you begin working on something. This way, others can see that you're working on it, which avoids duplicate effort, and others can give feedback sooner rather than later if they notice a problem in the direction things are going. Click the button "ready for review" when it's ready. - All your commits need to be signed [to prevent impersonation](https://dev.to/martiliones/how-i-got-linus-torvalds-in-my-contributors-on-github-3k4g): - 1. If you have a Yubikey, follow [guide 1](https://dev.to/paulmicheli/using-your-yubikey-to-get-started-with-gpg-3h4k), [guide 2](https://dev.to/paulmicheli/using-your-yubikey-for-signed-git-commits-4l73) and skip the steps below. - 2. [Make a key to sign your commits.](https://docs.github.com/en/authentication/managing-commit-signature-verification/generating-a-new-gpg-key). - 3. [Configure git to use your key.](https://docs.github.com/en/authentication/managing-commit-signature-verification/telling-git-about-your-signing-key) - 4. Make git sign your commits automatically: + - If you don't have signing set up on your device and you only want to change a single file, it will be easier to use [github's edit button](https://docs.github.com/en/repositories/working-with-files/managing-files/editing-files). This will sign your commit automatically. + - For multi-file or complex changes you will want to set up signing on your device: + 1. If you have a Yubikey, follow [guide 1](https://dev.to/paulmicheli/using-your-yubikey-to-get-started-with-gpg-3h4k), [guide 2](https://dev.to/paulmicheli/using-your-yubikey-for-signed-git-commits-4l73) and skip the steps below. + 2. [Make a key to sign your commits.](https://docs.github.com/en/authentication/managing-commit-signature-verification/generating-a-new-gpg-key) + 3. [Configure git to use your key.](https://docs.github.com/en/authentication/managing-commit-signature-verification/telling-git-about-your-signing-key) + 4. Make git sign your commits automatically: - ```sh - git config --global commit.gpgsign true - ``` + ```sh + git config --global commit.gpgsign true + ``` ## Can we do better? From 38409221b6ff78d908db1529533c5095fb8e7256 Mon Sep 17 00:00:00 2001 From: Keerthana Kasthuril <76804118+keerthanak-tw@users.noreply.github.com> Date: Mon, 24 Oct 2022 17:01:34 +0530 Subject: [PATCH 14/16] Add how to sign unsigned commits section Signed-off-by: Keerthana Kasthuril <76804118+keerthanak-tw@users.noreply.github.com> --- CONTRIBUTING.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 96e079aa8e..52ba9f5d9e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -60,6 +60,26 @@ This command will generate the documentation in the [`generated-docs`](generated git config --global commit.gpgsign true ``` +#### Committed files without signing? +It is common that people forget to sign their commits. You can find which commits need to be signed by running `git log --show-signature` command. + +If you have only one commit, running `git commit --amend --no-edit -S` would sign the latest commit 🚀 + +In case you have multiple commits, you can sign them by two ways + 1. Switching to interactive rebase mode and editing the file + - Enter into interactive mode, by running `git rebase -i HEAD~n` where `n` is the number of commits up to the most current commit you would like to see. + - This would display a set of commits in a text file like below + ``` + pick hash2 commit message 2 + pick hash1 commit message 1 + ``` + - After each of the commit you want to sign, add the command `exec git commit --amend --no-edit -S` + 2. Run git rebase command recursively + - Find the commit till you want to sign, using the `git log --show-signature` command. + - Run the command `git rebase --exec 'git commit --amend --no-edit -n -S' -i HASH` which would sign all commits till commit `HASH` + +Please note that if you had pushed these commits already, you might have to do a **force push** which might lead to confusions among other contributors. So, always make sure you have signed commits before pushing. + ## Can we do better? Feel free to open an issue if you think this document can be improved or is unclear in any way. From 14710e1dc9a35b626fe0e54dc867e883a6381941 Mon Sep 17 00:00:00 2001 From: Anton-4 <17049058+Anton-4@users.noreply.github.com> Date: Mon, 24 Oct 2022 13:47:40 +0200 Subject: [PATCH 15/16] minor changes Signed-off-by: Anton-4 <17049058+Anton-4@users.noreply.github.com> --- CONTRIBUTING.md | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 52ba9f5d9e..a4ed4546a5 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -60,25 +60,26 @@ This command will generate the documentation in the [`generated-docs`](generated git config --global commit.gpgsign true ``` -#### Committed files without signing? -It is common that people forget to sign their commits. You can find which commits need to be signed by running `git log --show-signature` command. +### Forgot to sign commits? -If you have only one commit, running `git commit --amend --no-edit -S` would sign the latest commit 🚀 +You can find which commits need to be signed by running `git log --show-signature`. -In case you have multiple commits, you can sign them by two ways - 1. Switching to interactive rebase mode and editing the file +If you have only one commit, running `git commit --amend --no-edit -S` would sign the latest commit 🚀. + +In case you have multiple commits, you can sign them in two ways: + 1. Switching to interactive rebase mode and editing the file: - Enter into interactive mode, by running `git rebase -i HEAD~n` where `n` is the number of commits up to the most current commit you would like to see. - - This would display a set of commits in a text file like below + - This would display a set of commits in a text file like below: ``` pick hash2 commit message 2 pick hash1 commit message 1 ``` - - After each of the commit you want to sign, add the command `exec git commit --amend --no-edit -S` - 2. Run git rebase command recursively - - Find the commit till you want to sign, using the `git log --show-signature` command. - - Run the command `git rebase --exec 'git commit --amend --no-edit -n -S' -i HASH` which would sign all commits till commit `HASH` + - After every commit you want to sign, add `exec git commit --amend --no-edit -S`. + 2. Or run git rebase recursively: + - Find the oldest commit you want to sign, using the `git log --show-signature` command. + - Run the command `git rebase --exec 'git commit --amend --no-edit -n -S' -i HASH` which would sign all commits up to commit `HASH`. -Please note that if you had pushed these commits already, you might have to do a **force push** which might lead to confusions among other contributors. So, always make sure you have signed commits before pushing. +If you already pushed unsigned commits, you mmay have to do a force push with `git push origin -f `. ## Can we do better? From 5164994fb56737470b6d41918674f6a1bd92caec Mon Sep 17 00:00:00 2001 From: Ayaz Hafiz Date: Mon, 24 Oct 2022 10:28:56 -0500 Subject: [PATCH 16/16] Do not attempt to lookup functions in `expect`s Functions are not useful to print in expect results, because they are only printed opaquely as ``. Moreover, their transformation to closure sets during mono can be extremely lossy, up to and including the elision of symbols for function closure symbols. As such, simply do not attempt to lookup or print functions referenced in expects. Closes #4389 --- crates/compiler/mono/src/ir.rs | 14 +++++++++---- crates/compiler/types/src/subs.rs | 20 +++++++++++++++++++ crates/repl_expect/src/lib.rs | 27 ++++++++++++++++++++++--- crates/repl_expect/src/run.rs | 33 +++++++++++++++++++++---------- 4 files changed, 77 insertions(+), 17 deletions(-) diff --git a/crates/compiler/mono/src/ir.rs b/crates/compiler/mono/src/ir.rs index 4f2dbfb2bf..af61e7f816 100644 --- a/crates/compiler/mono/src/ir.rs +++ b/crates/compiler/mono/src/ir.rs @@ -6368,10 +6368,13 @@ pub fn from_can<'a>( ), None => symbol, }; - lookups.push(symbol); let res_layout = layout_cache.from_var(env.arena, var, env.subs); let layout = return_on_layout_error!(env, res_layout, "Expect"); - layouts.push(layout); + if !matches!(layout, Layout::LambdaSet(..)) { + // Exclude functions from lookups + lookups.push(symbol); + layouts.push(layout); + } } let mut stmt = Stmt::Expect { @@ -6421,10 +6424,13 @@ pub fn from_can<'a>( ), None => symbol, }; - lookups.push(symbol); let res_layout = layout_cache.from_var(env.arena, var, env.subs); let layout = return_on_layout_error!(env, res_layout, "Expect"); - layouts.push(layout); + if !matches!(layout, Layout::LambdaSet(..)) { + // Exclude functions from lookups + lookups.push(symbol); + layouts.push(layout); + } } let mut stmt = Stmt::ExpectFx { diff --git a/crates/compiler/types/src/subs.rs b/crates/compiler/types/src/subs.rs index a72811eb22..1383beb215 100644 --- a/crates/compiler/types/src/subs.rs +++ b/crates/compiler/types/src/subs.rs @@ -2115,6 +2115,26 @@ impl Subs { pub fn is_inhabited(&self, var: Variable) -> bool { is_inhabited(self, var) } + + pub fn is_function(&self, mut var: Variable) -> bool { + loop { + match self.get_content_without_compacting(var) { + Content::FlexVar(_) + | Content::RigidVar(_) + | Content::FlexAbleVar(_, _) + | Content::RigidAbleVar(_, _) + | Content::RecursionVar { .. } + | Content::RangedNumber(_) + | Content::Error => return false, + Content::LambdaSet(_) => return true, + Content::Structure(FlatType::Func(..)) => return true, + Content::Structure(_) => return false, + Content::Alias(_, _, real_var, _) => { + var = *real_var; + } + } + } + } } #[inline(always)] diff --git a/crates/repl_expect/src/lib.rs b/crates/repl_expect/src/lib.rs index a8f7c1e92a..ab339a77d5 100644 --- a/crates/repl_expect/src/lib.rs +++ b/crates/repl_expect/src/lib.rs @@ -923,13 +923,34 @@ mod test { When it failed, these variables had these values: - forcer : Str -> U8 - forcer = - case : Str case = "" "# ), ); } + + #[test] + fn issue_i4389() { + run_expect_test( + indoc!( + r#" + interface Test exposes [] imports [] + + expect + totalCount = \{} -> 1u8 + totalCount {} == 96u8 + "# + ), + indoc!( + r#" + This expectation failed: + + 3│> expect + 4│> totalCount = \{} -> 1u8 + 5│> totalCount {} == 96u8 + "# + ), + ); + } } diff --git a/crates/repl_expect/src/run.rs b/crates/repl_expect/src/run.rs index 44a54f8dcf..39044c45e6 100644 --- a/crates/repl_expect/src/run.rs +++ b/crates/repl_expect/src/run.rs @@ -18,6 +18,7 @@ use roc_mono::{ir::OptLevel, layout::Layout}; use roc_region::all::Region; use roc_reporting::{error::expect::Renderer, report::RenderTarget}; use roc_target::TargetInfo; +use roc_types::subs::{Subs, Variable}; use target_lexicon::Triple; pub(crate) struct ExpectMemory<'a> { @@ -361,6 +362,27 @@ pub fn roc_dev_expect<'a>( ) } +fn split_expect_lookups(subs: &Subs, lookups: &[ExpectLookup]) -> (Vec, Vec) { + lookups + .iter() + .filter_map( + |ExpectLookup { + symbol, + var, + ability_info: _, + }| { + // mono will have dropped lookups that resolve to functions, so we should not keep + // them either. + if subs.is_function(*var) { + None + } else { + Some((*symbol, *var)) + } + }, + ) + .unzip() +} + #[allow(clippy::too_many_arguments)] fn render_expect_failure<'a>( writer: &mut impl std::io::Write, @@ -390,16 +412,7 @@ fn render_expect_failure<'a>( }; let subs = arena.alloc(&mut data.subs); - let (symbols, variables): (Vec<_>, Vec<_>) = current - .iter() - .map( - |ExpectLookup { - symbol, - var, - ability_info: _, - }| (*symbol, *var), - ) - .unzip(); + let (symbols, variables) = split_expect_lookups(subs, current); let (offset, expressions) = crate::get_values( target_info,