diff --git a/crates/compiler/can/src/constraint.rs b/crates/compiler/can/src/constraint.rs index 5f56366988..1e138d7e42 100644 --- a/crates/compiler/can/src/constraint.rs +++ b/crates/compiler/can/src/constraint.rs @@ -1,4 +1,5 @@ use std::cell::Cell; +use std::path::Path; use crate::abilities::SpecializationId; use crate::exhaustive::{ExhaustiveContext, SketchedRows}; @@ -599,6 +600,7 @@ impl Constraints { | Constraint::PatternPresence(_, _, _, _) | Constraint::Exhaustive { .. } | Constraint::Resolve(..) + | Constraint::IngestedFile(..) | Constraint::CheckCycle(..) => false, } } @@ -673,10 +675,19 @@ impl Constraints { Constraint::CheckCycle(cycle_index, cycle_mark) } + + pub fn ingested_file( + &mut self, + type_index: TypeOrVar, + file_path: Box, + bytes: Vec, + ) -> Constraint { + Constraint::IngestedFile(type_index, file_path, bytes) + } } -roc_error_macros::assert_sizeof_default!(Constraint, 3 * 8); -roc_error_macros::assert_sizeof_aarch64!(Constraint, 3 * 8); +roc_error_macros::assert_sizeof_default!(Constraint, 6 * 8); +roc_error_macros::assert_sizeof_aarch64!(Constraint, 6 * 8); impl std::ops::Index for Constraints { type Output = Expected; @@ -734,7 +745,7 @@ pub struct OpportunisticResolve { pub specialization_id: SpecializationId, } -#[derive(Clone, Copy)] +#[derive(Clone)] pub enum Constraint { Eq(Eq), Store(TypeOrVar, Variable, Index<&'static str>, u32), @@ -773,6 +784,10 @@ pub enum Constraint { /// Attempt to resolve a specialization. Resolve(OpportunisticResolve), CheckCycle(Index, IllegalCycleMark), + + // This is terrible and could be a huge cost to copy. + // Not sure a better way to get the bytes here so we can check if they are valid utf8 or decode properly. + IngestedFile(TypeOrVar, Box, Vec), } #[derive(Debug, Clone, Copy, Default)] @@ -856,6 +871,9 @@ impl std::fmt::Debug for Constraint { Self::CheckCycle(arg0, arg1) => { write!(f, "CheckCycle({:?}, {:?})", arg0, arg1) } + Self::IngestedFile(arg0, arg1, arg2) => { + write!(f, "IngestedFile({:?}, {:?}, {:?})", arg0, arg1, arg2) + } } } } diff --git a/crates/compiler/can/src/copy.rs b/crates/compiler/can/src/copy.rs index b5ebb386f5..c9f52e4625 100644 --- a/crates/compiler/can/src/copy.rs +++ b/crates/compiler/can/src/copy.rs @@ -277,7 +277,9 @@ fn deep_copy_expr_help(env: &mut C, copied: &mut Vec, expr Float(v1, v2, str, val, bound) => Float(sub!(*v1), sub!(*v2), str.clone(), *val, *bound), Str(str) => Str(str.clone()), SingleQuote(v1, v2, char, bound) => SingleQuote(sub!(*v1), sub!(*v2), *char, *bound), - IngestedFile(bytes, anno) => IngestedFile(bytes.clone(), anno.clone()), + IngestedFile(file_path, bytes, anno) => { + IngestedFile(file_path.clone(), bytes.clone(), anno.clone()) + } List { elem_var, loc_elems, diff --git a/crates/compiler/can/src/debug/pretty_print.rs b/crates/compiler/can/src/debug/pretty_print.rs index 7e878d45d0..a170d2af76 100644 --- a/crates/compiler/can/src/debug/pretty_print.rs +++ b/crates/compiler/can/src/debug/pretty_print.rs @@ -165,7 +165,9 @@ fn expr<'a>(c: &Ctx, p: EPrec, f: &'a Arena<'a>, e: &'a Expr) -> DocBuilder<'a, Num(_, n, _, _) | Int(_, _, n, _, _) | Float(_, _, n, _, _) => f.text(&**n), Str(s) => f.text(format!(r#""{}""#, s)), SingleQuote(_, _, c, _) => f.text(format!("'{}'", c)), - IngestedFile(_,_) => todo!("I am not really sure how we want this to be printed. file name? all bytes? as correct type?"), + IngestedFile(file_path, bytes, _) => { + f.text(format!("", file_path, bytes.len())) + } List { elem_var: _, loc_elems, diff --git a/crates/compiler/can/src/expr.rs b/crates/compiler/can/src/expr.rs index da8330420b..1219640457 100644 --- a/crates/compiler/can/src/expr.rs +++ b/crates/compiler/can/src/expr.rs @@ -1,5 +1,5 @@ use crate::abilities::SpecializationId; -use crate::annotation::{self, freshen_opaque_def, IntroducedVariables}; +use crate::annotation::{freshen_opaque_def, IntroducedVariables}; use crate::builtins::builtin_defs_map; use crate::def::{can_defs_with_return, Annotation, Def}; use crate::env::Env; @@ -29,6 +29,7 @@ use roc_types::types::{Alias, Category, IndexOrField, LambdaSet, OptAbleVar, Typ use std::fmt::{Debug, Display}; use std::fs::File; use std::io::Read; +use std::path::Path; use std::{char, u32}; /// Derives that an opaque type has claimed, to checked and recorded after solving. @@ -103,7 +104,7 @@ pub enum Expr { }, // The bytes of a file and the expected type annotation. - IngestedFile(Vec, annotation::Annotation), + IngestedFile(Box, Vec, Variable), // Lookups Var(Symbol, Variable), @@ -302,7 +303,7 @@ impl Expr { Self::Int(..) => Category::Int, Self::Float(..) => Category::Frac, Self::Str(..) => Category::Str, - Self::IngestedFile(..) => Category::IngestedFile, + Self::IngestedFile(file_path, _, _) => Category::IngestedFile(file_path.clone()), Self::SingleQuote(..) => Category::Character, Self::List { .. } => Category::List, &Self::Var(sym, _) => Category::Lookup(sym), @@ -735,23 +736,12 @@ pub fn canonicalize_expr<'a>( ast::Expr::Str(literal) => flatten_str_literal(env, var_store, scope, literal), - ast::Expr::IngestedFile(file_path, type_ann) => match File::open(file_path) { + ast::Expr::IngestedFile(file_path, _) => match File::open(file_path) { Ok(mut file) => { let mut bytes = vec![]; match file.read_to_end(&mut bytes) { Ok(_) => ( - Expr::IngestedFile( - bytes, - annotation::canonicalize_annotation( - env, - scope, - &type_ann.value, - region, - var_store, - &VecMap::default(), - annotation::AnnotationFor::Value, - ), - ), + Expr::IngestedFile((*file_path).into(), bytes, var_store.fresh()), Output::default(), ), Err(e) => { @@ -3041,7 +3031,7 @@ pub(crate) fn get_lookup_symbols(expr: &Expr) -> Vec { | Expr::Float(_, _, _, _, _) | Expr::Int(_, _, _, _, _) | Expr::Str(_) - | Expr::IngestedFile(_, _) + | Expr::IngestedFile(..) | Expr::ZeroArgumentTag { .. } | Expr::RecordAccessor(_) | Expr::SingleQuote(..) diff --git a/crates/compiler/constrain/src/expr.rs b/crates/compiler/constrain/src/expr.rs index ebd1c738e3..06813445d0 100644 --- a/crates/compiler/constrain/src/expr.rs +++ b/crates/compiler/constrain/src/expr.rs @@ -30,8 +30,8 @@ use roc_region::all::{Loc, Region}; use roc_types::subs::{IllegalCycleMark, Variable}; use roc_types::types::Type::{self, *}; use roc_types::types::{ - AliasCommon, AliasKind, AnnotationSource, Category, IndexOrField, OptAbleType, PReason, Reason, - RecordField, TypeExtension, TypeTag, Types, + AliasKind, AnnotationSource, Category, IndexOrField, OptAbleType, PReason, Reason, RecordField, + TypeExtension, TypeTag, Types, }; /// This is for constraining Defs @@ -375,38 +375,20 @@ pub fn constrain_expr( let expected_index = expected; constraints.equal_types(str_index, expected_index, Category::Str, region) } - IngestedFile(bytes, anno) => match &anno.typ { - Type::Apply(Symbol::STR_STR, _, _) => { - if std::str::from_utf8(bytes).is_err() { - todo!("cause an error for the type being wrong due to not being a utf8 string"); - } + IngestedFile(file_path, bytes, var) => { + let index = constraints.push_variable(*var); + let eq_con = constraints.equal_types( + index, + expected, + Category::IngestedFile(file_path.clone()), + region, + ); + let ingested_con = constraints.ingested_file(index, file_path.clone(), bytes.clone()); - let str_index = constraints.push_type(types, Types::STR); - let expected_index = expected; - constraints.equal_types(str_index, expected_index, Category::Str, region) - } - Type::Apply(Symbol::LIST_LIST, elem_type, _) - if matches!( - elem_type[0].value, - Type::DelayedAlias(AliasCommon { - symbol: Symbol::NUM_U8, - .. - }) - ) => - { - let elem_var = Variable::U8; - let list_elem_type = Type::Variable(elem_var); - let elem_type_index = { - let typ = types.from_old_type(&list_type(list_elem_type)); - constraints.push_type(types, typ) - }; - constraints.equal_types(elem_type_index, expected, Category::List, region) - } - x => todo!( - "Unsupported requested type for ingested file, give proper error: {:?}", - x - ), - }, + // First resolve the type variable with the eq_con then try to ingest a file into the correct type. + let and_constraint = constraints.and_constraint(vec![eq_con, ingested_con]); + constraints.exists([*var], and_constraint) + } SingleQuote(num_var, precision_var, _, bound) => single_quote_literal( types, constraints, @@ -3975,7 +3957,7 @@ fn is_generalizable_expr(mut expr: &Expr) -> bool { } OpaqueRef { argument, .. } => expr = &argument.1.value, Str(_) - | IngestedFile(_, _) + | IngestedFile(..) | List { .. } | SingleQuote(_, _, _, _) | When { .. } diff --git a/crates/compiler/mono/src/ir.rs b/crates/compiler/mono/src/ir.rs index 760315f9dd..168fd1f4ca 100644 --- a/crates/compiler/mono/src/ir.rs +++ b/crates/compiler/mono/src/ir.rs @@ -4162,42 +4162,37 @@ pub fn with_hole<'a>( hole, ), - IngestedFile(bytes, anno) => match &anno.typ { - Type::Apply(Symbol::STR_STR, _, _) => Stmt::Let( - assigned, - Expr::Literal(Literal::Str( - // This is safe because we ensure the utf8 bytes are valid earlier in the compiler pipeline. - arena.alloc(unsafe { std::str::from_utf8_unchecked(&bytes) }.to_owned()), - )), - Layout::STR, - hole, - ), - Type::Apply(Symbol::LIST_LIST, elem_type, _) - if matches!( - elem_type[0].value, - Type::DelayedAlias(AliasCommon { - symbol: Symbol::NUM_U8, - .. - }) - ) => - { - let elem_layout = Layout::U8; - let mut elements = Vec::with_capacity_in(bytes.len(), env.arena); - for byte in bytes { - elements.push(ListLiteralElement::Literal(Literal::Byte(byte))); + IngestedFile(_, bytes, var) => { + let interned = layout_cache.from_var(env.arena, var, env.subs).unwrap(); + let layout = layout_cache.get_in(interned); + + match layout { + Layout::Builtin(Builtin::List(elem_layout)) if elem_layout == Layout::U8 => { + let mut elements = Vec::with_capacity_in(bytes.len(), env.arena); + for byte in bytes { + elements.push(ListLiteralElement::Literal(Literal::Byte(byte))); + } + let expr = Expr::Array { + elem_layout, + elems: elements.into_bump_slice(), + }; + + Stmt::Let(assigned, expr, interned, hole) } - let expr = Expr::Array { - elem_layout, - elems: elements.into_bump_slice(), - }; - - let list_layout = layout_cache.put_in(Layout::Builtin(Builtin::List(elem_layout))); - - Stmt::Let(assigned, expr, list_layout, hole) + Layout::Builtin(Builtin::Str) => Stmt::Let( + assigned, + Expr::Literal(Literal::Str( + // This is safe because we ensure the utf8 bytes are valid earlier in the compiler pipeline. + arena.alloc(unsafe { std::str::from_utf8_unchecked(&bytes) }.to_owned()), + )), + Layout::STR, + hole, + ), + _ => unreachable!( + "All of these cases should be dealt during solve, generating proper errors" + ), } - _ => unreachable!("All of these cases should be dealt with earlier in the compiler, generating proper errors"), - }, - + } SingleQuote(_, _, character, _) => { let layout = layout_cache .from_var(env.arena, variable, env.subs) diff --git a/crates/compiler/parse/src/ast.rs b/crates/compiler/parse/src/ast.rs index dd3c0a341f..93bce91b2c 100644 --- a/crates/compiler/parse/src/ast.rs +++ b/crates/compiler/parse/src/ast.rs @@ -1474,7 +1474,6 @@ impl<'a> Malformed for Expr<'a> { Str(inner) => inner.is_malformed(), - // TODO: what is the scope of Malformed? Would this not being a real file make it malformed? RecordAccess(inner, _) | TupleAccess(inner, _) => inner.is_malformed(), diff --git a/crates/compiler/solve/src/solve.rs b/crates/compiler/solve/src/solve.rs index 3a607e2494..4f6489d578 100644 --- a/crates/compiler/solve/src/solve.rs +++ b/crates/compiler/solve/src/solve.rs @@ -1771,6 +1771,55 @@ fn solve( state } + IngestedFile(type_index, _, bytes) => { + let actual = either_type_index_to_var( + subs, + rank, + pools, + problems, + abilities_store, + obligation_cache, + &mut can_types, + aliases, + *type_index, + ); + + if let Success { .. } = unify( + &mut UEnv::new(subs), + actual, + Variable::LIST_U8, + Mode::EQ, + Polarity::OF_VALUE, + ) { + // List U8 always valid. + state + } else if let Success { .. } = unify( + &mut UEnv::new(subs), + actual, + Variable::STR, + Mode::EQ, + Polarity::OF_VALUE, + ) { + // Str only valid if valid utf8. + if std::str::from_utf8(bytes).is_err() { + todo!("add type error due to not being a utf8 string"); + } + + state + } else { + // Unexpected type. + todo!("Add type error for unsupported ingested file type"); + // let problem = TypeError::BadExpr( + // *region, + // Category::Lookup(*symbol), + // actual_type, + // expectation.replace_ref(expected_type), + // ); + + // problems.push(problem); + // state + } + } }; } diff --git a/crates/compiler/types/src/types.rs b/crates/compiler/types/src/types.rs index 415989a195..7bf2191b49 100644 --- a/crates/compiler/types/src/types.rs +++ b/crates/compiler/types/src/types.rs @@ -15,6 +15,7 @@ use roc_module::symbol::{Interns, Symbol}; use roc_region::all::{Loc, Region}; use std::fmt; use std::fmt::Write; +use std::path::Path; pub const TYPE_NUM: &str = "Num"; pub const TYPE_INTEGER: &str = "Integer"; @@ -3781,7 +3782,7 @@ pub enum Category { List, Str, Character, - IngestedFile, + IngestedFile(Box), // records Record, diff --git a/crates/reporting/src/error/canonicalize.rs b/crates/reporting/src/error/canonicalize.rs index eebd8ef77d..ead5643ea3 100644 --- a/crates/reporting/src/error/canonicalize.rs +++ b/crates/reporting/src/error/canonicalize.rs @@ -1094,7 +1094,7 @@ pub fn can_problem<'b>( title = "OVERAPPLIED CRASH".to_string(); } Problem::FileProblem { filename, error } => { - let report = to_file_problem_report(&alloc, &filename, error); + let report = to_file_problem_report(alloc, &filename, error); doc = report.doc; title = report.title; } diff --git a/crates/reporting/src/error/type.rs b/crates/reporting/src/error/type.rs index 6e83cb6767..960f4c0420 100644 --- a/crates/reporting/src/error/type.rs +++ b/crates/reporting/src/error/type.rs @@ -1655,10 +1655,11 @@ fn format_category<'b>( alloc.concat([this_is, alloc.text(" a Unicode scalar value")]), alloc.text(" of type:"), ), - IngestedFile => ( - // TODO: is this what we actually want for the error message here. - // Should we somehow get the file name piped to here or type annotation? - alloc.concat([this_is, alloc.text(" an ingested file")]), + IngestedFile(file_path) => ( + alloc.concat([ + this_is, + alloc.text(format!(" an ingested file ({:?})", file_path)), + ]), alloc.text(" of type:"), ), Lambda => (