Get things compiling

This commit is contained in:
Richard Feldman 2020-08-26 21:12:44 -04:00
parent 70bef827a7
commit f35e43768a
18 changed files with 541 additions and 253 deletions

View file

@ -6,7 +6,7 @@ use roc_module::ident::{Lowercase, TagName};
use roc_module::operator::CalledVia; use roc_module::operator::CalledVia;
use roc_module::symbol::{Interns, ModuleId, Symbol}; use roc_module::symbol::{Interns, ModuleId, Symbol};
use roc_mono::layout::{Builtin, Layout}; use roc_mono::layout::{Builtin, Layout};
use roc_parse::ast::{AssignedField, Expr}; use roc_parse::ast::{AssignedField, Expr, StrLiteral};
use roc_region::all::{Located, Region}; use roc_region::all::{Located, Region};
use roc_types::subs::{Content, FlatType, Subs, Variable}; use roc_types::subs::{Content, FlatType, Subs, Variable};
use roc_types::types::RecordField; use roc_types::types::RecordField;
@ -90,7 +90,7 @@ fn jit_to_ast_help<'a>(
execution_engine, execution_engine,
main_fn_name, main_fn_name,
&'static str, &'static str,
|string: &'static str| { Expr::Str(env.arena.alloc(string)) } |string: &'static str| { str_slice_to_ast(env.arena, env.arena.alloc(string)) }
), ),
Layout::Builtin(Builtin::EmptyList) => { Layout::Builtin(Builtin::EmptyList) => {
jit_map!(execution_engine, main_fn_name, &'static str, |_| { jit_map!(execution_engine, main_fn_name, &'static str, |_| {
@ -168,11 +168,11 @@ fn ptr_to_ast<'a>(
list_to_ast(env, ptr, len, elem_layout, content) list_to_ast(env, ptr, len, elem_layout, content)
} }
Layout::Builtin(Builtin::EmptyStr) => Expr::Str(""), Layout::Builtin(Builtin::EmptyStr) => Expr::Str(StrLiteral::PlainLine("")),
Layout::Builtin(Builtin::Str) => { Layout::Builtin(Builtin::Str) => {
let arena_str = unsafe { *(ptr as *const &'static str) }; let arena_str = unsafe { *(ptr as *const &'static str) };
Expr::Str(arena_str) str_slice_to_ast(env.arena, arena_str)
} }
Layout::Struct(field_layouts) => match content { Layout::Struct(field_layouts) => match content {
Content::Structure(FlatType::Record(fields, _)) => { Content::Structure(FlatType::Record(fields, _)) => {
@ -405,3 +405,10 @@ fn i64_to_ast(arena: &Bump, num: i64) -> Expr<'_> {
fn f64_to_ast(arena: &Bump, num: f64) -> Expr<'_> { fn f64_to_ast(arena: &Bump, num: f64) -> Expr<'_> {
Expr::Num(arena.alloc(format!("{}", num))) Expr::Num(arena.alloc(format!("{}", num)))
} }
/// Builds the `Expr::Str` AST node for an evaluated string value.
///
/// A string with no newline characters is wrapped as a single-line
/// `StrLiteral::PlainLine`, matching how string results were turned into
/// `Expr::Str` before string literals became structured.
///
/// # Panics
///
/// Rendering a string that contains newlines as a multiline (block) string
/// is not implemented yet, so that case still hits `todo!`.
///
/// `_arena` is currently unused; presumably the multiline case will need it
/// to allocate segments (TODO confirm once that case is implemented).
fn str_slice_to_ast<'a>(_arena: &'a Bump, string: &'a str) -> Expr<'a> {
    if string.contains('\n') {
        todo!(
            "if this string contains newlines, render it as a multiline string: {:?}",
            Expr::Str(StrLiteral::PlainLine(string))
        );
    } else {
        // No newlines, so the value fits in a plain single-line literal and
        // needs no arena allocation.
        Expr::Str(StrLiteral::PlainLine(string))
    }
}

View file

@ -232,6 +232,12 @@ mod repl_eval {
); );
} }
#[test]
fn multiline_string() {
    // A string value containing newlines should be rendered in the output
    // as a triple-quoted multiline string.
    let input = r#""\n\nhi!\n\n""#;
    let expected_output = "\"\"\"\n\nhi!\n\n\"\"\"";

    expect_success(input, expected_output);
}
// TODO uncomment this once https://github.com/rtfeldman/roc/issues/295 is done // TODO uncomment this once https://github.com/rtfeldman/roc/issues/295 is done
// //
// #[test] // #[test]

View file

@ -14,7 +14,7 @@ use roc_module::ident::{Lowercase, TagName};
use roc_module::low_level::LowLevel; use roc_module::low_level::LowLevel;
use roc_module::operator::CalledVia; use roc_module::operator::CalledVia;
use roc_module::symbol::Symbol; use roc_module::symbol::Symbol;
use roc_parse::ast; use roc_parse::ast::{self, StrLiteral, StrSegment};
use roc_parse::pattern::PatternType::*; use roc_parse::pattern::PatternType::*;
use roc_problem::can::{PrecedenceProblem, Problem, RuntimeError}; use roc_problem::can::{PrecedenceProblem, Problem, RuntimeError};
use roc_region::all::{Located, Region}; use roc_region::all::{Located, Region};
@ -55,8 +55,10 @@ pub enum Expr {
// Int and Float store a variable to generate better error messages // Int and Float store a variable to generate better error messages
Int(Variable, i64), Int(Variable, i64),
Float(Variable, f64), Float(Variable, f64),
Str(Box<str>), Str {
BlockStr(Box<str>), interpolations: Vec<(Box<str>, Symbol)>,
suffix: Box<str>,
},
List { List {
list_var: Variable, // required for uniqueness of the list list_var: Variable, // required for uniqueness of the list
elem_var: Variable, elem_var: Variable,
@ -247,12 +249,7 @@ pub fn canonicalize_expr<'a>(
) )
} }
} }
ast::Expr::Str(string) => (Str((*string).into()), Output::default()), ast::Expr::Str(literal) => flatten_str_literal(env, scope, literal),
ast::Expr::BlockStr(lines) => {
let joined = lines.iter().copied().collect::<Vec<&str>>().join("\n");
(BlockStr(joined.into()), Output::default())
}
ast::Expr::List(loc_elems) => { ast::Expr::List(loc_elems) => {
if loc_elems.is_empty() { if loc_elems.is_empty() {
( (
@ -1045,8 +1042,7 @@ pub fn inline_calls(var_store: &mut VarStore, scope: &mut Scope, expr: Expr) ->
other @ Num(_, _) other @ Num(_, _)
| other @ Int(_, _) | other @ Int(_, _)
| other @ Float(_, _) | other @ Float(_, _)
| other @ Str(_) | other @ Str { .. }
| other @ BlockStr(_)
| other @ RuntimeError(_) | other @ RuntimeError(_)
| other @ EmptyRecord | other @ EmptyRecord
| other @ Accessor { .. } | other @ Accessor { .. }
@ -1323,3 +1319,78 @@ pub fn inline_calls(var_store: &mut VarStore, scope: &mut Scope, expr: Expr) ->
} }
} }
} }
/// Canonicalizes a parsed string literal into an `Expr::Str`.
///
/// A `PlainLine` has no segments to process, so it becomes a string with no
/// interpolations whose `suffix` is the whole text. The segmented forms are
/// delegated to `flatten_str_lines`; a `LineWithEscapes` is treated as a
/// one-line block.
fn flatten_str_literal(
    env: &mut Env<'_>,
    scope: &mut Scope,
    literal: &StrLiteral<'_>,
) -> (Expr, Output) {
    match literal {
        ast::StrLiteral::PlainLine(str_slice) => {
            let expr = Expr::Str {
                interpolations: Vec::new(),
                suffix: Box::from(*str_slice),
            };

            (expr, Output::default())
        }
        ast::StrLiteral::LineWithEscapes(segments) => {
            // Wrap the single line so it can share the multi-line code path.
            flatten_str_lines(env, scope, &[*segments])
        }
        ast::StrLiteral::Block(lines) => flatten_str_lines(env, scope, lines),
    }
}
fn flatten_str_lines(
env: &mut Env<'_>,
scope: &mut Scope,
lines: &[&[StrSegment<'_>]],
) -> (Expr, Output) {
use StrSegment::*;
let mut buf = String::new();
let mut interpolations = Vec::new();
let mut output = Output::default();
for line in lines {
for segment in line.iter() {
match segment {
Plaintext(string) => {
buf.push_str(string);
}
Unicode(loc_digits) => {
todo!("parse unicode digits {:?}", loc_digits);
}
Interpolated {
module_name,
ident,
region,
} => {
let (expr, new_output) =
canonicalize_lookup(env, scope, module_name, ident, region.clone());
output.union(new_output);
match expr {
Expr::Var(symbol) => {
interpolations.push((buf.into(), symbol));
}
_ => {
todo!("TODO gracefully handle non-ident in string interpolation.");
}
}
buf = String::new();
}
EscapedChar(ch) => buf.push(*ch),
}
}
}
(
Expr::Str {
interpolations,
suffix: buf.into(),
},
output,
)
}

View file

@ -68,8 +68,6 @@ pub fn desugar_expr<'a>(arena: &'a Bump, loc_expr: &'a Located<Expr<'a>>) -> &'a
| Nested(NonBase10Int { .. }) | Nested(NonBase10Int { .. })
| Str(_) | Str(_)
| Nested(Str(_)) | Nested(Str(_))
| BlockStr(_)
| Nested(BlockStr(_))
| AccessorFunction(_) | AccessorFunction(_)
| Nested(AccessorFunction(_)) | Nested(AccessorFunction(_))
| Var { .. } | Var { .. }

View file

@ -4,7 +4,7 @@ use crate::num::{finish_parsing_base, finish_parsing_float, finish_parsing_int};
use crate::scope::Scope; use crate::scope::Scope;
use roc_module::ident::{Ident, Lowercase, TagName}; use roc_module::ident::{Ident, Lowercase, TagName};
use roc_module::symbol::Symbol; use roc_module::symbol::Symbol;
use roc_parse::ast; use roc_parse::ast::{self, StrLiteral, StrSegment};
use roc_parse::pattern::PatternType; use roc_parse::pattern::PatternType;
use roc_problem::can::{MalformedPatternProblem, Problem, RuntimeError}; use roc_problem::can::{MalformedPatternProblem, Problem, RuntimeError};
use roc_region::all::{Located, Region}; use roc_region::all::{Located, Region};
@ -230,16 +230,8 @@ pub fn canonicalize_pattern<'a>(
ptype => unsupported_pattern(env, ptype, region), ptype => unsupported_pattern(env, ptype, region),
}, },
StrLiteral(string) => match pattern_type { StrLiteral(literal) => match pattern_type {
WhenBranch => { WhenBranch => flatten_str_literal(literal),
// TODO report whether string was malformed
Pattern::StrLiteral((*string).into())
}
ptype => unsupported_pattern(env, ptype, region),
},
BlockStrLiteral(_lines) => match pattern_type {
WhenBranch => todo!("TODO block string literal pattern"),
ptype => unsupported_pattern(env, ptype, region), ptype => unsupported_pattern(env, ptype, region),
}, },
@ -473,3 +465,38 @@ fn add_bindings_from_patterns(
| UnsupportedPattern(_) => (), | UnsupportedPattern(_) => (),
} }
} }
/// Converts a parsed string literal into a canonical string pattern.
///
/// A `PlainLine` maps directly to `Pattern::StrLiteral`; the segmented forms
/// are delegated to `flatten_str_lines`, with a `LineWithEscapes` treated as
/// a one-line block.
fn flatten_str_literal(literal: &StrLiteral<'_>) -> Pattern {
    match literal {
        ast::StrLiteral::PlainLine(str_slice) => Pattern::StrLiteral((*str_slice).into()),
        ast::StrLiteral::LineWithEscapes(segments) => {
            // Wrap the single line so it can share the multi-line code path.
            flatten_str_lines(&[*segments])
        }
        ast::StrLiteral::Block(lines) => flatten_str_lines(lines),
    }
}
fn flatten_str_lines(lines: &[&[StrSegment<'_>]]) -> Pattern {
use StrSegment::*;
let mut buf = String::new();
for line in lines {
for segment in line.iter() {
match segment {
Plaintext(string) => {
buf.push_str(string);
}
Unicode(loc_digits) => {
todo!("parse unicode digits {:?}", loc_digits);
}
Interpolated { region, .. } => {
return Pattern::UnsupportedPattern(region.clone());
}
EscapedChar(ch) => buf.push(*ch),
}
}
}
Pattern::StrLiteral(buf.into())
}

View file

@ -1236,30 +1236,38 @@ mod test_can {
// ); // );
// } // }
// #[test] #[test]
// fn string_with_interpolation_at_start() { fn string_with_interpolation_at_start() {
// let input = indoc!( let src = indoc!(
// r#" r#"
// "\(abc)defg" "\(abc)defg"
// "# "#
// ); );
let arena = Bump::new();
let CanExprOut {
loc_expr, problems, ..
} = can_expr_with(&arena, test_home(), src);
assert_eq!(problems, Vec::new());
// let (args, ret) = (vec![("", Located::new(0, 2, 0, 4, Var("abc")))], "defg"); // let (args, ret) = (vec![("", Located::new(0, 2, 0, 4, Var("abc")))], "defg");
// let arena = Bump::new(); // let arena = Bump::new();
// let actual = parse_with(&arena, input); // let actual = parse_with(&arena, input);
// assert_eq!( // assert_eq!(
// Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))), // Ok(Expr::InterpolatedStr(&(
// arena.alloc_slice_clone(&args),
// ret
// ))),
// actual // actual
// ); // );
// } }
// #[test] #[test]
// fn string_with_interpolation_at_end() { fn string_with_interpolation_at_end() {
// let input = indoc!( let src = indoc!(
// r#" r#"
// "abcd\(efg)" "abcd\(efg)"
// "# "#
// ); );
// let (args, ret) = (vec![("abcd", Located::new(0, 6, 0, 8, Var("efg")))], ""); // let (args, ret) = (vec![("abcd", Located::new(0, 6, 0, 8, Var("efg")))], "");
// let arena = Bump::new(); // let arena = Bump::new();
// let actual = parse_with(&arena, input); // let actual = parse_with(&arena, input);
@ -1268,15 +1276,15 @@ mod test_can {
// Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))), // Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))),
// actual // actual
// ); // );
// } }
// #[test] #[test]
// fn string_with_interpolation_in_middle() { fn string_with_interpolation_in_middle() {
// let input = indoc!( let src = indoc!(
// r#" r#"
// "abc\(defg)hij" "abc\(defg)hij"
// "# "#
// ); );
// let (args, ret) = (vec![("abc", Located::new(0, 5, 0, 8, Var("defg")))], "hij"); // let (args, ret) = (vec![("abc", Located::new(0, 5, 0, 8, Var("defg")))], "hij");
// let arena = Bump::new(); // let arena = Bump::new();
// let actual = parse_with(&arena, input); // let actual = parse_with(&arena, input);
@ -1285,15 +1293,15 @@ mod test_can {
// Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))), // Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))),
// actual // actual
// ); // );
// } }
// #[test] #[test]
// fn string_with_two_interpolations_in_middle() { fn string_with_two_interpolations_in_middle() {
// let input = indoc!( let src = indoc!(
// r#" r#"
// "abc\(defg)hi\(jkl)mn" "abc\(defg)hi\(jkl)mn"
// "# "#
// ); );
// let (args, ret) = ( // let (args, ret) = (
// vec![ // vec![
// ("abc", Located::new(0, 5, 0, 8, Var("defg"))), // ("abc", Located::new(0, 5, 0, 8, Var("defg"))),
@ -1308,15 +1316,15 @@ mod test_can {
// Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))), // Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))),
// actual // actual
// ); // );
// } }
// #[test] #[test]
// fn string_with_four_interpolations() { fn string_with_four_interpolations() {
// let input = indoc!( let src = indoc!(
// r#" r#"
// "\(abc)def\(ghi)jkl\(mno)pqrs\(tuv)" "\(abc)def\(ghi)jkl\(mno)pqrs\(tuv)"
// "# "#
// ); );
// let (args, ret) = ( // let (args, ret) = (
// vec![ // vec![
// ("", Located::new(0, 2, 0, 4, Var("abc"))), // ("", Located::new(0, 2, 0, 4, Var("abc"))),
@ -1333,7 +1341,7 @@ mod test_can {
// Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))), // Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))),
// actual // actual
// ); // );
// } }
// #[test] // #[test]
// fn string_with_escaped_interpolation() { // fn string_with_escaped_interpolation() {
@ -1384,4 +1392,6 @@ mod test_can {
// TODO test hex/oct/binary conversion to numbers // TODO test hex/oct/binary conversion to numbers
// //
// TODO test for \t \r and \n in string literals *outside* unicode escape sequence! // TODO test for \t \r and \n in string literals *outside* unicode escape sequence!
//
// TODO test for multiline block string literals in pattern matches
} }

View file

@ -1,4 +1,4 @@
use crate::builtins::{empty_list_type, float_literal, int_literal, list_type, str_type}; use crate::builtins::{empty_list_type, float_literal, int_literal, list_type};
use crate::pattern::{constrain_pattern, PatternState}; use crate::pattern::{constrain_pattern, PatternState};
use roc_can::annotation::IntroducedVariables; use roc_can::annotation::IntroducedVariables;
use roc_can::constraint::Constraint::{self, *}; use roc_can::constraint::Constraint::{self, *};
@ -199,7 +199,15 @@ pub fn constrain_expr(
exists(vars, And(cons)) exists(vars, And(cons))
} }
Str(_) | BlockStr(_) => Eq(str_type(), expected, Category::Str, region), Str { interpolations, .. } => {
todo!(
"constrain interpolations in a string literal {:?}",
interpolations
);
// use crate::builtins::{empty_list_type, float_literal, int_literal, list_type, str_type};
// Eq(str_type(), expected, Category::Str, region)
}
List { List {
elem_var, elem_var,
loc_elems, loc_elems,

View file

@ -503,14 +503,15 @@ pub fn constrain_expr(
]), ]),
) )
} }
BlockStr(_) | Str(_) => { Str { interpolations, .. } => {
let uniq_type = var_store.fresh(); todo!("uniq constrain interpolations {:?}", interpolations);
let inferred = str_type(Bool::variable(uniq_type)); // let uniq_type = var_store.fresh();
// let inferred = str_type(Bool::variable(uniq_type));
exists( // exists(
vec![uniq_type], // vec![uniq_type],
Eq(inferred, expected, Category::Str, region), // Eq(inferred, expected, Category::Str, region),
) // )
} }
EmptyRecord => { EmptyRecord => {
let uniq_type = var_store.fresh(); let uniq_type = var_store.fresh();

View file

@ -28,7 +28,6 @@ impl<'a> Formattable<'a> for Expr<'a> {
Float(_) Float(_)
| Num(_) | Num(_)
| NonBase10Int { .. } | NonBase10Int { .. }
| Str(_)
| Access(_, _) | Access(_, _)
| AccessorFunction(_) | AccessorFunction(_)
| Var { .. } | Var { .. }
@ -42,7 +41,13 @@ impl<'a> Formattable<'a> for Expr<'a> {
List(elems) => elems.iter().any(|loc_expr| loc_expr.is_multiline()), List(elems) => elems.iter().any(|loc_expr| loc_expr.is_multiline()),
BlockStr(lines) => lines.len() > 1, Str(literal) => {
todo!(
"fmt determine if string literal is multiline: {:?}",
literal
);
// lines.len() > 1
}
Apply(loc_expr, args, _) => { Apply(loc_expr, args, _) => {
loc_expr.is_multiline() || args.iter().any(|loc_arg| loc_arg.is_multiline()) loc_expr.is_multiline() || args.iter().any(|loc_arg| loc_arg.is_multiline())
} }
@ -112,10 +117,19 @@ impl<'a> Formattable<'a> for Expr<'a> {
sub_expr.format_with_options(buf, Parens::NotNeeded, Newlines::Yes, indent); sub_expr.format_with_options(buf, Parens::NotNeeded, Newlines::Yes, indent);
buf.push(')'); buf.push(')');
} }
Str(string) => { Str(literal) => {
buf.push('"'); todo!("fmt string literal {:?}", literal);
buf.push_str(string); // buf.push('"');
buf.push('"'); // buf.push_str(string);
// buf.push('"');
//
// BlockStr(lines) => {
// buf.push_str("\"\"\"");
// for line in lines.iter() {
// buf.push_str(line);
// }
// buf.push_str("\"\"\"");
// }
} }
Var { module_name, ident } => { Var { module_name, ident } => {
if !module_name.is_empty() { if !module_name.is_empty() {
@ -152,13 +166,6 @@ impl<'a> Formattable<'a> for Expr<'a> {
buf.push(')'); buf.push(')');
} }
} }
BlockStr(lines) => {
buf.push_str("\"\"\"");
for line in lines.iter() {
buf.push_str(line);
}
buf.push_str("\"\"\"");
}
Num(string) | Float(string) | GlobalTag(string) | PrivateTag(string) => { Num(string) | Float(string) | GlobalTag(string) | PrivateTag(string) => {
buf.push_str(string) buf.push_str(string)
} }

View file

@ -37,7 +37,6 @@ impl<'a> Formattable<'a> for Pattern<'a> {
| Pattern::NonBase10Literal { .. } | Pattern::NonBase10Literal { .. }
| Pattern::FloatLiteral(_) | Pattern::FloatLiteral(_)
| Pattern::StrLiteral(_) | Pattern::StrLiteral(_)
| Pattern::BlockStrLiteral(_)
| Pattern::Underscore | Pattern::Underscore
| Pattern::Malformed(_) | Pattern::Malformed(_)
| Pattern::QualifiedIdentifier { .. } => false, | Pattern::QualifiedIdentifier { .. } => false,
@ -126,11 +125,8 @@ impl<'a> Formattable<'a> for Pattern<'a> {
buf.push_str(string); buf.push_str(string);
} }
FloatLiteral(string) => buf.push_str(string), FloatLiteral(string) => buf.push_str(string),
StrLiteral(string) => buf.push_str(string), StrLiteral(literal) => {
BlockStrLiteral(lines) => { todo!("Format string literal: {:?}", literal);
for line in *lines {
buf.push_str(line)
}
} }
Underscore => buf.push('_'), Underscore => buf.push('_'),

View file

@ -222,10 +222,22 @@ pub fn build_exp_literal<'a, 'ctx, 'env>(
Float(num) => env.context.f64_type().const_float(*num).into(), Float(num) => env.context.f64_type().const_float(*num).into(),
Bool(b) => env.context.bool_type().const_int(*b as u64, false).into(), Bool(b) => env.context.bool_type().const_int(*b as u64, false).into(),
Byte(b) => env.context.i8_type().const_int(*b as u64, false).into(), Byte(b) => env.context.i8_type().const_int(*b as u64, false).into(),
Str(str_literal) => { Str {
if str_literal.is_empty() { interpolations,
suffix,
} => {
if interpolations.is_empty() && suffix.is_empty() {
empty_list(env) empty_list(env)
} else { } else {
if !interpolations.is_empty() {
todo!(
"LLVM code gen for string interpolations: {:?}",
interpolations
);
}
let mut str_literal = suffix; // TODO REMOVE THIS
let ctx = env.context; let ctx = env.context;
let builder = env.builder; let builder = env.builder;

View file

@ -1095,7 +1095,10 @@ fn test_to_equality<'a>(
} }
Test::IsStr(test_str) => { Test::IsStr(test_str) => {
let lhs = Expr::Literal(Literal::Str(env.arena.alloc(test_str))); let lhs = Expr::Literal(Literal::Str {
interpolations: &[],
suffix: env.arena.alloc(test_str),
});
let lhs_symbol = env.unique_symbol(); let lhs_symbol = env.unique_symbol();
let (mut stores, rhs_symbol) = path_to_expr(env, cond_symbol, &path, &cond_layout); let (mut stores, rhs_symbol) = path_to_expr(env, cond_symbol, &path, &cond_layout);

View file

@ -590,7 +590,10 @@ pub enum Literal<'a> {
// Literals // Literals
Int(i64), Int(i64),
Float(f64), Float(f64),
Str(&'a str), Str {
interpolations: &'a [(&'a str, Symbol)],
suffix: &'a str,
},
/// Closed tag unions containing exactly two (0-arity) tags compile to Expr::Bool, /// Closed tag unions containing exactly two (0-arity) tags compile to Expr::Bool,
/// so they can (at least potentially) be emitted as 1-bit machine bools. /// so they can (at least potentially) be emitted as 1-bit machine bools.
/// ///
@ -669,7 +672,13 @@ impl<'a> Literal<'a> {
Float(lit) => alloc.text(format!("{}f64", lit)), Float(lit) => alloc.text(format!("{}f64", lit)),
Bool(lit) => alloc.text(format!("{}", lit)), Bool(lit) => alloc.text(format!("{}", lit)),
Byte(lit) => alloc.text(format!("{}u8", lit)), Byte(lit) => alloc.text(format!("{}u8", lit)),
Str(lit) => alloc.text(format!("{:?}", lit)), Str {
interpolations,
suffix,
} => {
// alloc.text(format!("{:?}", lit))
todo!("Literal::to_doc for Str");
}
} }
} }
} }
@ -1242,12 +1251,18 @@ pub fn with_hole<'a>(
hole, hole,
), ),
Str(string) | BlockStr(string) => Stmt::Let( Str {
assigned, interpolations,
Expr::Literal(Literal::Str(arena.alloc(string))), suffix: _,
Layout::Builtin(Builtin::Str), } => {
hole, todo!("mono IR to turn Str interpolations into Let");
), // Stmt::Let(
// assigned,
// Expr::Literal(Literal::Str(arena.alloc(string))),
// Layout::Builtin(Builtin::Str),
// hole,
// )
}
Num(var, num) => match num_argument_to_int_or_float(env.subs, var) { Num(var, num) => match num_argument_to_int_or_float(env.subs, var) {
IntOrFloat::IntType => Stmt::Let( IntOrFloat::IntType => Stmt::Let(

View file

@ -84,6 +84,26 @@ pub struct WhenPattern<'a> {
pub guard: Option<Loc<Expr<'a>>>, pub guard: Option<Loc<Expr<'a>>>,
} }
/// One piece of a string literal, as recorded by the parser.
///
/// A literal that is more than a single plain run of text is represented as
/// a sequence of these segments (see `StrLiteral`).
#[derive(Clone, Debug, PartialEq)]
pub enum StrSegment<'a> {
    /// A run of literal text, e.g. "foo"
    Plaintext(&'a str),
    /// The contents of a unicode escape, e.g. "00A0" in "\u(00A0)"
    Unicode(Loc<&'a str>),
    /// A single escaped character, e.g. '\n' in "Hello!\n"
    EscapedChar(char),
    /// An interpolated identifier, e.g. "App.version" in "Version: \(App.version)"
    Interpolated {
        // presumably the qualifying module ("App" in the example above) —
        // TODO confirm how an unqualified identifier is represented
        module_name: &'a str,
        // the identifier being interpolated
        ident: &'a str,
        // source region associated with this interpolation
        region: Region,
    },
}
/// A parsed string literal.
#[derive(Clone, Debug, PartialEq)]
pub enum StrLiteral<'a> {
    /// A single-line string that is one plain run of text, with no segments.
    PlainLine(&'a str),
    /// A single-line string that had to be split into multiple segments.
    LineWithEscapes(&'a [StrSegment<'a>]),
    /// A block string (begins with `"""`); presumably one slice of segments
    /// per line — TODO confirm against the block string parser.
    Block(&'a [&'a [StrSegment<'a>]]),
}
/// A parsed expression. This uses lifetimes extensively for two reasons: /// A parsed expression. This uses lifetimes extensively for two reasons:
/// ///
/// 1. It uses Bump::alloc for all allocations, which returns a reference. /// 1. It uses Bump::alloc for all allocations, which returns a reference.
@ -105,8 +125,7 @@ pub enum Expr<'a> {
}, },
// String Literals // String Literals
Str(&'a str), Str(StrLiteral<'a>), // string without escapes in it
BlockStr(&'a [&'a str]),
/// Look up exactly one field on a record, e.g. (expr).foo. /// Look up exactly one field on a record, e.g. (expr).foo.
Access(&'a Expr<'a>, &'a str), Access(&'a Expr<'a>, &'a str),
/// e.g. `.foo` /// e.g. `.foo`
@ -336,8 +355,7 @@ pub enum Pattern<'a> {
is_negative: bool, is_negative: bool,
}, },
FloatLiteral(&'a str), FloatLiteral(&'a str),
StrLiteral(&'a str), StrLiteral(StrLiteral<'a>),
BlockStrLiteral(&'a [&'a str]),
Underscore, Underscore,
// Space // Space
@ -455,7 +473,6 @@ impl<'a> Pattern<'a> {
) => string_x == string_y && base_x == base_y && is_negative_x == is_negative_y, ) => string_x == string_y && base_x == base_y && is_negative_x == is_negative_y,
(FloatLiteral(x), FloatLiteral(y)) => x == y, (FloatLiteral(x), FloatLiteral(y)) => x == y,
(StrLiteral(x), StrLiteral(y)) => x == y, (StrLiteral(x), StrLiteral(y)) => x == y,
(BlockStrLiteral(x), BlockStrLiteral(y)) => x == y,
(Underscore, Underscore) => true, (Underscore, Underscore) => true,
// Space // Space
@ -584,7 +601,7 @@ impl<'a> Spaceable<'a> for Def<'a> {
pub enum Attempting { pub enum Attempting {
List, List,
Keyword, Keyword,
StringLiteral, StrLiteral,
RecordLiteral, RecordLiteral,
RecordFieldLabel, RecordFieldLabel,
InterpolatedString, InterpolatedString,

View file

@ -300,12 +300,8 @@ fn expr_to_pattern<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<'a>,
base: *base, base: *base,
is_negative: *is_negative, is_negative: *is_negative,
}), }),
Expr::Str(string) => Ok(Pattern::StrLiteral(string)),
Expr::MalformedIdent(string) => Ok(Pattern::Malformed(string)),
// These would not have parsed as patterns // These would not have parsed as patterns
Expr::BlockStr(_) Expr::AccessorFunction(_)
| Expr::AccessorFunction(_)
| Expr::Access(_, _) | Expr::Access(_, _)
| Expr::List(_) | Expr::List(_)
| Expr::Closure(_, _) | Expr::Closure(_, _)
@ -322,6 +318,9 @@ fn expr_to_pattern<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<'a>,
attempting: Attempting::Def, attempting: Attempting::Def,
reason: FailReason::InvalidPattern, reason: FailReason::InvalidPattern,
}), }),
Expr::Str(string) => Ok(Pattern::StrLiteral(string.clone())),
Expr::MalformedIdent(string) => Ok(Pattern::Malformed(string)),
} }
} }
@ -580,11 +579,7 @@ fn annotation_or_alias<'a>(
QualifiedIdentifier { .. } => { QualifiedIdentifier { .. } => {
panic!("TODO gracefully handle trying to annotate a qualified identifier, e.g. `Foo.bar : ...`"); panic!("TODO gracefully handle trying to annotate a qualified identifier, e.g. `Foo.bar : ...`");
} }
NumLiteral(_) NumLiteral(_) | NonBase10Literal { .. } | FloatLiteral(_) | StrLiteral(_) => {
| NonBase10Literal { .. }
| FloatLiteral(_)
| StrLiteral(_)
| BlockStrLiteral(_) => {
panic!("TODO gracefully handle trying to annotate a litera"); panic!("TODO gracefully handle trying to annotate a litera");
} }
Underscore => { Underscore => {
@ -916,10 +911,7 @@ fn number_pattern<'a>() -> impl Parser<'a, Pattern<'a>> {
} }
fn string_pattern<'a>() -> impl Parser<'a, Pattern<'a>> { fn string_pattern<'a>() -> impl Parser<'a, Pattern<'a>> {
map!(crate::string_literal::parse(), |result| match result { map!(crate::string_literal::parse(), Pattern::StrLiteral)
crate::string_literal::StringLiteral::Line(string) => Pattern::StrLiteral(string),
crate::string_literal::StringLiteral::Block(lines) => Pattern::BlockStrLiteral(lines),
})
} }
fn underscore_pattern<'a>() -> impl Parser<'a, Pattern<'a>> { fn underscore_pattern<'a>() -> impl Parser<'a, Pattern<'a>> {
@ -1789,8 +1781,5 @@ pub fn global_tag<'a>() -> impl Parser<'a, &'a str> {
} }
pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> { pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
map!(crate::string_literal::parse(), |result| match result { map!(crate::string_literal::parse(), Expr::Str)
crate::string_literal::StringLiteral::Line(string) => Expr::Str(string),
crate::string_literal::StringLiteral::Block(lines) => Expr::BlockStr(lines),
})
} }

View file

@ -1,71 +1,110 @@
use crate::ast::Attempting; use crate::ast::{Attempting, StrLiteral, StrSegment};
use crate::parser::{parse_utf8, unexpected, unexpected_eof, ParseResult, Parser, State}; use crate::parser::{parse_utf8, unexpected, unexpected_eof, ParseResult, Parser, State};
use bumpalo::collections::vec::Vec; use bumpalo::collections::vec::Vec;
use bumpalo::Bump; use bumpalo::Bump;
pub enum StringLiteral<'a> { pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>> {
Line(&'a str), use StrLiteral::*;
Block(&'a [&'a str]),
}
pub fn parse<'a>() -> impl Parser<'a, StringLiteral<'a>> {
move |arena: &'a Bump, state: State<'a>| { move |arena: &'a Bump, state: State<'a>| {
let mut bytes = state.bytes.iter(); let mut bytes = state.bytes.iter();
// String literals must start with a quote. // String literals must start with a quote.
// If this doesn't, it must not be a string literal! // If this doesn't, it must not be a string literal!
match bytes.next() { match bytes.next() {
Some(&byte) => { Some(&byte) => {
if byte != b'"' { if byte != b'"' {
return Err(unexpected(0, state, Attempting::StringLiteral)); return Err(unexpected(0, state, Attempting::StrLiteral));
} }
} }
None => { None => {
return Err(unexpected_eof(0, Attempting::StringLiteral, state)); return Err(unexpected_eof(0, Attempting::StrLiteral, state));
} }
} }
// The current segment begins right after the opening quotation mark.
let mut cur_segment = &state.bytes[1..];
enum EscapeState {
None,
Unicode,
Interpolation,
}
// At the parsing stage we keep the entire raw string, because the formatter // At the parsing stage we keep the entire raw string, because the formatter
// needs the raw string. (For example, so it can "remember" whether you // needs the raw string. (For example, so it can "remember" whether you
// wrote \u{...} or the actual unicode character itself.) // wrote \u{...} or the actual unicode character itself.)
// //
// Later, in canonicalization, we'll do things like resolving
// unicode escapes and string interpolation.
//
// Since we're keeping the entire raw string, all we need to track is // Since we're keeping the entire raw string, all we need to track is
// how many characters we've parsed. So far, that's 1 (the opening `"`). // how many characters we've parsed. So far, that's 1 (the opening `"`).
let mut parsed_chars = 1; let mut total_parsed_chars = 1;
let mut prev_byte = b'"'; let mut segment_parsed_chars = 0;
let mut segments = Vec::new_in(arena);
let mut escape_state = EscapeState::None;
// pub enum StrSegment<'a> {
// Plaintext(&'a str), // e.g. "foo"
// Unicode(&'a str), // e.g. "00A0" in "\u(00A0)"
// Interpolated(&'a str), // e.g. "name" in "Hi, \(name)!"
// EscapedChar(char), // e.g. '\n' in "Hello!\n"
// }
while let Some(&byte) = bytes.next() { while let Some(&byte) = bytes.next() {
parsed_chars += 1; segment_parsed_chars += 1;
// Potentially end the string (unless this is an escaped `"`!) // Potentially end the string (unless this is an escaped `"`!)
match byte { match byte {
b'"' if prev_byte != b'\\' => { b'"' => {
let (string, state) = if parsed_chars == 2 { // If we aren't escaping, then this is the end of the string!
if let EscapeState::None = escape_state {
let (literal, state) = if total_parsed_chars == 1 && segments.is_empty() {
match bytes.next() { match bytes.next() {
Some(b'"') => { Some(b'"') => {
// If the first three chars were all `"`, then this // If the very first three chars were all `"`,
// literal begins with `"""` and is a block string. // then this literal begins with `"""`
// and is a block string.
return parse_block_string(arena, state, &mut bytes); return parse_block_string(arena, state, &mut bytes);
} }
_ => ("", state.advance_without_indenting(2)?), _ => (PlainLine(""), state.advance_without_indenting(2)?),
} }
} else { } else {
// Start at 1 so we omit the opening `"`.
// Subtract 1 from parsed_chars so we omit the closing `"`. // Subtract 1 from parsed_chars so we omit the closing `"`.
let string_bytes = &state.bytes[1..(parsed_chars - 1)]; let string_bytes = &cur_segment[0..(segment_parsed_chars - 1)];
match parse_utf8(string_bytes) { match parse_utf8(string_bytes) {
Ok(string) => (string, state.advance_without_indenting(parsed_chars)?), Ok(string) => {
total_parsed_chars += segment_parsed_chars;
let state =
state.advance_without_indenting(total_parsed_chars)?;
if segments.is_empty() {
// We only had one segment.
(StrLiteral::PlainLine(string), state)
} else {
// We had multiple segments! Parse the
// current one and add it to the list.
segments.push(StrSegment::Plaintext(string));
(LineWithEscapes(segments.into_bump_slice()), state)
}
}
Err(reason) => { Err(reason) => {
return state.fail(reason); return state.fail(reason);
} }
} }
}; };
return Ok((StringLiteral::Line(string), state)); return Ok((literal, state));
} else {
// We are escaping, so this is an error. (If it were an
// escaped single character like \" then we would have
// handled that scenario already.)
return Err(unexpected(
state.bytes.len() - 1,
state,
Attempting::StrLiteral,
));
}
} }
b'\n' => { b'\n' => {
// This is a single-line string, which cannot have newlines! // This is a single-line string, which cannot have newlines!
@ -76,19 +115,90 @@ pub fn parse<'a>() -> impl Parser<'a, StringLiteral<'a>> {
return Err(unexpected( return Err(unexpected(
state.bytes.len() - 1, state.bytes.len() - 1,
state, state,
Attempting::StringLiteral, Attempting::StrLiteral,
)); ));
} }
b')' => {
// All escape sequences end in a close paren, so we don't
// need to pay for a conditional here. If it was an escape,
// then we want to set it to None, and if it wasn't an
// escape, then setting it from None to None is harmless!
// (And likely cheaper than a conditional.)
escape_state = EscapeState::None;
}
b'\\' => {
// This is the start of a new escape
if let EscapeState::None = escape_state {
match bytes.next() {
Some(b'(') => {
// This is an interpolated variable
escape_state = EscapeState::Interpolation;
todo!("Parse interpolated ident");
}
Some(b'u') => {
escape_state = EscapeState::Unicode;
// This is an escaped unicode character
todo!("Parse '(' and then parse escaped unicode character");
}
Some(ch @ b'\n') | Some(ch @ b'\t') | Some(ch @ b'\r')
| Some(ch @ b'"') | Some(ch @ b'\\') => {
// Record the current segment so we can begin a new one.
match parse_utf8(cur_segment) {
Ok(string) => {
segments.push(StrSegment::Plaintext(string));
}
Err(reason) => {
return state.fail(reason);
}
}
// Record the escaped char.
segments.push(StrSegment::EscapedChar(*ch as char));
// We're now done escaping.
escape_state = EscapeState::None;
// Advance past the segment we just added, and
// also past the escaped char we just added.
//
// +2 because we just parsed a backslash and
// one other char after it.
cur_segment = &cur_segment[(segment_parsed_chars + 2)..];
// Reset segment_parsed_chars to 0 because we're now
// parsing the beginning of a new segment.
segment_parsed_chars = 0;
}
_ => { _ => {
prev_byte = byte; // Invalid escape! A backslash must be followed
// by either an open paren or else one of the
// escapable characters (\n, \t, \", \\, etc)
return Err(unexpected(
state.bytes.len() - 1,
state,
Attempting::StrLiteral,
));
}
}
} else {
// Can't have a \ inside an escape!
return Err(unexpected(
state.bytes.len() - 1,
state,
Attempting::StrLiteral,
));
}
}
_ => {
// All other characters need no special handling.
} }
} }
} }
// We ran out of characters before finding a closed quote // We ran out of characters before finding a closed quote
Err(unexpected_eof( Err(unexpected_eof(
parsed_chars, total_parsed_chars,
Attempting::StringLiteral, Attempting::StrLiteral,
state.clone(), state.clone(),
)) ))
} }
@ -98,7 +208,7 @@ fn parse_block_string<'a, I>(
arena: &'a Bump, arena: &'a Bump,
state: State<'a>, state: State<'a>,
bytes: &mut I, bytes: &mut I,
) -> ParseResult<'a, StringLiteral<'a>> ) -> ParseResult<'a, StrLiteral<'a>>
where where
I: Iterator<Item = &'a u8>, I: Iterator<Item = &'a u8>,
{ {
@ -125,12 +235,13 @@ where
let line_bytes = &state.bytes[line_start..(parsed_chars - 3)]; let line_bytes = &state.bytes[line_start..(parsed_chars - 3)];
return match parse_utf8(line_bytes) { return match parse_utf8(line_bytes) {
Ok(line) => { Ok(_line) => {
let state = state.advance_without_indenting(parsed_chars)?; // let state = state.advance_without_indenting(parsed_chars)?;
lines.push(line); // lines.push(line);
Ok((StringLiteral::Block(arena.alloc(lines)), state)) // Ok((StrLiteral::Block(lines.into_bump_slice()), state))
todo!("TODO finish making block strings accept escapes");
} }
Err(reason) => state.fail(reason), Err(reason) => state.fail(reason),
}; };
@ -164,8 +275,8 @@ where
// We ran out of characters before finding 3 closing quotes // We ran out of characters before finding 3 closing quotes
Err(unexpected_eof( Err(unexpected_eof(
parsed_chars, parsed_chars,
// TODO custom BlockStringLiteral? // TODO custom BlockStrLiteral?
Attempting::StringLiteral, Attempting::StrLiteral,
state, state,
)) ))
} }

View file

@ -24,6 +24,7 @@ mod test_parse {
use roc_parse::ast::CommentOrNewline::*; use roc_parse::ast::CommentOrNewline::*;
use roc_parse::ast::Expr::{self, *}; use roc_parse::ast::Expr::{self, *};
use roc_parse::ast::Pattern::{self, *}; use roc_parse::ast::Pattern::{self, *};
use roc_parse::ast::StrLiteral::*;
use roc_parse::ast::{ use roc_parse::ast::{
Attempting, Def, InterfaceHeader, Spaceable, Tag, TypeAnnotation, WhenBranch, Attempting, Def, InterfaceHeader, Spaceable, Tag, TypeAnnotation, WhenBranch,
}; };
@ -51,7 +52,7 @@ mod test_parse {
// STRING LITERALS // STRING LITERALS
// Test helper for string-literal parsing: forwards to `assert_parses_to`,
// passing `expected` as the first argument and a `Str` expression built from
// `input` as the second.
//
// NOTE(review): the parameter names read as swapped relative to how they are
// used -- presumably `expected` is the source text handed to the parser and
// `input` is the string contents the parse should yield. Confirm against
// `assert_parses_to` and the call sites before renaming.
fn expect_parsed_str(input: &str, expected: &str) { fn expect_parsed_str(input: &str, expected: &str) {
assert_parses_to(expected, Str(input.into())); assert_parses_to(expected, Expr::Str(PlainLine(input)));
} }
#[test] #[test]
@ -62,7 +63,7 @@ mod test_parse {
"" ""
"# "#
), ),
Str(""), Str(PlainLine("")),
); );
} }
@ -74,7 +75,7 @@ mod test_parse {
"x" "x"
"# "#
), ),
Str("x".into()), Expr::Str(PlainLine("x".into())),
); );
} }
@ -86,7 +87,7 @@ mod test_parse {
"foo" "foo"
"# "#
), ),
Str("foo".into()), Expr::Str(PlainLine("foo".into())),
); );
} }
@ -1859,8 +1860,10 @@ mod test_parse {
fn two_branch_when() { fn two_branch_when() {
let arena = Bump::new(); let arena = Bump::new();
let newlines = bumpalo::vec![in &arena; Newline]; let newlines = bumpalo::vec![in &arena; Newline];
let pattern1 = let pattern1 = Pattern::SpaceBefore(
Pattern::SpaceBefore(arena.alloc(StrLiteral("blah")), newlines.into_bump_slice()); arena.alloc(StrLiteral(PlainLine("blah"))),
newlines.into_bump_slice(),
);
let loc_pattern1 = Located::new(1, 1, 1, 7, pattern1); let loc_pattern1 = Located::new(1, 1, 1, 7, pattern1);
let expr1 = Num("1"); let expr1 = Num("1");
let loc_expr1 = Located::new(1, 1, 11, 12, expr1); let loc_expr1 = Located::new(1, 1, 11, 12, expr1);
@ -1870,8 +1873,10 @@ mod test_parse {
guard: None, guard: None,
}); });
let newlines = bumpalo::vec![in &arena; Newline]; let newlines = bumpalo::vec![in &arena; Newline];
let pattern2 = let pattern2 = Pattern::SpaceBefore(
Pattern::SpaceBefore(arena.alloc(StrLiteral("mise")), newlines.into_bump_slice()); arena.alloc(StrLiteral(PlainLine("mise"))),
newlines.into_bump_slice(),
);
let loc_pattern2 = Located::new(2, 2, 1, 7, pattern2); let loc_pattern2 = Located::new(2, 2, 1, 7, pattern2);
let expr2 = Num("2"); let expr2 = Num("2");
let loc_expr2 = Located::new(2, 2, 11, 12, expr2); let loc_expr2 = Located::new(2, 2, 11, 12, expr2);
@ -2003,9 +2008,11 @@ mod test_parse {
fn when_with_alternative_patterns() { fn when_with_alternative_patterns() {
let arena = Bump::new(); let arena = Bump::new();
let newlines = bumpalo::vec![in &arena; Newline]; let newlines = bumpalo::vec![in &arena; Newline];
let pattern1 = let pattern1 = Pattern::SpaceBefore(
Pattern::SpaceBefore(arena.alloc(StrLiteral("blah")), newlines.into_bump_slice()); arena.alloc(StrLiteral(PlainLine("blah"))),
let pattern1_alt = StrLiteral("blop"); newlines.into_bump_slice(),
);
let pattern1_alt = StrLiteral(PlainLine("blop"));
let loc_pattern1 = Located::new(1, 1, 1, 7, pattern1); let loc_pattern1 = Located::new(1, 1, 1, 7, pattern1);
let loc_pattern1_alt = Located::new(1, 1, 10, 16, pattern1_alt); let loc_pattern1_alt = Located::new(1, 1, 10, 16, pattern1_alt);
let expr1 = Num("1"); let expr1 = Num("1");
@ -2016,11 +2023,15 @@ mod test_parse {
guard: None, guard: None,
}); });
let newlines = bumpalo::vec![in &arena; Newline]; let newlines = bumpalo::vec![in &arena; Newline];
let pattern2 = let pattern2 = Pattern::SpaceBefore(
Pattern::SpaceBefore(arena.alloc(StrLiteral("foo")), newlines.into_bump_slice()); arena.alloc(StrLiteral(PlainLine("foo"))),
newlines.into_bump_slice(),
);
let newlines = bumpalo::vec![in &arena; Newline]; let newlines = bumpalo::vec![in &arena; Newline];
let pattern2_alt = let pattern2_alt = Pattern::SpaceBefore(
Pattern::SpaceBefore(arena.alloc(StrLiteral("bar")), newlines.into_bump_slice()); arena.alloc(StrLiteral(PlainLine("bar"))),
newlines.into_bump_slice(),
);
let loc_pattern2 = Located::new(2, 2, 1, 6, pattern2); let loc_pattern2 = Located::new(2, 2, 1, 6, pattern2);
let loc_pattern2_alt = Located::new(3, 3, 1, 6, pattern2_alt); let loc_pattern2_alt = Located::new(3, 3, 1, 6, pattern2_alt);
let expr2 = Num("2"); let expr2 = Num("2");
@ -2133,14 +2144,14 @@ mod test_parse {
let def2 = SpaceAfter( let def2 = SpaceAfter(
arena.alloc(Body( arena.alloc(Body(
arena.alloc(Located::new(2, 2, 0, 3, pattern2)), arena.alloc(Located::new(2, 2, 0, 3, pattern2)),
arena.alloc(Located::new(2, 2, 6, 10, Str("hi"))), arena.alloc(Located::new(2, 2, 6, 10, Str(PlainLine("hi")))),
)), )),
newlines2.into_bump_slice(), newlines2.into_bump_slice(),
); );
let def3 = SpaceAfter( let def3 = SpaceAfter(
arena.alloc(Body( arena.alloc(Body(
arena.alloc(Located::new(3, 3, 0, 3, pattern3)), arena.alloc(Located::new(3, 3, 0, 3, pattern3)),
arena.alloc(Located::new(3, 3, 6, 13, Str("stuff"))), arena.alloc(Located::new(3, 3, 6, 13, Str(PlainLine("stuff")))),
)), )),
newlines3.into_bump_slice(), newlines3.into_bump_slice(),
); );
@ -2426,7 +2437,7 @@ mod test_parse {
// ) // )
// "# // "#
// ), // ),
// Str(""), // Str(PlainLine("")),
// ); // );
// } // }

View file

@ -787,8 +787,7 @@ pub fn annotate_usage(expr: &Expr, usage: &mut VarUsage) {
| Num(_, _) | Num(_, _)
| Int(_, _) | Int(_, _)
| Float(_, _) | Float(_, _)
| Str(_) | Str { .. }
| BlockStr(_)
| EmptyRecord | EmptyRecord
| Accessor { .. } | Accessor { .. }
| RunLowLevel { .. } => {} | RunLowLevel { .. } => {}