Get things compiling

This commit is contained in:
Richard Feldman 2020-08-26 21:12:44 -04:00
parent 70bef827a7
commit f35e43768a
18 changed files with 541 additions and 253 deletions

View file

@ -6,7 +6,7 @@ use roc_module::ident::{Lowercase, TagName};
use roc_module::operator::CalledVia;
use roc_module::symbol::{Interns, ModuleId, Symbol};
use roc_mono::layout::{Builtin, Layout};
use roc_parse::ast::{AssignedField, Expr};
use roc_parse::ast::{AssignedField, Expr, StrLiteral};
use roc_region::all::{Located, Region};
use roc_types::subs::{Content, FlatType, Subs, Variable};
use roc_types::types::RecordField;
@ -90,7 +90,7 @@ fn jit_to_ast_help<'a>(
execution_engine,
main_fn_name,
&'static str,
|string: &'static str| { Expr::Str(env.arena.alloc(string)) }
|string: &'static str| { str_slice_to_ast(env.arena, env.arena.alloc(string)) }
),
Layout::Builtin(Builtin::EmptyList) => {
jit_map!(execution_engine, main_fn_name, &'static str, |_| {
@ -168,11 +168,11 @@ fn ptr_to_ast<'a>(
list_to_ast(env, ptr, len, elem_layout, content)
}
Layout::Builtin(Builtin::EmptyStr) => Expr::Str(""),
Layout::Builtin(Builtin::EmptyStr) => Expr::Str(StrLiteral::PlainLine("")),
Layout::Builtin(Builtin::Str) => {
let arena_str = unsafe { *(ptr as *const &'static str) };
Expr::Str(arena_str)
str_slice_to_ast(env.arena, arena_str)
}
Layout::Struct(field_layouts) => match content {
Content::Structure(FlatType::Record(fields, _)) => {
@ -405,3 +405,10 @@ fn i64_to_ast(arena: &Bump, num: i64) -> Expr<'_> {
fn f64_to_ast(arena: &Bump, num: f64) -> Expr<'_> {
Expr::Num(arena.alloc(format!("{}", num)))
}
/// Turn a string slice obtained from evaluated code into a string-literal AST node.
///
/// Strings without any newline are wrapped as-is in a `StrLiteral::PlainLine`.
/// Rendering strings that contain newlines as multiline (block) literals is not
/// implemented yet, so that case still hits `todo!`.
///
/// `_arena` is currently unused; it is kept in the signature so existing callers
/// continue to compile unchanged.
///
/// NOTE(review): the slice is not re-escaped here, so a string containing `"` or `\`
/// is passed through verbatim — confirm the formatter copes with that.
fn str_slice_to_ast<'a>(_arena: &'a Bump, string: &'a str) -> Expr<'a> {
    if string.contains('\n') {
        todo!(
            "if this string contains newlines, render it as a multiline string: {:?}",
            Expr::Str(StrLiteral::PlainLine(string))
        );
    }

    // No newlines: a plain single-line literal represents the string exactly.
    Expr::Str(StrLiteral::PlainLine(string))
}

View file

@ -232,6 +232,12 @@ mod repl_eval {
);
}
// Checks that a string value containing newlines is printed back as a `"""` block string.
//
// The input is a raw Rust string, so the Roc source passed in contains the two-character
// escape sequences `\n` rather than real line breaks. The expected output is an ordinary
// Rust string: real newlines wrapped in triple quotes.
//
// NOTE(review): `str_slice_to_ast` still hits `todo!` for strings containing newlines,
// so this test presumably cannot pass until multiline rendering is implemented — confirm.
#[test]
fn multiline_string() {
// If a string contains newlines, format it as a multiline string in the output
expect_success(r#""\n\nhi!\n\n""#, "\"\"\"\n\nhi!\n\n\"\"\"");
}
// TODO uncomment this once https://github.com/rtfeldman/roc/issues/295 is done
//
// #[test]

View file

@ -14,7 +14,7 @@ use roc_module::ident::{Lowercase, TagName};
use roc_module::low_level::LowLevel;
use roc_module::operator::CalledVia;
use roc_module::symbol::Symbol;
use roc_parse::ast;
use roc_parse::ast::{self, StrLiteral, StrSegment};
use roc_parse::pattern::PatternType::*;
use roc_problem::can::{PrecedenceProblem, Problem, RuntimeError};
use roc_region::all::{Located, Region};
@ -55,8 +55,10 @@ pub enum Expr {
// Int and Float store a variable to generate better error messages
Int(Variable, i64),
Float(Variable, f64),
Str(Box<str>),
BlockStr(Box<str>),
Str {
interpolations: Vec<(Box<str>, Symbol)>,
suffix: Box<str>,
},
List {
list_var: Variable, // required for uniqueness of the list
elem_var: Variable,
@ -247,12 +249,7 @@ pub fn canonicalize_expr<'a>(
)
}
}
ast::Expr::Str(string) => (Str((*string).into()), Output::default()),
ast::Expr::BlockStr(lines) => {
let joined = lines.iter().copied().collect::<Vec<&str>>().join("\n");
(BlockStr(joined.into()), Output::default())
}
ast::Expr::Str(literal) => flatten_str_literal(env, scope, literal),
ast::Expr::List(loc_elems) => {
if loc_elems.is_empty() {
(
@ -1045,8 +1042,7 @@ pub fn inline_calls(var_store: &mut VarStore, scope: &mut Scope, expr: Expr) ->
other @ Num(_, _)
| other @ Int(_, _)
| other @ Float(_, _)
| other @ Str(_)
| other @ BlockStr(_)
| other @ Str { .. }
| other @ RuntimeError(_)
| other @ EmptyRecord
| other @ Accessor { .. }
@ -1323,3 +1319,78 @@ pub fn inline_calls(var_store: &mut VarStore, scope: &mut Scope, expr: Expr) ->
}
}
}
fn flatten_str_literal(
env: &mut Env<'_>,
scope: &mut Scope,
literal: &StrLiteral<'_>,
) -> (Expr, Output) {
use ast::StrLiteral::*;
match literal {
PlainLine(str_slice) => (
Expr::Str {
interpolations: Vec::new(),
suffix: (*str_slice).into(),
},
Output::default(),
),
LineWithEscapes(segments) => flatten_str_lines(env, scope, &[segments]),
Block(lines) => flatten_str_lines(env, scope, lines),
}
}
fn flatten_str_lines(
env: &mut Env<'_>,
scope: &mut Scope,
lines: &[&[StrSegment<'_>]],
) -> (Expr, Output) {
use StrSegment::*;
let mut buf = String::new();
let mut interpolations = Vec::new();
let mut output = Output::default();
for line in lines {
for segment in line.iter() {
match segment {
Plaintext(string) => {
buf.push_str(string);
}
Unicode(loc_digits) => {
todo!("parse unicode digits {:?}", loc_digits);
}
Interpolated {
module_name,
ident,
region,
} => {
let (expr, new_output) =
canonicalize_lookup(env, scope, module_name, ident, region.clone());
output.union(new_output);
match expr {
Expr::Var(symbol) => {
interpolations.push((buf.into(), symbol));
}
_ => {
todo!("TODO gracefully handle non-ident in string interpolation.");
}
}
buf = String::new();
}
EscapedChar(ch) => buf.push(*ch),
}
}
}
(
Expr::Str {
interpolations,
suffix: buf.into(),
},
output,
)
}

View file

@ -68,8 +68,6 @@ pub fn desugar_expr<'a>(arena: &'a Bump, loc_expr: &'a Located<Expr<'a>>) -> &'a
| Nested(NonBase10Int { .. })
| Str(_)
| Nested(Str(_))
| BlockStr(_)
| Nested(BlockStr(_))
| AccessorFunction(_)
| Nested(AccessorFunction(_))
| Var { .. }

View file

@ -4,7 +4,7 @@ use crate::num::{finish_parsing_base, finish_parsing_float, finish_parsing_int};
use crate::scope::Scope;
use roc_module::ident::{Ident, Lowercase, TagName};
use roc_module::symbol::Symbol;
use roc_parse::ast;
use roc_parse::ast::{self, StrLiteral, StrSegment};
use roc_parse::pattern::PatternType;
use roc_problem::can::{MalformedPatternProblem, Problem, RuntimeError};
use roc_region::all::{Located, Region};
@ -230,16 +230,8 @@ pub fn canonicalize_pattern<'a>(
ptype => unsupported_pattern(env, ptype, region),
},
StrLiteral(string) => match pattern_type {
WhenBranch => {
// TODO report whether string was malformed
Pattern::StrLiteral((*string).into())
}
ptype => unsupported_pattern(env, ptype, region),
},
BlockStrLiteral(_lines) => match pattern_type {
WhenBranch => todo!("TODO block string literal pattern"),
StrLiteral(literal) => match pattern_type {
WhenBranch => flatten_str_literal(literal),
ptype => unsupported_pattern(env, ptype, region),
},
@ -473,3 +465,38 @@ fn add_bindings_from_patterns(
| UnsupportedPattern(_) => (),
}
}
fn flatten_str_literal(literal: &StrLiteral<'_>) -> Pattern {
use ast::StrLiteral::*;
match literal {
PlainLine(str_slice) => Pattern::StrLiteral((*str_slice).into()),
LineWithEscapes(segments) => flatten_str_lines(&[segments]),
Block(lines) => flatten_str_lines(lines),
}
}
fn flatten_str_lines(lines: &[&[StrSegment<'_>]]) -> Pattern {
use StrSegment::*;
let mut buf = String::new();
for line in lines {
for segment in line.iter() {
match segment {
Plaintext(string) => {
buf.push_str(string);
}
Unicode(loc_digits) => {
todo!("parse unicode digits {:?}", loc_digits);
}
Interpolated { region, .. } => {
return Pattern::UnsupportedPattern(region.clone());
}
EscapedChar(ch) => buf.push(*ch),
}
}
}
Pattern::StrLiteral(buf.into())
}

View file

@ -1236,104 +1236,112 @@ mod test_can {
// );
// }
// #[test]
// fn string_with_interpolation_at_start() {
// let input = indoc!(
// r#"
// "\(abc)defg"
// "#
// );
// let (args, ret) = (vec![("", Located::new(0, 2, 0, 4, Var("abc")))], "defg");
// let arena = Bump::new();
// let actual = parse_with(&arena, input);
#[test]
fn string_with_interpolation_at_start() {
let src = indoc!(
r#"
"\(abc)defg"
"#
);
let arena = Bump::new();
let CanExprOut {
loc_expr, problems, ..
} = can_expr_with(&arena, test_home(), src);
assert_eq!(problems, Vec::new());
// let (args, ret) = (vec![("", Located::new(0, 2, 0, 4, Var("abc")))], "defg");
// let arena = Bump::new();
// let actual = parse_with(&arena, input);
// assert_eq!(
// Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))),
// actual
// );
// }
// assert_eq!(
// Ok(Expr::InterpolatedStr(&(
// arena.alloc_slice_clone(&args),
// ret
// ))),
// actual
// );
}
// #[test]
// fn string_with_interpolation_at_end() {
// let input = indoc!(
// r#"
// "abcd\(efg)"
// "#
// );
// let (args, ret) = (vec![("abcd", Located::new(0, 6, 0, 8, Var("efg")))], "");
// let arena = Bump::new();
// let actual = parse_with(&arena, input);
#[test]
fn string_with_interpolation_at_end() {
let src = indoc!(
r#"
"abcd\(efg)"
"#
);
// let (args, ret) = (vec![("abcd", Located::new(0, 6, 0, 8, Var("efg")))], "");
// let arena = Bump::new();
// let actual = parse_with(&arena, input);
// assert_eq!(
// Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))),
// actual
// );
// }
// assert_eq!(
// Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))),
// actual
// );
}
// #[test]
// fn string_with_interpolation_in_middle() {
// let input = indoc!(
// r#"
// "abc\(defg)hij"
// "#
// );
// let (args, ret) = (vec![("abc", Located::new(0, 5, 0, 8, Var("defg")))], "hij");
// let arena = Bump::new();
// let actual = parse_with(&arena, input);
#[test]
fn string_with_interpolation_in_middle() {
let src = indoc!(
r#"
"abc\(defg)hij"
"#
);
// let (args, ret) = (vec![("abc", Located::new(0, 5, 0, 8, Var("defg")))], "hij");
// let arena = Bump::new();
// let actual = parse_with(&arena, input);
// assert_eq!(
// Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))),
// actual
// );
// }
// assert_eq!(
// Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))),
// actual
// );
}
// #[test]
// fn string_with_two_interpolations_in_middle() {
// let input = indoc!(
// r#"
// "abc\(defg)hi\(jkl)mn"
// "#
// );
// let (args, ret) = (
// vec![
// ("abc", Located::new(0, 5, 0, 8, Var("defg"))),
// ("hi", Located::new(0, 14, 0, 16, Var("jkl"))),
// ],
// "mn",
// );
// let arena = Bump::new();
// let actual = parse_with(&arena, input);
#[test]
fn string_with_two_interpolations_in_middle() {
let src = indoc!(
r#"
"abc\(defg)hi\(jkl)mn"
"#
);
// let (args, ret) = (
// vec![
// ("abc", Located::new(0, 5, 0, 8, Var("defg"))),
// ("hi", Located::new(0, 14, 0, 16, Var("jkl"))),
// ],
// "mn",
// );
// let arena = Bump::new();
// let actual = parse_with(&arena, input);
// assert_eq!(
// Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))),
// actual
// );
// }
// assert_eq!(
// Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))),
// actual
// );
}
// #[test]
// fn string_with_four_interpolations() {
// let input = indoc!(
// r#"
// "\(abc)def\(ghi)jkl\(mno)pqrs\(tuv)"
// "#
// );
// let (args, ret) = (
// vec![
// ("", Located::new(0, 2, 0, 4, Var("abc"))),
// ("def", Located::new(0, 11, 0, 13, Var("ghi"))),
// ("jkl", Located::new(0, 20, 0, 22, Var("mno"))),
// ("pqrs", Located::new(0, 30, 0, 32, Var("tuv"))),
// ],
// "",
// );
// let arena = Bump::new();
// let actual = parse_with(&arena, input);
#[test]
fn string_with_four_interpolations() {
let src = indoc!(
r#"
"\(abc)def\(ghi)jkl\(mno)pqrs\(tuv)"
"#
);
// let (args, ret) = (
// vec![
// ("", Located::new(0, 2, 0, 4, Var("abc"))),
// ("def", Located::new(0, 11, 0, 13, Var("ghi"))),
// ("jkl", Located::new(0, 20, 0, 22, Var("mno"))),
// ("pqrs", Located::new(0, 30, 0, 32, Var("tuv"))),
// ],
// "",
// );
// let arena = Bump::new();
// let actual = parse_with(&arena, input);
// assert_eq!(
// Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))),
// actual
// );
// }
// assert_eq!(
// Ok(InterpolatedStr(&(arena.alloc_slice_clone(&args), ret))),
// actual
// );
}
// #[test]
// fn string_with_escaped_interpolation() {
@ -1384,4 +1392,6 @@ mod test_can {
// TODO test hex/oct/binary conversion to numbers
//
// TODO test for \t \r and \n in string literals *outside* unicode escape sequence!
//
// TODO test for multiline block string literals in pattern matches
}

View file

@ -1,4 +1,4 @@
use crate::builtins::{empty_list_type, float_literal, int_literal, list_type, str_type};
use crate::builtins::{empty_list_type, float_literal, int_literal, list_type};
use crate::pattern::{constrain_pattern, PatternState};
use roc_can::annotation::IntroducedVariables;
use roc_can::constraint::Constraint::{self, *};
@ -199,7 +199,15 @@ pub fn constrain_expr(
exists(vars, And(cons))
}
Str(_) | BlockStr(_) => Eq(str_type(), expected, Category::Str, region),
Str { interpolations, .. } => {
todo!(
"constrain interpolations in a string literal {:?}",
interpolations
);
// use crate::builtins::{empty_list_type, float_literal, int_literal, list_type, str_type};
// Eq(str_type(), expected, Category::Str, region)
}
List {
elem_var,
loc_elems,

View file

@ -503,14 +503,15 @@ pub fn constrain_expr(
]),
)
}
BlockStr(_) | Str(_) => {
let uniq_type = var_store.fresh();
let inferred = str_type(Bool::variable(uniq_type));
Str { interpolations, .. } => {
todo!("uniq constrain interpolations {:?}", interpolations);
// let uniq_type = var_store.fresh();
// let inferred = str_type(Bool::variable(uniq_type));
exists(
vec![uniq_type],
Eq(inferred, expected, Category::Str, region),
)
// exists(
// vec![uniq_type],
// Eq(inferred, expected, Category::Str, region),
// )
}
EmptyRecord => {
let uniq_type = var_store.fresh();

View file

@ -28,7 +28,6 @@ impl<'a> Formattable<'a> for Expr<'a> {
Float(_)
| Num(_)
| NonBase10Int { .. }
| Str(_)
| Access(_, _)
| AccessorFunction(_)
| Var { .. }
@ -42,7 +41,13 @@ impl<'a> Formattable<'a> for Expr<'a> {
List(elems) => elems.iter().any(|loc_expr| loc_expr.is_multiline()),
BlockStr(lines) => lines.len() > 1,
Str(literal) => {
todo!(
"fmt determine if string literal is multiline: {:?}",
literal
);
// lines.len() > 1
}
Apply(loc_expr, args, _) => {
loc_expr.is_multiline() || args.iter().any(|loc_arg| loc_arg.is_multiline())
}
@ -112,10 +117,19 @@ impl<'a> Formattable<'a> for Expr<'a> {
sub_expr.format_with_options(buf, Parens::NotNeeded, Newlines::Yes, indent);
buf.push(')');
}
Str(string) => {
buf.push('"');
buf.push_str(string);
buf.push('"');
Str(literal) => {
todo!("fmt string literal {:?}", literal);
// buf.push('"');
// buf.push_str(string);
// buf.push('"');
//
// BlockStr(lines) => {
// buf.push_str("\"\"\"");
// for line in lines.iter() {
// buf.push_str(line);
// }
// buf.push_str("\"\"\"");
// }
}
Var { module_name, ident } => {
if !module_name.is_empty() {
@ -152,13 +166,6 @@ impl<'a> Formattable<'a> for Expr<'a> {
buf.push(')');
}
}
BlockStr(lines) => {
buf.push_str("\"\"\"");
for line in lines.iter() {
buf.push_str(line);
}
buf.push_str("\"\"\"");
}
Num(string) | Float(string) | GlobalTag(string) | PrivateTag(string) => {
buf.push_str(string)
}

View file

@ -37,7 +37,6 @@ impl<'a> Formattable<'a> for Pattern<'a> {
| Pattern::NonBase10Literal { .. }
| Pattern::FloatLiteral(_)
| Pattern::StrLiteral(_)
| Pattern::BlockStrLiteral(_)
| Pattern::Underscore
| Pattern::Malformed(_)
| Pattern::QualifiedIdentifier { .. } => false,
@ -126,11 +125,8 @@ impl<'a> Formattable<'a> for Pattern<'a> {
buf.push_str(string);
}
FloatLiteral(string) => buf.push_str(string),
StrLiteral(string) => buf.push_str(string),
BlockStrLiteral(lines) => {
for line in *lines {
buf.push_str(line)
}
StrLiteral(literal) => {
todo!("Format string literal: {:?}", literal);
}
Underscore => buf.push('_'),

View file

@ -222,10 +222,22 @@ pub fn build_exp_literal<'a, 'ctx, 'env>(
Float(num) => env.context.f64_type().const_float(*num).into(),
Bool(b) => env.context.bool_type().const_int(*b as u64, false).into(),
Byte(b) => env.context.i8_type().const_int(*b as u64, false).into(),
Str(str_literal) => {
if str_literal.is_empty() {
Str {
interpolations,
suffix,
} => {
if interpolations.is_empty() && suffix.is_empty() {
empty_list(env)
} else {
if !interpolations.is_empty() {
todo!(
"LLVM code gen for string interpolations: {:?}",
interpolations
);
}
let mut str_literal = suffix; // TODO REMOVE THIS
let ctx = env.context;
let builder = env.builder;

View file

@ -1095,7 +1095,10 @@ fn test_to_equality<'a>(
}
Test::IsStr(test_str) => {
let lhs = Expr::Literal(Literal::Str(env.arena.alloc(test_str)));
let lhs = Expr::Literal(Literal::Str {
interpolations: &[],
suffix: env.arena.alloc(test_str),
});
let lhs_symbol = env.unique_symbol();
let (mut stores, rhs_symbol) = path_to_expr(env, cond_symbol, &path, &cond_layout);

View file

@ -590,7 +590,10 @@ pub enum Literal<'a> {
// Literals
Int(i64),
Float(f64),
Str(&'a str),
Str {
interpolations: &'a [(&'a str, Symbol)],
suffix: &'a str,
},
/// Closed tag unions containing exactly two (0-arity) tags compile to Expr::Bool,
/// so they can (at least potentially) be emitted as 1-bit machine bools.
///
@ -669,7 +672,13 @@ impl<'a> Literal<'a> {
Float(lit) => alloc.text(format!("{}f64", lit)),
Bool(lit) => alloc.text(format!("{}", lit)),
Byte(lit) => alloc.text(format!("{}u8", lit)),
Str(lit) => alloc.text(format!("{:?}", lit)),
Str {
interpolations,
suffix,
} => {
// alloc.text(format!("{:?}", lit))
todo!("Literal::to_doc for Str");
}
}
}
}
@ -1242,12 +1251,18 @@ pub fn with_hole<'a>(
hole,
),
Str(string) | BlockStr(string) => Stmt::Let(
assigned,
Expr::Literal(Literal::Str(arena.alloc(string))),
Layout::Builtin(Builtin::Str),
hole,
),
Str {
interpolations,
suffix: _,
} => {
todo!("mono IR to turn Str interpolations into Let");
// Stmt::Let(
// assigned,
// Expr::Literal(Literal::Str(arena.alloc(string))),
// Layout::Builtin(Builtin::Str),
// hole,
// )
}
Num(var, num) => match num_argument_to_int_or_float(env.subs, var) {
IntOrFloat::IntType => Stmt::Let(

View file

@ -84,6 +84,26 @@ pub struct WhenPattern<'a> {
pub guard: Option<Loc<Expr<'a>>>,
}
/// One piece of a parsed string literal.
///
/// The parser splits a literal into segments wherever an escape occurs;
/// canonicalization later flattens the segments back into a single string
/// (plus interpolated symbols, for expressions).
#[derive(Clone, Debug, PartialEq)]
pub enum StrSegment<'a> {
/// A run of literal text containing no escapes.
Plaintext(&'a str), // e.g. "foo"
/// The digits of a unicode escape, together with their source location.
Unicode(Loc<&'a str>), // e.g. "00A0" in "\u(00A0)"
/// A single escaped character.
EscapedChar(char), // e.g. '\n' in "Hello!\n"
/// A reference to an identifier spliced into the string.
Interpolated {
// e.g. "App.version" in "Version: \(App.version)"
// `module_name` and `ident` are passed on to identifier lookup during
// canonicalization; `region` is the source region handed to that lookup.
module_name: &'a str,
ident: &'a str,
region: Region,
},
}
/// A parsed string literal, as stored in `Expr::Str` and `Pattern::StrLiteral`.
#[derive(Clone, Debug, PartialEq)]
pub enum StrLiteral<'a> {
/// A single-line literal that needed no segmentation: the whole contents as one slice.
PlainLine(&'a str),
/// A single-line literal that was split into several segments (e.g. around escapes).
LineWithEscapes(&'a [StrSegment<'a>]),
/// A block string: a slice of segment slices.
/// NOTE(review): presumably one inner slice per line of the block — confirm
/// once block-string parsing is finished.
Block(&'a [&'a [StrSegment<'a>]]),
}
/// A parsed expression. This uses lifetimes extensively for two reasons:
///
/// 1. It uses Bump::alloc for all allocations, which returns a reference.
@ -105,8 +125,7 @@ pub enum Expr<'a> {
},
// String Literals
Str(&'a str),
BlockStr(&'a [&'a str]),
Str(StrLiteral<'a>), // string without escapes in it
/// Look up exactly one field on a record, e.g. (expr).foo.
Access(&'a Expr<'a>, &'a str),
/// e.g. `.foo`
@ -336,8 +355,7 @@ pub enum Pattern<'a> {
is_negative: bool,
},
FloatLiteral(&'a str),
StrLiteral(&'a str),
BlockStrLiteral(&'a [&'a str]),
StrLiteral(StrLiteral<'a>),
Underscore,
// Space
@ -455,7 +473,6 @@ impl<'a> Pattern<'a> {
) => string_x == string_y && base_x == base_y && is_negative_x == is_negative_y,
(FloatLiteral(x), FloatLiteral(y)) => x == y,
(StrLiteral(x), StrLiteral(y)) => x == y,
(BlockStrLiteral(x), BlockStrLiteral(y)) => x == y,
(Underscore, Underscore) => true,
// Space
@ -584,7 +601,7 @@ impl<'a> Spaceable<'a> for Def<'a> {
pub enum Attempting {
List,
Keyword,
StringLiteral,
StrLiteral,
RecordLiteral,
RecordFieldLabel,
InterpolatedString,

View file

@ -300,12 +300,8 @@ fn expr_to_pattern<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<'a>,
base: *base,
is_negative: *is_negative,
}),
Expr::Str(string) => Ok(Pattern::StrLiteral(string)),
Expr::MalformedIdent(string) => Ok(Pattern::Malformed(string)),
// These would not have parsed as patterns
Expr::BlockStr(_)
| Expr::AccessorFunction(_)
Expr::AccessorFunction(_)
| Expr::Access(_, _)
| Expr::List(_)
| Expr::Closure(_, _)
@ -322,6 +318,9 @@ fn expr_to_pattern<'a>(arena: &'a Bump, expr: &Expr<'a>) -> Result<Pattern<'a>,
attempting: Attempting::Def,
reason: FailReason::InvalidPattern,
}),
Expr::Str(string) => Ok(Pattern::StrLiteral(string.clone())),
Expr::MalformedIdent(string) => Ok(Pattern::Malformed(string)),
}
}
@ -580,11 +579,7 @@ fn annotation_or_alias<'a>(
QualifiedIdentifier { .. } => {
panic!("TODO gracefully handle trying to annotate a qualified identifier, e.g. `Foo.bar : ...`");
}
NumLiteral(_)
| NonBase10Literal { .. }
| FloatLiteral(_)
| StrLiteral(_)
| BlockStrLiteral(_) => {
NumLiteral(_) | NonBase10Literal { .. } | FloatLiteral(_) | StrLiteral(_) => {
panic!("TODO gracefully handle trying to annotate a litera");
}
Underscore => {
@ -916,10 +911,7 @@ fn number_pattern<'a>() -> impl Parser<'a, Pattern<'a>> {
}
fn string_pattern<'a>() -> impl Parser<'a, Pattern<'a>> {
map!(crate::string_literal::parse(), |result| match result {
crate::string_literal::StringLiteral::Line(string) => Pattern::StrLiteral(string),
crate::string_literal::StringLiteral::Block(lines) => Pattern::BlockStrLiteral(lines),
})
map!(crate::string_literal::parse(), Pattern::StrLiteral)
}
fn underscore_pattern<'a>() -> impl Parser<'a, Pattern<'a>> {
@ -1789,8 +1781,5 @@ pub fn global_tag<'a>() -> impl Parser<'a, &'a str> {
}
pub fn string_literal<'a>() -> impl Parser<'a, Expr<'a>> {
map!(crate::string_literal::parse(), |result| match result {
crate::string_literal::StringLiteral::Line(string) => Expr::Str(string),
crate::string_literal::StringLiteral::Block(lines) => Expr::BlockStr(lines),
})
map!(crate::string_literal::parse(), Expr::Str)
}

View file

@ -1,71 +1,110 @@
use crate::ast::Attempting;
use crate::ast::{Attempting, StrLiteral, StrSegment};
use crate::parser::{parse_utf8, unexpected, unexpected_eof, ParseResult, Parser, State};
use bumpalo::collections::vec::Vec;
use bumpalo::Bump;
pub enum StringLiteral<'a> {
Line(&'a str),
Block(&'a [&'a str]),
}
pub fn parse<'a>() -> impl Parser<'a, StrLiteral<'a>> {
use StrLiteral::*;
pub fn parse<'a>() -> impl Parser<'a, StringLiteral<'a>> {
move |arena: &'a Bump, state: State<'a>| {
let mut bytes = state.bytes.iter();
// String literals must start with a quote.
// If this doesn't, it must not be a string literal!
match bytes.next() {
Some(&byte) => {
if byte != b'"' {
return Err(unexpected(0, state, Attempting::StringLiteral));
return Err(unexpected(0, state, Attempting::StrLiteral));
}
}
None => {
return Err(unexpected_eof(0, Attempting::StringLiteral, state));
return Err(unexpected_eof(0, Attempting::StrLiteral, state));
}
}
// The current segment begins right after the opening quotation mark.
let mut cur_segment = &state.bytes[1..];
enum EscapeState {
None,
Unicode,
Interpolation,
}
// At the parsing stage we keep the entire raw string, because the formatter
// needs the raw string. (For example, so it can "remember" whether you
// wrote \u{...} or the actual unicode character itself.)
//
// Later, in canonicalization, we'll do things like resolving
// unicode escapes and string interpolation.
//
// Since we're keeping the entire raw string, all we need to track is
// how many characters we've parsed. So far, that's 1 (the opening `"`).
let mut parsed_chars = 1;
let mut prev_byte = b'"';
let mut total_parsed_chars = 1;
let mut segment_parsed_chars = 0;
let mut segments = Vec::new_in(arena);
let mut escape_state = EscapeState::None;
// pub enum StrSegment<'a> {
// Plaintext(&'a str), // e.g. "foo"
// Unicode(&'a str), // e.g. "00A0" in "\u(00A0)"
// Interpolated(&'a str), // e.g. "name" in "Hi, \(name)!"
// EscapedChar(char), // e.g. '\n' in "Hello!\n"
// }
while let Some(&byte) = bytes.next() {
parsed_chars += 1;
segment_parsed_chars += 1;
// Potentially end the string (unless this is an escaped `"`!)
match byte {
b'"' if prev_byte != b'\\' => {
let (string, state) = if parsed_chars == 2 {
match bytes.next() {
Some(b'"') => {
// If the first three chars were all `"`, then this
// literal begins with `"""` and is a block string.
return parse_block_string(arena, state, &mut bytes);
b'"' => {
// If we aren't escaping, then this is the end of the string!
if let EscapeState::None = escape_state {
let (literal, state) = if total_parsed_chars == 1 && segments.is_empty() {
match bytes.next() {
Some(b'"') => {
// If the very first three chars were all `"`,
// then this literal begins with `"""`
// and is a block string.
return parse_block_string(arena, state, &mut bytes);
}
_ => (PlainLine(""), state.advance_without_indenting(2)?),
}
_ => ("", state.advance_without_indenting(2)?),
}
} else {
// Subtract 1 from parsed_chars so we omit the closing `"`.
let string_bytes = &cur_segment[0..(segment_parsed_chars - 1)];
match parse_utf8(string_bytes) {
Ok(string) => {
total_parsed_chars += segment_parsed_chars;
let state =
state.advance_without_indenting(total_parsed_chars)?;
if segments.is_empty() {
// We only had one segment.
(StrLiteral::PlainLine(string), state)
} else {
// We had multiple segments! Parse the
// current one and add it to the list.
segments.push(StrSegment::Plaintext(string));
(LineWithEscapes(segments.into_bump_slice()), state)
}
}
Err(reason) => {
return state.fail(reason);
}
}
};
return Ok((literal, state));
} else {
// Start at 1 so we omit the opening `"`.
// Subtract 1 from parsed_chars so we omit the closing `"`.
let string_bytes = &state.bytes[1..(parsed_chars - 1)];
match parse_utf8(string_bytes) {
Ok(string) => (string, state.advance_without_indenting(parsed_chars)?),
Err(reason) => {
return state.fail(reason);
}
}
};
return Ok((StringLiteral::Line(string), state));
// We are escaping, so this is an error. (If it were an
// escaped single character like \" then we would have
// handled that scenario already.)
return Err(unexpected(
state.bytes.len() - 1,
state,
Attempting::StrLiteral,
));
}
}
b'\n' => {
// This is a single-line string, which cannot have newlines!
@ -76,19 +115,90 @@ pub fn parse<'a>() -> impl Parser<'a, StringLiteral<'a>> {
return Err(unexpected(
state.bytes.len() - 1,
state,
Attempting::StringLiteral,
Attempting::StrLiteral,
));
}
b')' => {
// All escape sequences end in a close paren, so we don't
// need to pay for a conditional here. If it was an escape,
// then we want to set it to None, and if it wasn't an
// escape, then setting it from None to None is harmless!
// (And likely cheaper than a conditional.)
escape_state = EscapeState::None;
}
b'\\' => {
// This is the start of a new escape
if let EscapeState::None = escape_state {
match bytes.next() {
Some(b'(') => {
// This is an interpolated variable
escape_state = EscapeState::Interpolation;
todo!("Parse interpolated ident");
}
Some(b'u') => {
escape_state = EscapeState::Unicode;
// This is an escaped unicode character
todo!("Parse '(' and then parse escaped unicode character");
}
Some(ch @ b'\n') | Some(ch @ b'\t') | Some(ch @ b'\r')
| Some(ch @ b'"') | Some(ch @ b'\\') => {
// Record the current segment so we can begin a new one.
match parse_utf8(cur_segment) {
Ok(string) => {
segments.push(StrSegment::Plaintext(string));
}
Err(reason) => {
return state.fail(reason);
}
}
// Record the escaped char.
segments.push(StrSegment::EscapedChar(*ch as char));
// We're now done escaping.
escape_state = EscapeState::None;
// Advance past the segment we just added, and
// also past the escaped char we just added.
//
// +2 because we just parsed a backslash and
// one other char after it.
cur_segment = &cur_segment[(segment_parsed_chars + 2)..];
// Reset segment_parsed_chars to 0 because we're now
// parsing the beginning of a new segment.
segment_parsed_chars = 0;
}
_ => {
// Invalid escape! A backslash must be followed
// by either an open paren or else one of the
// escapable characters (\n, \t, \", \\, etc)
return Err(unexpected(
state.bytes.len() - 1,
state,
Attempting::StrLiteral,
));
}
}
} else {
// Can't have a \ inside an escape!
return Err(unexpected(
state.bytes.len() - 1,
state,
Attempting::StrLiteral,
));
}
}
_ => {
prev_byte = byte;
// All other characters need no special handling.
}
}
}
// We ran out of characters before finding a closed quote
Err(unexpected_eof(
parsed_chars,
Attempting::StringLiteral,
total_parsed_chars,
Attempting::StrLiteral,
state.clone(),
))
}
@ -98,7 +208,7 @@ fn parse_block_string<'a, I>(
arena: &'a Bump,
state: State<'a>,
bytes: &mut I,
) -> ParseResult<'a, StringLiteral<'a>>
) -> ParseResult<'a, StrLiteral<'a>>
where
I: Iterator<Item = &'a u8>,
{
@ -125,12 +235,13 @@ where
let line_bytes = &state.bytes[line_start..(parsed_chars - 3)];
return match parse_utf8(line_bytes) {
Ok(line) => {
let state = state.advance_without_indenting(parsed_chars)?;
Ok(_line) => {
// let state = state.advance_without_indenting(parsed_chars)?;
lines.push(line);
// lines.push(line);
Ok((StringLiteral::Block(arena.alloc(lines)), state))
// Ok((StrLiteral::Block(lines.into_bump_slice()), state))
todo!("TODO finish making block strings accept escapes");
}
Err(reason) => state.fail(reason),
};
@ -164,8 +275,8 @@ where
// We ran out of characters before finding 3 closing quotes
Err(unexpected_eof(
parsed_chars,
// TODO custom BlockStringLiteral?
Attempting::StringLiteral,
// TODO custom BlockStrLiteral?
Attempting::StrLiteral,
state,
))
}

View file

@ -24,6 +24,7 @@ mod test_parse {
use roc_parse::ast::CommentOrNewline::*;
use roc_parse::ast::Expr::{self, *};
use roc_parse::ast::Pattern::{self, *};
use roc_parse::ast::StrLiteral::*;
use roc_parse::ast::{
Attempting, Def, InterfaceHeader, Spaceable, Tag, TypeAnnotation, WhenBranch,
};
@ -51,7 +52,7 @@ mod test_parse {
// STRING LITERALS
fn expect_parsed_str(input: &str, expected: &str) {
assert_parses_to(expected, Str(input.into()));
assert_parses_to(expected, Expr::Str(PlainLine(input)));
}
#[test]
@ -62,7 +63,7 @@ mod test_parse {
""
"#
),
Str(""),
Str(PlainLine("")),
);
}
@ -71,10 +72,10 @@ mod test_parse {
assert_parses_to(
indoc!(
r#"
"x"
"x"
"#
),
Str("x".into()),
Expr::Str(PlainLine("x".into())),
);
}
@ -83,10 +84,10 @@ mod test_parse {
assert_parses_to(
indoc!(
r#"
"foo"
"foo"
"#
),
Str("foo".into()),
Expr::Str(PlainLine("foo".into())),
);
}
@ -1859,8 +1860,10 @@ mod test_parse {
fn two_branch_when() {
let arena = Bump::new();
let newlines = bumpalo::vec![in &arena; Newline];
let pattern1 =
Pattern::SpaceBefore(arena.alloc(StrLiteral("blah")), newlines.into_bump_slice());
let pattern1 = Pattern::SpaceBefore(
arena.alloc(StrLiteral(PlainLine("blah"))),
newlines.into_bump_slice(),
);
let loc_pattern1 = Located::new(1, 1, 1, 7, pattern1);
let expr1 = Num("1");
let loc_expr1 = Located::new(1, 1, 11, 12, expr1);
@ -1870,8 +1873,10 @@ mod test_parse {
guard: None,
});
let newlines = bumpalo::vec![in &arena; Newline];
let pattern2 =
Pattern::SpaceBefore(arena.alloc(StrLiteral("mise")), newlines.into_bump_slice());
let pattern2 = Pattern::SpaceBefore(
arena.alloc(StrLiteral(PlainLine("mise"))),
newlines.into_bump_slice(),
);
let loc_pattern2 = Located::new(2, 2, 1, 7, pattern2);
let expr2 = Num("2");
let loc_expr2 = Located::new(2, 2, 11, 12, expr2);
@ -1891,9 +1896,9 @@ mod test_parse {
&arena,
indoc!(
r#"
when x is
"blah" -> 1
"mise" -> 2
when x is
"blah" -> 1
"mise" -> 2
"#
),
);
@ -2003,9 +2008,11 @@ mod test_parse {
fn when_with_alternative_patterns() {
let arena = Bump::new();
let newlines = bumpalo::vec![in &arena; Newline];
let pattern1 =
Pattern::SpaceBefore(arena.alloc(StrLiteral("blah")), newlines.into_bump_slice());
let pattern1_alt = StrLiteral("blop");
let pattern1 = Pattern::SpaceBefore(
arena.alloc(StrLiteral(PlainLine("blah"))),
newlines.into_bump_slice(),
);
let pattern1_alt = StrLiteral(PlainLine("blop"));
let loc_pattern1 = Located::new(1, 1, 1, 7, pattern1);
let loc_pattern1_alt = Located::new(1, 1, 10, 16, pattern1_alt);
let expr1 = Num("1");
@ -2016,11 +2023,15 @@ mod test_parse {
guard: None,
});
let newlines = bumpalo::vec![in &arena; Newline];
let pattern2 =
Pattern::SpaceBefore(arena.alloc(StrLiteral("foo")), newlines.into_bump_slice());
let pattern2 = Pattern::SpaceBefore(
arena.alloc(StrLiteral(PlainLine("foo"))),
newlines.into_bump_slice(),
);
let newlines = bumpalo::vec![in &arena; Newline];
let pattern2_alt =
Pattern::SpaceBefore(arena.alloc(StrLiteral("bar")), newlines.into_bump_slice());
let pattern2_alt = Pattern::SpaceBefore(
arena.alloc(StrLiteral(PlainLine("bar"))),
newlines.into_bump_slice(),
);
let loc_pattern2 = Located::new(2, 2, 1, 6, pattern2);
let loc_pattern2_alt = Located::new(3, 3, 1, 6, pattern2_alt);
let expr2 = Num("2");
@ -2133,14 +2144,14 @@ mod test_parse {
let def2 = SpaceAfter(
arena.alloc(Body(
arena.alloc(Located::new(2, 2, 0, 3, pattern2)),
arena.alloc(Located::new(2, 2, 6, 10, Str("hi"))),
arena.alloc(Located::new(2, 2, 6, 10, Str(PlainLine("hi")))),
)),
newlines2.into_bump_slice(),
);
let def3 = SpaceAfter(
arena.alloc(Body(
arena.alloc(Located::new(3, 3, 0, 3, pattern3)),
arena.alloc(Located::new(3, 3, 6, 13, Str("stuff"))),
arena.alloc(Located::new(3, 3, 6, 13, Str(PlainLine("stuff")))),
)),
newlines3.into_bump_slice(),
);
@ -2426,7 +2437,7 @@ mod test_parse {
// )
// "#
// ),
// Str(""),
// Str(PlainLine("")),
// );
// }

View file

@ -787,8 +787,7 @@ pub fn annotate_usage(expr: &Expr, usage: &mut VarUsage) {
| Num(_, _)
| Int(_, _)
| Float(_, _)
| Str(_)
| BlockStr(_)
| Str { .. }
| EmptyRecord
| Accessor { .. }
| RunLowLevel { .. } => {}