Implement string interpolation

This commit is contained in:
Shunsuke Shibayama 2022-12-05 21:24:10 +09:00
parent c569df390c
commit 47132cfab1
4 changed files with 345 additions and 81 deletions

View file

@ -1794,6 +1794,7 @@ impl Context {
let t_exit = t_quit.clone(); let t_exit = t_quit.clone();
let t_repr = nd_func(vec![kw("object", Obj)], None, Str); let t_repr = nd_func(vec![kw("object", Obj)], None, Str);
let t_round = nd_func(vec![kw("number", Float)], None, Int); let t_round = nd_func(vec![kw("number", Float)], None, Int);
let t_str = nd_func(vec![kw("object", Obj)], None, Str);
let t_unreachable = nd_func(vec![], None, Never); let t_unreachable = nd_func(vec![], None, Never);
self.register_builtin_py_impl("abs", t_abs, Immutable, Private, Some("abs")); self.register_builtin_py_impl("abs", t_abs, Immutable, Private, Some("abs"));
self.register_builtin_py_impl("ascii", t_ascii, Immutable, Private, Some("ascii")); self.register_builtin_py_impl("ascii", t_ascii, Immutable, Private, Some("ascii"));
@ -1855,6 +1856,7 @@ impl Context {
self.register_builtin_py_impl("quit", t_quit, Immutable, Private, Some("quit")); self.register_builtin_py_impl("quit", t_quit, Immutable, Private, Some("quit"));
self.register_builtin_py_impl("repr", t_repr, Immutable, Private, Some("repr")); self.register_builtin_py_impl("repr", t_repr, Immutable, Private, Some("repr"));
self.register_builtin_py_impl("round", t_round, Immutable, Private, Some("round")); self.register_builtin_py_impl("round", t_round, Immutable, Private, Some("round"));
self.register_builtin_py_impl("str", t_str, Immutable, Private, Some("str"));
// TODO: original implementation // TODO: original implementation
self.register_builtin_py_impl( self.register_builtin_py_impl(
"unreachable", "unreachable",

View file

@ -69,6 +69,19 @@ impl Runnable for LexerRunner {
} }
} }
#[derive(Debug, PartialEq, Eq)]
pub enum Interpolation {
    /// inside a `"...\{` interpolation of a single-line string literal
    SingleLine,
    /// inside a `"""...\{` interpolation of a multi-line string literal
    MultiLine,
    /// not inside any string interpolation
    Not,
}

impl Interpolation {
    /// Returns `true` while the lexer is currently inside a string
    /// interpolation (single- or multi-line), `false` otherwise.
    pub const fn is_in(&self) -> bool {
        match self {
            Self::SingleLine | Self::MultiLine => true,
            Self::Not => false,
        }
    }
}
/// Lexes a source code and iterates tokens. /// Lexes a source code and iterates tokens.
/// ///
/// This can be used as an iterator or to generate a `TokenStream`. /// This can be used as an iterator or to generate a `TokenStream`.
@ -85,6 +98,7 @@ pub struct Lexer /*<'a>*/ {
lineno_token_starts: usize, lineno_token_starts: usize,
/// 0-origin, indicates the column number in which the token appears /// 0-origin, indicates the column number in which the token appears
col_token_starts: usize, col_token_starts: usize,
interpol_stack: Vec<Interpolation>,
} }
impl Lexer /*<'a>*/ { impl Lexer /*<'a>*/ {
@ -98,6 +112,7 @@ impl Lexer /*<'a>*/ {
prev_token: Token::new(TokenKind::BOF, "", 0, 0), prev_token: Token::new(TokenKind::BOF, "", 0, 0),
lineno_token_starts: 0, lineno_token_starts: 0,
col_token_starts: 0, col_token_starts: 0,
interpol_stack: vec![Interpolation::Not],
} }
} }
@ -112,6 +127,7 @@ impl Lexer /*<'a>*/ {
prev_token: Token::new(TokenKind::BOF, "", 0, 0), prev_token: Token::new(TokenKind::BOF, "", 0, 0),
lineno_token_starts: 0, lineno_token_starts: 0,
col_token_starts: 0, col_token_starts: 0,
interpol_stack: vec![Interpolation::Not],
} }
} }
@ -623,14 +639,9 @@ impl Lexer /*<'a>*/ {
Ok(self.emit_token(kind, &cont)) Ok(self.emit_token(kind, &cont))
} }
fn lex_str(&mut self) -> LexResult<Token> { fn str_line_break_error(token: Token, line: usize) -> LexError {
let mut s = "\"".to_string(); LexError::syntax_error(
while let Some(c) = self.peek_cur_ch() { line,
match c {
'\n' => {
let token = self.emit_token(Illegal, &s);
return Err(LexError::syntax_error(
0,
token.loc(), token.loc(),
switch_lang!( switch_lang!(
"japanese" => "文字列内で改行をすることはできません", "japanese" => "文字列内で改行をすることはできません",
@ -647,8 +658,75 @@ impl Lexer /*<'a>*/ {
) )
.into(), .into(),
), ),
)); )
} }
/// Builds the syntax error reported for an unrecognized escape
/// sequence `\<ch>` inside a string literal.
/// `token` is the already-emitted `Illegal` token covering the sequence.
fn invalid_escape_error(ch: char, token: Token) -> LexError {
    // Localize the message first, then hand it to the error constructor.
    let desc = switch_lang!(
        "japanese" => format!("不正なエスケープシーケンスです: \\{}", ch),
        "simplified_chinese" => format!("不合法的转义序列: \\{}", ch),
        "traditional_chinese" => format!("不合法的轉義序列: \\{}", ch),
        "english" => format!("illegal escape sequence: \\{}", ch),
    );
    LexError::syntax_error(0, token.loc(), desc, None)
}
/// Builds the syntax error reported when a string literal is never closed.
///
/// `by` is the closing delimiter the lexer was expecting (`"` for
/// single-line strings, `"""` for multi-line strings, or `""` when no
/// specific delimiter applies). `line` is the lexer source line (callers
/// pass `line!() as usize`) used as the error's origin for debugging.
fn unclosed_string_error(token: Token, by: &str, line: usize) -> LexError {
    // Interpolate the actual delimiter into the localized "by <delim>"
    // fragment; previously `"""` was hard-coded even when the caller
    // passed `"`, producing a misleading message for single-line strings.
    let by = if by.is_empty() {
        "".to_string()
    } else {
        switch_lang!(
            "japanese" => format!("{by}によって"),
            "simplified_chinese" => format!("{by}"),
            "traditional_chinese" => format!("{by}"),
            "english" => format!("by {by}"),
        )
    };
    LexError::syntax_error(
        line,
        token.loc(),
        switch_lang!(
            "japanese" => format!("文字列が{by}閉じられていません"),
            "simplified_chinese" => format!("字符串没有被{by}关闭"),
            "traditional_chinese" => format!("字符串沒有被{by}關閉"),
            "english" => format!("the string is not closed {by}"),
        ),
        None,
    )
}
/// Builds the syntax error reported when a `\{` interpolation opened inside
/// a string literal is never closed by a matching `}`.
/// `token` is the already-emitted `Illegal` token covering the unterminated span.
fn unclosed_interpol_error(token: Token) -> LexError {
    LexError::syntax_error(
        // NOTE(review): unlike `unclosed_string_error`, the origin line is
        // hard-coded to 0 here — presumably intentional, but confirm.
        0,
        token.loc(),
        switch_lang!(
            "japanese" => "文字列内の補間が閉じられていません",
            "simplified_chinese" => "字符串内的插值没有被闭",
            "traditional_chinese" => "字符串內的插值沒有被閉",
            "english" => "the interpolation in the string is not closed",
        ),
        None,
    )
}
fn lex_single_str(&mut self) -> LexResult<Token> {
let mut s = "\"".to_string();
while let Some(c) = self.peek_cur_ch() {
match c {
'\n' => match self.interpol_stack.last().unwrap() {
Interpolation::SingleLine if self.interpol_stack.len() == 1 => {
let token = self.emit_token(Illegal, &s);
return Err(Self::str_line_break_error(token, line!() as usize));
}
_ => {
let token = self.emit_token(Illegal, &s);
return Err(Self::unclosed_interpol_error(token));
}
},
'"' => { '"' => {
s.push(self.consume().unwrap()); s.push(self.consume().unwrap());
let token = self.emit_token(StrLit, &s); let token = self.emit_token(StrLit, &s);
@ -659,6 +737,12 @@ impl Lexer /*<'a>*/ {
if c == '\\' { if c == '\\' {
let next_c = self.consume().unwrap(); let next_c = self.consume().unwrap();
match next_c { match next_c {
'{' => {
s.push_str("\\{");
self.interpol_stack.push(Interpolation::SingleLine);
let token = self.emit_token(StrInterpLeft, &s);
return Ok(token);
}
'0' => s.push('\0'), '0' => s.push('\0'),
'r' => s.push('\r'), 'r' => s.push('\r'),
'n' => s.push('\n'), 'n' => s.push('\n'),
@ -668,17 +752,7 @@ impl Lexer /*<'a>*/ {
'\\' => s.push('\\'), '\\' => s.push('\\'),
_ => { _ => {
let token = self.emit_token(Illegal, &format!("\\{next_c}")); let token = self.emit_token(Illegal, &format!("\\{next_c}"));
return Err(LexError::syntax_error( return Err(Self::invalid_escape_error(next_c, token));
0,
token.loc(),
switch_lang!(
"japanese" => format!("不正なエスケープシーケンスです: \\{}", next_c),
"simplified_chinese" => format!("不合法的转义序列: \\{}", next_c),
"traditional_chinese" => format!("不合法的轉義序列: \\{}", next_c),
"english" => format!("illegal escape sequence: \\{}", next_c),
),
None,
));
} }
} }
} else { } else {
@ -691,17 +765,7 @@ impl Lexer /*<'a>*/ {
} }
} }
let token = self.emit_token(Illegal, &s); let token = self.emit_token(Illegal, &s);
Err(LexError::syntax_error( Err(Self::unclosed_string_error(token, "\"", line!() as usize))
0,
token.loc(),
switch_lang!(
"japanese" => "文字列が\"によって閉じられていません",
"simplified_chinese" => "字符串没有被\"关闭",
"traditional_chinese" => "字符串没有被\"关闭",
"english" => "the string is not closed by \"",
),
None,
))
} }
fn lex_multi_line_str(&mut self) -> LexResult<Token> { fn lex_multi_line_str(&mut self) -> LexResult<Token> {
@ -712,11 +776,21 @@ impl Lexer /*<'a>*/ {
let next_c = self.peek_cur_ch(); let next_c = self.peek_cur_ch();
let aft_next_c = self.peek_next_ch(); let aft_next_c = self.peek_next_ch();
if next_c.is_none() { if next_c.is_none() {
return self._unclosed_multi_string(&s); let token = self.emit_token(Illegal, &s);
return Err(Self::unclosed_string_error(
token,
"\"\"\"",
line!() as usize,
));
} }
if aft_next_c.is_none() { if aft_next_c.is_none() {
s.push(self.consume().unwrap()); s.push(self.consume().unwrap());
return self._unclosed_multi_string(&s); let token = self.emit_token(Illegal, &s);
return Err(Self::unclosed_string_error(
token,
"\"\"\"",
line!() as usize,
));
} }
if next_c.unwrap() == '"' && aft_next_c.unwrap() == '"' { if next_c.unwrap() == '"' && aft_next_c.unwrap() == '"' {
self.consume().unwrap(); self.consume().unwrap();
@ -725,6 +799,7 @@ impl Lexer /*<'a>*/ {
let token = self.emit_token(StrLit, &s); let token = self.emit_token(StrLit, &s);
return Ok(token); return Ok(token);
} }
// else unclosed_string_error
s.push(c); s.push(c);
} else { } else {
let c = self.consume().unwrap(); let c = self.consume().unwrap();
@ -732,6 +807,12 @@ impl Lexer /*<'a>*/ {
'\\' => { '\\' => {
let next_c = self.consume().unwrap(); let next_c = self.consume().unwrap();
match next_c { match next_c {
'{' => {
s.push_str("\\{");
self.interpol_stack.push(Interpolation::MultiLine);
let token = self.emit_token(StrInterpLeft, &s);
return Ok(token);
}
'0' => s.push('\0'), '0' => s.push('\0'),
'r' => s.push('\r'), 'r' => s.push('\r'),
'\'' => s.push('\''), '\'' => s.push('\''),
@ -746,24 +827,14 @@ impl Lexer /*<'a>*/ {
} }
_ => { _ => {
let token = self.emit_token(Illegal, &format!("\\{next_c}")); let token = self.emit_token(Illegal, &format!("\\{next_c}"));
return Err(LexError::syntax_error( return Err(Self::invalid_escape_error(next_c, token));
0,
token.loc(),
switch_lang!(
"japanese" => format!("不正なエスケープシーケンスです: \\{}", next_c),
"simplified_chinese" => format!("不合法的转义序列: \\{}", next_c),
"traditional_chinese" => format!("不合法的轉義序列: \\{}", next_c),
"english" => format!("illegal escape sequence: \\{}", next_c),
),
None,
));
} }
} }
} }
'\n' => { '\n' => {
self.lineno_token_starts += 1; self.lineno_token_starts += 1;
self.col_token_starts = 0; self.col_token_starts = 0;
s.push('\n') s.push('\n');
} }
_ => { _ => {
s.push(c); s.push(c);
@ -774,25 +845,120 @@ impl Lexer /*<'a>*/ {
} }
} }
} }
self._unclosed_multi_string(&s) let token = self.emit_token(Illegal, &s);
if self.interpol_stack.len() == 1 {
Err(Self::unclosed_string_error(
token,
"\"\"\"",
line!() as usize,
))
} else {
Err(Self::unclosed_interpol_error(token))
}
} }
// for multi-line strings unclosed error /// e.g. `}aaa"`, `}aaa{`
fn _unclosed_multi_string(&mut self, s: &str) -> LexResult<Token> { fn lex_interpolation_mid(&mut self) -> LexResult<Token> {
let col_end = s.rfind('\n').unwrap_or_default(); let mut s = "}".to_string();
let error_s = &s[col_end..s.len() - 1]; while let Some(c) = self.peek_cur_ch() {
let token = self.emit_token(Illegal, error_s); match c {
Err(LexError::syntax_error( '\n' => match self.interpol_stack.last().unwrap() {
0, Interpolation::MultiLine => {
token.loc(), self.lineno_token_starts += 1;
switch_lang!( self.col_token_starts = 0;
"japanese" => "文字列が\"\"\"によって閉じられていません", self.consume().unwrap();
"simplified_chinese" => "字符串没有被\"\"\"关闭", s.push('\n');
"traditional_chinese" => "字符串没有被\"\"\"关闭", }
"english" => "the string is not closed by \"\"\"", Interpolation::SingleLine => {
), if self.peek_next_ch().is_some() {
None, let token = self.emit_token(Illegal, &s);
)) return Err(Self::str_line_break_error(token, line!() as usize));
} else {
let token = self.emit_token(Illegal, &s);
return Err(Self::unclosed_string_error(token, "", line!() as usize));
}
}
Interpolation::Not => {
let token = self.emit_token(Illegal, &s);
return Err(Self::unclosed_interpol_error(token));
}
},
'"' => {
s.push(self.consume().unwrap());
match self.interpol_stack.last().unwrap() {
Interpolation::MultiLine => {
let next_c = self.peek_cur_ch();
let aft_next_c = self.peek_next_ch();
if next_c.is_none() {
self.interpol_stack.pop();
let token = self.emit_token(Illegal, &s);
return Err(Self::unclosed_string_error(
token,
"\"\"\"",
line!() as usize,
));
}
if aft_next_c.is_none() {
self.interpol_stack.pop();
s.push(self.consume().unwrap());
let token = self.emit_token(Illegal, &s);
return Err(Self::unclosed_string_error(
token,
"\"\"\"",
line!() as usize,
));
}
if next_c.unwrap() == '"' && aft_next_c.unwrap() == '"' {
self.interpol_stack.pop();
self.consume().unwrap();
self.consume().unwrap();
s.push_str("\"\"\"");
let token = self.emit_token(StrInterpRight, &s);
return Ok(token);
}
// else unclosed_string_error
}
Interpolation::SingleLine => {
self.interpol_stack.pop();
let token = self.emit_token(StrInterpRight, &s);
return Ok(token);
}
Interpolation::Not => {}
}
}
_ => {
let c = self.consume().unwrap();
if c == '\\' {
let next_c = self.consume().unwrap();
match next_c {
'{' => {
s.push_str("\\{");
let token = self.emit_token(StrInterpMid, &s);
return Ok(token);
}
'0' => s.push('\0'),
'r' => s.push('\r'),
'n' => s.push('\n'),
'\'' => s.push('\''),
'"' => s.push('"'),
't' => s.push_str(" "), // tab is invalid, so changed into 4 whitespace
'\\' => s.push('\\'),
_ => {
let token = self.emit_token(Illegal, &format!("\\{next_c}"));
return Err(Self::invalid_escape_error(next_c, token));
}
}
} else {
s.push(c);
if Self::is_bidi(c) {
return Err(self._invalid_unicode_character(&s));
}
}
}
}
}
let token = self.emit_token(Illegal, &s);
Err(Self::unclosed_string_error(token, "", line!() as usize))
} }
fn lex_raw_ident(&mut self) -> LexResult<Token> { fn lex_raw_ident(&mut self) -> LexResult<Token> {
@ -878,7 +1044,13 @@ impl Iterator for Lexer /*<'a>*/ {
Some('[') => self.accept(LSqBr, "["), Some('[') => self.accept(LSqBr, "["),
Some(']') => self.accept(RSqBr, "]"), Some(']') => self.accept(RSqBr, "]"),
Some('{') => self.accept(LBrace, "{"), Some('{') => self.accept(LBrace, "{"),
Some('}') => self.accept(RBrace, "}"), Some('}') => {
if self.interpol_stack.last().unwrap().is_in() {
Some(self.lex_interpolation_mid())
} else {
self.accept(RBrace, "}")
}
}
Some('<') => match self.peek_cur_ch() { Some('<') => match self.peek_cur_ch() {
Some('.') => { Some('.') => {
self.consume(); self.consume();
@ -1145,7 +1317,7 @@ impl Iterator for Lexer /*<'a>*/ {
let token = self.emit_token(StrLit, "\"\""); let token = self.emit_token(StrLit, "\"\"");
Some(Ok(token)) Some(Ok(token))
} else { } else {
Some(self.lex_str()) Some(self.lex_single_str())
} }
} }
(Some(c), Some(next_c)) => { (Some(c), Some(next_c)) => {
@ -1154,7 +1326,7 @@ impl Iterator for Lexer /*<'a>*/ {
self.consume(); // consume third '"' self.consume(); // consume third '"'
Some(self.lex_multi_line_str()) Some(self.lex_multi_line_str())
} else { } else {
Some(self.lex_str()) Some(self.lex_single_str())
} }
} }
} }

View file

@ -548,6 +548,7 @@ impl Parser {
match self.peek() { match self.peek() {
Some(t) Some(t)
if t.category_is(TC::Literal) if t.category_is(TC::Literal)
|| t.is(StrInterpLeft)
|| t.is(Symbol) || t.is(Symbol)
|| t.category_is(TC::UnaryOp) || t.category_is(TC::UnaryOp)
|| t.is(LParen) || t.is(LParen)
@ -1442,6 +1443,13 @@ impl Parser {
self.level -= 1; self.level -= 1;
Ok(Expr::Lit(lit)) Ok(Expr::Lit(lit))
} }
Some(t) if t.is(StrInterpLeft) => {
let str_interp = self
.try_reduce_string_interpolation()
.map_err(|_| self.stack_dec())?;
self.level -= 1;
Ok(str_interp)
}
Some(t) if t.is(AtSign) => { Some(t) if t.is(AtSign) => {
let decos = self.opt_reduce_decorators()?; let decos = self.opt_reduce_decorators()?;
let expr = self.try_reduce_chunk(false, in_brace)?; let expr = self.try_reduce_chunk(false, in_brace)?;
@ -2161,6 +2169,87 @@ impl Parser {
} }
} }
/// "...\{, expr, }..." ==> "..." + str(expr) + "..."
/// "...\{, expr, }..." ==> "..." + str(expr) + "..."
/// Desugars a string interpolation into string concatenation:
/// `"...\{ expr }..."` ==> `"..." + str(expr) + "..."`.
///
/// Expects the current token to be `StrInterpLeft` (the `"...\{` head).
/// Consumes the interleaved expression/`StrInterpMid` pairs and the final
/// `StrInterpRight` (`}..."` tail), building a left-leaning `+` chain of
/// `StrLit` pieces and `str(expr)` calls. On a missing terminator it pushes
/// a syntax error and returns `Err(())`.
fn try_reduce_string_interpolation(&mut self) -> ParseResult<Expr> {
    debug_call_info!(self);
    // Head piece: drop the trailing `\{` marker and close the literal
    // with `"` so it becomes an ordinary StrLit token.
    let mut left = self.lpop();
    left.content = Str::from(left.content.trim_end_matches("\\{").to_string() + "\"");
    left.kind = StrLit;
    let mut expr = Expr::Lit(Literal::from(left));
    loop {
        match self.peek() {
            Some(l) if l.is(StrInterpRight) => {
                // Tail piece `}..."`: drop the leading `}` and reopen the
                // literal with `"`, then append it and finish.
                let mut right = self.lpop();
                right.content =
                    Str::from(format!("\"{}", right.content.trim_start_matches('}')));
                right.kind = StrLit;
                let right = Expr::Lit(Literal::from(right));
                let op = Token::new(
                    Plus,
                    "+",
                    right.ln_begin().unwrap(),
                    right.col_begin().unwrap(),
                );
                expr = Expr::BinOp(BinOp::new(op, expr, right));
                self.level -= 1;
                return Ok(expr);
            }
            Some(_) => {
                // Interpolated expression: reduce it, wrap it in a call to
                // the builtin `str`, and append `str(expr)` to the chain.
                let mid_expr = self.try_reduce_expr(true, false, false, false)?;
                let str_func = Expr::local(
                    "str",
                    mid_expr.ln_begin().unwrap(),
                    mid_expr.col_begin().unwrap(),
                );
                let call = Call::new(
                    str_func,
                    None,
                    Args::new(vec![PosArg::new(mid_expr)], vec![], None),
                );
                let op = Token::new(
                    Plus,
                    "+",
                    call.ln_begin().unwrap(),
                    call.col_begin().unwrap(),
                );
                let bin = BinOp::new(op, expr, Expr::Call(call));
                expr = Expr::BinOp(bin);
                // A middle piece `}...\{` between two interpolations: strip
                // both markers, quote it, and append it as a StrLit.
                if self.cur_is(StrInterpMid) {
                    let mut mid = self.lpop();
                    mid.content = Str::from(format!(
                        "\"{}\"",
                        mid.content.trim_start_matches('}').trim_end_matches("\\{")
                    ));
                    mid.kind = StrLit;
                    let mid = Expr::Lit(Literal::from(mid));
                    let op = Token::new(
                        Plus,
                        "+",
                        mid.ln_begin().unwrap(),
                        mid.col_begin().unwrap(),
                    );
                    expr = Expr::BinOp(BinOp::new(op, expr, mid));
                }
                // Otherwise the next token should be StrInterpRight (handled
                // on the next loop iteration) or the interpolation is
                // unterminated (None arm below).
            }
            None => {
                // Ran out of tokens before StrInterpRight: report and bail.
                self.level -= 1;
                let err = ParseError::syntax_error(
                    line!() as usize,
                    expr.loc(),
                    switch_lang!(
                        "japanese" => "文字列補間の終わりが見つかりませんでした",
                        "english" => "end of string interpolation not found",
                    ),
                    None,
                );
                self.errs.push(err);
                return Err(());
            }
        }
    }
}
/// x |> f() => f(x) /// x |> f() => f(x)
fn try_reduce_stream_operator(&mut self, stack: &mut Vec<ExprOrOp>) -> ParseResult<()> { fn try_reduce_stream_operator(&mut self, stack: &mut Vec<ExprOrOp>) -> ParseResult<()> {
debug_call_info!(self); debug_call_info!(self);

View file

@ -3,5 +3,6 @@ print! "こんにちは、世界!"
print! "Γειά σου Κόσμε!" print! "Γειά σου Κόσμε!"
print! "!مرحبا بالعالم" print! "!مرحبا بالعالم"
greeting = "Hello" greetings = ["Good morning", "Hello", "Good evening"]
print! "{greeting}, world!" for! greetings, greeting =>
print! "\{greeting}, world!"