Update: add a closure for the unclosed-string error, and update line and col on newlines

The text of the error token is the slice of the string starting at its rightmost newline (or at index 0 if there is no newline) and running to the end of the string
2025-09-29 20:34:44 +00:00 · 2022-09-20 16:05:53 +09:00 · 2022-09-20 16:05:53 +09:00 · f1dd25e508
commit f1dd25e508
parent beb10ef166
1 changed files with 71 additions and 62 deletions
--- a/compiler/erg_parser/lex.rs
+++ b/compiler/erg_parser/lex.rs
@ -690,17 +690,10 @@ impl Lexer /*<'a>*/ {

    fn lex_multi_line_str(&mut self) -> LexResult<Token> {
        let mut s = "\"\"\"".to_string();
-        while let Some(c) = self.peek_cur_ch() {
-            match c {
-                '"' => {
-                    s.push(self.consume().unwrap());
-                    let next_c = self.peek_cur_ch();
-                    let aft_next_c = self.peek_next_ch();
-                    if next_c.is_none() || aft_next_c.is_none() {
-                        let token = self.emit_token(Illegal, &s);
-                        return Err(LexError::syntax_error(
+        let unclosed_error = |t: Token| -> LexResult<Token> {
+            Err(LexError::syntax_error(
                0,
-                            token.loc(),
+                t.loc(),
                switch_lang!(
                    "japanese" => "文字列が\"\"\"によって閉じられていません",
                    "simplified_chinese" => "字符串没有被\"\"\"关闭",
@ -708,30 +701,46 @@ impl Lexer /*<'a>*/ {
                    "english" => "the string is not closed by \"\"\"",
                ),
                None,
-                        ));
+            ))
+        };
+
+        while let Some(c) = self.peek_cur_ch() {
+            match c {
+                '"' => {
+                    s.push(self.consume().unwrap());
+                    let next_c = self.peek_cur_ch();
+                    let aft_next_c = self.peek_next_ch();
+                    if next_c.is_none() {
+                        let col_end = s.rfind('\n').unwrap_or_default();
+                        let error_s = &s[col_end..s.len()];
+                        let token = self.emit_token(Illegal, error_s);
+                        return unclosed_error(token);
+                    }
+                    if aft_next_c.is_none() {
+                        s.push(self.consume().unwrap());
+                        let col_end = s.rfind('\n').unwrap_or_default();
+                        let error_s = &s[col_end..s.len()];
+                        let token = self.emit_token(Illegal, error_s);
+                        return unclosed_error(token);
                    }
                    let next_c = self.consume().unwrap();
                    let aft_next_c = self.consume().unwrap();
                    if next_c == '"' && aft_next_c == '"' {
+                        s.push_str("\"\"");
                        let token = self.emit_token(StrLit, &s);
                        return Ok(token);
                    }
+                    s.push(self.consume().unwrap());
                }
                _ => {
                    let c = self.consume().unwrap();
-                    if c == '\\' {
+                    match c {
+                        '\\' => {
                            let next_c = self.consume().unwrap();
                            match next_c {
                                '0' => s.push('\0'),
                                'r' => s.push('\r'),
-                            '\'' => {
-                                s.push('\'');
-                                if self.peek_next_ch().is_some()
-                                    && self.peek_next_ch().unwrap() == '\n'
-                                {
-                                    continue; // Escaping a line break if only '\' comes at the end
-                                }
-                            }
+                                '\'' => s.push('\''),
                                '"' => s.push('"'),
                                't' => s.push_str("    "), // tab is invalid, so changed into 4 whitespace
                                '\\' => s.push('\\'),
@ -739,6 +748,7 @@ impl Lexer /*<'a>*/ {
                                '\n' => {
                                    self.lineno_token_starts += 1;
                                    self.col_token_starts = 0;
+                                    continue;
                                }
                                _ => {
                                    let token = self.emit_token(Illegal, &format!("\\{next_c}"));
@ -755,7 +765,13 @@ impl Lexer /*<'a>*/ {
                                    ));
                                }
                            }
-                    } else {
+                        }
+                        '\n' => {
+                            self.lineno_token_starts += 1;
+                            self.col_token_starts = 0;
+                            s.push('\n')
+                        }
+                        _ => {
                            s.push(c);
                            if Self::is_bidi(c) {
                                return Err(self._invalid_unicode_character(&s));
@ -764,18 +780,11 @@ impl Lexer /*<'a>*/ {
                    }
                }
            }
-        let token = self.emit_token(Illegal, &s);
-        Err(LexError::syntax_error(
-            0,
-            token.loc(),
-            switch_lang!(
-                "japanese" => "文字列が\"\"\"によって閉じられていません",
-                "simplified_chinese" => "字符串没有被\"\"\"关闭",
-                "traditional_chinese" => "字符串没有被\"\"\"关闭",
-                "english" => "the string is not closed by \"\"\"",
-            ),
-            None,
-        ))
+        }
+        let col_end = s.rfind('\n').unwrap_or_default();
+        let error_s = &s[col_end..s.len()];
+        let token = self.emit_token(Illegal, error_s);
+        unclosed_error(token)
    }

    // for single strings and multi strings