Merge pull request #8289 from joshuawarner32/backslash

Move from triple double-quote (`"""`) for multiline strings to Zig-like double-backslash (`\\`)
This commit is contained in:
Luke Boswell 2025-10-13 11:01:13 +11:00 committed by GitHub
commit ddfd451a07
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 176 additions and 135 deletions

View file

@ -843,7 +843,7 @@ const Formatter = struct {
fmt.curr_indent += 1;
}
var add_newline = false;
try fmt.pushAll("\"\"\"");
try fmt.pushAll("\\\\");
for (fmt.ast.store.exprSlice(s.parts)) |idx| {
const e = fmt.ast.store.getExpr(idx);
switch (e) {
@ -853,7 +853,7 @@ const Formatter = struct {
_ = try fmt.flushCommentsBefore(str.region.start - 1);
try fmt.ensureNewline();
try fmt.pushIndent();
try fmt.pushAll("\"\"\"");
try fmt.pushAll("\\\\");
}
add_newline = true;

View file

@ -46,7 +46,7 @@ pub const Token = struct {
Float,
StringStart, // the " that starts a string
StringEnd, // the " that ends a string
MultilineStringStart, // the """ that starts a multiline string
MultilineStringStart, // the """ or \\ that starts a multiline string
StringPart,
MalformedStringPart, // malformed, but should be treated similar to a StringPart in the parser
SingleQuote,
@ -1304,8 +1304,12 @@ pub const Tokenizer = struct {
// Backslash (\)
'\\' => {
self.cursor.pos += 1;
try self.pushTokenNormalHere(gpa, .OpBackslash, start);
if (self.cursor.peekAt(1) == '\\') {
try self.tokenizeMultilineStringLiteral(gpa);
} else {
self.cursor.pos += 1;
try self.pushTokenNormalHere(gpa, .OpBackslash, start);
}
},
// Percent (%)
@ -1532,6 +1536,14 @@ pub const Tokenizer = struct {
try self.tokenizeStringLikeLiteralBody(gpa, kind, start);
}
/// Tokenizes a multiline string segment introduced by a `\\` prefix.
///
/// The cursor must be sitting on the first of two consecutive backslashes.
/// A `.MultilineStringStart` token is pushed for the prefix, after which the
/// rest of the segment is handed to `tokenizeStringLikeLiteralBody` in
/// `.multi_line` mode.
///
/// Returns an allocator error if pushing tokens fails.
pub fn tokenizeMultilineStringLiteral(self: *Tokenizer, gpa: std.mem.Allocator) std.mem.Allocator.Error!void {
    const prefix_start = self.cursor.pos;

    // The caller is expected to have already confirmed both backslashes.
    std.debug.assert(self.cursor.peek() == '\\');
    std.debug.assert(self.cursor.peekAt(1) == '\\');

    // Step past the two-character `\\` prefix before emitting its token.
    self.cursor.pos += 2;
    try self.pushTokenNormalHere(gpa, .MultilineStringStart, prefix_start);

    try self.tokenizeStringLikeLiteralBody(gpa, .multi_line, prefix_start);
}
// Moving curly chars to constants because some editors hate them inline.
const open_curly = '{';
const close_curly = '}';
@ -1818,9 +1830,14 @@ fn rebuildBufferForTesting(buf: []const u8, tokens: *TokenizedBuffer, alloc: std
try buf2.append(alloc, '"');
},
.MultilineStringStart => {
try buf2.append(alloc, '"');
try buf2.append(alloc, '"');
try buf2.append(alloc, '"');
if (length == 3 and buf[region.start.offset] == '"') {
try buf2.append(alloc, '"');
try buf2.append(alloc, '"');
try buf2.append(alloc, '"');
} else {
try buf2.append(alloc, '\\');
try buf2.append(alloc, '\\');
}
},
.StringPart => {
for (0..length) |_| {
@ -2298,6 +2315,30 @@ test "tokenizer" {
.StringPart,
},
);
// Test new \\ multiline string syntax
try testTokenization(
gpa,
"\\\\hello",
&[_]Token.Tag{ .MultilineStringStart, .StringPart },
);
try testTokenization(
gpa,
"\\\\hello\n\\\\world",
&[_]Token.Tag{ .MultilineStringStart, .StringPart, .MultilineStringStart, .StringPart },
);
try testTokenization(
gpa,
"\\\\a${b}c",
&[_]Token.Tag{
.MultilineStringStart,
.StringPart,
.OpenStringInterpolation,
.LowerIdent,
.CloseStringInterpolation,
.StringPart,
},
);
}
test "tokenizer with invalid UTF-8" {

View file

@ -8,62 +8,62 @@ type=file
package
[]
{
x: """Multiline
x: \\Multiline
,
}
value1 = """This is a "string" with just one line
value1 = \\This is a "string" with just one line
value2 =
"""This is a "string" with just one line
\\This is a "string" with just one line
value3 = """This is a string
"""With multiple lines
"""${value1}
value3 = \\This is a string
\\With multiple lines
\\${value1}
value4 =
"""This is a string
\\This is a string
# A comment in between
"""With multiple lines
"""${value2}
\\With multiple lines
\\${value2}
value5 = {
a: """Multiline
a: \\Multiline
,
b: (
"""Multiline
\\Multiline
,
"""Multiline
\\Multiline
,
),
c: [
"""multiline
\\multiline
,
],
d: (
0 - """
0 - \\
,
),
e: !"""
e: !\\
,
}
x = {
"""
"""
\\
\\
}
~~~
# EXPECTED
TYPE MISMATCH - multiline_string_complex.md:37:7:37:10
TYPE MISMATCH - multiline_string_complex.md:40:6:40:9
TYPE MISMATCH - multiline_string_complex.md:37:7:37:9
TYPE MISMATCH - multiline_string_complex.md:40:6:40:8
# PROBLEMS
**TYPE MISMATCH**
This expression is used in an unexpected way:
**multiline_string_complex.md:37:7:37:10:**
**multiline_string_complex.md:37:7:37:9:**
```roc
0 - """
0 - \\
```
^^^
^^
It has the type:
_Str_
@ -73,11 +73,11 @@ But I expected it to be:
**TYPE MISMATCH**
This expression is used in an unexpected way:
**multiline_string_complex.md:40:6:40:9:**
**multiline_string_complex.md:40:6:40:8:**
```roc
e: !"""
e: !\\
```
^^^
^^
It has the type:
_Str_
@ -90,42 +90,42 @@ But I expected it to be:
KwPackage(1:1-1:8),
OpenSquare(2:2-2:3),CloseSquare(2:3-2:4),
OpenCurly(3:2-3:3),
LowerIdent(4:3-4:4),OpColon(4:4-4:5),MultilineStringStart(4:6-4:9),StringPart(4:9-4:18),
LowerIdent(4:3-4:4),OpColon(4:4-4:5),MultilineStringStart(4:6-4:8),StringPart(4:8-4:17),
Comma(5:3-5:4),
CloseCurly(6:2-6:3),
LowerIdent(8:1-8:7),OpAssign(8:8-8:9),MultilineStringStart(8:10-8:13),StringPart(8:13-8:50),
LowerIdent(8:1-8:7),OpAssign(8:8-8:9),MultilineStringStart(8:10-8:12),StringPart(8:12-8:49),
LowerIdent(10:1-10:7),OpAssign(10:8-10:9),
MultilineStringStart(11:2-11:5),StringPart(11:5-11:42),
LowerIdent(13:1-13:7),OpAssign(13:8-13:9),MultilineStringStart(13:10-13:13),StringPart(13:13-13:29),
MultilineStringStart(14:2-14:5),StringPart(14:5-14:24),
MultilineStringStart(15:2-15:5),StringPart(15:5-15:5),OpenStringInterpolation(15:5-15:7),LowerIdent(15:7-15:13),CloseStringInterpolation(15:13-15:14),StringPart(15:14-15:14),
MultilineStringStart(11:2-11:4),StringPart(11:4-11:41),
LowerIdent(13:1-13:7),OpAssign(13:8-13:9),MultilineStringStart(13:10-13:12),StringPart(13:12-13:28),
MultilineStringStart(14:2-14:4),StringPart(14:4-14:23),
MultilineStringStart(15:2-15:4),StringPart(15:4-15:4),OpenStringInterpolation(15:4-15:6),LowerIdent(15:6-15:12),CloseStringInterpolation(15:12-15:13),StringPart(15:13-15:13),
LowerIdent(17:1-17:7),OpAssign(17:8-17:9),
MultilineStringStart(18:2-18:5),StringPart(18:5-18:21),
MultilineStringStart(20:2-20:5),StringPart(20:5-20:24),
MultilineStringStart(21:2-21:5),StringPart(21:5-21:5),OpenStringInterpolation(21:5-21:7),LowerIdent(21:7-21:13),CloseStringInterpolation(21:13-21:14),StringPart(21:14-21:14),
MultilineStringStart(18:2-18:4),StringPart(18:4-18:20),
MultilineStringStart(20:2-20:4),StringPart(20:4-20:23),
MultilineStringStart(21:2-21:4),StringPart(21:4-21:4),OpenStringInterpolation(21:4-21:6),LowerIdent(21:6-21:12),CloseStringInterpolation(21:12-21:13),StringPart(21:13-21:13),
LowerIdent(23:1-23:7),OpAssign(23:8-23:9),OpenCurly(23:10-23:11),
LowerIdent(24:2-24:3),OpColon(24:3-24:4),MultilineStringStart(24:5-24:8),StringPart(24:8-24:17),
LowerIdent(24:2-24:3),OpColon(24:3-24:4),MultilineStringStart(24:5-24:7),StringPart(24:7-24:16),
Comma(25:2-25:3),
LowerIdent(26:2-26:3),OpColon(26:3-26:4),OpenRound(26:5-26:6),
MultilineStringStart(27:3-27:6),StringPart(27:6-27:15),
MultilineStringStart(27:3-27:5),StringPart(27:5-27:14),
Comma(28:3-28:4),
MultilineStringStart(29:3-29:6),StringPart(29:6-29:15),
MultilineStringStart(29:3-29:5),StringPart(29:5-29:14),
Comma(30:3-30:4),
CloseRound(31:2-31:3),Comma(31:3-31:4),
LowerIdent(32:2-32:3),OpColon(32:3-32:4),OpenSquare(32:5-32:6),
MultilineStringStart(33:3-33:6),StringPart(33:6-33:15),
MultilineStringStart(33:3-33:5),StringPart(33:5-33:14),
Comma(34:3-34:4),
CloseSquare(35:2-35:3),Comma(35:3-35:4),
LowerIdent(36:2-36:3),OpColon(36:3-36:4),OpenRound(36:5-36:6),
Int(37:3-37:4),OpBinaryMinus(37:5-37:6),MultilineStringStart(37:7-37:10),StringPart(37:10-37:10),
Int(37:3-37:4),OpBinaryMinus(37:5-37:6),MultilineStringStart(37:7-37:9),StringPart(37:9-37:9),
Comma(38:3-38:4),
CloseRound(39:2-39:3),Comma(39:3-39:4),
LowerIdent(40:2-40:3),OpColon(40:3-40:4),OpBang(40:5-40:6),MultilineStringStart(40:6-40:9),StringPart(40:9-40:9),
LowerIdent(40:2-40:3),OpColon(40:3-40:4),OpBang(40:5-40:6),MultilineStringStart(40:6-40:8),StringPart(40:8-40:8),
Comma(41:2-41:3),
CloseCurly(42:1-42:2),
LowerIdent(44:1-44:2),OpAssign(44:3-44:4),OpenCurly(44:5-44:6),
MultilineStringStart(45:2-45:5),StringPart(45:5-45:5),
MultilineStringStart(46:2-46:5),StringPart(46:5-46:5),
MultilineStringStart(45:2-45:4),StringPart(45:4-45:4),
MultilineStringStart(46:2-46:4),StringPart(46:4-46:4),
CloseCurly(47:1-47:2),
EndOfFile(48:1-48:1),
~~~
@ -135,67 +135,67 @@ EndOfFile(48:1-48:1),
(package @1.1-6.3
(exposes @2.2-2.4)
(packages @3.2-6.3
(record-field @4.3-4.18 (name "x")
(e-multiline-string @4.6-4.18
(e-string-part @4.9-4.18 (raw "Multiline"))))))
(record-field @4.3-4.17 (name "x")
(e-multiline-string @4.6-4.17
(e-string-part @4.8-4.17 (raw "Multiline"))))))
(statements
(s-decl @8.1-8.50
(s-decl @8.1-8.49
(p-ident @8.1-8.7 (raw "value1"))
(e-multiline-string @8.10-8.50
(e-string-part @8.13-8.50 (raw "This is a "string" with just one line"))))
(s-decl @10.1-11.42
(e-multiline-string @8.10-8.49
(e-string-part @8.12-8.49 (raw "This is a "string" with just one line"))))
(s-decl @10.1-11.41
(p-ident @10.1-10.7 (raw "value2"))
(e-multiline-string @11.2-11.42
(e-string-part @11.5-11.42 (raw "This is a "string" with just one line"))))
(s-decl @13.1-15.14
(e-multiline-string @11.2-11.41
(e-string-part @11.4-11.41 (raw "This is a "string" with just one line"))))
(s-decl @13.1-15.13
(p-ident @13.1-13.7 (raw "value3"))
(e-multiline-string @13.10-15.14
(e-string-part @13.13-13.29 (raw "This is a string"))
(e-string-part @14.5-14.24 (raw "With multiple lines"))
(e-string-part @15.5-15.5 (raw ""))
(e-ident @15.7-15.13 (raw "value1"))
(e-string-part @15.14-15.14 (raw ""))))
(s-decl @17.1-21.14
(e-multiline-string @13.10-15.13
(e-string-part @13.12-13.28 (raw "This is a string"))
(e-string-part @14.4-14.23 (raw "With multiple lines"))
(e-string-part @15.4-15.4 (raw ""))
(e-ident @15.6-15.12 (raw "value1"))
(e-string-part @15.13-15.13 (raw ""))))
(s-decl @17.1-21.13
(p-ident @17.1-17.7 (raw "value4"))
(e-multiline-string @18.2-21.14
(e-string-part @18.5-18.21 (raw "This is a string"))
(e-string-part @20.5-20.24 (raw "With multiple lines"))
(e-string-part @21.5-21.5 (raw ""))
(e-ident @21.7-21.13 (raw "value2"))
(e-string-part @21.14-21.14 (raw ""))))
(e-multiline-string @18.2-21.13
(e-string-part @18.4-18.20 (raw "This is a string"))
(e-string-part @20.4-20.23 (raw "With multiple lines"))
(e-string-part @21.4-21.4 (raw ""))
(e-ident @21.6-21.12 (raw "value2"))
(e-string-part @21.13-21.13 (raw ""))))
(s-decl @23.1-42.2
(p-ident @23.1-23.7 (raw "value5"))
(e-record @23.10-42.2
(field (field "a")
(e-multiline-string @24.5-24.17
(e-string-part @24.8-24.17 (raw "Multiline"))))
(e-multiline-string @24.5-24.16
(e-string-part @24.7-24.16 (raw "Multiline"))))
(field (field "b")
(e-tuple @26.5-31.3
(e-multiline-string @27.3-27.15
(e-string-part @27.6-27.15 (raw "Multiline")))
(e-multiline-string @29.3-29.15
(e-string-part @29.6-29.15 (raw "Multiline")))))
(e-multiline-string @27.3-27.14
(e-string-part @27.5-27.14 (raw "Multiline")))
(e-multiline-string @29.3-29.14
(e-string-part @29.5-29.14 (raw "Multiline")))))
(field (field "c")
(e-list @32.5-35.3
(e-multiline-string @33.3-33.15
(e-string-part @33.6-33.15 (raw "multiline")))))
(e-multiline-string @33.3-33.14
(e-string-part @33.5-33.14 (raw "multiline")))))
(field (field "d")
(e-tuple @36.5-39.3
(e-binop @37.3-37.10 (op "-")
(e-binop @37.3-37.9 (op "-")
(e-int @37.3-37.4 (raw "0"))
(e-multiline-string @37.7-37.10
(e-string-part @37.10-37.10 (raw ""))))))
(e-multiline-string @37.7-37.9
(e-string-part @37.9-37.9 (raw ""))))))
(field (field "e")
(unary "!"
(e-multiline-string @40.6-40.9
(e-string-part @40.9-40.9 (raw "")))))))
(e-multiline-string @40.6-40.8
(e-string-part @40.8-40.8 (raw "")))))))
(s-decl @44.1-47.2
(p-ident @44.1-44.2 (raw "x"))
(e-block @44.5-47.2
(statements
(e-multiline-string @45.2-46.5
(e-string-part @45.5-45.5 (raw ""))
(e-string-part @46.5-46.5 (raw ""))))))))
(e-multiline-string @45.2-46.4
(e-string-part @45.4-45.4 (raw ""))
(e-string-part @46.4-46.4 (raw ""))))))))
~~~
# FORMATTED
~~~roc
@ -206,61 +206,61 @@ NO CHANGE
(can-ir
(d-let
(p-assign @8.1-8.7 (ident "value1"))
(e-string @8.10-8.50
(e-literal @8.13-8.50 (string "This is a "string" with just one line"))))
(e-string @8.10-8.49
(e-literal @8.12-8.49 (string "This is a "string" with just one line"))))
(d-let
(p-assign @10.1-10.7 (ident "value2"))
(e-string @11.2-11.42
(e-literal @11.5-11.42 (string "This is a "string" with just one line"))))
(e-string @11.2-11.41
(e-literal @11.4-11.41 (string "This is a "string" with just one line"))))
(d-let
(p-assign @13.1-13.7 (ident "value3"))
(e-string @13.10-15.14
(e-literal @13.13-13.29 (string "This is a string"))
(e-literal @14.2-14.5 (string "\n"))
(e-literal @14.5-14.24 (string "With multiple lines"))
(e-literal @15.2-15.5 (string "\n"))
(e-lookup-local @15.7-15.13
(e-string @13.10-15.13
(e-literal @13.12-13.28 (string "This is a string"))
(e-literal @14.2-14.4 (string "\n"))
(e-literal @14.4-14.23 (string "With multiple lines"))
(e-literal @15.2-15.4 (string "\n"))
(e-lookup-local @15.6-15.12
(p-assign @8.1-8.7 (ident "value1")))))
(d-let
(p-assign @17.1-17.7 (ident "value4"))
(e-string @18.2-21.14
(e-literal @18.5-18.21 (string "This is a string"))
(e-literal @20.2-20.5 (string "\n"))
(e-literal @20.5-20.24 (string "With multiple lines"))
(e-literal @21.2-21.5 (string "\n"))
(e-lookup-local @21.7-21.13
(e-string @18.2-21.13
(e-literal @18.4-18.20 (string "This is a string"))
(e-literal @20.2-20.4 (string "\n"))
(e-literal @20.4-20.23 (string "With multiple lines"))
(e-literal @21.2-21.4 (string "\n"))
(e-lookup-local @21.6-21.12
(p-assign @10.1-10.7 (ident "value2")))))
(d-let
(p-assign @23.1-23.7 (ident "value5"))
(e-record @23.10-42.2
(fields
(field (name "a")
(e-string @24.5-24.17
(e-literal @24.8-24.17 (string "Multiline"))))
(e-string @24.5-24.16
(e-literal @24.7-24.16 (string "Multiline"))))
(field (name "b")
(e-tuple @26.5-31.3
(elems
(e-string @27.3-27.15
(e-literal @27.6-27.15 (string "Multiline")))
(e-string @29.3-29.15
(e-literal @29.6-29.15 (string "Multiline"))))))
(e-string @27.3-27.14
(e-literal @27.5-27.14 (string "Multiline")))
(e-string @29.3-29.14
(e-literal @29.5-29.14 (string "Multiline"))))))
(field (name "c")
(e-list @32.5-35.3
(elems
(e-string @33.3-33.15
(e-literal @33.6-33.15 (string "multiline"))))))
(e-string @33.3-33.14
(e-literal @33.5-33.14 (string "multiline"))))))
(field (name "d")
(e-binop @37.3-37.10 (op "sub")
(e-binop @37.3-37.9 (op "sub")
(e-num @37.3-37.4 (value "0"))
(e-string @37.7-37.10)))
(e-string @37.7-37.9)))
(field (name "e")
(e-unary-not @40.5-40.9
(e-string @40.6-40.9))))))
(e-unary-not @40.5-40.8
(e-string @40.6-40.8))))))
(d-let
(p-assign @44.1-44.2 (ident "x"))
(e-block @44.5-47.2
(e-string @45.2-46.5
(e-literal @46.2-46.5 (string "\n"))))))
(e-string @45.2-46.4
(e-literal @46.2-46.4 (string "\n"))))))
~~~
# TYPES
~~~clojure
@ -273,10 +273,10 @@ NO CHANGE
(patt @23.1-23.7 (type "{ a: Str, b: (Str, Str), c: List(Str), d: Error, e: Error }"))
(patt @44.1-44.2 (type "Str")))
(expressions
(expr @8.10-8.50 (type "Str"))
(expr @11.2-11.42 (type "Str"))
(expr @13.10-15.14 (type "Str"))
(expr @18.2-21.14 (type "Str"))
(expr @8.10-8.49 (type "Str"))
(expr @11.2-11.41 (type "Str"))
(expr @13.10-15.13 (type "Str"))
(expr @18.2-21.13 (type "Str"))
(expr @23.10-42.2 (type "{ a: Str, b: (Str, Str), c: List(Str), d: Error, e: Error }"))
(expr @44.5-47.2 (type "Str"))))
~~~

View file

@ -5,8 +5,8 @@ type=expr
~~~
# SOURCE
~~~roc
"""This is a string
"""With multiple lines
\\This is a string
\\With multiple lines
~~~
# EXPECTED
NIL
@ -14,15 +14,15 @@ NIL
NIL
# TOKENS
~~~zig
MultilineStringStart(1:1-1:4),StringPart(1:4-1:20),
MultilineStringStart(2:1-2:4),StringPart(2:4-2:23),
MultilineStringStart(1:1-1:3),StringPart(1:3-1:19),
MultilineStringStart(2:1-2:3),StringPart(2:3-2:22),
EndOfFile(3:1-3:1),
~~~
# PARSE
~~~clojure
(e-multiline-string @1.1-2.23
(e-string-part @1.4-1.20 (raw "This is a string"))
(e-string-part @2.4-2.23 (raw "With multiple lines")))
(e-multiline-string @1.1-2.22
(e-string-part @1.3-1.19 (raw "This is a string"))
(e-string-part @2.3-2.22 (raw "With multiple lines")))
~~~
# FORMATTED
~~~roc
@ -30,12 +30,12 @@ NO CHANGE
~~~
# CANONICALIZE
~~~clojure
(e-string @1.1-2.23
(e-literal @1.4-1.20 (string "This is a string"))
(e-literal @2.1-2.4 (string "\n"))
(e-literal @2.4-2.23 (string "With multiple lines")))
(e-string @1.1-2.22
(e-literal @1.3-1.19 (string "This is a string"))
(e-literal @2.1-2.3 (string "\n"))
(e-literal @2.3-2.22 (string "With multiple lines")))
~~~
# TYPES
~~~clojure
(expr @1.1-2.23 (type "Str"))
(expr @1.1-2.22 (type "Str"))
~~~