From 504d556bb9acb1f2140930276f104fdc659aded0 Mon Sep 17 00:00:00 2001 From: Joshua Warner Date: Sun, 12 Oct 2025 15:51:52 -0700 Subject: [PATCH] Move from triple-double for multiline strings to zig-like double-backslash --- src/fmt/fmt.zig | 4 +- src/parse/tokenize.zig | 53 ++++- test/snapshots/fuzz_crash/fuzz_crash_026.md | Bin 13918 -> 10276 bytes test/snapshots/multiline_string_complex.md | 230 ++++++++++---------- test/snapshots/multiline_string_expr.md | 24 +- 5 files changed, 176 insertions(+), 135 deletions(-) diff --git a/src/fmt/fmt.zig b/src/fmt/fmt.zig index 1e0db65466..7cd5f3daed 100644 --- a/src/fmt/fmt.zig +++ b/src/fmt/fmt.zig @@ -837,7 +837,7 @@ const Formatter = struct { fmt.curr_indent += 1; } var add_newline = false; - try fmt.pushAll("\"\"\""); + try fmt.pushAll("\\\\"); for (fmt.ast.store.exprSlice(s.parts)) |idx| { const e = fmt.ast.store.getExpr(idx); switch (e) { @@ -847,7 +847,7 @@ const Formatter = struct { _ = try fmt.flushCommentsBefore(str.region.start - 1); try fmt.ensureNewline(); try fmt.pushIndent(); - try fmt.pushAll("\"\"\""); + try fmt.pushAll("\\\\"); } add_newline = true; diff --git a/src/parse/tokenize.zig b/src/parse/tokenize.zig index aa5795cb4d..fb54027316 100644 --- a/src/parse/tokenize.zig +++ b/src/parse/tokenize.zig @@ -46,7 +46,7 @@ pub const Token = struct { Float, StringStart, // the " that starts a string StringEnd, // the " that ends a string - MultilineStringStart, // the """ that starts a multiline string + MultilineStringStart, // the """ or \\ that starts a multiline string StringPart, MalformedStringPart, // malformed, but should be treated similar to a StringPart in the parser SingleQuote, @@ -1304,8 +1304,12 @@ pub const Tokenizer = struct { // Backslash (\) '\\' => { - self.cursor.pos += 1; - try self.pushTokenNormalHere(gpa, .OpBackslash, start); + if (self.cursor.peekAt(1) == '\\') { + try self.tokenizeMultilineStringLiteral(gpa); + } else { + self.cursor.pos += 1; + try self.pushTokenNormalHere(gpa, .OpBackslash, start); + } }, // Percent (%) @@ -1532,6 +1536,14 @@ pub const Tokenizer = struct { try self.tokenizeStringLikeLiteralBody(gpa, kind, start); } + pub fn tokenizeMultilineStringLiteral(self: *Tokenizer, gpa: std.mem.Allocator) std.mem.Allocator.Error!void { + const start = self.cursor.pos; + std.debug.assert(self.cursor.peek() == '\\' and self.cursor.peekAt(1) == '\\'); + self.cursor.pos += 2; + try self.pushTokenNormalHere(gpa, .MultilineStringStart, start); + try self.tokenizeStringLikeLiteralBody(gpa, .multi_line, start); + } + // Moving curly chars to constants because some editors hate them inline. const open_curly = '{'; const close_curly = '}'; @@ -1818,9 +1830,14 @@ fn rebuildBufferForTesting(buf: []const u8, tokens: *TokenizedBuffer, alloc: std try buf2.append(alloc, '"'); }, .MultilineStringStart => { - try buf2.append(alloc, '"'); - try buf2.append(alloc, '"'); - try buf2.append(alloc, '"'); + if (length == 3 and buf[region.start.offset] == '"') { + try buf2.append(alloc, '"'); + try buf2.append(alloc, '"'); + try buf2.append(alloc, '"'); + } else { + try buf2.append(alloc, '\\'); + try buf2.append(alloc, '\\'); + } }, .StringPart => { for (0..length) |_| { @@ -2298,6 +2315,30 @@ test "tokenizer" { .StringPart, }, ); + + // Test new \\ multiline string syntax + try testTokenization( + gpa, + "\\\\hello", + &[_]Token.Tag{ .MultilineStringStart, .StringPart }, + ); + try testTokenization( + gpa, + "\\\\hello\n\\\\world", + &[_]Token.Tag{ .MultilineStringStart, .StringPart, .MultilineStringStart, .StringPart }, + ); + try testTokenization( + gpa, + "\\\\a${b}c", + &[_]Token.Tag{ + .MultilineStringStart, + .StringPart, + .OpenStringInterpolation, + .LowerIdent, + .CloseStringInterpolation, + .StringPart, + }, + ); } test "tokenizer with invalid UTF-8" { diff --git a/test/snapshots/fuzz_crash/fuzz_crash_026.md b/test/snapshots/fuzz_crash/fuzz_crash_026.md index 736e121ad321cef1cb9d058b77f8e766474b2efc..1dd9704d1b98af2f2ddbbbf1c4669a9e71dd47bc 100644 GIT binary patch delta 741 zcmcbYvm{``w|YN+Kiz;JPrndHCm&aZP(M#+e-~E;Pd|m=5TK~Lu8*g;tAdYbh-;9e zkAkj(l>r2DQAwWz(H468g*p0ox+u5?J39urDg?U*hPwJWyKbJy$jCVP0HgBcNle~s zCRRpPMwXK&GAV9e!Lpdig%nq6X>s}G=jj#{W#*M6Cgr3mgyv-?=clAjKF2N}Ut9td zPS?%J%uZFv$t+1NO3W$N;{pOLt)XN`EFOR02w-5agk)qEE2I`DCl;hC6sH!JrsgH5 zDgcG@@=FwoQj_!3^D?VaQ}nndFBG($93aNI`7wt*3oI@!@>}x7p@7L7g+(Si$aQai zB>s!n!nZW1Br_*7FEtn(#K9$rMI{$T{9^j+(AsdWOv_pQICwP-{{=LPC&7i0BB~OoV5v;?)bIkh zX$Bz8o&;l~G>ekD5=5EiK#ovMMhD605*h_J1oKkJ1`8pbE9d)b!M8*GQ21h?c&5i> zGP2tdl1SwUbB};J5K^LKq-j{LI_z4C>-VGg_3xQ+$nrVRuS2T-mq;xTc4x|$lS%zy zbW!u@tBJp}d9dU|Z%hxCedxBB@!-t?9L(;)TR!x5!GpJb2=l>yc<_!7-JKb$0r3iR@SYf>ek7KoXbt#`n)b;ZVm*Arg&X~8?cXcEB zCIqmcGZT08_tEglbV&c0evPc;GyU=Mgx-(7?9WdAnoMol4_I?w8ccTLa7O5Lc;ymj zaSm}_PhOcn#)E`b@mx?5ap@c`0j3!j3zIp$iZHtpYS9cjvS;II7BM}Cu``-Wh%>EG zY>nmY?l6mwj>gt*}2tpUY(C^YT`ub3^&Q0iA^=R<*< zw&p4$P$UiWv)E!71jMJaMYP6HprudeuIWl_9NjQ*nq~&UIAxk