From 44a4d615433db99ce48929cd7a40a78b7d0c6625 Mon Sep 17 00:00:00 2001 From: Joshua Warner Date: Mon, 23 Jun 2025 20:58:26 -0700 Subject: [PATCH] Update snapshot tests to add html output with better visualization tools --- .gitignore | 3 + src/snapshot.zig | 1349 +++++++++++++++++++++++++++++++++++++--------- 2 files changed, 1092 insertions(+), 260 deletions(-) diff --git a/.gitignore b/.gitignore index a59b08ca73..83370da2ea 100644 --- a/.gitignore +++ b/.gitignore @@ -134,3 +134,6 @@ perf.data perf.data.old profile.json flamegraph.svg + +# Ignore html files in the snapshot directory +src/snapshots/**/*.html diff --git a/src/snapshot.zig b/src/snapshot.zig index 243c1ad860..c04163d85b 100644 --- a/src/snapshot.zig +++ b/src/snapshot.zig @@ -9,10 +9,67 @@ const parse = @import("check/parse.zig"); const fmt = @import("fmt.zig"); const types = @import("types.zig"); const reporting = @import("reporting.zig"); +const tokenize = @import("check/parse/tokenize.zig"); const AST = parse.AST; const Report = reporting.Report; +/// Categories of tokens for syntax highlighting +const TokenCategory = enum { + keyword, + identifier, + string, + number, + operator, + bracket, + comment, + punctuation, + default, + + pub fn toCssClass(self: TokenCategory) []const u8 { + return switch (self) { + .keyword => "token-keyword", + .identifier => "token-identifier", + .string => "token-string", + .number => "token-number", + .operator => "token-operator", + .bracket => "token-bracket", + .comment => "token-comment", + .punctuation => "token-punctuation", + .default => "token-default", + }; + } +}; + +/// Convert a token type to its category for syntax highlighting +fn tokenToCategory(token: tokenize.Token.Tag) TokenCategory { + return switch (token) { + // Keywords + .KwApp, .KwAs, .KwCrash, .KwDbg, .KwElse, .KwExpect, .KwExposes, .KwExposing, .KwFor, .KwGenerates, .KwHas, .KwHosted, .KwIf, .KwImplements, .KwImport, .KwImports, .KwIn, .KwInterface, .KwMatch, .KwModule, .KwPackage, .KwPackages, .KwPlatform, .KwProvides, .KwRequires, .KwReturn, .KwVar, .KwWhere, .KwWith => .keyword, + + // Identifiers + .UpperIdent, .LowerIdent, .DotLowerIdent, .DotUpperIdent, .NoSpaceDotLowerIdent, .NoSpaceDotUpperIdent, .NamedUnderscore, .OpaqueName => .identifier, + + // Strings + .StringStart, .StringEnd, .StringPart, .MultilineStringStart, .MultilineStringEnd, .SingleQuote => .string, + + // Numbers + .Float, .Int, .DotInt, .NoSpaceDotInt, .MalformedNumberBadSuffix, .MalformedNumberUnicodeSuffix, .MalformedNumberNoDigits, .MalformedNumberNoExponentDigits => .number, + + // Operators + .OpPlus, .OpStar, .OpBinaryMinus, .OpUnaryMinus, .OpEquals, .OpNotEquals, .OpAnd, .OpOr, .OpGreaterThan, .OpLessThan, .OpGreaterThanOrEq, .OpLessThanOrEq, .OpAssign, .OpColonEqual, .OpArrow, .OpBackslash, .OpBar, .OpBang, .OpQuestion, .OpColon, .OpPercent, .OpDoubleSlash, .OpCaret, .OpAmpersand, .OpPizza, .OpSlash, .OpDoubleQuestion, .OpBackArrow, .OpFatArrow, .NoSpaceOpQuestion => .operator, + + // Brackets + .OpenRound, .CloseRound, .OpenSquare, .CloseSquare, .OpenCurly, .CloseCurly => .bracket, + + // Punctuation + .Comma, .Dot, .DoubleDot, .TripleDot, .Underscore => .punctuation, + + // Everything else + else => .default, + }; +} + var verbose_log: bool = false; var prng = std.Random.DefaultPrng.init(1234567890); @@ -395,8 +452,1000 @@ const Content = struct { const Error = error{ MissingSnapshotHeader, MissingSnapshotSource, InvalidNodeType, BadSectionHeader }; -fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, 
maybe_fuzz_corpus_path: ?[]const u8) !bool {
+/// Dual output writers for markdown and HTML generation
+const DualOutput = struct {
+    md_writer: std.ArrayList(u8).Writer,
+    html_writer: std.ArrayList(u8).Writer,
+    gpa: Allocator,
+
+    fn init(gpa: Allocator, md_buffer: *std.ArrayList(u8), html_buffer: *std.ArrayList(u8)) DualOutput {
+        return .{
+            .md_writer = md_buffer.writer(),
+            .html_writer = html_buffer.writer(),
+            .gpa = gpa,
+        };
+    }
+};
+
+/// Helper function to escape HTML characters
+fn escapeHtmlChar(writer: anytype, char: u8) !void {
+    switch (char) {
+        '<' => try writer.writeAll("&lt;"),
+        '>' => try writer.writeAll("&gt;"),
+        '&' => try writer.writeAll("&amp;"),
+        '"' => try writer.writeAll("&quot;"),
+        '\'' => try writer.writeAll("&#39;"),
+        else => try writer.writeByte(char),
+    }
+}
+
+/// Generate META section for both markdown and HTML
+fn generateMetaSection(output: *DualOutput, content: *const Content) !void {
+    // Markdown META section
+    try output.md_writer.writeAll(Section.META);
+    try content.meta.format(output.md_writer);
+    try output.md_writer.writeAll("\n");
+    try output.md_writer.writeAll(Section.SECTION_END);
+
+    // HTML META section
+    try output.html_writer.writeAll(
+        \\        <div class="section">
+        \\            <div class="section-header">META</div>
+        \\            <div class="section-content">
+        \\                <p><strong>Description:</strong>
+    );
+    try output.html_writer.writeAll(content.meta.description);
+    try output.html_writer.writeAll("</p>\n                <p><strong>Type:</strong> ");
+    try output.html_writer.writeAll(content.meta.node_type.toString());
+    try output.html_writer.writeAll(
+        \\</p>
+        \\            </div>
+        \\        </div>
+ \\ + ); +} + +/// Generate SOURCE section for both markdown and HTML +fn generateSourceSection(output: *DualOutput, content: *const Content, parse_ast: *AST) !void { + // Markdown SOURCE section + try output.md_writer.writeAll(Section.SOURCE); + try output.md_writer.writeAll(content.source); + try output.md_writer.writeAll("\n"); + try output.md_writer.writeAll(Section.SECTION_END); + + // HTML SOURCE section with syntax highlighting + try output.html_writer.writeAll( + \\
+        \\        <div class="section">
+        \\            <div class="section-header">SOURCE</div>
+        \\            <div class="section-content">
+        \\                <pre class="source-code">
+    );
+
+    // Apply syntax highlighting by processing tokens in order
+    var tokenizedBuffer = parse_ast.tokens;
+    const tokens = tokenizedBuffer.tokens.items(.tag);
+    var source_offset: u32 = 0;
+    var line_num: u32 = 1;
+
+    try output.html_writer.print("<span class=\"line-number\">{d}</span> ", .{line_num});
+
+    for (tokens, 0..) |tok, i| {
+        const region = tokenizedBuffer.resolve(@intCast(i));
+
+        // Output any characters between last token and this token (whitespace, etc.)
+        while (source_offset < region.start.offset) {
+            const char = content.source[source_offset];
+            if (char == '\n') {
+                try output.html_writer.writeAll("\n");
+                line_num += 1;
+                try output.html_writer.print("<span class=\"line-number\">{d}</span> ", .{line_num});
+            } else {
+                try escapeHtmlChar(output.html_writer, char);
+            }
+            source_offset += 1;
+        }
+
+        // Skip newline tokens since we handle newlines in whitespace above
+        if (tok == .Newline) {
+            continue;
+        }
+
+        // Output the token with syntax highlighting
+        const category = tokenToCategory(tok);
+        const token_text = content.source[region.start.offset..region.end.offset];
+
+        try output.html_writer.print("<span class=\"{s}\" data-token-id=\"{d}\">", .{ category.toCssClass(), i });
+
+        for (token_text) |char| {
+            try escapeHtmlChar(output.html_writer, char);
+        }
+
+        try output.html_writer.writeAll("</span>");
+        source_offset = region.end.offset;
+    }
+
+    // Output any remaining characters
+    while (source_offset < content.source.len) {
+        const char = content.source[source_offset];
+        if (char == '\n') {
+            try output.html_writer.writeAll("\n");
+            line_num += 1;
+            if (source_offset + 1 < content.source.len) {
+                try output.html_writer.print("<span class=\"line-number\">{d}</span> ", .{line_num});
+            }
+        } else {
+            try escapeHtmlChar(output.html_writer, char);
+        }
+        source_offset += 1;
+    }
+
+    try output.html_writer.writeAll("</pre>");
+
+    try output.html_writer.writeAll(
+        \\
+        \\            </div>
+        \\        </div>
+ \\ + ); +} + +/// Generate PROBLEMS section for both markdown and HTML +fn generateProblemsSection(output: *DualOutput, parse_ast: *AST, can_ir: *CIR, solver: *Solver, content: *const Content, snapshot_path: []const u8, module_env: *base.ModuleEnv) !void { + // Markdown PROBLEMS section + try output.md_writer.writeAll(Section.PROBLEMS); + + // HTML PROBLEMS section + try output.html_writer.writeAll( + \\
+        \\        <div class="section">
+        \\            <div class="section-header">PROBLEMS</div>
+        \\            <div class="section-content">
+        \\
+ ); + + var tokenize_problems: usize = 0; + var parser_problems: usize = 0; + var canonicalize_problems: usize = 0; + var check_types_problem: usize = 0; + + // Tokenize Diagnostics + for (parse_ast.tokenize_diagnostics.items) |diagnostic| { + tokenize_problems += 1; + var report: reporting.Report = parse_ast.tokenizeDiagnosticToReport(diagnostic, output.gpa) catch |err| { + try output.md_writer.print("Error creating tokenize report: {}\n", .{err}); + try output.html_writer.print("
<div class=\"problem\">Error creating tokenize report: {}</div>
\n", .{err}); + continue; + }; + defer report.deinit(); + + report.render(output.md_writer.any(), .markdown) catch |err| { + try output.md_writer.print("Error rendering report: {}\n", .{err}); + }; + + try output.html_writer.writeAll("
"); + report.render(output.html_writer.any(), .markdown) catch |err| { + try output.html_writer.print("Error rendering report: {}", .{err}); + }; + try output.html_writer.writeAll("
\n"); + } + + // Parser Diagnostics + for (parse_ast.parse_diagnostics.items) |diagnostic| { + parser_problems += 1; + var report: reporting.Report = parse_ast.parseDiagnosticToReport(diagnostic, output.gpa, snapshot_path) catch |err| { + try output.md_writer.print("Error creating parse report: {}\n", .{err}); + try output.html_writer.print("
<div class=\"problem\">Error creating parse report: {}</div>
\n", .{err}); + continue; + }; + defer report.deinit(); + + report.render(output.md_writer.any(), .markdown) catch |err| { + try output.md_writer.print("Error rendering report: {}\n", .{err}); + }; + + try output.html_writer.writeAll("
"); + report.render(output.html_writer.any(), .markdown) catch |err| { + try output.html_writer.print("Error rendering report: {}", .{err}); + }; + try output.html_writer.writeAll("
\n"); + } + + // Canonicalization Diagnostics + const diagnostics = can_ir.getDiagnostics(); + defer output.gpa.free(diagnostics); + for (diagnostics) |diagnostic| { + canonicalize_problems += 1; + var report: reporting.Report = can_ir.diagnosticToReport(diagnostic, output.gpa, content.source, snapshot_path) catch |err| { + try output.md_writer.print("Error creating canonicalization report: {}\n", .{err}); + try output.html_writer.print("
<div class=\"problem\">Error creating canonicalization report: {}</div>
\n", .{err}); + continue; + }; + defer report.deinit(); + + report.render(output.md_writer.any(), .markdown) catch |err| { + try output.md_writer.print("Error rendering report: {}\n", .{err}); + }; + + try output.html_writer.writeAll("
"); + report.render(output.html_writer.any(), .markdown) catch |err| { + try output.html_writer.print("Error rendering report: {}", .{err}); + }; + try output.html_writer.writeAll("
\n"); + } + + // Check Types Problems + var problem_buf = std.ArrayList(u8).init(output.gpa); + defer problem_buf.deinit(); + + var problems_itr = solver.problems.problems.iterIndices(); + while (problems_itr.next()) |problem_idx| { + check_types_problem += 1; + const problem = solver.problems.problems.get(problem_idx); + var report: reporting.Report = problem.buildReport( + output.gpa, + &problem_buf, + &solver.snapshots, + &module_env.idents, + content.source, + snapshot_path, + module_env, + ) catch |err| { + try output.md_writer.print("Error creating type checking report: {}\n", .{err}); + try output.html_writer.print("
<div class=\"problem\">Error creating type checking report: {}</div>
\n", .{err}); + continue; + }; + defer report.deinit(); + + report.render(output.md_writer.any(), .markdown) catch |err| { + try output.md_writer.print("Error rendering report: {}\n", .{err}); + }; + + try output.html_writer.writeAll("
"); + report.render(output.html_writer.any(), .markdown) catch |err| { + try output.html_writer.print("Error rendering report: {}", .{err}); + }; + try output.html_writer.writeAll("
\n"); + } + + const nil_problems = tokenize_problems == 0 and parser_problems == 0 and canonicalize_problems == 0 and check_types_problem == 0; + + if (nil_problems) { + try output.md_writer.writeAll("NIL\n"); + try output.html_writer.writeAll("
<div class=\"nil\">NIL</div>
\n"); + log("reported NIL problems", .{}); + } else { + log("reported {} token problems", .{tokenize_problems}); + log("reported {} parser problems", .{parser_problems}); + log("reported {} canonicalization problems", .{canonicalize_problems}); + log("reported {} type problems", .{check_types_problem}); + } + + // Don't write out section end for markdown, as the problem reports are already in markdown format. + + try output.html_writer.writeAll( + \\
+        \\            </div>
+        \\        </div>
+ \\ + ); +} + +/// Generate TOKENS section for both markdown and HTML +fn generateTokensSection(output: *DualOutput, parse_ast: *AST, content: *const Content, module_env: *base.ModuleEnv) !void { + // Markdown TOKENS section + try output.md_writer.writeAll(Section.TOKENS); + + // HTML TOKENS section + try output.html_writer.writeAll( + \\
+        \\        <div class="section">
+        \\            <div class="section-header">TOKENS</div>
+        \\            <div class="section-content">
+        \\
+    );
+
+    var tokenizedBuffer = parse_ast.tokens;
+    const tokens = tokenizedBuffer.tokens.items(.tag);
+    for (tokens, 0..) |tok, i| {
+        const region = tokenizedBuffer.resolve(@intCast(i));
+        const info = try module_env.calcRegionInfo(content.source, region.start.offset, region.end.offset);
+        const category = tokenToCategory(tok);
+
+        // Markdown token output
+        const region_str = try std.fmt.allocPrint(output.gpa, "{s}({d}:{d}-{d}:{d}),", .{
+            @tagName(tok),
+            // add one to display numbers instead of index
+            info.start_line_idx + 1,
+            info.start_col_idx + 1,
+            info.end_line_idx + 1,
+            info.end_col_idx + 1,
+        });
+        defer output.gpa.free(region_str);
+
+        try output.md_writer.writeAll(region_str);
+
+        // HTML token output (without line:col ranges)
+        try output.html_writer.print("<span class=\"{s}\" data-token-id=\"{d}\">{s}</span>", .{
+            category.toCssClass(),
+            i,
+            @tagName(tok),
+        });
+
+        if (tok == .Newline) {
+            try output.md_writer.writeAll("\n");
+            try output.html_writer.writeAll("\n");
+            try output.html_writer.writeAll("<br>");
+        } else {
+            try output.html_writer.writeAll(" ");
+        }
+    }
+
+    try output.md_writer.writeAll("\n");
+    try output.md_writer.writeAll(Section.SECTION_END);
+
+    try output.html_writer.writeAll(
+        \\            </div>
+        \\        </div>
+ \\ + ); +} + +/// Generate PARSE section for both markdown and HTML +fn generateParseSection(output: *DualOutput, content: *const Content, parse_ast: *AST, module_env: *base.ModuleEnv) !void { + var parse_buffer = std.ArrayList(u8).init(output.gpa); + defer parse_buffer.deinit(); + + switch (content.meta.node_type) { + .file => { + try parse_ast.toSExprStr(module_env, parse_buffer.writer().any()); + }, + .header => { + const header = parse_ast.store.getHeader(@enumFromInt(parse_ast.root_node_idx)); + var node = header.toSExpr(module_env, parse_ast); + defer node.deinit(output.gpa); + + node.toStringPretty(parse_buffer.writer().any()); + }, + .expr => { + const expr = parse_ast.store.getExpr(@enumFromInt(parse_ast.root_node_idx)); + var node = expr.toSExpr(module_env, parse_ast); + defer node.deinit(output.gpa); + + node.toStringPretty(parse_buffer.writer().any()); + }, + .statement => { + const stmt = parse_ast.store.getStatement(@enumFromInt(parse_ast.root_node_idx)); + var node = stmt.toSExpr(module_env, parse_ast); + defer node.deinit(output.gpa); + + node.toStringPretty(parse_buffer.writer().any()); + }, + } + + // Markdown PARSE section + try output.md_writer.writeAll(Section.PARSE); + try output.md_writer.writeAll(parse_buffer.items); + try output.md_writer.writeAll("\n"); + try output.md_writer.writeAll(Section.SECTION_END); + + // HTML PARSE section + try output.html_writer.writeAll( + \\
+        \\        <div class="section">
+        \\            <div class="section-header">PARSE</div>
+        \\            <div class="section-content">
+        \\                <pre>
+    );
+
+    // Escape HTML in parse content
+    for (parse_buffer.items) |char| {
+        try escapeHtmlChar(output.html_writer, char);
+    }
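
The escape loop above leans entirely on the contract of escapeHtmlChar. A quick sanity check of that contract, written as an in-file test sketch (not part of this patch; it assumes escapeHtmlChar keeps its current signature):

    test "escapeHtmlChar escapes markup-significant characters" {
        var buf = std.ArrayList(u8).init(std.testing.allocator);
        defer buf.deinit();
        // Feed a string through the same per-character path the generators use.
        for ("a<b&c") |char| {
            try escapeHtmlChar(buf.writer(), char);
        }
        try std.testing.expectEqualStrings("a&lt;b&amp;c", buf.items);
    }
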
+
+    try output.html_writer.writeAll(
+        \\                </pre>
+        \\            </div>
+        \\        </div>
+ \\ + ); +} + +/// Generate FORMATTED section for both markdown and HTML +fn generateFormattedSection(output: *DualOutput, content: *const Content, parse_ast: *AST) !void { + var formatted = std.ArrayList(u8).init(output.gpa); + defer formatted.deinit(); + + switch (content.meta.node_type) { + .file => { + try fmt.formatAst(parse_ast.*, formatted.writer().any()); + }, + .header => { + try fmt.formatHeader(parse_ast.*, formatted.writer().any()); + }, + .expr => { + try fmt.formatExpr(parse_ast.*, formatted.writer().any()); + }, + .statement => { + try fmt.formatStatement(parse_ast.*, formatted.writer().any()); + }, + } + + const is_changed = !std.mem.eql(u8, formatted.items, content.source); + const display_content = if (is_changed) formatted.items else "NO CHANGE"; + + // Markdown FORMATTED section + try output.md_writer.writeAll(Section.FORMATTED); + try output.md_writer.writeAll(display_content); + try output.md_writer.writeAll("\n"); + try output.md_writer.writeAll(Section.SECTION_END); + + // HTML FORMATTED section + try output.html_writer.writeAll( + \\
+        \\        <div class="section">
+        \\            <div class="section-header">FORMATTED</div>
+        \\            <div class="section-content">
+        \\                <pre>
+    );
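
The FORMATTED section writes the formatter's output only when it differs byte-for-byte from the input; otherwise it records "NO CHANGE", which keeps already-formatted snapshots stable. The decision reduces to this (hypothetical helper, shown only to make the rule explicit):

    fn formattedDisplay(formatted: []const u8, source: []const u8) []const u8 {
        // Byte equality, not semantic equality: any whitespace drift counts as a change.
        return if (std.mem.eql(u8, formatted, source)) "NO CHANGE" else formatted;
    }
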
+
+    // Escape HTML in formatted content
+    for (display_content) |char| {
+        try escapeHtmlChar(output.html_writer, char);
+    }
+
+    try output.html_writer.writeAll(
+        \\                </pre>
+        \\            </div>
+        \\        </div>
+ \\ + ); +} + +/// Generate CANONICALIZE section for both markdown and HTML +fn generateCanonicalizeSection(output: *DualOutput, content: *const Content, can_ir: *CIR, module_env: *base.ModuleEnv, maybe_expr_idx: ?CIR.Expr.Idx) !void { + var canonicalized = std.ArrayList(u8).init(output.gpa); + defer canonicalized.deinit(); + + try can_ir.toSExprStr(module_env, canonicalized.writer().any(), maybe_expr_idx, content.source); + + // Markdown CANONICALIZE section + try output.md_writer.writeAll(Section.CANONICALIZE); + try output.md_writer.writeAll(canonicalized.items); + try output.md_writer.writeAll("\n"); + try output.md_writer.writeAll(Section.SECTION_END); + + // HTML CANONICALIZE section + try output.html_writer.writeAll( + \\
+        \\        <div class="section">
+        \\            <div class="section-header">CANONICALIZE</div>
+        \\            <div class="section-content">
+        \\                <pre>
+    );
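
toSExprStr and the other render entry points take a type-erased writer, which is why every call site in this file goes through writer().any(): the concrete ArrayList writer is erased at the API boundary. The same move in isolation, as a test-style sketch (assumes a Zig version with std.io.AnyWriter, which the surrounding code already requires):

    test "an ArrayList writer can be type-erased to AnyWriter" {
        var buf = std.ArrayList(u8).init(std.testing.allocator);
        defer buf.deinit();
        const w: std.io.AnyWriter = buf.writer().any();
        try w.writeAll("(e-int 42)");
        try std.testing.expectEqualStrings("(e-int 42)", buf.items);
    }
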
+
+    // Escape HTML in canonicalized content
+    for (canonicalized.items) |char| {
+        try escapeHtmlChar(output.html_writer, char);
+    }
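
Every generator above takes the same DualOutput pair, so one pass over the compiler stages fills the markdown buffer and the HTML buffer together. A test-style sketch of the pattern (uses DualOutput exactly as defined earlier in this patch):

    test "DualOutput fans a section out to both buffers" {
        const gpa = std.testing.allocator;
        var md_buffer = std.ArrayList(u8).init(gpa);
        defer md_buffer.deinit();
        var html_buffer = std.ArrayList(u8).init(gpa);
        defer html_buffer.deinit();

        var output = DualOutput.init(gpa, &md_buffer, &html_buffer);
        try output.md_writer.writeAll("~~~META");
        try output.html_writer.writeAll("<div class=\"section\">META</div>");

        try std.testing.expectEqualStrings("~~~META", md_buffer.items);
        try std.testing.expectEqualStrings("<div class=\"section\">META</div>", html_buffer.items);
    }
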
+
+    try output.html_writer.writeAll(
+        \\                </pre>
+        \\            </div>
+        \\        </div>
+ \\ + ); +} + +/// Generate TYPES section for both markdown and HTML +fn generateTypesSection(output: *DualOutput, content: *const Content, can_ir: *CIR, maybe_expr_idx: ?CIR.Expr.Idx) !void { + var solved = std.ArrayList(u8).init(output.gpa); + defer solved.deinit(); + + try can_ir.toSexprTypesStr(solved.writer().any(), maybe_expr_idx, content.source); + + // Markdown TYPES section + try output.md_writer.writeAll(Section.TYPES); + try output.md_writer.writeAll(solved.items); + try output.md_writer.writeAll("\n"); + try output.md_writer.writeAll(Section.SECTION_END[0 .. Section.SECTION_END.len - 1]); + + // HTML TYPES section + try output.html_writer.writeAll( + \\
+        \\        <div class="section">
+        \\            <div class="section-header">TYPES</div>
+        \\            <div class="section-content">
+        \\                <pre>
+    );
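
TYPES is the final markdown section, so the code above slices off the last byte of Section.SECTION_END to avoid a trailing blank line at the end of the snapshot file. Assuming SECTION_END is a tilde fence followed by a newline (its exact value is defined elsewhere in this file), the slice behaves like this:

    test "the final section end is written without its trailing newline" {
        const section_end = "~~~\n";
        try std.testing.expectEqualStrings("~~~", section_end[0 .. section_end.len - 1]);
    }
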
+
+    // Escape HTML in types content
+    for (solved.items) |char| {
+        try escapeHtmlChar(output.html_writer, char);
+    }
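
The CSS class attached to each highlighted token comes from the tokenToCategory/toCssClass pair at the top of the file. A spot-check test could pin the mapping down (sketch only; the tag names are taken from the switch in tokenToCategory):

    test "token tags map to their highlighting classes" {
        try std.testing.expectEqualStrings("token-keyword", tokenToCategory(.KwIf).toCssClass());
        try std.testing.expectEqualStrings("token-bracket", tokenToCategory(.OpenRound).toCssClass());
        try std.testing.expectEqualStrings("token-punctuation", tokenToCategory(.Comma).toCssClass());
    }
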
+
+    try output.html_writer.writeAll(
+        \\                </pre>
+        \\            </div>
+        \\        </div>
+        \\
+    );
+}
+
+/// Generate HTML document structure and JavaScript
+fn generateHtmlWrapper(output: *DualOutput, content: *const Content) !void {
+    // Write HTML document structure
+    try output.html_writer.writeAll(
+        \\<!DOCTYPE html>
+        \\<html lang="en">
+        \\<head>
+        \\    <meta charset="utf-8">
+        \\    <title>Roc Snapshot:
+    );
+    try output.html_writer.writeAll(content.meta.description);
+    try output.html_writer.writeAll(
+        \\</title>
+        \\</head>
+        \\<body>
+        \\    <div class="header">
+        \\        <h1>Roc Snapshot:
+    );
+    try output.html_writer.writeAll(content.meta.description);
+    try output.html_writer.writeAll("</h1></div>
\n"); +} + +/// Generate HTML closing tags and JavaScript +fn generateHtmlClosing(output: *DualOutput) !void { + // JavaScript for interactivity + try output.html_writer.writeAll( + \\ + \\
+        \\</body>
+        \\</html>
+    );
+}
+
+fn generateHtmlVersion(gpa: Allocator, snapshot_path: []const u8, content: *const Content, parse_ast: *AST, module_env: *base.ModuleEnv) !void {
+    // Convert .md path to .html path
+    const html_path = blk: {
+        if (std.mem.endsWith(u8, snapshot_path, ".md")) {
+            const base_path = snapshot_path[0 .. snapshot_path.len - 3];
+            break :blk try std.fmt.allocPrint(gpa, "{s}.html", .{base_path});
+        } else {
+            break :blk try std.fmt.allocPrint(gpa, "{s}.html", .{snapshot_path});
+        }
+    };
+    defer gpa.free(html_path);
+
+    var html_buffer = std.ArrayList(u8).init(gpa);
+    defer html_buffer.deinit();
+
+    var html_writer = html_buffer.writer();
+
+    // Write HTML document structure
+    try html_writer.writeAll(
+        \\<!DOCTYPE html>
+        \\<html lang="en">
+        \\<head>
+        \\    <meta charset="utf-8">
+        \\    <title>Roc Snapshot:
+    );
+    try html_writer.writeAll(content.meta.description);
+    try html_writer.writeAll(
+        \\</title>
+        \\</head>
+        \\<body>
+        \\    <div class="header">
+        \\        <h1>Roc Snapshot:
+    );
+    try html_writer.writeAll(content.meta.description);
+    try html_writer.writeAll("</h1></div>
\n"); + + // META section + try html_writer.writeAll( + \\
+        \\        <div class="section">
+        \\            <div class="section-header">META</div>
+        \\            <div class="section-content">
+        \\                <p><strong>Description:</strong>
+    );
+    try html_writer.writeAll(content.meta.description);
+    try html_writer.writeAll("</p>\n                <p><strong>Type:</strong> ");
+    try html_writer.writeAll(content.meta.node_type.toString());
+    try html_writer.writeAll(
+        \\</p>
+        \\            </div>
+        \\        </div>
+ \\ + ); + + // SOURCE section with syntax highlighting + try html_writer.writeAll( + \\
+        \\        <div class="section">
+        \\            <div class="section-header">SOURCE</div>
+        \\            <div class="section-content">
+        \\                <pre class="source-code">
+    );
+
+    // Apply syntax highlighting by processing tokens in order
+    var tokenizedBuffer = parse_ast.tokens;
+    const tokens = tokenizedBuffer.tokens.items(.tag);
+    var source_offset: u32 = 0;
+    var line_num: u32 = 1;
+    var col_num: u32 = 1;
+
+    try html_writer.print("<span class=\"line-number\">{d}</span> ", .{line_num});
+
+    for (tokens, 0..) |tok, i| {
+        const region = tokenizedBuffer.resolve(@intCast(i));
+        const info = try module_env.calcRegionInfo(content.source, region.start.offset, region.end.offset);
+
+        // Output any characters between last token and this token (whitespace, etc.)
+        while (source_offset < region.start.offset) {
+            const char = content.source[source_offset];
+            if (char == '\n') {
+                try html_writer.writeAll("\n");
+                line_num += 1;
+                col_num = 1;
+                try html_writer.print("<span class=\"line-number\">{d}</span> ", .{line_num});
+            } else {
+                try escapeHtmlChar(html_writer, char);
+                col_num += 1;
+            }
+            source_offset += 1;
+        }
+
+        // Skip newline tokens since we handle newlines in whitespace above
+        if (tok == .Newline) {
+            continue;
+        }
+
+        // Output the token with syntax highlighting
+        const category = tokenToCategory(tok);
+        const token_text = content.source[region.start.offset..region.end.offset];
+
+        try html_writer.print("<span class=\"{s}\" data-token-id=\"{d}\">", .{ category.toCssClass(), i });
+
+        for (token_text) |char| {
+            try escapeHtmlChar(html_writer, char);
+        }
+
+        try html_writer.writeAll("</span>");
+        source_offset = region.end.offset;
+        col_num = @intCast(info.end_col_idx + 1);
+    }
+
+    // Output any remaining characters
+    while (source_offset < content.source.len) {
+        const char = content.source[source_offset];
+        if (char == '\n') {
+            try html_writer.writeAll("\n");
+            line_num += 1;
+            col_num = 1;
+            if (source_offset + 1 < content.source.len) {
+                try html_writer.print("<span class=\"line-number\">{d}</span> ", .{line_num});
+            }
+        } else {
+            try escapeHtmlChar(html_writer, char);
+            col_num += 1;
+        }
+        source_offset += 1;
+    }
+
+    try html_writer.writeAll("</pre>");
+
+    try html_writer.writeAll(
+        \\
+        \\            </div>
+        \\        </div>
+ \\ + ); + + // PROBLEMS section + try html_writer.writeAll( + \\
+        \\        <div class="section">
+        \\            <div class="section-header">PROBLEMS</div>
+        \\            <div class="section-content">
+        \\
+ ); + + // Add problems output (we'll use the same logic as the markdown version) + var tokenize_problems: usize = 0; + var parser_problems: usize = 0; + + for (parse_ast.tokenize_diagnostics.items) |diagnostic| { + tokenize_problems += 1; + var report: reporting.Report = parse_ast.tokenizeDiagnosticToReport(diagnostic, gpa) catch |err| { + try html_writer.print("
<div class=\"problem\">Error creating tokenize report: {}</div>
\n", .{err}); + continue; + }; + defer report.deinit(); + + // Render as HTML instead of markdown + try html_writer.writeAll("
"); + report.render(html_writer.any(), .markdown) catch |err| { + try html_writer.print("Error rendering report: {}", .{err}); + }; + try html_writer.writeAll("
\n"); + } + + for (parse_ast.parse_diagnostics.items) |diagnostic| { + parser_problems += 1; + var report: reporting.Report = parse_ast.parseDiagnosticToReport(diagnostic, gpa, snapshot_path) catch |err| { + try html_writer.print("
<div class=\"problem\">Error creating parse report: {}</div>
\n", .{err}); + continue; + }; + defer report.deinit(); + + try html_writer.writeAll("
"); + report.render(html_writer.any(), .markdown) catch |err| { + try html_writer.print("Error rendering report: {}", .{err}); + }; + try html_writer.writeAll("
\n"); + } + + if (tokenize_problems == 0 and parser_problems == 0) { + try html_writer.writeAll("
<div class=\"nil\">NIL</div>
\n"); + } + + try html_writer.writeAll( + \\
+        \\            </div>
+        \\        </div>
+ \\ + ); + + // TOKENS section + try html_writer.writeAll( + \\
+        \\        <div class="section">
+        \\            <div class="section-header">TOKENS</div>
+        \\            <div class="section-content">
+        \\
+    );
+
+    var tokenizedBuffer2 = parse_ast.tokens;
+    const tokens2 = tokenizedBuffer2.tokens.items(.tag);
+    for (tokens2, 0..) |tok, i| {
+        const category = tokenToCategory(tok);
+
+        try html_writer.print("<span class=\"{s}\" data-token-id=\"{d}\">{s}</span>", .{
+            category.toCssClass(),
+            i,
+            @tagName(tok),
+        });
+
+        if (tok == .Newline) {
+            try html_writer.writeAll("\n");
+        } else {
+            try html_writer.writeAll(" ");
+        }
+    }
+
+    try html_writer.writeAll(
+        \\
+        \\            </div>
+        \\        </div>
+        \\
+    );
+
+    // JavaScript for interactivity
+    try html_writer.writeAll(
+        \\</body>
+        \\</html>
+ \\ + \\ + ); + + // Write HTML file + var html_file = std.fs.cwd().createFile(html_path, .{}) catch |err| { + log("failed to create HTML file '{s}': {s}", .{ html_path, @errorName(err) }); + return; + }; + defer html_file.close(); + try html_file.writer().writeAll(html_buffer.items); + + log("generated HTML version: {s}", .{html_path}); +} + +/// Write HTML buffer to file +fn writeHtmlFile(gpa: Allocator, snapshot_path: []const u8, html_buffer: *std.ArrayList(u8)) !void { + // Convert .md path to .html path + const html_path = blk: { + if (std.mem.endsWith(u8, snapshot_path, ".md")) { + const base_path = snapshot_path[0 .. snapshot_path.len - 3]; + break :blk try std.fmt.allocPrint(gpa, "{s}.html", .{base_path}); + } else { + break :blk try std.fmt.allocPrint(gpa, "{s}.html", .{snapshot_path}); + } + }; + defer gpa.free(html_path); + + // Write HTML file + var html_file = std.fs.cwd().createFile(html_path, .{}) catch |err| { + log("failed to create HTML file '{s}': {s}", .{ html_path, @errorName(err) }); + return; + }; + defer html_file.close(); + try html_file.writer().writeAll(html_buffer.items); + + log("generated HTML version: {s}", .{html_path}); +} + +/// New unified processSnapshotFile function that generates both markdown and HTML simultaneously +fn processSnapshotFileUnified(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_corpus_path: ?[]const u8) !bool { // Log the file path that was written to log("processing snapshot file: {s}", .{snapshot_path}); @@ -408,7 +1457,6 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor defer gpa.free(file_content); // Check our file starts with the metadata section - // so we can skip parsing and later steps if this isn't a snapshot file if (!std.mem.startsWith(u8, file_content, Section.META)) { std.log.err("file '{s}' is not a valid snapshot file", .{snapshot_path}); std.log.err("snapshot files must start with '~~~META'", .{}); @@ -445,7 +1493,7 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor var module_env = base.ModuleEnv.init(gpa); defer module_env.deinit(); - // Parse the source cod + // Parse the source code (ONCE) var parse_ast = switch (content.meta.node_type) { .file => parse.parse(&module_env, content.source), .header => parse.parseHeader(&module_env, content.source), @@ -454,11 +1502,9 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor }; defer parse_ast.deinit(gpa); - // shouldn't be required in future parse_ast.store.emptyScratch(); - // Canonicalize the source code - + // Canonicalize the source code (ONCE) var can_ir = CIR.init(&module_env); defer can_ir.deinit(); @@ -473,7 +1519,6 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor // TODO: implement canonicalize_header when available }, .expr => { - // For expr snapshots, just canonicalize the root expression directly const expr_idx: AST.Expr.Idx = @enumFromInt(parse_ast.root_node_idx); maybe_expr_idx = can.canonicalize_expr(expr_idx); }, @@ -482,7 +1527,7 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor }, } - // Types + // Types (ONCE) var solver = try Solver.init(gpa, &can_ir.env.types, &can_ir); defer solver.deinit(); @@ -492,268 +1537,48 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor solver.checkDefs(); } - // Buffer all output in memory before writing to the snapshot file - var buffer = std.ArrayList(u8).init(gpa); - defer buffer.deinit(); + // Buffer all output in 
memory before writing files + var md_buffer = std.ArrayList(u8).init(gpa); + defer md_buffer.deinit(); - var writer = buffer.writer(); + var html_buffer = std.ArrayList(u8).init(gpa); + defer html_buffer.deinit(); - // Copy original META - { - try writer.writeAll(Section.META); - try content.meta.format(writer); - try writer.writeAll("\n"); - try writer.writeAll(Section.SECTION_END); + var output = DualOutput.init(gpa, &md_buffer, &html_buffer); + + // Generate HTML wrapper + try generateHtmlWrapper(&output, &content); + + // Generate all sections simultaneously + try generateMetaSection(&output, &content); + try generateSourceSection(&output, &content, &parse_ast); + try generateProblemsSection(&output, &parse_ast, &can_ir, &solver, &content, snapshot_path, &module_env); + try generateTokensSection(&output, &parse_ast, &content, &module_env); + + // Generate remaining sections + try generateParseSection(&output, &content, &parse_ast, &module_env); + try generateFormattedSection(&output, &content, &parse_ast); + if (content.has_canonicalize) { + try generateCanonicalizeSection(&output, &content, &can_ir, &module_env, maybe_expr_idx); } + try generateTypesSection(&output, &content, &can_ir, maybe_expr_idx); - // Copy original SOURCE - { - try writer.writeAll(Section.SOURCE); - try writer.writeAll(content.source); - try writer.writeAll("\n"); - try writer.writeAll(Section.SECTION_END); - } + // Generate HTML closing + try generateHtmlClosing(&output); - // Write out any PROBLEMS - { - try writer.writeAll(Section.PROBLEMS); - - var tokenize_problems: usize = 0; - var parser_problems: usize = 0; - var canonicalize_problems: usize = 0; - var check_types_problem: usize = 0; - - // Use plain text rendering target - - // Tokenize Diagnostics - for (parse_ast.tokenize_diagnostics.items) |diagnostic| { - tokenize_problems += 1; - - var report: Report = parse_ast.tokenizeDiagnosticToReport(diagnostic, gpa) catch |err| { - try writer.print("Error creating tokenize report: {}\n", .{err}); - continue; - }; - defer report.deinit(); - report.render(writer.any(), .markdown) catch |err| { - try writer.print("Error rendering report: {}\n", .{err}); - continue; - }; - } - - // Parser Diagnostics - for (parse_ast.parse_diagnostics.items) |diagnostic| { - parser_problems += 1; - - var report: Report = parse_ast.parseDiagnosticToReport(diagnostic, gpa, snapshot_path) catch |err| { - try writer.print("Error creating parse report: {}\n", .{err}); - continue; - }; - defer report.deinit(); - report.render(writer.any(), .markdown) catch |err| { - try writer.print("Error rendering report: {}\n", .{err}); - continue; - }; - } - - // Canonicalization Diagnostics - const diagnostics = can_ir.getDiagnostics(); - defer gpa.free(diagnostics); - for (diagnostics) |diagnostic| { - canonicalize_problems += 1; - - var report: Report = can_ir.diagnosticToReport(diagnostic, gpa, content.source, snapshot_path) catch |err| { - try writer.print("Error creating canonicalization report: {}\n", .{err}); - continue; - }; - defer report.deinit(); - report.render(writer.any(), .markdown) catch |err| { - try writer.print("Error rendering report: {}\n", .{err}); - continue; - }; - } - - // Check Types Problems - - // Create TypeWriter for converting types to strings - var problem_buf = std.ArrayList(u8).init(gpa); - defer problem_buf.deinit(); - - var problems_itr = solver.problems.problems.iterIndices(); - while (problems_itr.next()) |problem_idx| { - check_types_problem += 1; - const problem = solver.problems.problems.get(problem_idx); - 
var report: Report = problem.buildReport( - gpa, - &problem_buf, - &solver.snapshots, - &module_env.idents, - content.source, - snapshot_path, - &module_env, - ) catch |err| { - try writer.print("Error creating type checking report: {}\n", .{err}); - continue; - }; - defer report.deinit(); - report.render(writer.any(), .markdown) catch |err| { - try writer.print("Error rendering report: {}\n", .{err}); - continue; - }; - } - - const nil_problems = tokenize_problems == 0 and parser_problems == 0 and canonicalize_problems == 0 and check_types_problem == 0; - - if (nil_problems) { - try writer.writeAll("NIL\n"); - log("reported NIL problems", .{}); - } else { - log("reported {} token problems", .{tokenize_problems}); - log("reported {} parser problems", .{parser_problems}); - log("reported {} canonicalization problems", .{canonicalize_problems}); - log("reported {} type problems", .{check_types_problem}); - } - - // Don't write out section end, as the problem reports are already in markdown format. - } - - // Write out any TOKENS - { - try writer.writeAll(Section.TOKENS); - var tokenizedBuffer = parse_ast.tokens; - const tokens = tokenizedBuffer.tokens.items(.tag); - for (tokens, 0..) |tok, i| { - const region = tokenizedBuffer.resolve(@intCast(i)); - const info = try module_env.calcRegionInfo(content.source, region.start.offset, region.end.offset); - const region_str = try std.fmt.allocPrint(gpa, "{s}({d}:{d}-{d}:{d}),", .{ - @tagName(tok), - // add one to display numbers instead of index - info.start_line_idx + 1, - info.start_col_idx + 1, - info.end_line_idx + 1, - info.end_col_idx + 1, - }); - defer gpa.free(region_str); - - try writer.writeAll(region_str); - - if (tok == .Newline) { - try writer.writeAll("\n"); - } - } - try writer.writeAll("\n"); - try writer.writeAll(Section.SECTION_END); - } - - // Write PARSE SECTION - { - var parse_buffer = std.ArrayList(u8).init(gpa); - defer parse_buffer.deinit(); - switch (content.meta.node_type) { - .file => { - try parse_ast.toSExprStr(&module_env, parse_buffer.writer().any()); - }, - .header => { - const header = parse_ast.store.getHeader(@enumFromInt(parse_ast.root_node_idx)); - var node = header.toSExpr(&module_env, &parse_ast); - defer node.deinit(gpa); - - node.toStringPretty(parse_buffer.writer().any()); - }, - .expr => { - const expr = parse_ast.store.getExpr(@enumFromInt(parse_ast.root_node_idx)); - var node = expr.toSExpr(&module_env, &parse_ast); - defer node.deinit(gpa); - - node.toStringPretty(parse_buffer.writer().any()); - }, - .statement => { - const stmt = parse_ast.store.getStatement(@enumFromInt(parse_ast.root_node_idx)); - var node = stmt.toSExpr(&module_env, &parse_ast); - defer node.deinit(gpa); - - node.toStringPretty(parse_buffer.writer().any()); - }, - } - try writer.writeAll(Section.PARSE); - try writer.writeAll(parse_buffer.items); - try writer.writeAll("\n"); - try writer.writeAll(Section.SECTION_END); - } - - // Write FORMAT SECTION - { - var formatted = std.ArrayList(u8).init(gpa); - defer formatted.deinit(); - switch (content.meta.node_type) { - .file => { - try fmt.formatAst(parse_ast, formatted.writer().any()); - }, - .header => { - try fmt.formatHeader(parse_ast, formatted.writer().any()); - }, - .expr => { - try fmt.formatExpr(parse_ast, formatted.writer().any()); - }, - .statement => { - try fmt.formatStatement(parse_ast, formatted.writer().any()); - }, - } - - try writer.writeAll(Section.FORMATTED); - - if (!std.mem.eql(u8, formatted.items, content.source)) { - try writer.writeAll(formatted.items); - try 
writer.writeAll("\n"); - } else { - try writer.writeAll("NO CHANGE"); - try writer.writeAll("\n"); - } - try writer.writeAll(Section.SECTION_END); - } - - // Write CANONICALIZE SECTION - { - var canonicalized = std.ArrayList(u8).init(gpa); - defer canonicalized.deinit(); - - try can_ir.toSExprStr(&module_env, canonicalized.writer().any(), maybe_expr_idx, content.source); - - try writer.writeAll(Section.CANONICALIZE); - try writer.writeAll(canonicalized.items); - try writer.writeAll("\n"); - try writer.writeAll(Section.SECTION_END); - } - - // Write TYPES SECTION - { - var solved = std.ArrayList(u8).init(gpa); - defer solved.deinit(); - - try can_ir.toSexprTypesStr(solved.writer().any(), maybe_expr_idx, content.source); - - // Uncomment to print entire types store, helpful for debugging - // try solved.writer().any().writeAll("\n"); - // try types.SExprWriter.allVarsToSExprStr(solved.writer().any(), gpa, &module_env); - - try writer.writeAll(Section.TYPES); - try writer.writeAll(solved.items); - try writer.writeAll("\n"); - } - - try writer.writeAll(Section.SECTION_END[0 .. Section.SECTION_END.len - 1]); - - // Now write the buffer to the snapshot file in one go - var file = std.fs.cwd().createFile(snapshot_path, .{}) catch |err| { + // Write markdown file + var md_file = std.fs.cwd().createFile(snapshot_path, .{}) catch |err| { log("failed to create file '{s}': {s}", .{ snapshot_path, @errorName(err) }); return false; }; - defer file.close(); - try file.writer().writeAll(buffer.items); + defer md_file.close(); + try md_file.writer().writeAll(md_buffer.items); - // If flag --fuzz-corpus is passed, so write the SOURCE to our corpus + // Write HTML file + try writeHtmlFile(gpa, snapshot_path, &html_buffer); + + // If flag --fuzz-corpus is passed, write the SOURCE to our corpus if (maybe_fuzz_corpus_path != null) { - - // create a pseudo-random name for our file const rand_file_name = [_][]const u8{ maybe_fuzz_corpus_path.?, &[_]u8{ rand.intRangeAtMost(u8, 'a', 'z'), @@ -786,6 +1611,10 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor return true; } +fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_corpus_path: ?[]const u8) !bool { + return processSnapshotFileUnified(gpa, snapshot_path, maybe_fuzz_corpus_path); +} + /// Extracts the sections from a snapshot file fn extractSections(gpa: Allocator, content: []const u8) !Content { var ranges = std.AutoHashMap(Section, Section.Range).init(gpa);
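
Both writeHtmlFile and the older generateHtmlVersion derive the HTML path the same way: strip a trailing ".md" if present, then append ".html". If that duplicated logic were ever factored out, a helper plus test might look like this (hypothetical names, not part of this patch):

    fn htmlPathFor(gpa: std.mem.Allocator, snapshot_path: []const u8) ![]u8 {
        // ".md" is three bytes; anything else just gets ".html" appended.
        if (std.mem.endsWith(u8, snapshot_path, ".md")) {
            return std.fmt.allocPrint(gpa, "{s}.html", .{snapshot_path[0 .. snapshot_path.len - 3]});
        }
        return std.fmt.allocPrint(gpa, "{s}.html", .{snapshot_path});
    }

    test "markdown snapshot paths map to sibling .html paths" {
        const gpa = std.testing.allocator;
        const html_path = try htmlPathFor(gpa, "src/snapshots/example.md");
        defer gpa.free(html_path);
        try std.testing.expectEqualStrings("src/snapshots/example.html", html_path);
    }
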