Implement initial Roc tokenizer in Zig

This commit is contained in:
Joshua Warner 2025-01-31 18:33:59 -08:00
parent 5e4ff44483
commit ce8f7065db
No known key found for this signature in database
GPG key ID: 89AD497003F93FDD
8 changed files with 1497 additions and 72 deletions

View file

@ -9,12 +9,16 @@ pub fn build(b: *std.Build) void {
const target = b.standardTargetOptions(.{});
const optimize = b.standardOptimizeOption(.{});
// Zig unicode library - https://codeberg.org/atman/zg
const zg = b.dependency("zg", .{});
const exe = b.addExecutable(.{
.name = "roc",
.root_source_file = b.path("src/main.zig"),
.target = target,
.optimize = optimize,
});
exe.root_module.addImport("GenCatData", zg.module("GenCatData"));
b.installArtifact(exe);
@ -34,6 +38,7 @@ pub fn build(b: *std.Build) void {
.target = target,
.optimize = optimize,
});
all_tests.root_module.addImport("GenCatData", zg.module("GenCatData"));
// Install the test binary so we can run separately
// ```sh

View file

@ -7,6 +7,10 @@
.url = "git+https://github.com/kristoff-it/zig-afl-kit#88c6b71377767c1b8d26979b0adfa12a58d988dd",
.hash = "1220796f7d2d9a2d4d7f8339ee0b14aa4bf133a15ae9ba39c941cc68e08d5c5ce9a2",
},
.zg = .{
.url = "https://codeberg.org/dude_the_builder/zg/archive/v0.13.2.tar.gz",
.hash = "122055beff332830a391e9895c044d33b15ea21063779557024b46169fb1984c6e40",
},
},
.paths = .{
"build.zig",

View file

@ -1,74 +1,79 @@
@0-21 Defs(
Defs {
tags: [
EitherIndex(2147483648),
],
regions: [
@0-17,
],
space_before: [
Slice<roc_parse::ast::CommentOrNewline> { start: 0, length: 0 },
],
space_after: [
Slice<roc_parse::ast::CommentOrNewline> { start: 0, length: 0 },
],
spaces: [],
type_defs: [],
value_defs: [
Annotation(
@0-2 SpaceAfter(
NumLiteral(
"1P",
@0-21 SpaceAfter(
Defs(
Defs {
tags: [
EitherIndex(2147483648),
],
regions: [
@0-17,
],
space_before: [
Slice<roc_parse::ast::CommentOrNewline> { start: 0, length: 0 },
],
space_after: [
Slice<roc_parse::ast::CommentOrNewline> { start: 0, length: 0 },
],
spaces: [],
type_defs: [],
value_defs: [
Annotation(
@0-2 SpaceAfter(
NumLiteral(
"11",
),
[
Newline,
],
),
[
Newline,
],
),
@4-17 Tuple {
elems: [
@5-15 Function(
[
@5-6 Apply(
"",
"I",
[],
),
@7-8 SpaceAfter(
BoundVariable(
"s",
@4-17 Tuple {
elems: [
@5-15 Function(
[
@5-6 Apply(
"",
"I",
[],
),
[
Newline,
],
@7-8 SpaceAfter(
BoundVariable(
"s",
),
[
Newline,
],
),
@10-12 Apply(
"",
"Mw",
[],
),
],
Pure,
@14-15 BoundVariable(
"r",
),
@10-12 Apply(
"",
"Mw",
[],
),
],
Pure,
@14-15 BoundVariable(
"r",
),
],
ext: Some(
@16-17 BoundVariable(
"l",
),
),
],
ext: Some(
@16-17 BoundVariable(
"l",
),
),
},
),
],
},
@18-21 SpaceBefore(
Var {
module_name: "",
ident: "asl",
},
),
],
},
[
Newline,
],
@18-21 SpaceBefore(
Var {
module_name: "",
ident: "asl",
},
[
Newline,
],
),
),
[
Newline,
],
)

View file

@ -1,4 +1,4 @@
1P
11
:(I,s
,Mw->r)l
asl
asl

195
src/check/parse.zig Normal file
View file

@ -0,0 +1,195 @@
const std = @import("std");
const tokenize = @import("tokenize.zig");
/// A byte span into the source buffer.
// NOTE(review): assumed half-open (start inclusive, end exclusive) by
// convention — confirm at use sites once nodes are actually produced.
pub const Region = struct {
    start: usize,
    end: usize,
};
/// A single parse-tree node, stored columnar-style in the Parser's
/// `std.MultiArrayList(Node)`.
pub const Node = struct {
    tag: Tag, // discriminant selecting which `Data` field is active
    data: Data,
    region: Region, // source span this node covers
    /// Kind of syntax node; mirrors the fields of `Data`.
    pub const Tag = enum {
        Unary,
        Binary,
        // TODO
    };
    /// Untagged payload union: it carries no runtime discriminant of its
    /// own, so `Node.tag` must always be consulted to pick the active field.
    pub const Data = union {
        Unary: UnaryOpData,
        Binary: BinaryOpData,
        // Add more node data as needed
    };
    pub const UnaryOpData = struct {
        // TODO
    };
    pub const BinaryOpData = struct {
        // TODO
    };
};
/// A parser diagnostic (error/warning) anchored to a source region.
// NOTE(review): currently unused — `Parser.diagnostics` stores
// `tokenize.Diagnostic` instead; confirm which type is intended.
pub const Diagnostic = struct {
    tag: Tag,
    region: Region,
    /// Diagnostic kind; variants to be filled in as the parser matures.
    pub const Tag = enum {
        // TODO
    };
};
/// Hand-written recursive-descent parser over a tokenized buffer.
/// Owns a growing node list and a diagnostics list.
// NOTE(review): there is no `deinit`, so `nodes`/`diagnostics` storage is
// never freed — confirm callers are expected to use an arena, or add one.
pub const Parser = struct {
    pos: usize, // index of the next token to inspect/consume
    tokens: tokenize.TokenizedBuffer,
    nodes: std.MultiArrayList(Node),
    // NOTE(review): this holds `tokenize.Diagnostic`, not the `Diagnostic`
    // type declared in this file — confirm which one is intended.
    diagnostics: std.ArrayList(tokenize.Diagnostic),
    allocator: std.mem.Allocator,
    /// Create a parser positioned at the first token. Takes the token
    /// buffer by value; the allocator is retained for node/diagnostic growth.
    pub fn init(tokens: tokenize.TokenizedBuffer, allocator: std.mem.Allocator) Parser {
        return Parser{
            .pos = 0,
            .tokens = tokens,
            .nodes = std.MultiArrayList(Node){},
            .diagnostics = std.ArrayList(tokenize.Diagnostic).init(allocator),
            .allocator = allocator,
        };
    }
    /// Consume one token (no-op once past the end of the buffer).
    /// Debug-prints the consumed token's tag.
    pub fn advance(self: *Parser) void {
        if (self.pos >= self.tokens.tokens.len) {
            return;
        }
        std.debug.print("advance {s}\n", .{@tagName(self.tokens.tokens.items(.tag)[self.pos])});
        self.pos += 1;
    }
    /// Look at the current token's tag without consuming it.
    /// Past the end of the buffer, synthesizes `.EndOfFile`.
    pub fn peek(self: *Parser) tokenize.Token.Tag {
        if (self.pos >= self.tokens.tokens.len) {
            return .EndOfFile;
        }
        return self.tokens.tokens.items(.tag)[self.pos];
    }
    // If the next token is a newline, consume it
    // Returns the indent level of the next line if it is a newline, otherwise null
    // NOTE(review): assumes the tokenizer stores the *following* line's indent
    // in the Newline token's `offset` field (see the test below, which calls
    // `pushNewline(indent)`) — confirm against tokenize.zig. The `@intCast`
    // to u16 is unchecked and will trap in safe modes if indent > 65535.
    pub fn consumeNewline(self: *Parser) ?u16 {
        if (self.peek() != .Newline) {
            return null;
        }
        const indent = self.tokens.tokens.items(.offset)[self.pos];
        self.advance();
        return @intCast(indent);
    }
    // Returns the indent level of the next line if the next token is a newline, otherwise null
    // Same as `consumeNewline`, but does not move `pos`.
    pub fn peekNewline(self: *Parser) ?u16 {
        if (self.peek() != .Newline) {
            return null;
        }
        const indent = self.tokens.tokens.items(.offset)[self.pos];
        return @intCast(indent);
    }
    /// Parse the whole token stream as a sequence of top-level statements.
    /// Top-level statements must start at indent 0 (currently asserted
    /// rather than reported as a diagnostic — see TODO).
    pub fn parseFile(self: *Parser) !void {
        while (self.peek() != .EndOfFile) {
            if (self.consumeNewline()) |indent| {
                std.debug.print("parseFile indent {d}\n", .{indent});
                std.debug.assert(indent == 0); // TODO: report an error
            }
            // A trailing newline may have consumed the last token before EOF.
            if (self.peek() == .EndOfFile) {
                break;
            }
            self.parseStmt(0);
        }
    }
    /// Parse one statement at the given enclosing indent level.
    /// Currently only `ident = ...` assignments are recognized; the
    /// expression-statement branch consumes the ident but builds nothing yet.
    pub fn parseStmt(self: *Parser, base_indent: u16) void {
        switch (self.peek()) {
            .LowerIdent => {
                self.advance();
                if (self.peek() == .OpEquals) {
                    self.finishParseAssign(base_indent);
                    std.debug.print("parseStmt assign\n", .{});
                } else {
                    std.debug.print("parseStmt expr\n", .{});
                }
            },
            else => {
                std.debug.panic("todo: emit error, unexpected token {s}", .{@tagName(self.peek())});
            },
        }
    }
    /// Parse one expression. Only bare identifiers and integer literals are
    /// handled; everything else panics (error reporting is TODO).
    pub fn parseExpr(self: *Parser) void {
        switch (self.peek()) {
            .LowerIdent => {
                self.advance();
                std.debug.print("parseExpr {s}\n", .{@tagName(self.peek())});
                // TODO: add node
            },
            .Int => {
                self.advance();
                std.debug.print("parseExpr {s}\n", .{@tagName(self.peek())});
                // TODO: add node
            },
            else => {
                std.debug.panic("todo: emit error", .{});
            },
        }
    }
    /// Finish parsing `ident = ...` after the identifier: consumes the `=`,
    /// then either a same-line expression, or an indented block of statements
    /// terminated by a line at or below `base_indent`.
    pub fn finishParseAssign(self: *Parser, base_indent: u16) void {
        std.debug.assert(self.peek() == .OpEquals);
        self.advance();
        if (self.consumeNewline()) |indent| {
            std.debug.print("startParseAssign indent {d}\n", .{indent});
            if (indent <= base_indent) {
                std.debug.panic("todo: emit error", .{});
            }
            self.parseStmt(indent);
            while (true) {
                if (self.peekNewline()) |i| {
                    // NOTE(review): continuation lines are only required to be
                    // deeper than `base_indent`, not to match `indent` — a line
                    // indented between the two is accepted; confirm intended.
                    if (i <= base_indent) {
                        break;
                    }
                    self.advance();
                } else {
                    break;
                }
                self.parseStmt(indent);
            }
        } else {
            self.parseExpr();
        }
        std.debug.print("finishParseAssign\n", .{});
    }
};
test "Parser advance and peek" {
    // Use the page allocator: token/node storage is never freed yet, so a
    // leak-checking allocator would fail this test.
    const gpa = std.heap.page_allocator;
    var buf = try tokenize.TokenizedBuffer.init(gpa);
    // Hand-build the token stream for:
    //   x =
    //       y = 1
    //       y
    try buf.pushToken(.LowerIdent, 0, 1);
    try buf.pushToken(.OpEquals, 0, 0);
    try buf.pushNewline(4);
    try buf.pushToken(.LowerIdent, 0, 0);
    try buf.pushToken(.OpEquals, 0, 0);
    try buf.pushToken(.Int, 0, 0);
    try buf.pushNewline(4);
    try buf.pushToken(.LowerIdent, 0, 0);
    try buf.pushNewline(0);
    try buf.pushToken(.EndOfFile, 0, 0);
    var p = Parser.init(buf, gpa);
    try p.parseFile();
}

1206
src/check/tokenize.zig Normal file

File diff suppressed because it is too large Load diff

View file

@ -3,6 +3,7 @@ const mem = std.mem;
const Allocator = std.mem.Allocator;
const RocCmd = @import("cli.zig").RocCmd;
const RocOpt = @import("cli.zig").RocOpt;
const syntax = @import("check/syntax.zig");
const usage =
\\Usage:
@ -138,11 +139,20 @@ fn rocVersion(allocator: Allocator, args: []const []const u8) !void {
}
fn rocCheck(allocator: Allocator, opt: RocOpt, args: []const []const u8) !void {
_ = allocator;
var syn = try syntax.Syntax.init(allocator);
defer syn.deinit();
_ = opt;
std.debug.print("TODO roc check\n{}\n{s}\n\n", .{ opt, args });
// Temporary implementation for early testing
const dir_path = args[0];
const fs = std.fs.cwd();
var dir = try fs.openDir(dir_path, .{ .iterate = true });
defer dir.close();
fatal("not implemented", .{});
const success = try syn.tokenizeAndCheckSyntaxFiles(dir);
if (!success) {
fatal("syntax check failed", .{});
}
}
fn rocDocs(allocator: Allocator, opt: RocOpt, args: []const []const u8) !void {