mirror of
https://github.com/ruuda/rcl.git
synced 2025-10-07 07:30:27 +00:00

The module name and path hack throws off Mypy, in different ways depending on how you run it (outside Nix shell, inside Nix develop shell, or as part of the flake check ...). Let's just forward-declare it and sidestep problems. Maybe I should make this script the source of truth and generate the Pygments grammar from it. We could do that later, for now this works.
154 lines
5.3 KiB
Python
Executable file
154 lines
5.3 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
|
|
# RCL -- A reasonable configuration language.
|
|
# Copyright 2025 Ruud van Asseldonk
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# A copy of the License has been included in the root of the repository.
|
|
|
|
"""
|
|
This script generates files that contain references to the keywords and builtins,
|
|
so that there is a single source of truth. We take the Pygments lexer in
|
|
//grammar/pygments as that source of truth, as it's a Python script that is easily
|
|
importable here.
|
|
|
|
TODO: Add a check mode that verifies that the generated files are up to date,
|
|
and run it from a check in the Nix flake, so that CI will fail if we get it wrong.
|
|
"""
|
|
|
|
import hashlib
|
|
import json
|
|
import os
|
|
import sys
|
|
|
|
from typing import Iterable, List, TYPE_CHECKING
|
|
|
|
if TYPE_CHECKING:
|
|
KEYWORDS: List[str]
|
|
BUILTINS: List[str]
|
|
TYPES: List[str]
|
|
else:
|
|
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
|
|
from grammar.pygments.rcl import KEYWORDS, BUILTINS, TYPES
|
|
|
|
|
|
def generate_fuzz_dictionary() -> None:
    """
    Take dictionary_base.txt and append all keywords to it.

    Writes fuzz/dictionary.txt: the base dictionary first, then one section
    per category, with every entry emitted through `json.dump` so it becomes
    a properly quoted/escaped string literal.
    """
    # Read the base through a context manager; the previous bare
    # `open(...).read()` left the file handle to be closed by the GC.
    with open("fuzz/dictionary_base.txt", "r", encoding="utf-8") as base_file:
        base = base_file.read()

    with open("fuzz/dictionary.txt", "w", encoding="utf-8") as f:
        f.write("# This file is generated by tools/generate_keywords.py.\n\n")
        f.write(base)

        def write_section(title: str, elems: Iterable[str]) -> None:
            # Closes over `f`; one commented section per keyword category.
            f.write(f"\n# {title}\n")
            for x in elems:
                json.dump(x, f)
                f.write("\n")

        write_section("Keywords.", KEYWORDS)
        write_section("Builtins methods.", BUILTINS)
        write_section("Builtin types.", TYPES)
|
|
|
|
|
|
def generate_vim_plugin() -> None:
    """
    Fill out the template holes in `rcl.vim.template`.

    Substitutes the RCL_BUILTINS and RCL_TYPES placeholders and writes the
    result to grammar/rcl.vim/syntax/rcl.vim.
    """
    # Read the template through a context manager; the previous bare
    # `open(...).read()` left the file handle to be closed by the GC.
    with open(
        "grammar/rcl.vim/syntax/rcl.vim.template", "r", encoding="utf-8"
    ) as template_file:
        contents = template_file.read()

    builtins = " ".join(
        # See also https://vi.stackexchange.com/questions/5966/ for why the `contains`
        # needs to end in `[]`.
        ("contains[]" if b == "contains" else b)
        for b in BUILTINS
    )
    # Vim regex alternation: types become a `\|`-separated pattern.
    types = "\\|".join(TYPES)
    contents = contents.replace("RCL_BUILTINS", builtins)
    contents = contents.replace("RCL_TYPES", types)

    with open("grammar/rcl.vim/syntax/rcl.vim", "w", encoding="utf-8") as f:
        f.write('" This file is generated by tools/generate_keywords.py.\n\n')
        f.write(contents)
|
|
|
|
|
|
def hash_keyword(slot: int, keyword: str) -> bytes:
    """
    Hash a (slot, keyword) pair, used to build the smith dictionaries below.

    We fix a particular hash function so the output is stable across Python
    versions.

    Returns the 32-byte BLAKE2s digest of the slot byte followed by the
    UTF-8 encoding of the keyword.
    """
    # Spell out the length and byte order: argument-less `int.to_bytes()`
    # only gained defaults in Python 3.11, and slots are 8-bit indices, so
    # one big-endian byte is equivalent and works on older interpreters too.
    h = hashlib.blake2s(slot.to_bytes(1, "big"), usedforsecurity=False)
    h.update(keyword.encode("utf-8"))
    return h.digest()
|
|
|
|
|
|
def generate_smith_builtins() -> None:
    """
    Generate a Rust file with the builtins, for use by the smith fuzzer.

    For the smith fuzzer, we generate an array of keywords and then the
    fuzzer picks one based on an u8 from the seed. However, if we just
    put the values there in order, then any time we add a builtin, that
    changes the meaning of the existing fuzz corpus, and the fuzzer has
    to run for a long time to "learn" the new meaning. It's also not
    ideal when switching branches. So instead, we use rendezvous hashing
    to generate a slice of 256 elements, such that the changes to the
    slice are minimal when the builtins change.
    """
    assert len(TYPES) <= 256, "Smith keyword indices are only 8 bits."
    assert len(BUILTINS) <= 256, "Smith keyword indices are only 8 bits."

    types_list: List[str] = []
    builtins_list: List[str] = []

    # Rendezvous hashing: every slot is won by the keyword whose
    # (hash, keyword) pair is smallest for that slot.
    for slot in range(256):
        _, winning_type = min((hash_keyword(slot, t), t) for t in TYPES)
        _, winning_builtin = min((hash_keyword(slot, b), b) for b in BUILTINS)
        types_list.append(winning_type)
        builtins_list.append(winning_builtin)

    # There is no theoretical guarantee every type/builtin got placed into
    # one of the slots, so confirm that here. We have plenty of slots so
    # it's unlikely to fail, but if it does fail, we should change the salt
    # for the hash function.
    for t in TYPES:
        assert t in types_list, "Generated slice must contain all types."
    for b in BUILTINS:
        assert b in builtins_list, "Generated slice must contain all builtins."

    with open("fuzz/src/builtins.rs", "w", encoding="utf-8") as f:
        f.write("// This file is generated by tools/generate_keywords.py.\n")
        f.write("// See that file for why these arrays contain duplicates.\n\n")

        def emit_array(name: str, elems: List[str], per_line: int) -> None:
            # Emit one `pub const` array of 256 &str, wrapping the line
            # after every `per_line` entries. json.dump produces a quoted,
            # escaped string literal that is also valid Rust.
            f.write("#[rustfmt::skip]\n")
            f.write(f"pub const {name}: [&str; 256] = [")
            for i, elem in enumerate(elems):
                if i % per_line == 0:
                    f.write("\n ")
                f.write(" ")
                json.dump(elem, f)
                f.write(",")
            f.write("\n];\n")

        emit_array("BUILTIN_TYPES", types_list, 8)
        f.write("\n")
        emit_array("BUILTINS", builtins_list, 4)
|
|
|
|
|
|
if __name__ == "__main__":
    # Regenerate every derived artifact, in the same sequence as before.
    for generate in (
        generate_fuzz_dictionary,
        generate_vim_plugin,
        generate_smith_builtins,
    ):
        generate()