mirror of
https://github.com/zizmorcore/zizmor.git
synced 2025-12-23 08:47:33 +00:00
190 lines
5.4 KiB
Python
Executable file
190 lines
5.4 KiB
Python
Executable file
#!/usr/bin/env -S uv run --script --only-group codegen
|
|
|
|
"""
|
|
Processes the CodeQL models from https://github.com/github/codeql/tree/main/actions/ql/lib/ext
|
|
and extracts the information needed by zizmor
|
|
"""
|
|
|
|
import json
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
from collections import defaultdict
|
|
from pathlib import Path
|
|
from typing import Dict, List, Set
|
|
|
|
import yaml
|
|
|
|
_MODELS_SUBDIR = "actions/ql/lib/ext/"
|
|
|
|
|
|
def _debug(msg: str) -> None:
|
|
print(f"[+] {msg}", file=sys.stderr)
|
|
|
|
|
|
def _git(args: List[str], cwd: Path = None) -> subprocess.CompletedProcess:
|
|
result = subprocess.run(
|
|
["git", *args], cwd=cwd, capture_output=True, text=True, check=True
|
|
)
|
|
return result
|
|
|
|
|
|
def _clone_actions_codeql(temp_dir: Path) -> Path:
|
|
_debug("Cloning CodeQL repository with sparse checkout...")
|
|
|
|
repo_path = temp_dir / "codeql"
|
|
repo_path.mkdir()
|
|
|
|
_git(
|
|
[
|
|
"clone",
|
|
"--filter=tree:0",
|
|
"--no-checkout",
|
|
"--depth=1",
|
|
"--sparse",
|
|
"https://github.com/github/codeql.git",
|
|
".",
|
|
],
|
|
cwd=repo_path,
|
|
)
|
|
|
|
_git(
|
|
[
|
|
"sparse-checkout",
|
|
"add",
|
|
_MODELS_SUBDIR,
|
|
],
|
|
cwd=repo_path,
|
|
)
|
|
|
|
_git(
|
|
[
|
|
"checkout",
|
|
],
|
|
cwd=repo_path,
|
|
)
|
|
|
|
_debug("Successfully cloned CodeQL repository")
|
|
return repo_path
|
|
|
|
|
|
def _process_yaml_file(
|
|
file_path: Path,
|
|
relevant_kinds: Set[str],
|
|
only_manual_models: bool,
|
|
code_injection_sinks: Dict[str, List[str]],
|
|
) -> None:
|
|
with file_path.open() as f:
|
|
content = yaml.safe_load(f)
|
|
|
|
extensions = content.get("extensions")
|
|
if extensions is None:
|
|
raise ValueError(f"Missing extensions: {content}")
|
|
|
|
for extension in extensions:
|
|
adds_to = extension.get("addsTo")
|
|
if adds_to is None:
|
|
raise ValueError(f"Missing addsTo: {content}")
|
|
|
|
extensible = adds_to.get("extensible")
|
|
if extensible != "actionsSinkModel":
|
|
continue
|
|
|
|
pack = adds_to.get("pack")
|
|
# Fail if CodeQL starts using other packs, have to examine then what this means,
|
|
# e.g. whether it has lower accuracy or severity
|
|
if pack != "codeql/actions-all":
|
|
raise ValueError(f"Unexpected pack: {pack}")
|
|
|
|
data = extension.get("data")
|
|
if data is None:
|
|
raise ValueError(f"Missing data: {content}")
|
|
|
|
for data_entry in data:
|
|
if len(data_entry) != 5:
|
|
raise ValueError(f"Contains malformed data entry: {data_entry}")
|
|
|
|
# See https://github.com/github/codeql/blob/codeql-cli/v2.21.2/actions/ql/lib/codeql/actions/dataflow/internal/ExternalFlowExtensions.qll#L22-L24
|
|
action, version, input_param, kind, provenance = data_entry
|
|
|
|
if kind not in relevant_kinds:
|
|
continue
|
|
|
|
if only_manual_models and provenance != "manual":
|
|
continue
|
|
|
|
# TODO: Look at reusable workflows as sinks as well.
|
|
# This might require some data cleaning, since CodeQL appears to
|
|
# incorrectly duplicate these across both 'composite-actions'
|
|
# and 'reusable-workflows'. Maybe something we can fix upstream?
|
|
if "/.github/workflows/" in action:
|
|
continue
|
|
|
|
# Currently all models use only '*' as affected version, so for simplicity only
|
|
# support that for now
|
|
if version != "*":
|
|
raise ValueError(
|
|
f"Non-wildcard versions are not supported yet: {version}"
|
|
)
|
|
|
|
input_prefix = "input."
|
|
if not input_param.startswith(input_prefix):
|
|
raise ValueError(
|
|
f"Contains input with unexpected format: {input_param}"
|
|
)
|
|
|
|
input_name = input_param[len(input_prefix) :]
|
|
|
|
code_injection_sinks[action].append(input_name)
|
|
|
|
|
|
def _process_models(codeql_dir: Path) -> None:
|
|
"""Process all CodeQL model files and generate the output."""
|
|
code_injection_sinks: Dict[str, List[str]] = defaultdict(list)
|
|
|
|
models_dir = codeql_dir / _MODELS_SUBDIR
|
|
|
|
relevant_kinds = {"code-injection"}
|
|
# For now only include models manually curated by the CodeQL developers
|
|
only_manual_models = True
|
|
|
|
processed_count = 0
|
|
|
|
for file in models_dir.glob("**/*.yml"):
|
|
if file.suffix in [".yml", ".yaml"]:
|
|
processed_count += 1
|
|
try:
|
|
_process_yaml_file(
|
|
file,
|
|
relevant_kinds,
|
|
only_manual_models,
|
|
code_injection_sinks,
|
|
)
|
|
except Exception as e:
|
|
raise RuntimeError(f"Failed processing file: {file}") from e
|
|
|
|
_debug(f"Processed {processed_count} files")
|
|
|
|
print(json.dumps(list(sorted(code_injection_sinks.items())), indent=2))
|
|
|
|
|
|
def main():
|
|
with tempfile.TemporaryDirectory() as temp_dir:
|
|
try:
|
|
codeql_dir = _clone_actions_codeql(Path(temp_dir))
|
|
_process_models(codeql_dir)
|
|
|
|
except subprocess.CalledProcessError as e:
|
|
_debug(f"Git command failed: {e}")
|
|
_debug(f"Command: {e.cmd}")
|
|
_debug(f"Return code: {e.returncode}")
|
|
_debug(f"Stdout: {e.stdout}")
|
|
_debug(f"Stderr: {e.stderr}")
|
|
raise
|
|
except Exception as e:
|
|
_debug(f"Error: {e}")
|
|
raise
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|