gh-116122: Add SBOM generation to PCbuild/build.bat (GH-116138)

(cherry picked from commit 72dae53e09)

Co-authored-by: Seth Michael Larson <seth@python.org>
This commit is contained in:
Seth Michael Larson 2024-05-01 09:47:41 -05:00 committed by GitHub
parent 0586d54241
commit f5406ef454
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 31 additions and 5 deletions

View file

@ -30,11 +30,15 @@
</_TokenOutputs>
<_KeywordSources Include="$(PySourcePath)Grammar\python.gram;$(PySourcePath)Grammar\Tokens" />
<_KeywordOutputs Include="$(PySourcePath)Lib\keyword.py" />
<_SbomSources Include="$(PySourcePath)PCbuild\get_externals.bat" />
<_SbomOutputs Include="$(PySourcePath)Misc\externals.spdx.json;$(PySourcePath)Misc\sbom.spdx.json">
<Format>json</Format>
</_SbomOutputs>
</ItemGroup>
<Target Name="_TouchRegenSources" Condition="$(ForceRegen) == 'true'">
<Message Text="Touching source files to force regeneration" Importance="high" />
<Touch Files="@(_PegenSources);@(_ASTSources);@(_OpcodeSources);@(_TokenSources);@(_KeywordOutputs)"
<Touch Files="@(_PegenSources);@(_ASTSources);@(_OpcodeSources);@(_TokenSources);@(_KeywordOutputs);@(_SbomSources)"
AlwaysCreate="False" />
</Target>
@ -89,9 +93,16 @@
WorkingDirectory="$(PySourcePath)" />
</Target>
<!-- Regenerates the SBOM by running Tools\build\generate_sbom.py with
     $(PythonForBuild), which the FindPythonForBuild dependency is expected
     to provide. Inputs/Outputs enable MSBuild's incremental check: the Exec
     is skipped when every @(_SbomOutputs) file is newer than @(_SbomSources). -->
<Target Name="_RegenSbom"
DependsOnTargets="FindPythonForBuild"
Inputs="@(_SbomSources)"
Outputs="@(_SbomOutputs)">
<Exec Command='$(PythonForBuild) "$(PySourcePath)Tools\build\generate_sbom.py"'/>
</Target>
<Target Name="Regen"
Condition="$(Configuration) != 'PGUpdate'"
DependsOnTargets="_TouchRegenSources;_RegenPegen;_RegenAST_H;_RegenOpcodes;_RegenTokens;_RegenKeywords;_RegenGlobalObjects">
DependsOnTargets="_TouchRegenSources;_RegenPegen;_RegenAST_H;_RegenOpcodes;_RegenTokens;_RegenKeywords;_RegenGlobalObjects;_RegenSbom">
<Message Text="Generated sources are up to date" Importance="high" />
</Target>

View file

@ -4,13 +4,13 @@ import re
import hashlib
import json
import glob
import pathlib
from pathlib import Path, PurePosixPath, PureWindowsPath
import subprocess
import sys
import urllib.request
import typing
CPYTHON_ROOT_DIR = pathlib.Path(__file__).parent.parent.parent
CPYTHON_ROOT_DIR = Path(__file__).parent.parent.parent
# Before adding a new entry to this list, double check that
# the license expression is a valid SPDX license expression:
@ -119,9 +119,16 @@ def filter_gitignored_paths(paths: list[str]) -> list[str]:
# 1 means matches, 0 means no matches.
assert git_check_ignore_proc.returncode in (0, 1)
# Paths may or may not be quoted, Windows quotes paths.
git_check_ignore_re = re.compile(r"^::\s+(\"([^\"]+)\"|(.+))\Z")
# Return the list of paths sorted
git_check_ignore_lines = git_check_ignore_proc.stdout.decode().splitlines()
return sorted([line.split()[-1] for line in git_check_ignore_lines if line.startswith("::")])
git_check_not_ignored = []
for line in git_check_ignore_lines:
if match := git_check_ignore_re.fullmatch(line):
git_check_not_ignored.append(match.group(2) or match.group(3))
return sorted(git_check_not_ignored)
def get_externals() -> list[str]:
@ -238,12 +245,20 @@ def create_source_sbom() -> None:
)
for path in paths:
# Normalize the filename from any combination of slashes.
path = str(PurePosixPath(PureWindowsPath(path)))
# Skip directories and excluded files
if not (CPYTHON_ROOT_DIR / path).is_file() or path in exclude:
continue
# SPDX requires SHA1 to be used for files, but we provide SHA256 too.
data = (CPYTHON_ROOT_DIR / path).read_bytes()
# We normalize line-endings for consistent checksums.
# This is a rudimentary check for binary files.
if b"\x00" not in data:
data = data.replace(b"\r\n", b"\n")
checksum_sha1 = hashlib.sha1(data).hexdigest()
checksum_sha256 = hashlib.sha256(data).hexdigest()