mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
gh-97669: Create Tools/build/ directory (#97963)
Create Tools/build/ directory. Move the following scripts from Tools/scripts/ to Tools/build/:

* check_extension_modules.py
* deepfreeze.py
* freeze_modules.py
* generate_global_objects.py
* generate_levenshtein_examples.py
* generate_opcode_h.py
* generate_re_casefix.py
* generate_sre_constants.py
* generate_stdlib_module_names.py
* generate_token.py
* parse_html5_entities.py
* smelly.py
* stable_abi.py
* umarshal.py
* update_file.py
* verify_ensurepip_wheels.py

Update references to these scripts.
This commit is contained in:
parent
eae7dad402
commit
1863302d61
41 changed files with 102 additions and 84 deletions
70
Tools/build/generate_levenshtein_examples.py
Normal file
70
Tools/build/generate_levenshtein_examples.py
Normal file
|
@ -0,0 +1,70 @@
|
|||
"""Generate 10,000 unique examples for the Levenshtein short-circuit tests."""
|
||||
|
||||
import argparse
|
||||
from functools import cache
|
||||
import json
|
||||
import os.path
|
||||
from random import choices, randrange
|
||||
|
||||
|
||||
# Keep these costs in sync with Lib/traceback.py.  They are duplicated here
# rather than imported because this script is executed by PYTHON_FOR_REGEN
# and not by the in-tree build of Python.
_MOVE_COST = 2
_CASE_COST = 1


def _substitution_cost(ch_a, ch_b):
    """Return the cost of substituting the character *ch_a* with *ch_b*.

    Identical characters cost 0, characters that differ only by case cost
    ``_CASE_COST``, and every other pair costs ``_MOVE_COST``.
    """
    if ch_a == ch_b:
        return 0
    if ch_a.lower() == ch_b.lower():
        return _CASE_COST
    return _MOVE_COST
|
||||
|
||||
|
||||
@cache
def levenshtein(a, b):
    """Return the weighted edit distance between the strings *a* and *b*.

    Inserting or deleting a character costs ``_MOVE_COST``; replacing one
    character with another is priced by ``_substitution_cost()``.  The
    distance is defined recursively on the last characters of both strings,
    and ``functools.cache`` memoizes the overlapping sub-problems.
    """
    if not a or not b:
        # One side is empty: every remaining character is a pure
        # insertion or deletion.
        return (len(a) + len(b)) * _MOVE_COST
    # Try each possible final edit and keep the cheapest one.
    swap = levenshtein(a[:-1], b[:-1]) + _substitution_cost(a[-1], b[-1])
    drop_a = levenshtein(a[:-1], b) + _MOVE_COST
    drop_b = levenshtein(a, b[:-1]) + _MOVE_COST
    return min(swap, drop_a, drop_b)
|
||||
|
||||
|
||||
def main(argv=None):
    """Generate the JSON file of Levenshtein examples named on the command line.

    *argv* is the list of command-line arguments to parse; when it is None,
    argparse falls back to ``sys.argv[1:]``.

    The output is a sorted JSON list of ``[a, b, expected]`` entries: 9,990
    unique random pairs of non-empty strings plus ten pairs whose first
    string is empty.  If the output file already exists, nothing is written
    unless ``--overwrite`` is given.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('output_path', metavar='FILE', type=str)
    parser.add_argument('--overwrite', action='store_true',
                        help='overwrite an existing test file')

    args = parser.parse_args(argv)
    output_path = os.path.realpath(args.output_path)
    if not args.overwrite and os.path.isfile(output_path):
        print(f"{output_path} already exists, skipping regeneration.")
        print(
            "To force, add --overwrite to the invocation of this tool or"
            " delete the existing file."
        )
        return

    alphabet = "abcABC"
    examples = set()
    # Create a lot of non-empty examples, which should end up with a Gauss-like
    # distribution for even costs (moves) and odd costs (case substitutions).
    # A set is used so that duplicate random draws do not count twice.
    while len(examples) < 9990:
        a = ''.join(choices(alphabet, k=randrange(1, 10)))
        b = ''.join(choices(alphabet, k=randrange(1, 10)))
        examples.add((a, b, levenshtein(a, b)))
    # Create one empty case each for strings between 0 and 9 in length.
    for length in range(10):
        b = ''.join(choices(alphabet, k=length))
        examples.add(("", b, levenshtein("", b)))
    # json.dump() escapes non-ASCII by default, so the explicit encoding only
    # removes the dependency on the locale; the bytes written are unchanged.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(sorted(examples), f, indent=2)
|
||||
|
||||
|
||||
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
Loading…
Add table
Add a link
Reference in a new issue