mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
gh-97669: Create Tools/build/ directory (#97963)
Create Tools/build/ directory. Move the following scripts from Tools/scripts/ to Tools/build/:
* check_extension_modules.py
* deepfreeze.py
* freeze_modules.py
* generate_global_objects.py
* generate_levenshtein_examples.py
* generate_opcode_h.py
* generate_re_casefix.py
* generate_sre_constants.py
* generate_stdlib_module_names.py
* generate_token.py
* parse_html5_entities.py
* smelly.py
* stable_abi.py
* umarshal.py
* update_file.py
* verify_ensurepip_wheels.py

Update references to these scripts.
This commit is contained in:
parent
eae7dad402
commit
1863302d61
41 changed files with 102 additions and 84 deletions
96
Tools/build/generate_re_casefix.py
Executable file
96
Tools/build/generate_re_casefix.py
Executable file
|
@ -0,0 +1,96 @@
|
|||
#! /usr/bin/env python3
|
||||
# This script generates Lib/re/_casefix.py.
|
||||
|
||||
import collections
|
||||
import sys
|
||||
import unicodedata
|
||||
|
||||
# Path of this script as it should appear in the "Auto-generated by" header
# of the generated file (interpolated into re_casefix_template).
SCRIPT_NAME = 'Tools/build/generate_re_casefix.py'
|
||||
|
||||
def update_file(file, content):
    """Write *content* to *file* unless it already holds exactly that text.

    The existing file is read as UTF-8 first.  If it cannot be opened or
    decoded (OSError / ValueError), it is treated as needing a rewrite.

    Returns True when the file was written, False when it was left untouched.
    """
    try:
        with open(file, 'r', encoding='utf-8') as fobj:
            current = fobj.read()
    except (OSError, ValueError):
        # Missing or unreadable file: fall through and (re)create it.
        pass
    else:
        if current == content:
            return False

    with open(file, 'w', encoding='utf-8') as fobj:
        fobj.write(content)
    return True
|
||||
|
||||
# Skeleton of the generated module.  This is an f-string (so SCRIPT_NAME is
# interpolated immediately), which means the literal braces of the dict
# display must be doubled.  The remaining %s placeholder is filled in later
# with the generated dict entries via the % operator.
re_casefix_template = f"""\
# Auto-generated by {SCRIPT_NAME}.

# Maps the code of lowercased character to codes of different lowercased
# characters which have the same uppercase.
_EXTRA_CASES = {{
%s
}}
"""
|
||||
|
||||
def uname(i):
    """Return the Unicode name of code point *i*.

    Code points without a name yield the fallback 'U+XXXX' (upper-case hex,
    at least four digits).
    """
    fallback = r'U+%04X' % i
    return unicodedata.name(chr(i), fallback)
|
||||
|
||||
class hexint(int):
    """An int whose repr() is '0x'-prefixed, zero-padded lower-case hex."""

    def __repr__(self):
        # '#' adds the 0x prefix and the width of 6 includes it, so the
        # value is padded to at least four hex digits.
        text = '%#06x' % self
        return text
|
||||
|
||||
def alpha(i):
    """Return a printable form of code point *i*.

    Alphabetic characters are returned as-is; anything else is returned as
    its ascii() escape with the surrounding quotes stripped.
    """
    char = chr(i)
    if char.isalpha():
        return char
    quoted = ascii(char)
    return quoted[1:-1]
|
||||
|
||||
|
||||
def main(outfile='Lib/re/_casefix.py'):
    """Generate the extra case-mapping table and write it to *outfile*.

    Scans every code point up to sys.maxunicode, finds groups of distinct
    lowercased characters that share one uppercase form, and renders them
    through re_casefix_template.  The file is only rewritten when its
    content changes (see update_file).

    If any such group contains a code point above U+FFFF, the affected
    characters are reported on stderr and the process exits with status 1.
    """
    # Group every character by its uppercase form.
    by_upper = collections.defaultdict(str)
    for code in range(sys.maxunicode + 1):
        char = chr(code)
        by_upper[char.upper()] += char
    shared_upper = [chars for chars in by_upper.values() if len(chars) > 1]

    # For each group, collect the codes of the lowercased characters; only
    # groups with more than one distinct lowercase code are of interest.
    equivalent_lower_codes = []
    for chars in shared_upper:
        lower_codes = {ord(char.lower()) for char in chars}
        if len(lower_codes) > 1:
            equivalent_lower_codes.append(sorted(lower_codes))

    # A group is "bad" as soon as one of its codes lies above U+FFFF.
    bad_codes = []
    for group in equivalent_lower_codes:
        offender = next((code for code in group if code > 0xffff), None)
        if offender is None:
            continue
        bad_codes.extend(group)
        try:
            # ord() fails when the uppercase form is more than one character.
            bad_codes.append(ord(chr(offender).upper()))
        except (ValueError, TypeError):
            pass

    if bad_codes:
        print('Case-insensitive matching may not work correctly for character:',
              file=sys.stderr)
        for code in sorted(bad_codes):
            print(" '%s' (U+%04x, %s)" % (alpha(code), code, uname(code)),
                  file=sys.stderr)
        sys.exit(1)

    # Map each code to the other codes of its group.
    mapping = {}
    for group in equivalent_lower_codes:
        for code in group:
            mapping[code] = tuple(other for other in group if other != code)

    # Render two lines per entry: a comment with the names, then the entry.
    lines = []
    for code, others in sorted(mapping.items()):
        other_names = ', '.join(uname(other) for other in others)
        lines.append(' # %s: %s' % (uname(code), other_names))

        hex_others = tuple(hexint(other) for other in others)
        other_glyphs = ''.join(alpha(other) for other in others)
        lines.append(" %r: %r, # '%s': '%s'" % (
            hexint(code),
            hex_others,
            alpha(code),
            other_glyphs,
        ))

    update_file(outfile, re_casefix_template % '\n'.join(lines))
|
||||
|
||||
|
||||
# Script entry point: an optional first command-line argument overrides the
# default output path of main().
if __name__ == '__main__':
    import sys

    cli_args = sys.argv[1:]
    main(*cli_args)
|
Loading…
Add table
Add a link
Reference in a new issue