mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 02:15:10 +00:00 
			
		
		
		
	 1863302d61
			
		
	
	
		1863302d61
		
			
		
	
	
	
	
		
			
			Create Tools/build/ directory. Move the following scripts from Tools/scripts/ to Tools/build/: * check_extension_modules.py * deepfreeze.py * freeze_modules.py * generate_global_objects.py * generate_levenshtein_examples.py * generate_opcode_h.py * generate_re_casefix.py * generate_sre_constants.py * generate_stdlib_module_names.py * generate_token.py * parse_html5_entities.py * smelly.py * stable_abi.py * umarshal.py * update_file.py * verify_ensurepip_wheels.py Update references to these scripts.
		
			
				
	
	
		
			96 lines
		
	
	
	
		
			2.7 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			96 lines
		
	
	
	
		
			2.7 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
| #! /usr/bin/env python3
 | |
| # This script generates Lib/re/_casefix.py.
 | |
| 
 | |
| import collections
 | |
| import sys
 | |
| import unicodedata
 | |
| 
 | |
# Repository-relative path of this script; it is interpolated into the
# "Auto-generated by ..." header line of the generated file.
SCRIPT_NAME = 'Tools/build/generate_re_casefix.py'
 | |
| 
 | |
def update_file(file, content):
    """Write *content* to *file* unless the file already contains it.

    The existing file is read as UTF-8 and compared with *content*.  If they
    are equal the file is left untouched and False is returned.  If they
    differ, or the file cannot be read or decoded, the file is (re)written
    as UTF-8 and True is returned.
    """
    already_current = False
    try:
        with open(file, 'r', encoding='utf-8') as existing:
            already_current = existing.read() == content
    except (OSError, ValueError):
        # Missing, unreadable or undecodable file: fall through and rewrite.
        pass
    if already_current:
        return False

    with open(file, 'w', encoding='utf-8') as target:
        target.write(content)
    return True
 | |
| 
 | |
# Template for the generated module.  This is an f-string, so the literal
# braces of the dict display must be doubled ({{ and }}); single braces would
# be parsed as a replacement field and make the file fail to compile.  The
# remaining %s placeholder is filled in later with the formatted entries.
re_casefix_template = f"""\
# Auto-generated by {SCRIPT_NAME}.

# Maps the code of lowercased character to codes of different lowercased
# characters which have the same uppercase.
_EXTRA_CASES = {{
%s
}}
"""
 | |
| 
 | |
def uname(i):
    """Return the Unicode name of code point *i*.

    Characters without a name in the Unicode database are rendered as
    'U+XXXX' (uppercase hex, at least four digits).
    """
    char = chr(i)
    fallback = r'U+%04X' % i
    return unicodedata.name(char, fallback)
 | |
| 
 | |
class hexint(int):
    """An int whose repr is '0x'-prefixed hex, zero-padded to six characters."""

    def __repr__(self):
        return f'{int(self):#06x}'
 | |
| 
 | |
def alpha(i):
    """Return a printable form of code point *i*.

    Alphabetic characters are returned as-is; anything else is returned as
    its ascii() representation with the surrounding quotes stripped.
    """
    char = chr(i)
    if char.isalpha():
        return char
    quoted = ascii(char)
    return quoted[1:-1]
 | |
| 
 | |
| 
 | |
def main(outfile='Lib/re/_casefix.py'):
    """Generate the extra-case table and write it to *outfile*.

    Every code point up to sys.maxunicode is grouped by its uppercase form.
    For each group the lowercase codes of its members are collected; groups
    with more than one distinct lowercase code yield the table entries.  If
    any such group contains a code above 0xffff, the offending characters
    are reported on stderr and the process exits with status 1.
    """
    # Group all characters by their uppercase form.
    by_upper = collections.defaultdict(str)
    for code in range(sys.maxunicode + 1):
        char = chr(code)
        by_upper[char.upper()] += char

    # For each group of two or more characters, collect the sorted codes of
    # their lowercase forms; keep only groups with several distinct codes.
    equivalent_lower_codes = []
    for group in by_upper.values():
        if len(group) <= 1:
            continue
        lowered = {ord(char.lower()) for char in group}
        if len(lowered) > 1:
            equivalent_lower_codes.append(sorted(lowered))

    # Any group containing a code above 0xffff is reported as an error,
    # together with the uppercase of its first such code when that is a
    # single character.
    bad_codes = []
    for codes in equivalent_lower_codes:
        first_high = next((code for code in codes if code > 0xffff), None)
        if first_high is None:
            continue
        bad_codes.extend(codes)
        try:
            bad_codes.append(ord(chr(first_high).upper()))
        except (ValueError, TypeError):
            pass
    if bad_codes:
        print('Case-insensitive matching may not work correctly for character:',
              file=sys.stderr)
        for code in sorted(bad_codes):
            print("  '%s' (U+%04x, %s)" % (alpha(code), code, uname(code)),
                  file=sys.stderr)
        sys.exit(1)

    # Map each lowercase code to the other codes of its group.
    mapping = {}
    for codes in equivalent_lower_codes:
        for code in codes:
            mapping[code] = tuple(other for other in codes if code != other)

    # Emit, per entry, a comment line with character names followed by the
    # dict item itself.
    items = []
    for code, others in sorted(mapping.items()):
        name_comment = '    # %s: %s' % (
            uname(code),
            ', '.join(map(uname, others)),
        )
        entry = "    %r: %r, # '%s': '%s'" % (
            hexint(code),
            tuple(map(hexint, others)),
            alpha(code),
            ''.join(map(alpha, others)),
        )
        items.extend((name_comment, entry))

    update_file(outfile, re_casefix_template % '\n'.join(items))
 | |
| 
 | |
| 
 | |
if __name__ == '__main__':
    # sys is already imported at the top of the file, so it is not imported
    # again here.  Command-line arguments are forwarded positionally to
    # main(); the first one overrides the default output path.
    main(*sys.argv[1:])
 |