mirror of
https://github.com/python/cpython.git
synced 2025-07-12 22:05:16 +00:00

This patch changes a few of the scripts to have __name__=='__main__' clauses so that they are importable without running. Also fixes the syntax errors revealed by the tests.
62 lines
1.7 KiB
Python
Executable file
62 lines
1.7 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
""" Utility for parsing HTML entity definitions available from:
|
|
|
|
http://www.w3.org/ as e.g.
|
|
http://www.w3.org/TR/REC-html40/HTMLlat1.ent
|
|
|
|
Input is read from stdin, output is written to stdout in the form of a
|
|
Python snippet defining a dictionary "entitydefs" mapping literal
|
|
entity name to character or numeric entity.
|
|
|
|
Marc-Andre Lemburg, mal@lemburg.com, 1999.
|
|
Use as you like. NO WARRANTIES.
|
|
|
|
"""
|
|
import re,sys
|
|
|
|
# Matches one SGML entity declaration of the form
#     <!ENTITY name CDATA "value" -- comment -->
# Group 1 is the entity name, group 2 the quoted replacement value and
# group 3 the comment text (which may span several lines).
# A raw string is used so that "\w" reaches the regex engine untouched
# instead of being an invalid string escape.
entityRE = re.compile(
    r'<!ENTITY +(\w+) +CDATA +"([^"]+)" +-- +((?:.|\n)+?) *-->')


def parse(text, pos=0, endpos=None):
    """Extract entity declarations from *text*.

    Only the slice text[pos:endpos] is scanned; *endpos* defaults to the
    end of *text*.

    Returns a dictionary mapping each entity name to a
    ``(charcode, comment)`` tuple, where *charcode* is the quoted value of
    the declaration and *comment* is the raw text of its trailing comment.
    If a name is declared more than once, the last declaration wins.
    """
    if endpos is None:
        endpos = len(text)
    d = {}
    # NOTE: the caller-supplied start offset is honoured here; it must not
    # be reset to 0 before scanning.
    for m in entityRE.finditer(text, pos, endpos):
        name, charcode, comment = m.groups()
        d[name] = charcode, comment
    return d
|
|
|
|
def writefile(f, defs):
    """Write *defs* to *f* as Python source defining ``entitydefs``.

    *f* is any object with a ``write(str)`` method.  *defs* maps entity
    names to ``(charcode, comment)`` tuples, as returned by parse().

    Entries are emitted sorted by name, one per line, with the comment
    (whitespace-normalized onto a single line) appended as a ``#`` comment.
    Numeric character references (``&#NNN;`` or ``&#xHH;``) below 256 are
    written as a one-character string literal using an octal escape; every
    other value is written as the repr() of the original charcode string.

    Raises ValueError if a charcode starts with ``&#`` but is not followed
    by a valid decimal or hexadecimal number.
    """
    f.write("entitydefs = {\n")
    for name, (charcode, comment) in sorted(defs.items()):
        if charcode.startswith('&#'):
            # Strip the terminating ';' only if present, so a reference
            # lacking it does not silently lose its last digit.
            digits = charcode[2:].rstrip(';')
            if digits[:1] in ('x', 'X'):
                code = int(digits[1:], 16)
            else:
                code = int(digits)
            if code < 256:
                # The backslash is doubled so the emitted text contains a
                # literal backslash followed by the octal digits.
                charcode = "'\\%o'" % code
            else:
                charcode = repr(charcode)
        else:
            charcode = repr(charcode)
        # Collapse newlines/indentation so the comment stays on one line.
        comment = ' '.join(comment.split())
        f.write(" '%s':\t%s, \t# %s\n" % (name, charcode, comment))
    f.write('\n}\n')
|
|
|
|
if __name__ == '__main__':
    # Usage: parseentities.py [infile [outfile]]
    # With no arguments the script filters stdin to stdout.

    # Read the whole input first; a named input file is closed as soon as
    # its contents have been read.
    if len(sys.argv) > 1:
        with open(sys.argv[1]) as infile:
            text = infile.read()
    else:
        text = sys.stdin.read()

    defs = parse(text)

    # The output file is opened (and therefore truncated) only after the
    # input has been read and parsed successfully, and is closed on exit
    # from the with-block even if writing fails.
    if len(sys.argv) > 2:
        with open(sys.argv[2], 'w') as outfile:
            writefile(outfile, defs)
    else:
        writefile(sys.stdout, defs)
|