mirror of
https://github.com/python/cpython.git
synced 2025-12-04 00:30:19 +00:00
bpo-30299: Display the bytecode when compiling a regex in debug mode. (#1491)
`re.compile(..., re.DEBUG)` now displays the compiled bytecode in human-readable form.
This commit is contained in:
parent
821a9d146b
commit
4ab6abfca4
3 changed files with 177 additions and 1 deletions
|
|
@ -595,6 +595,150 @@ def _code(p, flags):
|
|||
|
||||
return code
|
||||
|
||||
def _hex_code(code):
|
||||
return '[%s]' % ', '.join('%#0*x' % (_sre.CODESIZE*2+2, x) for x in code)
|
||||
|
||||
def dis(code):
    """Print a human-readable disassembly of compiled pattern code.

    `code` is the flat sequence of integer code words produced by
    `_code`.  One line is printed per instruction: its offset, a marker
    (':' if an already printed instruction jumps to this offset, '.'
    otherwise), the opcode and its arguments.  Instructions carrying a
    skip count also show the absolute offset they skip to as '(to N)'.
    The bodies of compound instructions are printed indented one level
    deeper than the instruction that owns them.

    Output goes to standard output via print(); nothing is returned.

    Raises ValueError if an opcode is met that this function does not
    know how to decode.
    """
    import sys

    labels = set()      # offsets named as the target of a printed jump
    level = 0           # current nesting depth, shared by all dis_ calls
    offset_width = len(str(len(code) - 1))

    def dis_(start, end):
        # Disassemble code[start:end] one nesting level deeper.  `start`
        # is rebound to the offset of the instruction being decoded so
        # that print_ always labels the line with the right offset.

        def print_(*args, to=None):
            # Print one instruction line for the instruction at `start`.
            if to is not None:
                labels.add(to)
                args += ('(to %d)' % (to,),)
            marker = ':' if start in labels else '.'
            # Two columns per nesting level: the arguments then begin at
            # column offset_width + 2*level, which is exactly where
            # print_2 puts its continuation lines.
            print('%*d%s ' % (offset_width, start, marker),
                  end='  '*(level-1))
            print(*args)

        def print_2(*args):
            # Print a continuation line (no offset) under an instruction.
            print(end=' '*(offset_width + 2*level))
            print(*args)

        nonlocal level
        level += 1
        i = start
        while i < end:
            start = i
            op = OPCODES[code[i]]
            i += 1
            if op in (SUCCESS, FAILURE, ANY, ANY_ALL,
                      MAX_UNTIL, MIN_UNTIL, NEGATE):
                # No operands.
                print_(op)
            elif op in (LITERAL, NOT_LITERAL,
                        LITERAL_IGNORE, NOT_LITERAL_IGNORE,
                        LITERAL_LOC_IGNORE, NOT_LITERAL_LOC_IGNORE):
                # One operand: a character code.
                arg = code[i]
                i += 1
                print_(op, '%#02x (%r)' % (arg, chr(arg)))
            elif op is AT:
                arg = code[i]
                i += 1
                arg = str(ATCODES[arg])
                assert arg[:3] == 'AT_'
                print_(op, arg[3:])
            elif op is CATEGORY:
                arg = code[i]
                i += 1
                arg = str(CHCODES[arg])
                assert arg[:9] == 'CATEGORY_'
                print_(op, arg[9:])
            elif op in (IN, IN_IGNORE, IN_LOC_IGNORE):
                # <skip> followed by the set items, shown as a nested body.
                skip = code[i]
                print_(op, skip, to=i+skip)
                dis_(i+1, i+skip)
                i += skip
            elif op in (RANGE, RANGE_IGNORE):
                lo, hi = code[i: i+2]
                i += 2
                print_(op, '%#02x %#02x (%r-%r)' % (lo, hi, chr(lo), chr(hi)))
            elif op is CHARSET:
                # A 256-bit bitmap stored in 256//_CODEBITS code words.
                print_(op, _hex_code(code[i: i + 256//_CODEBITS]))
                i += 256//_CODEBITS
            elif op is BIGCHARSET:
                # <block count>, a 256-byte mapping packed into code
                # words, then one 256-bit bitmap per block.
                arg = code[i]
                i += 1
                mapping = list(b''.join(x.to_bytes(_sre.CODESIZE, sys.byteorder)
                                        for x in code[i: i + 256//_sre.CODESIZE]))
                print_(op, arg, mapping)
                i += 256//_sre.CODESIZE
                level += 1
                for _ in range(arg):
                    print_2(_hex_code(code[i: i + 256//_CODEBITS]))
                    i += 256//_CODEBITS
                level -= 1
            elif op in (MARK, GROUPREF, GROUPREF_IGNORE):
                arg = code[i]
                i += 1
                print_(op, arg)
            elif op is JUMP:
                skip = code[i]
                print_(op, skip, to=i+skip)
                i += 1
            elif op is BRANCH:
                # A chain of <skip> body pairs; a zero skip ends the chain.
                skip = code[i]
                print_(op, skip, to=i+skip)
                while skip:
                    dis_(i+1, i+skip)
                    i += skip
                    # The next skip word gets a line of its own, labelled
                    # with its own offset.
                    start = i
                    skip = code[i]
                    if skip:
                        print_('branch', skip, to=i+skip)
                    else:
                        print_(FAILURE)
                i += 1
            elif op in (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE):
                skip, min_, max_ = code[i: i+3]
                if max_ == MAXREPEAT:
                    max_ = 'MAXREPEAT'
                print_(op, skip, min_, max_, to=i+skip)
                dis_(i+3, i+skip)
                i += skip
            elif op is GROUPREF_EXISTS:
                arg, skip = code[i: i+2]
                print_(op, arg, skip, to=i+skip)
                i += 2
            elif op in (ASSERT, ASSERT_NOT):
                skip, arg = code[i: i+2]
                print_(op, skip, arg, to=i+skip)
                dis_(i+2, i+skip)
                i += skip
            elif op is INFO:
                skip, flags, min_, max_ = code[i: i+4]
                if max_ == MAXREPEAT:
                    max_ = 'MAXREPEAT'
                print_(op, skip, bin(flags), min_, max_, to=i+skip)
                # `pos` walks over the optional tables that follow the
                # four fixed words of the INFO block.
                pos = i+4
                if flags & SRE_INFO_PREFIX:
                    prefix_len, prefix_skip = code[i+4: i+6]
                    print_2(' prefix_skip', prefix_skip)
                    pos = i + 6
                    prefix = code[pos: pos+prefix_len]
                    print_2(' prefix',
                            '[%s]' % ', '.join('%#02x' % x for x in prefix),
                            '(%r)' % ''.join(map(chr, prefix)))
                    pos += prefix_len
                    # The overlap table has one entry per prefix word.
                    print_2(' overlap', code[pos: pos+prefix_len])
                    pos += prefix_len
                if flags & SRE_INFO_CHARSET:
                    level += 1
                    print_2('in')
                    dis_(pos, i+skip)
                    level -= 1
                i += skip
            else:
                raise ValueError(op)

        level -= 1

    dis_(0, len(code))
|
||||
|
||||
|
||||
def compile(p, flags=0):
|
||||
# internal: convert pattern list to internal format
|
||||
|
||||
|
|
@ -606,7 +750,9 @@ def compile(p, flags=0):
|
|||
|
||||
code = _code(p, flags)
|
||||
|
||||
# print(code)
|
||||
if flags & SRE_FLAG_DEBUG:
|
||||
print()
|
||||
dis(code)
|
||||
|
||||
# map in either direction
|
||||
groupindex = p.pattern.groupdict
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue