mirror of
https://github.com/python/cpython.git
synced 2025-11-26 21:33:10 +00:00
gh-112962: in dis module, put cache information in the Instruction instead of creating fake Instructions to represent it (#113016)
This commit is contained in:
parent
3531ea441b
commit
428c9812cb
7 changed files with 115 additions and 47 deletions
|
|
@ -328,13 +328,17 @@ operation is being performed, so the intermediate analysis object isn't useful:
|
||||||
source line information (if any) is taken directly from the disassembled code
|
source line information (if any) is taken directly from the disassembled code
|
||||||
object.
|
object.
|
||||||
|
|
||||||
The *show_caches* and *adaptive* parameters work as they do in :func:`dis`.
|
The *adaptive* parameter works as it does in :func:`dis`.
|
||||||
|
|
||||||
.. versionadded:: 3.4
|
.. versionadded:: 3.4
|
||||||
|
|
||||||
.. versionchanged:: 3.11
|
.. versionchanged:: 3.11
|
||||||
Added the *show_caches* and *adaptive* parameters.
|
Added the *show_caches* and *adaptive* parameters.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.13
|
||||||
|
The *show_caches* parameter is deprecated and has no effect. The *cache_info*
|
||||||
|
field of each instruction is populated regardless of its value.
|
||||||
|
|
||||||
|
|
||||||
.. function:: findlinestarts(code)
|
.. function:: findlinestarts(code)
|
||||||
|
|
||||||
|
|
@ -482,6 +486,14 @@ details of bytecode instructions as :class:`Instruction` instances:
|
||||||
:class:`dis.Positions` object holding the
|
:class:`dis.Positions` object holding the
|
||||||
start and end locations that are covered by this instruction.
|
start and end locations that are covered by this instruction.
|
||||||
|
|
||||||
|
.. data:: cache_info
|
||||||
|
|
||||||
|
Information about the cache entries of this instruction, as
|
||||||
|
triplets of the form ``(name, size, data)``, where the ``name``
|
||||||
|
and ``size`` describe the cache format and ``data`` is the contents
|
||||||
|
of the cache. ``cache_info`` is ``None`` if the instruction does not have
|
||||||
|
caches.
|
||||||
|
|
||||||
.. versionadded:: 3.4
|
.. versionadded:: 3.4
|
||||||
|
|
||||||
.. versionchanged:: 3.11
|
.. versionchanged:: 3.11
|
||||||
|
|
@ -493,8 +505,8 @@ details of bytecode instructions as :class:`Instruction` instances:
|
||||||
Changed field ``starts_line``.
|
Changed field ``starts_line``.
|
||||||
|
|
||||||
Added fields ``start_offset``, ``cache_offset``, ``end_offset``,
|
Added fields ``start_offset``, ``cache_offset``, ``end_offset``,
|
||||||
``baseopname``, ``baseopcode``, ``jump_target``, ``oparg``, and
|
``baseopname``, ``baseopcode``, ``jump_target``, ``oparg``,
|
||||||
``line_number``.
|
``line_number`` and ``cache_info``.
|
||||||
|
|
||||||
|
|
||||||
.. class:: Positions
|
.. class:: Positions
|
||||||
|
|
|
||||||
75
Lib/dis.py
75
Lib/dis.py
|
|
@ -267,9 +267,10 @@ _Instruction = collections.namedtuple(
|
||||||
'starts_line',
|
'starts_line',
|
||||||
'line_number',
|
'line_number',
|
||||||
'label',
|
'label',
|
||||||
'positions'
|
'positions',
|
||||||
|
'cache_info',
|
||||||
],
|
],
|
||||||
defaults=[None, None]
|
defaults=[None, None, None]
|
||||||
)
|
)
|
||||||
|
|
||||||
_Instruction.opname.__doc__ = "Human readable name for operation"
|
_Instruction.opname.__doc__ = "Human readable name for operation"
|
||||||
|
|
@ -286,6 +287,7 @@ _Instruction.starts_line.__doc__ = "True if this opcode starts a source line, ot
|
||||||
_Instruction.line_number.__doc__ = "source line number associated with this opcode (if any), otherwise None"
|
_Instruction.line_number.__doc__ = "source line number associated with this opcode (if any), otherwise None"
|
||||||
_Instruction.label.__doc__ = "A label (int > 0) if this instruction is a jump target, otherwise None"
|
_Instruction.label.__doc__ = "A label (int > 0) if this instruction is a jump target, otherwise None"
|
||||||
_Instruction.positions.__doc__ = "dis.Positions object holding the span of source code covered by this instruction"
|
_Instruction.positions.__doc__ = "dis.Positions object holding the span of source code covered by this instruction"
|
||||||
|
_Instruction.cache_info.__doc__ = "list of (name, size, data), one for each cache entry of the instruction"
|
||||||
|
|
||||||
_ExceptionTableEntryBase = collections.namedtuple("_ExceptionTableEntryBase",
|
_ExceptionTableEntryBase = collections.namedtuple("_ExceptionTableEntryBase",
|
||||||
"start end target depth lasti")
|
"start end target depth lasti")
|
||||||
|
|
@ -334,6 +336,8 @@ class Instruction(_Instruction):
|
||||||
label - A label if this instruction is a jump target, otherwise None
|
label - A label if this instruction is a jump target, otherwise None
|
||||||
positions - Optional dis.Positions object holding the span of source code
|
positions - Optional dis.Positions object holding the span of source code
|
||||||
covered by this instruction
|
covered by this instruction
|
||||||
|
cache_info - information about the format and content of the instruction's cache
|
||||||
|
entries (if any)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
@ -570,7 +574,6 @@ def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False):
|
||||||
linestarts=linestarts,
|
linestarts=linestarts,
|
||||||
line_offset=line_offset,
|
line_offset=line_offset,
|
||||||
co_positions=co.co_positions(),
|
co_positions=co.co_positions(),
|
||||||
show_caches=show_caches,
|
|
||||||
original_code=original_code,
|
original_code=original_code,
|
||||||
arg_resolver=arg_resolver)
|
arg_resolver=arg_resolver)
|
||||||
|
|
||||||
|
|
@ -645,8 +648,7 @@ def _is_backward_jump(op):
|
||||||
'ENTER_EXECUTOR')
|
'ENTER_EXECUTOR')
|
||||||
|
|
||||||
def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=None,
|
def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=None,
|
||||||
show_caches=False, original_code=None, labels_map=None,
|
original_code=None, labels_map=None, arg_resolver=None):
|
||||||
arg_resolver=None):
|
|
||||||
"""Iterate over the instructions in a bytecode string.
|
"""Iterate over the instructions in a bytecode string.
|
||||||
|
|
||||||
Generates a sequence of Instruction namedtuples giving the details of each
|
Generates a sequence of Instruction namedtuples giving the details of each
|
||||||
|
|
@ -682,32 +684,28 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N
|
||||||
else:
|
else:
|
||||||
argval, argrepr = arg, repr(arg)
|
argval, argrepr = arg, repr(arg)
|
||||||
|
|
||||||
yield Instruction(_all_opname[op], op, arg, argval, argrepr,
|
instr = Instruction(_all_opname[op], op, arg, argval, argrepr,
|
||||||
offset, start_offset, starts_line, line_number,
|
offset, start_offset, starts_line, line_number,
|
||||||
labels_map.get(offset, None), positions)
|
labels_map.get(offset, None), positions)
|
||||||
|
|
||||||
caches = _get_cache_size(_all_opname[deop])
|
caches = _get_cache_size(_all_opname[deop])
|
||||||
if not caches:
|
# Advance the co_positions iterator:
|
||||||
continue
|
for _ in range(caches):
|
||||||
if not show_caches:
|
next(co_positions, ())
|
||||||
# We still need to advance the co_positions iterator:
|
|
||||||
for _ in range(caches):
|
if caches:
|
||||||
next(co_positions, ())
|
cache_info = []
|
||||||
continue
|
for name, size in _cache_format[opname[deop]].items():
|
||||||
for name, size in _cache_format[opname[deop]].items():
|
data = code[offset + 2: offset + 2 + 2 * size]
|
||||||
for i in range(size):
|
cache_info.append((name, size, data))
|
||||||
offset += 2
|
else:
|
||||||
# Only show the fancy argrepr for a CACHE instruction when it's
|
cache_info = None
|
||||||
# the first entry for a particular cache value:
|
|
||||||
if i == 0:
|
yield Instruction(_all_opname[op], op, arg, argval, argrepr,
|
||||||
data = code[offset: offset + 2 * size]
|
offset, start_offset, starts_line, line_number,
|
||||||
argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}"
|
labels_map.get(offset, None), positions, cache_info)
|
||||||
else:
|
|
||||||
argrepr = ""
|
|
||||||
yield Instruction(
|
|
||||||
"CACHE", CACHE, 0, None, argrepr, offset, offset, False, None, None,
|
|
||||||
Positions(*next(co_positions, ()))
|
|
||||||
)
|
|
||||||
|
|
||||||
def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False,
|
def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False,
|
||||||
show_offsets=False):
|
show_offsets=False):
|
||||||
|
|
@ -787,7 +785,6 @@ def _disassemble_bytes(code, lasti=-1, varname_from_oparg=None,
|
||||||
instrs = _get_instructions_bytes(code, linestarts=linestarts,
|
instrs = _get_instructions_bytes(code, linestarts=linestarts,
|
||||||
line_offset=line_offset,
|
line_offset=line_offset,
|
||||||
co_positions=co_positions,
|
co_positions=co_positions,
|
||||||
show_caches=show_caches,
|
|
||||||
original_code=original_code,
|
original_code=original_code,
|
||||||
labels_map=labels_map,
|
labels_map=labels_map,
|
||||||
arg_resolver=arg_resolver)
|
arg_resolver=arg_resolver)
|
||||||
|
|
@ -805,6 +802,23 @@ def print_instructions(instrs, exception_entries, formatter, show_caches=False,
|
||||||
is_current_instr = instr.offset <= lasti \
|
is_current_instr = instr.offset <= lasti \
|
||||||
<= instr.offset + 2 * _get_cache_size(_all_opname[_deoptop(instr.opcode)])
|
<= instr.offset + 2 * _get_cache_size(_all_opname[_deoptop(instr.opcode)])
|
||||||
formatter.print_instruction(instr, is_current_instr)
|
formatter.print_instruction(instr, is_current_instr)
|
||||||
|
deop = _deoptop(instr.opcode)
|
||||||
|
if show_caches and instr.cache_info:
|
||||||
|
offset = instr.offset
|
||||||
|
for name, size, data in instr.cache_info:
|
||||||
|
for i in range(size):
|
||||||
|
offset += 2
|
||||||
|
# Only show the fancy argrepr for a CACHE instruction when it's
|
||||||
|
# the first entry for a particular cache value:
|
||||||
|
if i == 0:
|
||||||
|
argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}"
|
||||||
|
else:
|
||||||
|
argrepr = ""
|
||||||
|
formatter.print_instruction(
|
||||||
|
Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset,
|
||||||
|
False, None, None, instr.positions),
|
||||||
|
is_current_instr)
|
||||||
|
|
||||||
formatter.print_exception_table(exception_entries)
|
formatter.print_exception_table(exception_entries)
|
||||||
|
|
||||||
def _disassemble_str(source, **kwargs):
|
def _disassemble_str(source, **kwargs):
|
||||||
|
|
@ -952,7 +966,6 @@ class Bytecode:
|
||||||
linestarts=self._linestarts,
|
linestarts=self._linestarts,
|
||||||
line_offset=self._line_offset,
|
line_offset=self._line_offset,
|
||||||
co_positions=co.co_positions(),
|
co_positions=co.co_positions(),
|
||||||
show_caches=self.show_caches,
|
|
||||||
original_code=original_code,
|
original_code=original_code,
|
||||||
labels_map=labels_map,
|
labels_map=labels_map,
|
||||||
arg_resolver=arg_resolver)
|
arg_resolver=arg_resolver)
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,18 @@ from _testinternalcapi import compiler_codegen, optimize_cfg, assemble_code_obje
|
||||||
|
|
||||||
_UNSPECIFIED = object()
|
_UNSPECIFIED = object()
|
||||||
|
|
||||||
|
def instructions_with_positions(instrs, co_positions):
|
||||||
|
# Return (instr, positions) pairs from the instrs list and co_positions
|
||||||
|
# iterator. The latter contains items for cache lines and the former
|
||||||
|
# doesn't, so those need to be skipped.
|
||||||
|
|
||||||
|
co_positions = co_positions or iter(())
|
||||||
|
for instr in instrs:
|
||||||
|
yield instr, next(co_positions, ())
|
||||||
|
for _, size, _ in (instr.cache_info or ()):
|
||||||
|
for i in range(size):
|
||||||
|
next(co_positions, ())
|
||||||
|
|
||||||
class BytecodeTestCase(unittest.TestCase):
|
class BytecodeTestCase(unittest.TestCase):
|
||||||
"""Custom assertion methods for inspecting bytecode."""
|
"""Custom assertion methods for inspecting bytecode."""
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -144,6 +144,8 @@ from test.support import (cpython_only,
|
||||||
gc_collect)
|
gc_collect)
|
||||||
from test.support.script_helper import assert_python_ok
|
from test.support.script_helper import assert_python_ok
|
||||||
from test.support import threading_helper
|
from test.support import threading_helper
|
||||||
|
from test.support.bytecode_helper import (BytecodeTestCase,
|
||||||
|
instructions_with_positions)
|
||||||
from opcode import opmap, opname
|
from opcode import opmap, opname
|
||||||
COPY_FREE_VARS = opmap['COPY_FREE_VARS']
|
COPY_FREE_VARS = opmap['COPY_FREE_VARS']
|
||||||
|
|
||||||
|
|
@ -384,10 +386,8 @@ class CodeTest(unittest.TestCase):
|
||||||
code = traceback.tb_frame.f_code
|
code = traceback.tb_frame.f_code
|
||||||
|
|
||||||
artificial_instructions = []
|
artificial_instructions = []
|
||||||
for instr, positions in zip(
|
for instr, positions in instructions_with_positions(
|
||||||
dis.get_instructions(code, show_caches=True),
|
dis.get_instructions(code), code.co_positions()
|
||||||
code.co_positions(),
|
|
||||||
strict=True
|
|
||||||
):
|
):
|
||||||
# If any of the positions is None, then all have to
|
# If any of the positions is None, then all have to
|
||||||
# be None as well for the case above. There are still
|
# be None as well for the case above. There are still
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,7 @@ import warnings
|
||||||
from test import support
|
from test import support
|
||||||
from test.support import (script_helper, requires_debug_ranges,
|
from test.support import (script_helper, requires_debug_ranges,
|
||||||
requires_specialization, Py_C_RECURSION_LIMIT)
|
requires_specialization, Py_C_RECURSION_LIMIT)
|
||||||
|
from test.support.bytecode_helper import instructions_with_positions
|
||||||
from test.support.os_helper import FakePath
|
from test.support.os_helper import FakePath
|
||||||
|
|
||||||
class TestSpecifics(unittest.TestCase):
|
class TestSpecifics(unittest.TestCase):
|
||||||
|
|
@ -1346,8 +1347,8 @@ class TestSourcePositions(unittest.TestCase):
|
||||||
def assertOpcodeSourcePositionIs(self, code, opcode,
|
def assertOpcodeSourcePositionIs(self, code, opcode,
|
||||||
line, end_line, column, end_column, occurrence=1):
|
line, end_line, column, end_column, occurrence=1):
|
||||||
|
|
||||||
for instr, position in zip(
|
for instr, position in instructions_with_positions(
|
||||||
dis.Bytecode(code, show_caches=True), code.co_positions(), strict=True
|
dis.Bytecode(code), code.co_positions()
|
||||||
):
|
):
|
||||||
if instr.opname == opcode:
|
if instr.opname == opcode:
|
||||||
occurrence -= 1
|
occurrence -= 1
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@ from test.support.bytecode_helper import BytecodeTestCase
|
||||||
|
|
||||||
import opcode
|
import opcode
|
||||||
|
|
||||||
|
CACHE = dis.opmap["CACHE"]
|
||||||
|
|
||||||
def get_tb():
|
def get_tb():
|
||||||
def _error():
|
def _error():
|
||||||
|
|
@ -1227,9 +1228,9 @@ class DisTests(DisTestBase):
|
||||||
else:
|
else:
|
||||||
# "copy" the code to un-quicken it:
|
# "copy" the code to un-quicken it:
|
||||||
f.__code__ = f.__code__.replace()
|
f.__code__ = f.__code__.replace()
|
||||||
for instruction in dis.get_instructions(
|
for instruction in _unroll_caches_as_Instructions(dis.get_instructions(
|
||||||
f, show_caches=True, adaptive=adaptive
|
f, show_caches=True, adaptive=adaptive
|
||||||
):
|
), show_caches=True):
|
||||||
if instruction.opname == "CACHE":
|
if instruction.opname == "CACHE":
|
||||||
yield instruction.argrepr
|
yield instruction.argrepr
|
||||||
|
|
||||||
|
|
@ -1262,7 +1263,8 @@ class DisTests(DisTestBase):
|
||||||
# However, this might change in the future. So we explicitly try to find
|
# However, this might change in the future. So we explicitly try to find
|
||||||
# a CACHE entry in the instructions. If we can't do that, fail the test
|
# a CACHE entry in the instructions. If we can't do that, fail the test
|
||||||
|
|
||||||
for inst in dis.get_instructions(f, show_caches=True):
|
for inst in _unroll_caches_as_Instructions(
|
||||||
|
dis.get_instructions(f, show_caches=True), show_caches=True):
|
||||||
if inst.opname == "CACHE":
|
if inst.opname == "CACHE":
|
||||||
op_offset = inst.offset - 2
|
op_offset = inst.offset - 2
|
||||||
cache_offset = inst.offset
|
cache_offset = inst.offset
|
||||||
|
|
@ -1775,8 +1777,8 @@ expected_opinfo_simple = [
|
||||||
class InstructionTestCase(BytecodeTestCase):
|
class InstructionTestCase(BytecodeTestCase):
|
||||||
|
|
||||||
def assertInstructionsEqual(self, instrs_1, instrs_2, /):
|
def assertInstructionsEqual(self, instrs_1, instrs_2, /):
|
||||||
instrs_1 = [instr_1._replace(positions=None) for instr_1 in instrs_1]
|
instrs_1 = [instr_1._replace(positions=None, cache_info=None) for instr_1 in instrs_1]
|
||||||
instrs_2 = [instr_2._replace(positions=None) for instr_2 in instrs_2]
|
instrs_2 = [instr_2._replace(positions=None, cache_info=None) for instr_2 in instrs_2]
|
||||||
self.assertEqual(instrs_1, instrs_2)
|
self.assertEqual(instrs_1, instrs_2)
|
||||||
|
|
||||||
class InstructionTests(InstructionTestCase):
|
class InstructionTests(InstructionTestCase):
|
||||||
|
|
@ -1890,9 +1892,9 @@ class InstructionTests(InstructionTestCase):
|
||||||
instruction.positions.col_offset,
|
instruction.positions.col_offset,
|
||||||
instruction.positions.end_col_offset,
|
instruction.positions.end_col_offset,
|
||||||
)
|
)
|
||||||
for instruction in dis.get_instructions(
|
for instruction in _unroll_caches_as_Instructions(dis.get_instructions(
|
||||||
code, adaptive=adaptive, show_caches=show_caches
|
code, adaptive=adaptive, show_caches=show_caches
|
||||||
)
|
), show_caches=show_caches)
|
||||||
]
|
]
|
||||||
self.assertEqual(co_positions, dis_positions)
|
self.assertEqual(co_positions, dis_positions)
|
||||||
|
|
||||||
|
|
@ -2233,6 +2235,31 @@ class TestDisTracebackWithFile(TestDisTraceback):
|
||||||
dis.distb(tb, file=output)
|
dis.distb(tb, file=output)
|
||||||
return output.getvalue()
|
return output.getvalue()
|
||||||
|
|
||||||
|
def _unroll_caches_as_Instructions(instrs, show_caches=False):
|
||||||
|
# Cache entries are no longer reported by dis as fake instructions,
|
||||||
|
# but some tests assume that they do. We should rewrite the tests to assume
|
||||||
|
# the new API, but it will be clearer to keep the tests working as
|
||||||
|
# before and do that in a separate PR.
|
||||||
|
|
||||||
|
for instr in instrs:
|
||||||
|
yield instr
|
||||||
|
if not show_caches:
|
||||||
|
continue
|
||||||
|
|
||||||
|
offset = instr.offset
|
||||||
|
for name, size, data in (instr.cache_info or ()):
|
||||||
|
for i in range(size):
|
||||||
|
offset += 2
|
||||||
|
# Only show the fancy argrepr for a CACHE instruction when it's
|
||||||
|
# the first entry for a particular cache value:
|
||||||
|
if i == 0:
|
||||||
|
argrepr = f"{name}: {int.from_bytes(data, sys.byteorder)}"
|
||||||
|
else:
|
||||||
|
argrepr = ""
|
||||||
|
|
||||||
|
yield Instruction("CACHE", CACHE, 0, None, argrepr, offset, offset,
|
||||||
|
False, None, None, instr.positions)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,3 @@
|
||||||
|
:mod:`dis` module functions add cache information to the
|
||||||
|
:class:`~dis.Instruction` instance rather than creating fake
|
||||||
|
:class:`~dis.Instruction` instances to represent the cache entries.
|
||||||
Loading…
Add table
Add a link
Reference in a new issue