# Mirror of https://github.com/python/cpython.git (synced 2025-11-04 11:49:12 +00:00) -- 482 lines, 17 KiB, Python
"""Disassembler of Python byte code into mnemonics."""
 | 
						|
 | 
						|
import sys
 | 
						|
import types
 | 
						|
import collections
 | 
						|
import io
 | 
						|
 | 
						|
from opcode import *
 | 
						|
from opcode import __all__ as _opcodes_all
 | 
						|
 | 
						|
__all__ = ["code_info", "dis", "disassemble", "distb", "disco",
 | 
						|
           "findlinestarts", "findlabels", "show_code",
 | 
						|
           "get_instructions", "Instruction", "Bytecode"] + _opcodes_all
 | 
						|
del _opcodes_all
 | 
						|
 | 
						|
_have_code = (types.MethodType, types.FunctionType, types.CodeType,
 | 
						|
              classmethod, staticmethod, type)
 | 
						|
 | 
						|
def _try_compile(source, name):
 | 
						|
    """Attempts to compile the given source, first as an expression and
 | 
						|
       then as a statement if the first approach fails.
 | 
						|
 | 
						|
       Utility function to accept strings in functions that otherwise
 | 
						|
       expect code objects
 | 
						|
    """
 | 
						|
    try:
 | 
						|
        c = compile(source, name, 'eval')
 | 
						|
    except SyntaxError:
 | 
						|
        c = compile(source, name, 'exec')
 | 
						|
    return c
 | 
						|
 | 
						|
def dis(x=None, *, file=None):
    """Disassemble classes, methods, functions, generators, coroutines, or code.

    With no argument, disassemble the last traceback.

    Accepted values for *x*:

    * a method or function (its code object is disassembled);
    * a generator, asynchronous generator or coroutine object (the code
      object it is running is disassembled);
    * any object with a __dict__, such as a class or module: every member
      whose type is listed in _have_code is disassembled, in sorted name
      order, each preceded by a "Disassembly of <name>:" header;
    * a code object;
    * bytes or bytearray holding raw bytecode;
    * a string of source code, which is compiled first.

    Output is passed to print() with the given *file* argument.

    Raises TypeError if *x* is none of the above.
    """
    if x is None:
        distb(file=file)
        return
    if hasattr(x, '__func__'):  # Method
        x = x.__func__
    if hasattr(x, '__code__'):  # Function
        x = x.__code__
    # Generator, asynchronous generator and coroutine objects each expose
    # the code object they run under a differently named attribute.
    for code_attr in ('gi_code', 'ag_code', 'cr_code'):
        if hasattr(x, code_attr):
            x = getattr(x, code_attr)
            break
    if hasattr(x, '__dict__'):  # Class or module
        items = sorted(x.__dict__.items())
        for name, x1 in items:
            if isinstance(x1, _have_code):
                print("Disassembly of %s:" % name, file=file)
                try:
                    dis(x1, file=file)
                except TypeError as msg:
                    # Report the member that could not be handled and
                    # carry on with the remaining members.
                    print("Sorry:", msg, file=file)
                print(file=file)
    elif hasattr(x, 'co_code'):  # Code object
        disassemble(x, file=file)
    elif isinstance(x, (bytes, bytearray)):  # Raw bytecode
        _disassemble_bytes(x, file=file)
    elif isinstance(x, str):    # Source code
        _disassemble_str(x, file=file)
    else:
        raise TypeError("don't know how to disassemble %s objects" %
                        type(x).__name__)
 | 
						|
 | 
						|
def distb(tb=None, *, file=None):
    """Disassemble a traceback (default: last traceback).

    When *tb* is None, sys.last_traceback is used and followed to its
    innermost entry; an explicitly passed traceback is used as given.
    The code of the traceback's frame is disassembled with the
    instruction at tb_lasti marked as current.

    Raises RuntimeError if *tb* is None and sys.last_traceback is not set.
    """
    if tb is None:
        try:
            tb = sys.last_traceback
        except AttributeError:
            # The AttributeError is an implementation detail; suppress it
            # so callers only see the meaningful RuntimeError.
            raise RuntimeError("no last traceback to disassemble") from None
        while tb.tb_next:
            tb = tb.tb_next
    disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file)
 | 
						|
 | 
						|
# Names of the individual bits of a code object's co_flags field, keyed
# by bit value.  The inspect module interrogates this dictionary to build
# its list of CO_* constants, and pretty_flags uses it to turn co_flags
# into a human readable list.
COMPILER_FLAG_NAMES = {
    0x001: "OPTIMIZED",
    0x002: "NEWLOCALS",
    0x004: "VARARGS",
    0x008: "VARKEYWORDS",
    0x010: "NESTED",
    0x020: "GENERATOR",
    0x040: "NOFREE",
    0x080: "COROUTINE",
    0x100: "ITERABLE_COROUTINE",
}
 | 
						|
 | 
						|
def pretty_flags(flags):
    """Return pretty representation of code flags.

    Each set bit among the low 32 is rendered with its name from
    COMPILER_FLAG_NAMES, or as a hex number when it has no name.  If
    anything is left over after those bits were examined (including the
    case where *flags* was zero to begin with), the remainder is appended
    as a single hex number.  The pieces are joined with ", ".
    """
    described = []
    for mask in (1 << shift for shift in range(32)):
        if not flags & mask:
            continue
        described.append(COMPILER_FLAG_NAMES.get(mask, hex(mask)))
        flags &= ~mask      # this bit has been reported
        if not flags:
            break
    else:
        # The loop ran to completion without exhausting the flags.
        described.append(hex(flags))
    return ", ".join(described)
 | 
						|
 | 
						|
def _get_code_object(x):
 | 
						|
    """Helper to handle methods, functions, generators, strings and raw code objects"""
 | 
						|
    if hasattr(x, '__func__'): # Method
 | 
						|
        x = x.__func__
 | 
						|
    if hasattr(x, '__code__'): # Function
 | 
						|
        x = x.__code__
 | 
						|
    if hasattr(x, 'gi_code'):  # Generator
 | 
						|
        x = x.gi_code
 | 
						|
    if isinstance(x, str):     # Source code
 | 
						|
        x = _try_compile(x, "<disassembly>")
 | 
						|
    if hasattr(x, 'co_code'):  # Code object
 | 
						|
        return x
 | 
						|
    raise TypeError("don't know how to disassemble %s objects" %
 | 
						|
                    type(x).__name__)
 | 
						|
 | 
						|
def code_info(x):
    """Formatted details of methods, functions, or code.

    *x* is resolved to a code object by _get_code_object and the result
    is rendered by _format_code_info as a multi-line string.
    """
    code = _get_code_object(x)
    return _format_code_info(code)
 | 
						|
 | 
						|
def _format_code_info(co):
 | 
						|
    lines = []
 | 
						|
    lines.append("Name:              %s" % co.co_name)
 | 
						|
    lines.append("Filename:          %s" % co.co_filename)
 | 
						|
    lines.append("Argument count:    %s" % co.co_argcount)
 | 
						|
    lines.append("Kw-only arguments: %s" % co.co_kwonlyargcount)
 | 
						|
    lines.append("Number of locals:  %s" % co.co_nlocals)
 | 
						|
    lines.append("Stack size:        %s" % co.co_stacksize)
 | 
						|
    lines.append("Flags:             %s" % pretty_flags(co.co_flags))
 | 
						|
    if co.co_consts:
 | 
						|
        lines.append("Constants:")
 | 
						|
        for i_c in enumerate(co.co_consts):
 | 
						|
            lines.append("%4d: %r" % i_c)
 | 
						|
    if co.co_names:
 | 
						|
        lines.append("Names:")
 | 
						|
        for i_n in enumerate(co.co_names):
 | 
						|
            lines.append("%4d: %s" % i_n)
 | 
						|
    if co.co_varnames:
 | 
						|
        lines.append("Variable names:")
 | 
						|
        for i_n in enumerate(co.co_varnames):
 | 
						|
            lines.append("%4d: %s" % i_n)
 | 
						|
    if co.co_freevars:
 | 
						|
        lines.append("Free variables:")
 | 
						|
        for i_n in enumerate(co.co_freevars):
 | 
						|
            lines.append("%4d: %s" % i_n)
 | 
						|
    if co.co_cellvars:
 | 
						|
        lines.append("Cell variables:")
 | 
						|
        for i_n in enumerate(co.co_cellvars):
 | 
						|
            lines.append("%4d: %s" % i_n)
 | 
						|
    return "\n".join(lines)
 | 
						|
 | 
						|
def show_code(co, *, file=None):
    """Print details of methods, functions, or code to *file*.

    The text printed is the string returned by code_info(co).
    If *file* is not provided, the output is printed on stdout.
    """
    details = code_info(co)
    print(details, file=file)
 | 
						|
 | 
						|
_Instruction = collections.namedtuple(
    "_Instruction",
    ["opname", "opcode", "arg", "argval", "argrepr",
     "offset", "starts_line", "is_jump_target"],
)

# Give every field of the named tuple its own docstring.
for _field, _text in (
    ("opname", "Human readable name for operation"),
    ("opcode", "Numeric code for operation"),
    ("arg", "Numeric argument to operation (if any), otherwise None"),
    ("argval", "Resolved arg value (if known), otherwise same as arg"),
    ("argrepr", "Human readable description of operation argument"),
    ("offset", "Start index of operation within bytecode sequence"),
    ("starts_line", "Line started by this opcode (if any), otherwise None"),
    ("is_jump_target", "True if other code jumps to here, otherwise False"),
):
    getattr(_Instruction, _field).__doc__ = _text
del _field, _text


class Instruction(_Instruction):
    """Details for a bytecode operation

       Defined fields:
         opname - human readable name for operation
         opcode - numeric code for operation
         arg - numeric argument to operation (if any), otherwise None
         argval - resolved arg value (if known), otherwise same as arg
         argrepr - human readable description of operation argument
         offset - start index of operation within bytecode sequence
         starts_line - line started by this opcode (if any), otherwise None
         is_jump_target - True if other code jumps to here, otherwise False
    """

    def _disassemble(self, lineno_width=3, mark_as_current=False):
        """Format instruction details for inclusion in disassembly output

        *lineno_width* sets the width of the line number field (0 omits it)
        *mark_as_current* inserts a '-->' marker arrow as part of the line

        Returns the columns joined by single spaces, with trailing
        whitespace removed.
        """
        columns = []
        # Source code line number (blank when no line starts here)
        if lineno_width:
            if self.starts_line is None:
                columns.append(' ' * lineno_width)
            else:
                columns.append(("%%%dd" % lineno_width) % self.starts_line)
        # Current instruction indicator
        columns.append('-->' if mark_as_current else '   ')
        # Jump target marker
        columns.append('>>' if self.is_jump_target else '  ')
        # Instruction offset from start of code sequence
        columns.append(repr(self.offset).rjust(4))
        # Opcode name
        columns.append(self.opname.ljust(20))
        # Opcode argument, followed by its description when there is one
        if self.arg is not None:
            columns.append(repr(self.arg).rjust(5))
            if self.argrepr:
                columns.append('(' + self.argrepr + ')')
        return ' '.join(columns).rstrip()
 | 
						|
 | 
						|
 | 
						|
def get_instructions(x, *, first_line=None):
    """Iterator for the opcodes in methods, functions or code

    Generates a series of Instruction named tuples giving the details of
    each operation in the supplied code.

    If *first_line* is not None, it indicates the line number that should
    be reported for the first source line in the disassembled code.
    Otherwise, the source line information (if any) is taken directly from
    the disassembled code object.
    """
    co = _get_code_object(x)
    # Shift reported line numbers so the first one becomes *first_line*.
    line_offset = 0 if first_line is None else first_line - co.co_firstlineno
    return _get_instructions_bytes(
        co.co_code,
        co.co_varnames,
        co.co_names,
        co.co_consts,
        co.co_cellvars + co.co_freevars,
        dict(findlinestarts(co)),
        line_offset,
    )
 | 
						|
 | 
						|
def _get_const_info(const_index, const_list):
 | 
						|
    """Helper to get optional details about const references
 | 
						|
 | 
						|
       Returns the dereferenced constant and its repr if the constant
 | 
						|
       list is defined.
 | 
						|
       Otherwise returns the constant index and its repr().
 | 
						|
    """
 | 
						|
    argval = const_index
 | 
						|
    if const_list is not None:
 | 
						|
        argval = const_list[const_index]
 | 
						|
    return argval, repr(argval)
 | 
						|
 | 
						|
def _get_name_info(name_index, name_list):
 | 
						|
    """Helper to get optional details about named references
 | 
						|
 | 
						|
       Returns the dereferenced name as both value and repr if the name
 | 
						|
       list is defined.
 | 
						|
       Otherwise returns the name index and its repr().
 | 
						|
    """
 | 
						|
    argval = name_index
 | 
						|
    if name_list is not None:
 | 
						|
        argval = name_list[name_index]
 | 
						|
        argrepr = argval
 | 
						|
    else:
 | 
						|
        argrepr = repr(argval)
 | 
						|
    return argval, argrepr
 | 
						|
 | 
						|
 | 
						|
def _get_instructions_bytes(code, varnames=None, names=None, constants=None,
                      cells=None, linestarts=None, line_offset=0):
    """Iterate over the instructions in a bytecode string.

    Generates a sequence of Instruction namedtuples giving the details of each
    opcode.  Additional information about the code's runtime environment
    (e.g. variable names, constants) can be specified using optional
    arguments.

    *varnames*, *names*, *constants* and *cells* are the tables used to
    resolve instruction arguments; when a table is None the raw index is
    reported instead.  *linestarts* maps bytecode offsets to line numbers
    and *line_offset* is added to every line number taken from it.
    """
    # A set makes the per-instruction "is this a jump target?" check
    # constant time instead of a scan over the list of labels.
    labels = set(findlabels(code))
    starts_line = None
    for offset, op, arg in _unpack_opargs(code):
        if linestarts is not None:
            starts_line = linestarts.get(offset, None)
            if starts_line is not None:
                starts_line += line_offset
        is_jump_target = offset in labels
        argval = None
        argrepr = ''
        if arg is not None:
            #  Set argval to the dereferenced value of the argument when
            #  available, and argrepr to the string representation of argval.
            #    _disassemble_bytes needs the string repr of the
            #    raw name index for LOAD_GLOBAL, LOAD_CONST, etc.
            argval = arg
            if op in hasconst:
                argval, argrepr = _get_const_info(arg, constants)
            elif op in hasname:
                argval, argrepr = _get_name_info(arg, names)
            elif op in hasjrel:
                # Relative jumps count from the following instruction,
                # which starts two bytes after this one.
                argval = offset + 2 + arg
                argrepr = "to " + repr(argval)
            elif op in haslocal:
                argval, argrepr = _get_name_info(arg, varnames)
            elif op in hascompare:
                argval = cmp_op[arg]
                argrepr = argval
            elif op in hasfree:
                argval, argrepr = _get_name_info(arg, cells)
            elif op in hasnargs:
                # Low byte: positional count; next byte up: keyword pairs.
                argrepr = "%d positional, %d keyword pair" % (arg%256, arg//256)
        yield Instruction(opname[op], op,
                          arg, argval, argrepr,
                          offset, starts_line, is_jump_target)
 | 
						|
 | 
						|
def disassemble(co, lasti=-1, *, file=None):
    """Disassemble a code object.

    *lasti* is the offset of the instruction to mark as current; the
    default of -1 marks none.  Output is passed to print() with the
    given *file* argument.
    """
    _disassemble_bytes(
        co.co_code,
        lasti,
        co.co_varnames,
        co.co_names,
        co.co_consts,
        co.co_cellvars + co.co_freevars,
        dict(findlinestarts(co)),
        file=file,
    )
 | 
						|
 | 
						|
def _disassemble_bytes(code, lasti=-1, varnames=None, names=None,
                       constants=None, cells=None, linestarts=None,
                       *, file=None, line_offset=0):
    """Print a disassembly listing of the raw bytecode *code*.

    *lasti* is the offset of the instruction to mark with '-->' (the
    default of -1 marks none).  *varnames*, *names*, *constants* and
    *cells* are the optional lookup tables used to describe instruction
    arguments.  *linestarts* maps bytecode offsets to line numbers and
    *line_offset* is added to each of them.  Each line of the listing is
    passed to print() with the given *file* argument.
    """
    # Omit the line number column entirely if we have no line number info
    show_lineno = linestarts is not None
    if show_lineno:
        # Widen the column beyond the usual 3 characters when the largest
        # line number needs more room, so the listing stays aligned.
        max_lineno = max(linestarts.values(), default=0) + line_offset
        lineno_width = max(3, len(str(max_lineno)))
    else:
        lineno_width = 0
    for instr in _get_instructions_bytes(code, varnames, names,
                                         constants, cells, linestarts,
                                         line_offset=line_offset):
        # Separate source lines with a blank line, except before the
        # very first instruction.
        new_source_line = (show_lineno and
                           instr.starts_line is not None and
                           instr.offset > 0)
        if new_source_line:
            print(file=file)
        is_current_instr = instr.offset == lasti
        print(instr._disassemble(lineno_width, is_current_instr), file=file)
 | 
						|
 | 
						|
def _disassemble_str(source, *, file=None):
    """Compile the source string, then disassemble the code object.

    The source is compiled by _try_compile under the filename '<dis>'.
    """
    code = _try_compile(source, '<dis>')
    disassemble(code, file=file)
 | 
						|
 | 
						|
disco = disassemble                     # Alias of disassemble(), kept for backwards compatibility
 | 
						|
 | 
						|
def _unpack_opargs(code):
 | 
						|
    extended_arg = 0
 | 
						|
    for i in range(0, len(code), 2):
 | 
						|
        op = code[i]
 | 
						|
        if op >= HAVE_ARGUMENT:
 | 
						|
            arg = code[i+1] | extended_arg
 | 
						|
            extended_arg = (arg << 8) if op == EXTENDED_ARG else 0
 | 
						|
        else:
 | 
						|
            arg = None
 | 
						|
        yield (i, op, arg)
 | 
						|
 | 
						|
def findlabels(code):
    """Detect all offsets in a byte code which are jump targets.

    Return the list of offsets, each listed once, in the order in which
    the jumps to them are encountered.

    """
    targets = []
    for offset, op, arg in _unpack_opargs(code):
        if arg is None:
            continue
        if op in hasjrel:
            # Relative to the instruction after this two-byte one.
            target = offset + 2 + arg
        elif op in hasjabs:
            target = arg
        else:
            continue
        if target not in targets:
            targets.append(target)
    return targets
 | 
						|
 | 
						|
def findlinestarts(code):
    """Find the offsets in a byte code which are start of lines in the source.

    Generate pairs (offset, lineno) as described in Python/compile.c.

    The pairs are decoded from code.co_lnotab, a sequence of alternating
    (address increment, line increment) bytes, starting from offset 0
    and line code.co_firstlineno.

    """
    table = code.co_lnotab
    reported_line = None
    current_line = code.co_firstlineno
    address = 0
    for addr_step, line_step in zip(table[0::2], table[1::2]):
        if addr_step:
            # The address is about to advance: report the line gathered
            # so far, unless it was already reported.
            if current_line != reported_line:
                yield (address, current_line)
                reported_line = current_line
            address += addr_step
        # Line increments are stored as 8-bit signed integers.
        signed_step = line_step - 0x100 if line_step >= 0x80 else line_step
        current_line += signed_step
    if current_line != reported_line:
        yield (address, current_line)
 | 
						|
 | 
						|
class Bytecode:
    """The bytecode operations of a piece of code

    Instantiate this with a function, method, string of code, or a code object
    (as returned by compile()).

    Iterating over this yields the bytecode operations as Instruction instances.
    """

    def __init__(self, x, *, first_line=None, current_offset=None):
        """Wrap the code object obtained from *x*.

        *first_line*, when given, is the line number to report for the
        first source line; otherwise the code object's own numbering is
        used.  *current_offset* is the offset of the instruction that
        dis() marks as current (None marks none).
        """
        code = _get_code_object(x)
        own_first_line = code.co_firstlineno
        self.codeobj = code
        self.first_line = own_first_line if first_line is None else first_line
        self._line_offset = (0 if first_line is None
                             else first_line - own_first_line)
        self._cell_names = code.co_cellvars + code.co_freevars
        self._linestarts = dict(findlinestarts(code))
        self._original_object = x
        self.current_offset = current_offset

    def __iter__(self):
        """Return an iterator over the Instruction tuples of the code."""
        code = self.codeobj
        return _get_instructions_bytes(
            code.co_code,
            code.co_varnames,
            code.co_names,
            code.co_consts,
            self._cell_names,
            self._linestarts,
            line_offset=self._line_offset,
        )

    def __repr__(self):
        """Show the class name and the object this was created from."""
        class_name = self.__class__.__name__
        return "{}({!r})".format(class_name, self._original_object)

    @classmethod
    def from_traceback(cls, tb):
        """ Construct a Bytecode from the given traceback """
        # Walk to the innermost traceback entry.
        innermost = tb
        while innermost.tb_next:
            innermost = innermost.tb_next
        return cls(innermost.tb_frame.f_code,
                   current_offset=innermost.tb_lasti)

    def info(self):
        """Return formatted information about the code object."""
        return _format_code_info(self.codeobj)

    def dis(self):
        """Return a formatted view of the bytecode operations."""
        code = self.codeobj
        # -1 is the "no current instruction" value used by the printer.
        marked = -1 if self.current_offset is None else self.current_offset
        with io.StringIO() as listing:
            _disassemble_bytes(
                code.co_code,
                varnames=code.co_varnames,
                names=code.co_names,
                constants=code.co_consts,
                cells=self._cell_names,
                linestarts=self._linestarts,
                line_offset=self._line_offset,
                file=listing,
                lasti=marked,
            )
            return listing.getvalue()
 | 
						|
 | 
						|
 | 
						|
def _test():
    """Simple test program to disassemble a file.

    Reads the file named on the command line (standard input when the
    argument is omitted), compiles it and prints the disassembly.
    """
    import argparse

    cli = argparse.ArgumentParser()
    cli.add_argument('infile', type=argparse.FileType(), nargs='?', default='-')
    options = cli.parse_args()
    with options.infile as stream:
        text = stream.read()
    dis(compile(text, options.infile.name, "exec"))


if __name__ == "__main__":
    _test()
 |