mirror of
https://github.com/python/cpython.git
synced 2025-09-26 10:19:53 +00:00
complete rewrite
code generator uses flowgraph as intermediate representation. the old rep uses a list with explicit "StackRefs" to indicate the target of jumps. pyassem converts flowgraph to bytecode, breaks up individual steps of generating bytecode
This commit is contained in:
parent
f635abee3a
commit
36cc6a2197
4 changed files with 1778 additions and 1716 deletions
|
@ -1,40 +1,127 @@
|
||||||
"""Assembler for Python bytecode
|
"""A flow graph representation for Python bytecode"""
|
||||||
|
|
||||||
The new module is used to create the code object. The following
|
|
||||||
attribute definitions are included from the reference manual:
|
|
||||||
|
|
||||||
co_name gives the function name
|
|
||||||
co_argcount is the number of positional arguments (including
|
|
||||||
arguments with default values)
|
|
||||||
co_nlocals is the number of local variables used by the function
|
|
||||||
(including arguments)
|
|
||||||
co_varnames is a tuple containing the names of the local variables
|
|
||||||
(starting with the argument names)
|
|
||||||
co_code is a string representing the sequence of bytecode instructions
|
|
||||||
co_consts is a tuple containing the literals used by the bytecode
|
|
||||||
co_names is a tuple containing the names used by the bytecode
|
|
||||||
co_filename is the filename from which the code was compiled
|
|
||||||
co_firstlineno is the first line number of the function
|
|
||||||
co_lnotab is a string encoding the mapping from byte code offsets
|
|
||||||
to line numbers. see LineAddrTable below.
|
|
||||||
co_stacksize is the required stack size (including local variables)
|
|
||||||
co_flags is an integer encoding a number of flags for the
|
|
||||||
interpreter. There are four flags:
|
|
||||||
CO_OPTIMIZED -- uses load fast
|
|
||||||
CO_NEWLOCALS -- everything?
|
|
||||||
CO_VARARGS -- use *args
|
|
||||||
CO_VARKEYWORDS -- uses **args
|
|
||||||
|
|
||||||
If a code object represents a function, the first item in co_consts is
|
|
||||||
the documentation string of the function, or None if undefined.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import dis
|
import dis
|
||||||
import new
|
import new
|
||||||
import string
|
import string
|
||||||
|
import types
|
||||||
|
|
||||||
import misc
|
from compiler import misc
|
||||||
|
|
||||||
|
class FlowGraph:
|
||||||
|
def __init__(self):
|
||||||
|
self.current = self.entry = Block()
|
||||||
|
self.exit = Block("exit")
|
||||||
|
self.blocks = misc.Set()
|
||||||
|
self.blocks.add(self.entry)
|
||||||
|
self.blocks.add(self.exit)
|
||||||
|
|
||||||
|
def startBlock(self, block):
|
||||||
|
self.current = block
|
||||||
|
|
||||||
|
def nextBlock(self, block=None):
|
||||||
|
if block is None:
|
||||||
|
block = self.newBlock()
|
||||||
|
# XXX think we need to specify when there is implicit transfer
|
||||||
|
# from one block to the next
|
||||||
|
#
|
||||||
|
# I think this strategy works: each block has a child
|
||||||
|
# designated as "next" which is returned as the last of the
|
||||||
|
# children. because the nodes in a graph are emitted in
|
||||||
|
# reverse post order, the "next" block will always be emitted
|
||||||
|
# immediately after its parent.
|
||||||
|
# Worry: maintaining this invariant could be tricky
|
||||||
|
self.current.addNext(block)
|
||||||
|
self.startBlock(block)
|
||||||
|
|
||||||
|
def newBlock(self):
|
||||||
|
b = Block()
|
||||||
|
self.blocks.add(b)
|
||||||
|
return b
|
||||||
|
|
||||||
|
def startExitBlock(self):
|
||||||
|
self.startBlock(self.exit)
|
||||||
|
|
||||||
|
def emit(self, *inst):
|
||||||
|
# XXX should jump instructions implicitly call nextBlock?
|
||||||
|
if inst[0] == 'RETURN_VALUE':
|
||||||
|
self.current.addOutEdge(self.exit)
|
||||||
|
self.current.emit(inst)
|
||||||
|
|
||||||
|
def getBlocks(self):
|
||||||
|
"""Return the blocks in reverse postorder
|
||||||
|
|
||||||
|
i.e. each node appears before all of its successors
|
||||||
|
"""
|
||||||
|
# XXX make sure every node that doesn't have an explicit next
|
||||||
|
# is set so that next points to exit
|
||||||
|
for b in self.blocks.elements():
|
||||||
|
if b is self.exit:
|
||||||
|
continue
|
||||||
|
if not b.next:
|
||||||
|
b.addNext(self.exit)
|
||||||
|
order = dfs_postorder(self.entry, {})
|
||||||
|
order.reverse()
|
||||||
|
# hack alert
|
||||||
|
if not self.exit in order:
|
||||||
|
order.append(self.exit)
|
||||||
|
return order
|
||||||
|
|
||||||
|
def dfs_postorder(b, seen):
|
||||||
|
"""Depth-first search of tree rooted at b, return in postorder"""
|
||||||
|
order = []
|
||||||
|
seen[b] = b
|
||||||
|
for c in b.children():
|
||||||
|
if seen.has_key(c):
|
||||||
|
continue
|
||||||
|
order = order + dfs_postorder(c, seen)
|
||||||
|
order.append(b)
|
||||||
|
return order
|
||||||
|
|
||||||
|
class Block:
|
||||||
|
_count = 0
|
||||||
|
|
||||||
|
def __init__(self, label=''):
|
||||||
|
self.insts = []
|
||||||
|
self.inEdges = misc.Set()
|
||||||
|
self.outEdges = misc.Set()
|
||||||
|
self.label = label
|
||||||
|
self.bid = Block._count
|
||||||
|
self.next = []
|
||||||
|
Block._count = Block._count + 1
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
if self.label:
|
||||||
|
return "<block %s id=%d len=%d>" % (self.label, self.bid,
|
||||||
|
len(self.insts))
|
||||||
|
else:
|
||||||
|
return "<block id=%d len=%d>" % (self.bid, len(self.insts))
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
insts = map(str, self.insts)
|
||||||
|
return "<block %s %d:\n%s>" % (self.label, self.bid,
|
||||||
|
string.join(insts, '\n'))
|
||||||
|
|
||||||
|
def emit(self, inst):
|
||||||
|
op = inst[0]
|
||||||
|
if op[:4] == 'JUMP':
|
||||||
|
self.outEdges.add(inst[1])
|
||||||
|
self.insts.append(inst)
|
||||||
|
|
||||||
|
def getInstructions(self):
|
||||||
|
return self.insts
|
||||||
|
|
||||||
|
def addInEdge(self, block):
|
||||||
|
self.inEdges.add(block)
|
||||||
|
|
||||||
|
def addOutEdge(self, block):
|
||||||
|
self.outEdges.add(block)
|
||||||
|
|
||||||
|
def addNext(self, block):
|
||||||
|
self.next.append(block)
|
||||||
|
assert len(self.next) == 1, map(str, self.next)
|
||||||
|
|
||||||
|
def children(self):
|
||||||
|
return self.outEdges.elements() + self.next
|
||||||
|
|
||||||
# flags for code objects
|
# flags for code objects
|
||||||
CO_OPTIMIZED = 0x0001
|
CO_OPTIMIZED = 0x0001
|
||||||
|
@ -42,224 +129,128 @@ CO_NEWLOCALS = 0x0002
|
||||||
CO_VARARGS = 0x0004
|
CO_VARARGS = 0x0004
|
||||||
CO_VARKEYWORDS = 0x0008
|
CO_VARKEYWORDS = 0x0008
|
||||||
|
|
||||||
class TupleArg:
|
# the FlowGraph is transformed in place; it exists in one of these states
|
||||||
def __init__(self, count, names):
|
RAW = "RAW"
|
||||||
self.count = count
|
FLAT = "FLAT"
|
||||||
self.names = names
|
CONV = "CONV"
|
||||||
def __repr__(self):
|
DONE = "DONE"
|
||||||
return "TupleArg(%s, %s)" % (self.count, self.names)
|
|
||||||
def getName(self):
|
|
||||||
return ".nested%d" % self.count
|
|
||||||
|
|
||||||
class PyAssembler:
|
class PyFlowGraph(FlowGraph):
|
||||||
"""Creates Python code objects
|
super_init = FlowGraph.__init__
|
||||||
"""
|
|
||||||
|
|
||||||
# XXX this class needs to major refactoring
|
def __init__(self, name, filename, args=(), optimized=0):
|
||||||
|
self.super_init()
|
||||||
def __init__(self, args=(), name='?', filename='<?>',
|
self.name = name
|
||||||
docstring=None):
|
self.filename = filename
|
||||||
# XXX why is the default value for flags 3?
|
self.docstring = None
|
||||||
self.insts = []
|
self.args = args # XXX
|
||||||
# used by makeCodeObject
|
self.argcount = getArgCount(args)
|
||||||
self._getArgCount(args)
|
if optimized:
|
||||||
self.code = ''
|
self.flags = CO_OPTIMIZED | CO_NEWLOCALS
|
||||||
self.consts = [docstring]
|
else:
|
||||||
self.filename = filename
|
self.flags = 0
|
||||||
self.flags = CO_NEWLOCALS
|
self.firstlineno = None
|
||||||
self.name = name
|
self.consts = []
|
||||||
self.names = []
|
self.names = []
|
||||||
self.varnames = list(args) or []
|
self.varnames = list(args) or []
|
||||||
for i in range(len(self.varnames)):
|
for i in range(len(self.varnames)):
|
||||||
var = self.varnames[i]
|
var = self.varnames[i]
|
||||||
if isinstance(var, TupleArg):
|
if isinstance(var, TupleArg):
|
||||||
self.varnames[i] = var.getName()
|
self.varnames[i] = var.getName()
|
||||||
# lnotab support
|
self.stage = RAW
|
||||||
self.firstlineno = 0
|
|
||||||
self.lastlineno = 0
|
|
||||||
self.last_addr = 0
|
|
||||||
self.lnotab = ''
|
|
||||||
|
|
||||||
def _getArgCount(self, args):
|
def setDocstring(self, doc):
|
||||||
self.argcount = len(args)
|
self.docstring = doc
|
||||||
if args:
|
self.consts.insert(0, doc)
|
||||||
for arg in args:
|
|
||||||
if isinstance(arg, TupleArg):
|
|
||||||
numNames = len(misc.flatten(arg.names))
|
|
||||||
self.argcount = self.argcount - numNames
|
|
||||||
|
|
||||||
def __repr__(self):
|
def setFlag(self, flag):
|
||||||
return "<bytecode: %d instrs>" % len(self.insts)
|
self.flags = self.flags | flag
|
||||||
|
if flag == CO_VARARGS:
|
||||||
|
self.argcount = self.argcount - 1
|
||||||
|
|
||||||
def setFlags(self, val):
|
def getCode(self):
|
||||||
"""XXX for module's function"""
|
"""Get a Python code object"""
|
||||||
self.flags = val
|
if self.stage == RAW:
|
||||||
|
self.flattenGraph()
|
||||||
|
if self.stage == FLAT:
|
||||||
|
self.convertArgs()
|
||||||
|
if self.stage == CONV:
|
||||||
|
self.makeByteCode()
|
||||||
|
if self.stage == DONE:
|
||||||
|
return self.newCodeObject()
|
||||||
|
raise RuntimeError, "inconsistent PyFlowGraph state"
|
||||||
|
|
||||||
def setOptimized(self):
|
def dump(self, io=None):
|
||||||
self.flags = self.flags | CO_OPTIMIZED
|
if io:
|
||||||
|
save = sys.stdout
|
||||||
def setVarArgs(self):
|
sys.stdout = io
|
||||||
if not self.flags & CO_VARARGS:
|
pc = 0
|
||||||
self.flags = self.flags | CO_VARARGS
|
|
||||||
self.argcount = self.argcount - 1
|
|
||||||
|
|
||||||
def setKWArgs(self):
|
|
||||||
self.flags = self.flags | CO_VARKEYWORDS
|
|
||||||
|
|
||||||
def getCurInst(self):
|
|
||||||
return len(self.insts)
|
|
||||||
|
|
||||||
def getNextInst(self):
|
|
||||||
return len(self.insts) + 1
|
|
||||||
|
|
||||||
def dump(self, io=sys.stdout):
|
|
||||||
i = 0
|
|
||||||
for inst in self.insts:
|
|
||||||
if inst[0] == 'SET_LINENO':
|
|
||||||
io.write("\n")
|
|
||||||
io.write(" %3d " % i)
|
|
||||||
if len(inst) == 1:
|
|
||||||
io.write("%s\n" % inst)
|
|
||||||
else:
|
|
||||||
io.write("%-15.15s\t%s\n" % inst)
|
|
||||||
i = i + 1
|
|
||||||
|
|
||||||
def makeCodeObject(self):
|
|
||||||
"""Make a Python code object
|
|
||||||
|
|
||||||
This creates a Python code object using the new module. This
|
|
||||||
seems simpler than reverse-engineering the way marshal dumps
|
|
||||||
code objects into .pyc files. One of the key difficulties is
|
|
||||||
figuring out how to layout references to code objects that
|
|
||||||
appear on the VM stack; e.g.
|
|
||||||
3 SET_LINENO 1
|
|
||||||
6 LOAD_CONST 0 (<code object fact at 8115878 [...]
|
|
||||||
9 MAKE_FUNCTION 0
|
|
||||||
12 STORE_NAME 0 (fact)
|
|
||||||
"""
|
|
||||||
|
|
||||||
self._findOffsets()
|
|
||||||
lnotab = LineAddrTable()
|
|
||||||
for t in self.insts:
|
for t in self.insts:
|
||||||
opname = t[0]
|
opname = t[0]
|
||||||
|
if opname == "SET_LINENO":
|
||||||
|
print
|
||||||
if len(t) == 1:
|
if len(t) == 1:
|
||||||
lnotab.addCode(self.opnum[opname])
|
print "\t", "%3d" % pc, opname
|
||||||
elif len(t) == 2:
|
pc = pc + 1
|
||||||
if opname == 'SET_LINENO':
|
|
||||||
oparg = t[1]
|
|
||||||
lnotab.nextLine(oparg)
|
|
||||||
else:
|
|
||||||
oparg = self._convertArg(opname, t[1])
|
|
||||||
try:
|
|
||||||
hi, lo = divmod(oparg, 256)
|
|
||||||
except TypeError:
|
|
||||||
raise TypeError, "untranslated arg: %s, %s" % (opname, oparg)
|
|
||||||
lnotab.addCode(self.opnum[opname], lo, hi)
|
|
||||||
|
|
||||||
# why is a module a special case?
|
|
||||||
if self.flags == 0:
|
|
||||||
nlocals = 0
|
|
||||||
else:
|
|
||||||
nlocals = len(self.varnames)
|
|
||||||
# XXX danger! can't pass through here twice
|
|
||||||
if self.flags & CO_VARKEYWORDS:
|
|
||||||
self.argcount = self.argcount - 1
|
|
||||||
stacksize = findDepth(self.insts)
|
|
||||||
try:
|
|
||||||
co = new.code(self.argcount, nlocals, stacksize,
|
|
||||||
self.flags, lnotab.getCode(), self._getConsts(),
|
|
||||||
tuple(self.names), tuple(self.varnames),
|
|
||||||
self.filename, self.name, self.firstlineno,
|
|
||||||
lnotab.getTable())
|
|
||||||
except SystemError, err:
|
|
||||||
print err
|
|
||||||
print repr(self.argcount)
|
|
||||||
print repr(nlocals)
|
|
||||||
print repr(stacksize)
|
|
||||||
print repr(self.flags)
|
|
||||||
print repr(lnotab.getCode())
|
|
||||||
print repr(self._getConsts())
|
|
||||||
print repr(self.names)
|
|
||||||
print repr(self.varnames)
|
|
||||||
print repr(self.filename)
|
|
||||||
print repr(self.name)
|
|
||||||
print repr(self.firstlineno)
|
|
||||||
print repr(lnotab.getTable())
|
|
||||||
raise
|
|
||||||
return co
|
|
||||||
|
|
||||||
def _getConsts(self):
|
|
||||||
"""Return a tuple for the const slot of a code object
|
|
||||||
|
|
||||||
Converts PythonVMCode objects to code objects
|
|
||||||
"""
|
|
||||||
l = []
|
|
||||||
for elt in self.consts:
|
|
||||||
# XXX might be clearer to just as isinstance(CodeGen)
|
|
||||||
if hasattr(elt, 'asConst'):
|
|
||||||
l.append(elt.asConst())
|
|
||||||
else:
|
else:
|
||||||
l.append(elt)
|
print "\t", "%3d" % pc, opname, t[1]
|
||||||
return tuple(l)
|
pc = pc + 3
|
||||||
|
if io:
|
||||||
|
sys.stdout = save
|
||||||
|
|
||||||
def _findOffsets(self):
|
def flattenGraph(self):
|
||||||
"""Find offsets for use in resolving StackRefs"""
|
"""Arrange the blocks in order and resolve jumps"""
|
||||||
self.offsets = []
|
assert self.stage == RAW
|
||||||
cur = 0
|
self.insts = insts = []
|
||||||
for t in self.insts:
|
pc = 0
|
||||||
self.offsets.append(cur)
|
begin = {}
|
||||||
l = len(t)
|
end = {}
|
||||||
if l == 1:
|
for b in self.getBlocks():
|
||||||
cur = cur + 1
|
begin[b] = pc
|
||||||
elif l == 2:
|
for inst in b.getInstructions():
|
||||||
cur = cur + 3
|
insts.append(inst)
|
||||||
arg = t[1]
|
if len(inst) == 1:
|
||||||
# XXX this is a total hack: for a reference used
|
pc = pc + 1
|
||||||
# multiple times, we create a list of offsets and
|
else:
|
||||||
# expect that we when we pass through the code again
|
# arg takes 2 bytes
|
||||||
# to actually generate the offsets, we'll pass in the
|
pc = pc + 3
|
||||||
# same order.
|
end[b] = pc
|
||||||
if isinstance(arg, StackRef):
|
pc = 0
|
||||||
try:
|
for i in range(len(insts)):
|
||||||
arg.__offset.append(cur)
|
inst = insts[i]
|
||||||
except AttributeError:
|
if len(inst) == 1:
|
||||||
arg.__offset = [cur]
|
pc = pc + 1
|
||||||
|
else:
|
||||||
|
pc = pc + 3
|
||||||
|
opname = inst[0]
|
||||||
|
if self.hasjrel.has_elt(opname):
|
||||||
|
oparg = inst[1]
|
||||||
|
offset = begin[oparg] - pc
|
||||||
|
insts[i] = opname, offset
|
||||||
|
elif self.hasjabs.has_elt(opname):
|
||||||
|
insts[i] = opname, begin[inst[1]]
|
||||||
|
self.stacksize = findDepth(self.insts)
|
||||||
|
self.stage = FLAT
|
||||||
|
|
||||||
def _convertArg(self, op, arg):
|
hasjrel = misc.Set()
|
||||||
"""Convert the string representation of an arg to a number
|
for i in dis.hasjrel:
|
||||||
|
hasjrel.add(dis.opname[i])
|
||||||
|
hasjabs = misc.Set()
|
||||||
|
for i in dis.hasjabs:
|
||||||
|
hasjabs.add(dis.opname[i])
|
||||||
|
|
||||||
The specific handling depends on the opcode.
|
def convertArgs(self):
|
||||||
|
"""Convert arguments from symbolic to concrete form"""
|
||||||
XXX This first implementation isn't going to be very
|
assert self.stage == FLAT
|
||||||
efficient.
|
for i in range(len(self.insts)):
|
||||||
"""
|
t = self.insts[i]
|
||||||
if op == 'SET_LINENO':
|
if len(t) == 2:
|
||||||
return arg
|
opname = t[0]
|
||||||
if op == 'LOAD_CONST':
|
oparg = t[1]
|
||||||
return self._lookupName(arg, self.consts)
|
conv = self._converters.get(opname, None)
|
||||||
if op in self.localOps:
|
if conv:
|
||||||
# make sure it's in self.names, but use the bytecode offset
|
self.insts[i] = opname, conv(self, oparg)
|
||||||
self._lookupName(arg, self.names)
|
self.stage = CONV
|
||||||
return self._lookupName(arg, self.varnames)
|
|
||||||
if op in self.globalOps:
|
|
||||||
return self._lookupName(arg, self.names)
|
|
||||||
if op in self.nameOps:
|
|
||||||
return self._lookupName(arg, self.names)
|
|
||||||
if op == 'COMPARE_OP':
|
|
||||||
return self.cmp_op.index(arg)
|
|
||||||
if self.hasjrel.has_elt(op):
|
|
||||||
offset = arg.__offset[0]
|
|
||||||
del arg.__offset[0]
|
|
||||||
return self.offsets[arg.resolve()] - offset
|
|
||||||
if self.hasjabs.has_elt(op):
|
|
||||||
return self.offsets[arg.resolve()]
|
|
||||||
return arg
|
|
||||||
|
|
||||||
nameOps = ('STORE_NAME', 'IMPORT_NAME', 'IMPORT_FROM',
|
|
||||||
'STORE_ATTR', 'LOAD_ATTR', 'LOAD_NAME', 'DELETE_NAME',
|
|
||||||
'DELETE_ATTR')
|
|
||||||
localOps = ('LOAD_FAST', 'STORE_FAST', 'DELETE_FAST')
|
|
||||||
globalOps = ('LOAD_GLOBAL', 'STORE_GLOBAL', 'DELETE_GLOBAL')
|
|
||||||
|
|
||||||
def _lookupName(self, name, list):
|
def _lookupName(self, name, list):
|
||||||
"""Return index of name in list, appending if necessary"""
|
"""Return index of name in list, appending if necessary"""
|
||||||
|
@ -276,32 +267,124 @@ class PyAssembler:
|
||||||
list.append(name)
|
list.append(name)
|
||||||
return end
|
return end
|
||||||
|
|
||||||
# Convert some stuff from the dis module for local use
|
_converters = {}
|
||||||
|
def _convert_LOAD_CONST(self, arg):
|
||||||
cmp_op = list(dis.cmp_op)
|
return self._lookupName(arg, self.consts)
|
||||||
hasjrel = misc.Set()
|
|
||||||
for i in dis.hasjrel:
|
def _convert_LOAD_FAST(self, arg):
|
||||||
hasjrel.add(dis.opname[i])
|
self._lookupName(arg, self.names)
|
||||||
hasjabs = misc.Set()
|
return self._lookupName(arg, self.varnames)
|
||||||
for i in dis.hasjabs:
|
_convert_STORE_FAST = _convert_LOAD_FAST
|
||||||
hasjabs.add(dis.opname[i])
|
_convert_DELETE_FAST = _convert_LOAD_FAST
|
||||||
|
|
||||||
|
def _convert_NAME(self, arg):
|
||||||
|
return self._lookupName(arg, self.names)
|
||||||
|
_convert_LOAD_NAME = _convert_NAME
|
||||||
|
_convert_STORE_NAME = _convert_NAME
|
||||||
|
_convert_DELETE_NAME = _convert_NAME
|
||||||
|
_convert_IMPORT_NAME = _convert_NAME
|
||||||
|
_convert_IMPORT_FROM = _convert_NAME
|
||||||
|
_convert_STORE_ATTR = _convert_NAME
|
||||||
|
_convert_LOAD_ATTR = _convert_NAME
|
||||||
|
_convert_DELETE_ATTR = _convert_NAME
|
||||||
|
_convert_LOAD_GLOBAL = _convert_NAME
|
||||||
|
_convert_STORE_GLOBAL = _convert_NAME
|
||||||
|
_convert_DELETE_GLOBAL = _convert_NAME
|
||||||
|
|
||||||
|
_cmp = list(dis.cmp_op)
|
||||||
|
def _convert_COMPARE_OP(self, arg):
|
||||||
|
return self._cmp.index(arg)
|
||||||
|
|
||||||
|
# similarly for other opcodes...
|
||||||
|
|
||||||
|
for name, obj in locals().items():
|
||||||
|
if name[:9] == "_convert_":
|
||||||
|
opname = name[9:]
|
||||||
|
_converters[opname] = obj
|
||||||
|
del name, obj, opname
|
||||||
|
|
||||||
|
def makeByteCode(self):
|
||||||
|
assert self.stage == CONV
|
||||||
|
self.lnotab = lnotab = LineAddrTable()
|
||||||
|
for t in self.insts:
|
||||||
|
opname = t[0]
|
||||||
|
if len(t) == 1:
|
||||||
|
lnotab.addCode(self.opnum[opname])
|
||||||
|
else:
|
||||||
|
oparg = t[1]
|
||||||
|
if opname == "SET_LINENO":
|
||||||
|
lnotab.nextLine(oparg)
|
||||||
|
if self.firstlineno is None:
|
||||||
|
self.firstlineno = oparg
|
||||||
|
hi, lo = twobyte(oparg)
|
||||||
|
try:
|
||||||
|
lnotab.addCode(self.opnum[opname], lo, hi)
|
||||||
|
except ValueError:
|
||||||
|
print opname, oparg
|
||||||
|
print self.opnum[opname], lo, hi
|
||||||
|
raise
|
||||||
|
self.stage = DONE
|
||||||
|
|
||||||
opnum = {}
|
opnum = {}
|
||||||
for num in range(len(dis.opname)):
|
for num in range(len(dis.opname)):
|
||||||
opnum[dis.opname[num]] = num
|
opnum[dis.opname[num]] = num
|
||||||
|
del num
|
||||||
|
|
||||||
# this version of emit + arbitrary hooks might work, but it's damn
|
def newCodeObject(self):
|
||||||
# messy.
|
assert self.stage == DONE
|
||||||
|
if self.flags == 0:
|
||||||
|
nlocals = 0
|
||||||
|
else:
|
||||||
|
nlocals = len(self.varnames)
|
||||||
|
argcount = self.argcount
|
||||||
|
if self.flags & CO_VARKEYWORDS:
|
||||||
|
argcount = argcount - 1
|
||||||
|
return new.code(argcount, nlocals, self.stacksize, self.flags,
|
||||||
|
self.lnotab.getCode(), self.getConsts(),
|
||||||
|
tuple(self.names), tuple(self.varnames),
|
||||||
|
self.filename, self.name, self.firstlineno,
|
||||||
|
self.lnotab.getTable())
|
||||||
|
|
||||||
def emit(self, *args):
|
def getConsts(self):
|
||||||
self._emitDispatch(args[0], args[1:])
|
"""Return a tuple for the const slot of the code object
|
||||||
self.insts.append(args)
|
|
||||||
|
|
||||||
def _emitDispatch(self, type, args):
|
Must convert references to code (MAKE_FUNCTION) to code
|
||||||
for func in self._emit_hooks.get(type, []):
|
objects recursively.
|
||||||
func(self, args)
|
"""
|
||||||
|
l = []
|
||||||
|
for elt in self.consts:
|
||||||
|
if isinstance(elt, PyFlowGraph):
|
||||||
|
elt = elt.getCode()
|
||||||
|
l.append(elt)
|
||||||
|
return tuple(l)
|
||||||
|
|
||||||
|
def isJump(opname):
|
||||||
|
if opname[:4] == 'JUMP':
|
||||||
|
return 1
|
||||||
|
|
||||||
_emit_hooks = {}
|
class TupleArg:
|
||||||
|
"""Helper for marking func defs with nested tuples in arglist"""
|
||||||
|
def __init__(self, count, names):
|
||||||
|
self.count = count
|
||||||
|
self.names = names
|
||||||
|
def __repr__(self):
|
||||||
|
return "TupleArg(%s, %s)" % (self.count, self.names)
|
||||||
|
def getName(self):
|
||||||
|
return ".nested%d" % self.count
|
||||||
|
|
||||||
|
def getArgCount(args):
|
||||||
|
argcount = len(args)
|
||||||
|
if args:
|
||||||
|
for arg in args:
|
||||||
|
if isinstance(arg, TupleArg):
|
||||||
|
numNames = len(misc.flatten(arg.names))
|
||||||
|
argcount = argcount - numNames
|
||||||
|
return argcount
|
||||||
|
|
||||||
|
def twobyte(val):
|
||||||
|
"""Convert an int argument into high and low bytes"""
|
||||||
|
assert type(val) == types.IntType
|
||||||
|
return divmod(val, 256)
|
||||||
|
|
||||||
class LineAddrTable:
|
class LineAddrTable:
|
||||||
"""lnotab
|
"""lnotab
|
||||||
|
@ -361,34 +444,9 @@ class LineAddrTable:
|
||||||
def getTable(self):
|
def getTable(self):
|
||||||
return string.join(map(chr, self.lnotab), '')
|
return string.join(map(chr, self.lnotab), '')
|
||||||
|
|
||||||
class StackRef:
|
|
||||||
"""Manage stack locations for jumps, loops, etc."""
|
|
||||||
count = 0
|
|
||||||
|
|
||||||
def __init__(self, id=None, val=None):
|
|
||||||
if id is None:
|
|
||||||
id = StackRef.count
|
|
||||||
StackRef.count = StackRef.count + 1
|
|
||||||
self.id = id
|
|
||||||
self.val = val
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
if self.val:
|
|
||||||
return "StackRef(val=%d)" % self.val
|
|
||||||
else:
|
|
||||||
return "StackRef(id=%d)" % self.id
|
|
||||||
|
|
||||||
def bind(self, inst):
|
|
||||||
self.val = inst
|
|
||||||
|
|
||||||
def resolve(self):
|
|
||||||
if self.val is None:
|
|
||||||
print "UNRESOLVE REF", self
|
|
||||||
return 0
|
|
||||||
return self.val
|
|
||||||
|
|
||||||
class StackDepthTracker:
|
class StackDepthTracker:
|
||||||
# XXX need to keep track of stack depth on jumps
|
# XXX 1. need to keep track of stack depth on jumps
|
||||||
|
# XXX 2. at least partly as a result, this code is broken
|
||||||
|
|
||||||
def findDepth(self, insts):
|
def findDepth(self, insts):
|
||||||
depth = 0
|
depth = 0
|
||||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,40 +1,127 @@
|
||||||
"""Assembler for Python bytecode
|
"""A flow graph representation for Python bytecode"""
|
||||||
|
|
||||||
The new module is used to create the code object. The following
|
|
||||||
attribute definitions are included from the reference manual:
|
|
||||||
|
|
||||||
co_name gives the function name
|
|
||||||
co_argcount is the number of positional arguments (including
|
|
||||||
arguments with default values)
|
|
||||||
co_nlocals is the number of local variables used by the function
|
|
||||||
(including arguments)
|
|
||||||
co_varnames is a tuple containing the names of the local variables
|
|
||||||
(starting with the argument names)
|
|
||||||
co_code is a string representing the sequence of bytecode instructions
|
|
||||||
co_consts is a tuple containing the literals used by the bytecode
|
|
||||||
co_names is a tuple containing the names used by the bytecode
|
|
||||||
co_filename is the filename from which the code was compiled
|
|
||||||
co_firstlineno is the first line number of the function
|
|
||||||
co_lnotab is a string encoding the mapping from byte code offsets
|
|
||||||
to line numbers. see LineAddrTable below.
|
|
||||||
co_stacksize is the required stack size (including local variables)
|
|
||||||
co_flags is an integer encoding a number of flags for the
|
|
||||||
interpreter. There are four flags:
|
|
||||||
CO_OPTIMIZED -- uses load fast
|
|
||||||
CO_NEWLOCALS -- everything?
|
|
||||||
CO_VARARGS -- use *args
|
|
||||||
CO_VARKEYWORDS -- uses **args
|
|
||||||
|
|
||||||
If a code object represents a function, the first item in co_consts is
|
|
||||||
the documentation string of the function, or None if undefined.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import sys
|
|
||||||
import dis
|
import dis
|
||||||
import new
|
import new
|
||||||
import string
|
import string
|
||||||
|
import types
|
||||||
|
|
||||||
import misc
|
from compiler import misc
|
||||||
|
|
||||||
|
class FlowGraph:
|
||||||
|
def __init__(self):
|
||||||
|
self.current = self.entry = Block()
|
||||||
|
self.exit = Block("exit")
|
||||||
|
self.blocks = misc.Set()
|
||||||
|
self.blocks.add(self.entry)
|
||||||
|
self.blocks.add(self.exit)
|
||||||
|
|
||||||
|
def startBlock(self, block):
|
||||||
|
self.current = block
|
||||||
|
|
||||||
|
def nextBlock(self, block=None):
|
||||||
|
if block is None:
|
||||||
|
block = self.newBlock()
|
||||||
|
# XXX think we need to specify when there is implicit transfer
|
||||||
|
# from one block to the next
|
||||||
|
#
|
||||||
|
# I think this strategy works: each block has a child
|
||||||
|
# designated as "next" which is returned as the last of the
|
||||||
|
# children. because the nodes in a graph are emitted in
|
||||||
|
# reverse post order, the "next" block will always be emitted
|
||||||
|
# immediately after its parent.
|
||||||
|
# Worry: maintaining this invariant could be tricky
|
||||||
|
self.current.addNext(block)
|
||||||
|
self.startBlock(block)
|
||||||
|
|
||||||
|
def newBlock(self):
|
||||||
|
b = Block()
|
||||||
|
self.blocks.add(b)
|
||||||
|
return b
|
||||||
|
|
||||||
|
def startExitBlock(self):
|
||||||
|
self.startBlock(self.exit)
|
||||||
|
|
||||||
|
def emit(self, *inst):
|
||||||
|
# XXX should jump instructions implicitly call nextBlock?
|
||||||
|
if inst[0] == 'RETURN_VALUE':
|
||||||
|
self.current.addOutEdge(self.exit)
|
||||||
|
self.current.emit(inst)
|
||||||
|
|
||||||
|
def getBlocks(self):
|
||||||
|
"""Return the blocks in reverse postorder
|
||||||
|
|
||||||
|
i.e. each node appears before all of its successors
|
||||||
|
"""
|
||||||
|
# XXX make sure every node that doesn't have an explicit next
|
||||||
|
# is set so that next points to exit
|
||||||
|
for b in self.blocks.elements():
|
||||||
|
if b is self.exit:
|
||||||
|
continue
|
||||||
|
if not b.next:
|
||||||
|
b.addNext(self.exit)
|
||||||
|
order = dfs_postorder(self.entry, {})
|
||||||
|
order.reverse()
|
||||||
|
# hack alert
|
||||||
|
if not self.exit in order:
|
||||||
|
order.append(self.exit)
|
||||||
|
return order
|
||||||
|
|
||||||
|
def dfs_postorder(b, seen):
|
||||||
|
"""Depth-first search of tree rooted at b, return in postorder"""
|
||||||
|
order = []
|
||||||
|
seen[b] = b
|
||||||
|
for c in b.children():
|
||||||
|
if seen.has_key(c):
|
||||||
|
continue
|
||||||
|
order = order + dfs_postorder(c, seen)
|
||||||
|
order.append(b)
|
||||||
|
return order
|
||||||
|
|
||||||
|
class Block:
|
||||||
|
_count = 0
|
||||||
|
|
||||||
|
def __init__(self, label=''):
|
||||||
|
self.insts = []
|
||||||
|
self.inEdges = misc.Set()
|
||||||
|
self.outEdges = misc.Set()
|
||||||
|
self.label = label
|
||||||
|
self.bid = Block._count
|
||||||
|
self.next = []
|
||||||
|
Block._count = Block._count + 1
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
if self.label:
|
||||||
|
return "<block %s id=%d len=%d>" % (self.label, self.bid,
|
||||||
|
len(self.insts))
|
||||||
|
else:
|
||||||
|
return "<block id=%d len=%d>" % (self.bid, len(self.insts))
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
insts = map(str, self.insts)
|
||||||
|
return "<block %s %d:\n%s>" % (self.label, self.bid,
|
||||||
|
string.join(insts, '\n'))
|
||||||
|
|
||||||
|
def emit(self, inst):
|
||||||
|
op = inst[0]
|
||||||
|
if op[:4] == 'JUMP':
|
||||||
|
self.outEdges.add(inst[1])
|
||||||
|
self.insts.append(inst)
|
||||||
|
|
||||||
|
def getInstructions(self):
|
||||||
|
return self.insts
|
||||||
|
|
||||||
|
def addInEdge(self, block):
|
||||||
|
self.inEdges.add(block)
|
||||||
|
|
||||||
|
def addOutEdge(self, block):
|
||||||
|
self.outEdges.add(block)
|
||||||
|
|
||||||
|
def addNext(self, block):
|
||||||
|
self.next.append(block)
|
||||||
|
assert len(self.next) == 1, map(str, self.next)
|
||||||
|
|
||||||
|
def children(self):
|
||||||
|
return self.outEdges.elements() + self.next
|
||||||
|
|
||||||
# flags for code objects
|
# flags for code objects
|
||||||
CO_OPTIMIZED = 0x0001
|
CO_OPTIMIZED = 0x0001
|
||||||
|
@ -42,224 +129,128 @@ CO_NEWLOCALS = 0x0002
|
||||||
CO_VARARGS = 0x0004
|
CO_VARARGS = 0x0004
|
||||||
CO_VARKEYWORDS = 0x0008
|
CO_VARKEYWORDS = 0x0008
|
||||||
|
|
||||||
class TupleArg:
|
# the FlowGraph is transformed in place; it exists in one of these states
|
||||||
def __init__(self, count, names):
|
RAW = "RAW"
|
||||||
self.count = count
|
FLAT = "FLAT"
|
||||||
self.names = names
|
CONV = "CONV"
|
||||||
def __repr__(self):
|
DONE = "DONE"
|
||||||
return "TupleArg(%s, %s)" % (self.count, self.names)
|
|
||||||
def getName(self):
|
|
||||||
return ".nested%d" % self.count
|
|
||||||
|
|
||||||
class PyAssembler:
|
class PyFlowGraph(FlowGraph):
|
||||||
"""Creates Python code objects
|
super_init = FlowGraph.__init__
|
||||||
"""
|
|
||||||
|
|
||||||
# XXX this class needs to major refactoring
|
def __init__(self, name, filename, args=(), optimized=0):
|
||||||
|
self.super_init()
|
||||||
def __init__(self, args=(), name='?', filename='<?>',
|
self.name = name
|
||||||
docstring=None):
|
self.filename = filename
|
||||||
# XXX why is the default value for flags 3?
|
self.docstring = None
|
||||||
self.insts = []
|
self.args = args # XXX
|
||||||
# used by makeCodeObject
|
self.argcount = getArgCount(args)
|
||||||
self._getArgCount(args)
|
if optimized:
|
||||||
self.code = ''
|
self.flags = CO_OPTIMIZED | CO_NEWLOCALS
|
||||||
self.consts = [docstring]
|
else:
|
||||||
self.filename = filename
|
self.flags = 0
|
||||||
self.flags = CO_NEWLOCALS
|
self.firstlineno = None
|
||||||
self.name = name
|
self.consts = []
|
||||||
self.names = []
|
self.names = []
|
||||||
self.varnames = list(args) or []
|
self.varnames = list(args) or []
|
||||||
for i in range(len(self.varnames)):
|
for i in range(len(self.varnames)):
|
||||||
var = self.varnames[i]
|
var = self.varnames[i]
|
||||||
if isinstance(var, TupleArg):
|
if isinstance(var, TupleArg):
|
||||||
self.varnames[i] = var.getName()
|
self.varnames[i] = var.getName()
|
||||||
# lnotab support
|
self.stage = RAW
|
||||||
self.firstlineno = 0
|
|
||||||
self.lastlineno = 0
|
|
||||||
self.last_addr = 0
|
|
||||||
self.lnotab = ''
|
|
||||||
|
|
||||||
def _getArgCount(self, args):
|
def setDocstring(self, doc):
|
||||||
self.argcount = len(args)
|
self.docstring = doc
|
||||||
if args:
|
self.consts.insert(0, doc)
|
||||||
for arg in args:
|
|
||||||
if isinstance(arg, TupleArg):
|
|
||||||
numNames = len(misc.flatten(arg.names))
|
|
||||||
self.argcount = self.argcount - numNames
|
|
||||||
|
|
||||||
def __repr__(self):
|
def setFlag(self, flag):
|
||||||
return "<bytecode: %d instrs>" % len(self.insts)
|
self.flags = self.flags | flag
|
||||||
|
if flag == CO_VARARGS:
|
||||||
|
self.argcount = self.argcount - 1
|
||||||
|
|
||||||
def setFlags(self, val):
|
def getCode(self):
|
||||||
"""XXX for module's function"""
|
"""Get a Python code object"""
|
||||||
self.flags = val
|
if self.stage == RAW:
|
||||||
|
self.flattenGraph()
|
||||||
|
if self.stage == FLAT:
|
||||||
|
self.convertArgs()
|
||||||
|
if self.stage == CONV:
|
||||||
|
self.makeByteCode()
|
||||||
|
if self.stage == DONE:
|
||||||
|
return self.newCodeObject()
|
||||||
|
raise RuntimeError, "inconsistent PyFlowGraph state"
|
||||||
|
|
||||||
def setOptimized(self):
|
def dump(self, io=None):
|
||||||
self.flags = self.flags | CO_OPTIMIZED
|
if io:
|
||||||
|
save = sys.stdout
|
||||||
def setVarArgs(self):
|
sys.stdout = io
|
||||||
if not self.flags & CO_VARARGS:
|
pc = 0
|
||||||
self.flags = self.flags | CO_VARARGS
|
|
||||||
self.argcount = self.argcount - 1
|
|
||||||
|
|
||||||
def setKWArgs(self):
|
|
||||||
self.flags = self.flags | CO_VARKEYWORDS
|
|
||||||
|
|
||||||
def getCurInst(self):
|
|
||||||
return len(self.insts)
|
|
||||||
|
|
||||||
def getNextInst(self):
|
|
||||||
return len(self.insts) + 1
|
|
||||||
|
|
||||||
def dump(self, io=sys.stdout):
|
|
||||||
i = 0
|
|
||||||
for inst in self.insts:
|
|
||||||
if inst[0] == 'SET_LINENO':
|
|
||||||
io.write("\n")
|
|
||||||
io.write(" %3d " % i)
|
|
||||||
if len(inst) == 1:
|
|
||||||
io.write("%s\n" % inst)
|
|
||||||
else:
|
|
||||||
io.write("%-15.15s\t%s\n" % inst)
|
|
||||||
i = i + 1
|
|
||||||
|
|
||||||
def makeCodeObject(self):
|
|
||||||
"""Make a Python code object
|
|
||||||
|
|
||||||
This creates a Python code object using the new module. This
|
|
||||||
seems simpler than reverse-engineering the way marshal dumps
|
|
||||||
code objects into .pyc files. One of the key difficulties is
|
|
||||||
figuring out how to layout references to code objects that
|
|
||||||
appear on the VM stack; e.g.
|
|
||||||
3 SET_LINENO 1
|
|
||||||
6 LOAD_CONST 0 (<code object fact at 8115878 [...]
|
|
||||||
9 MAKE_FUNCTION 0
|
|
||||||
12 STORE_NAME 0 (fact)
|
|
||||||
"""
|
|
||||||
|
|
||||||
self._findOffsets()
|
|
||||||
lnotab = LineAddrTable()
|
|
||||||
for t in self.insts:
|
for t in self.insts:
|
||||||
opname = t[0]
|
opname = t[0]
|
||||||
|
if opname == "SET_LINENO":
|
||||||
|
print
|
||||||
if len(t) == 1:
|
if len(t) == 1:
|
||||||
lnotab.addCode(self.opnum[opname])
|
print "\t", "%3d" % pc, opname
|
||||||
elif len(t) == 2:
|
pc = pc + 1
|
||||||
if opname == 'SET_LINENO':
|
|
||||||
oparg = t[1]
|
|
||||||
lnotab.nextLine(oparg)
|
|
||||||
else:
|
|
||||||
oparg = self._convertArg(opname, t[1])
|
|
||||||
try:
|
|
||||||
hi, lo = divmod(oparg, 256)
|
|
||||||
except TypeError:
|
|
||||||
raise TypeError, "untranslated arg: %s, %s" % (opname, oparg)
|
|
||||||
lnotab.addCode(self.opnum[opname], lo, hi)
|
|
||||||
|
|
||||||
# why is a module a special case?
|
|
||||||
if self.flags == 0:
|
|
||||||
nlocals = 0
|
|
||||||
else:
|
|
||||||
nlocals = len(self.varnames)
|
|
||||||
# XXX danger! can't pass through here twice
|
|
||||||
if self.flags & CO_VARKEYWORDS:
|
|
||||||
self.argcount = self.argcount - 1
|
|
||||||
stacksize = findDepth(self.insts)
|
|
||||||
try:
|
|
||||||
co = new.code(self.argcount, nlocals, stacksize,
|
|
||||||
self.flags, lnotab.getCode(), self._getConsts(),
|
|
||||||
tuple(self.names), tuple(self.varnames),
|
|
||||||
self.filename, self.name, self.firstlineno,
|
|
||||||
lnotab.getTable())
|
|
||||||
except SystemError, err:
|
|
||||||
print err
|
|
||||||
print repr(self.argcount)
|
|
||||||
print repr(nlocals)
|
|
||||||
print repr(stacksize)
|
|
||||||
print repr(self.flags)
|
|
||||||
print repr(lnotab.getCode())
|
|
||||||
print repr(self._getConsts())
|
|
||||||
print repr(self.names)
|
|
||||||
print repr(self.varnames)
|
|
||||||
print repr(self.filename)
|
|
||||||
print repr(self.name)
|
|
||||||
print repr(self.firstlineno)
|
|
||||||
print repr(lnotab.getTable())
|
|
||||||
raise
|
|
||||||
return co
|
|
||||||
|
|
||||||
def _getConsts(self):
|
|
||||||
"""Return a tuple for the const slot of a code object
|
|
||||||
|
|
||||||
Converts PythonVMCode objects to code objects
|
|
||||||
"""
|
|
||||||
l = []
|
|
||||||
for elt in self.consts:
|
|
||||||
# XXX might be clearer to just as isinstance(CodeGen)
|
|
||||||
if hasattr(elt, 'asConst'):
|
|
||||||
l.append(elt.asConst())
|
|
||||||
else:
|
else:
|
||||||
l.append(elt)
|
print "\t", "%3d" % pc, opname, t[1]
|
||||||
return tuple(l)
|
pc = pc + 3
|
||||||
|
if io:
|
||||||
|
sys.stdout = save
|
||||||
|
|
||||||
def _findOffsets(self):
|
def flattenGraph(self):
|
||||||
"""Find offsets for use in resolving StackRefs"""
|
"""Arrange the blocks in order and resolve jumps"""
|
||||||
self.offsets = []
|
assert self.stage == RAW
|
||||||
cur = 0
|
self.insts = insts = []
|
||||||
for t in self.insts:
|
pc = 0
|
||||||
self.offsets.append(cur)
|
begin = {}
|
||||||
l = len(t)
|
end = {}
|
||||||
if l == 1:
|
for b in self.getBlocks():
|
||||||
cur = cur + 1
|
begin[b] = pc
|
||||||
elif l == 2:
|
for inst in b.getInstructions():
|
||||||
cur = cur + 3
|
insts.append(inst)
|
||||||
arg = t[1]
|
if len(inst) == 1:
|
||||||
# XXX this is a total hack: for a reference used
|
pc = pc + 1
|
||||||
# multiple times, we create a list of offsets and
|
else:
|
||||||
# expect that we when we pass through the code again
|
# arg takes 2 bytes
|
||||||
# to actually generate the offsets, we'll pass in the
|
pc = pc + 3
|
||||||
# same order.
|
end[b] = pc
|
||||||
if isinstance(arg, StackRef):
|
pc = 0
|
||||||
try:
|
for i in range(len(insts)):
|
||||||
arg.__offset.append(cur)
|
inst = insts[i]
|
||||||
except AttributeError:
|
if len(inst) == 1:
|
||||||
arg.__offset = [cur]
|
pc = pc + 1
|
||||||
|
else:
|
||||||
|
pc = pc + 3
|
||||||
|
opname = inst[0]
|
||||||
|
if self.hasjrel.has_elt(opname):
|
||||||
|
oparg = inst[1]
|
||||||
|
offset = begin[oparg] - pc
|
||||||
|
insts[i] = opname, offset
|
||||||
|
elif self.hasjabs.has_elt(opname):
|
||||||
|
insts[i] = opname, begin[inst[1]]
|
||||||
|
self.stacksize = findDepth(self.insts)
|
||||||
|
self.stage = FLAT
|
||||||
|
|
||||||
def _convertArg(self, op, arg):
|
hasjrel = misc.Set()
|
||||||
"""Convert the string representation of an arg to a number
|
for i in dis.hasjrel:
|
||||||
|
hasjrel.add(dis.opname[i])
|
||||||
|
hasjabs = misc.Set()
|
||||||
|
for i in dis.hasjabs:
|
||||||
|
hasjabs.add(dis.opname[i])
|
||||||
|
|
||||||
The specific handling depends on the opcode.
|
def convertArgs(self):
|
||||||
|
"""Convert arguments from symbolic to concrete form"""
|
||||||
XXX This first implementation isn't going to be very
|
assert self.stage == FLAT
|
||||||
efficient.
|
for i in range(len(self.insts)):
|
||||||
"""
|
t = self.insts[i]
|
||||||
if op == 'SET_LINENO':
|
if len(t) == 2:
|
||||||
return arg
|
opname = t[0]
|
||||||
if op == 'LOAD_CONST':
|
oparg = t[1]
|
||||||
return self._lookupName(arg, self.consts)
|
conv = self._converters.get(opname, None)
|
||||||
if op in self.localOps:
|
if conv:
|
||||||
# make sure it's in self.names, but use the bytecode offset
|
self.insts[i] = opname, conv(self, oparg)
|
||||||
self._lookupName(arg, self.names)
|
self.stage = CONV
|
||||||
return self._lookupName(arg, self.varnames)
|
|
||||||
if op in self.globalOps:
|
|
||||||
return self._lookupName(arg, self.names)
|
|
||||||
if op in self.nameOps:
|
|
||||||
return self._lookupName(arg, self.names)
|
|
||||||
if op == 'COMPARE_OP':
|
|
||||||
return self.cmp_op.index(arg)
|
|
||||||
if self.hasjrel.has_elt(op):
|
|
||||||
offset = arg.__offset[0]
|
|
||||||
del arg.__offset[0]
|
|
||||||
return self.offsets[arg.resolve()] - offset
|
|
||||||
if self.hasjabs.has_elt(op):
|
|
||||||
return self.offsets[arg.resolve()]
|
|
||||||
return arg
|
|
||||||
|
|
||||||
nameOps = ('STORE_NAME', 'IMPORT_NAME', 'IMPORT_FROM',
|
|
||||||
'STORE_ATTR', 'LOAD_ATTR', 'LOAD_NAME', 'DELETE_NAME',
|
|
||||||
'DELETE_ATTR')
|
|
||||||
localOps = ('LOAD_FAST', 'STORE_FAST', 'DELETE_FAST')
|
|
||||||
globalOps = ('LOAD_GLOBAL', 'STORE_GLOBAL', 'DELETE_GLOBAL')
|
|
||||||
|
|
||||||
def _lookupName(self, name, list):
|
def _lookupName(self, name, list):
|
||||||
"""Return index of name in list, appending if necessary"""
|
"""Return index of name in list, appending if necessary"""
|
||||||
|
@ -276,32 +267,124 @@ class PyAssembler:
|
||||||
list.append(name)
|
list.append(name)
|
||||||
return end
|
return end
|
||||||
|
|
||||||
# Convert some stuff from the dis module for local use
|
_converters = {}
|
||||||
|
def _convert_LOAD_CONST(self, arg):
|
||||||
cmp_op = list(dis.cmp_op)
|
return self._lookupName(arg, self.consts)
|
||||||
hasjrel = misc.Set()
|
|
||||||
for i in dis.hasjrel:
|
def _convert_LOAD_FAST(self, arg):
|
||||||
hasjrel.add(dis.opname[i])
|
self._lookupName(arg, self.names)
|
||||||
hasjabs = misc.Set()
|
return self._lookupName(arg, self.varnames)
|
||||||
for i in dis.hasjabs:
|
_convert_STORE_FAST = _convert_LOAD_FAST
|
||||||
hasjabs.add(dis.opname[i])
|
_convert_DELETE_FAST = _convert_LOAD_FAST
|
||||||
|
|
||||||
|
def _convert_NAME(self, arg):
|
||||||
|
return self._lookupName(arg, self.names)
|
||||||
|
_convert_LOAD_NAME = _convert_NAME
|
||||||
|
_convert_STORE_NAME = _convert_NAME
|
||||||
|
_convert_DELETE_NAME = _convert_NAME
|
||||||
|
_convert_IMPORT_NAME = _convert_NAME
|
||||||
|
_convert_IMPORT_FROM = _convert_NAME
|
||||||
|
_convert_STORE_ATTR = _convert_NAME
|
||||||
|
_convert_LOAD_ATTR = _convert_NAME
|
||||||
|
_convert_DELETE_ATTR = _convert_NAME
|
||||||
|
_convert_LOAD_GLOBAL = _convert_NAME
|
||||||
|
_convert_STORE_GLOBAL = _convert_NAME
|
||||||
|
_convert_DELETE_GLOBAL = _convert_NAME
|
||||||
|
|
||||||
|
_cmp = list(dis.cmp_op)
|
||||||
|
def _convert_COMPARE_OP(self, arg):
|
||||||
|
return self._cmp.index(arg)
|
||||||
|
|
||||||
|
# similarly for other opcodes...
|
||||||
|
|
||||||
|
for name, obj in locals().items():
|
||||||
|
if name[:9] == "_convert_":
|
||||||
|
opname = name[9:]
|
||||||
|
_converters[opname] = obj
|
||||||
|
del name, obj, opname
|
||||||
|
|
||||||
|
def makeByteCode(self):
|
||||||
|
assert self.stage == CONV
|
||||||
|
self.lnotab = lnotab = LineAddrTable()
|
||||||
|
for t in self.insts:
|
||||||
|
opname = t[0]
|
||||||
|
if len(t) == 1:
|
||||||
|
lnotab.addCode(self.opnum[opname])
|
||||||
|
else:
|
||||||
|
oparg = t[1]
|
||||||
|
if opname == "SET_LINENO":
|
||||||
|
lnotab.nextLine(oparg)
|
||||||
|
if self.firstlineno is None:
|
||||||
|
self.firstlineno = oparg
|
||||||
|
hi, lo = twobyte(oparg)
|
||||||
|
try:
|
||||||
|
lnotab.addCode(self.opnum[opname], lo, hi)
|
||||||
|
except ValueError:
|
||||||
|
print opname, oparg
|
||||||
|
print self.opnum[opname], lo, hi
|
||||||
|
raise
|
||||||
|
self.stage = DONE
|
||||||
|
|
||||||
opnum = {}
|
opnum = {}
|
||||||
for num in range(len(dis.opname)):
|
for num in range(len(dis.opname)):
|
||||||
opnum[dis.opname[num]] = num
|
opnum[dis.opname[num]] = num
|
||||||
|
del num
|
||||||
|
|
||||||
# this version of emit + arbitrary hooks might work, but it's damn
|
def newCodeObject(self):
|
||||||
# messy.
|
assert self.stage == DONE
|
||||||
|
if self.flags == 0:
|
||||||
|
nlocals = 0
|
||||||
|
else:
|
||||||
|
nlocals = len(self.varnames)
|
||||||
|
argcount = self.argcount
|
||||||
|
if self.flags & CO_VARKEYWORDS:
|
||||||
|
argcount = argcount - 1
|
||||||
|
return new.code(argcount, nlocals, self.stacksize, self.flags,
|
||||||
|
self.lnotab.getCode(), self.getConsts(),
|
||||||
|
tuple(self.names), tuple(self.varnames),
|
||||||
|
self.filename, self.name, self.firstlineno,
|
||||||
|
self.lnotab.getTable())
|
||||||
|
|
||||||
def emit(self, *args):
|
def getConsts(self):
|
||||||
self._emitDispatch(args[0], args[1:])
|
"""Return a tuple for the const slot of the code object
|
||||||
self.insts.append(args)
|
|
||||||
|
|
||||||
def _emitDispatch(self, type, args):
|
Must convert references to code (MAKE_FUNCTION) to code
|
||||||
for func in self._emit_hooks.get(type, []):
|
objects recursively.
|
||||||
func(self, args)
|
"""
|
||||||
|
l = []
|
||||||
|
for elt in self.consts:
|
||||||
|
if isinstance(elt, PyFlowGraph):
|
||||||
|
elt = elt.getCode()
|
||||||
|
l.append(elt)
|
||||||
|
return tuple(l)
|
||||||
|
|
||||||
|
def isJump(opname):
|
||||||
|
if opname[:4] == 'JUMP':
|
||||||
|
return 1
|
||||||
|
|
||||||
_emit_hooks = {}
|
class TupleArg:
|
||||||
|
"""Helper for marking func defs with nested tuples in arglist"""
|
||||||
|
def __init__(self, count, names):
|
||||||
|
self.count = count
|
||||||
|
self.names = names
|
||||||
|
def __repr__(self):
|
||||||
|
return "TupleArg(%s, %s)" % (self.count, self.names)
|
||||||
|
def getName(self):
|
||||||
|
return ".nested%d" % self.count
|
||||||
|
|
||||||
|
def getArgCount(args):
|
||||||
|
argcount = len(args)
|
||||||
|
if args:
|
||||||
|
for arg in args:
|
||||||
|
if isinstance(arg, TupleArg):
|
||||||
|
numNames = len(misc.flatten(arg.names))
|
||||||
|
argcount = argcount - numNames
|
||||||
|
return argcount
|
||||||
|
|
||||||
|
def twobyte(val):
|
||||||
|
"""Convert an int argument into high and low bytes"""
|
||||||
|
assert type(val) == types.IntType
|
||||||
|
return divmod(val, 256)
|
||||||
|
|
||||||
class LineAddrTable:
|
class LineAddrTable:
|
||||||
"""lnotab
|
"""lnotab
|
||||||
|
@ -361,34 +444,9 @@ class LineAddrTable:
|
||||||
def getTable(self):
|
def getTable(self):
|
||||||
return string.join(map(chr, self.lnotab), '')
|
return string.join(map(chr, self.lnotab), '')
|
||||||
|
|
||||||
class StackRef:
|
|
||||||
"""Manage stack locations for jumps, loops, etc."""
|
|
||||||
count = 0
|
|
||||||
|
|
||||||
def __init__(self, id=None, val=None):
|
|
||||||
if id is None:
|
|
||||||
id = StackRef.count
|
|
||||||
StackRef.count = StackRef.count + 1
|
|
||||||
self.id = id
|
|
||||||
self.val = val
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
if self.val:
|
|
||||||
return "StackRef(val=%d)" % self.val
|
|
||||||
else:
|
|
||||||
return "StackRef(id=%d)" % self.id
|
|
||||||
|
|
||||||
def bind(self, inst):
|
|
||||||
self.val = inst
|
|
||||||
|
|
||||||
def resolve(self):
|
|
||||||
if self.val is None:
|
|
||||||
print "UNRESOLVE REF", self
|
|
||||||
return 0
|
|
||||||
return self.val
|
|
||||||
|
|
||||||
class StackDepthTracker:
|
class StackDepthTracker:
|
||||||
# XXX need to keep track of stack depth on jumps
|
# XXX 1. need to keep track of stack depth on jumps
|
||||||
|
# XXX 2. at least partly as a result, this code is broken
|
||||||
|
|
||||||
def findDepth(self, insts):
|
def findDepth(self, insts):
|
||||||
depth = 0
|
depth = 0
|
||||||
|
|
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue