initial commit with vorgabe
This commit is contained in:
11
builtins.simple
Normal file
11
builtins.simple
Normal file
@ -0,0 +1,11 @@
|
||||
object nil:
|
||||
1
|
||||
|
||||
def pass:
|
||||
nil
|
||||
|
||||
object inttrait:
|
||||
def add(other):
|
||||
self $int_add(other)
|
||||
def eq(other):
|
||||
self $int_eq(other)
|
124
bytecodeinterpreter.py
Normal file
124
bytecodeinterpreter.py
Normal file
@ -0,0 +1,124 @@
|
||||
from simpleparser import parse
|
||||
from objspace import ObjectSpace
|
||||
import compile
|
||||
from disass import disassemble
|
||||
|
||||
|
||||
class ByteCodeError(Exception):
    """Raised by the interpreter when it meets an opcode it cannot execute."""
|
||||
|
||||
|
||||
class Interpreter(object):
    """Stack-based interpreter for ``compile.Bytecode`` objects.

    All object-level operations (creating objects, slot access, calls,
    primitives) are delegated to ``self.space``.
    """

    # Set to True (on the class or on an instance) to print a disassembly
    # of every bytecode object right before it is executed.
    trace = False

    def __init__(self, builtincode=None):
        # Using an instance variable to keep the public interface
        self.space = ObjectSpace(self)
        self.space.setup_builtins(builtincode)

    def eval(self, ast, w_context):
        """Compile ``ast`` and run the result with ``w_context`` as context."""
        code = compile.compile(ast)
        return self.run(code, w_context)

    def read4(self, code, pc):
        """Decode the signed little-endian 4-byte integer at ``code[pc:pc+4]``."""
        highval = ord(code[pc + 3])
        if highval >= 128:
            # The top byte carries the sign.
            highval -= 256
        return (ord(code[pc]) |
                (ord(code[pc + 1]) << 8) |
                (ord(code[pc + 2]) << 16) |
                (highval << 24))

    def _read_smallarg(self, code, pc):
        """Decode a SMALLARG at ``pc``; return ``(value, pc_after_argument)``."""
        oparg = ord(code[pc])
        pc += 1
        if oparg > 128:
            # One-byte form holding a negative value.
            oparg -= 256
        elif oparg == 128:
            # Marker byte: the real value follows as 4 bytes.
            oparg = self.read4(code, pc)
            pc += 4
        return oparg, pc

    def run(self, bytecode, w_context):
        """Execute ``bytecode`` with ``w_context`` as the implicit self.

        Returns the single object left on the stack when the end of the
        code is reached.  Raises ``ByteCodeError`` for unknown opcodes.
        """
        pc = 0
        stack = []
        code = bytecode.code
        if self.trace:
            # disassemble() prints by itself and returns None.
            disassemble(bytecode)
        while pc < len(code):
            opcode = ord(code[pc])
            pc += 1
            if compile.isjump(opcode):
                oparg = self.read4(code, pc)
                pc += 4
                # Jump offsets are relative to the pc after the argument.
                if opcode == compile.JUMP:
                    pc += oparg
                elif opcode == compile.JUMP_IF_FALSE:
                    w_condition = stack.pop()
                    if self.space.isfalse(w_condition):
                        pc += oparg
                continue
            elif compile.hasarg(opcode):
                oparg, pc = self._read_smallarg(code, pc)
                if opcode == compile.MAKE_OBJECT:
                    name = bytecode.symbols[oparg]
                    obj = self.space.newobject(name, {'__parent__': w_context}, [])
                    stack.append(obj)
                elif opcode == compile.MAKE_OBJECT_CALL:
                    # Run the object body with the new object as context;
                    # the object itself stays on the stack.
                    self.run(bytecode.subbytecodes[oparg], stack[-1])
                elif opcode == compile.INT_LITERAL:
                    w_value = self.space.newint(oparg)
                    stack.append(w_value)
                elif opcode == compile.MAKE_FUNCTION:
                    bc = bytecode.subbytecodes[oparg]
                    w_method = self.space.definemethod(name=bc.name, code=bc, w_target=w_context)
                    stack.append(w_method)
                elif opcode == compile.METHOD_LOOKUP:
                    name = bytecode.symbols[oparg]
                    w_method = self.space.getvalue(stack[-1], name)
                    stack.append(w_method)
                elif opcode == compile.METHOD_CALL:
                    arguments_w = [stack.pop() for n in range(oparg)]
                    arguments_w.reverse()
                    w_method = stack.pop()
                    w_receiver = stack.pop()
                    w_result = self.space.call(w_method, w_receiver, arguments_w)
                    stack.append(w_result)
                elif opcode == compile.PRIMITIVE_METHOD_CALL:
                    nargs = self.space.get_number_of_arguments_of_primitive(oparg)
                    arguments_w = [stack.pop() for n in range(nargs)]
                    arguments_w.reverse()
                    w_receiver = stack.pop()
                    w_result = self.space.call_primitive(oparg, w_receiver, arguments_w)
                    stack.append(w_result)
                elif opcode == compile.SET_LOCAL:
                    w_value = stack[-1]
                    name = bytecode.symbols[oparg]
                    self.space.setvalue(w_context, name, w_value)
                elif opcode == compile.ASSIGNMENT:
                    # Both operands are popped and the assigned expression
                    # is pushed back, as the instruction set documents.
                    w_value = stack.pop()
                    w_target = stack.pop()
                    name = bytecode.symbols[oparg]
                    self.space.setvalue(w_target, name, w_value)
                    stack.append(w_value)
                elif opcode == compile.ASSIGNMENT_APPEND_PARENT:
                    # Unlike ASSIGNMENT this leaves the assigned-to object
                    # on the stack.
                    w_value = stack.pop()
                    name = bytecode.symbols[oparg]
                    self.space.setvalue(stack[-1], name, w_value)
                    self.space.addparent(stack[-1], name)
                elif opcode == compile.GET_LOCAL:
                    name = bytecode.symbols[oparg]
                    w_value = self.space.getvalue(w_context, name)
                    w_value = self.space.call(w_value, w_context, [])
                    stack.append(w_value)
                else:
                    raise ByteCodeError('Invalid bytecode with arguments')
            else:
                if opcode == compile.POP:
                    stack.pop()
                elif opcode == compile.IMPLICIT_SELF:
                    stack.append(w_context)
                elif opcode == compile.DUP:
                    stack.append(stack[-1])
                else:
                    raise ByteCodeError('Invalid bytecode')
        assert pc == len(code)
        assert len(stack) == 1
        return stack.pop()

    def make_module(self):
        """Return a fresh module object created by the object space."""
        return self.space.make_module()
|
46
c3computation.py
Normal file
46
c3computation.py
Normal file
@ -0,0 +1,46 @@
|
||||
def compute_C3_mro(w_obj):
    """Return the C3 linearization of ``w_obj`` as a list, ``w_obj`` first.

    Merges the MROs of all parents with the list ``[w_obj] + parents``.
    If no consistent order exists, ``mro_error`` raises ``TypeError``.
    """
    parents_w = w_obj.getparents()
    pending = [w_parent.get_mro() for w_parent in parents_w]
    pending.append([w_obj] + parents_w)
    linearization = []
    while pending:
        for sequence in pending:
            w_candidate = sequence[0]
            # A head is usable only if it is in the tail of no pending list.
            blocked = any(w_candidate in other[1:] for other in pending)
            if not blocked:
                break  # good w_candidate
        else:
            return mro_error(pending)  # no candidate found
        assert w_candidate not in linearization
        linearization.append(w_candidate)
        # Walk backwards so deleting exhausted lists keeps indices valid.
        for position in reversed(range(len(pending))):
            sequence = pending[position]
            if sequence[0] is w_candidate:
                del sequence[0]
                if not sequence:
                    del pending[position]
    return linearization
|
||||
|
||||
|
||||
def mro_blockinglist(w_candidate, orderlists):
    """Return the first list whose tail contains ``w_candidate``, else None.

    A candidate that is not the head of some list cannot be picked yet;
    ``None`` therefore marks a good candidate.
    """
    blockers = (lst for lst in orderlists if w_candidate in lst[1:])
    return next(blockers, None)
|
||||
|
||||
def mro_error(orderlists):
    """Raise ``TypeError`` explaining why no C3 linearization exists.

    Objects are named in the message via ``getname()``, which is purely a
    debugging helper and may return any string.
    """
    w_candidate = orderlists[-1][0]
    if w_candidate in orderlists[-1][1:]:
        # explicit error message for this specific case
        raise TypeError("duplicate parent %s" % w_candidate.getname())
    visited = []
    while w_candidate not in visited:
        visited.append(w_candidate)
        # Follow the chain: step to the head of the list blocking the
        # current candidate until an already-seen object turns up again.
        blocker = next(
            (lst for lst in orderlists if w_candidate in lst[1:]), None)
        w_candidate = blocker[0]
    # Keep only the part of the walk that forms the actual cycle.
    start = visited.index(w_candidate)
    cycle = visited[start:]
    cycle.append(w_candidate)
    cycle.reverse()
    names = [w_member.getname() for w_member in cycle]
    raise TypeError("cycle among base parents: " + ' < '.join(names))
|
||||
|
401
compile.py
Normal file
401
compile.py
Normal file
@ -0,0 +1,401 @@
|
||||
"""This file contains the bytecode-compiler.
|
||||
|
||||
An instruction can have one or no arguments. There are two different ways how
|
||||
an argument is encoded:
|
||||
|
||||
ARG4 encodes an argument in 4 bytes, in a little-endian manner
|
||||
|
||||
SMALLARG encodes an integer i differently based on its size:
|
||||
if -127 <= i <= 127 the integer is encoded as one byte
|
||||
otherwise it is encoded as 5 bytes:
|
||||
1 marker byte equal to -128 to signify that the large form is used
|
||||
4 bytes to encode the integer as with ARG4
|
||||
|
||||
The instruction set contains the following instructions:
|
||||
|
||||
INT_LITERAL <SMALLARG>
|
||||
Pushes an integer literal on the stack. The argument is the value of the
|
||||
integer.
|
||||
|
||||
IMPLICIT_SELF
|
||||
Pushes the implicit self on the stack.
|
||||
|
||||
POP
|
||||
Pops the top element from the stack.
|
||||
|
||||
DUP
|
||||
Duplicates the top element of the stack.
|
||||
|
||||
JUMP <ARG4>
|
||||
Unconditionally jump to a different point in the program. The offset of the
|
||||
program counter to the target is given by the argument.
|
||||
|
||||
JUMP_IF_FALSE <ARG4>
|
||||
Pops an object from the stack and jump to a different point in the program
|
||||
if that object is false. The offset of the program counter to the target is
|
||||
given by the argument.
|
||||
|
||||
ASSIGNMENT <SMALLARG>
|
||||
Assigns the first object on the stack to the second object on the stack.
|
||||
The objects are popped from the stack, and then the assigned object
|
||||
(i.e. the `expression') is pushed again. The attribute name is given by
|
||||
the argument, which is an index into the symbols list of the bytecode
|
||||
object.
|
||||
|
||||
PRIMITIVE_METHOD_CALL <SMALLARG>
|
||||
Call a primitive method. The argument is an index into a list of all
|
||||
primitives, which must be defined in the "primitives" module. The arguments
|
||||
are found on the stack and are popped by this bytecode; the result is
|
||||
pushed on the stack. To make the compiler work correctly for primitives,
|
||||
the "primitives" module needs to expose two functions:
|
||||
"get_index_of_primitive_named", which maps primitive name to a primitive
|
||||
number, and get_number_of_arguments_of_primitive, which maps a primitive
|
||||
number to the number of arguments the corresponding function takes.
|
||||
|
||||
METHOD_LOOKUP <SMALLARG>
|
||||
Looks up a method in the object at the top of the stack. The method name
|
||||
is given by the argument, which is an index into the symbols list of the
|
||||
bytecode object. The method is pushed on the stack (and the original
|
||||
object is not removed).
|
||||
|
||||
METHOD_CALL <SMALLARG>
|
||||
Calls a method. The first n (where n is the argument of the bytecode) are
|
||||
the arguments to the method, in reverse order. The next object on the
|
||||
stack is the method. The final object is the receiver. All these objects
|
||||
are popped from the stack. The result of the method call is pushed.
|
||||
|
||||
MAKE_FUNCTION <SMALLARG>
|
||||
Creates a new W_Method object and pushes it on the stack. The bytecode of
|
||||
the method can be found in the subbytecodes list of the current bytecode
|
||||
object; the index is given by the argument.
|
||||
|
||||
MAKE_OBJECT <SMALLARG>
|
||||
Create a new (empty) object and pushes it on the stack. The argument (which
|
||||
can be ignored for now) is the index in symbols of the name of the object.
|
||||
|
||||
ASSIGNMENT_APPEND_PARENT <SMALLARG>
|
||||
Adds a new parent to an object. This bytecode is only used during object
|
||||
creation. It works like the ASSIGNMENT bytecode, but (1) it also adds the
|
||||
name to the list of parent attributes of the object, and (2) it leaves
|
||||
on the stack the assigned-to object (the `lvalue'), not the assigned
|
||||
object (the `expression').
|
||||
|
||||
MAKE_OBJECT_CALL <SMALLARG>
|
||||
Execute the body of a newly created object. The object is on the top of the
|
||||
stack and is left there. The bytecode of the body can be found in the
|
||||
subbytecodes list of the current bytecode object, the index is given by the
|
||||
argument.
|
||||
|
||||
GET_LOCAL <SMALLARG>
|
||||
This is an optimization for the common case of sending a method without
|
||||
arguments to the implicit self. This bytecode is equivalent to:
|
||||
IMPLICIT_SELF
|
||||
METHOD_LOOKUP <SMALLARG>
|
||||
METHOD_CALL 0
|
||||
|
||||
SET_LOCAL <SMALLARG>
|
||||
This is an optimization for the common case of writing a slot to the
|
||||
implicit self. This bytecode is equivalent to:
|
||||
IMPLICIT_SELF
|
||||
ASSIGNMENT <SMALLARG>
|
||||
|
||||
Note that there is no "return" bytecode. When the end of the bytecode is
|
||||
reached, the top of the stack is returned (and the stack should have only one
|
||||
element on it).
|
||||
"""
|
||||
import sys
|
||||
|
||||
import simpleast
|
||||
|
||||
# ---------- bytecodes ----------
|
||||
|
||||
# Opcodes below 32 carry an argument (see hasarg); the comment after each
# constant says what that argument means.
INT_LITERAL = 2                 # integer value
ASSIGNMENT = 4                  # index of attrname
METHOD_LOOKUP = 5               # index of method name
METHOD_CALL = 6                 # number of arguments
PRIMITIVE_METHOD_CALL = 7       # number of the primitive
MAKE_FUNCTION = 8               # bytecode literal index
MAKE_OBJECT = 9                 # index of object name
ASSIGNMENT_APPEND_PARENT = 10   # index of parentname
MAKE_OBJECT_CALL = 11           # bytecode literal index
JUMP_IF_FALSE = 12              # offset
JUMP = 13                       # offset
GET_LOCAL = 15                  # index of attrname (optimization)
SET_LOCAL = 16                  # index of attrname (optimization)

IMPLICIT_SELF = 32              # (no argument)
POP = 33                        # (no argument)
DUP = 34                        # (no argument)

# Reverse table, opcode number -> constant name, built from every
# upper-case module global defined so far.
opcode_names = [None] * 256
# Iterate over a snapshot: binding the loop variables adds module globals.
for key, value in tuple(globals().items()):
    if not key.strip("_").isupper():
        continue
    opcode_names[value] = key
|
||||
|
||||
|
||||
|
||||
def hasarg(opcode):
    """Tell whether ``opcode`` is followed by an argument in the bytecode."""
    first_opcode_without_argument = 32
    return opcode < first_opcode_without_argument
|
||||
|
||||
def isjump(opcode):
    """Tell whether ``opcode`` is one of the two jump instructions."""
    return opcode in (JUMP_IF_FALSE, JUMP)
|
||||
|
||||
|
||||
class Bytecode(object):
    """The compiled form of one piece of code.

    code          string holding the encoded instructions
    name          name of the code unit; "?" when None was given
    symbols       list of the names that occur in the code
    subbytecodes  list of further Bytecode objects used by the code
    numargs       number of arguments, as given to the constructor
    stackdepth    stack depth figure, as given to the constructor
    """
    _immutable_ = True
    _immutable_fields_ = ["symbols[*]", "subbytecodes[*]"]

    def __init__(self, code, name, symbols,
                 subbytecodes, numargs, stackdepth):
        self.code = code
        self.name = "?" if name is None else name
        self.symbols = symbols
        self.subbytecodes = subbytecodes
        self.numargs = numargs
        self.stackdepth = stackdepth

    def dis(self, pc=-1):
        """Print a disassembly of this bytecode, passing ``pc`` through."""
        # Imported here, not at module level, since disass imports this module.
        import disass
        disass.disassemble(self, pc=pc)
|
||||
|
||||
|
||||
# ---------- compiler ----------
|
||||
|
||||
def compile(ast, argumentnames=(), name=None):
    """Turn an AST into a Bytecode object.

    ast            a ``simpleast.Program`` node
    argumentnames  names of the arguments of the code unit; they get the
                   first symbol indices, in order
    name           name stored on the resulting Bytecode
    """
    assert isinstance(ast, simpleast.Program)
    comp = Compiler()
    # Arguments are registered first so that argument i is symbol i.
    for arg in argumentnames:
        comp.lookup_symbol(arg)
    comp.lookup_symbol("__parent__")
    comp.lookup_symbol("self")
    comp.compile(ast, True)
    return comp.make_bytecode(len(argumentnames), name)
|
||||
|
||||
|
||||
# Net change in stack depth caused by each opcode.  Opcodes that are not
# listed (METHOD_CALL, PRIMITIVE_METHOD_CALL) need an explicit effect
# passed to Compiler.emit.
stack_effects = dict([
    (INT_LITERAL, 1),
    (ASSIGNMENT, -1),
    (METHOD_LOOKUP, 1),
    (MAKE_FUNCTION, 1),
    (MAKE_OBJECT, 1),
    (ASSIGNMENT_APPEND_PARENT, -1),
    (MAKE_OBJECT_CALL, 0),
    (GET_LOCAL, 1),
    (SET_LOCAL, 0),
    (JUMP, 0),
    (JUMP_IF_FALSE, -1),
    (IMPLICIT_SELF, 1),
    (POP, -1),
    (DUP, 1),
])
|
||||
|
||||
|
||||
class Compiler(object):
    """Compiles the AST of one code unit into bytecode.

    Instructions are collected as one-character strings in ``self.code``,
    names are numbered in ``self.symbols`` (name -> index), and nested
    function and object bodies are kept in ``self.subbytecodes``.  The
    current and the maximum stack depth are tracked while emitting.
    """

    def __init__(self):
        self.code = []
        self.symbols = {}
        self.subbytecodes = []
        self.stackdepth = 0
        self.max_stackdepth = 0

    def make_bytecode(self, numargs, funcname):
        """Wrap everything emitted so far into a ``Bytecode`` object."""
        # Symbol indices are dense (0..n-1), so sorting the names by index
        # yields the index -> name list.
        ordered_names = sorted(self.symbols, key=self.symbols.get)
        bytecode = Bytecode(''.join(self.code),
                            funcname,
                            ordered_names,
                            self.subbytecodes,
                            numargs, self.max_stackdepth)
        assert self.stackdepth == 1
        return bytecode

    def stack_effect(self, num):
        """Change the tracked stack depth by ``num``; update the maximum."""
        self.stackdepth += num
        if self.stackdepth > self.max_stackdepth:
            self.max_stackdepth = self.stackdepth

    def emit(self, opcode, arg=None, stackeffect=sys.maxsize):
        """Append one instruction and account for its stack effect.

        Jumps get a 4-byte placeholder that ``set_target_position`` fills
        in later.  ``stackeffect`` is only used for opcodes that are not
        listed in ``stack_effects``.
        """
        out = self.code
        out.append(chr(opcode))
        if isjump(opcode):
            assert arg is None
            out.extend(self.encode4(0))
        elif hasarg(opcode):
            assert isinstance(arg, int)
            if arg < -127 or arg > 127:
                # Large form: marker byte followed by 4 bytes.
                out.append(chr(128))
                out.extend(self.encode4(arg))
            else:
                out.append(chr(arg & 0xFF))
        else:
            assert arg is None

        try:
            stackeffect = stack_effects[opcode]
        except KeyError:
            assert stackeffect != sys.maxsize
        self.stack_effect(stackeffect)

    def get_position(self):
        """Return the index at which the next emitted byte will be stored."""
        return len(self.code)

    def set_target_position(self, oldposition, newtarget):
        """Patch the jump emitted at ``oldposition`` to land on ``newtarget``."""
        # Offsets are relative to the end of the 5-byte jump instruction.
        offset = newtarget - (oldposition + 5)
        for delta, char in enumerate(self.encode4(offset)):
            self.code[oldposition + 1 + delta] = char

    def encode4(self, value):
        """Return ``value`` as four one-character strings, low byte first."""
        return [chr((value >> shift) & 0xFF) for shift in (0, 8, 16, 24)]

    def lookup_symbol(self, symbol):
        """Return the index of ``symbol``, assigning the next free one if new."""
        return self.symbols.setdefault(symbol, len(self.symbols))

    def compile(self, ast, needsresult=True):
        """Dispatch to the ``compile_<NodeClassName>`` method for ``ast``."""
        handler = getattr(self, "compile_" + ast.__class__.__name__)
        return handler(ast, needsresult)

    def compile_IntLiteral(self, astnode, needsresult):
        self.emit(INT_LITERAL, astnode.value)

    def compile_ImplicitSelf(self, astnode, needsresult):
        self.emit(IMPLICIT_SELF)

    def compile_Assignment(self, astnode, needsresult):
        target = astnode.lvalue
        if isinstance(target, simpleast.ImplicitSelf):
            # Writing a slot of the implicit self has a dedicated opcode.
            self.compile(astnode.expression)
            self.emit(SET_LOCAL, self.lookup_symbol(astnode.attrname))
        else:
            self.compile(target)
            self.compile(astnode.expression)
            self.emit(ASSIGNMENT, self.lookup_symbol(astnode.attrname))
        if not needsresult:
            self.emit(POP)

    def compile_ExprStatement(self, astnode, needsresult):
        self.compile(astnode.expression)
        if not needsresult:
            self.emit(POP)

    def compile_MethodCall(self, astnode, needsresult):
        numargs = len(astnode.arguments)
        argless_send_to_self = (
            isinstance(astnode.receiver, simpleast.ImplicitSelf)
            and numargs == 0)
        if argless_send_to_self:
            self.emit(GET_LOCAL, self.lookup_symbol(astnode.methodname))
            return
        self.compile(astnode.receiver)
        self.emit(METHOD_LOOKUP, self.lookup_symbol(astnode.methodname))
        for argument in astnode.arguments:
            self.compile(argument)
        # Arguments and the method are consumed; the result takes the
        # receiver's place.
        self.emit(METHOD_CALL, numargs, -numargs - 1)

    def compile_PrimitiveMethodCall(self, astnode, needsresult):
        import primitives
        index = primitives.get_index_of_primitive_named(astnode.methodname)
        expected_args = primitives.get_number_of_arguments_of_primitive(index)
        received_args = len(astnode.arguments)
        if received_args != expected_args:
            raise TypeError(
                "Expected {ex} arguments, received {re}.".format(
                    ex=expected_args, re=received_args))
        self.compile(astnode.receiver)
        for argument in astnode.arguments:
            self.compile(argument)
        self.emit(PRIMITIVE_METHOD_CALL, index, -len(astnode.arguments))

    def compile_ObjectDefinition(self, astnode, needsresult):
        self.emit(MAKE_OBJECT, self.lookup_symbol(astnode.name))
        # Parents are assigned before the object body runs.
        for position, definition in enumerate(astnode.parentdefinitions):
            parentname = astnode.parentnames[position]
            if parentname == "__parent__":
                # Assigned like a normal slot, on a duplicate of the object
                # reference that is dropped again afterwards.
                self.emit(DUP)
                self.compile(definition)
                self.emit(ASSIGNMENT, self.lookup_symbol(parentname))
                self.emit(POP)
            else:
                self.compile(definition)
                self.emit(ASSIGNMENT_APPEND_PARENT,
                          self.lookup_symbol(parentname))
        # The body is compiled as a separate code unit.
        body = compile(astnode.block, name=astnode.name)
        body_index = len(self.subbytecodes)
        self.subbytecodes.append(body)
        self.emit(MAKE_OBJECT_CALL, body_index)
        self.emit(SET_LOCAL, self.lookup_symbol(astnode.name))
        if not needsresult:
            self.emit(POP)

    def compile_Program(self, astnode, needsresult):
        statements = astnode.statements
        # Only the last statement may leave its value on the stack.
        for statement in statements[:-1]:
            self.compile(statement, needsresult=False)
        self.compile(statements[-1], needsresult)

    def compile_FunctionDefinition(self, astnode, needsresult):
        body = compile(astnode.block, astnode.arguments, astnode.name)
        body_index = len(self.subbytecodes)
        self.subbytecodes.append(body)
        self.emit(MAKE_FUNCTION, body_index)
        self.emit(SET_LOCAL, self.lookup_symbol(astnode.name))
        if not needsresult:
            self.emit(POP)

    def compile_IfStatement(self, astnode, needsresult):
        # XXX this can compute the needed stack by one too much
        self.compile(astnode.condition)
        skip_if_jump = self.get_position()
        self.emit(JUMP_IF_FALSE)
        # "then" branch, followed by a jump over the "else" branch
        self.compile(astnode.ifblock, needsresult)
        skip_else_jump = self.get_position()
        self.emit(JUMP)
        # "else" branch starts here
        self.set_target_position(skip_if_jump, self.get_position())
        if astnode.elseblock:
            self.compile(astnode.elseblock, needsresult)
        elif needsresult:
            self.emit(IMPLICIT_SELF)
        if needsresult:
            # Both branches were counted as pushing a result, but only one
            # of them runs.
            self.stack_effect(-1)
        self.set_target_position(skip_else_jump, self.get_position())

    def compile_WhileStatement(self, astnode, needsresult):
        if needsresult:
            # Result in case the body is never executed.
            self.emit(IMPLICIT_SELF)
        loop_start = self.get_position()
        self.compile(astnode.condition)
        exit_jump = self.get_position()
        self.emit(JUMP_IF_FALSE)
        if needsresult:
            # Drop the previous result before the body pushes a new one.
            self.emit(POP)
        self.compile(astnode.whileblock, needsresult)
        back_jump = self.get_position()
        self.emit(JUMP)
        self.set_target_position(back_jump, loop_start)
        self.set_target_position(exit_jump, self.get_position())
|
118
disass.py
Normal file
118
disass.py
Normal file
@ -0,0 +1,118 @@
|
||||
import compile
|
||||
|
||||
def disassemble(bytecode, indent='', pc=-1):
    """Print a readable listing of a ``compile.Bytecode`` object.

    A first pass collects all jump targets so that the second, printing
    pass can mark them.  ``indent`` is printed in front of every line and
    the instruction starting at ``pc`` gets a "->" marker.
    """
    assert isinstance(bytecode, compile.Bytecode)
    target_finder = FindLabelTargets()
    target_finder.disassemble(bytecode)
    printer = Disassembler(indent, target_finder.targets)
    printer.disassemble(bytecode, pc)
|
||||
|
||||
|
||||
# Opcode number -> opcode name, taken from every integer attribute of the
# compile module whose name has no lower-case letters.
opcode2name = {}
for name, value in list(vars(compile).items()):
    if not isinstance(value, int):
        continue
    if name != name.upper():
        continue
    opcode2name[value] = name
|
||||
|
||||
|
||||
class AbstractDisassembler(object):
    """Decodes a bytecode string instruction by instruction.

    For every instruction ``start(pc)`` is called before decoding and
    ``end(opcode, oparg)`` afterwards, followed by the method named like
    the opcode, or ``dummy`` if no such method exists.  While these hooks
    run, ``self.pc`` is the position just behind the instruction.
    """

    def read4(self, code, pc):
        """Decode the signed little-endian 4-byte integer at ``code[pc:pc+4]``."""
        top = ord(code[pc+3])
        if top >= 128:
            top -= 256  # the top byte carries the sign
        low = ord(code[pc])
        mid_low = ord(code[pc+1]) << 8
        mid_high = ord(code[pc+2]) << 16
        return low | mid_low | mid_high | (top << 24)

    def disassemble(self, bytecode, currpc=-1):
        """Walk over all instructions of ``bytecode``, calling the hooks."""
        self.currpc = currpc
        self.bytecode = bytecode
        code = bytecode.code
        end_of_code = len(code)
        pc = 0
        while pc < end_of_code:
            self.start(pc)
            opcode = ord(code[pc])
            pc += 1
            oparg = None
            if compile.isjump(opcode):
                oparg = self.read4(code, pc)
                pc += 4
            elif compile.hasarg(opcode):
                oparg = ord(code[pc])
                pc += 1
                if oparg > 128:
                    # one-byte form holding a negative value
                    oparg -= 256
                elif oparg == 128:
                    # marker byte: the value follows as 4 bytes
                    oparg = self.read4(code, pc)
                    pc += 4
            self.pc = pc
            self.end(opcode, oparg)
            handler = getattr(self, opcode2name[opcode], self.dummy)
            handler(opcode, oparg)

    def start(self, pc):
        """Hook called before the instruction at ``pc`` is decoded."""

    def end(self, opcode, oparg):
        """Hook called once an instruction has been decoded."""

    def dummy(self, opcode, oparg):
        """Fallback for opcodes without a method of their own."""
|
||||
|
||||
|
||||
class FindLabelTargets(AbstractDisassembler):
    """Collects the positions that jump instructions point to.

    After ``disassemble`` has run, the keys of ``self.targets`` are the
    target positions.
    """

    def __init__(self):
        self.targets = {}

    def JUMP_IF_FALSE(self, opcode, oparg):
        # The offset is relative to the position behind the instruction.
        destination = self.pc + oparg
        self.targets[destination] = True

    JUMP = JUMP_IF_FALSE
|
||||
|
||||
|
||||
class Disassembler(AbstractDisassembler):
    """Prints one line per instruction, marking jump targets and ``currpc``."""

    def __init__(self, indent, targets):
        self.indent = indent
        self.targets = targets

    def start(self, pc):
        # Label line for jump targets, arrow line for the current pc.
        if pc in self.targets:
            print(self.indent, '>>', pc)
        if pc == self.currpc:
            print(self.indent, '->', pc)

    def end(self, opcode, oparg):
        # The opcode-specific method finishes the line.
        mnemonic = opcode2name[opcode]
        print(self.indent, '\t', mnemonic, end=' ')

    def JUMP_IF_FALSE(self, opcode, oparg):
        destination = self.pc + oparg
        print('\t', '-->', destination)

    JUMP = JUMP_IF_FALSE

    def ASSIGNMENT(self, opcode, oparg):
        # The argument is an index into the symbol list.
        symbol = self.bytecode.symbols[oparg]
        print('\t', repr(symbol))

    METHOD_LOOKUP = ASSIGNMENT
    ASSIGNMENT_APPEND_PARENT = ASSIGNMENT
    GET_LOCAL = ASSIGNMENT
    SET_LOCAL = ASSIGNMENT

    def PRIMITIVE_METHOD_CALL(self, opcode, oparg):
        import primitives
        func = primitives.all_primitives[oparg]
        print('\t', repr('$' + func.__qualname__))

    def dummy(self, opcode, oparg):
        if oparg is not None:
            print('\t', oparg)
        else:
            print()
|
||||
|
||||
|
1
interpreter.py
Normal file
1
interpreter.py
Normal file
@ -0,0 +1 @@
|
||||
from bytecodeinterpreter import Interpreter
|
120
objmodel.py
Normal file
120
objmodel.py
Normal file
@ -0,0 +1,120 @@
|
||||
from c3computation import compute_C3_mro as c3
|
||||
|
||||
|
||||
class AbstractObject(object):
    """Base class of all interpreter-level objects.

    Subclasses must provide ``getparents()`` and ``hasslot(name)``;
    objects reporting a slot must also have a ``slots`` mapping.
    """

    def call(self, w_receiver, args_w):
        """Calling a plain object simply yields the object itself."""
        return self

    def istrue(self):
        """Objects are true unless a subclass says otherwise."""
        return True

    def clone(self):
        raise NotImplementedError

    def hasslot(self, name=None):
        """Tell whether this object itself has a slot called ``name``.

        ``name`` is optional only so that calls without an argument keep
        raising ``NotImplementedError``.
        """
        raise NotImplementedError

    def getvalue(self, name):
        """Return the first slot called ``name`` found along the MRO.

        Returns ``None`` when no object in the MRO has such a slot.
        """
        for obj in self.get_mro():
            if obj.hasslot(name):
                return obj.slots[name]
        return None

    def get_mro(self):
        """Return the C3 linearization of this object and its parents."""
        return c3(self)
|
||||
|
||||
|
||||
class W_NormalObject(AbstractObject):
    """An object with named slots, some of which are parent slots.

    ``slots`` maps slot names to values and ``parents`` lists the names
    of the slots that act as parents.  The ``__parent__`` slot is always
    treated as a parent, whether or not it is listed.
    """

    def __init__(self, name=None, slots=None, parents=None, space=None):
        self.space = space
        self.name = name
        self.slots = slots if slots else {}
        if parents is None:
            parents = []
        # Check against self.slots: the ``slots`` argument may be None.
        for parentname in parents:
            assert parentname in self.slots
        self.parents = parents

    def getparents(self):
        """Return the values of all parent slots that are actually set."""
        names = self.parents
        if '__parent__' not in names:
            # Build a new list: a lookup must not modify the object.
            names = list(names) + ['__parent__']
        return [self.slots[p] for p in names if p in self.slots]

    def hasslot(self, name):
        return name in self.slots

    def setvalue(self, name, w_value):
        self.slots[name] = w_value

    def addparent(self, name):
        """Mark the slot called ``name`` as a parent slot."""
        self.parents.append(name)

    def __str__(self):
        return self.getname()

    __repr__ = __str__

    def getname(self):
        return "<Object {name} {slots}>".format(
            name=self.name if self.name else '',
            slots=self.slots)

    def clone(self):
        """Return a copy with its own slot dict and its own parent list."""
        return W_NormalObject(
            name=self.name,
            parents=list(self.parents),
            slots=self.slots.copy(),
            space=self.space)
|
||||
|
||||
|
||||
class W_Integer(AbstractObject):
    """Wrapped integer.  It has no slots of its own; its only parent is
    the ``inttrait`` builtin of the object space."""

    def __init__(self, value, space=None):
        self.space = space
        self.value = value

    def getparents(self):
        space = self.space
        if space is None:
            return []  # for tests
        inttrait = space.getbuiltin('inttrait')
        assert inttrait is not None, 'O_o bogus state'
        return [inttrait]

    def hasslot(self, name):
        # Integers never carry slots themselves.
        return False

    def __str__(self):
        text = str(self.value)
        return text

    __repr__ = __str__

    def istrue(self):
        # Only zero counts as false.
        is_zero = self.value == 0
        return not is_zero
|
||||
|
||||
|
||||
class W_Method(W_NormalObject):
    """A method: a normal object that also carries the bytecode to run."""

    def __init__(self, code, *args, **kwargs):
        super(W_Method, self).__init__(*args, **kwargs)
        self.code = code

    def clone(self):
        """Return a copy with its own slot dict and its own parent list."""
        return W_Method(code=self.code,
                        name=self.name,
                        parents=list(self.parents),
                        slots=self.slots.copy(),
                        space=self.space)

    def getname(self):
        return "<W_Method({name})>".format(name=self.name)

    def call(self, w_receiver, args_w):
        """Run the method body for ``w_receiver`` with arguments ``args_w``.

        Each call executes in a fresh clone of the method object, which
        receives the arguments and ``self`` as slots.
        """
        w_context = self.clone()
        assert len(args_w) == self.code.numargs
        # The first ``numargs`` symbols of the code are the argument names.
        for i in range(self.code.numargs):
            self.space.setvalue(w_context, self.code.symbols[i], args_w[i])
        self.space.setvalue(w_context, 'self', w_receiver)
        return self.space.execute(w_context.code, w_context)
|
87
objspace.py
Normal file
87
objspace.py
Normal file
@ -0,0 +1,87 @@
|
||||
from objmodel import W_Integer
|
||||
from objmodel import W_Method
|
||||
from objmodel import W_NormalObject
|
||||
|
||||
import primitives
|
||||
|
||||
|
||||
class ObjectSpace(object):
    """Creates the interpreter's objects and forwards operations to them."""

    def __init__(self, interpreter):
        self.interpreter = interpreter

    def setup_builtins(self, builtincode=None):
        """Create the builtins object and evaluate ``builtincode`` in it.

        With ``builtincode=None`` the source is read from the file
        ``builtins.simple`` next to this module.
        """
        source = builtincode
        if source is None:
            source = self._load_default_builtins()

        self.w_builtins = W_NormalObject(name='Lobby', slots={})
        from simpleparser import parse
        tree = parse(source)

        self.interpreter.eval(tree, self.w_builtins)

    def _load_default_builtins(self):
        import os
        here = os.path.dirname(__file__)
        builtins = os.path.join(here, 'builtins.simple')
        with open(builtins, 'r') as f:
            return f.read()

    def getbuiltin(self, name):
        builtins_object = self.w_builtins
        return builtins_object.getvalue(name)

    def getbuiltins(self):
        return self.w_builtins

    def make_module(self, name=None):
        """Return a new object whose ``__parent__`` is the builtins object.

        The object gets no slots if the builtins are not set up yet.
        """
        if hasattr(self, 'w_builtins'):
            slots = {'__parent__': self.getbuiltins()}
        else:
            slots = {}
        return W_NormalObject(name=name, slots=slots)

    def newobject(self, name, slots, parentnames):
        return W_NormalObject(space=self, name=name,
                              slots=slots, parents=parentnames)

    def newint(self, value):
        return W_Integer(value, space=self)

    def definemethod(self, name, code, w_target):
        """Create a method object whose ``__parent__`` is ``w_target``."""
        return W_Method(code, name=name,
                        slots={'__parent__': w_target},
                        space=self)

    def execute(self, code, w_context):
        return self.interpreter.run(code, w_context)

    def setvalue(self, w_receiver, name, w_value):
        w_receiver.setvalue(name, w_value)

    def addparent(self, w_receiver, name):
        w_receiver.addparent(name)

    def getvalue(self, w_receiver, name):
        return w_receiver.getvalue(name)

    def istrue(self, w_condition):
        return w_condition.istrue()

    def isfalse(self, w_condition):
        truth = w_condition.istrue()
        return not truth

    def call_primitive(self, primitive_num, w_receiver, arguments_w):
        """Call the primitive registered under index ``primitive_num``."""
        implementation = primitives.all_primitives[primitive_num]
        return implementation(w_receiver, arguments_w, space=self)

    def get_number_of_arguments_of_primitive(self, primitive_num):
        return primitives.get_number_of_arguments_of_primitive(primitive_num)

    def call(self, w_method, w_receiver, arguments_w):
        return w_method.call(w_receiver, arguments_w)

    def clone(self, w_value):
        return w_value.clone()
|
47
primitives.py
Normal file
47
primitives.py
Normal file
@ -0,0 +1,47 @@
|
||||
# '$name' -> index into the two parallel lists below
registry = {}
# wrapped primitive functions, indexed by primitive number
all_primitives = []
# argument count (receiver excluded) of each primitive, same indexing
primitive_number_of_arguments = []


def primitive(name, unwrap_spec, wrap_spec):
    """Return a decorator registering a function as primitive ``$name``.

    unwrap_spec  one entry per parameter of the function, receiver first;
                 where the entry is ``int`` the argument's ``.value`` is
                 passed, otherwise the argument object itself
    wrap_spec    if ``int``, the result is wrapped with ``space.newint``

    The decorator returns ``None``: primitives are reached through
    ``all_primitives`` only, not through the decorated name.
    """
    assert '$' + name not in registry, '${name} already defined'.format(name=name)

    def expose(func):
        def unwrapper(w_receiver, args_w, space):
            args = [w_receiver] + args_w
            if len(args) != len(unwrap_spec):
                raise TypeError(
                    "Expected {ex} arguments, received {re}.".format(
                        ex=len(unwrap_spec), re=len(args)))
            unwrapped_args = [arg.value if t is int else arg
                              for t, arg in zip(unwrap_spec, args)]
            result = func(*unwrapped_args)
            if wrap_spec is int:
                return space.newint(result)
            return result
        # Shown by the disassembler as the name of the primitive.
        unwrapper.__qualname__ = name
        # Fill both parallel lists at the same moment so that their indices
        # always agree, whenever the decorator happens to be applied.
        all_primitives.append(unwrapper)
        primitive_number_of_arguments.append(len(unwrap_spec) - 1)  # first argument is the receiver
        registry['$' + name] = len(all_primitives) - 1
        return None
    return expose
|
||||
|
||||
|
||||
def get_index_of_primitive_named(name):
    """Return the primitive number stored in ``registry`` under ``name``.

    Raises KeyError when ``name`` is not a key of ``registry``.
    """
    index = registry[name]
    return index
|
||||
|
||||
|
||||
def get_number_of_arguments_of_primitive(idx):
    """Return the argument count recorded for primitive number ``idx``."""
    count = primitive_number_of_arguments[idx]
    return count
|
||||
|
||||
|
||||
@primitive('int_add', [int, int], int)
def simple_int_add(a, b):
    """Primitive ``$int_add``: return the sum of the two unwrapped integers."""
    total = a + b
    return total
|
||||
|
||||
|
||||
@primitive('int_eq', [int, int], int)
def simple_int_eq(a, b):
    """Primitive ``$int_eq``: return 1 if the two unwrapped integers are equal, else 0.

    The primitive is declared with an ``int`` result, so the comparison is
    converted to a real integer rather than handing a ``bool`` on to be wrapped.
    """
    return int(a == b)
|
35
shell.nix
Normal file
35
shell.nix
Normal file
@ -0,0 +1,35 @@
|
||||
# Development shell: a Python 3.9 environment with the project's libraries,
# plus the editor tooling listed in buildInputs.
{ pkgs ? import <nixpkgs> {} }:

with pkgs;

let
  pythonEnv = python39.buildEnv.override {
    extraLibs = with python39Packages; [
      # Common libs
      rich
      # numpy
      # matplotlib
      # scipy
      # pytorch
      # notebook

      # Doom Emacs libs
      black
      pyflakes
      isort
      nose
      pytest

      # DynLang
      rply
    ];
  };
in

mkShell {
  buildInputs = [
    pythonEnv
    nodePackages.pyright # LSP
    pipenv # Doom
    jetbrains.pycharm-professional
  ];
}
|
279
simpleast.py
Normal file
279
simpleast.py
Normal file
@ -0,0 +1,279 @@
|
||||
import py
|
||||
|
||||
|
||||
class MetaNode(type):
    """Metaclass that gives every node class a ``dispatch(compiler)`` method.

    For a class named ``X`` that has an ``attrs`` attribute, ``dispatch``
    calls ``compiler.compile_X(node)``. Classes without ``attrs`` are treated
    as abstract and their ``dispatch`` does nothing.
    """

    def __init__(cls, name, bases, dict):
        super(MetaNode, cls).__init__(name, bases, dict)
        compile_name = "compile_" + name
        abstract = not hasattr(cls, "attrs")

        def dispatch(self, compiler):
            if not abstract:
                getattr(compiler, compile_name)(self)

        cls.dispatch = dispatch


class AstNode(object):
    """ Base class for all ast nodes. Provides generic functionality."""

    # Only Python 2 honours this attribute; Python 3 silently ignores it.
    # ``dispatch`` is therefore also defined explicitly below, with the same
    # behaviour the metaclass would install.
    __metaclass__ = MetaNode

    tokens = None

    def dispatch(self, compiler):
        """Call ``compiler.compile_<ClassName>(self)``; do nothing for abstract nodes."""
        cls = type(self)
        if hasattr(cls, "attrs"):
            getattr(compiler, "compile_" + cls.__name__)(self)

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__,
                           ", ".join([repr(getattr(self, a)) for a in self.attrs]))

    def __eq__(self, other):
        """Nodes are equal when they have the same class and equal ``attrs`` values."""
        if self.__class__ is not other.__class__:
            return False
        for key in self.attrs:
            if getattr(self, key) != getattr(other, key):
                return False
        return True

    def __ne__(self, other):
        return not (self == other)

    def dot(self, result=None):
        """Append graphviz statements describing this subtree to ``result``.

        ``result`` is created when omitted and is returned. Each node becomes
        a box labelled with its class name and its non-node attributes; child
        nodes are connected by edges labelled with the attribute name.
        """
        def uid(x):
            result = id(x)
            if result < 0:
                result = 'm%d' % (-result,)
            return result

        if result is None:
            result = []
        body = [self.__class__.__name__]
        children = []
        for key in self.attrs:
            obj = getattr(self, key)
            if isinstance(obj, list):
                # A list counts as a list of children if its first element is a node.
                if obj and isinstance(obj[0], AstNode):
                    children.extend(obj)
                    for i, elt in enumerate(obj):
                        result.append("o%s -> o%s [label=\"%s[%s]\"]" % (
                            uid(self), uid(elt), key, i))
                else:
                    body.append("%s = %s" % (key, obj))
            elif isinstance(obj, AstNode):
                children.append(obj)
                result.append("o%s -> o%s [label=\"%s\"]" % (
                    uid(self), uid(obj), key))
            else:
                body.append("%s = %s" % (key, obj))
        result.append("o%s [label=\"%s\", shape=box]" % (uid(self), repr("\n".join(body))[1:-1]))
        for child in children:
            child.dot(result)
        return result

    def view(self):
        """ Calling this method gives a graphical representation of the ast
        graph. Needs a checkout of
        https://bitbucket.org/pypy/pypy/src/default/dotviewer/ in the current directory
        as well as graphviz (http://graphviz.org) installed. """
        import os
        import tempfile

        from dotviewer import graphclient
        content = ["digraph G{"]
        content.extend(self.dot())
        content.append("}")
        # Write the graph to a fresh temporary directory using the standard
        # library instead of the removed ``py.test.ensuretemp`` helper.
        directory = tempfile.mkdtemp(prefix="simpleparser")
        path = os.path.join(directory, "temp.dot")
        with open(path, "w") as dotfile:
            dotfile.write("\n".join(content))
        graphclient.display_dot_file(path)
|
||||
|
||||
|
||||
class Expression(AstNode):
    """Common abstract parent of every expression node; defines no ``attrs`` itself."""
|
||||
|
||||
|
||||
class IntLiteral(Expression):
    """Integer literal node, e.g. ``1``.

    ``value`` holds the constructor argument converted with ``int``.
    """

    attrs = ["value"]

    def __init__(self, value):
        number = int(value)
        self.value = number
|
||||
|
||||
|
||||
class StringLiteral(Expression):
    """String literal node, e.g. ``"hello world"``.

    ``value`` holds the constructor argument converted with ``str``.
    """

    attrs = ["value"]

    def __init__(self, value):
        text = str(value)
        self.value = text
|
||||
|
||||
|
||||
class MethodCall(Expression):
    """Call of the method ``methodname`` on ``receiver`` with ``arguments``.

    ``arguments`` is a list of expression nodes; it defaults to a new empty
    list when omitted.

    Examples:
        f(1, 2, 3)
            receiver is ImplicitSelf(), methodname is 'f', arguments is
            [IntLiteral(1), IntLiteral(2), IntLiteral(3)]

        5 f
            receiver is IntLiteral(5), methodname is 'f', arguments is []
    """

    attrs = ["receiver", "methodname", "arguments"]

    def __init__(self, receiver, methodname, arguments=None):
        self.receiver = receiver
        self.methodname = methodname
        self.arguments = [] if arguments is None else arguments
|
||||
|
||||
|
||||
class PrimitiveMethodCall(MethodCall):
    """Call of a primitive method; primitive method names start with '$'.

    The attributes are the same as those of MethodCall.

    Example:
        5 $int_add(6)
            receiver is IntLiteral(5), methodname is '$int_add', arguments
            is [IntLiteral(6)]
    """
|
||||
|
||||
|
||||
class ImplicitSelf(Expression):
    """Stand-in receiver used when a call names no receiver.

    Example:
        f
            a method call "f" on the implicit self.
    """

    attrs = []
|
||||
|
||||
|
||||
class Statement(AstNode):
    """Common abstract parent of every statement node; defines no ``attrs`` itself."""
|
||||
|
||||
|
||||
class Assignment(Statement):
    """An assignment of the form ``lvalue attrname = expression``.

    Example:
        x = 7
            an assignment on the implicit self.
    """

    attrs = ["lvalue", "attrname", "expression"]

    def __init__(self, lvalue, attrname, expression):
        self.lvalue, self.attrname = lvalue, attrname
        self.expression = expression
|
||||
|
||||
|
||||
class ExprStatement(Statement):
    """A statement consisting only of an expression whose result is ignored."""

    attrs = ["expression"]

    def __init__(self, expr):
        # The parameter is called ``expr``; the attribute is ``expression``.
        self.expression = expr
|
||||
|
||||
|
||||
class IfStatement(Statement):
    """An if statement with an optional else branch.

    Syntax:
        if condition:
            ... ifblock ...
        else:
            ... elseblock ...

    ``elseblock`` is None when no else branch is given.
    """

    attrs = ["condition", "ifblock", "elseblock"]

    def __init__(self, condition, ifblock, elseblock=None):
        self.condition = condition
        self.ifblock, self.elseblock = ifblock, elseblock
|
||||
|
||||
|
||||
class WhileStatement(Statement):
    """A while loop with an optional else branch.

    Syntax:
        while condition:
            ... whileblock ...
        else:
            ... elseblock ...

    ``elseblock`` is None when no else branch is given.
    """

    attrs = ["condition", "whileblock", "elseblock"]

    def __init__(self, condition, whileblock, elseblock=None):
        self.condition = condition
        self.whileblock, self.elseblock = whileblock, elseblock
|
||||
|
||||
|
||||
class FunctionDefinition(Statement):
    """A function definition, i.e. ``def name(arguments): block``.

    ``name`` is a string, ``arguments`` is a list of strings and ``block`` is
    a Program. Executing a FunctionDefinition creates a new W_Method and
    assigns it to ``name`` on the implicit self.

    Examples:
        def f:
            41
        gives FunctionDefinition('f', [], Program([...]))

        def g(a, b, c):
            43
        gives FunctionDefinition('g', ['a', 'b', 'c'], ...)
    """

    attrs = ["name", "arguments", "block"]

    def __init__(self, name, arguments, block):
        self.name, self.arguments = name, arguments
        self.block = block
|
||||
|
||||
|
||||
class ObjectDefinition(Statement):
    """ Makes a new normal object.

    The block is immediately executed with the new object as the
    implicit self. The 'name' is bound to the new object in the
    outer scope's implicit self.

    Example:
        object x:
            def f(y):
                y

    The 'parentnames' attribute is a list of strings giving the parent
    attributes of the new object. The 'parentdefinitions' attribute is a list
    of expression-asts giving the initial value of those parent attributes.

    Example:
        object x(p1=a, p2=b):
            ...

    gives parentnames = ["p1", "p2"]
    and parentdefinitions = [MethodCall(ImplicitSelf, "a", []),
                             MethodCall(ImplicitSelf, "b", [])]
    """

    attrs = ["name", "block", "parentnames", "parentdefinitions"]

    def __init__(self, name, block, parentnames=None, parentdefinitions=None):
        self.name = name
        self.block = block
        if parentnames is None:
            # No parents named: both lists start empty.
            parentnames = []
            parentdefinitions = []
        elif parentdefinitions is None:
            # Names given without definitions: store a list, never None, so
            # the attribute can always be iterated.
            parentdefinitions = []
        self.parentnames = parentnames
        self.parentdefinitions = parentdefinitions
|
||||
|
||||
|
||||
class Program(AstNode):
    """A sequence of statements, stored in the ``statements`` attribute."""

    attrs = ["statements"]

    def __init__(self, statements):
        # The list is stored as passed in, not copied.
        self.statements = statements
|
190
simplelexer.py
Normal file
190
simplelexer.py
Normal file
@ -0,0 +1,190 @@
|
||||
from rply import LexerGenerator
|
||||
from rply.token import Token
|
||||
|
||||
# attempts at writing a simple Python-like lexer
|
||||
# Width of a tab stop: a tab in leading whitespace advances the indentation
# column to the next multiple of this value.
tabsize = 4
|
||||
|
||||
|
||||
def make_indent_token(token, start):
    """Turn a NewlineAndWhitespace token into an Indent token, in place.

    The first ``start`` characters are dropped from the value, and the source
    position is moved to column 0 of the following line. The same (mutated)
    token object is returned.
    """
    assert token.name == "NewlineAndWhitespace"
    token.name = "Indent"
    token.value = token.value[start:]
    position = token.source_pos
    position.idx += start
    position.lineno += 1
    position.colno = 0
    return token
|
||||
|
||||
|
||||
def make_dedent_token(token, start):
    """Turn a NewlineAndWhitespace token into a Dedent token, in place.

    The first ``start`` characters are dropped from the value, and the source
    position is moved to column 0 of the following line. The same (mutated)
    token object is returned.
    """
    assert token.name == "NewlineAndWhitespace"
    token.name = "Dedent"
    token.value = token.value[start:]
    position = token.source_pos
    position.idx += start
    position.lineno += 1
    position.colno = 0
    return token
|
||||
|
||||
|
||||
# split the token in two: one for the newline and one for the
|
||||
# in/dedent
|
||||
# the NewlineAndWhitespace token looks like this: \r?\n[ \f\t]*
|
||||
def compute_position_of_newline(token):
    """Measure a NewlineAndWhitespace token.

    Returns ``(start, column)``. ``start`` is the length of the line break at
    the front of the value: 1 if the value begins with a newline character,
    otherwise 2. ``column`` is the indentation depth of the whitespace after
    it: a space adds one, a tab jumps to the next multiple of ``tabsize``,
    and a form feed resets the count to zero.
    """
    assert token.name == "NewlineAndWhitespace"
    text = token.value
    start = 1 if text[0] == '\n' else 2
    column = 0
    for char in text[start:]:  # count the indentation depth of the whitespace
        if char == ' ':
            column += 1
        elif char == '\t':
            column = (column // tabsize + 1) * tabsize
        elif char == '\f':
            column = 0
    return start, column
|
||||
|
||||
|
||||
def compute_indent_or_dedent(token, indentation_levels, output_tokens):
    """Split a NewlineAndWhitespace token into a Newline plus indent/dedent tokens.

    A Newline token is always appended to ``output_tokens``. If the whitespace
    is deeper than the innermost entry of ``indentation_levels``, that depth
    is pushed and one Indent token is appended. If it is shallower, one
    Dedent token is appended for every level popped; the last of them is the
    original token converted by ``make_dedent_token``. Equal depth adds
    nothing further.
    """
    start, column = compute_position_of_newline(token)
    # Everything before ``start`` is the line break itself.
    newline = Token("Newline", token.value[:start], token.source_pos)
    output_tokens.append(newline)
    # The rest is whitespace: decide between indent and dedent.
    if column > indentation_levels[-1]:
        indentation_levels.append(column)
        output_tokens.append(make_indent_token(token, start))
        return
    closed_levels = 0
    while column < indentation_levels[-1]:
        indentation_levels.pop()
        output_tokens.append(Token("Dedent", "", token.source_pos))
        closed_levels += 1
    if closed_levels:
        output_tokens[-1] = make_dedent_token(token, start)
|
||||
|
||||
|
||||
# input: lexer token stream
|
||||
# output: modified token stream
|
||||
def postprocess(tokens, source):
    """Turn the raw lexer token stream into the stream the parser consumes.

    Ignore tokens are dropped. Each NewlineAndWhitespace token outside
    brackets becomes a Newline token plus Indent/Dedent tokens; one that is
    directly followed by another NewlineAndWhitespace token is skipped, and
    those inside brackets are dropped (implicit line continuation). An EOF
    token is appended if there was at least one token.

    ``source`` is accepted for interface compatibility and is not used.

    Raises rply's LexingError when a closing bracket has no matching opener.
    """
    parenthesis_level = 0
    indentation_levels = [0]
    output_tokens = []
    tokens = [token for token in tokens if token.name != "Ignore"]
    token = None
    for i, token in enumerate(tokens):
        # never create indent/dedent token between brackets
        if token.name == "OpenBracket":
            parenthesis_level += 1
            output_tokens.append(token)
        elif token.name == "CloseBracket":
            parenthesis_level -= 1
            if parenthesis_level < 0:
                # The name previously raised here was undefined, so this path
                # ended in a NameError; use rply's own lexing exception.
                from rply.errors import LexingError
                raise LexingError("unmatched parenthesis", token.source_pos)
            output_tokens.append(token)
        elif token.name == "NewlineAndWhitespace":
            if i + 1 < len(tokens) and tokens[i + 1].name == "NewlineAndWhitespace":
                continue
            if parenthesis_level == 0:
                compute_indent_or_dedent(token, indentation_levels, output_tokens)
            # else: implicit line-continuations within parenthesis
        else:
            # something else: e.g. name, keyword, etc...
            output_tokens.append(token)
    if token is not None:
        output_tokens.append(Token("EOF", "", token.source_pos))
    return output_tokens
|
||||
|
||||
|
||||
# RPython reimplementation
|
||||
def group(*choices, **namegroup):
    """Join the given regex fragments with ``|`` and wrap them in parentheses.

    Keyword arguments are accepted but ignored.
    """
    alternatives = '|'.join(list(choices))
    return '(%s)' % (alternatives,)
|
||||
|
||||
|
||||
# RPython reimplementation
|
||||
def any(*choices):
    """Build a regex matching zero or more repetitions of any of ``choices``.

    The fragments are joined with ``|``, wrapped in parentheses and followed
    by ``*``.
    """
    alternatives = '|'.join(list(choices))
    return '(' + alternatives + ')' + '*'
|
||||
|
||||
|
||||
# ' or " string. eg. 'hello' or "hello"
|
||||
def make_single_string(delim):
    """Build the regex for a string literal enclosed in ``delim``.

    Between the delimiters the pattern allows runs of ordinary characters
    (anything but a newline, a backslash or the delimiter) and
    backslash-escaped characters. Example literals: 'hello' or "hello".
    """
    ordinary = r"[^\n\%s]*" % (delim,)
    escaped_runs = '(' + r"\\." + ordinary + ')' + '*'
    return delim + ordinary + escaped_runs + delim
|
||||
|
||||
|
||||
# ____________________________________________________________
|
||||
# Literals
|
||||
|
||||
# An optionally signed decimal number without leading zeros, or a single 0.
Number = r'(([+-])?[1-9][0-9]*)|0'
# A string literal delimited by single or by double quotes.
String = group(make_single_string(r"\'"), make_single_string(r'\"'))

# ____________________________________________________________
# Ignored

# One blank: space, form feed or tab.
Whitespace = r'[ \f\t]'
# A line break, with an optional carriage return.
Newline = r'\r?\n'
# A backslash directly followed by a line break.
Linecontinue = r'\\' + Newline
# From '#' up to the end of the line.
Comment = r'#[^\r\n]*'
# A line break together with the blanks that follow it.
NewlineAndWhitespace = Newline + any(Whitespace)
# Blanks, line continuations and comments.
Ignore = group(Whitespace + '+', Linecontinue, Comment)

# ____________________________________________________________
# Identifier

Name = r'[a-zA-Z_][a-zA-Z0-9_]*'
# A name prefixed with '$'.
PrimitiveName = '\\$' + Name

# ____________________________________________________________
# Symbols

Colon = r'\:'
Comma = r'\,'
Assign = r'\='

# All three bracket kinds share one token type each for opening and closing.
OpenBracket = r'[\[\(\{]'
CloseBracket = r'[\]\)\}]'
|
||||
|
||||
# ____________________________________________________________
|
||||
# Keywords
|
||||
|
||||
# Keyword patterns end in a word boundary so that identifiers which merely
# start with a keyword (e.g. "iffy", "default", "objects") are not split into
# a keyword token followed by a name token.
If = r'if\b'
Else = r'else\b'
While = r'while\b'
Def = r'def\b'
Object = r'object\b'
|
||||
|
||||
# Names of the lexer rules, in the order make_lexer adds them to the
# generator. Each name is also the name of the module-level variable that
# holds its pattern.
tokens = [
    "If",
    "Else",
    "While",
    "Def",
    "Object",
    "Number",
    "String",
    "Ignore",
    "NewlineAndWhitespace",
    "OpenBracket",
    "CloseBracket",
    "Comma",
    "Assign",
    "Colon",
    "Name",
    "PrimitiveName",
]
|
||||
|
||||
|
||||
def make_lexer():
    """Build the lexer from the rule names listed in ``tokens``.

    Each rule's pattern is the module-level variable with the same name,
    e.g. (Name, r'[a-zA-Z_][a-zA-Z0-9_]*').
    """
    generator = LexerGenerator()
    patterns = globals()
    for rule_name in tokens:
        generator.add(rule_name, patterns[rule_name])
    return generator.build()
|
||||
|
||||
|
||||
# Module-wide lexer instance, built once when this module is imported.
lexer = make_lexer()
|
||||
|
||||
|
||||
# s is the simple program code
|
||||
def lex(s):
    """Lex the program text ``s`` and return the post-processed token list.

    A trailing newline is appended first if ``s`` does not already end with one.
    """
    source = s if s.endswith('\n') else s + '\n'
    raw_tokens = lexer.lex(source)
    return list(postprocess(raw_tokens, source))
|
333
simpleparser.py
Normal file
333
simpleparser.py
Normal file
@ -0,0 +1,333 @@
|
||||
"""
|
||||
A 'simple' parser. Don't look into this file :-)
|
||||
"""
|
||||
import py
|
||||
import simpleast
|
||||
from simplelexer import lex
|
||||
from rply.token import Token
|
||||
|
||||
from rply import ParserGenerator
|
||||
|
||||
# Parser generator declared with every token type the grammar below refers to.
pg = ParserGenerator([
    "If",
    "Else",
    "While",
    "Def",
    "Object",
    "Number",
    "String",
    "Name",
    "Indent",
    "Dedent",
    "Newline",
    "OpenBracket",
    "CloseBracket",
    "Comma",
    "Assign",
    "Colon",
    "PrimitiveName",
    "EOF",
])
|
||||
|
||||
|
||||
def build_methodcall(call, cls):
    """Create ``cls(None, name, args)`` from a parsed call.

    ``call`` is ``[name]`` or ``[name, args]``; with only a name the argument
    list is empty. The receiver is always passed as None.
    """
    name = call[0]
    args = [] if len(call) == 1 else call[1]
    return cls(None, name, args)
|
||||
|
||||
|
||||
@pg.production("program : statements EOF")
@pg.production("program : newlines statements EOF")
def program(prog):
    """Return the statements of the program, skipping a leading ``newlines`` entry.

    The ``newlines`` rule yields None, so a None first element means the
    statements are in second position.
    """
    first = prog[0]
    return prog[1] if first is None else first
|
||||
|
||||
|
||||
@pg.production("statements : statement")
@pg.production("statements : statement statements")
@pg.production("statements : statement newlines statements")
def statements(stmts):
    """Collect consecutive statements into a single Program node.

    A lone statement is wrapped in a new Program. Otherwise the first
    statement is inserted at the front of the Program produced for the
    remaining ones, which is always the last element of ``stmts``. This also
    covers the three-symbol production with ``newlines`` in the middle, which
    previously fell through and returned None.
    """
    if len(stmts) == 1:
        return simpleast.Program([stmts[0]])
    head = stmts[0]
    rest = stmts[-1]
    if head is None:
        assert len(stmts) == 2
        return rest
    rest.statements.insert(0, head)
    return rest
|
||||
|
||||
|
||||
@pg.production("newlines : Newline")
@pg.production("newlines : Newline newlines")
def newlines(n):
    """Reduce any run of Newline tokens to None."""
    return
|
||||
|
||||
|
||||
@pg.production("statement : simplestatement")
@pg.production("statement : ifstatement")
@pg.production("statement : whilestatement")
@pg.production("statement : defstatement")
@pg.production("statement : objectstatement")
def statement(stmt):
    """Pass the node of the concrete statement kind through unchanged."""
    node = stmt[0]
    return node
|
||||
|
||||
|
||||
@pg.production("ifstatement : If expression block")
@pg.production("ifstatement : If expression block Else block")
def ifstatement(ifstmt):
    """Build an IfStatement; the else block is None unless an else branch was parsed."""
    condition, ifblock = ifstmt[1], ifstmt[2]
    elseblock = ifstmt[-1] if len(ifstmt) > 3 else None
    return simpleast.IfStatement(condition, ifblock, elseblock)
|
||||
|
||||
|
||||
@pg.production("whilestatement : While expression block")
def whilestatement(whilestmt):
    """Build a WhileStatement from the parsed condition and loop block.

    The function used to be called ``ifstatement`` as well, which rebound
    that module-level name away from the real if handler.
    """
    condition, whileblock = whilestmt[1], whilestmt[2]
    return simpleast.WhileStatement(condition, whileblock)
|
||||
|
||||
|
||||
@pg.production("objectstatement : Object name block")
@pg.production("objectstatement : Object name parentlist block")
def objectstatement(obj):
    """Build an ObjectDefinition, with or without a parent list.

    Each entry of the parent list contributes its ``attrname`` to the parent
    names and its ``expression`` to the parent definitions.
    """
    name = obj[1]
    if len(obj) == 3:
        return simpleast.ObjectDefinition(name, obj[2], [], [])
    parents = obj[2]
    names = [parent.attrname for parent in parents]
    expressions = [parent.expression for parent in parents]
    return simpleast.ObjectDefinition(name, obj[3], names, expressions)
|
||||
|
||||
|
||||
@pg.production("defstatement : Def name argumentnamelist block")
@pg.production("defstatement : Def name block")
def defstatement(defn):
    """Build a FunctionDefinition; the argument list is empty when none was written."""
    name = defn[1]
    has_arguments = len(defn) == 4
    args = defn[2] if has_arguments else []
    blk = defn[3] if has_arguments else defn[2]
    return simpleast.FunctionDefinition(name, args, blk)
|
||||
|
||||
|
||||
@pg.production("block : Colon newlines Indent statements Dedent")
def block(blk):
    """Return the statements of an indented block (the fourth grammar symbol)."""
    body = blk[3]
    return body
|
||||
|
||||
|
||||
@pg.production("simplestatement : expression Newline")
@pg.production("simplestatement : expression Assign expression Newline")
def simplestatement(stmts):
    """Build an ExprStatement or an Assignment.

    Only a method call without arguments is accepted as assignment target;
    its receiver and method name become the lvalue and attribute name.
    Anything else raises ParseError at the position of the assignment sign.
    """
    if len(stmts) == 2:
        return simpleast.ExprStatement(stmts[0])
    # assignment
    target, value = stmts[0], stmts[2]
    is_attribute = (isinstance(target, simpleast.MethodCall) and
                    target.arguments == [])
    if not is_attribute:
        source_pos = stmts[1].source_pos
        raise ParseError(source_pos,
                         ErrorInformation(source_pos.idx,
                                          customerror="can only assign to attribute"))
    return simpleast.Assignment(target.receiver, target.methodname, value)
|
||||
|
||||
|
||||
@pg.production("expression : basic_expression")
@pg.production("expression : basic_expression msg-chain")
def expression(expr):
    """Link a basic expression with the message chain that follows it.

    Each call in the chain receives the preceding expression as its receiver;
    the last call of the chain is the result. Without a chain the basic
    expression is returned unchanged.
    """
    if len(expr) <= 1:
        return expr[0]
    receiver = expr[0]
    chain = expr[1]
    for call in chain:
        call.receiver = receiver
        receiver = call
    return chain[-1]
|
||||
|
||||
|
||||
@pg.production("msg-chain : methodcall")
@pg.production("msg-chain : methodcall msg-chain")
def msg_chain(cc):
    """Flatten a chain of method calls into one list, in source order."""
    if len(cc) <= 1:
        return cc
    head, tail = cc[0], cc[1]
    return [head] + tail
|
||||
|
||||
|
||||
@pg.production("basic_expression : Number")
def number_expression(stmt):
    """Wrap the text of a Number token in an IntLiteral node."""
    token = stmt[0]
    return simpleast.IntLiteral(token.value)
|
||||
|
||||
|
||||
@pg.production("basic_expression : String")
def string_expression(stmt):
    """Wrap the text of a String token in a StringLiteral node."""
    token = stmt[0]
    return simpleast.StringLiteral(token.value)
|
||||
|
||||
|
||||
@pg.production("basic_expression : implicitselfmethodcall")
def implicitselfmethodcall(call):
    """Give a receiver-less method call an ImplicitSelf receiver and return it."""
    node = call[0]
    node.receiver = simpleast.ImplicitSelf()
    return node
|
||||
|
||||
|
||||
@pg.production("implicitselfmethodcall : methodcall")
def implicitselfmethodcall_methodcall(call):
    """Pass the method call node through unchanged."""
    node = call[0]
    return node
|
||||
|
||||
|
||||
@pg.production("methodcall : primitivemethodcall")
@pg.production("methodcall : simplemethodcall")
def methodcall(call):
    """Pass the primitive or simple method call node through unchanged."""
    node = call[0]
    return node
|
||||
|
||||
|
||||
@pg.production("simplemethodcall : name")
@pg.production("simplemethodcall : name argumentslist")
def simplemethodcall(call):
    """Build a MethodCall node from a name and an optional argument list."""
    node = build_methodcall(call, simpleast.MethodCall)
    return node
|
||||
|
||||
|
||||
@pg.production("primitivemethodcall : primitivename")
@pg.production("primitivemethodcall : primitivename argumentslist")
def primitivemethodcall(call):
    """Build a PrimitiveMethodCall node from a primitive name and an optional argument list."""
    node = build_methodcall(call, simpleast.PrimitiveMethodCall)
    return node
|
||||
|
||||
|
||||
@pg.production("argumentslist : OpenBracket arguments CloseBracket")
@pg.production("argumentnamelist : OpenBracket argumentnames CloseBracket")
@pg.production("parentlist : OpenBracket parentdefinitions CloseBracket")
def argumentslist(args):
    """Return the bracketed list's contents, dropping the two bracket tokens."""
    inner = args[1]
    return inner
|
||||
|
||||
|
||||
@pg.production("arguments : expression")
@pg.production("arguments : expression Comma")
@pg.production("arguments : expression Comma arguments")
@pg.production("argumentnames : name")
@pg.production("argumentnames : name Comma")
@pg.production("argumentnames : name Comma argumentnames")
@pg.production("parentdefinitions : assignment")
@pg.production("parentdefinitions : assignment Comma")
@pg.production("parentdefinitions : assignment Comma parentdefinitions")
def arguments(args):
    """Turn a comma-separated sequence into a flat list.

    The first element always starts the list; when a tail follows the comma
    it is appended. A trailing comma without a tail is allowed.
    """
    items = [args[0]]
    if len(args) == 3:
        items = items + args[2]
    return items
|
||||
|
||||
|
||||
@pg.production("assignment : name Assign expression")
def assignement(args):
    """Build the Assignment for one ``name = expression`` entry; its lvalue is None."""
    attrname, value = args[0], args[2]
    return simpleast.Assignment(None, attrname, value)
|
||||
|
||||
|
||||
@pg.production("primitivename : PrimitiveName")
@pg.production("name : Name")
def name(name):
    """Return the text of the single Name or PrimitiveName token."""
    token = name[0]
    return token.value
|
||||
|
||||
|
||||
@pg.error
def error_handler(token):
    """Raise a ParseError that names the unexpected token type at the token's position."""
    position = token.getsourcepos()
    message = "Ran into a %s where it wasn't expected" % token.gettokentype()
    raise ParseError(source_pos=position,
                     errorinformation=ErrorInformation(position.idx,
                                                       customerror=message))
|
||||
|
||||
|
||||
# Module-wide parser, built once at import time from the productions
# registered on ``pg``.
parser = pg.build()
|
||||
|
||||
|
||||
def print_conflicts():
    """Print the parser table's reduce/reduce conflicts, then its shift/reduce conflicts."""
    sections = (("rr conflicts", "rr_conflicts"),
                ("sr conflicts", "sr_conflicts"))
    for title, attribute in sections:
        print(title)
        for rule_num, token, conflict in getattr(parser.lr_table, attribute):
            print(rule_num, token, conflict)
|
||||
|
||||
|
||||
# NOTE(review): this runs at import time, so the conflict report is printed
# whenever the module is imported.
print_conflicts()
|
||||
|
||||
|
||||
def parse(s):
    """Lex the program text ``s`` and return what the parser produces for it."""
    token_stream = iter(lex(s))
    return parser.parse(token_stream)
|
||||
|
||||
|
||||
# ____________________________________________________________
|
||||
|
||||
class ParseError(Exception):
    """Error raised for input the parser cannot accept.

    Stores the source position, an error-information object (or None) and
    optionally the source text, which is used to quote the offending line.
    """

    def __init__(self, source_pos, errorinformation, source=""):
        self.source_pos = source_pos
        self.errorinformation = errorinformation
        self.args = (source_pos, errorinformation)
        self.source = source

    def nice_error_message(self, filename="<unknown>"):
        """Return a multi-line, human-readable description of the error.

        It names the file and line, quotes the source line with a caret under
        the error column when the source is known, and lists the expected
        alternatives and the custom message when they are available.
        """
        position = self.source_pos
        lines = [" File %s, line %s" % (filename, position.lineno + 1)]
        if self.source:
            lines.append(self.source.split("\n")[position.lineno])
            lines.append(" " * position.colno + "^")
        else:
            lines.append("<couldn't get source>")
        lines.append("ParseError")
        info = self.errorinformation
        if info:
            reasons = info.expected
            if reasons:
                expected = ''
                if len(reasons) > 1:
                    quoted = ", ".join(["'%s'" % e for e in reasons[:-1]])
                    expected = "%s or '%s'" % (quoted, reasons[-1])
                elif len(reasons) == 1:
                    expected = reasons[0]
                if expected:
                    lines.append("expected %s" % (expected,))
            if info.customerror:
                lines.append(info.customerror)
        return "\n".join(lines)

    def __str__(self):
        return self.nice_error_message()
|
||||
|
||||
|
||||
class ErrorInformation(object):
    """Details of a parse failure.

    Holds the position ``pos``, the list of ``expected`` alternatives (a new
    empty list when omitted) and an optional ``customerror`` message.
    """

    def __init__(self, pos, expected=None, customerror=None):
        self.expected = [] if expected is None else expected
        self.pos = pos
        self.customerror = customerror
|
||||
|
||||
|
||||
def combine_errors(self, other):
    """Merge two error-information objects, preferring the one further along.

    If one side is None the other is returned. The side with the greater
    ``pos`` wins, as does the only side that has any expected alternatives.
    When both are at the same position and both have expectations, a new
    ErrorInformation is returned holding the expectations of both, without
    duplicates and in first-seen order.
    """
    if self is None:
        return other
    if (other is None or self.pos > other.pos or
            len(other.expected) == 0):
        return self
    elif other.pos > self.pos or len(self.expected) == 0:
        return other
    merged = []
    seen = set()
    for expectations in (self.expected, other.expected):
        for reason in expectations:
            if reason in seen:
                continue
            seen.add(reason)
            merged.append(reason)
    return ErrorInformation(self.pos, merged,
                            self.customerror or other.customerror)
|
||||
|
||||
|
||||
def make_arglist(methodname):
    """Return a parser method that reads a bracketed, comma-separated list.

    The returned function matches an opening bracket, calls the method named
    ``methodname`` for the first item, extends the result with
    ``self.repeat(self.comma, <method>)``, allows one trailing comma, matches
    the closing bracket and returns the list of items.
    """
    def arglist(self):
        self.match("OpenBracket", "(")
        parse_item = getattr(self, methodname)
        items = [parse_item()]
        items.extend(self.repeat(self.comma, parse_item))
        self.maybe(self.comma)
        self.match("CloseBracket", ")")
        return items

    return arglist
|
Reference in New Issue
Block a user