def read4(self, code, pc):
    """Decode the ARG4 argument stored at ``code[pc:pc + 4]``.

    The four characters are read as a little-endian, two's-complement
    32-bit integer: the byte at ``pc`` is the least significant one and
    the byte at ``pc + 3`` carries the sign.
    """
    top = ord(code[pc + 3])
    if top >= 128:
        # High bit set: reinterpret the top byte as a negative value.
        top -= 256
    value = top << 24
    for offset in (2, 1, 0):
        value |= ord(code[pc + offset]) << (8 * offset)
    return value
compile.MAKE_OBJECT_CALL: + self.run(bytecode.subbytecodes[oparg], stack[-1]) + elif opcode == compile.INT_LITERAL: + w_value = self.space.newint(oparg) + stack.append(w_value) + elif opcode == compile.MAKE_FUNCTION: + bc = bytecode.subbytecodes[oparg] + w_method = self.space.definemethod(name=bc.name, code=bc, w_target=w_context) + stack.append(w_method) + elif opcode == compile.METHOD_LOOKUP: + name = bytecode.symbols[oparg] + w_method = self.space.getvalue(stack[-1], name) + stack.append(w_method) + elif opcode == compile.METHOD_CALL: + arguments_w = [stack.pop() for n in range(oparg)] + arguments_w.reverse() + # + w_method = stack.pop() + w_receiver = stack.pop() + w_result = self.space.call(w_method, w_receiver, arguments_w) + stack.append(w_result) + elif opcode == compile.PRIMITIVE_METHOD_CALL: + nargs = self.space.get_number_of_arguments_of_primitive(oparg) + arguments_w = [stack.pop() for n in range(nargs)] + arguments_w.reverse() + w_receiver = stack.pop() + w_result = self.space.call_primitive(oparg, w_receiver, arguments_w) + stack.append(w_result) + elif opcode == compile.SET_LOCAL: + w_value = stack[-1] + name = bytecode.symbols[oparg] + self.space.setvalue(w_context, name, w_value) + elif opcode == compile.ASSIGNMENT: + w_value = stack.pop() + name = bytecode.symbols[oparg] + self.space.setvalue(stack[-1], name, w_value) + elif opcode == compile.ASSIGNMENT_APPEND_PARENT: + w_value = stack.pop() + name = bytecode.symbols[oparg] + self.space.setvalue(stack[-1], name, w_value) + self.space.addparent(stack[-1], name) + elif opcode == compile.GET_LOCAL: + name = bytecode.symbols[oparg] + w_value = self.space.getvalue(w_context, name) + w_value = self.space.call(w_value, w_context, []) + stack.append(w_value) + else: + raise ByteCodeError('Invalid bytecode with arguments') + else: + if opcode == compile.POP: + stack.pop() + elif opcode == compile.IMPLICIT_SELF: + stack.append(w_context) + elif opcode == compile.DUP: + stack.append(stack[-1]) + else: + 
def compute_C3_mro(w_obj):
    """Return the C3 linearization of ``w_obj`` as a list of objects.

    The merge input is the linearization of every direct parent
    (``get_mro()``) followed by the list ``[w_obj] + parents``.  A head is
    taken as soon as it does not occur in the tail of any pending list; if
    no such head exists, ``mro_error`` raises ``TypeError``.
    """
    linearization = []
    direct_parents = w_obj.getparents()
    pending = [w_parent.get_mro() for w_parent in direct_parents]
    pending.append([w_obj] + direct_parents)
    while pending:
        w_head = None
        found = False
        for sequence in pending:
            w_head = sequence[0]
            if mro_blockinglist(w_head, pending) is None:
                found = True  # good head: nothing blocks it
                break
        if not found:
            return mro_error(pending)  # no candidate found
        assert w_head not in linearization
        linearization.append(w_head)
        # Walk backwards so removing an exhausted list keeps indices valid.
        for index in reversed(range(len(pending))):
            sequence = pending[index]
            if sequence[0] is w_head:
                sequence.pop(0)
                if not sequence:
                    pending.pop(index)
    return linearization


def mro_blockinglist(w_candidate, orderlists):
    """Return the first list whose tail contains ``w_candidate``, else None."""
    return next(
        (lst for lst in orderlists if w_candidate in lst[1:]),
        None,  # good candidate
    )


def mro_error(orderlists):
    """Raise ``TypeError`` describing why no linearization exists.

    ``getname()`` is only used to build the message; it may return any
    string.
    """
    last = orderlists[-1]
    w_candidate = last[0]
    if w_candidate in last[1:]:
        # explicit error message for this specific case
        raise TypeError("duplicate parent %s" % w_candidate.getname())
    chain = []
    while w_candidate not in chain:
        chain.append(w_candidate)
        w_candidate = mro_blockinglist(w_candidate, orderlists)[0]
    # Keep only the part of the walk that actually forms the cycle.
    chain = chain[chain.index(w_candidate):]
    chain.append(w_candidate)
    chain.reverse()
    raise TypeError("cycle among base parents: " +
                    ' < '.join(w_obj.getname() for w_obj in chain))
+ +An instruction can have one or no arguments. There are two different ways in which +an argument is encoded: + + ARG4 encodes an argument in 4 bytes, in a little-endian manner + + SMALLARG encodes an integer i differently based on its size: + if -127 <= i <= 127 the integer is encoded as one byte + otherwise it is encoded as 5 bytes: + 1 marker byte equal to -128 to signify that the large form is used + 4 bytes to encode the integer as with ARG4 + +The instruction set contains the following instructions: + + INT_LITERAL + Pushes an integer literal on the stack. The argument is the value of the + integer. + + IMPLICIT_SELF + Pushes the implicit self on the stack. + + POP + Pops the top element from the stack. + + DUP + Duplicates the top element of the stack. + + JUMP + Unconditionally jumps to a different point in the program. The offset of the + program counter to the target is given by the argument. + + JUMP_IF_FALSE + Pops an object from the stack and jumps to a different point in the program + if that object is false. The offset of the program counter to the target is + given by the argument. + + ASSIGNMENT + Assigns the first object on the stack to the second object on the stack. + The objects are popped from the stack, and then the assigned object + (i.e. the `expression') is pushed again. The attribute name is given by + the argument, which is an index into the symbols list of the bytecode + object. + + PRIMITIVE_METHOD_CALL + Calls a primitive method. The argument is an index into a list of all + primitives, which must be defined in the "primitives" module. The arguments + are found on the stack and are popped by this bytecode; the result is + pushed on the stack. 
To make the compiler work correctly for primitives, + the "primitives" module needs to expose two functions: + "get_index_of_primitive_named", which maps a primitive name to a primitive + number, and "get_number_of_arguments_of_primitive", which maps a primitive + number to the number of arguments the corresponding function takes. + + METHOD_LOOKUP + Looks up a method in the object at the top of the stack. The method name + is given by the argument, which is an index into the symbols list of the + bytecode object. The method is pushed on the stack (and the original + object is not removed). + + METHOD_CALL + Calls a method. The first n objects (where n is the argument of the bytecode) are + the arguments to the method, in reverse order. The next object on the + stack is the method. The final object is the receiver. All these objects + are popped from the stack. The result of the method call is pushed. + + MAKE_FUNCTION + Creates a new W_Method object and pushes it on the stack. The bytecode of + the method can be found in the subbytecodes list of the current bytecode + object; the index is given by the argument. + + MAKE_OBJECT + Creates a new (empty) object and pushes it on the stack. The argument (which + can be ignored for now) is the index in symbols of the name of the object. + + ASSIGNMENT_APPEND_PARENT + Adds a new parent to an object. This bytecode is only used during object + creation. It works like the ASSIGNMENT bytecode, but (1) it also adds the + name to the list of parent attributes of the object, and (2) it leaves + on the stack the assigned-to object (the `lvalue'), not the assigned + object (the `expression'). + + MAKE_OBJECT_CALL + Executes the body of a newly created object. The object is on the top of the + stack and is left there. The bytecode of the body can be found in the + subbytecodes list of the current bytecode object; the index is given by the + argument. 
# ---------- bytecodes ----------
# Opcodes numbered below 32 are followed by an encoded argument;
# opcodes from 32 upwards stand alone.

INT_LITERAL = 2  # integer value
ASSIGNMENT = 4  # index of attrname
METHOD_LOOKUP = 5  # index of method name
METHOD_CALL = 6  # number of arguments
PRIMITIVE_METHOD_CALL = 7  # number of the primitive
MAKE_FUNCTION = 8  # bytecode literal index
MAKE_OBJECT = 9  # index of object name
ASSIGNMENT_APPEND_PARENT = 10  # index of parentname
MAKE_OBJECT_CALL = 11  # bytecode literal index
JUMP_IF_FALSE = 12  # offset
JUMP = 13  # offset
GET_LOCAL = 15  # index of attrname (optimization)
SET_LOCAL = 16  # index of attrname (optimization)

IMPLICIT_SELF = 32  # (no argument)
POP = 33  # (no argument)
DUP = 34  # (no argument)

# Reverse table: opcode number -> constant name (None for unused numbers).
# The globals are snapshotted with list() because the loop variables
# themselves become module globals while iterating.
opcode_names = [None] * 256
for key, value in list(globals().items()):
    if not key.strip("_").isupper():
        continue
    opcode_names[value] = key


def hasarg(opcode):
    """Tell whether ``opcode`` is followed by an argument in the bytecode."""
    takes_argument = opcode < 32
    return takes_argument


def isjump(opcode):
    """Tell whether ``opcode`` is one of the two jump instructions."""
    return opcode in (JUMP_IF_FALSE, JUMP)
def compile(ast, argumentnames=(), name=None):
    """Turn an AST into a Bytecode object.

    ast -- a ``simpleast.Program`` node (asserted).
    argumentnames -- names of the formal arguments, in order; defaults to
        no arguments.  The default is an immutable tuple so no list object
        is shared between calls.
    name -- name stored on the resulting Bytecode (may be None).

    Returns the Bytecode built by ``Compiler.make_bytecode``; its
    ``numargs`` is ``len(argumentnames)``.
    """
    assert isinstance(ast, simpleast.Program)
    comp = Compiler()
    # Register the argument names first so they occupy the lowest symbol
    # indices (lookup_symbol hands out indices in registration order).
    for arg in argumentnames:
        comp.lookup_symbol(arg)
    comp.lookup_symbol("__parent__")
    comp.lookup_symbol("self")
    comp.compile(ast, True)
    return comp.make_bytecode(len(argumentnames), name)
def encode4(self, value):
    """Encode ``value`` as four one-character strings, little-endian.

    Each element holds one byte of the 32-bit two's-complement form of
    ``value``, least significant byte first.
    """
    return [chr((value >> shift) & 0xFF) for shift in (0, 8, 16, 24)]
def compile_Program(self, astnode, needsresult):
    """Compile every statement of a Program node in order.

    All statements but the last are compiled with ``needsresult=False``;
    only the last statement receives the caller's ``needsresult`` flag.
    """
    statements = astnode.statements
    final = len(statements) - 1
    for index in range(final):
        self.compile(statements[index], needsresult=False)
    self.compile(statements[-1], needsresult)
def compile_WhileStatement(self, astnode, needsresult):
    """Emit the bytecode for a while loop.

    Instruction layout produced by the emits below:

            [IMPLICIT_SELF]              only when needsresult
        position1:  <condition>
        position2:  JUMP_IF_FALSE  --> first instruction after the loop
            [POP]                        only when needsresult
            <whileblock>
        position3:  JUMP           --> position1

    Both jumps are emitted with a zero offset and patched afterwards via
    set_target_position.

    NOTE(review): astnode.elseblock is never referenced here, so the
    optional else-branch of a while loop does not appear to be compiled --
    confirm whether that is intended.
    """
    if needsresult:
        # presumably a placeholder result for a loop whose body never
        # runs -- TODO confirm
        self.emit(IMPLICIT_SELF)
    #
    # position1 is the loop head: the back-jump re-evaluates the condition.
    position1 = self.get_position()
    self.compile(astnode.condition)
    position2 = self.get_position()
    self.emit(JUMP_IF_FALSE)
    #
    if needsresult:
        # Drop the value currently on the stack before the body pushes
        # its own.
        self.emit(POP)
    self.compile(astnode.whileblock, needsresult)
    position3 = self.get_position()
    self.emit(JUMP)
    self.set_target_position(position3, position1)
    #
    # The exit jump targets whatever is emitted after the loop.
    self.set_target_position(position2, self.get_position())
class Disassembler(AbstractDisassembler):
    """Prints one line per instruction while the base class walks the code.

    ``targets`` is the mapping of jump-target positions collected
    beforehand; ``indent`` is printed in front of every line.
    """

    def __init__(self, indent, targets):
        self.indent = indent
        self.targets = targets

    def _print_operand(self, text):
        # Operand column: a tab, then the operand, then the newline.
        print('\t', text)

    def start(self, pc):
        is_target = pc in self.targets
        is_current = pc == self.currpc
        for marker, wanted in (('>>', is_target), ('->', is_current)):
            if wanted:
                print(self.indent, marker, pc)

    def end(self, opcode, oparg):
        # No newline here: the per-opcode handler finishes the line.
        mnemonic = opcode2name[opcode]
        print(self.indent, '\t', mnemonic, end=' ')

    def JUMP_IF_FALSE(self, opcode, oparg):
        destination = self.pc + oparg
        print('\t', '-->', destination)

    JUMP = JUMP_IF_FALSE

    def ASSIGNMENT(self, opcode, oparg):
        symbol = self.bytecode.symbols[oparg]
        self._print_operand(repr(symbol))

    METHOD_LOOKUP = ASSIGNMENT
    ASSIGNMENT_APPEND_PARENT = ASSIGNMENT
    GET_LOCAL = ASSIGNMENT
    SET_LOCAL = ASSIGNMENT

    def PRIMITIVE_METHOD_CALL(self, opcode, oparg):
        import primitives
        func = primitives.all_primitives[oparg]
        self._print_operand(repr('$' + func.__qualname__))

    def dummy(self, opcode, oparg):
        # Fallback for opcodes without a dedicated handler.
        if oparg is not None:
            self._print_operand(oparg)
        else:
            print()
class AbstractObject(object):
    """Behaviour shared by every interpreter-level object."""

    def call(self, w_receiver, args_w):
        # A plain (non-method) value evaluates to itself when called.
        return self

    def istrue(self):
        return True

    def clone(self):
        raise NotImplementedError

    def hasslot(self, name=None):
        # ``name`` matches the signature of the concrete overrides and of
        # the call in getvalue(); it defaults to None so a bare hasslot()
        # still raises NotImplementedError as before.
        raise NotImplementedError

    def getvalue(self, name):
        """Return the slot ``name`` from the first object in the MRO that
        has it, or None when no object defines it."""
        for obj in self.get_mro():
            if obj.hasslot(name):
                return obj.slots[name]
        return None

    def get_mro(self):
        return c3(self)


class W_NormalObject(AbstractObject):
    """An object with named slots and a list of parent slot names.

    name -- debugging name (may be None).
    slots -- dict mapping slot names to values; a falsy value yields a
        fresh empty dict.
    parents -- names of the slots that act as parents; copied, so the
        caller's list is never shared with the object.
    space -- the object space this object belongs to (may be None).
    """

    def __init__(self, name=None, slots=None, parents=None, space=None):
        self.space = space
        self.name = name
        self.slots = slots if slots else {}
        parents = [] if parents is None else list(parents)
        for parent_name in parents:
            # Checked against self.slots so that slots=None fails with the
            # same AssertionError as an incomplete dict.
            assert parent_name in self.slots
        self.parents = parents

    def getparents(self):
        """Return the parent objects: the declared parents in order, with
        '__parent__' considered last unless it is listed explicitly.
        Parent names without a slot are skipped.

        Works on a copy so that querying never changes ``self.parents``.
        """
        names = list(self.parents)
        if '__parent__' not in names:
            names.append('__parent__')
        return [self.slots[p] for p in names if p in self.slots]

    def hasslot(self, name):
        return name in self.slots

    def setvalue(self, name, w_value):
        self.slots[name] = w_value

    def addparent(self, name):
        self.parents.append(name)

    def __str__(self):
        return self.getname()

    __repr__ = __str__

    def getname(self):
        """Return a debugging description built from the name and slots."""
        return "<Object {name} {slots}>".format(
            name=self.name if self.name else '',
            slots=self.slots)

    def clone(self):
        """Return a copy with its own slots dict and parents list."""
        return W_NormalObject(
            name=self.name,
            parents=list(self.parents),
            slots=self.slots.copy(),
            space=self.space)
class W_Method(W_NormalObject):
    """A callable object: a W_NormalObject that also carries bytecode.

    code -- the Bytecode executed by call(); remaining arguments are
        passed through to W_NormalObject.
    """

    def __init__(self, code, *args, **kwargs):
        super(W_Method, self).__init__(*args, **kwargs)
        self.code = code

    def clone(self):
        """Return a copy with its own slots dict and parents list that
        keeps the same code and object space."""
        return W_Method(code=self.code,
                        name=self.name,
                        parents=list(self.parents),
                        slots=self.slots.copy(),
                        space=self.space)

    def getname(self):
        """Return a debugging description containing the method name."""
        return "<W_Method({name})>".format(name=self.name)

    def call(self, w_receiver, args_w):
        """Run the method body with ``w_receiver`` as ``self``.

        A fresh clone of the method serves as the execution context, so
        argument bindings of one call never leak into another.  The number
        of arguments must equal ``self.code.numargs`` (asserted).
        """
        w_context = self.clone()
        assert len(args_w) == self.code.numargs
        # The first numargs symbols of the bytecode are the argument names.
        for i in range(self.code.numargs):
            self.space.setvalue(w_context, self.code.symbols[i], args_w[i])
        self.space.setvalue(w_context, 'self', w_receiver)
        return self.space.execute(w_context.code, w_context)
# Three parallel tables, all indexed by primitive number:
registry = {}  # '$name' -> primitive number
all_primitives = []  # primitive number -> wrapped callable
primitive_number_of_arguments = []  # primitive number -> arity without receiver


def primitive(name, unwrap_spec, wrap_spec):
    """Decorator factory registering a function as the primitive ``$name``.

    name -- primitive name without the leading '$'.
    unwrap_spec -- one entry per parameter, receiver first; an entry that
        is ``int`` means the argument's ``.value`` is passed to the
        function, anything else passes the wrapped object through.
    wrap_spec -- ``int`` to wrap the result with ``space.newint``;
        anything else returns the raw result.

    The registered wrapper has the signature
    ``wrapper(w_receiver, args_w, space)`` and raises TypeError when the
    number of arguments does not match ``unwrap_spec``.
    """
    assert '$' + name not in registry, '${name} already defined'.format(name=name)

    def expose(func):
        def unwrapper(w_receiver, args_w, space):
            args = [w_receiver] + list(args_w)
            if len(args) != len(unwrap_spec):
                raise TypeError(
                    "Expected {ex} arguments, received {re}.".format(
                        ex=len(unwrap_spec), re=len(args)))
            unwrapped_args = tuple(
                arg.value if t is int else arg
                for t, arg in zip(unwrap_spec, args))
            result = func(*unwrapped_args)
            if wrap_spec is int:
                # int() turns the bool of a comparison primitive into 0/1.
                return space.newint(int(result))
            return result

        unwrapper.__qualname__ = name
        # All three tables are updated together so their indices always agree.
        all_primitives.append(unwrapper)
        primitive_number_of_arguments.append(len(unwrap_spec) - 1)  # first argument is the receiver
        registry['$' + name] = len(all_primitives) - 1
        # The decorated module-level name is deliberately bound to None;
        # primitives are reached through the tables only.
        return None
    return expose


def get_index_of_primitive_named(name):
    """Return the primitive number for ``name`` (including the '$').

    Raises KeyError for an unknown primitive.
    """
    return registry[name]


def get_number_of_arguments_of_primitive(idx):
    """Return how many arguments, excluding the receiver, primitive ``idx`` takes."""
    return primitive_number_of_arguments[idx]


@primitive('int_add', [int, int], int)
def simple_int_add(a, b):
    return a + b


@primitive('int_eq', [int, int], int)
def simple_int_eq(a, b):
    return a == b
def dot(self, result=None):
    """Append the graphviz statements for this node and its subtree.

    ``result`` is the list that collects the statements; a new list is
    created when it is None.  The same list is returned.  Attributes that
    are AST nodes (or lists starting with one) become edges and are
    recursed into; every other attribute is written into the node label.
    """
    def uid(node):
        ident = id(node)
        return 'm%d' % (-ident,) if ident < 0 else ident

    statements = result if result is not None else []
    label_lines = [self.__class__.__name__]
    subnodes = []
    for attr in self.attrs:
        value = getattr(self, attr)
        is_list = isinstance(value, list)
        if is_list and value and isinstance(value[0], AstNode):
            subnodes.extend(value)
            for index, item in enumerate(value):
                statements.append("o%s -> o%s [label=\"%s[%s]\"]" % (
                    uid(self), uid(item), attr, index))
            continue
        if not is_list and isinstance(value, AstNode):
            subnodes.append(value)
            statements.append("o%s -> o%s [label=\"%s\"]" % (
                uid(self), uid(value), attr))
            continue
        label_lines.append("%s = %s" % (attr, value))
    # repr() escapes the newlines of the label; [1:-1] drops its quotes.
    label = repr("\n".join(label_lines))[1:-1]
    statements.append("o%s [label=\"%s\", shape=box]" % (uid(self), label))
    for subnode in subnodes:
        subnode.dot(statements)
    return statements
""" + from dotviewer import graphclient + content = ["digraph G{"] + content.extend(self.dot()) + content.append("}") + p = py.test.ensuretemp("simpleparser").join("temp.dot") + p.write("\n".join(content)) + graphclient.display_dot_file(str(p)) + + +class Expression(AstNode): + """ Abstract Base class for all expression AST nodes""" + + +class IntLiteral(Expression): + """ An integer literal (like "1") """ + + attrs = ["value"] + + def __init__(self, value): + self.value = int(value) + + +class StringLiteral(Expression): + """ An string literal (like "hello world") """ + + attrs = ["value"] + + def __init__(self, value): + self.value = str(value) + + +class MethodCall(Expression): + """ A call to a method with name 'methodname' on 'receiver' with + 'arguments' (which is a list of expression ASTs). + + Example: + f(1, 2, 3) + (receiver is ImplicitSelf(), methodname is 'f' and + args is [IntLiteral(1), IntLiteral(2), IntLiteral(3)]) + + 5 f + (receiver is IntLiteral(5), methodname is 'f' and args is []) + """ + + attrs = ["receiver", "methodname", "arguments"] + + def __init__(self, receiver, methodname, arguments=None): + self.receiver = receiver + self.methodname = methodname + if arguments is None: + arguments = [] + self.arguments = arguments + + +class PrimitiveMethodCall(MethodCall): + """ A method call to a primitive method. Primitive method names start with + '$'.The attributes are like those in MethodCall. + + Example: + + 5 $int_add(6) + (receiver is IntLiteral(5), methodname is '$int_add' and args + is [IntLiteral(6)]) + """ + + +class ImplicitSelf(Expression): + """ The receiver that is used when none is specified. + + Example: + f + this is a method call "f" on the implicit self.""" + + attrs = [] + + +class Statement(AstNode): + """ Base class of all statement nodes. """ + + +class Assignment(Statement): + """ An assignement: lvalue attrname = expression. 
+ + Example: + x = 7 + this is an assignement on the implicit self.""" + + attrs = ["lvalue", "attrname", "expression"] + + def __init__(self, lvalue, attrname, expression): + self.lvalue = lvalue + self.attrname = attrname + self.expression = expression + + +class ExprStatement(Statement): + """ A statement that is just an expression evaluation (and the result is + ignored).""" + + attrs = ["expression"] + + def __init__(self, expr): + self.expression = expr + + +class IfStatement(Statement): + """ An if statement. The syntax looks like this: + + if condition: + ... ifblock ... + else: + ... elseblock ... + + The elseblock is optional.""" + + attrs = ["condition", "ifblock", "elseblock"] + + def __init__(self, condition, ifblock, elseblock=None): + self.condition = condition + self.ifblock = ifblock + self.elseblock = elseblock + + +class WhileStatement(Statement): + """ A while loop. The syntax looks like this: + + while condition: + ... whileblock ... + else: + ... elseblock ... + + The elseblock is optional.""" + + attrs = ["condition", "whileblock", "elseblock"] + + def __init__(self, condition, whileblock, elseblock=None): + self.condition = condition + self.whileblock = whileblock + self.elseblock = elseblock + + +class FunctionDefinition(Statement): + """ A function definition. Corresponds to def name(arguments): block. + + The 'name' is a string, the 'arguments' is a list of strings, and the + 'block' is a Program (see below). Executing a FunctionDefinition creates + a new W_Method and assigns it to the 'name' on the implicit self. + + Example: + def f: FunctionDefinition('f', [], Program([...])) + 41 + + def g(a, b, c): FunctionDefinition('g', ['a', 'b', 'c'], ...) + 43 + """ + attrs = ["name", "arguments", "block"] + + def __init__(self, name, arguments, block): + self.name = name + self.arguments = arguments + self.block = block + + +class ObjectDefinition(Statement): + """ Makes a new normal object. 
def _retag_whitespace_token(token, start, new_name):
    """Turn a NewlineAndWhitespace token into the token named ``new_name``.

    The token is modified in place: its value keeps only the part from
    ``start`` on, and its source position is advanced by ``start``
    characters, moved to the next line and reset to column 0.

    NOTE(review): ``token.source_pos`` is mutated rather than replaced, so
    any other token holding the same position object is shifted as well --
    confirm that callers expect this.
    """
    # Explicit raise instead of ``assert`` so the check survives ``-O``
    # while callers still see the same exception type.
    if token.name != "NewlineAndWhitespace":
        raise AssertionError(
            "expected a NewlineAndWhitespace token, got %r" % (token.name,))
    token.name = new_name
    token.value = token.value[start:]
    position = token.source_pos
    position.idx += start
    position.lineno += 1
    position.colno = 0
    return token


def make_indent_token(token, start):
    """Convert ``token`` (NewlineAndWhitespace) into an ``Indent`` token.

    :param token: the token to rewrite in place.
    :param start: index in ``token.value`` where the whitespace after the
        newline begins.
    :returns: the same token object.
    :raises AssertionError: if ``token`` is not a NewlineAndWhitespace token.
    """
    return _retag_whitespace_token(token, start, "Indent")


def make_dedent_token(token, start):
    """Convert ``token`` (NewlineAndWhitespace) into a ``Dedent`` token.

    :param token: the token to rewrite in place.
    :param start: index in ``token.value`` where the whitespace after the
        newline begins.
    :returns: the same token object.
    :raises AssertionError: if ``token`` is not a NewlineAndWhitespace token.
    """
    return _retag_whitespace_token(token, start, "Dedent")
class LexerError(Exception):
    """Raised by postprocess() when the token stream is malformed.

    The name was already used in postprocess() but was neither imported
    nor defined, so the error path failed with NameError instead.

    Attributes mirror the constructor arguments: ``source`` (the program
    text), ``source_pos`` (position of the offending token) and
    ``message``.
    """

    def __init__(self, source, source_pos, message):
        Exception.__init__(self, message)
        self.source = source
        self.source_pos = source_pos
        self.message = message


def postprocess(tokens, source):
    """Rewrite the raw lexer token stream into the stream the parser reads.

    * tokens named ``Ignore`` are dropped;
    * bracket nesting is tracked, and a NewlineAndWhitespace token inside
      brackets is discarded (implicit line continuation);
    * a NewlineAndWhitespace token directly followed by another one is
      skipped, so only the last of a run is processed;
    * outside brackets, NewlineAndWhitespace is handed to
      compute_indent_or_dedent(), which appends the resulting tokens;
    * an ``EOF`` token carrying the last token's position is appended
      unless the filtered stream is empty.

    :param tokens: iterable of lexer tokens.
    :param source: the program text, attached to raised errors.
    :returns: list of tokens.
    :raises LexerError: on a closing bracket without a matching opener.
    """
    parenthesis_level = 0
    indentation_levels = [0]
    output_tokens = []
    significant = [tok for tok in tokens if tok.name != "Ignore"]
    last = None
    for index, token in enumerate(significant):
        last = token
        kind = token.name
        if kind == "OpenBracket":
            parenthesis_level += 1
            output_tokens.append(token)
        elif kind == "CloseBracket":
            parenthesis_level -= 1
            if parenthesis_level < 0:
                raise LexerError(source, token.source_pos,
                                 "unmatched parenthesis")
            output_tokens.append(token)
        elif kind == "NewlineAndWhitespace":
            following = index + 1
            if (following < len(significant) and
                    significant[following].name == "NewlineAndWhitespace"):
                # Only the last newline of a run decides the indentation.
                continue
            # Never create indent/dedent tokens between brackets.
            if parenthesis_level == 0:
                compute_indent_or_dedent(token, indentation_levels,
                                         output_tokens)
        else:
            # Something else: e.g. name, keyword, literal, ...
            output_tokens.append(token)
    if last is not None:
        output_tokens.append(Token("EOF", "", last.source_pos))
    return output_tokens
@pg.production("statements : statement")
@pg.production("statements : statement statements")
@pg.production("statements : statement newlines statements")
def statements(stmts):
    """Build the Program node for a sequence of statements.

    ``stmts[0]`` is always the AST of the leading statement.  For the two
    longer productions the last element is the Program already built for
    the remaining statements; the leading statement is inserted at its
    front and that Program is returned.

    Previously the three-symbol production matched no branch and the
    function returned None implicitly, while the ``stmts[0] is None``
    branch could never match because every production starts with
    ``statement``.

    :param stmts: list of the values of the matched grammar symbols.
    :returns: a simpleast.Program.
    """
    first = stmts[0]
    if len(stmts) == 1:
        return simpleast.Program([first])
    # Using the last element handles both "statement statements" and
    # "statement newlines statements" (whatever sits in the middle is
    # not needed).
    rest = stmts[-1]
    rest.statements.insert(0, first)
    return rest
@pg.production("whilestatement : While expression block")
def whilestatement(whilestmt):
    """Build a WhileStatement from ``While expression block``.

    The handler used to be defined as ``ifstatement`` as well, which
    rebound the module-level name of the real if-statement handler.  The
    grammar rule is registered by the decorator, so the rename does not
    affect parsing.

    :param whilestmt: ``[While token, condition AST, block AST]``.
    :returns: simpleast.WhileStatement with no else block.
    """
    condition = whilestmt[1]
    body = whilestmt[2]
    return simpleast.WhileStatement(condition, body)
: basic_expression msg-chain") +def expression(expr): + if len(expr) > 1: + prev = expr[0] + for i in expr[1]: + i.receiver = prev + prev = i + return expr[1][-1] + return expr[0] + + +@pg.production("msg-chain : methodcall") +@pg.production("msg-chain : methodcall msg-chain") +def msg_chain(cc): + if len(cc) > 1: + return [cc[0]] + cc[1] + return cc + + +@pg.production("basic_expression : Number") +def number_expression(stmt): + return simpleast.IntLiteral(stmt[0].value) + + +@pg.production("basic_expression : String") +def string_expression(stmt): + return simpleast.StringLiteral(stmt[0].value) + + +@pg.production("basic_expression : implicitselfmethodcall") +def implicitselfmethodcall(call): + methodcall = call[0] + methodcall.receiver = simpleast.ImplicitSelf() + return methodcall + + +@pg.production("implicitselfmethodcall : methodcall") +def implicitselfmethodcall_methodcall(call): + return call[0] + + +@pg.production("methodcall : primitivemethodcall") +@pg.production("methodcall : simplemethodcall") +def methodcall(call): + return call[0] + + +@pg.production("simplemethodcall : name") +@pg.production("simplemethodcall : name argumentslist") +def simplemethodcall(call): + return build_methodcall(call, simpleast.MethodCall) + + +@pg.production("primitivemethodcall : primitivename") +@pg.production("primitivemethodcall : primitivename argumentslist") +def primitivemethodcall(call): + return build_methodcall(call, simpleast.PrimitiveMethodCall) + + +@pg.production("argumentslist : OpenBracket arguments CloseBracket") +@pg.production("argumentnamelist : OpenBracket argumentnames CloseBracket") +@pg.production("parentlist : OpenBracket parentdefinitions CloseBracket") +def argumentslist(args): + return args[1] + + +@pg.production("arguments : expression") +@pg.production("arguments : expression Comma") +@pg.production("arguments : expression Comma arguments") +@pg.production("argumentnames : name") +@pg.production("argumentnames : name Comma") 
class ParseError(Exception):
    """Error raised when the token stream does not match the grammar.

    Attributes:
        source_pos: position object; ``lineno`` and ``colno`` are read
            when formatting the message.
        errorinformation: object with ``expected`` (list of strings) and
            ``customerror`` (string or None) attributes, or None.
        source: program text used to show the offending line; may be "".
    """

    def __init__(self, source_pos, errorinformation, source=""):
        self.source_pos = source_pos
        self.errorinformation = errorinformation
        self.args = (source_pos, errorinformation)
        self.source = source

    def _source_excerpt(self):
        """Return the message lines showing the offending source line.

        Yields the source line plus a caret line, or a single empty line
        when no source is attached or the line number lies outside it.
        """
        if not self.source:
            return [""]
        try:
            line = self.source.split("\n")[self.source_pos.lineno]
        except IndexError:
            # Formatting an error must not raise a second, unrelated error
            # that hides the parse error itself.
            return [""]
        return [line, " " * self.source_pos.colno + "^"]

    def _format_expected(self, reasons):
        """Join the expected-token names into one phrase ('' if none)."""
        if not reasons:
            return ''
        if len(reasons) > 1:
            quoted = ", ".join(["'%s'" % e for e in reasons[:-1]])
            return "%s or '%s'" % (quoted, reasons[-1])
        return reasons[0]

    def nice_error_message(self, filename=""):
        """Return a multi-line, human readable description of the error.

        NOTE(review): the header prints ``lineno + 1`` and the source line
        is looked up with ``lineno``, i.e. a 0-based ``lineno`` is assumed
        -- confirm against the positions the lexer actually produces.

        :param filename: name shown in the first line of the message.
        :returns: the message as a single string.
        """
        result = [" File %s, line %s" % (filename, self.source_pos.lineno + 1)]
        result.extend(self._source_excerpt())
        result.append("ParseError")
        info = self.errorinformation
        if info:
            expected = self._format_expected(info.expected)
            if expected:
                result.append("expected %s" % (expected,))
            if info.customerror:
                result.append(info.customerror)
        return "\n".join(result)

    def __str__(self):
        return self.nice_error_message()