From 744de3c46e8f9c22e1ea2d2e9bf58958de49ec07 Mon Sep 17 00:00:00 2001 From: ChUrl Date: Mon, 9 Aug 2021 16:32:53 +0200 Subject: [PATCH] initial commit with vorgabe --- builtins.simple | 11 ++ bytecodeinterpreter.py | 124 +++++++++++++ c3computation.py | 46 +++++ compile.py | 401 +++++++++++++++++++++++++++++++++++++++++ disass.py | 118 ++++++++++++ interpreter.py | 1 + objmodel.py | 120 ++++++++++++ objspace.py | 87 +++++++++ primitives.py | 47 +++++ shell.nix | 35 ++++ simpleast.py | 279 ++++++++++++++++++++++++++++ simplelexer.py | 190 +++++++++++++++++++ simpleparser.py | 333 ++++++++++++++++++++++++++++++++++ 13 files changed, 1792 insertions(+) create mode 100644 builtins.simple create mode 100644 bytecodeinterpreter.py create mode 100644 c3computation.py create mode 100644 compile.py create mode 100644 disass.py create mode 100644 interpreter.py create mode 100644 objmodel.py create mode 100644 objspace.py create mode 100644 primitives.py create mode 100644 shell.nix create mode 100644 simpleast.py create mode 100644 simplelexer.py create mode 100644 simpleparser.py diff --git a/builtins.simple b/builtins.simple new file mode 100644 index 0000000..b257fe6 --- /dev/null +++ b/builtins.simple @@ -0,0 +1,11 @@ +object nil: + 1 + +def pass: + nil + +object inttrait: + def add(other): + self $int_add(other) + def eq(other): + self $int_eq(other) diff --git a/bytecodeinterpreter.py b/bytecodeinterpreter.py new file mode 100644 index 0000000..80d7358 --- /dev/null +++ b/bytecodeinterpreter.py @@ -0,0 +1,124 @@ +from simpleparser import parse +from objspace import ObjectSpace +import compile +from disass import disassemble + + +class ByteCodeError(Exception): + pass + + +class Interpreter(object): + + def __init__(self, builtincode=None): + # Using an instance variable to keep the public interface + self.space = ObjectSpace(self) + self.space.setup_builtins(builtincode) + + def eval(self, ast, w_context): + code = compile.compile(ast) + return self.run(code, 
w_context) + + def read4(self, code, pc): + highval = ord(code[pc + 3]) + if highval >= 128: + highval -= 256 + return (ord(code[pc]) | + (ord(code[pc + 1]) << 8) | + (ord(code[pc + 2]) << 16) | + (highval << 24)) + + def run(self, bytecode, w_context): + pc = 0 + stack = [] + code = bytecode.code + print(disassemble(bytecode)) + while pc < len(code): + opcode = ord(code[pc]) + pc += 1 + if compile.isjump(opcode): + oparg = self.read4(code, pc) + pc += 4 + if opcode == compile.JUMP: + pc += oparg + elif opcode == compile.JUMP_IF_FALSE: + w_condition = stack.pop() + if self.space.isfalse(w_condition): + pc += oparg + continue + elif compile.hasarg(opcode): + oparg = ord(code[pc]) + pc += 1 + if oparg >= 128: + if oparg > 128: + oparg -= 256 + else: + oparg = self.read4(code, pc) + pc += 4 + if opcode == compile.MAKE_OBJECT: + name = bytecode.symbols[oparg] + obj = self.space.newobject(name, {'__parent__': w_context}, []) + stack.append(obj) + elif opcode == compile.MAKE_OBJECT_CALL: + self.run(bytecode.subbytecodes[oparg], stack[-1]) + elif opcode == compile.INT_LITERAL: + w_value = self.space.newint(oparg) + stack.append(w_value) + elif opcode == compile.MAKE_FUNCTION: + bc = bytecode.subbytecodes[oparg] + w_method = self.space.definemethod(name=bc.name, code=bc, w_target=w_context) + stack.append(w_method) + elif opcode == compile.METHOD_LOOKUP: + name = bytecode.symbols[oparg] + w_method = self.space.getvalue(stack[-1], name) + stack.append(w_method) + elif opcode == compile.METHOD_CALL: + arguments_w = [stack.pop() for n in range(oparg)] + arguments_w.reverse() + # + w_method = stack.pop() + w_receiver = stack.pop() + w_result = self.space.call(w_method, w_receiver, arguments_w) + stack.append(w_result) + elif opcode == compile.PRIMITIVE_METHOD_CALL: + nargs = self.space.get_number_of_arguments_of_primitive(oparg) + arguments_w = [stack.pop() for n in range(nargs)] + arguments_w.reverse() + w_receiver = stack.pop() + w_result = self.space.call_primitive(oparg, 
w_receiver, arguments_w) + stack.append(w_result) + elif opcode == compile.SET_LOCAL: + w_value = stack[-1] + name = bytecode.symbols[oparg] + self.space.setvalue(w_context, name, w_value) + elif opcode == compile.ASSIGNMENT: + w_value = stack.pop() + name = bytecode.symbols[oparg] + self.space.setvalue(stack[-1], name, w_value) + elif opcode == compile.ASSIGNMENT_APPEND_PARENT: + w_value = stack.pop() + name = bytecode.symbols[oparg] + self.space.setvalue(stack[-1], name, w_value) + self.space.addparent(stack[-1], name) + elif opcode == compile.GET_LOCAL: + name = bytecode.symbols[oparg] + w_value = self.space.getvalue(w_context, name) + w_value = self.space.call(w_value, w_context, []) + stack.append(w_value) + else: + raise ByteCodeError('Invalid bytecode with arguments') + else: + if opcode == compile.POP: + stack.pop() + elif opcode == compile.IMPLICIT_SELF: + stack.append(w_context) + elif opcode == compile.DUP: + stack.append(stack[-1]) + else: + raise ByteCodeError('Invalid bytecode') + assert pc == len(code) + assert len(stack) == 1 + return stack.pop() + + def make_module(self): + return self.space.make_module() diff --git a/c3computation.py b/c3computation.py new file mode 100644 index 0000000..728a02e --- /dev/null +++ b/c3computation.py @@ -0,0 +1,46 @@ +def compute_C3_mro(w_obj): + order_w = [] + parents_w = w_obj.getparents() + orderlists = [w_base.get_mro() + for w_base in parents_w] + orderlists.append([w_obj] + parents_w) + while orderlists: + for candidatelist in orderlists: + w_candidate = candidatelist[0] + if mro_blockinglist(w_candidate, orderlists) is None: + break # good w_candidate + else: + return mro_error(orderlists) # no candidate found + assert w_candidate not in order_w + order_w.append(w_candidate) + for i in range(len(orderlists)-1, -1, -1): + if orderlists[i][0] is w_candidate: + del orderlists[i][0] + if len(orderlists[i]) == 0: + del orderlists[i] + return order_w + + +def mro_blockinglist(w_candidate, orderlists): + for lst in 
orderlists: + if w_candidate in lst[1:]: + return lst + return None # good candidate + +def mro_error(orderlists): + # w_obj.getname() is a pure debugging-helper. it can return whatever string + cycle = [] + w_candidate = orderlists[-1][0] + if w_candidate in orderlists[-1][1:]: + # explicit error message for this specific case + raise TypeError("duplicate parent %s" % w_candidate.getname()) + while w_candidate not in cycle: + cycle.append(w_candidate) + nextblockinglist = mro_blockinglist(w_candidate, orderlists) + w_candidate = nextblockinglist[0] + del cycle[:cycle.index(w_candidate)] + cycle.append(w_candidate) + cycle.reverse() + names = [w_obj.getname() for w_obj in cycle] + raise TypeError("cycle among base parents: " + ' < '.join(names)) + diff --git a/compile.py b/compile.py new file mode 100644 index 0000000..704dfec --- /dev/null +++ b/compile.py @@ -0,0 +1,401 @@ +"""This file contains the bytecode-compiler. + +An instruction can have one or no arguments. There are two different ways how +an argument is encoded: + + ARG4 encodes an argument in 4 bytes, in a little-endian manner + + SMALLARG encodes an integer i differently based on its size: + if -127 <= i <= 127 the integers is encoded as one byte + otherwise it is encoded as 5 bytes: + 1 marker byte equal to -128 to signify that the large form is used + 4 bytes to encode the integer as with ARG4 + +The instruction set contains the following instructions: + + INT_LITERAL + Pushes an integer literal on the stack. The argument is the value of the + integer. + + IMPLICIT_SELF + Pushes the implicit self on the stack. + + POP + Pops the top element from the stack. + + DUP + Duplicates the top element of the stack. + + JUMP + Unconditionally jump to a different point in the program. The offset of the + program counter to the target is given by the argument. + + JUMP_IF_FALSE + Pops an object from the stack and jump to a different point in the program + if that object is false. 
The offset of the program counter to the target is + given by the argument. + + ASSIGNMENT + Assigns the first object on the stack to the second object on the stack. + The objects are popped from the stack, and then the assigned object + (i.e. the `expression') is pushed again. The attribute name is given by + the argument, which is an index into the symbols list of the bytecode + object. + + PRIMITIVE_METHOD_CALL + Call a primitive method. The argument is an index into a list of all + primitives, which must be defined in the "primitive" module. The arguments + are found on the stack and are popped by this bytecode; the result is + pushed on the stack. To make the compiler work correctly for primitives, + the "primitive" module needs to expose two function + "get_index_of_primitive_named", which maps primitive name to a primitive + number, and get_number_of_arguments_of_primitive, which maps a primitive + number to the number of arguments the corresponding function takes. + + METHOD_LOOKUP + Looks up a method in the object at the top of the stack. The method name + is given by the argument, which is an index into the symbols list of the + bytecode object. The method is pushed on the stack (and the original + object is not removed). + + METHOD_CALL + Calls a method. The first n (where n is the argument of the bytecode) are + the arguments to the method, in reverse order. The next object on the + stack is the method. The final object is the receiver. All these objects + are popped from the stack. The result of the method call is pushed. + + MAKE_FUNCTION + Creates a new W_Method object and pushes it on the stack. The bytecode of + the method can be found in the subbytecodes list of the current bytecode + object; the index is given by the argument. + + MAKE_OBJECT + Create a new (empty) object and pushes it on the stack. The argument (which + can be ignored for now) is the index in symbols of the name of the object. 
+ + ASSIGNMENT_APPEND_PARENT + Adds a new parent to an object. This bytecode is only used during object + creation. It works like the ASSIGNMENT bytecode, but (1) it also adds the + name to the list of parent attributes of the object, and (2) it leaves + on the stack the assigned-to object (the `lvalue'), not the assigned + object (the `expression'). + + MAKE_OBJECT_CALL + Execute the body of a newly created object. The object is on the top of the + stack and is left there. The bytecode of the body can be found in the + subbytecodes list of the current bytecode object, the index is given by the + argument. + + GET_LOCAL + This is an optimization for the common case of sending a method without + arguments to the implicit self. This bytecode is equivalent to: + IMPLICIT_SELF + METHOD_LOOKUP + METHOD_CALL 0 + + SET_LOCAL + This is an optimization for the common case of writing a slot to the + implicit self. This bytecode is equivalent to: + IMPLICIT_SELF + ASSIGNMENT + +Note that there is no "return" bytecode. When the end of the bytecode is +reached, the top of the stack is returned (and the stack should have only one +element on it). 
+""" +import sys + +import simpleast + +# ---------- bytecodes ---------- + +INT_LITERAL = 2 # integer value +ASSIGNMENT = 4 # index of attrname +METHOD_LOOKUP = 5 # index of method name +METHOD_CALL = 6 # number of arguments +PRIMITIVE_METHOD_CALL = 7 # number of the primitive +MAKE_FUNCTION = 8 # bytecode literal index +MAKE_OBJECT = 9 # index of object name +ASSIGNMENT_APPEND_PARENT = 10 # index of parentname +MAKE_OBJECT_CALL = 11 # bytecode literal index +JUMP_IF_FALSE = 12 # offset +JUMP = 13 # offset +GET_LOCAL = 15 # index of attrname (optimization) +SET_LOCAL = 16 # index of attrname (optimization) + +IMPLICIT_SELF = 32 # (no argument) +POP = 33 # (no argument) +DUP = 34 # (no argument) + +opcode_names = [None] * 256 +for key, value in list(globals().items()): + if key.strip("_").isupper(): + opcode_names[value] = key + + + +def hasarg(opcode): + """ Helper function to determine whether an opcode has an argument.""" + return opcode < 32 + +def isjump(opcode): + """ Helper function to determine whether an opcode is a jump.""" + return opcode == JUMP_IF_FALSE or opcode == JUMP + + +class Bytecode(object): + """ A class representing the bytecode of one piece of code. + + self.code is a string that encodes the bytecode itself. + + self.symbols is a list of strings containing the names that occur in the + piece of code. + + self.subbytecodes is a list of further bytecodes that occur in the piece of + code. + """ + _immutable_ = True + _immutable_fields_ = ["symbols[*]", "subbytecodes[*]"] + + def __init__(self, code, name, symbols, + subbytecodes, numargs, stackdepth): + self.code = code + if name is None: + name = "?" 
+ self.name = name + self.symbols = symbols + self.subbytecodes = subbytecodes + self.numargs = numargs + self.stackdepth = stackdepth + + def dis(self, pc=-1): + from disass import disassemble + disassemble(self, pc=pc) + + +# ---------- compiler ---------- + +def compile(ast, argumentnames=[], name=None): + """ Turns an AST into a Bytecode object.""" + assert isinstance(ast, simpleast.Program) + comp = Compiler() + for arg in argumentnames: + comp.lookup_symbol(arg) + comp.lookup_symbol("__parent__") + comp.lookup_symbol("self") + comp.compile(ast, True) + return comp.make_bytecode(len(argumentnames), name) + + +stack_effects = { + INT_LITERAL: 1, + ASSIGNMENT: -1, + METHOD_LOOKUP: 1, + MAKE_FUNCTION: 1, + MAKE_OBJECT: 1, + ASSIGNMENT_APPEND_PARENT: -1, + MAKE_OBJECT_CALL: 0, + GET_LOCAL: 1, + SET_LOCAL: 0, + JUMP: 0, + JUMP_IF_FALSE: -1, + IMPLICIT_SELF: 1, + POP: -1, + DUP: 1, +} + + +class Compiler(object): + + def __init__(self): + self.code = [] + self.symbols = {} + self.subbytecodes = [] + self.stackdepth = 0 + self.max_stackdepth = 0 + + def make_bytecode(self, numargs, funcname): + symbols = [None] * len(self.symbols) + for name, index in list(self.symbols.items()): + symbols[index] = name + result = Bytecode(''.join(self.code), + funcname, + symbols, + self.subbytecodes, + numargs, self.max_stackdepth) + assert self.stackdepth == 1 + return result + + def stack_effect(self, num): + self.stackdepth += num + self.max_stackdepth = max(self.stackdepth, self.max_stackdepth) + + def emit(self, opcode, arg=None, stackeffect=sys.maxsize): + self.code.append(chr(opcode)) + if isjump(opcode): + assert arg is None + for c in self.encode4(0): + self.code.append(c) + elif hasarg(opcode): + assert isinstance(arg, int) + if -127 <= arg <= 127: + self.code.append(chr(arg & 0xFF)) + else: + self.code.append(chr(128)) + for c in self.encode4(arg): + self.code.append(c) + else: + assert arg is None + + if opcode in stack_effects: + stackeffect = stack_effects[opcode] + 
else: + assert stackeffect != sys.maxsize + self.stack_effect(stackeffect) + + def get_position(self): + return len(self.code) + + def set_target_position(self, oldposition, newtarget): + offset = newtarget - (oldposition+5) + i = 0 + for c in self.encode4(offset): + self.code[oldposition+1+i] = c + i += 1 + + def encode4(self, value): + return [chr(value & 0xFF), + chr((value >> 8) & 0xFF), + chr((value >> 16) & 0xFF), + chr((value >> 24) & 0xFF)] + + def lookup_symbol(self, symbol): + if symbol not in self.symbols: + self.symbols[symbol] = len(self.symbols) + return self.symbols[symbol] + + + def compile(self, ast, needsresult=True): + return getattr(self, "compile_" + ast.__class__.__name__)(ast, needsresult) + + def compile_IntLiteral(self, astnode, needsresult): + self.emit(INT_LITERAL, astnode.value) + + def compile_ImplicitSelf(self, astnode, needsresult): + self.emit(IMPLICIT_SELF) + + def compile_Assignment(self, astnode, needsresult): + if isinstance(astnode.lvalue, simpleast.ImplicitSelf): + self.compile(astnode.expression) + self.emit(SET_LOCAL, self.lookup_symbol(astnode.attrname)) + else: + self.compile(astnode.lvalue) + self.compile(astnode.expression) + self.emit(ASSIGNMENT, self.lookup_symbol(astnode.attrname)) + if not needsresult: + self.emit(POP) + + def compile_ExprStatement(self, astnode, needsresult): + self.compile(astnode.expression) + if not needsresult: + self.emit(POP) + + def compile_MethodCall(self, astnode, needsresult): + numargs = len(astnode.arguments) + if (isinstance(astnode.receiver, simpleast.ImplicitSelf) and + numargs == 0): + self.emit(GET_LOCAL, self.lookup_symbol(astnode.methodname)) + else: + self.compile(astnode.receiver) + self.emit(METHOD_LOOKUP, self.lookup_symbol(astnode.methodname)) + for arg in astnode.arguments: + self.compile(arg) + self.emit(METHOD_CALL, numargs, -numargs - 1) + + def compile_PrimitiveMethodCall(self, astnode, needsresult): + import primitives + index = 
primitives.get_index_of_primitive_named(astnode.methodname) + expected_args = primitives.get_number_of_arguments_of_primitive(index) + if not (len(astnode.arguments) == expected_args): + raise TypeError( + "Expected {ex} arguments, received {re}.".format(ex=expected_args, re=len(astnode.arguments))) + self.compile(astnode.receiver) + for arg in astnode.arguments: + self.compile(arg) + self.emit(PRIMITIVE_METHOD_CALL, index, -len(astnode.arguments)) + + def compile_ObjectDefinition(self, astnode, needsresult): + self.emit(MAKE_OBJECT, self.lookup_symbol(astnode.name)) + # + for i in range(len(astnode.parentdefinitions)): + name = astnode.parentnames[i] + if name == "__parent__": + self.emit(DUP) + self.compile(astnode.parentdefinitions[i]) + self.emit(ASSIGNMENT, self.lookup_symbol(name)) + self.emit(POP) + else: + self.compile(astnode.parentdefinitions[i]) + self.emit(ASSIGNMENT_APPEND_PARENT, self.lookup_symbol(name)) + # + bytecode = compile(astnode.block, name=astnode.name) + index = len(self.subbytecodes) + self.subbytecodes.append(bytecode) + self.emit(MAKE_OBJECT_CALL, index) + self.emit(SET_LOCAL, self.lookup_symbol(astnode.name)) + if not needsresult: + self.emit(POP) + + def compile_Program(self, astnode, needsresult): + for statement in astnode.statements[:-1]: + self.compile(statement, needsresult=False) + laststatement = astnode.statements[-1] + self.compile(laststatement, needsresult) + + def compile_FunctionDefinition(self, astnode, needsresult): + bytecode = compile(astnode.block, astnode.arguments, astnode.name) + index = len(self.subbytecodes) + self.subbytecodes.append(bytecode) + self.emit(MAKE_FUNCTION, index) + self.emit(SET_LOCAL, self.lookup_symbol(astnode.name)) + if not needsresult: + self.emit(POP) + + def compile_IfStatement(self, astnode, needsresult): + # XXX this can compute the needed stack by one too much + self.compile(astnode.condition) + position1 = self.get_position() + self.emit(JUMP_IF_FALSE) + # + self.compile(astnode.ifblock, 
needsresult) + position2 = self.get_position() + self.emit(JUMP) + # + self.set_target_position(position1, self.get_position()) + if astnode.elseblock: + self.compile(astnode.elseblock, needsresult) + else: + if needsresult: + self.emit(IMPLICIT_SELF) + if needsresult: + self.stack_effect(-1) + # + self.set_target_position(position2, self.get_position()) + + def compile_WhileStatement(self, astnode, needsresult): + if needsresult: + self.emit(IMPLICIT_SELF) + # + position1 = self.get_position() + self.compile(astnode.condition) + position2 = self.get_position() + self.emit(JUMP_IF_FALSE) + # + if needsresult: + self.emit(POP) + self.compile(astnode.whileblock, needsresult) + position3 = self.get_position() + self.emit(JUMP) + self.set_target_position(position3, position1) + # + self.set_target_position(position2, self.get_position()) diff --git a/disass.py b/disass.py new file mode 100644 index 0000000..1dc6129 --- /dev/null +++ b/disass.py @@ -0,0 +1,118 @@ +import compile + +def disassemble(bytecode, indent='', pc=-1): + """ disassemble a bytecode object and print a readabable version of it""" + assert isinstance(bytecode, compile.Bytecode) + findlabeltargets = FindLabelTargets() + findlabeltargets.disassemble(bytecode) + disass = Disassembler(indent, findlabeltargets.targets) + disass.disassemble(bytecode, pc) + + +opcode2name = {} +for name, value in list(compile.__dict__.items()): + if name == name.upper() and isinstance(value, int): + opcode2name[value] = name + + +class AbstractDisassembler(object): + + def read4(self, code, pc): + highval = ord(code[pc+3]) + if highval >= 128: + highval -= 256 + return (ord(code[pc]) | + (ord(code[pc+1]) << 8) | + (ord(code[pc+2]) << 16) | + (highval << 24)) + + def disassemble(self, bytecode, currpc=-1): + self.currpc = currpc + self.bytecode = bytecode + code = bytecode.code + pc = 0 + while pc < len(code): + self.start(pc) + opcode = ord(code[pc]) + pc += 1 + if compile.isjump(opcode): + oparg = self.read4(code, pc) + pc 
+= 4 + elif compile.hasarg(opcode): + oparg = ord(code[pc]) + pc += 1 + if oparg >= 128: + if oparg > 128: + oparg -= 256 + else: + oparg = self.read4(code, pc) + pc += 4 + else: + oparg = None + self.pc = pc + self.end(opcode, oparg) + name = opcode2name[opcode] + method = getattr(self, name, self.dummy) + method(opcode, oparg) + + def start(self, pc): + pass + + def end(self, opcode, oparg): + pass + + def dummy(self, opcode, oparg): + pass + + +class FindLabelTargets(AbstractDisassembler): + + def __init__(self): + self.targets = {} + + def JUMP_IF_FALSE(self, opcode, oparg): + self.targets[self.pc + oparg] = True + + JUMP = JUMP_IF_FALSE + + +class Disassembler(AbstractDisassembler): + + def __init__(self, indent, targets): + self.indent = indent + self.targets = targets + + def start(self, pc): + if pc in self.targets: + print(self.indent, '>>', pc) + if pc == self.currpc: + print(self.indent, '->', pc) + + def end(self, opcode, oparg): + print(self.indent, '\t', opcode2name[opcode], end=' ') + + def JUMP_IF_FALSE(self, opcode, oparg): + print('\t', '-->', self.pc + oparg) + + JUMP = JUMP_IF_FALSE + + def ASSIGNMENT(self, opcode, oparg): + print('\t', repr(self.bytecode.symbols[oparg])) + + METHOD_LOOKUP = ASSIGNMENT + ASSIGNMENT_APPEND_PARENT = ASSIGNMENT + GET_LOCAL = ASSIGNMENT + SET_LOCAL = ASSIGNMENT + + def PRIMITIVE_METHOD_CALL(self, opcode, oparg): + import primitives + func = primitives.all_primitives[oparg] + print('\t', repr('$' + func.__qualname__)) + + def dummy(self, opcode, oparg): + if oparg is None: + print() + else: + print('\t', oparg) + + diff --git a/interpreter.py b/interpreter.py new file mode 100644 index 0000000..950b5bf --- /dev/null +++ b/interpreter.py @@ -0,0 +1 @@ +from bytecodeinterpreter import Interpreter diff --git a/objmodel.py b/objmodel.py new file mode 100644 index 0000000..172b548 --- /dev/null +++ b/objmodel.py @@ -0,0 +1,120 @@ +from c3computation import compute_C3_mro as c3 + + +class AbstractObject(object): + + def 
call(self, w_receiver, args_w): + return self + + def istrue(self): + return True + + def clone(self): + raise NotImplementedError + + def hasslot(self): + raise NotImplementedError + + def getvalue(self, name): + for obj in self.get_mro(): + if not obj.hasslot(name): + continue + return obj.slots[name] + + def get_mro(self): + return c3(self) + + +class W_NormalObject(AbstractObject): + + def __init__(self, name=None, slots=None, parents=None, space=None): + self.space = space + self.name = name + if slots: + self.slots = slots + else: + self.slots = {} + if parents is None: + parents = [] + if len(parents) > 0: + for x in parents: + assert x in slots + self.parents = parents + + def getparents(self): + parents = self.parents + if '__parent__' not in parents: + parents.append('__parent__') + return [self.slots[p] for p in parents if p in self.slots] + + def hasslot(self, name): + return name in self.slots + + def setvalue(self, name, w_value): + self.slots[name] = w_value + + def addparent(self, name): + self.parents.append(name) + + def __str__(self): + return self.getname() + + __repr__ = __str__ + + def getname(self): + return "".format( + name=self.name if self.name else '', + slots=self.slots) + + def clone(self): + return W_NormalObject( + name=self.name, + parents=self.parents, + slots=self.slots.copy()) + + +class W_Integer(AbstractObject): + def __init__(self, value, space=None): + self.value = value + self.space = space + + def getparents(self): + if self.space is None: + return [] # for tests + inttrait = self.space.getbuiltin('inttrait') + assert inttrait is not None, 'O_o bogus state' + return [inttrait] + + def hasslot(self, name): + return False + + def __str__(self): + return str(self.value) + + __repr__ = __str__ + + def istrue(self): + return self.value != 0 + + +class W_Method(W_NormalObject): + + def __init__(self, code, *args, **kwargs): + super(W_Method, self).__init__(*args, **kwargs) + self.code = code + + def clone(self): + return 
W_Method(code=self.code, + name=self.name, parents=self.parents, + slots=self.slots.copy()) + + def getname(self): + return "".format(name=self.name) + + def call(self, w_receiver, args_w): + w_context = self.clone() + assert len(args_w) == self.code.numargs + for i in range(self.code.numargs): + self.space.setvalue(w_context, self.code.symbols[i], args_w[i]) + self.space.setvalue(w_context, 'self', w_receiver) + return self.space.execute(w_context.code, w_context) diff --git a/objspace.py b/objspace.py new file mode 100644 index 0000000..c98c87d --- /dev/null +++ b/objspace.py @@ -0,0 +1,87 @@ +from objmodel import W_Integer +from objmodel import W_Method +from objmodel import W_NormalObject + +import primitives + + +class ObjectSpace(object): + + def __init__(self, interpreter): + self.interpreter = interpreter + + def setup_builtins(self, builtincode=None): + if builtincode is None: + builtincode = self._load_default_builtins() + + w_builtins = W_NormalObject(name='Lobby', slots={}) + self.w_builtins = w_builtins + from simpleparser import parse + ast = parse(builtincode) + + self.interpreter.eval(ast, w_builtins) + + def _load_default_builtins(self): + import os + builtins = os.path.join( + os.path.dirname(__file__), + 'builtins.simple') + with open(builtins, 'r') as f: + return f.read() + + def getbuiltin(self, name): + return self.w_builtins.getvalue(name) + + def getbuiltins(self): + return self.w_builtins + + def make_module(self, name=None): + if not hasattr(self, 'w_builtins'): + slots = {} + else: + slots = {'__parent__': self.getbuiltins()} + return W_NormalObject(name=name, slots=slots) + + def newobject(self, name, slots, parentnames): + return W_NormalObject(space=self, name=name, + slots=slots, parents=parentnames) + + def newint(self, value): + return W_Integer(value, space=self) + + def definemethod(self, name, code, w_target): + w_meth = W_Method(code, name=name, + slots={'__parent__': w_target}, + space=self) + return w_meth + + def 
execute(self, code, w_context): + return self.interpreter.run(code, w_context) + + def setvalue(self, w_receiver, name, w_value): + w_receiver.setvalue(name, w_value) + + def addparent(self, w_receiver, name): + w_receiver.addparent(name) + + def getvalue(self, w_receiver, name): + return w_receiver.getvalue(name) + + def istrue(self, w_condition): + return w_condition.istrue() + + def isfalse(self, w_condition): + return not w_condition.istrue() + + def call_primitive(self, primitive_num, w_receiver, arguments_w): + f = primitives.all_primitives[primitive_num] + return f(w_receiver, arguments_w, space=self) + + def get_number_of_arguments_of_primitive(self, primitive_num): + return primitives.get_number_of_arguments_of_primitive(primitive_num) + + def call(self, w_method, w_receiver, arguments_w): + return w_method.call(w_receiver, arguments_w) + + def clone(self, w_value): + return w_value.clone() diff --git a/primitives.py b/primitives.py new file mode 100644 index 0000000..60bbeb5 --- /dev/null +++ b/primitives.py @@ -0,0 +1,47 @@ +registry = {} +all_primitives = [] +primitive_number_of_arguments = [] + + +def primitive(name, unwrap_spec, wrap_spec): + assert '$' + name not in registry, '${name} already defined'.format(name=name) + primitive_number_of_arguments.append(len(unwrap_spec) - 1) # first argument is the receiver + def expose(func): + def unwrapper(w_receiver, args_w, space): + args = [w_receiver] + args_w + if len(args) != len(unwrap_spec): + raise TypeError( + "Expected {ex} arguments, received {re}.".format(ex=len(unwrap_spec), re=len(args))) + unwrapped_args = () + for t, arg in zip(unwrap_spec, args): + if t is int: + unwrapped_args += (arg.value, ) + else: + unwrapped_args += (arg, ) + result = func(*unwrapped_args) + if wrap_spec is int: + return space.newint(result) + return result + unwrapper.__qualname__ = name + all_primitives.append(unwrapper) + registry['$' + name] = len(all_primitives) - 1 + return None + return expose + + +def 
get_index_of_primitive_named(name): + return registry[name] + + +def get_number_of_arguments_of_primitive(idx): + return primitive_number_of_arguments[idx] + + +@primitive('int_add', [int, int], int) +def simple_int_add(a, b): + return a + b + + +@primitive('int_eq', [int, int], int) +def simple_int_eq(a, b): + return a == b diff --git a/shell.nix b/shell.nix new file mode 100644 index 0000000..089045c --- /dev/null +++ b/shell.nix @@ -0,0 +1,35 @@ +{ pkgs ? import {} }: + +with pkgs; + +let myPython = python39.buildEnv.override { + extraLibs = with python39Packages; [ + # Common Libs + rich + # numpy + # matplotlib + # scipy + # pytorch + # notbook + + # Doom Emacs Libs + black + pyflakes + isort + nose + pytest + + # DynLang + rply + ]; + }; +in + +mkShell { + buildInputs = [ + myPython + nodePackages.pyright # LSP + pipenv # Doom + jetbrains.pycharm-professional + ]; +} diff --git a/simpleast.py b/simpleast.py new file mode 100644 index 0000000..8a6448b --- /dev/null +++ b/simpleast.py @@ -0,0 +1,279 @@ +import py + + +class MetaNode(type): + def __init__(cls, name, bases, dict): + compile_name = "compile_" + name + abstract = not hasattr(cls, "attrs") + + def dispatch(self, compiler): + if not abstract: + getattr(compiler, compile_name)(self) + + cls.dispatch = dispatch + + +class AstNode(object): + __metaclass__ = MetaNode + + """ Base class for all ast nodes. 
Provides generic functionality.""" + tokens = None + + def __repr__(self): + return "%s(%s)" % (self.__class__.__name__, + ", ".join([repr(getattr(self, a)) for a in self.attrs])) + + def __eq__(self, other): + if self.__class__ is not other.__class__: + return False + for key in self.attrs: + if getattr(self, key) != getattr(other, key): + return False + return True + + def __ne__(self, other): + return not (self == other) + + def dot(self, result=None): + def uid(x): + result = id(x) + if result < 0: + result = 'm%d' % (-result,) + return result + + if result is None: + result = [] + body = [self.__class__.__name__] + children = [] + for key in self.attrs: + obj = getattr(self, key) + if isinstance(obj, list): + if obj and isinstance(obj[0], AstNode): + children.extend(obj) + for i, elt in enumerate(obj): + result.append("o%s -> o%s [label=\"%s[%s]\"]" % ( + uid(self), uid(elt), key, i)) + else: + body.append("%s = %s" % (key, obj)) + elif isinstance(obj, AstNode): + children.append(obj) + result.append("o%s -> o%s [label=\"%s\"]" % ( + uid(self), uid(obj), key)) + else: + body.append("%s = %s" % (key, obj)) + result.append("o%s [label=\"%s\", shape=box]" % (uid(self), repr("\n".join(body))[1:-1])) + for child in children: + child.dot(result) + return result + + def view(self): + """ Calling this method gives a graphical representation of the ast + graph. Needs a checkout of + https://bitbucket.org/pypy/pypy/src/default/dotviewer/ in the current directory + as well as graphviz (http://graphviz.org) installed. 
""" + from dotviewer import graphclient + content = ["digraph G{"] + content.extend(self.dot()) + content.append("}") + p = py.test.ensuretemp("simpleparser").join("temp.dot") + p.write("\n".join(content)) + graphclient.display_dot_file(str(p)) + + +class Expression(AstNode): + """ Abstract Base class for all expression AST nodes""" + + +class IntLiteral(Expression): + """ An integer literal (like "1") """ + + attrs = ["value"] + + def __init__(self, value): + self.value = int(value) + + +class StringLiteral(Expression): + """ An string literal (like "hello world") """ + + attrs = ["value"] + + def __init__(self, value): + self.value = str(value) + + +class MethodCall(Expression): + """ A call to a method with name 'methodname' on 'receiver' with + 'arguments' (which is a list of expression ASTs). + + Example: + f(1, 2, 3) + (receiver is ImplicitSelf(), methodname is 'f' and + args is [IntLiteral(1), IntLiteral(2), IntLiteral(3)]) + + 5 f + (receiver is IntLiteral(5), methodname is 'f' and args is []) + """ + + attrs = ["receiver", "methodname", "arguments"] + + def __init__(self, receiver, methodname, arguments=None): + self.receiver = receiver + self.methodname = methodname + if arguments is None: + arguments = [] + self.arguments = arguments + + +class PrimitiveMethodCall(MethodCall): + """ A method call to a primitive method. Primitive method names start with + '$'.The attributes are like those in MethodCall. + + Example: + + 5 $int_add(6) + (receiver is IntLiteral(5), methodname is '$int_add' and args + is [IntLiteral(6)]) + """ + + +class ImplicitSelf(Expression): + """ The receiver that is used when none is specified. + + Example: + f + this is a method call "f" on the implicit self.""" + + attrs = [] + + +class Statement(AstNode): + """ Base class of all statement nodes. """ + + +class Assignment(Statement): + """ An assignement: lvalue attrname = expression. 
class ObjectDefinition(Statement):
    """ Makes a new normal object.

    The block is immediately executed with the new object as the
    implicit self. The 'name' is bound to the new object in the
    outer scope's implicit self.

    Example:
        object x:
            def f(y):
                y

    The 'parentnames' attribute is a list of strings giving the parent
    attributes of the new object. The 'parentdefinitions' attribute is a list
    of expression-asts giving the initial value of those parent attributes.

    Example:
        object x(p1=a, p2=b):
            ...

    gives parentnames = ["p1", "p2"]
    and parentdefinitions = [MethodCall(ImplicitSelf, "a", []),
                             MethodCall(ImplicitSelf, "b", [])]

    Both attributes are always lists after construction, never None.
    """
    attrs = ["name", "block", "parentnames", "parentdefinitions"]

    def __init__(self, name, block, parentnames=None, parentdefinitions=None):
        self.name = name
        self.block = block
        if parentnames is None:
            # Without parent names there is nothing the definitions could
            # belong to, so both lists start out empty.
            parentnames = []
            parentdefinitions = []
        elif parentdefinitions is None:
            # Previously this case left parentdefinitions as None, which
            # breaks any code iterating over the attribute.
            parentdefinitions = []
        self.parentnames = parentnames
        self.parentdefinitions = parentdefinitions
# input: lexer token stream
# output: modified token stream
def postprocess(tokens, source):
    """ Turn the raw lexer tokens into the token stream the parser expects.

    'tokens' is the iterable of tokens produced by the lexer; 'source' is the
    program text (kept for interface compatibility, not used here).

    - "Ignore" tokens are dropped.
    - Bracket nesting is tracked; a closing bracket without a matching opening
      bracket raises rply's LexingError.
    - A "NewlineAndWhitespace" token that is directly followed by another one
      is skipped. Outside of brackets the remaining ones are expanded by
      compute_indent_or_dedent; inside brackets they are dropped (implicit
      line continuation).
    - An "EOF" token is appended, reusing the position of the last token.
      An empty input yields an empty list without an EOF token.

    Returns the list of output tokens.
    """
    # The original code raised an undefined 'LexerError' here, which surfaced
    # as a NameError. rply is already a dependency of this module.
    from rply.errors import LexingError

    parenthesis_level = 0
    indentation_levels = [0]
    output_tokens = []
    tokens = [token for token in tokens if token.name != "Ignore"]
    token = None
    for i, token in enumerate(tokens):
        # never create indent/dedent token between brackets
        if token.name == "OpenBracket":
            parenthesis_level += 1
            output_tokens.append(token)
        elif token.name == "CloseBracket":
            parenthesis_level -= 1
            if parenthesis_level < 0:
                raise LexingError("unmatched parenthesis", token.source_pos)
            output_tokens.append(token)
        elif token.name == "NewlineAndWhitespace":
            if i + 1 < len(tokens) and tokens[i + 1].name == "NewlineAndWhitespace":
                # only the last of several consecutive line breaks matters
                continue
            if parenthesis_level == 0:
                compute_indent_or_dedent(token, indentation_levels, output_tokens)
            # else: implicit line-continuation within parenthesis
        else:
            # something else: e.g. name, keyword, etc...
            output_tokens.append(token)
    if token is not None:
        output_tokens.append(Token("EOF", "", token.source_pos))
    return output_tokens
@pg.production("statements : statement")
@pg.production("statements : statement statements")
@pg.production("statements : statement newlines statements")
def statements(stmts):
    """ Build a simpleast.Program from a 'statements' production.

    'stmts' holds the matched symbols: the leading statement, optionally
    followed by newlines (reduced to None) and the Program built for the
    remaining statements. The leading statement is prepended to that Program.

    The three-symbol form 'statement newlines statements' used to match no
    branch and returned None; the trailing Program is now always taken from
    the last symbol.
    """
    first = stmts[0]
    if len(stmts) == 1:
        return simpleast.Program([first])
    rest = stmts[-1]
    if first is None:
        # defensive: nothing to prepend, keep the remaining program as is
        return rest
    rest.statements.insert(0, first)
    return rest
@pg.production("whilestatement : While expression block")
def whilestatement(whilestmt):
    """ Build a simpleast.WhileStatement from 'While expression block'.

    whilestmt[1] is the condition and whilestmt[2] the loop body. The grammar
    has no else branch for while loops, so 'elseblock' keeps its default.

    This handler was previously also called 'ifstatement', which rebound the
    module-level name of the if handler. The production is registered through
    the decorator, so the rename does not affect parsing.
    """
    return simpleast.WhileStatement(whilestmt[1], whilestmt[2])
@pg.production("msg-chain : methodcall")
@pg.production("msg-chain : methodcall msg-chain")
def msg_chain(cc):
    """ Flatten a chain of method calls into a list, in source order.

    For a single 'methodcall' the matched symbol list itself is returned;
    otherwise the first call is put in front of the already flattened rest.
    """
    if len(cc) <= 1:
        return cc
    head, tail = cc[0], cc[1]
    return [head] + tail
class ParseError(Exception):
    """ Error raised when the parser cannot handle its input.

    'source_pos' is the position of the offending token (its 'lineno' and
    'colno' attributes are used for the message), 'errorinformation' carries
    the expected tokens and/or a custom message (may be None), and 'source'
    is the program text used to quote the offending line (may be empty).
    """

    def __init__(self, source_pos, errorinformation, source=""):
        # Initialise the base class properly; this also sets self.args to
        # (source_pos, errorinformation) as before.
        Exception.__init__(self, source_pos, errorinformation)
        self.source_pos = source_pos
        self.errorinformation = errorinformation
        self.source = source

    def nice_error_message(self, filename=""):
        """ Return a multi-line, human readable description of the error.

        The offending source line and a caret marker are included when the
        source text is available and contains the reported line; otherwise
        an empty line is emitted instead, so that formatting the error can
        never fail with an IndexError and hide the actual parse error.
        """
        # NOTE(review): lineno is treated as 0-based here (index into the
        # split source, +1 for display) -- confirm against the lexer.
        lineno = self.source_pos.lineno
        result = [" File %s, line %s" % (filename, lineno + 1)]
        lines = self.source.split("\n") if self.source else []
        if 0 <= lineno < len(lines):
            result.append(lines[lineno])
            result.append(" " * self.source_pos.colno + "^")
        else:
            result.append("")
        result.append("ParseError")
        info = self.errorinformation
        if info:
            reasons = info.expected
            if reasons:
                if len(reasons) > 1:
                    quoted = ", ".join(["'%s'" % e for e in reasons[:-1]])
                    expected = "%s or '%s'" % (quoted, reasons[-1])
                else:
                    expected = reasons[0]
                if expected:
                    result.append("expected %s" % (expected,))
            if info.customerror:
                result.append(info.customerror)
        return "\n".join(result)

    def __str__(self):
        return self.nice_error_message()