1

initial commit with vorgabe

This commit is contained in:
ChUrl
2021-08-09 16:32:53 +02:00
commit 744de3c46e
13 changed files with 1792 additions and 0 deletions

11
builtins.simple Normal file
View File

@ -0,0 +1,11 @@
object nil:
1
def pass:
nil
object inttrait:
def add(other):
self $int_add(other)
def eq(other):
self $int_eq(other)

124
bytecodeinterpreter.py Normal file
View File

@ -0,0 +1,124 @@
from simpleparser import parse
from objspace import ObjectSpace
import compile
from disass import disassemble
class ByteCodeError(Exception):
    """Raised by the interpreter when it meets an opcode it cannot execute."""
class Interpreter(object):
    """Stack-based interpreter for the bytecode produced by the compiler.

    The instruction set and the argument encodings (ARG4, SMALLARG) are
    described in the docstring of the ``compile`` module.
    """

    # Debugging aid: when true, a disassembly of every bytecode object is
    # printed right before it is executed.  Off by default because run() is
    # re-entered for every method call and every object body.
    trace_bytecode = False

    def __init__(self, builtincode=None):
        """Create the object space and load the builtins.

        builtincode -- source text of the builtins; ``None`` lets the object
                       space load its default builtins.
        """
        # Using an instance variable to keep the public interface
        self.space = ObjectSpace(self)
        self.space.setup_builtins(builtincode)

    def eval(self, ast, w_context):
        """Compile *ast* and execute it with *w_context* as implicit self."""
        code = compile.compile(ast)
        return self.run(code, w_context)

    def read4(self, code, pc):
        """Decode the signed little-endian 4-byte integer at ``code[pc:pc+4]``."""
        highval = ord(code[pc + 3])
        # The most significant byte carries the sign.
        if highval >= 128:
            highval -= 256
        return (ord(code[pc]) |
                (ord(code[pc + 1]) << 8) |
                (ord(code[pc + 2]) << 16) |
                (highval << 24))

    def _read_smallarg(self, code, pc):
        """Decode a SMALLARG at *pc*; return ``(value, pc after the argument)``."""
        oparg = ord(code[pc])
        pc += 1
        if oparg > 128:
            # one-byte form, negative value
            oparg -= 256
        elif oparg == 128:
            # marker byte: the real value follows in ARG4 form
            oparg = self.read4(code, pc)
            pc += 4
        return oparg, pc

    def run(self, bytecode, w_context):
        """Execute *bytecode* with *w_context* as the implicit self.

        Returns the single object left on the stack when the end of the code
        is reached.  Raises ByteCodeError for an unknown opcode.
        """
        pc = 0
        stack = []
        code = bytecode.code
        space = self.space
        if self.trace_bytecode:
            # disassemble() prints by itself and returns nothing.
            disassemble(bytecode)
        while pc < len(code):
            opcode = ord(code[pc])
            pc += 1
            if compile.isjump(opcode):
                # Jump offsets are relative to the pc *after* the argument.
                oparg = self.read4(code, pc)
                pc += 4
                if opcode == compile.JUMP:
                    pc += oparg
                elif opcode == compile.JUMP_IF_FALSE:
                    w_condition = stack.pop()
                    if space.isfalse(w_condition):
                        pc += oparg
                continue
            elif compile.hasarg(opcode):
                oparg, pc = self._read_smallarg(code, pc)
                if opcode == compile.MAKE_OBJECT:
                    name = bytecode.symbols[oparg]
                    obj = space.newobject(name, {'__parent__': w_context}, [])
                    stack.append(obj)
                elif opcode == compile.MAKE_OBJECT_CALL:
                    # Run the object body with the new object as implicit
                    # self; the object itself stays on the stack.
                    self.run(bytecode.subbytecodes[oparg], stack[-1])
                elif opcode == compile.INT_LITERAL:
                    w_value = space.newint(oparg)
                    stack.append(w_value)
                elif opcode == compile.MAKE_FUNCTION:
                    bc = bytecode.subbytecodes[oparg]
                    w_method = space.definemethod(name=bc.name, code=bc,
                                                  w_target=w_context)
                    stack.append(w_method)
                elif opcode == compile.METHOD_LOOKUP:
                    # The receiver is left on the stack below the method.
                    name = bytecode.symbols[oparg]
                    w_method = space.getvalue(stack[-1], name)
                    stack.append(w_method)
                elif opcode == compile.METHOD_CALL:
                    # Arguments were pushed first-to-last, so popping yields
                    # them in reverse order.
                    arguments_w = [stack.pop() for n in range(oparg)]
                    arguments_w.reverse()
                    w_method = stack.pop()
                    w_receiver = stack.pop()
                    w_result = space.call(w_method, w_receiver, arguments_w)
                    stack.append(w_result)
                elif opcode == compile.PRIMITIVE_METHOD_CALL:
                    nargs = space.get_number_of_arguments_of_primitive(oparg)
                    arguments_w = [stack.pop() for n in range(nargs)]
                    arguments_w.reverse()
                    w_receiver = stack.pop()
                    w_result = space.call_primitive(oparg, w_receiver,
                                                    arguments_w)
                    stack.append(w_result)
                elif opcode == compile.SET_LOCAL:
                    # The assigned value stays on the stack as the result.
                    w_value = stack[-1]
                    name = bytecode.symbols[oparg]
                    space.setvalue(w_context, name, w_value)
                elif opcode == compile.ASSIGNMENT:
                    # Both operands are popped and the assigned value is
                    # pushed back, as specified for this opcode.  (Leaving
                    # the target object on the stack is what
                    # ASSIGNMENT_APPEND_PARENT does.)
                    w_value = stack.pop()
                    w_target = stack.pop()
                    name = bytecode.symbols[oparg]
                    space.setvalue(w_target, name, w_value)
                    stack.append(w_value)
                elif opcode == compile.ASSIGNMENT_APPEND_PARENT:
                    # Leaves the assigned-to object on the stack.
                    w_value = stack.pop()
                    name = bytecode.symbols[oparg]
                    space.setvalue(stack[-1], name, w_value)
                    space.addparent(stack[-1], name)
                elif opcode == compile.GET_LOCAL:
                    # Look the name up on the implicit self and call the
                    # result without arguments.
                    name = bytecode.symbols[oparg]
                    w_value = space.getvalue(w_context, name)
                    w_value = space.call(w_value, w_context, [])
                    stack.append(w_value)
                else:
                    raise ByteCodeError('Invalid bytecode with arguments')
            else:
                if opcode == compile.POP:
                    stack.pop()
                elif opcode == compile.IMPLICIT_SELF:
                    stack.append(w_context)
                elif opcode == compile.DUP:
                    stack.append(stack[-1])
                else:
                    raise ByteCodeError('Invalid bytecode')
        assert pc == len(code)
        assert len(stack) == 1
        return stack.pop()

    def make_module(self):
        """Return a fresh module object created by the object space."""
        return self.space.make_module()

46
c3computation.py Normal file
View File

@ -0,0 +1,46 @@
def compute_C3_mro(w_obj):
    """Return the C3 linearization (lookup order) of *w_obj* as a list.

    The lists being merged are the linearizations of all parents followed
    by the local precedence list ``[w_obj] + parents``.  When no consistent
    order exists, mro_error() raises a TypeError.
    """
    parents_w = w_obj.getparents()
    pending = [w_parent.get_mro() for w_parent in parents_w]
    pending.append([w_obj] + parents_w)
    linearization = []
    while pending:
        # A head is acceptable when it appears in the tail of no list.
        for heads in pending:
            w_candidate = heads[0]
            if mro_blockinglist(w_candidate, pending) is None:
                break
        else:
            # every head is blocked by some other list
            return mro_error(pending)
        assert w_candidate not in linearization
        linearization.append(w_candidate)
        # Walk backwards so that deleting an exhausted list does not shift
        # the indexes still to be visited.
        for index in reversed(range(len(pending))):
            if pending[index][0] is w_candidate:
                del pending[index][0]
                if not pending[index]:
                    del pending[index]
    return linearization
def mro_blockinglist(w_candidate, orderlists):
    """Return the first list holding *w_candidate* past its head, else None.

    A candidate found in the tail of some list cannot be emitted yet; a
    result of None therefore means the candidate is good.
    """
    blocking = (lst for lst in orderlists if w_candidate in lst[1:])
    return next(blocking, None)
def mro_error(orderlists):
    """Raise a TypeError explaining why no linearization exists.

    Always raises: either "duplicate parent ..." when the last list names
    its head twice, or "cycle among base parents: ..." listing the objects
    that block each other.
    """
    # w_obj.getname() is a pure debugging-helper. it can return whatever string
    local_order = orderlists[-1]
    w_candidate = local_order[0]
    if w_candidate in local_order[1:]:
        # explicit error message for this specific case
        raise TypeError("duplicate parent %s" % w_candidate.getname())
    # Follow the chain of blocking lists until an object shows up twice.
    cycle = []
    while w_candidate not in cycle:
        cycle.append(w_candidate)
        w_candidate = mro_blockinglist(w_candidate, orderlists)[0]
    # Drop the lead-in before the cycle, then close it.
    del cycle[:cycle.index(w_candidate)]
    cycle.append(w_candidate)
    cycle.reverse()
    description = ' < '.join(w_entry.getname() for w_entry in cycle)
    raise TypeError("cycle among base parents: " + description)

401
compile.py Normal file
View File

@ -0,0 +1,401 @@
"""This file contains the bytecode-compiler.
An instruction can have one or no arguments. There are two different ways how
an argument is encoded:
ARG4 encodes an argument in 4 bytes, in a little-endian manner
SMALLARG encodes an integer i differently based on its size:
if -127 <= i <= 127 the integer is encoded as one byte
otherwise it is encoded as 5 bytes:
1 marker byte equal to -128 to signify that the large form is used
4 bytes to encode the integer as with ARG4
The instruction set contains the following instructions:
INT_LITERAL <SMALLARG>
Pushes an integer literal on the stack. The argument is the value of the
integer.
IMPLICIT_SELF
Pushes the implicit self on the stack.
POP
Pops the top element from the stack.
DUP
Duplicates the top element of the stack.
JUMP <ARG4>
Unconditionally jump to a different point in the program. The offset of the
program counter to the target is given by the argument.
JUMP_IF_FALSE <ARG4>
Pops an object from the stack and jump to a different point in the program
if that object is false. The offset of the program counter to the target is
given by the argument.
ASSIGNMENT <SMALLARG>
Assigns the first object on the stack to the second object on the stack.
The objects are popped from the stack, and then the assigned object
(i.e. the `expression') is pushed again. The attribute name is given by
the argument, which is an index into the symbols list of the bytecode
object.
PRIMITIVE_METHOD_CALL <SMALLARG>
Call a primitive method. The argument is an index into a list of all
primitives, which must be defined in the "primitive" module. The arguments
are found on the stack and are popped by this bytecode; the result is
pushed on the stack. To make the compiler work correctly for primitives,
the "primitive" module needs to expose two functions:
"get_index_of_primitive_named", which maps primitive name to a primitive
number, and get_number_of_arguments_of_primitive, which maps a primitive
number to the number of arguments the corresponding function takes.
METHOD_LOOKUP <SMALLARG>
Looks up a method in the object at the top of the stack. The method name
is given by the argument, which is an index into the symbols list of the
bytecode object. The method is pushed on the stack (and the original
object is not removed).
METHOD_CALL <SMALLARG>
Calls a method. The first n (where n is the argument of the bytecode) are
the arguments to the method, in reverse order. The next object on the
stack is the method. The final object is the receiver. All these objects
are popped from the stack. The result of the method call is pushed.
MAKE_FUNCTION <SMALLARG>
Creates a new W_Method object and pushes it on the stack. The bytecode of
the method can be found in the subbytecodes list of the current bytecode
object; the index is given by the argument.
MAKE_OBJECT <SMALLARG>
Create a new (empty) object and pushes it on the stack. The argument (which
can be ignored for now) is the index in symbols of the name of the object.
ASSIGNMENT_APPEND_PARENT <SMALLARG>
Adds a new parent to an object. This bytecode is only used during object
creation. It works like the ASSIGNMENT bytecode, but (1) it also adds the
name to the list of parent attributes of the object, and (2) it leaves
on the stack the assigned-to object (the `lvalue'), not the assigned
object (the `expression').
MAKE_OBJECT_CALL <SMALLARG>
Execute the body of a newly created object. The object is on the top of the
stack and is left there. The bytecode of the body can be found in the
subbytecodes list of the current bytecode object, the index is given by the
argument.
GET_LOCAL <SMALLARG>
This is an optimization for the common case of sending a method without
arguments to the implicit self. This bytecode is equivalent to:
IMPLICIT_SELF
METHOD_LOOKUP <SMALLARG>
METHOD_CALL 0
SET_LOCAL <SMALLARG>
This is an optimization for the common case of writing a slot to the
implicit self. This bytecode is equivalent to:
IMPLICIT_SELF
ASSIGNMENT <SMALLARG>
Note that there is no "return" bytecode. When the end of the bytecode is
reached, the top of the stack is returned (and the stack should have only one
element on it).
"""
import sys
import simpleast
# ---------- bytecodes ----------
# Opcodes below 32 carry an argument (see hasarg); the comment on each line
# says what that argument means.
INT_LITERAL = 2                # integer value
ASSIGNMENT = 4                 # index of attrname
METHOD_LOOKUP = 5              # index of method name
METHOD_CALL = 6                # number of arguments
PRIMITIVE_METHOD_CALL = 7      # number of the primitive
MAKE_FUNCTION = 8              # bytecode literal index
MAKE_OBJECT = 9                # index of object name
ASSIGNMENT_APPEND_PARENT = 10  # index of parentname
MAKE_OBJECT_CALL = 11          # bytecode literal index
JUMP_IF_FALSE = 12             # offset
JUMP = 13                      # offset
GET_LOCAL = 15                 # index of attrname (optimization)
SET_LOCAL = 16                 # index of attrname (optimization)
IMPLICIT_SELF = 32             # (no argument)
POP = 33                       # (no argument)
DUP = 34                       # (no argument)

# Reverse table: opcode number -> mnemonic, None for unassigned numbers.
opcode_names = [None] * 256
for key, value in list(globals().items()):
    # Only upper-case integer constants are opcodes.  The type check keeps
    # an upper-case global of another type from being used as a list index.
    if key.strip("_").isupper() and isinstance(value, int):
        opcode_names[value] = key
def hasarg(opcode):
    """ Tell whether *opcode* is followed by an argument (numbers below 32)."""
    takes_argument = opcode < 32
    return takes_argument
def isjump(opcode):
    """ Tell whether *opcode* is one of the two jump instructions."""
    return opcode in (JUMP_IF_FALSE, JUMP)
class Bytecode(object):
    """ The compiled form of one piece of code.

    code          string holding the encoded instructions
    name          name of the code; "?" when none was given
    symbols       list of the names occurring in the code
    subbytecodes  list of nested Bytecode objects used by the code
    numargs       number of arguments the code takes
    stackdepth    stack depth computed by the compiler
    """
    # NOTE(review): these look like RPython immutability hints -- confirm.
    _immutable_ = True
    _immutable_fields_ = ["symbols[*]", "subbytecodes[*]"]

    def __init__(self, code, name, symbols,
                 subbytecodes, numargs, stackdepth):
        self.code = code
        self.name = "?" if name is None else name
        self.symbols = symbols
        self.subbytecodes = subbytecodes
        self.numargs = numargs
        self.stackdepth = stackdepth

    def dis(self, pc=-1):
        """Print a disassembly of this bytecode; *pc* marks the current position."""
        # imported here because disass itself imports this module
        import disass
        disass.disassemble(self, pc=pc)
# ---------- compiler ----------
def compile(ast, argumentnames=[], name=None):
    """ Turn the Program *ast* into a Bytecode object.

    argumentnames -- names of the arguments of the code (only read, never
                     modified)
    name          -- name recorded in the resulting Bytecode
    """
    assert isinstance(ast, simpleast.Program)
    compiler = Compiler()
    # Register the argument names first so they get symbol indexes
    # 0..numargs-1, followed by the two names every piece of code knows.
    for symbol in list(argumentnames) + ["__parent__", "self"]:
        compiler.lookup_symbol(symbol)
    compiler.compile(ast, True)
    return compiler.make_bytecode(len(argumentnames), name)
# Net change of the stack height caused by each opcode whose effect is
# fixed.  METHOD_CALL and PRIMITIVE_METHOD_CALL are not listed: their effect
# depends on the number of call arguments, so the compiler passes it to
# Compiler.emit() through the ``stackeffect`` parameter instead.
stack_effects = {
    INT_LITERAL: 1,
    ASSIGNMENT: -1,
    METHOD_LOOKUP: 1,
    MAKE_FUNCTION: 1,
    MAKE_OBJECT: 1,
    ASSIGNMENT_APPEND_PARENT: -1,
    MAKE_OBJECT_CALL: 0,
    GET_LOCAL: 1,
    SET_LOCAL: 0,
    JUMP: 0,
    JUMP_IF_FALSE: -1,
    IMPLICIT_SELF: 1,
    POP: -1,
    DUP: 1,
}
class Compiler(object):
    """Emits the bytecode for one piece of code.

    Each ``compile_<NodeClass>`` method handles one kind of AST node.  The
    ``needsresult`` flag says whether the value of the node must remain on
    the stack after its code has run.
    """

    def __init__(self):
        self.code = []            # one single-character string per byte
        self.symbols = {}         # name -> index into the symbols list
        self.subbytecodes = []    # Bytecode objects of nested definitions
        self.stackdepth = 0       # stack height at the current position
        self.max_stackdepth = 0   # largest height seen so far

    def make_bytecode(self, numargs, funcname):
        """Package everything emitted so far into a Bytecode object."""
        names_by_index = [None] * len(self.symbols)
        for symbol, position in self.symbols.items():
            names_by_index[position] = symbol
        bytecode = Bytecode(''.join(self.code),
                            funcname,
                            names_by_index,
                            self.subbytecodes,
                            numargs, self.max_stackdepth)
        # finished code leaves exactly its result on the stack
        assert self.stackdepth == 1
        return bytecode

    def stack_effect(self, num):
        """Record that the stack height changes by *num*."""
        self.stackdepth += num
        if self.stackdepth > self.max_stackdepth:
            self.max_stackdepth = self.stackdepth

    def emit(self, opcode, arg=None, stackeffect=sys.maxsize):
        """Append one instruction and account for its stack effect.

        Jumps are emitted with a zero offset that set_target_position()
        fills in later.  *stackeffect* must be given for every opcode that
        is not listed in ``stack_effects``.
        """
        self.code.append(chr(opcode))
        if isjump(opcode):
            assert arg is None
            self.code.extend(self.encode4(0))
        elif hasarg(opcode):
            assert isinstance(arg, int)
            if -127 <= arg <= 127:
                # short form: a single byte
                self.code.append(chr(arg & 0xFF))
            else:
                # long form: marker byte, then four bytes
                self.code.append(chr(128))
                self.code.extend(self.encode4(arg))
        else:
            assert arg is None
        fixed_effect = stack_effects.get(opcode)
        if fixed_effect is not None:
            stackeffect = fixed_effect
        else:
            assert stackeffect != sys.maxsize
        self.stack_effect(stackeffect)

    def get_position(self):
        """Return the position at which the next instruction will start."""
        return len(self.code)

    def set_target_position(self, oldposition, newtarget):
        """Patch the jump emitted at *oldposition* to land on *newtarget*."""
        # A jump is 5 bytes long; offsets count from the end of the jump.
        offset = newtarget - (oldposition + 5)
        encoded = self.encode4(offset)
        for index, byte in enumerate(encoded, start=oldposition + 1):
            self.code[index] = byte

    def encode4(self, value):
        """Return *value* as four one-character strings, lowest byte first."""
        return [chr((value >> shift) & 0xFF) for shift in (0, 8, 16, 24)]

    def lookup_symbol(self, symbol):
        """Return the index of *symbol*, registering it on first use."""
        return self.symbols.setdefault(symbol, len(self.symbols))

    def compile(self, ast, needsresult=True):
        """Dispatch to the ``compile_*`` method matching the class of *ast*."""
        handler = getattr(self, "compile_" + ast.__class__.__name__)
        return handler(ast, needsresult)

    def compile_IntLiteral(self, astnode, needsresult):
        self.emit(INT_LITERAL, astnode.value)

    def compile_ImplicitSelf(self, astnode, needsresult):
        self.emit(IMPLICIT_SELF)

    def compile_Assignment(self, astnode, needsresult):
        attr_index_of = self.lookup_symbol
        if isinstance(astnode.lvalue, simpleast.ImplicitSelf):
            # writing a slot of the implicit self has its own opcode
            self.compile(astnode.expression)
            self.emit(SET_LOCAL, attr_index_of(astnode.attrname))
        else:
            self.compile(astnode.lvalue)
            self.compile(astnode.expression)
            self.emit(ASSIGNMENT, attr_index_of(astnode.attrname))
        if not needsresult:
            self.emit(POP)

    def compile_ExprStatement(self, astnode, needsresult):
        self.compile(astnode.expression)
        if not needsresult:
            self.emit(POP)

    def compile_MethodCall(self, astnode, needsresult):
        arguments = astnode.arguments
        numargs = len(arguments)
        if (numargs == 0 and
                isinstance(astnode.receiver, simpleast.ImplicitSelf)):
            # argument-less send to the implicit self has its own opcode
            self.emit(GET_LOCAL, self.lookup_symbol(astnode.methodname))
            return
        self.compile(astnode.receiver)
        self.emit(METHOD_LOOKUP, self.lookup_symbol(astnode.methodname))
        for argument in arguments:
            self.compile(argument)
        # pops the arguments, the method and the receiver, pushes the result
        self.emit(METHOD_CALL, numargs, -numargs - 1)

    def compile_PrimitiveMethodCall(self, astnode, needsresult):
        import primitives
        index = primitives.get_index_of_primitive_named(astnode.methodname)
        expected_args = primitives.get_number_of_arguments_of_primitive(index)
        received_args = len(astnode.arguments)
        if received_args != expected_args:
            raise TypeError(
                "Expected {ex} arguments, received {re}.".format(
                    ex=expected_args, re=received_args))
        self.compile(astnode.receiver)
        for argument in astnode.arguments:
            self.compile(argument)
        # pops the arguments and the receiver, pushes the result
        self.emit(PRIMITIVE_METHOD_CALL, index, -received_args)

    def compile_ObjectDefinition(self, astnode, needsresult):
        self.emit(MAKE_OBJECT, self.lookup_symbol(astnode.name))
        #
        for position, definition in enumerate(astnode.parentdefinitions):
            name = astnode.parentnames[position]
            if name == "__parent__":
                # plain assignment; DUP/POP keep the new object on the stack
                self.emit(DUP)
                self.compile(definition)
                self.emit(ASSIGNMENT, self.lookup_symbol(name))
                self.emit(POP)
            else:
                self.compile(definition)
                self.emit(ASSIGNMENT_APPEND_PARENT, self.lookup_symbol(name))
        #
        body = compile(astnode.block, name=astnode.name)
        index = len(self.subbytecodes)
        self.subbytecodes.append(body)
        self.emit(MAKE_OBJECT_CALL, index)
        self.emit(SET_LOCAL, self.lookup_symbol(astnode.name))
        if not needsresult:
            self.emit(POP)

    def compile_Program(self, astnode, needsresult):
        statements = astnode.statements
        # only the last statement may leave a value behind
        for statement in statements[:-1]:
            self.compile(statement, needsresult=False)
        self.compile(statements[-1], needsresult)

    def compile_FunctionDefinition(self, astnode, needsresult):
        body = compile(astnode.block, astnode.arguments, astnode.name)
        index = len(self.subbytecodes)
        self.subbytecodes.append(body)
        self.emit(MAKE_FUNCTION, index)
        self.emit(SET_LOCAL, self.lookup_symbol(astnode.name))
        if not needsresult:
            self.emit(POP)

    def compile_IfStatement(self, astnode, needsresult):
        # XXX this can compute the needed stack by one too much
        self.compile(astnode.condition)
        skip_ifblock = self.get_position()
        self.emit(JUMP_IF_FALSE)
        #
        self.compile(astnode.ifblock, needsresult)
        skip_elseblock = self.get_position()
        self.emit(JUMP)
        #
        self.set_target_position(skip_ifblock, self.get_position())
        if astnode.elseblock:
            self.compile(astnode.elseblock, needsresult)
        elif needsresult:
            # without an else block the implicit self is the value
            self.emit(IMPLICIT_SELF)
        if needsresult:
            # both branches were counted, but only one of them runs
            self.stack_effect(-1)
        #
        self.set_target_position(skip_elseblock, self.get_position())

    def compile_WhileStatement(self, astnode, needsresult):
        if needsresult:
            # value of the loop in case the body never runs
            self.emit(IMPLICIT_SELF)
        #
        loop_start = self.get_position()
        self.compile(astnode.condition)
        exit_jump = self.get_position()
        self.emit(JUMP_IF_FALSE)
        #
        if needsresult:
            # discard the previous value before the body yields a new one
            self.emit(POP)
        self.compile(astnode.whileblock, needsresult)
        back_jump = self.get_position()
        self.emit(JUMP)
        self.set_target_position(back_jump, loop_start)
        #
        self.set_target_position(exit_jump, self.get_position())

118
disass.py Normal file
View File

@ -0,0 +1,118 @@
import compile
def disassemble(bytecode, indent='', pc=-1):
    """ Print a readable listing of a bytecode object.

    indent -- string printed in front of every line
    pc     -- position to mark with '->' in the listing
    """
    assert isinstance(bytecode, compile.Bytecode)
    # First pass: collect every position that some jump lands on.
    label_pass = FindLabelTargets()
    label_pass.disassemble(bytecode)
    # Second pass: print the listing, flagging those positions.
    printer = Disassembler(indent, label_pass.targets)
    printer.disassemble(bytecode, pc)
# opcode number -> mnemonic, taken from the integer constants of the
# compile module whose names are written in upper case
opcode2name = {}
for name, value in vars(compile).items():
    if isinstance(value, int) and name.upper() == name:
        opcode2name[value] = name
class AbstractDisassembler(object):
    """Walks over a bytecode and calls a hook for every instruction.

    For each instruction start(pc) is called before decoding and
    end(opcode, oparg) after it; then the method named like the opcode is
    called, or dummy() when the subclass defines no such method.
    """

    def read4(self, code, pc):
        """Decode the signed little-endian 4-byte integer at ``code[pc:pc+4]``."""
        top = ord(code[pc+3])
        if top >= 128:
            # the highest byte carries the sign
            top -= 256
        low = (ord(code[pc]) |
               (ord(code[pc+1]) << 8) |
               (ord(code[pc+2]) << 16))
        return low | (top << 24)

    def disassemble(self, bytecode, currpc=-1):
        self.currpc = currpc
        self.bytecode = bytecode
        code = bytecode.code
        end_of_code = len(code)
        pc = 0
        while pc < end_of_code:
            self.start(pc)
            opcode = ord(code[pc])
            pc += 1
            if compile.isjump(opcode):
                oparg = self.read4(code, pc)
                pc += 4
            elif not compile.hasarg(opcode):
                oparg = None
            else:
                oparg = ord(code[pc])
                pc += 1
                if oparg > 128:
                    # one-byte form, negative value
                    oparg -= 256
                elif oparg == 128:
                    # marker byte: the value follows in four bytes
                    oparg = self.read4(code, pc)
                    pc += 4
            # hooks see the position *after* the instruction
            self.pc = pc
            self.end(opcode, oparg)
            handler = getattr(self, opcode2name[opcode], self.dummy)
            handler(opcode, oparg)

    def start(self, pc):
        """Hook called before the instruction at *pc* is decoded."""

    def end(self, opcode, oparg):
        """Hook called once an instruction has been decoded."""

    def dummy(self, opcode, oparg):
        """Fallback for opcodes without a method of their own."""
class FindLabelTargets(AbstractDisassembler):
    """Collects the positions that jump instructions land on in ``targets``."""

    def __init__(self):
        self.targets = {}

    def JUMP_IF_FALSE(self, opcode, oparg):
        # offsets are relative to the position after the jump
        destination = self.pc + oparg
        self.targets[destination] = True

    JUMP = JUMP_IF_FALSE
class Disassembler(AbstractDisassembler):
    """Prints one line per instruction.

    indent  -- string printed in front of every line
    targets -- positions that jumps land on; each is announced with '>>'
    """

    def __init__(self, indent, targets):
        self.indent = indent
        self.targets = targets

    def start(self, pc):
        if pc in self.targets:
            print(self.indent, '>>', pc)
        if pc == self.currpc:
            print(self.indent, '->', pc)

    def end(self, opcode, oparg):
        # the line is finished by the opcode-specific method below
        print(self.indent, '\t', opcode2name[opcode], end=' ')

    def JUMP_IF_FALSE(self, opcode, oparg):
        destination = self.pc + oparg
        print('\t', '-->', destination)

    JUMP = JUMP_IF_FALSE

    def ASSIGNMENT(self, opcode, oparg):
        # the argument is an index into the symbols list
        symbol = self.bytecode.symbols[oparg]
        print('\t', repr(symbol))

    METHOD_LOOKUP = ASSIGNMENT
    ASSIGNMENT_APPEND_PARENT = ASSIGNMENT
    GET_LOCAL = ASSIGNMENT
    SET_LOCAL = ASSIGNMENT

    def PRIMITIVE_METHOD_CALL(self, opcode, oparg):
        import primitives
        func = primitives.all_primitives[oparg]
        print('\t', repr('$' + func.__qualname__))

    def dummy(self, opcode, oparg):
        if oparg is not None:
            print('\t', oparg)
        else:
            print()

1
interpreter.py Normal file
View File

@ -0,0 +1 @@
from bytecodeinterpreter import Interpreter

120
objmodel.py Normal file
View File

@ -0,0 +1,120 @@
from c3computation import compute_C3_mro as c3
class AbstractObject(object):
    """Behaviour shared by all objects of the interpreted language."""

    def call(self, w_receiver, args_w):
        """Calling an object that is not a method yields the object itself."""
        return self

    def istrue(self):
        """Objects are true unless a subclass says otherwise."""
        return True

    def clone(self):
        """Return a copy of the object; must be provided by subclasses."""
        raise NotImplementedError

    def hasslot(self, name=None):
        """Tell whether the object itself has a slot called *name*.

        Must be provided by subclasses.  The parameter matches the way
        getvalue() and the subclasses use this method; it is optional so
        that argument-less calls keep raising NotImplementedError.
        """
        raise NotImplementedError

    def getvalue(self, name):
        """Return the slot *name* of the first object in the lookup order
        that has it, or None when no object has such a slot."""
        for obj in self.get_mro():
            if obj.hasslot(name):
                return obj.slots[name]
        return None

    def get_mro(self):
        """Return the lookup order of the object (C3 linearization)."""
        return c3(self)
class W_NormalObject(AbstractObject):
    """An object made of named slots.

    name    -- name used when the object is displayed
    slots   -- dict mapping slot names to values
    parents -- names of the slots whose values are parents of the object
    space   -- the object space the object belongs to
    """

    def __init__(self, name=None, slots=None, parents=None, space=None):
        self.space = space
        self.name = name
        if slots:
            self.slots = slots
        else:
            self.slots = {}
        if parents is None:
            parents = []
        # every parent name must refer to an existing slot
        for x in parents:
            assert x in self.slots
        self.parents = parents

    def getparents(self):
        """Return the parent objects: the declared parents in order, then
        the value of ``__parent__`` unless it is already among them.

        Names without a slot are skipped.  The list of parent names kept on
        the object is not modified.
        """
        names = list(self.parents)
        if '__parent__' not in names:
            names.append('__parent__')
        return [self.slots[p] for p in names if p in self.slots]

    def hasslot(self, name):
        return name in self.slots

    def setvalue(self, name, w_value):
        self.slots[name] = w_value

    def addparent(self, name):
        """Declare the slot *name* to be a parent slot."""
        self.parents.append(name)

    def __str__(self):
        return self.getname()

    __repr__ = __str__

    def getname(self):
        return "<Object {name} {slots}>".format(
            name=self.name if self.name else '',
            slots=self.slots)

    def clone(self):
        """Return a copy with its own slot dict and its own parent list.

        The slot values themselves are shared with the original.
        """
        return W_NormalObject(
            name=self.name,
            parents=list(self.parents),
            slots=self.slots.copy(),
            space=self.space)
class W_Integer(AbstractObject):
    """An integer of the interpreted language, wrapping a Python value."""

    def __init__(self, value, space=None):
        self.value = value
        self.space = space

    def getparents(self):
        """Integers have the builtin ``inttrait`` as their only parent."""
        space = self.space
        if space is not None:
            inttrait = space.getbuiltin('inttrait')
            assert inttrait is not None, 'O_o bogus state'
            return [inttrait]
        return []  # for tests

    def hasslot(self, name):
        # integers carry no slots of their own
        return False

    def __str__(self):
        return str(self.value)

    __repr__ = __str__

    def istrue(self):
        """Every integer except zero is true."""
        return self.value != 0
class W_Method(W_NormalObject):
    """A callable object holding the Bytecode of a method in ``code``."""

    def __init__(self, code, *args, **kwargs):
        super(W_Method, self).__init__(*args, **kwargs)
        self.code = code

    def clone(self):
        """Return a copy with its own slot dict and its own parent list.

        The copy keeps the object space of the original, so that it can be
        called like the original.
        """
        return W_Method(code=self.code,
                        name=self.name, parents=list(self.parents),
                        slots=self.slots.copy(),
                        space=self.space)

    def getname(self):
        return "<W_Method({name})>".format(name=self.name)

    def call(self, w_receiver, args_w):
        """Run the method on *w_receiver* with the arguments *args_w*.

        A clone of the method serves as the context of the call: the
        arguments and ``self`` are stored in the slots of the clone, which
        leaves the method object itself unchanged.
        """
        w_context = self.clone()
        assert len(args_w) == self.code.numargs
        # the first numargs symbols of the code are the argument names
        for i in range(self.code.numargs):
            self.space.setvalue(w_context, self.code.symbols[i], args_w[i])
        self.space.setvalue(w_context, 'self', w_receiver)
        return self.space.execute(w_context.code, w_context)

87
objspace.py Normal file
View File

@ -0,0 +1,87 @@
from objmodel import W_Integer
from objmodel import W_Method
from objmodel import W_NormalObject
import primitives
class ObjectSpace(object):
    """Creates the objects of the interpreted language and forwards the
    operations the interpreter performs on them."""

    def __init__(self, interpreter):
        self.interpreter = interpreter

    def setup_builtins(self, builtincode=None):
        """Create the builtins object and run *builtincode* in it.

        When *builtincode* is None the file ``builtins.simple`` located next
        to this module is used.
        """
        source = builtincode
        if source is None:
            source = self._load_default_builtins()
        self.w_builtins = W_NormalObject(name='Lobby', slots={})
        from simpleparser import parse
        self.interpreter.eval(parse(source), self.w_builtins)

    def _load_default_builtins(self):
        """Return the text of ``builtins.simple`` from this module's directory."""
        import os
        here = os.path.dirname(__file__)
        builtins = os.path.join(here, 'builtins.simple')
        with open(builtins, 'r') as f:
            return f.read()

    def getbuiltin(self, name):
        return self.w_builtins.getvalue(name)

    def getbuiltins(self):
        return self.w_builtins

    def make_module(self, name=None):
        """Return a new object whose ``__parent__`` is the builtins object,
        or an object without slots when the builtins are not set up yet."""
        if hasattr(self, 'w_builtins'):
            slots = {'__parent__': self.getbuiltins()}
        else:
            slots = {}
        return W_NormalObject(name=name, slots=slots)

    def newobject(self, name, slots, parentnames):
        return W_NormalObject(space=self, name=name,
                              slots=slots, parents=parentnames)

    def newint(self, value):
        return W_Integer(value, space=self)

    def definemethod(self, name, code, w_target):
        """Return a new method whose ``__parent__`` is *w_target*."""
        return W_Method(code, name=name,
                        slots={'__parent__': w_target},
                        space=self)

    def execute(self, code, w_context):
        return self.interpreter.run(code, w_context)

    def setvalue(self, w_receiver, name, w_value):
        w_receiver.setvalue(name, w_value)

    def addparent(self, w_receiver, name):
        w_receiver.addparent(name)

    def getvalue(self, w_receiver, name):
        return w_receiver.getvalue(name)

    def istrue(self, w_condition):
        return w_condition.istrue()

    def isfalse(self, w_condition):
        return not w_condition.istrue()

    def call_primitive(self, primitive_num, w_receiver, arguments_w):
        primitive_function = primitives.all_primitives[primitive_num]
        return primitive_function(w_receiver, arguments_w, space=self)

    def get_number_of_arguments_of_primitive(self, primitive_num):
        return primitives.get_number_of_arguments_of_primitive(primitive_num)

    def call(self, w_method, w_receiver, arguments_w):
        return w_method.call(w_receiver, arguments_w)

    def clone(self, w_value):
        return w_value.clone()

47
primitives.py Normal file
View File

@ -0,0 +1,47 @@
# '$' + primitive name -> index of the primitive in all_primitives
registry = {}
# the wrapped primitive functions, indexed by primitive number
all_primitives = []
# per primitive number: how many arguments it takes besides the receiver
primitive_number_of_arguments = []
def primitive(name, unwrap_spec, wrap_spec):
    """Decorator registering a Python function as the primitive '$' + name.

    unwrap_spec -- one entry for the receiver and one per argument; where
                   the entry is ``int`` the function receives the ``value``
                   attribute of the object, otherwise the object itself
    wrap_spec   -- ``int`` to have the result wrapped with space.newint()

    The decorated name itself is bound to None; the primitive is reached
    through ``registry`` and ``all_primitives`` only.
    """
    assert '$' + name not in registry, '${name} already defined'.format(name=name)
    primitive_number_of_arguments.append(len(unwrap_spec) - 1)  # first argument is the receiver

    def expose(func):
        def unwrapper(w_receiver, args_w, space):
            args = [w_receiver] + args_w
            if len(args) != len(unwrap_spec):
                raise TypeError(
                    "Expected {ex} arguments, received {re}.".format(ex=len(unwrap_spec), re=len(args)))
            unwrapped_args = tuple(
                arg.value if spec is int else arg
                for spec, arg in zip(unwrap_spec, args))
            result = func(*unwrapped_args)
            if wrap_spec is int:
                return space.newint(result)
            return result

        unwrapper.__qualname__ = name
        all_primitives.append(unwrapper)
        registry['$' + name] = len(all_primitives) - 1
        return None

    return expose
def get_index_of_primitive_named(name):
    """Return the number of the primitive registered as *name* ('$' included).

    Raises KeyError when no such primitive is registered.
    """
    index = registry[name]
    return index
def get_number_of_arguments_of_primitive(idx):
    """Return how many arguments primitive number *idx* takes, not counting
    the receiver."""
    count = primitive_number_of_arguments[idx]
    return count
@primitive('int_add', [int, int], int)
def simple_int_add(a, b):
    """Primitive $int_add: the sum of the receiver and the argument."""
    total = a + b
    return total
@primitive('int_eq', [int, int], int)
def simple_int_eq(a, b):
    """Primitive $int_eq: 1 when receiver and argument are equal, else 0."""
    # The result is wrapped as an integer of the language, so hand back a
    # real int; a bool would be stored and displayed as True/False.
    return int(a == b)

35
shell.nix Normal file
View File

@ -0,0 +1,35 @@
# Development shell: a Python 3.9 environment with the libraries listed
# below, plus editor tooling.
{ pkgs ? import <nixpkgs> {} }:
with pkgs;
let myPython = python39.buildEnv.override {
  extraLibs = with python39Packages; [
    # Common Libs
    rich
    # numpy
    # matplotlib
    # scipy
    # pytorch
    # notebook
    # Doom Emacs Libs
    black
    pyflakes
    isort
    nose
    pytest
    # DynLang
    # rply provides the LexerGenerator imported by simplelexer.py
    rply
  ];
};
in
mkShell {
  buildInputs = [
    myPython
    nodePackages.pyright # LSP
    pipenv # Doom
    jetbrains.pycharm-professional
  ];
}

279
simpleast.py Normal file
View File

@ -0,0 +1,279 @@
import py
class MetaNode(type):
    """Metaclass that gives every class it creates a ``dispatch`` method.

    ``node.dispatch(compiler)`` calls ``compiler.compile_<ClassName>(node)``.
    On classes without an ``attrs`` attribute it does nothing.
    """

    def __init__(cls, name, bases, dict):
        handler_name = "compile_%s" % (name,)
        is_concrete = hasattr(cls, "attrs")

        def dispatch(self, compiler):
            if is_concrete:
                handler = getattr(compiler, handler_name)
                handler(self)

        cls.dispatch = dispatch
class AstNode(object, metaclass=MetaNode):
    """ Base class for all ast nodes. Provides generic functionality.

    Concrete node classes list the names of their attributes in ``attrs``;
    repr, comparison and the graph output all work from that list.
    """
    # The metaclass is given in the class header: an assignment to
    # ``__metaclass__`` in the class body has no effect in Python 3.
    tokens = None

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__,
                           ", ".join([repr(getattr(self, a)) for a in self.attrs]))

    def __eq__(self, other):
        """Nodes are equal when they have the same class and equal attributes."""
        if self.__class__ is not other.__class__:
            return False
        for key in self.attrs:
            if getattr(self, key) != getattr(other, key):
                return False
        return True

    def __ne__(self, other):
        return not (self == other)

    def dot(self, result=None):
        """Return the lines of a graphviz description of this subtree.

        The lines are appended to *result* when it is given.  Attributes
        holding nodes (or lists of nodes) become edges; all other attributes
        are written into the label of the box of the node.
        """
        def uid(x):
            # identifier of a node in the graph; a negative id is written
            # with a leading 'm' instead of the minus sign
            result = id(x)
            if result < 0:
                result = 'm%d' % (-result,)
            return result
        if result is None:
            result = []
        body = [self.__class__.__name__]
        children = []
        for key in self.attrs:
            obj = getattr(self, key)
            if isinstance(obj, list):
                if obj and isinstance(obj[0], AstNode):
                    children.extend(obj)
                    for i, elt in enumerate(obj):
                        result.append("o%s -> o%s [label=\"%s[%s]\"]" % (
                            uid(self), uid(elt), key, i))
                else:
                    body.append("%s = %s" % (key, obj))
            elif isinstance(obj, AstNode):
                children.append(obj)
                result.append("o%s -> o%s [label=\"%s\"]" % (
                    uid(self), uid(obj), key))
            else:
                body.append("%s = %s" % (key, obj))
        result.append("o%s [label=\"%s\", shape=box]" % (uid(self), repr("\n".join(body))[1:-1]))
        for child in children:
            child.dot(result)
        return result

    def view(self):
        """ Calling this method gives a graphical representation of the ast
        graph. Needs a checkout of
        https://bitbucket.org/pypy/pypy/src/default/dotviewer/ in the current directory
        as well as graphviz (http://graphviz.org) installed. """
        from dotviewer import graphclient
        content = ["digraph G{"]
        content.extend(self.dot())
        content.append("}")
        p = py.test.ensuretemp("simpleparser").join("temp.dot")
        p.write("\n".join(content))
        graphclient.display_dot_file(str(p))
class Expression(AstNode):
    """ Abstract base class shared by all expression nodes of the AST."""
class IntLiteral(Expression):
    """ An integer literal (like "1"); the value is converted with int()."""
    attrs = ["value"]

    def __init__(self, value):
        number = int(value)
        self.value = number
class StringLiteral(Expression):
    """ A string literal (like "hello world"); the value is converted with
    str()."""
    attrs = ["value"]

    def __init__(self, value):
        text = str(value)
        self.value = text
class MethodCall(Expression):
    """ A call of the method 'methodname' on 'receiver', passing 'arguments'
    (a list of expression ASTs; empty when omitted).

    Example:
        f(1, 2, 3)
            (receiver is ImplicitSelf(), methodname is 'f' and
            args is [IntLiteral(1), IntLiteral(2), IntLiteral(3)])
        5 f
            (receiver is IntLiteral(5), methodname is 'f' and args is [])
    """
    attrs = ["receiver", "methodname", "arguments"]

    def __init__(self, receiver, methodname, arguments=None):
        self.receiver = receiver
        self.methodname = methodname
        self.arguments = [] if arguments is None else arguments
class PrimitiveMethodCall(MethodCall):
    """ A call of a primitive method. The names of primitive methods start
    with '$'. The attributes are the same as those of MethodCall.

    Example:
        5 $int_add(6)
            (receiver is IntLiteral(5), methodname is '$int_add' and args
            is [IntLiteral(6)])
    """
class ImplicitSelf(Expression):
    """ The receiver used when the source does not name one.

    Example:
        f
    is the method call "f" on the implicit self."""
    attrs = []
class Statement(AstNode):
    """ Abstract base class shared by all statement nodes of the AST."""
class Assignment(Statement):
    """ An assignment of the form: lvalue attrname = expression.

    Example:
        x = 7
    is an assignment on the implicit self."""
    attrs = ["lvalue", "attrname", "expression"]

    def __init__(self, lvalue, attrname, expression):
        self.lvalue, self.attrname = lvalue, attrname
        self.expression = expression
class ExprStatement(Statement):
    """ A statement consisting of an expression only; the expression is
    evaluated and its result is ignored."""
    attrs = ["expression"]

    def __init__(self, expr):
        self.expression = expr
class IfStatement(Statement):
    """ An if statement, written like this:

        if condition:
            ... ifblock ...
        else:
            ... elseblock ...

    The elseblock may be left out, in which case it is None."""
    attrs = ["condition", "ifblock", "elseblock"]

    def __init__(self, condition, ifblock, elseblock=None):
        self.condition = condition
        self.ifblock, self.elseblock = ifblock, elseblock
class WhileStatement(Statement):
    """ A while loop, written like this:

        while condition:
            ... whileblock ...
        else:
            ... elseblock ...

    The elseblock may be left out, in which case it is None."""
    attrs = ["condition", "whileblock", "elseblock"]

    def __init__(self, condition, whileblock, elseblock=None):
        self.condition = condition
        self.whileblock, self.elseblock = whileblock, elseblock
class FunctionDefinition(Statement):
    """ A function definition of the form: def name(arguments): block.

    'name' is a string, 'arguments' is a list of strings and 'block' is a
    Program (see below). Executing a FunctionDefinition creates a new
    W_Method and assigns it to 'name' on the implicit self.

    Example:
        def f:              FunctionDefinition('f', [], Program([...]))
            41
        def g(a, b, c):     FunctionDefinition('g', ['a', 'b', 'c'], ...)
            43
    """
    attrs = ["name", "arguments", "block"]

    def __init__(self, name, arguments, block):
        self.name, self.arguments = name, arguments
        self.block = block
class ObjectDefinition(Statement):
    """ Creates a new normal object.

    The block is executed right away with the new object as the implicit
    self. 'name' is bound to the new object in the implicit self of the
    outer scope.

    Example:
        object x:
            def f(y):
                y

    'parentnames' is a list of strings naming the parent attributes of the
    new object. 'parentdefinitions' is a list of expression ASTs giving the
    initial values of those attributes.

    Example:
        object x(p1=a, p2=b):
            ...
    gives parentnames = ["p1", "p2"]
    and parentdefinitions = [MethodCall(ImplicitSelf, "a", []),
                             MethodCall(ImplicitSelf, "b", [])]
    """
    attrs = ["name", "block", "parentnames", "parentdefinitions"]

    def __init__(self, name, block, parentnames=None, parentdefinitions=None):
        self.name, self.block = name, block
        if parentnames is None:
            # without parent names there are no parent definitions either
            parentnames, parentdefinitions = [], []
        self.parentnames = parentnames
        self.parentdefinitions = parentdefinitions
class Program(AstNode):
    """ A list of statements. """
    attrs = ["statements"]

    def __init__(self, statements):
        # The list is stored as given, not copied.
        self.statements = statements

190
simplelexer.py Normal file
View File

@ -0,0 +1,190 @@
from rply import LexerGenerator
from rply.token import Token
# attempts at writing a simple Python-like lexer
# Width of a tab stop: compute_position_of_newline advances the column to the
# next multiple of this value when it meets a tab in leading whitespace.
tabsize = 4
def make_indent_token(token, start):
    """Turn a NewlineAndWhitespace token into an Indent token, in place.

    'start' is the offset within token.value at which the whitespace after
    the line break begins.  The value is cut down to that whitespace and the
    token's source position is moved to column 0 of the following line.
    The same (mutated) token object is returned.
    """
    assert token.name == "NewlineAndWhitespace"
    token.name = "Indent"
    token.value = token.value[start:]
    position = token.source_pos
    position.idx += start
    position.lineno += 1
    position.colno = 0
    return token
def make_dedent_token(token, start):
    """Turn a NewlineAndWhitespace token into a Dedent token, in place.

    'start' is the offset within token.value at which the whitespace after
    the line break begins.  The value is cut down to that whitespace and the
    token's source position is moved to column 0 of the following line.
    The same (mutated) token object is returned.
    """
    assert token.name == "NewlineAndWhitespace"
    token.name = "Dedent"
    token.value = token.value[start:]
    position = token.source_pos
    position.idx += start
    position.lineno += 1
    position.colno = 0
    return token
# split the token in two: one for the newline and one for the
# in/dedent
# the NewlineAndWhitespace token looks like this: \r?\n[ \f\t]*
def compute_position_of_newline(token):
    """Measure a NewlineAndWhitespace token.

    Returns a pair (start, column): 'start' is the offset in token.value at
    which the whitespace after the line break begins (1 when the value starts
    with a line feed, otherwise 2), and 'column' is the indentation depth that
    whitespace amounts to.
    """
    assert token.name == "NewlineAndWhitespace"
    text = token.value
    start = 1 if text[0] == '\n' else 2
    column = 0
    for char in text[start:]:
        if char == ' ':
            column += 1
        elif char == '\t':
            # jump to the next tab stop
            column = (column // tabsize + 1) * tabsize
        elif char == '\f':
            # a form feed resets the count
            column = 0
    return start, column
def compute_indent_or_dedent(token, indentation_levels, output_tokens):
    """Replace one NewlineAndWhitespace token by Newline/Indent/Dedent tokens.

    A Newline token is always appended to 'output_tokens'.  If the whitespace
    is deeper than the innermost entry of 'indentation_levels', the new depth
    is pushed and an Indent token follows.  If it is shallower, one Dedent
    token is appended for every level popped; the last of them is the
    converted input token.  Equal depth adds nothing further.
    Both list arguments are modified in place; nothing is returned.

    NOTE(review): the Newline token and the placeholder Dedent tokens share
    token.source_pos with the input token, and make_indent_token /
    make_dedent_token modify that object in place -- confirm the aliasing is
    intended.
    """
    start, column = compute_position_of_newline(token)
    # everything up to 'start' is the line break itself
    newline = Token("Newline", token.value[:start], token.source_pos)
    output_tokens.append(newline)
    current_level = indentation_levels[-1]
    if column > current_level:
        indentation_levels.append(column)
        output_tokens.append(make_indent_token(token, start))
        return
    if column >= current_level:
        # same depth: neither indent nor dedent
        return
    while column < indentation_levels[-1]:
        indentation_levels.pop()
        output_tokens.append(Token("Dedent", "", token.source_pos))
    # the final placeholder is replaced by the converted input token
    output_tokens[-1] = make_dedent_token(token, start)
# input: lexer token stream
# output: modified token stream
def postprocess(tokens, source):
    """Rewrite the raw lexer token stream into the stream the parser expects.

    - tokens named "Ignore" are dropped;
    - of several consecutive NewlineAndWhitespace tokens only the last one
      is kept;
    - outside brackets, each remaining NewlineAndWhitespace token is replaced
      by Newline plus Indent/Dedent tokens (see compute_indent_or_dedent);
      inside brackets it is dropped, giving implicit line continuation;
    - an "EOF" token is appended when the input was not empty.

    tokens: iterable of lexer tokens (objects with 'name' and 'source_pos').
    source: the program text; kept for interface compatibility and
        currently unused.

    Returns the new list of tokens.
    Raises rply.errors.LexingError for a closing bracket that has no
    matching opening bracket.
    """
    parenthesis_level = 0
    indentation_levels = [0]
    output_tokens = []
    tokens = [tok for tok in tokens if tok.name != "Ignore"]
    token = None  # stays None when there are no tokens at all
    for i, token in enumerate(tokens):
        # never create indent/dedent token between brackets
        if token.name == "OpenBracket":
            parenthesis_level += 1
            output_tokens.append(token)
        elif token.name == "CloseBracket":
            parenthesis_level -= 1
            if parenthesis_level < 0:
                # Imported here so the dependency is only needed on the
                # error path.
                from rply.errors import LexingError
                raise LexingError("unmatched parenthesis", token.source_pos)
            output_tokens.append(token)
        elif token.name == "NewlineAndWhitespace":
            if i + 1 < len(tokens) and tokens[i + 1].name == "NewlineAndWhitespace":
                # blank line: only the last line break of the run matters
                continue
            if parenthesis_level == 0:
                compute_indent_or_dedent(token, indentation_levels, output_tokens)
            # else: implicit line-continuation within parenthesis
        else:
            # something else: e.g. name, keyword, etc...
            output_tokens.append(token)
    if token is not None:
        output_tokens.append(Token("EOF", "", token.source_pos))
    return output_tokens
# RPython reimplementation
def group(*choices, **namegroup):
    """Return a regex group matching any one of 'choices': "(a|b|...)".

    Keyword arguments are accepted but not used.
    """
    alternatives = '|'.join(choices)
    return '(' + alternatives + ')'
# RPython reimplementation
def any(*choices):
    """Return a regex matching zero or more repetitions of group(*choices).

    Note that this name shadows the builtin any() inside this module.
    """
    return group(*choices) + '*'
# ' or " string. eg. 'hello' or "hello"
def make_single_string(delim):
    """Return the regex for a string literal enclosed in 'delim'.

    'delim' is the regex text for the quote character.  The literal body is a
    run of plain characters followed by any number of backslash escapes, each
    followed by another run of plain characters.
    """
    plain_chars = r"[^\n\%s]*" % (delim,)
    escaped_runs = any(r"\\." + plain_chars)
    return delim + plain_chars + escaped_runs + delim
# ____________________________________________________________
# Literals
# Either an optionally signed integer without leading zero, or a single 0.
Number = r'(([+-])?[1-9][0-9]*)|0'
# A single-quoted or a double-quoted string (see make_single_string).
String = group(make_single_string(r"\'"), make_single_string(r'\"'))
# ____________________________________________________________
# Ignored
# One blank, form feed or tab.
Whitespace = r'[ \f\t]'
# A line feed, optionally preceded by a carriage return.
Newline = r'\r?\n'
# A backslash directly followed by a line break.
Linecontinue = r'\\' + Newline
# '#' and everything up to (not including) the end of the line.
Comment = r'#[^\r\n]*'
# A line break plus the whitespace that follows it; postprocess() turns these
# tokens into Newline/Indent/Dedent tokens.
NewlineAndWhitespace = Newline + any(Whitespace)
# Whitespace runs, line continuations and comments; postprocess() drops every
# token with this name.
Ignore = group(Whitespace + '+', Linecontinue, Comment)
# ____________________________________________________________
# Identifier
Name = r'[a-zA-Z_][a-zA-Z0-9_]*'
# A Name with a leading '$'.
PrimitiveName = '\\$' + Name
# ____________________________________________________________
# Symbols
Colon = r'\:'
Comma = r'\,'
Assign = r'\='
# Square, round and curly brackets share one token name each for opening and
# closing; postprocess() only counts nesting depth and does not check that
# the bracket kinds match.
OpenBracket = r'[\[\(\{]'
CloseBracket = r'[\]\)\}]'
# ____________________________________________________________
# Keywords
# Each keyword pattern ends in \b (word boundary) so that it only matches a
# complete word.  Without it the keyword rules, which are registered before
# Name (see the tokens list), also match the first characters of identifiers
# such as "iffy" or "define" and split them into a keyword plus a name.
If = r'if\b'
Else = r'else\b'
While = r'while\b'
Def = r'def\b'
Object = r'object\b'
# Names of the lexer rules, in the order in which make_lexer() registers them.
# Every entry must be the name of a module-level variable holding the rule's
# regex, because make_lexer() looks the pattern up through globals().
# NOTE(review): the keywords are listed before Name, presumably so that they
# take precedence -- confirm against rply's rule matching order.
tokens = ["If", "Else", "While", "Def", "Object", "Number", "String", "Ignore",
          "NewlineAndWhitespace", "OpenBracket", "CloseBracket", "Comma", "Assign",
          "Colon", "Name", "PrimitiveName"]
def make_lexer():
    """Build the lexer with one rule per entry of the 'tokens' list.

    The regex of each rule is the module-level variable that has the same
    name as the rule, e.g. ("Name", r'[a-zA-Z_][a-zA-Z0-9_]*').
    """
    generator = LexerGenerator()
    patterns = globals()
    for rule_name in tokens:
        generator.add(rule_name, patterns[rule_name])
    return generator.build()
# Module-level lexer instance, built once at import time and used by lex().
lexer = make_lexer()
# s is the simple program code
def lex(s):
    """Tokenize the program text 's' and return the postprocessed token list.

    A line feed is appended first if the text does not already end in one.
    """
    if s.endswith('\n'):
        source = s
    else:
        source = s + '\n'
    return list(postprocess(lexer.lex(source), source))

333
simpleparser.py Normal file
View File

@ -0,0 +1,333 @@
"""
A 'simple' parser. Don't look into this file :-)
"""
import py
import simpleast
from simplelexer import lex
from rply.token import Token
from rply import ParserGenerator
# Parser generator that all @pg.production functions below register with.
# The list names the token types of the grammar; it contains the Indent,
# Dedent, Newline and EOF tokens made by the lexer's postprocessing step and
# leaves out the lexer-only Ignore and NewlineAndWhitespace tokens.
pg = ParserGenerator(["If", "Else", "While", "Def", "Object", "Number",
                      "String", "Name", "Indent", "Dedent", "Newline", "OpenBracket",
                      "CloseBracket", "Comma", "Assign", "Colon", "PrimitiveName", "EOF"])
def build_methodcall(call, cls):
    """Create a call node of type 'cls' from a parsed call.

    'call' is [name] or [name, arguments].  The node is built as
    cls(None, name, arguments); the receiver is left as None and filled in
    later by the enclosing production.
    """
    name = call[0]
    args = call[1] if len(call) != 1 else []
    return cls(None, name, args)
@pg.production("program : statements EOF")
@pg.production("program : newlines statements EOF")
def program(prog):
    """Return the statements of the whole program.

    Leading newlines reduce to None, in which case the statements are the
    second symbol instead of the first.
    """
    first = prog[0]
    return prog[1] if first is None else first
@pg.production("statements : statement")
@pg.production("statements : statement statements")
@pg.production("statements : statement newlines statements")
def statements(stmts):
    """Collect a sequence of statements into one simpleast.Program.

    A single statement starts a new Program.  Otherwise the last symbol is
    the Program built from the remaining statements and the first statement
    is inserted at its front.  This also covers the three-symbol form with
    newlines in between, which previously matched no branch and returned None.
    """
    first = stmts[0]
    if len(stmts) == 1:
        return simpleast.Program([first])
    rest = stmts[-1]
    if first is None:
        # Nothing to prepend (kept from the earlier implementation).
        return rest
    rest.statements.insert(0, first)
    return rest
@pg.production("newlines : Newline")
@pg.production("newlines : Newline newlines")
def newlines(n):
    """Reduce any run of Newline tokens to None; they carry no meaning."""
    result = None
    return result
@pg.production("statement : simplestatement")
@pg.production("statement : ifstatement")
@pg.production("statement : whilestatement")
@pg.production("statement : defstatement")
@pg.production("statement : objectstatement")
def statement(stmt):
    """Pass the node of the concrete statement kind through unchanged."""
    node = stmt[0]
    return node
@pg.production("ifstatement : If expression block")
@pg.production("ifstatement : If expression block Else block")
def ifstatement(ifstmt):
    """Build a simpleast.IfStatement; the else block is None when absent."""
    condition = ifstmt[1]
    ifblock = ifstmt[2]
    elseblock = ifstmt[-1] if len(ifstmt) > 3 else None
    return simpleast.IfStatement(condition, ifblock, elseblock)
@pg.production("whilestatement : While expression block")
def whilestatement(whilestmt):
    """Build a simpleast.WhileStatement from 'While expression block'.

    The function used to be called 'ifstatement' as well, which rebound the
    module-level name of the if-statement handler to this function.  The
    grammar has no else form for while, so no else block is passed.
    """
    return simpleast.WhileStatement(whilestmt[1], whilestmt[2])
@pg.production("objectstatement : Object name block")
@pg.production("objectstatement : Object name parentlist block")
def objectstatement(obj):
    """Build a simpleast.ObjectDefinition.

    With a parent list, each entry contributes its attribute name and its
    expression; without one, both lists are empty.
    """
    name = obj[1]
    if len(obj) == 3:
        return simpleast.ObjectDefinition(name, obj[2], [], [])
    parents = obj[2]
    parentnames = [parent.attrname for parent in parents]
    parentvalues = [parent.expression for parent in parents]
    return simpleast.ObjectDefinition(name, obj[3], parentnames, parentvalues)
@pg.production("defstatement : Def name argumentnamelist block")
@pg.production("defstatement : Def name block")
def defstatement(defn):
    """Build a simpleast.FunctionDefinition; no argument list means []."""
    has_arguments = len(defn) == 4
    name = defn[1]
    args = defn[2] if has_arguments else []
    blk = defn[3] if has_arguments else defn[2]
    return simpleast.FunctionDefinition(name, args, blk)
@pg.production("block : Colon newlines Indent statements Dedent")
def block(blk):
    """Return the statements of an indented block (the fourth symbol)."""
    body = blk[3]
    return body
@pg.production("simplestatement : expression Newline")
@pg.production("simplestatement : expression Assign expression Newline")
def simplestatement(stmts):
    """Build an expression statement or an assignment.

    An assignment target must be a MethodCall without arguments; its receiver
    and method name become the target of the simpleast.Assignment.  Any other
    target raises ParseError at the position of the Assign token.
    """
    if len(stmts) == 2:
        return simpleast.ExprStatement(stmts[0])
    target = stmts[0]
    value = stmts[2]
    assignable = (isinstance(target, simpleast.MethodCall) and
                  target.arguments == [])
    if not assignable:
        source_pos = stmts[1].source_pos
        info = ErrorInformation(source_pos.idx,
                                customerror="can only assign to attribute")
        raise ParseError(source_pos, info)
    return simpleast.Assignment(target.receiver, target.methodname, value)
@pg.production("expression : basic_expression")
@pg.production("expression : basic_expression msg-chain")
def expression(expr):
    """Link a chain of method calls to its starting expression.

    Each call in the chain gets the preceding node as its receiver; the last
    call of the chain is the resulting expression.  Without a chain the basic
    expression is returned as is.
    """
    if len(expr) <= 1:
        return expr[0]
    chain = expr[1]
    receiver = expr[0]
    for call in chain:
        call.receiver = receiver
        receiver = call
    return chain[-1]
@pg.production("msg-chain : methodcall")
@pg.production("msg-chain : methodcall msg-chain")
def msg_chain(cc):
    """Return the method calls of a chain as a flat list, first call first."""
    if len(cc) <= 1:
        return cc
    head, tail = cc[0], cc[1]
    return [head] + tail
@pg.production("basic_expression : Number")
def number_expression(stmt):
    """Wrap the text of a Number token in a simpleast.IntLiteral."""
    number_token = stmt[0]
    return simpleast.IntLiteral(number_token.value)
@pg.production("basic_expression : String")
def string_expression(stmt):
    """Wrap the text of a String token in a simpleast.StringLiteral."""
    string_token = stmt[0]
    return simpleast.StringLiteral(string_token.value)
@pg.production("basic_expression : implicitselfmethodcall")
def implicitselfmethodcall(call):
    """Give a call without explicit receiver a simpleast.ImplicitSelf receiver."""
    node = call[0]
    node.receiver = simpleast.ImplicitSelf()
    return node
@pg.production("implicitselfmethodcall : methodcall")
def implicitselfmethodcall_methodcall(call):
    """Pass the method call node through unchanged."""
    node = call[0]
    return node
@pg.production("methodcall : primitivemethodcall")
@pg.production("methodcall : simplemethodcall")
def methodcall(call):
    """Pass the primitive or simple method call node through unchanged."""
    node = call[0]
    return node
@pg.production("simplemethodcall : name")
@pg.production("simplemethodcall : name argumentslist")
def simplemethodcall(call):
    """Build a simpleast.MethodCall; the receiver is filled in later."""
    node_class = simpleast.MethodCall
    return build_methodcall(call, node_class)
@pg.production("primitivemethodcall : primitivename")
@pg.production("primitivemethodcall : primitivename argumentslist")
def primitivemethodcall(call):
    """Build a simpleast.PrimitiveMethodCall; the receiver is filled in later."""
    node_class = simpleast.PrimitiveMethodCall
    return build_methodcall(call, node_class)
@pg.production("argumentslist : OpenBracket arguments CloseBracket")
@pg.production("argumentnamelist : OpenBracket argumentnames CloseBracket")
@pg.production("parentlist : OpenBracket parentdefinitions CloseBracket")
def argumentslist(args):
    """Return the contents of a bracketed list, dropping the brackets."""
    contents = args[1]
    return contents
@pg.production("arguments : expression")
@pg.production("arguments : expression Comma")
@pg.production("arguments : expression Comma arguments")
@pg.production("argumentnames : name")
@pg.production("argumentnames : name Comma")
@pg.production("argumentnames : name Comma argumentnames")
@pg.production("parentdefinitions : assignment")
@pg.production("parentdefinitions : assignment Comma")
@pg.production("parentdefinitions : assignment Comma parentdefinitions")
def arguments(args):
    """Return the items of a comma separated list as a flat Python list.

    A trailing comma is allowed and ignored.
    """
    head = [args[0]]
    if len(args) != 3:
        return head
    return head + args[2]
@pg.production("assignment : name Assign expression")
def assignement(args):
    """Build a simpleast.Assignment with receiver None for 'name = expression'."""
    attrname, value = args[0], args[2]
    return simpleast.Assignment(None, attrname, value)
@pg.production("primitivename : PrimitiveName")
@pg.production("name : Name")
def name(name):
    """Return the text of a Name or PrimitiveName token."""
    token = name[0]
    return token.value
@pg.error
def error_handler(token):
    """Raise a ParseError that names the unexpected token's type and position."""
    position = token.getsourcepos()
    message = "Ran into a %s where it wasn't expected" % token.gettokentype()
    raise ParseError(source_pos=position,
                     errorinformation=ErrorInformation(position.idx,
                                                       customerror=message))
# Build the parser from the productions registered on pg above; parse() and
# print_conflicts() use this module-level instance.
parser = pg.build()
def print_conflicts():
    """Print the reduce/reduce and then the shift/reduce conflicts of the parser.

    Each list is preceded by a heading line and printed one conflict per line.
    """
    for kind in ("rr", "sr"):
        print("%s conflicts" % kind)
        conflicts = getattr(parser.lr_table, kind + "_conflicts")
        for rule_num, token, conflict in conflicts:
            print(rule_num, token, conflict)
# Runs at import time: the conflict lists are printed every time this module
# is loaded.
print_conflicts()
def parse(s):
    """Lex and parse the program text 's' and return the parser's result."""
    return parser.parse(iter(lex(s)))
# ____________________________________________________________
class ParseError(Exception):
    """Parse failure at a source position.

    'source_pos' needs 'lineno' and 'colno' attributes.  'errorinformation'
    may be None or an object with 'expected' (list of alternatives) and
    'customerror' (extra message or None).  'source' is the program text; if
    empty, the message cannot quote the offending line.
    """

    def __init__(self, source_pos, errorinformation, source=""):
        self.source_pos = source_pos
        self.errorinformation = errorinformation
        self.args = (source_pos, errorinformation)
        self.source = source

    def nice_error_message(self, filename="<unknown>"):
        """Return a multi-line, human readable description of the error."""
        position = self.source_pos
        lines = [" File %s, line %s" % (filename, position.lineno + 1)]
        text = self.source
        if text:
            # quote the offending line and point at the column
            lines.append(text.split("\n")[position.lineno])
            lines.append(" " * position.colno + "^")
        else:
            lines.append("<couldn't get source>")
        lines.append("ParseError")
        info = self.errorinformation
        if info:
            candidates = info.expected
            if candidates:
                count = len(candidates)
                expected = ''
                if count == 1:
                    expected = candidates[0]
                elif count > 1:
                    quoted = ", ".join(["'%s'" % c for c in candidates[:-1]])
                    expected = "%s or '%s'" % (quoted, candidates[-1])
                if expected:
                    lines.append("expected %s" % (expected,))
            if info.customerror:
                lines.append(info.customerror)
        return "\n".join(lines)

    def __str__(self):
        return self.nice_error_message()
class ErrorInformation(object):
    """Details of a parse error.

    pos: position of the error in the input.
    expected: list of things that would have been accepted there; a fresh
        empty list when not given.
    customerror: optional free-form message.
    """

    def __init__(self, pos, expected=None, customerror=None):
        self.expected = [] if expected is None else expected
        self.pos = pos
        self.customerror = customerror
def combine_errors(self, other):
    """Merge two error descriptions, either of which may be None.

    The description with the larger 'pos' wins, and a description without
    expectations loses against one that has some.  If neither rule decides,
    a new ErrorInformation is returned whose 'expected' list holds the
    entries of both, without duplicates and in first-seen order.
    """
    if self is None:
        return other
    if other is None or self.pos > other.pos or len(other.expected) == 0:
        return self
    if other.pos > self.pos or len(self.expected) == 0:
        return other
    merged = []
    seen = {}
    for reasons in (self.expected, other.expected):
        for reason in reasons:
            if reason in seen:
                continue
            seen[reason] = True
            merged.append(reason)
    return ErrorInformation(self.pos, merged,
                            self.customerror or other.customerror)
def make_arglist(methodname):
    """Return a parser method that reads a bracketed, comma separated list.

    The returned function expects a parser object offering match(), repeat(),
    maybe(), comma and the item method named by 'methodname'.  It reads
    "(", one item, further comma-plus-item repetitions, an optional
    trailing comma and ")", and returns the list of items.
    """
    def arglist(self):
        self.match("OpenBracket", "(")
        parse_item = getattr(self, methodname)
        items = [parse_item()]
        items += self.repeat(self.comma, parse_item)
        self.maybe(self.comma)
        self.match("CloseBracket", ")")
        return items
    return arglist