diff --git a/builtins.simple b/builtins.simple index 0d2b41e..df7c3ae 100644 --- a/builtins.simple +++ b/builtins.simple @@ -34,6 +34,8 @@ object inttrait: def tobool: self $int_tobool + def tostr: + self $int_tostr # Project: Boolean object booltrait: @@ -46,3 +48,21 @@ object booltrait: def toint: self $bool_toint + def tostr: + self $bool_tostr + +# Project: String +object strtrait: + def eq(other): + self $str_eq(other) + def add(other): + self $str_add(other) + def rev: + self $str_rev + def len: + self $str_len + + def tobool: + self $str_tobool + def toint: + self $str_toint diff --git a/de.churl.simple/bytecodeinterpreter.py b/de.churl.simple/bytecodeinterpreter.py index 5f7d6fb..4f9b128 100644 --- a/de.churl.simple/bytecodeinterpreter.py +++ b/de.churl.simple/bytecodeinterpreter.py @@ -20,10 +20,11 @@ class Interpreter(object): return self.run(code, w_context) def read4(self, code, pc): - highval = ord(code[pc + 3]) - if highval >= 128: + """ Converts 4 unicode characters to single 4 byte value """ + highval = ord(code[pc + 3]) # most significant byte + if highval >= 128: # convert from 2's complement? highval -= 256 - return (ord(code[pc]) | + return (ord(code[pc]) | # merge single bytes into 4 byte value (ord(code[pc + 1]) << 8) | (ord(code[pc + 2]) << 16) | (highval << 24)) @@ -34,7 +35,7 @@ class Interpreter(object): code = bytecode.code print(disassemble(bytecode)) while pc < len(code): - opcode = ord(code[pc]) + opcode = ord(code[pc]) # convert unicode to number pc += 1 if compile.isjump(opcode): oparg = self.read4(code, pc) @@ -67,6 +68,10 @@ class Interpreter(object): elif opcode == compile.BOOL_LITERAL: # Project: Boolean w_value = self.space.newbool(oparg) # oparg is 1 or 0 stack.append(w_value) + elif opcode == compile.STRING_LITERAL: # Project: String + value = bytecode.symbols[oparg] + w_value = self.space.newstring(value) + stack.append(w_value) elif opcode == compile.MAKE_FUNCTION: bc = bytecode.subbytecodes[oparg] w_method = self.space.definemethod(name=bc.name, code=bc, w_target=w_context) diff --git a/de.churl.simple/compile.py b/de.churl.simple/compile.py index 025d40b..4f03bcd 100644 --- a/de.churl.simple/compile.py +++ b/de.churl.simple/compile.py @@ -111,6 +111,7 @@ import simpleast BOOL_LITERAL = 1 # 1 or 0 # Project: Boolean INT_LITERAL = 2 # integer value +STRING_LITERAL = 3 # Project: String ASSIGNMENT = 4 # index of attrname METHOD_LOOKUP = 5 # index of method name METHOD_CALL = 6 # number of arguments @@ -190,6 +191,7 @@ def compile(ast, argumentnames=[], name=None): stack_effects = { BOOL_LITERAL: 1, # Project: Boolean + STRING_LITERAL: 1, # Project: String INT_LITERAL: 1, ASSIGNMENT: -1, METHOD_LOOKUP: 1, @@ -240,11 +242,11 @@ class Compiler(object): self.code.append(c) elif hasarg(opcode): assert isinstance(arg, int) - if -127 <= arg <= 127: - self.code.append(chr(arg & 0xFF)) + if -127 <= arg <= 127: # arg can be encoded as one byte + self.code.append(chr(arg & 0xFF)) # mask the least significant byte and convert to unicode else: - self.code.append(chr(128)) - for c in self.encode4(arg): + self.code.append(chr(128)) # padding character + for c in self.encode4(arg): # append as 4 single bytes self.code.append(c) else: assert arg is None @@ -266,14 +268,17 @@ class Compiler(object): i += 1 def encode4(self, value): + """ Encodes 4 byte value as list of 4 unicode characters. """ return [chr(value & 0xFF), chr((value >> 8) & 0xFF), chr((value >> 16) & 0xFF), chr((value >> 24) & 0xFF)] def lookup_symbol(self, symbol): + """ Assigns indices to symbols/strings """ if symbol not in self.symbols: self.symbols[symbol] = len(self.symbols) + return self.symbols[symbol] def compile(self, ast, needsresult=True): @@ -286,6 +291,10 @@ class Compiler(object): def compile_BooleanLiteral(self, astnode, needsresult): self.emit(BOOL_LITERAL, astnode.value) + # Project: String + def compile_StringLiteral(self, astnode, needsresult): + self.emit(STRING_LITERAL, self.lookup_symbol(astnode.value)) # save string value to symboltable + def compile_ImplicitSelf(self, astnode, needsresult): self.emit(IMPLICIT_SELF) diff --git a/de.churl.simple/disass.py b/de.churl.simple/disass.py index 1dc6129..f7cd9c4 100644 --- a/de.churl.simple/disass.py +++ b/de.churl.simple/disass.py @@ -1,5 +1,6 @@ import compile + def disassemble(bytecode, indent='', pc=-1): """ disassemble a bytecode object and print a readabable version of it""" assert isinstance(bytecode, compile.Bytecode) @@ -18,12 +19,13 @@ for name, value in list(compile.__dict__.items()): class AbstractDisassembler(object): def read4(self, code, pc): - highval = ord(code[pc+3]) + """ Converts 4 unicode characters to single 4 byte value """ + highval = ord(code[pc + 3]) if highval >= 128: highval -= 256 return (ord(code[pc]) | - (ord(code[pc+1]) << 8) | - (ord(code[pc+2]) << 16) | + (ord(code[pc + 1]) << 8) | + (ord(code[pc + 2]) << 16) | (highval << 24)) def disassemble(self, bytecode, currpc=-1): @@ -114,5 +116,3 @@ class Disassembler(AbstractDisassembler): print() else: print('\t', oparg) - - diff --git a/de.churl.simple/objmodel.py b/de.churl.simple/objmodel.py index a3e54f1..2c23865 100644 --- a/de.churl.simple/objmodel.py +++ b/de.churl.simple/objmodel.py @@ -77,7 +77,7 @@ class W_Integer(AbstractObject): def __init__(self, value, space=None, trait="inttrait"): self.value = int(value) self.space = space - self.__trait = trait # used this to extend from W_Integer + self.__trait = trait # able to extend from W_Integer def getparents(self): if self.space is None: @@ -103,6 +103,37 @@ class W_Boolean(W_Integer): # don't know if extending is good idea def __init__(self, value, space=None): super().__init__(int(value), space=space, trait="booltrait") + def __str__(self): + return str(bool(self.value)) + + __repr__ = __str__ + + +# Project: String +class W_String(AbstractObject): + def __init__(self, value, space=None): + self.value = str(value) + self.space = space + self.__trait = "strtrait" + + def getparents(self): + if self.space is None: + return [] # for tests + trait = self.space.getbuiltin(self.__trait) + assert trait is not None, 'O_o bogus state' + return [trait] + + def hasslot(self, name): + return False + + def __str__(self): + return self.value + + __repr__ = __str__ + + def istrue(self): + return self.value != "" + class W_Method(W_NormalObject): diff --git a/de.churl.simple/objspace.py b/de.churl.simple/objspace.py index e1a9758..c03a446 100644 --- a/de.churl.simple/objspace.py +++ b/de.churl.simple/objspace.py @@ -1,4 +1,4 @@ -from objmodel import W_Integer, W_Boolean +from objmodel import W_Integer, W_Boolean, W_String from objmodel import W_Method from objmodel import W_NormalObject @@ -53,6 +53,10 @@ class ObjectSpace(object): def newbool(self, value): return W_Boolean(value, space=self) + # Project: String + def newstring(self, value): + return W_String(value, space=self) + def definemethod(self, name, code, w_target): w_meth = W_Method(code, name=name, slots={'__parent__': w_target}, diff --git a/de.churl.simple/primitives.py b/de.churl.simple/primitives.py index 7db12cd..fc7c3c0 100644 --- a/de.churl.simple/primitives.py +++ b/de.churl.simple/primitives.py @@ -19,7 +19,9 @@ def primitive(name, unwrap_spec, wrap_spec): # decorator arguments if t is int: unwrapped_args += (arg.value,) elif t is bool: # Project: Boolean - unwrapped_args += (bool(arg.value),) # isn't necessary because "1 or 0" is valid + unwrapped_args += (bool(arg.value),) # isn't really necessary because "1 or 0" is also valid + elif t is str: # Project: String + unwrapped_args += (arg.value,) else: unwrapped_args += (arg,) @@ -29,6 +31,8 @@ def primitive(name, unwrap_spec, wrap_spec): # decorator arguments return space.newint(result) elif wrap_spec is bool: # Project: Boolean return space.newbool(result) + elif wrap_spec is str: # Project: String + return space.newstring(result) return result unwrapper.__qualname__ = name @@ -78,6 +82,17 @@ def simple_int_increment(a): return a + 1 +# Project: Boolean, String, Double +@primitive("int_tobool", [int], bool) +def simple_int_tobool(a): + return a + + +@primitive("int_tostr", [int], str) +def simple_int_tobstr(a): + return a + + # Project: Boolean @primitive("bool_and", [bool, bool], bool) def simple_bool_and(a, b): @@ -94,6 +109,17 @@ def simple_bool_not(a): return not a +@primitive("bool_toint", [bool], int) +def simple_bool_toint(a): + return a + + +@primitive("bool_tostr", [bool], str) +def simple_bool_tostr(a): + return str(a).lower() + + +# bool stuff for int @primitive("int_eq", [int, int], bool) def simple_int_eq(a, b): return a == b @@ -119,11 +145,32 @@ def simple_int_less(a, b): return a < b -@primitive("int_tobool", [int], bool) -def simple_int_tobool(a): +# Project: String +@primitive("str_eq", [str, str], bool) +def simple_str_eq(a, b): + return a == b + + +@primitive("str_add", [str, str], str) +def simple_str_add(a, b): + return a + b + + +@primitive("str_rev", [str], str) +def simple_str_eq(a): + return a[::-1] + + +@primitive("str_len", [str], int) +def simple_str_eq(a): + return len(a) + + +@primitive("str_toint", [str], int) +def simple_str_toint(a): return a -@primitive("bool_toint", [bool], int) -def simple_bool_toint(a): - return a +@primitive("str_tobool", [str], bool) +def simple_str_tobool(a): + return a == "true" diff --git a/de.churl.simple/simpleast.py b/de.churl.simple/simpleast.py index d361fab..a7a79a7 100644 --- a/de.churl.simple/simpleast.py +++ b/de.churl.simple/simpleast.py @@ -93,16 +93,6 @@ class IntLiteral(Expression): self.value = int(value) -# Project: String -class StringLiteral(Expression): - """ A string literal (like "hello world") """ - - attrs = ["value"] - - def __init__(self, value): - self.value = str(value) - - # Project: Boolean class BooleanLiteral(Expression): """ A boolean literal (like "false") """ @@ -113,6 +103,16 @@ class BooleanLiteral(Expression): self.value = value == "true" +# Project: String +class StringLiteral(Expression): + """ A string literal (like "hello world") """ + + attrs = ["value"] + + def __init__(self, value): + self.value = str(value) + + class MethodCall(Expression): """ A call to a method with name 'methodname' on 'receiver' with 'arguments' (which is a list of expression ASTs). diff --git a/de.churl.simple/simplelexer.py b/de.churl.simple/simplelexer.py index 3177219..9718494 100644 --- a/de.churl.simple/simplelexer.py +++ b/de.churl.simple/simplelexer.py @@ -1,4 +1,4 @@ -from rply import LexerGenerator +from rply import LexerGenerator, LexingError from rply.token import Token # attempts at writing a simple Python-like lexer @@ -90,7 +90,7 @@ def postprocess(tokens, source): elif token.name == "CloseBracket": parenthesis_level -= 1 if parenthesis_level < 0: - raise LexerError(source, token.source_pos, "unmatched parenthesis") + raise LexingError("unmatched parenthesis", token.source_pos) output_tokens.append(token) elif token.name == "NewlineAndWhitespace": if i + 1 < len(tokens) and tokens[i + 1].name == "NewlineAndWhitespace": @@ -182,8 +182,8 @@ While = r'while' Def = r'def' Object = r'object' -tokens = ["If", "Else", "While", "Def", "Object", "Number", "String", "Ignore", - "Boolean", # Project: Boolean +tokens = ["If", "Else", "While", "Def", "Object", "Number", "Ignore", + "String", "Boolean", # Project: Boolean, String "NewlineAndWhitespace", "OpenBracket", "CloseBracket", "Comma", "Assign", "Colon", "Increment", "Plus", "Minus", "Multiply", "Divide", "Modulo", # Project: Sugar "Name", "PrimitiveName"] diff --git a/de.churl.simple/simpleparser.py b/de.churl.simple/simpleparser.py index b1998f0..29fdc84 100644 --- a/de.churl.simple/simpleparser.py +++ b/de.churl.simple/simpleparser.py @@ -6,10 +6,10 @@ from simplelexer import lex import simpleast pg = ParserGenerator(["If", "Else", "While", "Def", "Object", "Number", - "Boolean", # New Types - "String", "Name", "Indent", "Dedent", "Newline", "OpenBracket", + "String", "Boolean", # Project: Boolean, String + "Name", "Indent", "Dedent", "Newline", "OpenBracket", "CloseBracket", "Comma", "Assign", "Colon", - "Increment", "Plus", "Minus", "Multiply", "Divide", "Modulo", # Sugar + "Increment", "Plus", "Minus", "Multiply", "Divide", "Modulo", # Project: Sugar "PrimitiveName", "EOF"], # Operator precedence for ambiguous rules, ascending precedence=[("left", ["Plus", "Minus"]), @@ -198,18 +198,18 @@ def number_expression(stmt): return simpleast.IntLiteral(stmt[0].value) -# Project: String -@pg.production("basic_expression : String") -def string_expression(stmt): - return simpleast.StringLiteral(stmt[0].value[1:-1]) # cut off delimiters - - # Project: Boolean @pg.production("basic_expression : Boolean") def boolean_expression(stmt): return simpleast.BooleanLiteral(stmt[0].value) +# Project: String +@pg.production("basic_expression : String") +def string_expression(stmt): + return simpleast.StringLiteral(stmt[0].value[1:-1]) # cut off delimiters + + @pg.production("basic_expression : implicitselfmethodcall") def implicitselfmethodcall(call): methodcall = call[0] diff --git a/mytests/test_string.py b/mytests/test_string.py index a3e6622..13dbd28 100644 --- a/mytests/test_string.py +++ b/mytests/test_string.py @@ -1,10 +1,24 @@ import py +from rply import Token +from simpleast import Program, ExprStatement, BooleanLiteral, ImplicitSelf, Assignment, StringLiteral +from simplelexer import lex from simpleparser import parse -from objmodel import W_NormalObject, W_Integer +from objmodel import W_NormalObject, W_Integer, W_String, W_Boolean from interpreter import Interpreter +def test_basic_string_lexing(): + assert lex("\"Hallo\"")[0] == Token("String", "\"Hallo\"") + assert lex("\'Hallo\'")[0] == Token("String", "\'Hallo\'") + assert lex("x = \"true\"")[:3] == [Token("Name", "x"), Token("Assign", "="), Token("String", "\"true\"")] + + +def test_basic_string_parsing(): + assert parse("\"false\"") == Program([ExprStatement(StringLiteral("false"))]) + assert parse("x = \"false\"") == Program([Assignment(ImplicitSelf(), "x", StringLiteral("false"))]) + + def test_string_assignment(): ast = parse(""" x = "Hallo" @@ -19,6 +33,7 @@ x = "Hallo" def test_string_operations(): ast = parse(""" +w = "Hallo" eq("Hallo") x = "Hallo" add("ollaH") y = "Hallo" rev z = "Hallo" len @@ -27,9 +42,33 @@ z = "Hallo" len w_model = interpreter.make_module() interpreter.eval(ast, w_model) + assert isinstance(w_model.getvalue("w"), W_Boolean) + assert w_model.getvalue("w").istrue() is True assert isinstance(w_model.getvalue("x"), W_String) assert w_model.getvalue("x").value == "HalloollaH" assert isinstance(w_model.getvalue("y"), W_String) assert w_model.getvalue("y").value == "ollaH" assert isinstance(w_model.getvalue("z"), W_Integer) assert w_model.getvalue("z").value == 5 + + +def test_string_conversion(): + ast = parse(""" +w = "true" tobool +x = true tostr +y = 25 tostr +z = "25" toint +""") + + interpreter = Interpreter() + w_model = interpreter.make_module() + interpreter.eval(ast, w_model) + + assert isinstance(w_model.getvalue("w"), W_Boolean) + assert w_model.getvalue("w").istrue() is True + assert isinstance(w_model.getvalue("x"), W_String) + assert w_model.getvalue("x").value == "true" + assert isinstance(w_model.getvalue("y"), W_String) + assert w_model.getvalue("y").value == "25" + assert isinstance(w_model.getvalue("z"), W_Integer) + assert w_model.getvalue("z").value == 25