1

implement string

This commit is contained in:
Christoph
2021-08-13 16:39:23 +02:00
parent 5b32c4bcd6
commit 080d932d56
11 changed files with 200 additions and 45 deletions

View File

@ -34,6 +34,8 @@ object inttrait:
def tobool: def tobool:
self $int_tobool self $int_tobool
def tostr:
self $int_tostr
# Project: Boolean # Project: Boolean
object booltrait: object booltrait:
@ -46,3 +48,21 @@ object booltrait:
def toint: def toint:
self $bool_toint self $bool_toint
def tostr:
self $bool_tostr
# Project: String
# Built-in trait for string values. Every method below simply forwards the
# receiver (and its argument, if any) to the primitive of the same name with
# a "str_" prefix: eq, add, rev, len, tobool, toint.
# NOTE(review): unlike inttrait and booltrait there is no "tostr" here - confirm whether that is intended.
object strtrait:
def eq(other):
self $str_eq(other)
def add(other):
self $str_add(other)
def rev:
self $str_rev
def len:
self $str_len
def tobool:
self $str_tobool
def toint:
self $str_toint

View File

@ -20,10 +20,11 @@ class Interpreter(object):
return self.run(code, w_context) return self.run(code, w_context)
def read4(self, code, pc): def read4(self, code, pc):
highval = ord(code[pc + 3]) """ Converts 4 unicode characters to single 4 byte value """
if highval >= 128: highval = ord(code[pc + 3]) # most significant byte
if highval >= 128: # convert from 2's complement?
highval -= 256 highval -= 256
return (ord(code[pc]) | return (ord(code[pc]) | # merge single bytes into 4 byte value
(ord(code[pc + 1]) << 8) | (ord(code[pc + 1]) << 8) |
(ord(code[pc + 2]) << 16) | (ord(code[pc + 2]) << 16) |
(highval << 24)) (highval << 24))
@ -34,7 +35,7 @@ class Interpreter(object):
code = bytecode.code code = bytecode.code
print(disassemble(bytecode)) print(disassemble(bytecode))
while pc < len(code): while pc < len(code):
opcode = ord(code[pc]) opcode = ord(code[pc]) # convert unicode to number
pc += 1 pc += 1
if compile.isjump(opcode): if compile.isjump(opcode):
oparg = self.read4(code, pc) oparg = self.read4(code, pc)
@ -67,6 +68,10 @@ class Interpreter(object):
elif opcode == compile.BOOL_LITERAL: # Project: Boolean elif opcode == compile.BOOL_LITERAL: # Project: Boolean
w_value = self.space.newbool(oparg) # oparg is 1 or 0 w_value = self.space.newbool(oparg) # oparg is 1 or 0
stack.append(w_value) stack.append(w_value)
elif opcode == compile.STRING_LITERAL: # Project: String
value = bytecode.symbols[oparg]
w_value = self.space.newstring(value)
stack.append(w_value)
elif opcode == compile.MAKE_FUNCTION: elif opcode == compile.MAKE_FUNCTION:
bc = bytecode.subbytecodes[oparg] bc = bytecode.subbytecodes[oparg]
w_method = self.space.definemethod(name=bc.name, code=bc, w_target=w_context) w_method = self.space.definemethod(name=bc.name, code=bc, w_target=w_context)

View File

@ -111,6 +111,7 @@ import simpleast
BOOL_LITERAL = 1 # 1 or 0 # Project: Boolean BOOL_LITERAL = 1 # 1 or 0 # Project: Boolean
INT_LITERAL = 2 # integer value INT_LITERAL = 2 # integer value
STRING_LITERAL = 3 # Project: String
ASSIGNMENT = 4 # index of attrname ASSIGNMENT = 4 # index of attrname
METHOD_LOOKUP = 5 # index of method name METHOD_LOOKUP = 5 # index of method name
METHOD_CALL = 6 # number of arguments METHOD_CALL = 6 # number of arguments
@ -190,6 +191,7 @@ def compile(ast, argumentnames=[], name=None):
stack_effects = { stack_effects = {
BOOL_LITERAL: 1, # Project: Boolean BOOL_LITERAL: 1, # Project: Boolean
STRING_LITERAL: 1, # Project: String
INT_LITERAL: 1, INT_LITERAL: 1,
ASSIGNMENT: -1, ASSIGNMENT: -1,
METHOD_LOOKUP: 1, METHOD_LOOKUP: 1,
@ -240,11 +242,11 @@ class Compiler(object):
self.code.append(c) self.code.append(c)
elif hasarg(opcode): elif hasarg(opcode):
assert isinstance(arg, int) assert isinstance(arg, int)
if -127 <= arg <= 127: if -127 <= arg <= 127: # arg can be encoded as one byte
self.code.append(chr(arg & 0xFF)) self.code.append(chr(arg & 0xFF)) # mask the least significant byte and convert to unicode
else: else:
self.code.append(chr(128)) self.code.append(chr(128)) # padding character
for c in self.encode4(arg): for c in self.encode4(arg): # append as 4 single bytes
self.code.append(c) self.code.append(c)
else: else:
assert arg is None assert arg is None
@ -266,14 +268,17 @@ class Compiler(object):
i += 1 i += 1
def encode4(self, value): def encode4(self, value):
""" Encodes 4 byte value as list of 4 unicode characters. """
return [chr(value & 0xFF), return [chr(value & 0xFF),
chr((value >> 8) & 0xFF), chr((value >> 8) & 0xFF),
chr((value >> 16) & 0xFF), chr((value >> 16) & 0xFF),
chr((value >> 24) & 0xFF)] chr((value >> 24) & 0xFF)]
def lookup_symbol(self, symbol): def lookup_symbol(self, symbol):
""" Assigns indices to symbols/strings """
if symbol not in self.symbols: if symbol not in self.symbols:
self.symbols[symbol] = len(self.symbols) self.symbols[symbol] = len(self.symbols)
return self.symbols[symbol] return self.symbols[symbol]
def compile(self, ast, needsresult=True): def compile(self, ast, needsresult=True):
@ -286,6 +291,10 @@ class Compiler(object):
def compile_BooleanLiteral(self, astnode, needsresult): def compile_BooleanLiteral(self, astnode, needsresult):
self.emit(BOOL_LITERAL, astnode.value) self.emit(BOOL_LITERAL, astnode.value)
# Project: String
def compile_StringLiteral(self, astnode, needsresult):
    """Emit a STRING_LITERAL instruction for a string literal node.

    The literal's text is registered through ``lookup_symbol`` and the
    index returned by that call becomes the instruction argument.
    ``needsresult`` is accepted for signature parity but not consulted.
    """
    symbol_index = self.lookup_symbol(astnode.value)
    self.emit(STRING_LITERAL, symbol_index)
def compile_ImplicitSelf(self, astnode, needsresult): def compile_ImplicitSelf(self, astnode, needsresult):
self.emit(IMPLICIT_SELF) self.emit(IMPLICIT_SELF)

View File

@ -1,5 +1,6 @@
import compile import compile
def disassemble(bytecode, indent='', pc=-1): def disassemble(bytecode, indent='', pc=-1):
""" disassemble a bytecode object and print a readabable version of it""" """ disassemble a bytecode object and print a readabable version of it"""
assert isinstance(bytecode, compile.Bytecode) assert isinstance(bytecode, compile.Bytecode)
@ -18,12 +19,13 @@ for name, value in list(compile.__dict__.items()):
class AbstractDisassembler(object): class AbstractDisassembler(object):
def read4(self, code, pc): def read4(self, code, pc):
highval = ord(code[pc+3]) """ Converts 4 unicode characters to single 4 byte value """
highval = ord(code[pc + 3])
if highval >= 128: if highval >= 128:
highval -= 256 highval -= 256
return (ord(code[pc]) | return (ord(code[pc]) |
(ord(code[pc+1]) << 8) | (ord(code[pc + 1]) << 8) |
(ord(code[pc+2]) << 16) | (ord(code[pc + 2]) << 16) |
(highval << 24)) (highval << 24))
def disassemble(self, bytecode, currpc=-1): def disassemble(self, bytecode, currpc=-1):
@ -114,5 +116,3 @@ class Disassembler(AbstractDisassembler):
print() print()
else: else:
print('\t', oparg) print('\t', oparg)

View File

@ -77,7 +77,7 @@ class W_Integer(AbstractObject):
def __init__(self, value, space=None, trait="inttrait"): def __init__(self, value, space=None, trait="inttrait"):
self.value = int(value) self.value = int(value)
self.space = space self.space = space
self.__trait = trait # used this to extend from W_Integer self.__trait = trait # able to extend from W_Integer
def getparents(self): def getparents(self):
if self.space is None: if self.space is None:
@ -103,6 +103,37 @@ class W_Boolean(W_Integer): # don't know if extending is good idea
def __init__(self, value, space=None): def __init__(self, value, space=None):
super().__init__(int(value), space=space, trait="booltrait") super().__init__(int(value), space=space, trait="booltrait")
# Render the wrapped value through Python's bool, which yields "True" or "False".
# NOTE(review): the bool_tostr primitive produces lowercase text instead - confirm the difference is intended.
def __str__(self):
return str(bool(self.value))
# repr intentionally shares the str implementation.
__repr__ = __str__
# Project: String
class W_String(AbstractObject):
    """Wrapped string value.

    The payload is stored in ``value`` (always converted with ``str``).
    The only parent is the builtin named "strtrait", fetched from the
    object space on demand; the object itself has no slots.
    """

    def __init__(self, value, space=None):
        self.value = str(value)
        self.__trait = "strtrait"
        self.space = space

    def getparents(self):
        """Return ``[trait]``, or ``[]`` when no space is attached."""
        space = self.space
        if space is not None:
            trait = space.getbuiltin(self.__trait)
            assert trait is not None, 'O_o bogus state'
            return [trait]
        return []  # for tests

    def hasslot(self, name):
        """Strings never own slots."""
        return False

    def istrue(self):
        """Only the empty string is false."""
        return bool(self.value)

    def __str__(self):
        return self.value

    __repr__ = __str__
class W_Method(W_NormalObject): class W_Method(W_NormalObject):

View File

@ -1,4 +1,4 @@
from objmodel import W_Integer, W_Boolean from objmodel import W_Integer, W_Boolean, W_String
from objmodel import W_Method from objmodel import W_Method
from objmodel import W_NormalObject from objmodel import W_NormalObject
@ -53,6 +53,10 @@ class ObjectSpace(object):
def newbool(self, value): def newbool(self, value):
return W_Boolean(value, space=self) return W_Boolean(value, space=self)
# Project: String
def newstring(self, value):
    """Wrap *value* in a W_String that is bound to this object space."""
    w_string = W_String(value, space=self)
    return w_string
def definemethod(self, name, code, w_target): def definemethod(self, name, code, w_target):
w_meth = W_Method(code, name=name, w_meth = W_Method(code, name=name,
slots={'__parent__': w_target}, slots={'__parent__': w_target},

View File

@ -19,7 +19,9 @@ def primitive(name, unwrap_spec, wrap_spec): # decorator arguments
if t is int: if t is int:
unwrapped_args += (arg.value,) unwrapped_args += (arg.value,)
elif t is bool: # Project: Boolean elif t is bool: # Project: Boolean
unwrapped_args += (bool(arg.value),) # isn't necessary because "1 or 0" is valid unwrapped_args += (bool(arg.value),) # isn't really necessary because "1 or 0" is also valid
elif t is str: # Project: String
unwrapped_args += (arg.value,)
else: else:
unwrapped_args += (arg,) unwrapped_args += (arg,)
@ -29,6 +31,8 @@ def primitive(name, unwrap_spec, wrap_spec): # decorator arguments
return space.newint(result) return space.newint(result)
elif wrap_spec is bool: # Project: Boolean elif wrap_spec is bool: # Project: Boolean
return space.newbool(result) return space.newbool(result)
elif wrap_spec is str: # Project: String
return space.newstring(result)
return result return result
unwrapper.__qualname__ = name unwrapper.__qualname__ = name
@ -78,6 +82,17 @@ def simple_int_increment(a):
return a + 1 return a + 1
# Project: Boolean, String, Double
@primitive("int_tobool", [int], bool)
def simple_int_tobool(a):
    """Convert an integer to a boolean: zero is false, anything else true.

    The result is normalised with ``bool`` before it is wrapped. Returning
    the raw integer would let the wrapped boolean carry an arbitrary
    number (for example 5) as its value instead of the canonical 0 or 1.
    """
    return bool(a)
@primitive("int_tostr", [int], str)
def simple_int_tobstr(a):
    """Return the decimal text of integer *a*.

    The conversion is done here so the function really returns the ``str``
    it is declared to produce, rather than depending on the string wrapper
    to coerce an integer later.
    """
    # NOTE(review): the function name looks like a typo for
    # "simple_int_tostr"; kept unchanged so existing references still work.
    return str(a)
# Project: Boolean # Project: Boolean
@primitive("bool_and", [bool, bool], bool) @primitive("bool_and", [bool, bool], bool)
def simple_bool_and(a, b): def simple_bool_and(a, b):
@ -94,6 +109,17 @@ def simple_bool_not(a):
return not a return not a
@primitive("bool_toint", [bool], int)
def simple_bool_toint(a):
    """Convert a boolean to an integer: true -> 1, false -> 0.

    The conversion is explicit so the returned value has the ``int`` type
    named in the wrap specification.
    """
    return int(a)
@primitive("bool_tostr", [bool], str)
def simple_bool_tostr(a):
    """Return the lowercase spelling of boolean *a*: "true" or "false"."""
    if a:
        return "true"
    return "false"
# bool stuff for int
@primitive("int_eq", [int, int], bool) @primitive("int_eq", [int, int], bool)
def simple_int_eq(a, b): def simple_int_eq(a, b):
return a == b return a == b
@ -119,11 +145,32 @@ def simple_int_less(a, b):
return a < b return a < b
@primitive("int_tobool", [int], bool) # Project: String
def simple_int_tobool(a): @primitive("str_eq", [str, str], bool)
def simple_str_eq(a, b):
return a == b
@primitive("str_add", [str, str], str)
def simple_str_add(a, b):
    """Concatenate *b* onto the end of *a*."""
    combined = a + b
    return combined
@primitive("str_rev", [str], str)
def simple_str_rev(a):
    """Return *a* with its characters in reverse order."""
    # Previously defined under the copy-pasted name "simple_str_eq", which
    # rebound the module-level name of the real equality primitive.
    return a[::-1]
@primitive("str_len", [str], int)
def simple_str_len(a):
    """Return the number of characters in *a*."""
    # Previously defined under the copy-pasted name "simple_str_eq", which
    # rebound the module-level name of the real equality primitive.
    return len(a)
@primitive("str_toint", [str], int)
def simple_str_toint(a):
return a return a
@primitive("bool_toint", [bool], int) @primitive("str_tobool", [str], bool)
def simple_bool_toint(a): def simple_str_tobool(a):
return a return a == "true"

View File

@ -93,16 +93,6 @@ class IntLiteral(Expression):
self.value = int(value) self.value = int(value)
# Project: String
class StringLiteral(Expression):
""" A string literal (like "hello world") """
attrs = ["value"]
def __init__(self, value):
self.value = str(value)
# Project: Boolean # Project: Boolean
class BooleanLiteral(Expression): class BooleanLiteral(Expression):
""" A boolean literal (like "false") """ """ A boolean literal (like "false") """
@ -113,6 +103,16 @@ class BooleanLiteral(Expression):
self.value = value == "true" self.value = value == "true"
# Project: String
class StringLiteral(Expression):
    """AST node for a string literal such as "hello world".

    ``value`` holds the literal's text, always converted with ``str``.
    """

    attrs = ["value"]

    def __init__(self, value):
        text = str(value)
        self.value = text
class MethodCall(Expression): class MethodCall(Expression):
""" A call to a method with name 'methodname' on 'receiver' with """ A call to a method with name 'methodname' on 'receiver' with
'arguments' (which is a list of expression ASTs). 'arguments' (which is a list of expression ASTs).

View File

@ -1,4 +1,4 @@
from rply import LexerGenerator from rply import LexerGenerator, LexingError
from rply.token import Token from rply.token import Token
# attempts at writing a simple Python-like lexer # attempts at writing a simple Python-like lexer
@ -90,7 +90,7 @@ def postprocess(tokens, source):
elif token.name == "CloseBracket": elif token.name == "CloseBracket":
parenthesis_level -= 1 parenthesis_level -= 1
if parenthesis_level < 0: if parenthesis_level < 0:
raise LexerError(source, token.source_pos, "unmatched parenthesis") raise LexingError("unmatched parenthesis", token.source_pos)
output_tokens.append(token) output_tokens.append(token)
elif token.name == "NewlineAndWhitespace": elif token.name == "NewlineAndWhitespace":
if i + 1 < len(tokens) and tokens[i + 1].name == "NewlineAndWhitespace": if i + 1 < len(tokens) and tokens[i + 1].name == "NewlineAndWhitespace":
@ -182,8 +182,8 @@ While = r'while'
Def = r'def' Def = r'def'
Object = r'object' Object = r'object'
tokens = ["If", "Else", "While", "Def", "Object", "Number", "String", "Ignore", tokens = ["If", "Else", "While", "Def", "Object", "Number", "Ignore",
"Boolean", # Project: Boolean "String", "Boolean", # Project: Boolean, String
"NewlineAndWhitespace", "OpenBracket", "CloseBracket", "Comma", "Assign", "Colon", "NewlineAndWhitespace", "OpenBracket", "CloseBracket", "Comma", "Assign", "Colon",
"Increment", "Plus", "Minus", "Multiply", "Divide", "Modulo", # Project: Sugar "Increment", "Plus", "Minus", "Multiply", "Divide", "Modulo", # Project: Sugar
"Name", "PrimitiveName"] "Name", "PrimitiveName"]

View File

@ -6,10 +6,10 @@ from simplelexer import lex
import simpleast import simpleast
pg = ParserGenerator(["If", "Else", "While", "Def", "Object", "Number", pg = ParserGenerator(["If", "Else", "While", "Def", "Object", "Number",
"Boolean", # New Types "String", "Boolean", # Project: Boolean, String
"String", "Name", "Indent", "Dedent", "Newline", "OpenBracket", "Name", "Indent", "Dedent", "Newline", "OpenBracket",
"CloseBracket", "Comma", "Assign", "Colon", "CloseBracket", "Comma", "Assign", "Colon",
"Increment", "Plus", "Minus", "Multiply", "Divide", "Modulo", # Sugar "Increment", "Plus", "Minus", "Multiply", "Divide", "Modulo", # Project: Sugar
"PrimitiveName", "EOF"], "PrimitiveName", "EOF"],
# Operator precedence for ambiguous rules, ascending # Operator precedence for ambiguous rules, ascending
precedence=[("left", ["Plus", "Minus"]), precedence=[("left", ["Plus", "Minus"]),
@ -198,18 +198,18 @@ def number_expression(stmt):
return simpleast.IntLiteral(stmt[0].value) return simpleast.IntLiteral(stmt[0].value)
# Project: String
@pg.production("basic_expression : String")
def string_expression(stmt):
return simpleast.StringLiteral(stmt[0].value[1:-1]) # cut off delimiters
# Project: Boolean # Project: Boolean
@pg.production("basic_expression : Boolean") @pg.production("basic_expression : Boolean")
def boolean_expression(stmt): def boolean_expression(stmt):
return simpleast.BooleanLiteral(stmt[0].value) return simpleast.BooleanLiteral(stmt[0].value)
# Project: String
@pg.production("basic_expression : String")
def string_expression(stmt):
    """Build a StringLiteral node from a String token.

    The first and last character of the token text (the delimiters) are
    dropped; everything in between becomes the literal's value.
    """
    quoted = stmt[0].value
    inner = quoted[1:-1]  # cut off delimiters
    return simpleast.StringLiteral(inner)
@pg.production("basic_expression : implicitselfmethodcall") @pg.production("basic_expression : implicitselfmethodcall")
def implicitselfmethodcall(call): def implicitselfmethodcall(call):
methodcall = call[0] methodcall = call[0]

View File

@ -1,10 +1,24 @@
import py import py
from rply import Token
from simpleast import Program, ExprStatement, BooleanLiteral, ImplicitSelf, Assignment, StringLiteral
from simplelexer import lex
from simpleparser import parse from simpleparser import parse
from objmodel import W_NormalObject, W_Integer from objmodel import W_NormalObject, W_Integer, W_String, W_Boolean
from interpreter import Interpreter from interpreter import Interpreter
def test_basic_string_lexing():
    """Double- and single-quoted text each lex to one String token."""
    double_quoted = lex("\"Hallo\"")
    assert double_quoted[0] == Token("String", "\"Hallo\"")

    single_quoted = lex("\'Hallo\'")
    assert single_quoted[0] == Token("String", "\'Hallo\'")

    # A string on the right-hand side of an assignment.
    expected_prefix = [Token("Name", "x"), Token("Assign", "="), Token("String", "\"true\"")]
    assert lex("x = \"true\"")[:3] == expected_prefix
def test_basic_string_parsing():
    """String literals parse to StringLiteral nodes with delimiters removed."""
    bare_literal = Program([ExprStatement(StringLiteral("false"))])
    assert parse("\"false\"") == bare_literal

    assigned_literal = Program([Assignment(ImplicitSelf(), "x", StringLiteral("false"))])
    assert parse("x = \"false\"") == assigned_literal
def test_string_assignment(): def test_string_assignment():
ast = parse(""" ast = parse("""
x = "Hallo" x = "Hallo"
@ -19,6 +33,7 @@ x = "Hallo"
def test_string_operations(): def test_string_operations():
ast = parse(""" ast = parse("""
w = "Hallo" eq("Hallo")
x = "Hallo" add("ollaH") x = "Hallo" add("ollaH")
y = "Hallo" rev y = "Hallo" rev
z = "Hallo" len z = "Hallo" len
@ -27,9 +42,33 @@ z = "Hallo" len
w_model = interpreter.make_module() w_model = interpreter.make_module()
interpreter.eval(ast, w_model) interpreter.eval(ast, w_model)
assert isinstance(w_model.getvalue("w"), W_Boolean)
assert w_model.getvalue("w").istrue() is True
assert isinstance(w_model.getvalue("x"), W_String) assert isinstance(w_model.getvalue("x"), W_String)
assert w_model.getvalue("x").value == "HalloollaH" assert w_model.getvalue("x").value == "HalloollaH"
assert isinstance(w_model.getvalue("y"), W_String) assert isinstance(w_model.getvalue("y"), W_String)
assert w_model.getvalue("y").value == "ollaH" assert w_model.getvalue("y").value == "ollaH"
assert isinstance(w_model.getvalue("z"), W_Integer) assert isinstance(w_model.getvalue("z"), W_Integer)
assert w_model.getvalue("z").value == 5 assert w_model.getvalue("z").value == 5
def test_string_conversion():
    """Conversions between string, boolean and integer values."""
    program = parse("""
w = "true" tobool
x = true tostr
y = 25 tostr
z = "25" toint
""")
    interp = Interpreter()
    w_module = interp.make_module()
    interp.eval(program, w_module)

    # string -> boolean
    w_w = w_module.getvalue("w")
    assert isinstance(w_w, W_Boolean)
    assert w_w.istrue() is True

    # boolean -> string
    w_x = w_module.getvalue("x")
    assert isinstance(w_x, W_String)
    assert w_x.value == "true"

    # integer -> string
    w_y = w_module.getvalue("y")
    assert isinstance(w_y, W_String)
    assert w_y.value == "25"

    # string -> integer
    w_z = w_module.getvalue("z")
    assert isinstance(w_z, W_Integer)
    assert w_z.value == 25