1

implement string

This commit is contained in:
Christoph
2021-08-13 16:39:23 +02:00
parent 5b32c4bcd6
commit 080d932d56
11 changed files with 200 additions and 45 deletions

View File

@ -34,6 +34,8 @@ object inttrait:
def tobool:
self $int_tobool
def tostr:
self $int_tostr
# Project: Boolean
object booltrait:
@ -46,3 +48,21 @@ object booltrait:
def toint:
self $bool_toint
def tostr:
self $bool_tostr
# Project: String
object strtrait:
    # Builtin trait that string objects get as their parent; every method
    # forwards to the matching $str_* primitive with self as the receiver.

    # equality of two strings, result is a boolean
    def eq(other):
        self $str_eq(other)
    # concatenation: self followed by other
    def add(other):
        self $str_add(other)
    # the string with its characters in reverse order
    def rev:
        self $str_rev
    # number of characters, result is an integer
    def len:
        self $str_len
    # boolean conversion: only the exact text "true" converts to true
    def tobool:
        self $str_tobool
    # integer conversion of the string's text
    def toint:
        self $str_toint

View File

@ -20,10 +20,11 @@ class Interpreter(object):
return self.run(code, w_context)
def read4(self, code, pc):
highval = ord(code[pc + 3])
if highval >= 128:
""" Converts 4 unicode characters to single 4 byte value """
highval = ord(code[pc + 3]) # most significant byte
if highval >= 128: # convert from 2's complement?
highval -= 256
return (ord(code[pc]) |
return (ord(code[pc]) | # merge single bytes into 4 byte value
(ord(code[pc + 1]) << 8) |
(ord(code[pc + 2]) << 16) |
(highval << 24))
@ -34,7 +35,7 @@ class Interpreter(object):
code = bytecode.code
print(disassemble(bytecode))
while pc < len(code):
opcode = ord(code[pc])
opcode = ord(code[pc]) # convert unicode to number
pc += 1
if compile.isjump(opcode):
oparg = self.read4(code, pc)
@ -67,6 +68,10 @@ class Interpreter(object):
elif opcode == compile.BOOL_LITERAL: # Project: Boolean
w_value = self.space.newbool(oparg) # oparg is 1 or 0
stack.append(w_value)
elif opcode == compile.STRING_LITERAL: # Project: String
value = bytecode.symbols[oparg]
w_value = self.space.newstring(value)
stack.append(w_value)
elif opcode == compile.MAKE_FUNCTION:
bc = bytecode.subbytecodes[oparg]
w_method = self.space.definemethod(name=bc.name, code=bc, w_target=w_context)

View File

@ -111,6 +111,7 @@ import simpleast
BOOL_LITERAL = 1 # 1 or 0 # Project: Boolean
INT_LITERAL = 2 # integer value
STRING_LITERAL = 3 # Project: String
ASSIGNMENT = 4 # index of attrname
METHOD_LOOKUP = 5 # index of method name
METHOD_CALL = 6 # number of arguments
@ -190,6 +191,7 @@ def compile(ast, argumentnames=[], name=None):
stack_effects = {
BOOL_LITERAL: 1, # Project: Boolean
STRING_LITERAL: 1, # Project: String
INT_LITERAL: 1,
ASSIGNMENT: -1,
METHOD_LOOKUP: 1,
@ -240,11 +242,11 @@ class Compiler(object):
self.code.append(c)
elif hasarg(opcode):
assert isinstance(arg, int)
if -127 <= arg <= 127:
self.code.append(chr(arg & 0xFF))
if -127 <= arg <= 127: # arg can be encoded as one byte
self.code.append(chr(arg & 0xFF)) # mask the least significant byte and convert to unicode
else:
self.code.append(chr(128))
for c in self.encode4(arg):
self.code.append(chr(128)) # padding character
for c in self.encode4(arg): # append as 4 single bytes
self.code.append(c)
else:
assert arg is None
@ -266,14 +268,17 @@ class Compiler(object):
i += 1
def encode4(self, value):
    """Split the low 32 bits of ``value`` into four one-byte characters.

    The result is a list of four single-character strings ordered from the
    least significant byte to the most significant byte.
    """
    return [chr((value >> shift) & 0xFF) for shift in (0, 8, 16, 24)]
def lookup_symbol(self, symbol):
    """Return the index of ``symbol`` in ``self.symbols``.

    A symbol seen for the first time is given the next free index (the
    current size of the table); later lookups return that same index.
    """
    table = self.symbols
    try:
        return table[symbol]
    except KeyError:
        index = len(table)
        table[symbol] = index
        return index
def compile(self, ast, needsresult=True):
@ -286,6 +291,10 @@ class Compiler(object):
def compile_BooleanLiteral(self, astnode, needsresult):
self.emit(BOOL_LITERAL, astnode.value)
# Project: String
def compile_StringLiteral(self, astnode, needsresult):
    """Emit a STRING_LITERAL opcode for a string literal node.

    The literal's text is stored in the symbol table and the opcode
    argument is its index there.
    """
    symbol_index = self.lookup_symbol(astnode.value)
    self.emit(STRING_LITERAL, symbol_index)
def compile_ImplicitSelf(self, astnode, needsresult):
self.emit(IMPLICIT_SELF)

View File

@ -1,5 +1,6 @@
import compile
def disassemble(bytecode, indent='', pc=-1):
""" disassemble a bytecode object and print a readable version of it"""
assert isinstance(bytecode, compile.Bytecode)
@ -18,12 +19,13 @@ for name, value in list(compile.__dict__.items()):
class AbstractDisassembler(object):
def read4(self, code, pc):
highval = ord(code[pc+3])
""" Converts 4 unicode characters to single 4 byte value """
highval = ord(code[pc + 3])
if highval >= 128:
highval -= 256
return (ord(code[pc]) |
(ord(code[pc+1]) << 8) |
(ord(code[pc+2]) << 16) |
(ord(code[pc + 1]) << 8) |
(ord(code[pc + 2]) << 16) |
(highval << 24))
def disassemble(self, bytecode, currpc=-1):
@ -114,5 +116,3 @@ class Disassembler(AbstractDisassembler):
print()
else:
print('\t', oparg)

View File

@ -77,7 +77,7 @@ class W_Integer(AbstractObject):
def __init__(self, value, space=None, trait="inttrait"):
self.value = int(value)
self.space = space
self.__trait = trait # used this to extend from W_Integer
self.__trait = trait # able to extend from W_Integer
def getparents(self):
if self.space is None:
@ -103,6 +103,37 @@ class W_Boolean(W_Integer): # don't know if extending is good idea
def __init__(self, value, space=None):
super().__init__(int(value), space=space, trait="booltrait")
def __str__(self):
return str(bool(self.value))
__repr__ = __str__
# Project: String
class W_String(AbstractObject):
    """Wrapped string value of the interpreted language.

    The payload is kept in ``value`` (always converted with ``str``); the
    object's only parent is the builtin trait named ``strtrait``.
    """

    def __init__(self, value, space=None):
        self.value = str(value)
        self.space = space
        # name of the builtin object that getparents() looks up
        self.__trait = "strtrait"

    def __str__(self):
        return self.value

    __repr__ = __str__

    def getparents(self):
        """Return ``[strtrait]`` from the object space, or ``[]`` without a space."""
        space = self.space
        if space is None:
            return []  # for tests
        trait = space.getbuiltin(self.__trait)
        assert trait is not None, 'O_o bogus state'
        return [trait]

    def hasslot(self, name):
        """Always False, whatever ``name`` is."""
        return False

    def istrue(self):
        """A string is true unless it is the empty string."""
        return self.value != ""
class W_Method(W_NormalObject):

View File

@ -1,4 +1,4 @@
from objmodel import W_Integer, W_Boolean
from objmodel import W_Integer, W_Boolean, W_String
from objmodel import W_Method
from objmodel import W_NormalObject
@ -53,6 +53,10 @@ class ObjectSpace(object):
def newbool(self, value):
return W_Boolean(value, space=self)
# Project: String
def newstring(self, value):
    """Wrap ``value`` in a W_String that is bound to this object space."""
    w_string = W_String(value, space=self)
    return w_string
def definemethod(self, name, code, w_target):
w_meth = W_Method(code, name=name,
slots={'__parent__': w_target},

View File

@ -19,7 +19,9 @@ def primitive(name, unwrap_spec, wrap_spec): # decorator arguments
if t is int:
unwrapped_args += (arg.value,)
elif t is bool: # Project: Boolean
unwrapped_args += (bool(arg.value),) # isn't necessary because "1 or 0" is valid
unwrapped_args += (bool(arg.value),) # isn't really necessary because "1 or 0" is also valid
elif t is str: # Project: String
unwrapped_args += (arg.value,)
else:
unwrapped_args += (arg,)
@ -29,6 +31,8 @@ def primitive(name, unwrap_spec, wrap_spec): # decorator arguments
return space.newint(result)
elif wrap_spec is bool: # Project: Boolean
return space.newbool(result)
elif wrap_spec is str: # Project: String
return space.newstring(result)
return result
unwrapper.__qualname__ = name
@ -78,6 +82,17 @@ def simple_int_increment(a):
return a + 1
# Project: Boolean, String, Double
@primitive("int_tobool", [int], bool)
def simple_int_tobool(a):
    """Primitive ``int_tobool``: zero converts to false, any other integer to true.

    The result is normalised with ``bool`` so the wrapped boolean always
    carries 0 or 1 rather than the original integer.
    """
    return bool(a)
@primitive("int_tostr", [int], str)
def simple_int_tostr(a):
    """Primitive ``int_tostr``: return the decimal text of the integer ``a``."""
    # Convert here so the value matches the declared ``str`` result type.
    return str(a)


# Earlier, misspelled name of this function; kept so references to it still work.
simple_int_tobstr = simple_int_tostr
# Project: Boolean
@primitive("bool_and", [bool, bool], bool)
def simple_bool_and(a, b):
@ -94,6 +109,17 @@ def simple_bool_not(a):
return not a
@primitive("bool_toint", [bool], int)
def simple_bool_toint(a):
    """Primitive ``bool_toint``: pass the unwrapped boolean through to be wrapped as an integer."""
    return a
@primitive("bool_tostr", [bool], str)
def simple_bool_tostr(a):
    """Primitive ``bool_tostr``: return the lower-cased text of the boolean ``a``."""
    rendered = str(a)
    return rendered.lower()
# bool stuff for int
@primitive("int_eq", [int, int], bool)
def simple_int_eq(a, b):
return a == b
@ -119,11 +145,32 @@ def simple_int_less(a, b):
return a < b
@primitive("int_tobool", [int], bool)
def simple_int_tobool(a):
# Project: String
@primitive("str_eq", [str, str], bool)
def simple_str_eq(a, b):
    """Primitive ``str_eq``: return whether the two unwrapped strings are equal."""
    return a == b
@primitive("str_add", [str, str], str)
def simple_str_add(a, b):
    """Primitive ``str_add``: return ``a`` followed by ``b``."""
    return a + b
@primitive("str_rev", [str], str)
def simple_str_rev(a):
    """Primitive ``str_rev``: return ``a`` with its characters in reverse order."""
    return a[::-1]
@primitive("str_len", [str], int)
def simple_str_len(a):
    """Primitive ``str_len``: return the number of characters in ``a``."""
    return len(a)
@primitive("str_toint", [str], int)
def simple_str_toint(a):
    """Primitive ``str_toint``: parse the text of ``a`` as an integer.

    Raises:
        ValueError: if ``a`` is not a valid integer literal for ``int``.
    """
    # Convert here so the value matches the declared ``int`` result type.
    return int(a)
@primitive("bool_toint", [bool], int)
def simple_bool_toint(a):
return a
@primitive("str_tobool", [str], bool)
def simple_str_tobool(a):
    """Primitive ``str_tobool``: true only when ``a`` is exactly the text "true"."""
    # Any other text, including other non-empty strings, converts to false.
    return a == "true"

View File

@ -93,16 +93,6 @@ class IntLiteral(Expression):
self.value = int(value)
# Project: String
class StringLiteral(Expression):
""" A string literal (like "hello world") """
attrs = ["value"]
def __init__(self, value):
self.value = str(value)
# Project: Boolean
class BooleanLiteral(Expression):
""" A boolean literal (like "false") """
@ -113,6 +103,16 @@ class BooleanLiteral(Expression):
self.value = value == "true"
# Project: String
class StringLiteral(Expression):
    """ A string literal (like "hello world") """
    attrs = ["value"]

    def __init__(self, value):
        # coerce to str so the node always carries a plain string
        self.value = str(value)
class MethodCall(Expression):
""" A call to a method with name 'methodname' on 'receiver' with
'arguments' (which is a list of expression ASTs).

View File

@ -1,4 +1,4 @@
from rply import LexerGenerator
from rply import LexerGenerator, LexingError
from rply.token import Token
# attempts at writing a simple Python-like lexer
@ -90,7 +90,7 @@ def postprocess(tokens, source):
elif token.name == "CloseBracket":
parenthesis_level -= 1
if parenthesis_level < 0:
raise LexerError(source, token.source_pos, "unmatched parenthesis")
raise LexingError("unmatched parenthesis", token.source_pos)
output_tokens.append(token)
elif token.name == "NewlineAndWhitespace":
if i + 1 < len(tokens) and tokens[i + 1].name == "NewlineAndWhitespace":
@ -182,8 +182,8 @@ While = r'while'
Def = r'def'
Object = r'object'
tokens = ["If", "Else", "While", "Def", "Object", "Number", "String", "Ignore",
"Boolean", # Project: Boolean
tokens = ["If", "Else", "While", "Def", "Object", "Number", "Ignore",
"String", "Boolean", # Project: Boolean, String
"NewlineAndWhitespace", "OpenBracket", "CloseBracket", "Comma", "Assign", "Colon",
"Increment", "Plus", "Minus", "Multiply", "Divide", "Modulo", # Project: Sugar
"Name", "PrimitiveName"]

View File

@ -6,10 +6,10 @@ from simplelexer import lex
import simpleast
pg = ParserGenerator(["If", "Else", "While", "Def", "Object", "Number",
"Boolean", # New Types
"String", "Name", "Indent", "Dedent", "Newline", "OpenBracket",
"String", "Boolean", # Project: Boolean, String
"Name", "Indent", "Dedent", "Newline", "OpenBracket",
"CloseBracket", "Comma", "Assign", "Colon",
"Increment", "Plus", "Minus", "Multiply", "Divide", "Modulo", # Sugar
"Increment", "Plus", "Minus", "Multiply", "Divide", "Modulo", # Project: Sugar
"PrimitiveName", "EOF"],
# Operator precedence for ambiguous rules, ascending
precedence=[("left", ["Plus", "Minus"]),
@ -198,18 +198,18 @@ def number_expression(stmt):
return simpleast.IntLiteral(stmt[0].value)
# Project: String
@pg.production("basic_expression : String")
def string_expression(stmt):
return simpleast.StringLiteral(stmt[0].value[1:-1]) # cut off delimiters
# Project: Boolean
@pg.production("basic_expression : Boolean")
def boolean_expression(stmt):
return simpleast.BooleanLiteral(stmt[0].value)
# Project: String
@pg.production("basic_expression : String")
def string_expression(stmt):
    """Build a StringLiteral node from a String token."""
    token_text = stmt[0].value
    # drop the first and last character, i.e. the quote delimiters
    unquoted = token_text[1:-1]
    return simpleast.StringLiteral(unquoted)
@pg.production("basic_expression : implicitselfmethodcall")
def implicitselfmethodcall(call):
methodcall = call[0]

View File

@ -1,10 +1,24 @@
import py
from rply import Token
from simpleast import Program, ExprStatement, BooleanLiteral, ImplicitSelf, Assignment, StringLiteral
from simplelexer import lex
from simpleparser import parse
from objmodel import W_NormalObject, W_Integer
from objmodel import W_NormalObject, W_Integer, W_String, W_Boolean
from interpreter import Interpreter
def test_basic_string_lexing():
    """Quoted text lexes to a single String token that still includes its quotes."""
    double_quoted = lex('"Hallo"')
    assert double_quoted[0] == Token("String", '"Hallo"')

    single_quoted = lex("'Hallo'")
    assert single_quoted[0] == Token("String", "'Hallo'")

    expected = [Token("Name", "x"), Token("Assign", "="), Token("String", '"true"')]
    assert lex('x = "true"')[:3] == expected
def test_basic_string_parsing():
    """A quoted "false" parses to a StringLiteral, both bare and when assigned."""
    literal = StringLiteral("false")

    bare_statement = Program([ExprStatement(literal)])
    assert parse('"false"') == bare_statement

    assignment = Program([Assignment(ImplicitSelf(), "x", literal)])
    assert parse('x = "false"') == assignment
def test_string_assignment():
ast = parse("""
x = "Hallo"
@ -19,6 +33,7 @@ x = "Hallo"
def test_string_operations():
ast = parse("""
w = "Hallo" eq("Hallo")
x = "Hallo" add("ollaH")
y = "Hallo" rev
z = "Hallo" len
@ -27,9 +42,33 @@ z = "Hallo" len
w_model = interpreter.make_module()
interpreter.eval(ast, w_model)
assert isinstance(w_model.getvalue("w"), W_Boolean)
assert w_model.getvalue("w").istrue() is True
assert isinstance(w_model.getvalue("x"), W_String)
assert w_model.getvalue("x").value == "HalloollaH"
assert isinstance(w_model.getvalue("y"), W_String)
assert w_model.getvalue("y").value == "ollaH"
assert isinstance(w_model.getvalue("z"), W_Integer)
assert w_model.getvalue("z").value == 5
def test_string_conversion():
    """Check the conversions between strings, booleans and integers."""
    ast = parse("""
w = "true" tobool
x = true tostr
y = 25 tostr
z = "25" toint
""")
    interpreter = Interpreter()
    w_model = interpreter.make_module()
    interpreter.eval(ast, w_model)

    # "true" tobool -> boolean true
    w_bool = w_model.getvalue("w")
    assert isinstance(w_bool, W_Boolean)
    assert w_bool.istrue() is True

    # remaining conversions: (variable, wrapper class, unwrapped value)
    expectations = [
        ("x", W_String, "true"),
        ("y", W_String, "25"),
        ("z", W_Integer, 25),
    ]
    for variable, wrapper_class, payload in expectations:
        w_result = w_model.getvalue(variable)
        assert isinstance(w_result, wrapper_class)
        assert w_result.value == payload