""" A 'simple' parser. Don't look into this file :-) """ from rply import ParserGenerator, Token from simplelexer import lex import simpleast pg = ParserGenerator(["If", "Else", "While", "Def", "Object", "Number", "String", "Name", "Indent", "Dedent", "Newline", "OpenBracket", "CloseBracket", "Comma", "Assign", "Colon", "Increment", "Plus", "Minus", "Multiply", "Divide", "Modulo", # Sugar "PrimitiveName", "EOF"], # Operator precedence for ambiguous rules, ascending precedence=[("left", ["Plus", "Minus"]), ("left", ["Multiply", "Divide", "Modulo"]), ("left", ["Increment"])]) def build_methodcall(call, cls, receiver=None): if len(call) == 1: args = [] else: args = call[1] name = call[0] return cls(receiver, name, args) @pg.production("program : statements EOF") @pg.production("program : newlines statements EOF") def program(prog): # import pdb; pdb.set_trace() if prog[0] is None: # source starts with newlines prog = prog[1] else: prog = prog[0] return prog @pg.production("statements : statement") @pg.production("statements : statement statements") @pg.production("statements : statement newlines statements") def statements(stmts): if len(stmts) == 1: # single statement stmt = stmts[0] return simpleast.Program([stmt]) elif stmts[0] is None: # ? assert len(stmts) == 2 return stmts[1] elif len(stmts) == 2: # merge programs (simpleast.Program) stmt = stmts[0] result = stmts[1] result.statements.insert(0, stmt) return result @pg.production("newlines : Newline") @pg.production("newlines : Newline newlines") def newlines(n): return None @pg.production("statement : simplestatement") @pg.production("statement : ifstatement") @pg.production("statement : whilestatement") @pg.production("statement : defstatement") @pg.production("statement : objectstatement") def statement(stmt): return stmt[0] @pg.production("ifstatement : If expression block") @pg.production("ifstatement : If expression block Else block") def ifstatement(ifstmt): elseblock = None if len(ifstmt) > 3: elseblock = ifstmt[-1] return simpleast.IfStatement(ifstmt[1], ifstmt[2], elseblock) @pg.production("whilestatement : While expression block") def ifstatement(whilestmt): return simpleast.WhileStatement(whilestmt[1], whilestmt[2]) @pg.production("objectstatement : Object name block") @pg.production("objectstatement : Object name parentlist block") def objectstatement(obj): name = obj[1] names = [] expressions = [] if len(obj) == 3: blk = obj[2] else: print(obj) parents = obj[2] # list of assignments (simpleast.Assignment) names = [p.attrname for p in parents] expressions = [p.expression for p in parents] blk = obj[3] return simpleast.ObjectDefinition(name, blk, names, expressions) @pg.production("defstatement : Def name argumentnamelist block") @pg.production("defstatement : Def name block") def defstatement(defn): name = defn[1] if len(defn) == 4: args = defn[2] blk = defn[3] else: args = [] blk = defn[2] return simpleast.FunctionDefinition(name, args, blk) @pg.production("block : Colon newlines Indent statements Dedent") def block(blk): return blk[3] @pg.production("simplestatement : expression Newline") @pg.production("simplestatement : expression Assign expression Newline") def simplestatement(stmts): if len(stmts) == 2: return simpleast.ExprStatement(stmts[0]) # assignment result = stmts[0] assign = stmts[2] if isinstance(result, simpleast.MethodCall) and result.arguments == []: # assign to attribute return simpleast.Assignment(result.receiver, result.methodname, assign) source_pos = stmts[1].source_pos raise ParseError(source_pos, ErrorInformation(source_pos.idx, customerror="can only assign to attribute")) # , self.source) @pg.production("expression : basic_expression") @pg.production("expression : basic_expression msg-chain") def expression(expr): if len(expr) > 1: # expression has messages prev = expr[0] for i in expr[1]: # reverse message order i.receiver = prev prev = i return expr[1][-1] # return last message return expr[0] # TODO: Parenthesis # Syntactic Sugar: Plus, Minus, Multiply, Divide, Modulo, Increment @pg.production("expression : expression Plus expression") @pg.production("expression : expression Minus expression") @pg.production("expression : expression Multiply expression") @pg.production("expression : expression Divide expression") @pg.production("expression : expression Modulo expression") @pg.production("expression : expression Increment expression") @pg.production("expression : expression Increment") def sugar(expr): op = { "Plus": "$int_add", "Minus": "$int_sub", "Multiply": "$int_mul", "Divide": "$int_div", "Modulo": "$int_mod", "Increment": "$int_inc" }[expr[1].name] if len(expr) == 2: return build_methodcall([op, []], simpleast.PrimitiveMethodCall, expr[0]) # ([name, arg], class, receiver) return build_methodcall([op, [expr[2]]], simpleast.PrimitiveMethodCall, expr[0]) # ([name, arg], class, receiver) @pg.production("msg-chain : methodcall") @pg.production("msg-chain : methodcall msg-chain") def msg_chain(cc): if len(cc) > 1: return [cc[0]] + cc[1] # merge message lists for chain return cc @pg.production("basic_expression : Number") def number_expression(stmt): return simpleast.IntLiteral(stmt[0].value) @pg.production("basic_expression : String") def string_expression(stmt): return simpleast.StringLiteral(stmt[0].value) @pg.production("basic_expression : implicitselfmethodcall") def implicitselfmethodcall(call): methodcall = call[0] methodcall.receiver = simpleast.ImplicitSelf() return methodcall @pg.production("implicitselfmethodcall : methodcall") def implicitselfmethodcall_methodcall(call): return call[0] @pg.production("methodcall : primitivemethodcall") @pg.production("methodcall : simplemethodcall") def methodcall(call): return call[0] @pg.production("simplemethodcall : name") @pg.production("simplemethodcall : name argumentslist") def simplemethodcall(call): return build_methodcall(call, simpleast.MethodCall) @pg.production("primitivemethodcall : primitivename") @pg.production("primitivemethodcall : primitivename argumentslist") def primitivemethodcall(call): return build_methodcall(call, simpleast.PrimitiveMethodCall) @pg.production("argumentslist : OpenBracket arguments CloseBracket") @pg.production("argumentnamelist : OpenBracket argumentnames CloseBracket") @pg.production("parentlist : OpenBracket parentdefinitions CloseBracket") def argumentslist(args): return args[1] @pg.production("arguments : expression") @pg.production("arguments : expression Comma") @pg.production("arguments : expression Comma arguments") @pg.production("argumentnames : name") @pg.production("argumentnames : name Comma") @pg.production("argumentnames : name Comma argumentnames") @pg.production("parentdefinitions : assignment") @pg.production("parentdefinitions : assignment Comma") @pg.production("parentdefinitions : assignment Comma parentdefinitions") def arguments(args): if len(args) == 3: return [args[0]] + args[2] # merge argument lists for chain return [args[0]] @pg.production("assignment : name Assign expression") def assignement(args): return simpleast.Assignment(None, args[0], args[2]) @pg.production("primitivename : PrimitiveName") @pg.production("name : Name") def name(name): return name[0].value @pg.error def error_handler(token): raise ParseError(source_pos=token.getsourcepos(), errorinformation=ErrorInformation(token.getsourcepos().idx, customerror="Ran into a %s where it wasn't expected" % token.gettokentype())) parser = pg.build() def print_conflicts(): print("rr conflicts") for rule_num, token, conflict in parser.lr_table.rr_conflicts: print(rule_num, token, conflict) print("sr conflicts") for rule_num, token, conflict in parser.lr_table.sr_conflicts: print(rule_num, token, conflict) print_conflicts() def parse(s): l = lex(s) return parser.parse(iter(l)) # ____________________________________________________________ class ParseError(Exception): def __init__(self, source_pos, errorinformation, source=""): self.source_pos = source_pos self.errorinformation = errorinformation self.args = (source_pos, errorinformation) self.source = source def nice_error_message(self, filename=""): result = [" File %s, line %s" % (filename, self.source_pos.lineno + 1)] source = self.source if source: result.append(source.split("\n")[self.source_pos.lineno]) result.append(" " * self.source_pos.colno + "^") else: result.append("") result.append("ParseError") if self.errorinformation: failure_reasons = self.errorinformation.expected if failure_reasons: expected = '' if len(failure_reasons) > 1: all_but_one = failure_reasons[:-1] last = failure_reasons[-1] expected = "%s or '%s'" % ( ", ".join(["'%s'" % e for e in all_but_one]), last) elif len(failure_reasons) == 1: expected = failure_reasons[0] if expected: result.append("expected %s" % (expected,)) if self.errorinformation.customerror: result.append(self.errorinformation.customerror) return "\n".join(result) def __str__(self): return self.nice_error_message() class ErrorInformation(object): def __init__(self, pos, expected=None, customerror=None): if expected is None: expected = [] self.expected = expected self.pos = pos self.customerror = customerror def combine_errors(self, other): if self is None: return other if (other is None or self.pos > other.pos or len(other.expected) == 0): return self elif other.pos > self.pos or len(self.expected) == 0: return other failure_reasons = [] already_there = {} for fr in [self.expected, other.expected]: for reason in fr: if reason not in already_there: already_there[reason] = True failure_reasons.append(reason) return ErrorInformation(self.pos, failure_reasons, self.customerror or other.customerror) def make_arglist(methodname): def arglist(self): self.match("OpenBracket", "(") method = getattr(self, methodname) result = [method()] result.extend(self.repeat(self.comma, method)) self.maybe(self.comma) self.match("CloseBracket", ")") return result return arglist