update + fixes for parser
This commit is contained in:
@ -34,44 +34,53 @@ public class LL1Parser {
|
||||
Deque<Node> stack = new ArrayDeque<>();
|
||||
stack.push(root);
|
||||
|
||||
int currentToken = 0;
|
||||
int inputPosition = 0;
|
||||
System.out.println("\nParsing " + token + ":");
|
||||
|
||||
// Parsing
|
||||
while (!stack.isEmpty()) {
|
||||
final String top = stack.peek().getName();
|
||||
|
||||
if (currentToken >= token.size()) {
|
||||
final String currentToken;
|
||||
if (inputPosition >= token.size()) {
|
||||
// Wenn auf dem Stack mehr Nichtterminale liegen als Terminale in der Eingabe vorhanden sind
|
||||
// Die Eingabe wurde komplett konsumiert
|
||||
|
||||
throw new MyParseException("Input too long");
|
||||
currentToken = "$"; // EOF
|
||||
} else {
|
||||
// Es sind noch Eingabesymbole vorhanden
|
||||
|
||||
currentToken = token.get(inputPosition);
|
||||
}
|
||||
final String prod = this.parsetable.get(top, token.get(currentToken));
|
||||
|
||||
if (top.equals(token.get(currentToken))) {
|
||||
// Wenn auf dem Stack ein Terminal liegt
|
||||
final String prod = this.parsetable.get(top, currentToken);
|
||||
|
||||
if (top.equals(this.parsetable.getEpsilon())) {
|
||||
// Wenn auf dem Stack das Epsilonsymbol liegt
|
||||
|
||||
stack.pop();
|
||||
currentToken++;
|
||||
} else if (top.equals(currentToken)) {
|
||||
// Wenn auf dem Stack ein Terminal liegt (dieses muss mit der Eingabe übereinstimmen)
|
||||
|
||||
stack.pop();
|
||||
inputPosition++;
|
||||
} else if (this.parsetable.getTerminals().contains(top)) {
|
||||
// Wenn das Terminal auf dem Stack nicht mit der aktuellen Eingabe übereinstimmt
|
||||
|
||||
throw new MyParseException("Invalid terminal on stack: " + top);
|
||||
|
||||
} else if (prod == null) {
|
||||
// Wenn es für das aktuelle Terminal und das Nichtterminal auf dem Stack keine Regel gibt
|
||||
|
||||
throw new MyParseException("No prod. for nonterminal " + top + ", terminal " + token.get(currentToken));
|
||||
|
||||
throw new MyParseException("No prod. for nonterminal " + top + ", terminal " + currentToken);
|
||||
} else {
|
||||
// Wenn das Nichtterminal auf dem Stack durch (s)eine Produktion ersetzt werden kann
|
||||
// Hier wird auch der AST aufgebaut
|
||||
|
||||
final String[] split = prod.split(" ");
|
||||
|
||||
System.out.println(top + " -> " + prod);
|
||||
|
||||
Node pop = stack.pop();
|
||||
|
||||
final String[] split = prod.split(" ");
|
||||
for (int i = split.length - 1; i >= 0; i--) {
|
||||
Node node = new Node(split[i]);
|
||||
stack.push(node);
|
||||
|
@ -4,9 +4,12 @@ import parser.grammar.Grammar;
|
||||
import parser.grammar.LL1GrammarAnalyzer;
|
||||
|
||||
import java.util.AbstractMap.SimpleEntry;
|
||||
import java.util.Formatter;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class LL1ParsingTable implements ILL1ParsingTable {
|
||||
|
||||
@ -47,4 +50,42 @@ public class LL1ParsingTable implements ILL1ParsingTable {
|
||||
public String getEpsilon() {
|
||||
return this.grammar.getEpsilonSymbol();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder output = new StringBuilder();
|
||||
Formatter format = new Formatter(output);
|
||||
|
||||
List<String> inputSymbols = this.parsetable.keySet().stream()
|
||||
.map(Entry::getValue)
|
||||
.distinct()
|
||||
.collect(Collectors.toList());
|
||||
|
||||
output.append(" ".repeat(8))
|
||||
.append("| ");
|
||||
for (String terminal : inputSymbols) {
|
||||
format.format("%-9s ", terminal);
|
||||
}
|
||||
output.append("|\n");
|
||||
|
||||
output.append("-".repeat(8))
|
||||
.append("+")
|
||||
.append("-".repeat(10 * inputSymbols.size() + 1))
|
||||
.append("+")
|
||||
.append("\n");
|
||||
|
||||
for (String nonterminal : this.grammar.getNonterminals()) {
|
||||
format.format("%-7s | ", nonterminal);
|
||||
|
||||
for (String terminal : inputSymbols) {
|
||||
String prod = this.parsetable.get(new SimpleEntry<>(nonterminal, terminal));
|
||||
format.format("%-9s ", prod == null ? " ".repeat(9) : prod);
|
||||
}
|
||||
output.append("|\n");
|
||||
}
|
||||
|
||||
format.close();
|
||||
|
||||
return output.toString();
|
||||
}
|
||||
}
|
||||
|
@ -7,6 +7,7 @@ import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
public class Grammar {
|
||||
|
||||
@ -30,6 +31,9 @@ public class Grammar {
|
||||
public static Grammar fromFile(Path path) throws IOException {
|
||||
List<String> lines = Files.readAllLines(path);
|
||||
|
||||
lines = lines.stream().filter(line -> !(line.isBlank() || line.startsWith("//")))
|
||||
.collect(Collectors.toUnmodifiableList());
|
||||
|
||||
try {
|
||||
String startSymbol = lines.get(0).split(" ")[1];
|
||||
String epsilonSymbol = lines.get(1).split(" ")[1];
|
||||
@ -43,7 +47,7 @@ public class Grammar {
|
||||
Set<String> nonterminals = new HashSet<>(Arrays.asList(nterm));
|
||||
|
||||
Set<GrammarRule> rules = new HashSet<>();
|
||||
for (int i = 5; i < lines.size(); i++) {
|
||||
for (int i = 4; i < lines.size(); i++) {
|
||||
// "S -> E T2 | EPS" wird zu leftside = "S" und rightside = "E T2 | epsilon"
|
||||
String[] split = lines.get(i)
|
||||
.replaceAll("EPS", epsilonSymbol)
|
||||
|
@ -26,6 +26,11 @@ public class LL1GrammarAnalyzer {
|
||||
this.follow = this.initFollow(grammar);
|
||||
|
||||
this.table = this.initParseTable(grammar);
|
||||
|
||||
System.out.println("Nullable:\n" + this.nullable);
|
||||
System.out.println("First:\n" + this.first);
|
||||
System.out.println("Follow:\n" + this.follow);
|
||||
System.out.println("LL-Table:\n" + this.table);
|
||||
}
|
||||
|
||||
private Map<String, Set<String>> getProductionMap(Grammar grammar) {
|
||||
@ -142,15 +147,15 @@ public class LL1GrammarAnalyzer {
|
||||
|
||||
// Das First des linken Nichtterminals X enthält das first des ersten rechten Symbols dieser
|
||||
// Produktionsregel S1 (da X -> S1 ... Sk)
|
||||
firstOut.get(leftX).addAll(firstOut.get(split[0]));
|
||||
change = firstOut.get(leftX).addAll(firstOut.get(split[0]));
|
||||
|
||||
for (int i = 1; i < split.length; i++) {
|
||||
// Für das 2-te bis k-te rechte Symbol dieser Produktionsregel
|
||||
|
||||
final String sym = split[i];
|
||||
|
||||
if (this.nullable(split[i - 1])) {
|
||||
change = firstOut.get(leftX).addAll(firstOut.get(sym));
|
||||
// Ein rechtes Symbol ist nullable, also zählt das first des nächsten Symbols
|
||||
|
||||
change = firstOut.get(leftX).addAll(firstOut.get(split[i]));
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
@ -209,6 +214,8 @@ public class LL1GrammarAnalyzer {
|
||||
followOut.put(sym, new HashSet<>());
|
||||
}
|
||||
|
||||
followOut.get(startsymbol).add("$");
|
||||
|
||||
do {
|
||||
change = false;
|
||||
|
||||
@ -230,7 +237,7 @@ public class LL1GrammarAnalyzer {
|
||||
|
||||
// Das follow des i-ten rechten Symbols dieser Produktionsregel enthält das first des
|
||||
// (i+1)-ten rechten Symbols dieser Produktionsregel
|
||||
followOut.get(sym).addAll(this.first(split[i + 1]));
|
||||
change = followOut.get(sym).addAll(this.first(split[i + 1]));
|
||||
|
||||
for (int j = i + 2; j < prods.getValue().size(); j++) {
|
||||
// Für das (i+2)-te bis letzte rechte Symbol dieser Produktionsregel
|
||||
|
@ -160,13 +160,13 @@ class LL1ParserTest {
|
||||
void testArithExpression() {
|
||||
LL1Parser parser = new LL1Parser(table1);
|
||||
|
||||
String[] token1 = {"id", "+", "id", "*", "id"};
|
||||
String[] token1 = {"id", "+", "id"};
|
||||
String[] token2 = {"id", "*", "id", "*", "id"};
|
||||
String[] token3 = {"id", "+", "id"};
|
||||
String[] token3 = {"id", "+", "id", "*", "id"};
|
||||
|
||||
assertThatThrownBy(() -> parser.parse(Arrays.asList(token1))).isInstanceOf(MyParseException.class);
|
||||
assertThatThrownBy(() -> parser.parse(Arrays.asList(token2))).isInstanceOf(MyParseException.class);
|
||||
assertThatThrownBy(() -> parser.parse(Arrays.asList(token3))).isInstanceOf(MyParseException.class);
|
||||
assertThat(parser.parse(Arrays.asList(token1))).isTrue();
|
||||
assertThat(parser.parse(Arrays.asList(token2))).isTrue();
|
||||
assertThat(parser.parse(Arrays.asList(token3))).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -174,13 +174,13 @@ class LL1ParserTest {
|
||||
Path path = Paths.get(this.getClass().getClassLoader().getResource("exampleGrammars/SimpleGrammar1.grammar").toURI());
|
||||
LL1Parser parser = LL1Parser.fromGrammar(path);
|
||||
|
||||
String[] token1 = {"id", "+", "id", "*", "id"};
|
||||
String[] token1 = {"id", "+", "id"};
|
||||
String[] token2 = {"id", "*", "id", "*", "id"};
|
||||
String[] token3 = {"id", "+", "id"};
|
||||
String[] token3 = {"id", "+", "id", "*", "id"};
|
||||
|
||||
assertThatThrownBy(() -> parser.parse(Arrays.asList(token1))).isInstanceOf(MyParseException.class);
|
||||
assertThatThrownBy(() -> parser.parse(Arrays.asList(token2))).isInstanceOf(MyParseException.class);
|
||||
assertThatThrownBy(() -> parser.parse(Arrays.asList(token3))).isInstanceOf(MyParseException.class);
|
||||
assertThat(parser.parse(Arrays.asList(token1))).isTrue();
|
||||
assertThat(parser.parse(Arrays.asList(token2))).isTrue();
|
||||
assertThat(parser.parse(Arrays.asList(token3))).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -1,29 +1,21 @@
|
||||
// javac -cp .:junit-4.12.jar:hamcrest-core-1.3.jar TestFirstFollow.java
|
||||
// java -cp .:junit-4.12.jar:hamcrest-core-1.3.jar org.junit.runner.JUnitCore TestFirstFollow
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import org.antlr.v4.tool.Grammar;
|
||||
import org.antlr.v4.tool.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
import parser.ILL1ParsingTable;
|
||||
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
import java.util.AbstractMap.SimpleEntry;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import java.io.FileReader;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
|
||||
public class TestFirstFollow {
|
||||
@ -48,25 +40,25 @@ public class TestFirstFollow {
|
||||
*/
|
||||
|
||||
List<String> nonterminals = new ArrayList<>();
|
||||
String narray[] = {"X", "Y", "Z"};
|
||||
String[] narray = {"X", "Y", "Z"};
|
||||
nonterminals = Arrays.asList(narray);
|
||||
|
||||
List<String> terminals = new ArrayList<>();
|
||||
String tarray[] = {"a", "c", "d", "$"};
|
||||
String[] tarray = {"a", "c", "d", "$"};
|
||||
terminals = Arrays.asList(tarray);
|
||||
|
||||
String startSymbol = "Z";
|
||||
List<Rule> productions = new ArrayList<>();
|
||||
String production0[] = {"d"};
|
||||
String[] production0 = {"d"};
|
||||
productions.add(new Rule("Z", Arrays.asList(production0)));
|
||||
String production1[] = {"X", "Y", "Z"};
|
||||
String[] production1 = {"X", "Y", "Z"};
|
||||
productions.add(new Rule("Z", Arrays.asList(production1)));
|
||||
productions.add(new Rule("Y", Collections.emptyList()));
|
||||
String production2[] = {"c"};
|
||||
String[] production2 = {"c"};
|
||||
productions.add(new Rule("Y", Arrays.asList(production2)));
|
||||
String production3[] = {"Y"};
|
||||
String[] production3 = {"Y"};
|
||||
productions.add(new Rule("X", Arrays.asList(production3)));
|
||||
String production4[] = {"a"};
|
||||
String[] production4 = {"a"};
|
||||
productions.add(new Rule("X", Arrays.asList(production4)));
|
||||
|
||||
this.grammar0 = new Grammar(nonterminals, terminals,
|
||||
@ -88,28 +80,28 @@ public class TestFirstFollow {
|
||||
*/
|
||||
|
||||
List<String> nonterminals = new ArrayList<>();
|
||||
String narray[] = {"E", "E'", "T", "T'", "F"};
|
||||
String[] narray = {"E", "E'", "T", "T'", "F"};
|
||||
nonterminals = Arrays.asList(narray);
|
||||
|
||||
List<String> terminals = new ArrayList<>();
|
||||
String tarray[] = {"+", "*", "(", ")", "id", "$"};
|
||||
String[] tarray = {"+", "*", "(", ")", "id", "$"};
|
||||
terminals = Arrays.asList(tarray);
|
||||
|
||||
String startSymbol = "E";
|
||||
List<Rule> productions = new ArrayList<>();
|
||||
String production0[] = {"T", "E'"};
|
||||
String[] production0 = {"T", "E'"};
|
||||
productions.add(new Rule("E", Arrays.asList(production0)));
|
||||
String production1[] = {"+", "T", "E'"};
|
||||
String[] production1 = {"+", "T", "E'"};
|
||||
productions.add(new Rule("E'", Arrays.asList(production1)));
|
||||
productions.add(new Rule("E'", Collections.emptyList()));
|
||||
String production2[] = {"F", "T'"};
|
||||
String[] production2 = {"F", "T'"};
|
||||
productions.add(new Rule("T", Arrays.asList(production2)));
|
||||
String production3[] = {"*", "F", "T'"};
|
||||
String[] production3 = {"*", "F", "T'"};
|
||||
productions.add(new Rule("T'", Arrays.asList(production3)));
|
||||
productions.add(new Rule("T'", Collections.emptyList()));
|
||||
String production4[] = {"(", "E", ")"};
|
||||
String[] production4 = {"(", "E", ")"};
|
||||
productions.add(new Rule("F", Arrays.asList(production4)));
|
||||
String production5[] = {"id"};
|
||||
String[] production5 = {"id"};
|
||||
productions.add(new Rule("F", Arrays.asList(production5)));
|
||||
|
||||
this.grammar1 = new Grammar(nonterminals, terminals,
|
@ -3,6 +3,9 @@ EPS: epsilon
|
||||
TERM: id + * ( )
|
||||
NTERM: E E2 T T2 F
|
||||
|
||||
// Leerzeilen sind egal
|
||||
// Man kann Line-Comments schreiben und Produktionen verodern
|
||||
|
||||
E -> T E2
|
||||
E2 -> + T E2 | EPS
|
||||
T -> F T2
|
||||
|
Reference in New Issue
Block a user