From a2b35ad4dad3957753622fa4b1dee0bbbf086639 Mon Sep 17 00:00:00 2001 From: ChUrl Date: Sun, 31 Jan 2021 21:55:47 +0100 Subject: [PATCH] rework Grammar --- src/main/java/parser/ParsingTable.java | 8 - src/main/java/parser/StupsParser.java | 4 +- src/main/java/parser/grammar/Grammar.java | 404 ++++++++++-------- .../java/parser/grammar/GrammarAnalyzer.java | 34 +- .../parser/grammar/GrammarAnalyzerTest.java | 65 ++- src/test/java/parser/grammar/GrammarTest.java | 12 +- .../resources/exampleGrammars/Grammar.grammar | 9 +- .../exampleGrammars/SimpleGrammar0.grammar | 2 - .../exampleGrammars/SimpleGrammar1.grammar | 12 +- stups.grammar | 9 +- 10 files changed, 298 insertions(+), 261 deletions(-) diff --git a/src/main/java/parser/ParsingTable.java b/src/main/java/parser/ParsingTable.java index 46701f3..b6fb19d 100644 --- a/src/main/java/parser/ParsingTable.java +++ b/src/main/java/parser/ParsingTable.java @@ -28,10 +28,6 @@ public class ParsingTable { return this.parsetable.get(new SimpleEntry<>(nonterminal, terminal)); } - public String getStartSymbol() { - return this.grammar.getStartSymbol(); - } - public Set getNonterminals() { return this.grammar.getNonterminals(); } @@ -40,10 +36,6 @@ public class ParsingTable { return this.grammar.getTerminals(); } - public String getEpsilon() { - return this.grammar.getEpsilonSymbol(); - } - @Override public String toString() { final StringBuilder output = new StringBuilder(); diff --git a/src/main/java/parser/StupsParser.java b/src/main/java/parser/StupsParser.java index e39b661..326cd9c 100644 --- a/src/main/java/parser/StupsParser.java +++ b/src/main/java/parser/StupsParser.java @@ -39,7 +39,7 @@ public class StupsParser { public SyntaxTree parse(List token, Vocabulary voc) { System.out.println(" - Parsing program..."); - final SyntaxTreeNode root = new SyntaxTreeNode(this.parsetable.getStartSymbol(), 0); + final SyntaxTreeNode root = new SyntaxTreeNode(Grammar.START_SYMBOL, 0); final SyntaxTree tree = new SyntaxTree(root); final Deque stack = new ArrayDeque<>(); stack.push(root); @@ -69,7 +69,7 @@ public class StupsParser { final String prod = this.parsetable.get(top, currentTokenSym); - if (top.equals(this.parsetable.getEpsilon())) { + if (top.equals(Grammar.EPSILON_SYMBOL)) { // Wenn auf dem Stack das Epsilonsymbol liegt stack.pop(); diff --git a/src/main/java/parser/grammar/Grammar.java b/src/main/java/parser/grammar/Grammar.java index 11662fa..983ade5 100644 --- a/src/main/java/parser/grammar/Grammar.java +++ b/src/main/java/parser/grammar/Grammar.java @@ -6,6 +6,7 @@ import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.EnumMap; import java.util.HashMap; @@ -13,7 +14,6 @@ import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.regex.Pattern; import java.util.stream.Collectors; import static parser.grammar.GrammarAction.DELCHILD; @@ -22,30 +22,56 @@ import static parser.grammar.GrammarAction.NAMETOVAL; import static parser.grammar.GrammarAction.PROMOTE; import static parser.grammar.GrammarAction.RENAMETO; import static parser.grammar.GrammarAction.VALTOVAL; -import static parser.grammar.GrammarAction.values; import static util.Logger.log; +/** + * Repräsentiert die Parse-Grammatik und die Kontextaktionen. + */ public class Grammar { - private static final Pattern EPS = Pattern.compile("EPS"); - private static final Pattern ARROW = Pattern.compile("->"); + // Grammar + public static final String START_SYMBOL = "S"; + public static final String EPSILON_SYMBOL = "eps"; + private final Set terminals; private final Set nonterminals; - private final String startSymbol; - private final String epsilonSymbol; // Actions - private final Map> actions; + + /** + * Jeder Kontextaktion werden alle leftsides zugewiesen, welche diese Aktion ausführen. + */ + private final Map> actionMap; + + /** + * Jeder leftside mit [renameto=name] wird der entsprechende neue Name zugewiesen. + */ private final Map renameMappings; + + /** + * Jeder leftside mit [nametoval=children] werden die entpsrechenden Children zugewiesen, + * deren Namen in die Parentvalue gschoben werden. + */ private final Map> nameToValMappings; + + /** + * Jeder leftside mit [valtoval=children] werden die entpsrechenden Children zugewiesen, + * deren Values in die Parentvalue gschoben werden. + */ private final Map> valToValMappings; + + /** + * Jeder Leftside mit [delchild=children] werden die entpsrechenden Children zugewiesen, welche entfernt werden. + */ private final Map> delChildMappings; + /** + * Die eigentlichen Produktionsregeln der Form leftside -> rightside. + */ private final Set rules; public Grammar(Set terminals, Set nonterminals, - String startSymbol, String epsilonSymbol, - Map> actions, + Map> actionMap, Map renameMappings, Map> nameToValMappings, Map> valToValMappings, @@ -55,10 +81,8 @@ public class Grammar { this.terminals = Collections.unmodifiableSet(terminals); this.nonterminals = Collections.unmodifiableSet(nonterminals); this.rules = Collections.unmodifiableSet(rules); - this.startSymbol = startSymbol; - this.epsilonSymbol = epsilonSymbol; - this.actions = Collections.unmodifiableMap(actions); + this.actionMap = Collections.unmodifiableMap(actionMap); this.renameMappings = Collections.unmodifiableMap(renameMappings); this.nameToValMappings = Collections.unmodifiableMap(nameToValMappings); this.valToValMappings = Collections.unmodifiableMap(valToValMappings); @@ -69,141 +93,197 @@ public class Grammar { System.out.println(" - Reading parser-grammar..."); List lines = Files.readAllLines(path); + // Remove Whitespace + Comments lines = lines.stream() .map(String::trim) .filter(line -> !(line.isBlank() || line.startsWith("//"))) .collect(Collectors.toUnmodifiableList()); - try { - // Grammar - String startSymbol = ""; - String epsilonSymbol = ""; - final Set terminals = new HashSet<>(); - final Set nonterminals = new HashSet<>(); - final Set rules = new HashSet<>(); + // Grammar + final Set terminals = new HashSet<>(); + final Set nonterminals = new HashSet<>(); + final Set rules = new HashSet<>(); - // Actions - final Map> actions = new EnumMap<>(GrammarAction.class); - final Map renameMappings = new HashMap<>(); - final Map> nameToValMappings = new HashMap<>(); - final Map> valToValMappings = new HashMap<>(); - final Map> delChildMappings = new HashMap<>(); + // Actions + final Map> actionMap = new EnumMap<>(GrammarAction.class); + final Map renameMappings = new HashMap<>(); + final Map> nameToValMappings = new HashMap<>(); + final Map> valToValMappings = new HashMap<>(); + final Map> delChildMappings = new HashMap<>(); - for (GrammarAction action : values()) { - actions.put(action, new HashSet<>()); - } - - // Init for validity check - final Set actionSet = Arrays.stream(values()) - .map(Enum::toString) - .collect(Collectors.toUnmodifiableSet()); - - log("Parsing Grammar from File:"); - for (String line : lines) { - - log("Parsed: " + line); - - // Parse Keywords - if (line.startsWith("START:")) { - - startSymbol = line.split(" ")[1]; - } else if (line.startsWith("EPS:")) { - - epsilonSymbol = line.split(" ")[1]; - } else if (line.startsWith("TERM:")) { - - terminals.addAll(Arrays.stream(line.split(" ")).skip(1).collect(Collectors.toSet())); - } else if (line.startsWith("NTERM:")) { - - nonterminals.addAll(Arrays.stream(line.split(" ")).skip(1).collect(Collectors.toSet())); - } else { - // Parse Grammar Rules + Actions - - // "S[...] -> E T2 | EPS" wird zu leftside = "S[...]" und rightside = "E T2 | eps" - final String[] split = ARROW.split(EPS.matcher(line).replaceAll(epsilonSymbol)); - String leftside = split[0].trim(); - final String rightside = split[1].trim(); - - if (leftside.indexOf('[') >= 0 && leftside.indexOf(']') >= 0) { - // Handle actions if they are given - - final int open = leftside.indexOf('['); - final int close = leftside.indexOf(']'); - - // Aus "S[C R=...]" wird flags = {"C", "R=..."} - final String[] flags = leftside.substring(open + 1, close).split(" "); - final Set flagSet = Arrays.stream(flags) - .map(String::trim) - .filter(flag -> !flag.isEmpty()) - .collect(Collectors.toUnmodifiableSet()); - - // Check for action validity - for (String flag : flagSet) { - if (!actionSet.contains(flag.split("=")[0].toUpperCase())) { - throw new GrammarParseException("Invalid Action: " + flag); - } - } - - // "S[C R=...]" wird zu "S" - leftside = leftside.substring(0, open).trim(); - - // Register actions, flagSet = {"C", "R=..."} - for (String flag : flagSet) { - final String[] flagSplit = flag.split("="); - final GrammarAction action = GrammarAction.valueOf(flagSplit[0].toUpperCase()); - - actions.get(action).add(leftside.trim()); - log("Registered " + flag + ": " + leftside.trim()); - - if (flagSplit.length > 1) { - // Handle Action with arguments - - // "R=A,B,C" -> argSplit = {"A", "B", "C"} - final int argStart = flag.indexOf('='); - final String[] argSplit = flag.substring(argStart + 1).split(","); - - switch (action) { - case DELCHILD -> delChildMappings.put(leftside, Arrays.asList(argSplit)); - case VALTOVAL -> valToValMappings.put(leftside, Arrays.asList(argSplit)); - case NAMETOVAL -> nameToValMappings.put(leftside, Arrays.asList(argSplit)); - case RENAMETO -> renameMappings.put(leftside, argSplit[0]); - } - } - } - } - - // "E T2 | epsilon" wird zu prods[0] = "E T2" und prods[1] = "epsilon" - final String[] prods = rightside.split("\\|"); - - for (String prod : prods) { - final GrammarRule rule = new GrammarRule(leftside, prod.split(" ")); - rules.add(rule); - - } - } - } - - log("\n" + actions); - log("-".repeat(100)); - System.out.println("Grammar parsed successfully."); - - return new Grammar(terminals, nonterminals, - startSymbol, epsilonSymbol, - actions, - renameMappings, - nameToValMappings, - valToValMappings, - delChildMappings, - rules); - } catch (Exception e) { - log("Die Grammatik kann nicht gelesen werden!"); - log(path.toString()); - e.printStackTrace(); + // Init actionMap + for (GrammarAction action : GrammarAction.values()) { + actionMap.put(action, new HashSet<>()); } - return null; + log("Parsing Grammar from File:"); + for (String currentLine : lines) { + + log("Parsed: " + currentLine); + + // Parse Keywords + if (currentLine.startsWith("TERM:")) { + + terminals.addAll(Arrays.stream(currentLine.split(" ")).skip(1).collect(Collectors.toSet())); + } else if (currentLine.startsWith("NTERM:")) { + + nonterminals.addAll(Arrays.stream(currentLine.split(" ")).skip(1).collect(Collectors.toSet())); + } else { + // Parse regular lines + + parseRegularLine(currentLine, actionMap, + delChildMappings, valToValMappings, nameToValMappings, renameMappings, + rules); + } + } + + log("\n" + actionMap); + log("-".repeat(100)); + System.out.println("Grammar parsed successfully."); + + return new Grammar(terminals, nonterminals, + actionMap, renameMappings, nameToValMappings, + valToValMappings, delChildMappings, rules); } + + /** + * Es wird eine normale Zeile der Form leftside[actions] -> rightside geparst. + * Die Produktionsregeln sowie die Kontextaktionen werden registriert. + */ + private static void parseRegularLine(String currentLine, + Map> actions, + Map> delChildMappings, + Map> valToValMappings, + Map> nameToValMappings, + Map renameMappings, + Collection rules) { + + // "S[...] -> E T2 | eps" wird zu leftside = "S[...]" und rightside = "E T2 | eps" + final String[] split = currentLine.split("->"); + String leftside = split[0].trim(); + final String rightside = split[1].trim(); + + final int open = leftside.indexOf('['); + final int close = leftside.indexOf(']'); + + if (open >= 0 && close >= 0) { + // Handle actions if they are given + + final Set actionSet = parseActionSet(leftside, open, close); + + // Validate Actions + throwOnInvalidActionSet(actionSet); + + // "S[C R=...]" wird zu "S" + leftside = leftside.substring(0, open).trim(); + + // Register actions, flagSet = {"C", "R=..."} + for (String flag : actionSet) { + registerAction(flag, leftside, actions, + delChildMappings, valToValMappings, nameToValMappings, renameMappings); + } + } + + registerProductionRules(leftside, rightside, rules); + } + + /** + * Es wird die Menge an Kontextaktionen [action1,action2,...] ermittelt. + */ + private static Set parseActionSet(String leftside, int open, int close) { + // Aus "S[C R=...]" wird flags = {"C", "R=..."} + final String[] flags = leftside.substring(open + 1, close).split(" "); + + return Arrays.stream(flags) + .map(String::trim) + .filter(flag -> !flag.isEmpty()) + .collect(Collectors.toUnmodifiableSet()); + } + + /** + * Es wird eine beliebige Kontextaktion geparst und der entsprechenden Map hinzugefügt. + */ + private static void registerAction(String flag, String leftside, + Map> actions, + Map> delChildMappings, + Map> valToValMappings, + Map> nameToValMappings, + Map renameMappings) { + + final String[] flagSplit = flag.split("="); + final GrammarAction action = GrammarAction.valueOf(flagSplit[0].toUpperCase()); + + registerRegularAction(action, leftside, flag, actions); + + if (flagSplit.length > 1) { + + registerActionArguments(flag, action, leftside, + delChildMappings, valToValMappings, nameToValMappings, renameMappings); + } + } + + /** + * Es wird ein Eintrag in der action-Map mit der entsprechenden leftside hinzugefügt. + */ + private static void registerRegularAction(GrammarAction action, String leftside, String flag, + Map> actions) { + + actions.get(action).add(leftside.trim()); + log("Registered " + flag + ": " + leftside.trim()); + } + + /** + * Es wird eine Kontextaktion der Form [action=arguments] geparst und der entsprechenden Map hinzugefügt. + */ + private static void registerActionArguments(String flag, GrammarAction action, String leftside, + Map> delChildMappings, + Map> valToValMappings, + Map> nameToValMappings, + Map renameMappings) { + + // "R=A,B,C" -> argSplit = {"A", "B", "C"} + final int argStart = flag.indexOf('='); + final String[] argSplit = flag.substring(argStart + 1).split(","); + + switch (action) { + case DELCHILD -> delChildMappings.put(leftside, Arrays.asList(argSplit)); + case VALTOVAL -> valToValMappings.put(leftside, Arrays.asList(argSplit)); + case NAMETOVAL -> nameToValMappings.put(leftside, Arrays.asList(argSplit)); + case RENAMETO -> renameMappings.put(leftside, argSplit[0]); + default -> throw new GrammarParseException("Unexpected value for arguments: " + action); + } + } + + /** + * Der Regelmenge wird eine neue Regel der Form leftside -> rightside hinzugefügt. + * Ist rightside dabei verodert, also leftside -> right1 | right2 | right3, dann + * wird rightside gesplittet. + */ + private static void registerProductionRules(String leftside, String rightside, Collection rules) { + // "E T2 | epsilon" wird zu prods[0] = "E T2" und prods[1] = "epsilon" + final String[] prods = rightside.split("\\|"); + + for (String prod : prods) { + final GrammarRule rule = new GrammarRule(leftside, prod.split(" ")); + rules.add(rule); + } + } + + private static void throwOnInvalidActionSet(Iterable flagSet) { + final Set actionSet = Arrays.stream(GrammarAction.values()) + .map(Enum::toString) + .collect(Collectors.toUnmodifiableSet()); + + for (String flag : flagSet) { + if (!actionSet.contains(flag.split("=")[0].toUpperCase())) { + throw new GrammarParseException("Invalid Action: " + flag); + } + } + } + + // Getters + public Set getTerminals() { return this.terminals; } @@ -212,18 +292,13 @@ public class Grammar { return this.nonterminals; } - public String getStartSymbol() { - return this.startSymbol; - } - - public String getEpsilonSymbol() { - return this.epsilonSymbol; - } - public Set getRules() { return this.rules; } + /** + * Ermittelt alle möglichen Produktionen, welche zu einer leftside gehören können. + */ public Set getRightsides(String leftside) { return this.rules.stream() .filter(rule -> rule.getLeftside().equals(leftside)) @@ -249,11 +324,10 @@ public class Grammar { && root.getValue().isEmpty(); } - private boolean canPromoteChild(String sym) { - return this.actions.get(PROMOTE).contains(sym); + private boolean canPromoteChild(String rootName) { + return this.actionMap.get(PROMOTE).contains(rootName); } - /** * Checkt auch auf Anzahl der Kinder und vorhandene Value. */ @@ -263,11 +337,10 @@ public class Grammar { && root.isEmpty(); } - public boolean canDeleteIfEmpty(String sym) { - return this.actions.get(DELIFEMPTY).contains(sym); + public boolean canDeleteIfEmpty(String rootName) { + return this.actionMap.get(DELIFEMPTY).contains(rootName); } - /** * Checkt auch auf Anzahl der Kinder. * Epsilon-Knoten werden immer gelöscht. @@ -277,40 +350,37 @@ public class Grammar { && child.isEmpty(); } - public boolean canDeleteChild(String parent, String child) { - return (this.actions.get(DELCHILD).contains(parent) - && this.delChildMappings.get(parent).contains(child)) - || (child.equals(this.epsilonSymbol)); + public boolean canDeleteChild(String parentName, String childName) { + return (this.actionMap.get(DELCHILD).contains(parentName) + && this.delChildMappings.get(parentName).contains(childName)) + || (Grammar.EPSILON_SYMBOL.equals(childName)); } - public boolean canBeRenamed(SyntaxTreeNode root) { return this.canBeRenamed(root.getName()); } - public boolean canBeRenamed(String sym) { - return this.actions.get(RENAMETO).contains(sym); + public boolean canBeRenamed(String rootName) { + return this.actionMap.get(RENAMETO).contains(rootName); } public String getNewName(SyntaxTreeNode root) { return this.getNewName(root.getName()); } - public String getNewName(String sym) { - return this.renameMappings.get(sym); + public String getNewName(String rootName) { + return this.renameMappings.get(rootName); } - public boolean hasValToVal(SyntaxTreeNode parent, SyntaxTreeNode child) { return this.hasValToVal(parent.getName(), child.getName()); } - public boolean hasValToVal(String parent, String child) { - return this.actions.get(VALTOVAL).contains(parent) - && this.valToValMappings.get(parent).contains(child); + public boolean hasValToVal(String parentName, String childName) { + return this.actionMap.get(VALTOVAL).contains(parentName) + && this.valToValMappings.get(parentName).contains(childName); } - /** * Checkt auch auf bereits existierende Values. */ @@ -319,8 +389,8 @@ public class Grammar { && parent.getValue().isEmpty(); } - public boolean canMoveNameToVal(String parent, String child) { - return this.actions.get(NAMETOVAL).contains(parent) - && this.nameToValMappings.get(parent).contains(child); + public boolean canMoveNameToVal(String parentName, String childName) { + return this.actionMap.get(NAMETOVAL).contains(parentName) + && this.nameToValMappings.get(parentName).contains(childName); } } diff --git a/src/main/java/parser/grammar/GrammarAnalyzer.java b/src/main/java/parser/grammar/GrammarAnalyzer.java index 90354e9..5245dcb 100644 --- a/src/main/java/parser/grammar/GrammarAnalyzer.java +++ b/src/main/java/parser/grammar/GrammarAnalyzer.java @@ -46,9 +46,9 @@ public class GrammarAnalyzer { // Die Methode funktioniert erst, nachdem first initialisiert ist. // Deshalb hier doppelt. - final Predicate nullable = sym -> sym.equals(this.grammar.getEpsilonSymbol()) + final Predicate nullable = sym -> sym.equals(Grammar.EPSILON_SYMBOL) || sym.isBlank() - || firstOut.get(sym).contains(this.grammar.getEpsilonSymbol()); + || firstOut.get(sym).contains(Grammar.EPSILON_SYMBOL); final Predicate allNullable = split -> split.length == 0 || Arrays.stream(split).allMatch(nullable); @@ -76,7 +76,7 @@ public class GrammarAnalyzer { for (String rightside : this.grammar.getRightsides(leftside)) { // ...and X -> Y1 Y2 ... Yk is a production... - if (!rightside.equals(this.grammar.getEpsilonSymbol())) { + if (!rightside.equals(Grammar.EPSILON_SYMBOL)) { // ...for some k >= 1... final String[] split = rightside.split(" "); @@ -94,7 +94,7 @@ public class GrammarAnalyzer { // Because a != epsilon final Set firstYiNoEps = firstOut.get(split[i]).stream() - .filter(sym -> !sym.equals(this.grammar.getEpsilonSymbol())) + .filter(sym -> !sym.equals(Grammar.EPSILON_SYMBOL)) .collect(Collectors.toSet()); final boolean changeNow = firstOut.get(leftside).addAll(firstYiNoEps); @@ -106,21 +106,21 @@ public class GrammarAnalyzer { if (i == split.length - 1 && allNullable.test(split)) { // 2. (b) If epsilon is in first(Y1) ... first(Yk), then add epsilon to first(X). - final boolean changeNow = firstOut.get(leftside).add(this.grammar.getEpsilonSymbol()); + final boolean changeNow = firstOut.get(leftside).add(Grammar.EPSILON_SYMBOL); change = change || changeNow; - logIfTrue(changeNow, "First: Added " + this.grammar.getEpsilonSymbol() + " to " + leftside + " (All are nullable)"); + logIfTrue(changeNow, "First: Added " + Grammar.EPSILON_SYMBOL + " to " + leftside + " (All are nullable)"); } } } - if (rightside.equals(this.grammar.getEpsilonSymbol())) { + if (rightside.equals(Grammar.EPSILON_SYMBOL)) { // 3. If X -> epsilon is a production, then add epsilon to first(X). - final boolean changeNow = firstOut.get(leftside).add(this.grammar.getEpsilonSymbol()); + final boolean changeNow = firstOut.get(leftside).add(Grammar.EPSILON_SYMBOL); change = change || changeNow; - logIfTrue(changeNow, "First: Added " + this.grammar.getEpsilonSymbol() + " to " + leftside + " (X -> EPS exists)"); + logIfTrue(changeNow, "First: Added " + Grammar.EPSILON_SYMBOL + " to " + leftside + " (X -> EPS exists)"); } } } @@ -143,7 +143,7 @@ public class GrammarAnalyzer { } // 1. Place $ in follow(S), where S is the start symbol, and $ is the input right endmarker - followOut.get(this.grammar.getStartSymbol()).add("$"); + followOut.get(Grammar.START_SYMBOL).add("$"); boolean change; @@ -176,7 +176,7 @@ public class GrammarAnalyzer { if (this.allNullable(sub)) { final Set firstXkNoEps = this.first(split[k]).stream() - .filter(sym -> !sym.equals(this.grammar.getEpsilonSymbol())) + .filter(sym -> !sym.equals(Grammar.EPSILON_SYMBOL)) .collect(Collectors.toSet()); final boolean changeNow = followOut.get(split[i - 1]).addAll(firstXkNoEps); @@ -241,7 +241,7 @@ public class GrammarAnalyzer { final Set followLeftside = this.follow(leftside); - if (firstRightside.contains(this.grammar.getEpsilonSymbol())) { + if (firstRightside.contains(Grammar.EPSILON_SYMBOL)) { // 2. If epsilon in first(a), then... for (String sym : followLeftside) { @@ -276,8 +276,8 @@ public class GrammarAnalyzer { public boolean nullable(String sym) { return sym.isBlank() - || sym.equals(this.grammar.getEpsilonSymbol()) - || this.first.get(sym).contains(this.grammar.getEpsilonSymbol()); + || sym.equals(Grammar.EPSILON_SYMBOL) + || this.first.get(sym).contains(Grammar.EPSILON_SYMBOL); } public boolean allNullable(String[] split) { @@ -304,7 +304,7 @@ public class GrammarAnalyzer { // X1 ... Xi-1 are nullable, so first(X1 ... Xn) contains first(Xi) final Set firstXiNoEps; - if (split.length == 1 && split[0].equals(this.grammar.getEpsilonSymbol())) { + if (split.length == 1 && split[0].equals(Grammar.EPSILON_SYMBOL)) { // Stream collect has to be evaluated, doesn't work on empty stream firstXiNoEps = Collections.emptySet(); @@ -312,7 +312,7 @@ public class GrammarAnalyzer { // Only non-epsilon symbols firstXiNoEps = this.first(split[i]).stream() - .filter(sym -> !sym.equals(this.grammar.getEpsilonSymbol())) + .filter(sym -> !sym.equals(Grammar.EPSILON_SYMBOL)) .collect(Collectors.toSet()); } @@ -321,7 +321,7 @@ public class GrammarAnalyzer { if (i == split.length - 1 && this.allNullable(split)) { // Finally, add epsilon to first(X1 X2 ... Xn) if, for all i, epsilon is in first(Xi). - firstOut.add(this.grammar.getEpsilonSymbol()); + firstOut.add(Grammar.EPSILON_SYMBOL); } } } diff --git a/src/test/java/parser/grammar/GrammarAnalyzerTest.java b/src/test/java/parser/grammar/GrammarAnalyzerTest.java index c8c7bf3..17bd03a 100644 --- a/src/test/java/parser/grammar/GrammarAnalyzerTest.java +++ b/src/test/java/parser/grammar/GrammarAnalyzerTest.java @@ -33,15 +33,12 @@ class GrammarAnalyzerTest { final String[] tarray = {"a", "b", "e", "i", "t"}; terminals = new HashSet<>(Arrays.asList(tarray)); - final String startSymbol = "S"; - final String epsilonSymbol = "epsilon"; - final Set rules = new HashSet<>(); rules.add(new GrammarRule("S", "a")); rules.add(new GrammarRule("S", "i", "E", "t", "S")); rules.add(new GrammarRule("E", "b")); - grammar0 = new Grammar(terminals, nonterminals, startSymbol, epsilonSymbol, + grammar0 = new Grammar(terminals, nonterminals, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), rules); } @@ -53,27 +50,24 @@ class GrammarAnalyzerTest { */ final Set nonterminals; - final String[] narray = {"E", "T", "E2", "T2", "F"}; + final String[] narray = {"S", "T", "E2", "T2", "F"}; nonterminals = new HashSet<>(Arrays.asList(narray)); final Set terminals; final String[] tarray = {"id", "+", "*", "(", ")"}; terminals = new HashSet<>(Arrays.asList(tarray)); - final String startSymbol = "E"; - final String epsilonSymbol = "epsilon"; - final Set rules = new HashSet<>(); - rules.add(new GrammarRule("E", "T", "E2")); + rules.add(new GrammarRule("S", "T", "E2")); rules.add(new GrammarRule("E2", "+", "T", "E2")); - rules.add(new GrammarRule("E2", epsilonSymbol)); + rules.add(new GrammarRule("E2", Grammar.EPSILON_SYMBOL)); rules.add(new GrammarRule("T", "F", "T2")); rules.add(new GrammarRule("T2", "*", "F", "T2")); - rules.add(new GrammarRule("T2", epsilonSymbol)); - rules.add(new GrammarRule("F", "(", "E", ")")); + rules.add(new GrammarRule("T2", Grammar.EPSILON_SYMBOL)); + rules.add(new GrammarRule("F", "(", "S", ")")); rules.add(new GrammarRule("F", "id")); - grammar1 = new Grammar(terminals, nonterminals, startSymbol, epsilonSymbol, + grammar1 = new Grammar(terminals, nonterminals, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), rules); } @@ -90,25 +84,22 @@ class GrammarAnalyzerTest { */ final Set nonterminals; - final String[] narray = {"X", "Y", "Z"}; + final String[] narray = {"X", "Y", "S"}; nonterminals = new HashSet<>(Arrays.asList(narray)); final Set terminals; final String[] tarray = {"a", "c", "d"}; terminals = new HashSet<>(Arrays.asList(tarray)); - final String startSymbol = "Z"; - final String epsilonSymbol = "epsilon"; - final Set rules = new HashSet<>(); - rules.add(new GrammarRule("Z", "d")); - rules.add(new GrammarRule("Z", "X", "Y", "Z")); - rules.add(new GrammarRule("Y", epsilonSymbol)); + rules.add(new GrammarRule("S", "d")); + rules.add(new GrammarRule("S", "X", "Y", "S")); + rules.add(new GrammarRule("Y", Grammar.EPSILON_SYMBOL)); rules.add(new GrammarRule("Y", "c")); rules.add(new GrammarRule("X", "Y")); rules.add(new GrammarRule("X", "a")); - grammar2 = new Grammar(terminals, nonterminals, startSymbol, epsilonSymbol, + grammar2 = new Grammar(terminals, nonterminals, Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), Collections.emptyMap(), rules); } @@ -125,10 +116,10 @@ class GrammarAnalyzerTest { void testFirstGrammar1() { final GrammarAnalyzer analyzer = new GrammarAnalyzer(grammar1); - assertThat(analyzer.getFirst().get("E")).containsOnly("id", "("); - assertThat(analyzer.getFirst().get("E2")).containsOnly("+", grammar1.getEpsilonSymbol()); + assertThat(analyzer.getFirst().get("S")).containsOnly("id", "("); + assertThat(analyzer.getFirst().get("E2")).containsOnly("+", Grammar.EPSILON_SYMBOL); assertThat(analyzer.getFirst().get("T")).containsOnly("id", "("); - assertThat(analyzer.getFirst().get("T2")).containsOnly("*", grammar1.getEpsilonSymbol()); + assertThat(analyzer.getFirst().get("T2")).containsOnly("*", Grammar.EPSILON_SYMBOL); assertThat(analyzer.getFirst().get("F")).containsOnly("id", "("); } @@ -136,9 +127,9 @@ class GrammarAnalyzerTest { void testFirstGrammar2() { final GrammarAnalyzer analyzer = new GrammarAnalyzer(grammar2); - assertThat(analyzer.getFirst().get("X")).containsOnly("c", "a", grammar2.getEpsilonSymbol()); - assertThat(analyzer.getFirst().get("Y")).containsOnly("c", grammar2.getEpsilonSymbol()); - assertThat(analyzer.getFirst().get("Z")).containsOnly("c", "a", "d"); + assertThat(analyzer.getFirst().get("X")).containsOnly("c", "a", Grammar.EPSILON_SYMBOL); + assertThat(analyzer.getFirst().get("Y")).containsOnly("c", Grammar.EPSILON_SYMBOL); + assertThat(analyzer.getFirst().get("S")).containsOnly("c", "a", "d"); } @Test @@ -153,7 +144,7 @@ class GrammarAnalyzerTest { void testFollowGrammar1() { final GrammarAnalyzer analyzer = new GrammarAnalyzer(grammar1); - assertThat(analyzer.getFollow().get("E")).containsOnly(")", "$"); + assertThat(analyzer.getFollow().get("S")).containsOnly(")", "$"); assertThat(analyzer.getFollow().get("E2")).containsOnly(")", "$"); assertThat(analyzer.getFollow().get("T")).containsOnly("+", ")", "$"); assertThat(analyzer.getFollow().get("T2")).containsOnly("+", ")", "$"); @@ -166,7 +157,7 @@ class GrammarAnalyzerTest { assertThat(analyzer.getFollow().get("X")).containsOnly("a", "c", "d"); assertThat(analyzer.getFollow().get("Y")).containsOnly("a", "c", "d"); - assertThat(analyzer.getFollow().get("Z")).containsOnly("$"); + assertThat(analyzer.getFollow().get("S")).containsOnly("$"); } @Test @@ -174,18 +165,18 @@ class GrammarAnalyzerTest { final GrammarAnalyzer analyzer = new GrammarAnalyzer(grammar1); final ParsingTable table = analyzer.getTable(); - assertThat(table.get("E", "id")).isEqualTo("T E2"); - assertThat(table.get("E", "(")).isEqualTo("T E2"); + assertThat(table.get("S", "id")).isEqualTo("T E2"); + assertThat(table.get("S", "(")).isEqualTo("T E2"); assertThat(table.get("E2", "+")).isEqualTo("+ T E2"); - assertThat(table.get("E2", ")")).isEqualTo(grammar1.getEpsilonSymbol()); - assertThat(table.get("E2", "$")).isEqualTo(grammar1.getEpsilonSymbol()); + assertThat(table.get("E2", ")")).isEqualTo(Grammar.EPSILON_SYMBOL); + assertThat(table.get("E2", "$")).isEqualTo(Grammar.EPSILON_SYMBOL); assertThat(table.get("T", "id")).isEqualTo("F T2"); assertThat(table.get("T", "(")).isEqualTo("F T2"); - assertThat(table.get("T2", "+")).isEqualTo(grammar1.getEpsilonSymbol()); + assertThat(table.get("T2", "+")).isEqualTo(Grammar.EPSILON_SYMBOL); assertThat(table.get("T2", "*")).isEqualTo("* F T2"); - assertThat(table.get("T2", ")")).isEqualTo(grammar1.getEpsilonSymbol()); - assertThat(table.get("T2", "$")).isEqualTo(grammar1.getEpsilonSymbol()); + assertThat(table.get("T2", ")")).isEqualTo(Grammar.EPSILON_SYMBOL); + assertThat(table.get("T2", "$")).isEqualTo(Grammar.EPSILON_SYMBOL); assertThat(table.get("F", "id")).isEqualTo("id"); - assertThat(table.get("F", "(")).isEqualTo("( E )"); + assertThat(table.get("F", "(")).isEqualTo("( S )"); } } diff --git a/src/test/java/parser/grammar/GrammarTest.java b/src/test/java/parser/grammar/GrammarTest.java index d4475a3..e8b044b 100644 --- a/src/test/java/parser/grammar/GrammarTest.java +++ b/src/test/java/parser/grammar/GrammarTest.java @@ -26,10 +26,7 @@ class GrammarTest { final Path path = getPath("SimpleGrammar0.grammar"); final Grammar grammar = Grammar.fromFile(path); - assert grammar != null; - assertThat(grammar.getEpsilonSymbol()).isEqualTo("epsilon"); - assertThat(grammar.getStartSymbol()).isEqualTo("S"); assertThat(grammar.getTerminals()).containsOnly("a", "i", "t", "b"); assertThat(grammar.getNonterminals()).containsOnly("S", "E"); assertThat(grammar.getRules()).containsOnly(new GrammarRule("S", "a"), @@ -42,14 +39,11 @@ class GrammarTest { final Path path = getPath("SimpleGrammar1.grammar"); final Grammar grammar = Grammar.fromFile(path); - assert grammar != null; - assertThat(grammar.getEpsilonSymbol()).isEqualTo("epsilon"); - assertThat(grammar.getStartSymbol()).isEqualTo("E"); assertThat(grammar.getTerminals()).containsOnly("id", "+", "*", "(", ")"); - assertThat(grammar.getNonterminals()).containsOnly("E", "E2", "T", "T2", "F"); - assertThat(grammar.getRules()).contains(new GrammarRule("E", "T", "E2"), + assertThat(grammar.getNonterminals()).containsOnly("S", "E2", "T", "T2", "F"); + assertThat(grammar.getRules()).contains(new GrammarRule("S", "T", "E2"), new GrammarRule("E2", "+", "T", "E2"), - new GrammarRule("E2", "epsilon")); + new GrammarRule("E2", Grammar.EPSILON_SYMBOL)); } } diff --git a/src/test/resources/exampleGrammars/Grammar.grammar b/src/test/resources/exampleGrammars/Grammar.grammar index 068b78f..8c22435 100644 --- a/src/test/resources/exampleGrammars/Grammar.grammar +++ b/src/test/resources/exampleGrammars/Grammar.grammar @@ -1,7 +1,4 @@ -START: s -EPS: eps - -// START, EPS, NTERM, TERM are reserved +// NTERM, TERM are reserved // Some Grammar-Symbols have to be named this way: // assignment, declaration (for TypeTable creation) @@ -11,7 +8,7 @@ EPS: eps // Nonterminals: NTERM: val type NTERM: op unary arith_op logic_op compare_op -NTERM: s class_cnt block_cnt +NTERM: S class_cnt block_cnt NTERM: statement stmt print NTERM: declaration assignment NTERM: par_expr expr expr_2 expr_f @@ -64,7 +61,7 @@ compare_op[promote] -> LESS | LESS_EQUAL | GREATER | GREATER_EQUAL | EQUAL | NOT // ------------------------------------------------------------------------------------------------- // START -> class IDENTIFIER { class_cnt } -s[promote] -> CLASS IDENTIFIER L_BRACE class_cnt R_BRACE | eps +S[promote] -> CLASS IDENTIFIER L_BRACE class_cnt R_BRACE | eps // class_cnt -> public static void main(String[] args) { block_cnt } class_cnt[promote delifempty] -> PUBLIC STATIC VOID_TYPE IDENTIFIER_MAIN L_PAREN STRING_TYPE L_BRACKET R_BRACKET IDENTIFIER R_PAREN L_BRACE block_cnt R_BRACE | eps diff --git a/src/test/resources/exampleGrammars/SimpleGrammar0.grammar b/src/test/resources/exampleGrammars/SimpleGrammar0.grammar index d29ec5b..a9a7fc0 100644 --- a/src/test/resources/exampleGrammars/SimpleGrammar0.grammar +++ b/src/test/resources/exampleGrammars/SimpleGrammar0.grammar @@ -1,5 +1,3 @@ -START: S -EPS: epsilon TERM: a i t b NTERM: S E diff --git a/src/test/resources/exampleGrammars/SimpleGrammar1.grammar b/src/test/resources/exampleGrammars/SimpleGrammar1.grammar index 6ee93f1..efd7eca 100644 --- a/src/test/resources/exampleGrammars/SimpleGrammar1.grammar +++ b/src/test/resources/exampleGrammars/SimpleGrammar1.grammar @@ -1,13 +1,11 @@ -START: E -EPS: epsilon TERM: id + * ( ) -NTERM: E E2 T T2 F +NTERM: S E2 T T2 F // Leerzeilen sind egal // Man kann Line-Comments schreiben und Produktionen verodern -E -> T E2 -E2 -> + T E2 | EPS +S -> T E2 +E2 -> + T E2 | eps T -> F T2 -T2 -> * F T2 | EPS -F -> ( E ) | id +T2 -> * F T2 | eps +F -> ( S ) | id diff --git a/stups.grammar b/stups.grammar index 068b78f..8c22435 100644 --- a/stups.grammar +++ b/stups.grammar @@ -1,7 +1,4 @@ -START: s -EPS: eps - -// START, EPS, NTERM, TERM are reserved +// NTERM, TERM are reserved // Some Grammar-Symbols have to be named this way: // assignment, declaration (for TypeTable creation) @@ -11,7 +8,7 @@ EPS: eps // Nonterminals: NTERM: val type NTERM: op unary arith_op logic_op compare_op -NTERM: s class_cnt block_cnt +NTERM: S class_cnt block_cnt NTERM: statement stmt print NTERM: declaration assignment NTERM: par_expr expr expr_2 expr_f @@ -64,7 +61,7 @@ compare_op[promote] -> LESS | LESS_EQUAL | GREATER | GREATER_EQUAL | EQUAL | NOT // ------------------------------------------------------------------------------------------------- // START -> class IDENTIFIER { class_cnt } -s[promote] -> CLASS IDENTIFIER L_BRACE class_cnt R_BRACE | eps +S[promote] -> CLASS IDENTIFIER L_BRACE class_cnt R_BRACE | eps // class_cnt -> public static void main(String[] args) { block_cnt } class_cnt[promote delifempty] -> PUBLIC STATIC VOID_TYPE IDENTIFIER_MAIN L_PAREN STRING_TYPE L_BRACKET R_BRACKET IDENTIFIER R_PAREN L_BRACE block_cnt R_BRACE | eps