From 8114c9b5fd5f78d9613966bf6e1aa2e2da52f831 Mon Sep 17 00:00:00 2001 From: ChUrl Date: Wed, 29 Mar 2023 18:47:37 +0200 Subject: [PATCH] Initial commit (from Logisim-Assembler) --- .clang-format | 139 ++++++++++++++++++++++++++++ .clang-tidy | 2 + .gitignore | 2 + CMakeLists.txt | 28 ++++++ README.md | 30 +++++++ flake.lock | 94 +++++++++++++++++++ flake.nix | 117 ++++++++++++++++++++++++ programs/add_and_jump.cpu8_v1 | 17 ++++ programs/add_and_jump.lasm8_v1 | 13 +++ programs/counting.cpu8_v1 | 17 ++++ programs/counting.lasm8_v1 | 13 +++ programs/input_output.cpu8_v1 | 17 ++++ programs/input_output.lasm8_v1 | 5 ++ programs/nop_and_jump.cpu8_v1 | 17 ++++ programs/nop_and_jump.lasm8_v1 | 4 + src/ast/Node.cpp | 13 +++ src/ast/Node.h | 49 ++++++++++ src/ast/Observer.cpp | 11 +++ src/ast/Observer.h | 28 ++++++ src/ast/PostfixObserver.cpp | 18 ++++ src/ast/PostfixObserver.h | 23 +++++ src/ast/PrefixObserver.cpp | 18 ++++ src/ast/PrefixObserver.h | 23 +++++ src/ast/nodes/AluNode.cpp | 15 ++++ src/ast/nodes/AluNode.h | 32 +++++++ src/ast/nodes/ConstNode.cpp | 15 ++++ src/ast/nodes/ConstNode.h | 22 +++++ src/ast/nodes/JumpNode.cpp | 11 +++ src/ast/nodes/JumpNode.h | 34 +++++++ src/ast/nodes/MovNode.cpp | 11 +++ src/ast/nodes/MovNode.h | 23 +++++ src/ast/nodes/RootNode.cpp | 9 ++ src/ast/nodes/RootNode.h | 19 ++++ src/codegen/CodegenObserver.cpp | 24 +++++ src/codegen/CodegenObserver.h | 23 +++++ src/codegen/PrintObserver.cpp | 23 +++++ src/codegen/PrintObserver.h | 20 +++++ src/lexer/Lexer.cpp | 149 ++++++++++++++++++++++++++++++ src/lexer/Lexer.h | 46 ++++++++++ src/lexer/Token.cpp | 48 ++++++++++ src/lexer/Token.h | 61 +++++++++++++ src/main.cpp | 154 ++++++++++++++++++++++++++++++++ src/parser/Parser.cpp | 117 ++++++++++++++++++++++++ src/parser/Parser.h | 56 ++++++++++++ 44 files changed, 1610 insertions(+) create mode 100755 .clang-format create mode 100644 .clang-tidy create mode 100644 .gitignore create mode 100644 CMakeLists.txt create mode 100644 README.md create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 programs/add_and_jump.cpu8_v1 create mode 100644 programs/add_and_jump.lasm8_v1 create mode 100644 programs/counting.cpu8_v1 create mode 100644 programs/counting.lasm8_v1 create mode 100644 programs/input_output.cpu8_v1 create mode 100644 programs/input_output.lasm8_v1 create mode 100644 programs/nop_and_jump.cpu8_v1 create mode 100644 programs/nop_and_jump.lasm8_v1 create mode 100644 src/ast/Node.cpp create mode 100644 src/ast/Node.h create mode 100644 src/ast/Observer.cpp create mode 100644 src/ast/Observer.h create mode 100644 src/ast/PostfixObserver.cpp create mode 100644 src/ast/PostfixObserver.h create mode 100644 src/ast/PrefixObserver.cpp create mode 100644 src/ast/PrefixObserver.h create mode 100644 src/ast/nodes/AluNode.cpp create mode 100644 src/ast/nodes/AluNode.h create mode 100644 src/ast/nodes/ConstNode.cpp create mode 100644 src/ast/nodes/ConstNode.h create mode 100644 src/ast/nodes/JumpNode.cpp create mode 100644 src/ast/nodes/JumpNode.h create mode 100644 src/ast/nodes/MovNode.cpp create mode 100644 src/ast/nodes/MovNode.h create mode 100644 src/ast/nodes/RootNode.cpp create mode 100644 src/ast/nodes/RootNode.h create mode 100644 src/codegen/CodegenObserver.cpp create mode 100644 src/codegen/CodegenObserver.h create mode 100644 src/codegen/PrintObserver.cpp create mode 100644 src/codegen/PrintObserver.h create mode 100644 src/lexer/Lexer.cpp create mode 100644 src/lexer/Lexer.h create mode 100644 src/lexer/Token.cpp create mode 100644 src/lexer/Token.h create mode 100644 src/main.cpp create mode 100644 src/parser/Parser.cpp create mode 100644 src/parser/Parser.h diff --git a/.clang-format b/.clang-format new file mode 100755 index 0000000..f945184 --- /dev/null +++ b/.clang-format @@ -0,0 +1,139 @@ +--- +BasedOnStyle: LLVM + +AccessModifierOffset: -4 +AlignAfterOpenBracket: Align + +# Don't vertically align too much to not fuck with VC +AlignArrayOfStructures: None +AlignConsecutiveAssignments: None +AlignConsecutiveBitFields: None +AlignConsecutiveDeclarations: None +AlignConsecutiveMacros: None +AlignEscapedNewlines: DontAlign + +AlignOperands: AlignAfterOperator +AlignTrailingComments: true +AllowAllArgumentsOnNextLine: false +AllowAllParametersOfDeclarationOnNextLine: false + +# Allow single line stuff +AllowShortBlocksOnASingleLine: Always +AllowShortCaseLabelsOnASingleLine: true +AllowShortEnumsOnASingleLine: true +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: AllIfsAndElse +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: true +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: Yes + +# Don't force single line for each +BinPackArguments: true +BinPackParameters: true + +BitFieldColonSpacing: Both +BreakBeforeBinaryOperators: None + +# Braces completely attached, should be the same as BreakBeforeBraces: Attach +BreakBeforeBraces: Custom +BraceWrapping: + AfterCaseLabel: false + AfterClass: false + AfterControlStatement: Never + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterStruct: false + AfterUnion: false + AfterExternBlock: false + BeforeCatch: false + BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false + IndentBraces: false + SplitEmptyFunction: false + SplitEmptyRecord: false + SplitEmptyNamespace: false + +BreakBeforeConceptDeclarations: Always +BreakBeforeTernaryOperators: true +BreakConstructorInitializers: BeforeColon +BreakInheritanceList: BeforeColon +BreakStringLiterals: false +ColumnLimit: 0 +CompactNamespaces: false +ConstructorInitializerIndentWidth: 2 +ContinuationIndentWidth: 2 +Cpp11BracedListStyle: true + +# Force my style onto everything :) +DeriveLineEnding: false +DerivePointerAlignment: false +DisableFormat: false + +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: Always +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +IncludeBlocks: Merge +IndentAccessModifiers: false # don't indent, use AccessModifierOffset instead +IndentCaseBlocks: false +IndentCaseLabels: false +IndentExternBlock: NoIndent +IndentGotoLabels: false +IndentPPDirectives: None +IndentRequiresClause: false +IndentWidth: 4 +IndentWrappedFunctionNames: false +InsertBraces: false +KeepEmptyLinesAtTheStartOfBlocks: false +LambdaBodyIndentation: Signature +Language: Cpp +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +PackConstructorInitializers: BinPack +PointerAlignment: Right +PPIndentWidth: -1 # use IndentWidth +QualifierAlignment: Leave # Don't mess with const int const *ptr; +ReferenceAlignment: Right +ReflowComments: false +# TODO: RemoveBracesLLVM: false +RequiresClausePosition: OwnLine +SeparateDefinitionBlocks: Always +ShortNamespaceLines: 0 +SortIncludes: CaseInsensitive +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: false +SpaceAroundPointerQualifiers: Default +SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false +SpaceBeforeCpp11BracedList: false +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false +SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: Never +SpacesInCStyleCastParentheses: false +SpacesInConditionalStatement: false +SpacesInContainerLiterals: false + +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: 1 + +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: c++20 +TabWidth: 4 +UseCRLF: false +UseTab: Never + +... diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 0000000..ff94288 --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,2 @@ +Checks: "-*,bugprone-*,clang-analyzer-*,concurrency-*,cppcoreguidelines-*,misc-*,modernize-*,performance-*,portability-*,readability-*" +FormatStyle: file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..66fed7f --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.direnv +cmake-build-debug \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..98b87ab --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,28 @@ +cmake_minimum_required(VERSION 3.25) +project(LogisimAssembler) + +set(CMAKE_CXX_STANDARD 20) + +find_package(Boost 1.81 COMPONENTS program_options REQUIRED) + +add_executable(lasm + src/main.cpp + src/lexer/Token.cpp + src/lexer/Lexer.cpp + src/parser/Parser.cpp + + src/ast/Node.cpp + src/ast/nodes/RootNode.cpp + src/ast/nodes/ConstNode.cpp + src/ast/nodes/MovNode.cpp + src/ast/nodes/AluNode.cpp + src/ast/nodes/JumpNode.cpp + + src/ast/Observer.cpp + src/ast/PrefixObserver.cpp + src/ast/PostfixObserver.cpp + src/codegen/PrintObserver.cpp + src/codegen/CodegenObserver.cpp + ) + +target_link_libraries(lasm Boost::program_options) \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..e1d255d --- /dev/null +++ b/README.md @@ -0,0 +1,30 @@ +# README + +Small assembler that generates SystemVerilog ROM modules for [this](https://gitlab.com/ChUrl/quartus-8-bit-cpu). + +## Usage + +`svrasm -i -o ` + +## Instructionset + +| Instruction | 1. Operand | 2. Operand | Note | +|-------------|-----------------------------|-----------------|----------------------------------------| +| MOV | Constant or Source Register | Target Register | | +| AND | NONE | NONE | Works on reg1 and reg2, result in reg3 | +| OR | NONE | NONE | Works on reg1 and reg2, result in reg3 | +| NAND | NONE | NONE | Works on reg1 and reg2, result in reg3 | +| NOR | NONE | NONE | Works on reg1 and reg2, result in reg3 | +| ADD | NONE | NONE | Works on reg1 and reg2, result in reg3 | +| SUB | NONE | NONE | Works on reg1 and reg2, result in reg3 | +| JMP | NONE | NONE | Works on reg3 | +| JEQ | NONE | NONE | Works on reg3 | +| JLE | NONE | NONE | Works on reg3 | +| JLEQ | NONE | NONE | Works on reg3 | +| NOP | NONE | NONE | Works on reg3 | +| JNEQ | NONE | NONE | Works on reg3 | +| JGR | NONE | NONE | Works on reg3 | +| JGEQ | NONE | NONE | Works on reg3 | + +Line comments are recognized, indicated by `#`. + diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..559fe29 --- /dev/null +++ b/flake.lock @@ -0,0 +1,94 @@ +{ + "nodes": { + "devshell": { + "inputs": { + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs" + }, + "locked": { + "lastModified": 1678957337, + "narHash": "sha256-Gw4nVbuKRdTwPngeOZQOzH/IFowmz4LryMPDiJN/ah4=", + "owner": "numtide", + "repo": "devshell", + "rev": "3e0e60ab37cd0bf7ab59888f5c32499d851edb47", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "devshell", + "type": "github" + } + }, + "flake-utils": { + "locked": { + "lastModified": 1642700792, + "narHash": "sha256-XqHrk7hFb+zBvRg6Ghl+AZDq03ov6OshJLiSWOoX5es=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "846b2ae0fc4cc943637d3d1def4454213e203cba", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "flake-utils_2": { + "locked": { + "lastModified": 1678901627, + "narHash": "sha256-U02riOqrKKzwjsxc/400XnElV+UtPUQWpANPlyazjH0=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "93a2b84fc4b70d9e089d029deacc3583435c2ed6", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1677383253, + "narHash": "sha256-UfpzWfSxkfXHnb4boXZNaKsAcUrZT9Hw+tao1oZxd08=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "9952d6bc395f5841262b006fbace8dd7e143b634", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs_2": { + "locked": { + "lastModified": 1679281263, + "narHash": "sha256-neMref1GTruSLt1jBgAw+lvGsZj8arQYfdxvSi5yp4Q=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "8276a165b9fa3db1a7a4f29ee29b680e0799b9dc", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "devshell": "devshell", + "flake-utils": "flake-utils_2", + "nixpkgs": "nixpkgs_2" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..883c422 --- /dev/null +++ b/flake.nix @@ -0,0 +1,117 @@ +{ + description = "Logisim Assembler Development Environment"; + + inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable"; + inputs.flake-utils.url = "github:numtide/flake-utils"; + inputs.devshell.url = "github:numtide/devshell"; + + outputs = { + self, + nixpkgs, + flake-utils, + devshell, + }: + flake-utils.lib.eachDefaultSystem (system: let + pkgs = import nixpkgs { + inherit system; + config.allowUnfree = true; # For clion + overlays = [devshell.overlays.default]; + }; + + # NOTE: Usual 64 bit compilers that don't collide + bintools = pkgs.wrapBintoolsWith { + bintools = pkgs.bintools.bintools; + libc = pkgs.glibc; + }; + gcc12 = pkgs.hiPrio (pkgs.wrapCCWith { + cc = pkgs.gcc12.cc; + libc = pkgs.glibc; + bintools = bintools; + }); + clang15 = pkgs.wrapCCWith { + cc = pkgs.clang_15.cc; + libc = pkgs.glibc; + bintools = bintools; + }; + + # NOTE: Multilib compilers that don't collide + bintools_multi = pkgs.wrapBintoolsWith { + bintools = pkgs.bintools.bintools; # Get the unwrapped bintools from the wrapper + libc = pkgs.glibc_multi; + }; + gcc12_multi = pkgs.hiPrio (pkgs.wrapCCWith { + cc = pkgs.gcc12.cc; # Get the unwrapped gcc from the wrapper + libc = pkgs.glibc_multi; + bintools = bintools_multi; + }); + clang15_multi = pkgs.wrapCCWith { + cc = pkgs.clang_15.cc; + libc = pkgs.glibc_multi; + bintools = bintools_multi; + }; + in { + devShells.default = pkgs.mkShell { + buildInputs = with pkgs; [ + # Compilers + bintools + gcc12 + clang15 + # bintools_multi + # gcc12_multi + # clang14_multi + + # Libraries + boost181 + + # Native buildinputs + gnumake + cmake + # nasm + + # Development + # bear # To generate compilation database + gdb + cling # To try out my bullshit implementations + # doxygen # Generate docs + graphs + ]; + }; + + # TODO: DevShell doesn't propagate buildinputs, so its difficult to find e.g. Boost... + # # devShell = pkgs.devshell.mkShell ... + # devShell = pkgs.devshell.mkShell { + # name = "Logisim Assembler Development Environment"; + + # packages = with pkgs; [ + # # Compilers + # bintools + # gcc12 + # clang15 + # # bintools_multi + # # gcc12_multi + # # clang14_multi + + # # Libraries + # boost181 + + # # Native buildinputs + # gnumake + # cmake + # # nasm + + # # Development + # # bear # To generate compilation database + # gdb + # cling # To try out my bullshit implementations + # # doxygen # Generate docs + graphs + # ]; + + # commands = [ + # { + # name = "ide"; + # help = "Run clion for project"; + # command = "clion &>/dev/null ./ &"; + # } + # ]; + # }; + }); +} diff --git a/programs/add_and_jump.cpu8_v1 b/programs/add_and_jump.cpu8_v1 new file mode 100644 index 0000000..5e769cf --- /dev/null +++ b/programs/add_and_jump.cpu8_v1 @@ -0,0 +1,17 @@ +v3.0 hex words addressed +00: 05 81 0a 82 44 99 0f 82 45 00 c1 00 00 00 00 00 +10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +20: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +40: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +50: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +60: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +70: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +90: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ No newline at end of file diff --git a/programs/add_and_jump.lasm8_v1 b/programs/add_and_jump.lasm8_v1 new file mode 100644 index 0000000..83b93c9 --- /dev/null +++ b/programs/add_and_jump.lasm8_v1 @@ -0,0 +1,13 @@ +# Add 5 + 10 +MOV 5, reg1 +MOV 10, reg2 +ADD + +# Subtract result - 15 +MOV reg3, reg1 +MOV 15, reg2 +SUB + +# Jump to 0 if result == 15 +MOV 0, reg0 +JEQ \ No newline at end of file diff --git a/programs/counting.cpu8_v1 b/programs/counting.cpu8_v1 new file mode 100644 index 0000000..ff1610b --- /dev/null +++ b/programs/counting.cpu8_v1 @@ -0,0 +1,17 @@ +v3.0 hex words addressed +00: 30 86 31 86 32 86 33 86 34 86 35 86 36 86 37 86 +10: 38 86 39 86 00 c4 00 00 00 00 00 00 00 00 00 00 +20: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +40: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +50: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +60: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +70: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +90: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ No newline at end of file diff --git a/programs/counting.lasm8_v1 b/programs/counting.lasm8_v1 new file mode 100644 index 0000000..7f9254d --- /dev/null +++ b/programs/counting.lasm8_v1 @@ -0,0 +1,13 @@ +# Write "0123456789..." in ASCII to the output +MOV 48, output +MOV 49, output +MOV 50, output +MOV 51, output +MOV 52, output +MOV 53, output +MOV 54, output +MOV 55, output +MOV 56, output +MOV 57, output +MOV 0, reg0 +JMP \ No newline at end of file diff --git a/programs/input_output.cpu8_v1 b/programs/input_output.cpu8_v1 new file mode 100644 index 0000000..fb0512d --- /dev/null +++ b/programs/input_output.cpu8_v1 @@ -0,0 +1,17 @@ +v3.0 hex words addressed +00: b1 0a 82 44 9e 00 00 00 00 00 00 00 00 00 00 00 +10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +20: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +40: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +50: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +60: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +70: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +90: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ No newline at end of file diff --git a/programs/input_output.lasm8_v1 b/programs/input_output.lasm8_v1 new file mode 100644 index 0000000..2f1c01d --- /dev/null +++ b/programs/input_output.lasm8_v1 @@ -0,0 +1,5 @@ +# Add 10 to the input number, then output the result +MOV input, reg1 +MOV 10, reg2 +ADD +MOV reg3, output \ No newline at end of file diff --git a/programs/nop_and_jump.cpu8_v1 b/programs/nop_and_jump.cpu8_v1 new file mode 100644 index 0000000..59ceb00 --- /dev/null +++ b/programs/nop_and_jump.cpu8_v1 @@ -0,0 +1,17 @@ +v3.0 hex words addressed +00: c0 00 c4 00 00 00 00 00 00 00 00 00 00 00 00 00 +10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +20: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +40: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +50: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +60: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +70: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +90: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +a0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +b0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +c0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ No newline at end of file diff --git a/programs/nop_and_jump.lasm8_v1 b/programs/nop_and_jump.lasm8_v1 new file mode 100644 index 0000000..9dbc767 --- /dev/null +++ b/programs/nop_and_jump.lasm8_v1 @@ -0,0 +1,4 @@ +# Endless NOP loop +NOP +MOV 0, reg0 +JMP \ No newline at end of file diff --git a/src/ast/Node.cpp b/src/ast/Node.cpp new file mode 100644 index 0000000..85bd16b --- /dev/null +++ b/src/ast/Node.cpp @@ -0,0 +1,13 @@ +// +// Created by christoph on 20.03.23. +// + +#include "Node.h" + +void Node::addChild(std::unique_ptr child) { + children.push_back(std::move(child)); +} + +auto Node::getChildren() const -> const std::vector> & { + return children; +} diff --git a/src/ast/Node.h b/src/ast/Node.h new file mode 100644 index 0000000..d723637 --- /dev/null +++ b/src/ast/Node.h @@ -0,0 +1,49 @@ +// +// Created by christoph on 20.03.23. +// + +#ifndef LOGISIMASSEMBLER_NODE_H +#define LOGISIMASSEMBLER_NODE_H + +#include +#include +#include "../lexer/Token.h" + +class Node { +public: + Node() = default; + + Node(const Node ©) = default; + + auto operator=(const Node ©) -> Node & = default; + + Node(Node &&move) = default; + + auto operator=(Node &&move) -> Node & = default; + + virtual ~Node() = default; + + void addChild(std::unique_ptr child); + + // TODO: For more complex instructions, compile needs to return a vector + // TODO: In this case, the Observer may not traverse all nodes... + // The Observer is the wrong choice for compilation. + // I can just call compile on the root, and the root compiles its children. + [[nodiscard]] virtual auto compile() const -> uint8_t = 0; + + [[nodiscard]] auto getChildren() const -> const std::vector> &; + +protected: + enum Operation : uint8_t { + CONSTANT, + ALU, + COPY, + CONDITION + }; + +protected: + // TODO: Currently the AST degrades to a list, but someday we'll need a real tree + std::vector> children; +}; + +#endif //LOGISIMASSEMBLER_NODE_H diff --git a/src/ast/Observer.cpp b/src/ast/Observer.cpp new file mode 100644 index 0000000..aed1aad --- /dev/null +++ b/src/ast/Observer.cpp @@ -0,0 +1,11 @@ +// +// Created by christoph on 21.03.23. +// + +#include "Observer.h" + +Observer::Observer(const Node &root) : root(root) {} + +void Observer::traverse() { + traverse(root); +} diff --git a/src/ast/Observer.h b/src/ast/Observer.h new file mode 100644 index 0000000..a274908 --- /dev/null +++ b/src/ast/Observer.h @@ -0,0 +1,28 @@ +// +// Created by christoph on 21.03.23. +// + +#ifndef LOGISIMASSEMBLER_OBSERVER_H +#define LOGISIMASSEMBLER_OBSERVER_H + +#include "Node.h" +#include + +class Observer { +public: + Observer(const Node &root); + + virtual ~Observer() = default; + + void traverse(); + +protected: + virtual void traverse(const Node &node) = 0; + + virtual void action(const Node &node) = 0; + +private: + const Node &root; +}; + +#endif //LOGISIMASSEMBLER_OBSERVER_H diff --git a/src/ast/PostfixObserver.cpp b/src/ast/PostfixObserver.cpp new file mode 100644 index 0000000..7f8a9fb --- /dev/null +++ b/src/ast/PostfixObserver.cpp @@ -0,0 +1,18 @@ +// +// Created by christoph on 20.03.23. +// + +#include "PostfixObserver.h" + +PostfixObserver::PostfixObserver(const Node &root) : Observer(root) {} + +// TODO: Shouldn't be recursive +void PostfixObserver::traverse(const Node &node) { + for (const auto &child : node.getChildren()) { + depth++; + traverse(*child); + depth--; + } + + action(node); +} diff --git a/src/ast/PostfixObserver.h b/src/ast/PostfixObserver.h new file mode 100644 index 0000000..2e1964a --- /dev/null +++ b/src/ast/PostfixObserver.h @@ -0,0 +1,23 @@ +// +// Created by christoph on 20.03.23. +// + +#ifndef LOGISIMASSEMBLER_POSTFIXOBSERVER_H +#define LOGISIMASSEMBLER_POSTFIXOBSERVER_H + +#include "Observer.h" + +class PostfixObserver : public Observer { +public: + PostfixObserver(const Node &root); + + ~PostfixObserver() override = default; + +protected: + void traverse(const Node &node) override; + +protected: + uint32_t depth = 0; +}; + +#endif //LOGISIMASSEMBLER_PREFIXOBSERVER_H diff --git a/src/ast/PrefixObserver.cpp b/src/ast/PrefixObserver.cpp new file mode 100644 index 0000000..dbb9f7d --- /dev/null +++ b/src/ast/PrefixObserver.cpp @@ -0,0 +1,18 @@ +// +// Created by christoph on 20.03.23. +// + +#include "PrefixObserver.h" + +PrefixObserver::PrefixObserver(const Node &root) : Observer(root) {} + +// TODO: Shouldn't be recursive +void PrefixObserver::traverse(const Node &node) { + action(node); + + for (const auto &child : node.getChildren()) { + depth++; + traverse(*child); + depth--; + } +} diff --git a/src/ast/PrefixObserver.h b/src/ast/PrefixObserver.h new file mode 100644 index 0000000..cf12913 --- /dev/null +++ b/src/ast/PrefixObserver.h @@ -0,0 +1,23 @@ +// +// Created by christoph on 20.03.23. +// + +#ifndef LOGISIMASSEMBLER_PREFIXOBSERVER_H +#define LOGISIMASSEMBLER_PREFIXOBSERVER_H + +#include "Observer.h" + +class PrefixObserver : public Observer { +public: + PrefixObserver(const Node &root); + + ~PrefixObserver() override = default; + +protected: + void traverse(const Node &node) override; + +protected: + uint32_t depth = 0; +}; + +#endif //LOGISIMASSEMBLER_PREFIXOBSERVER_H diff --git a/src/ast/nodes/AluNode.cpp b/src/ast/nodes/AluNode.cpp new file mode 100644 index 0000000..93df05b --- /dev/null +++ b/src/ast/nodes/AluNode.cpp @@ -0,0 +1,15 @@ +// +// Created by christoph on 21.03.23. +// + +#include "AluNode.h" + +AluNode::AluNode(AluNode::AluOperation operation) : operation(operation) {} + +auto AluNode::compile() const -> uint8_t { + if (operation > SUB) { + throw "Compile Error: Invalid ALU Operation!"; + } + + return (ALU & 0b11) << 6 | (operation & 0b111); +} diff --git a/src/ast/nodes/AluNode.h b/src/ast/nodes/AluNode.h new file mode 100644 index 0000000..6676207 --- /dev/null +++ b/src/ast/nodes/AluNode.h @@ -0,0 +1,32 @@ +// +// Created by christoph on 21.03.23. +// + +#ifndef LOGISIMASSEMBLER_ALUNODE_H +#define LOGISIMASSEMBLER_ALUNODE_H + +#include "../Node.h" + +class AluNode : public Node { +public: + enum AluOperation : uint8_t { + AND, + OR, + NAND, + NOR, + ADD, + SUB + }; + +public: + AluNode(AluOperation operation); + + ~AluNode() override = default; + + [[nodiscard]] auto compile() const -> uint8_t override; + +private: + AluOperation operation; +}; + +#endif //LOGISIMASSEMBLER_ALUNODE_H diff --git a/src/ast/nodes/ConstNode.cpp b/src/ast/nodes/ConstNode.cpp new file mode 100644 index 0000000..93c8ba8 --- /dev/null +++ b/src/ast/nodes/ConstNode.cpp @@ -0,0 +1,15 @@ +// +// Created by christoph on 21.03.23. +// + +#include "ConstNode.h" + +ConstNode::ConstNode(uint8_t value) : value(value) {} + +auto ConstNode::compile() const -> uint8_t { + if (value > 0b00111111) { + throw "Compile Error: Constant too large!"; + } + + return (CONSTANT & 0b11) << 6 | (value & 0b111111); +} diff --git a/src/ast/nodes/ConstNode.h b/src/ast/nodes/ConstNode.h new file mode 100644 index 0000000..e4a50ce --- /dev/null +++ b/src/ast/nodes/ConstNode.h @@ -0,0 +1,22 @@ +// +// Created by christoph on 21.03.23. +// + +#ifndef LOGISIMASSEMBLER_CONSTNODE_H +#define LOGISIMASSEMBLER_CONSTNODE_H + +#include "../Node.h" + +class ConstNode : public Node { +public: + ConstNode(uint8_t value); + + ~ConstNode() override = default; + + [[nodiscard]] auto compile() const -> uint8_t override; + +private: + uint8_t value; +}; + +#endif //LOGISIMASSEMBLER_CONSTNODE_H diff --git a/src/ast/nodes/JumpNode.cpp b/src/ast/nodes/JumpNode.cpp new file mode 100644 index 0000000..c466b9f --- /dev/null +++ b/src/ast/nodes/JumpNode.cpp @@ -0,0 +1,11 @@ +// +// Created by christoph on 21.03.23. +// + +#include "JumpNode.h" + +JumpNode::JumpNode(JumpNode::JumpOperation operation) : operation(operation) {} + +uint8_t JumpNode::compile() const { + return (CONDITION & 0b11) << 6 | (operation & 0b111); +} diff --git a/src/ast/nodes/JumpNode.h b/src/ast/nodes/JumpNode.h new file mode 100644 index 0000000..6107fec --- /dev/null +++ b/src/ast/nodes/JumpNode.h @@ -0,0 +1,34 @@ +// +// Created by christoph on 21.03.23. +// + +#ifndef LOGISIMASSEMBLER_JUMPNODE_H +#define LOGISIMASSEMBLER_JUMPNODE_H + +#include "../Node.h" + +class JumpNode : public Node { +public: + enum JumpOperation : uint8_t { + NEVER, + EQUAL_ZERO, + LESS_ZERO, + LESS_EQUAL_ZERO, + ALWAYS, + NOT_ZERO, + GREATER_ZERO, + GREATER_EQUAL_ZERO + }; + +public: + JumpNode(JumpOperation operation); + + ~JumpNode() override = default; + + [[nodiscard]] auto compile() const -> uint8_t override; + +private: + JumpOperation operation; +}; + +#endif //LOGISIMASSEMBLER_JUMPNODE_H diff --git a/src/ast/nodes/MovNode.cpp b/src/ast/nodes/MovNode.cpp new file mode 100644 index 0000000..5528a3b --- /dev/null +++ b/src/ast/nodes/MovNode.cpp @@ -0,0 +1,11 @@ +// +// Created by christoph on 21.03.23. +// + +#include "MovNode.h" + +MovNode::MovNode(uint8_t source, uint8_t target) : source(source), target(target) {} + +auto MovNode::compile() const -> uint8_t { + return (COPY & 0b11) << 6 | (source & 0b111) << 3 | (target & 0b111); +} diff --git a/src/ast/nodes/MovNode.h b/src/ast/nodes/MovNode.h new file mode 100644 index 0000000..fd6f4d6 --- /dev/null +++ b/src/ast/nodes/MovNode.h @@ -0,0 +1,23 @@ +// +// Created by christoph on 21.03.23. +// + +#ifndef LOGISIMASSEMBLER_MOVNODE_H +#define LOGISIMASSEMBLER_MOVNODE_H + +#include "../Node.h" + +class MovNode : public Node { +public: + MovNode(uint8_t source, uint8_t target); + + ~MovNode() override = default; + + [[nodiscard]] auto compile() const -> uint8_t override; + +private: + uint8_t source; + uint8_t target; +}; + +#endif //LOGISIMASSEMBLER_MOVNODE_H diff --git a/src/ast/nodes/RootNode.cpp b/src/ast/nodes/RootNode.cpp new file mode 100644 index 0000000..eefd8c1 --- /dev/null +++ b/src/ast/nodes/RootNode.cpp @@ -0,0 +1,9 @@ +// +// Created by christoph on 21.03.23. +// + +#include "RootNode.h" + +auto RootNode::compile() const -> uint8_t { + return -1; +} diff --git a/src/ast/nodes/RootNode.h b/src/ast/nodes/RootNode.h new file mode 100644 index 0000000..1416acb --- /dev/null +++ b/src/ast/nodes/RootNode.h @@ -0,0 +1,19 @@ +// +// Created by christoph on 21.03.23. +// + +#ifndef LOGISIMASSEMBLER_ROOTNODE_H +#define LOGISIMASSEMBLER_ROOTNODE_H + +#include "../Node.h" + +class RootNode : public Node { +public: + RootNode() = default; + + ~RootNode() override = default; + + [[nodiscard]] auto compile() const -> uint8_t override; +}; + +#endif //LOGISIMASSEMBLER_ROOTNODE_H diff --git a/src/codegen/CodegenObserver.cpp b/src/codegen/CodegenObserver.cpp new file mode 100644 index 0000000..93d80b0 --- /dev/null +++ b/src/codegen/CodegenObserver.cpp @@ -0,0 +1,24 @@ +// +// Created by christoph on 21.03.23. +// + +#include "CodegenObserver.h" +#include +#include + +CodegenObserver::CodegenObserver(const Node &node, std::vector &output_string) + : PostfixObserver(node), output_string(output_string) {} + +void CodegenObserver::action(const Node &node) { + const uint8_t dec = node.compile(); + const uint8_t INVALID = -1; + + if (dec != INVALID) { + // uint8_t is always interpreted as char, so cast to uint32_t + const std::string hex = (boost::format("%x") % static_cast(dec)).str(); + if (hex.empty() || hex.size() > 2) { + throw "Compile Error: Resulting instruction has invalid size!"; + } + output_string.push_back(hex.length() == 2 ? hex : "0" + hex); + } +} diff --git a/src/codegen/CodegenObserver.h b/src/codegen/CodegenObserver.h new file mode 100644 index 0000000..551f8bd --- /dev/null +++ b/src/codegen/CodegenObserver.h @@ -0,0 +1,23 @@ +// +// Created by christoph on 21.03.23. +// + +#ifndef LOGISIMASSEMBLER_CODEGENOBSERVER_H +#define LOGISIMASSEMBLER_CODEGENOBSERVER_H + +#include "../ast/PostfixObserver.h" + +class CodegenObserver : public PostfixObserver { +public: + CodegenObserver(const Node &node, std::vector &output_string); + + ~CodegenObserver() override = default; + +protected: + void action(const Node &node) override; + +private: + std::vector &output_string; +}; + +#endif //LOGISIMASSEMBLER_CODEGENOBSERVER_H diff --git a/src/codegen/PrintObserver.cpp b/src/codegen/PrintObserver.cpp new file mode 100644 index 0000000..1179d5b --- /dev/null +++ b/src/codegen/PrintObserver.cpp @@ -0,0 +1,23 @@ +// +// Created by christoph on 21.03.23. +// + +#include +#include "PrintObserver.h" + +PrintObserver::PrintObserver(const Node &node) : PrefixObserver(node) {} + +void PrintObserver::action(const Node &node) { + // Print a simple indent guide + std::string depth_padding(depth * 2, '|'); + if (depth > 0) { + for (uint32_t i = 0; i < depth_padding.length(); ++i) { + if (i % 2 == 1) { + depth_padding[i] = ' '; + } + } + depth_padding[(depth * 2) - 1] = '-'; + } + + std::cout << depth_padding << typeid(node).name() << std::endl; +} diff --git a/src/codegen/PrintObserver.h b/src/codegen/PrintObserver.h new file mode 100644 index 0000000..f8242de --- /dev/null +++ b/src/codegen/PrintObserver.h @@ -0,0 +1,20 @@ +// +// Created by christoph on 21.03.23. +// + +#ifndef LOGISIMASSEMBLER_PRINTOBSERVER_H +#define LOGISIMASSEMBLER_PRINTOBSERVER_H + +#include "../ast/PrefixObserver.h" + +class PrintObserver : public PrefixObserver { +public: + PrintObserver(const Node &node); + + ~PrintObserver() override = default; + +protected: + void action(const Node &node) override; +}; + +#endif //LOGISIMASSEMBLER_PRINTOBSERVER_H diff --git a/src/lexer/Lexer.cpp b/src/lexer/Lexer.cpp new file mode 100644 index 0000000..82b7e7a --- /dev/null +++ b/src/lexer/Lexer.cpp @@ -0,0 +1,149 @@ +// +// Created by christoph on 20.03.23. +// + +#include +#include +#include "Lexer.h" + +// ! Helper Functions + +auto is_whitespace(const char character) -> bool { + const auto ascii_value = static_cast(character); + const uint8_t ascii_tab = 9; + const uint8_t ascii_cr = 13; + const uint8_t ascii_space = 32; + + return (ascii_value >= ascii_tab && ascii_value <= ascii_cr) + || ascii_value == ascii_space; +} + +auto is_ignored(const char character) -> bool { + // TODO: Any other ignored characters that could happen in the program? + return character == ','; +} + +auto is_numeric(const char character) -> bool { + const auto ascii_value = static_cast(character); + const uint8_t ascii_zero = 48; + const uint8_t ascii_nine = 57; + + return ascii_value >= ascii_zero && ascii_value <= ascii_nine; +} + +auto is_alphabetical(const char character) -> bool { + const auto ascii_value = static_cast(character); + const uint8_t ascii_a = 97; + const uint8_t ascii_A = 65; + const uint8_t ascii_z = 122; + const uint8_t ascii_Z = 90; + const uint8_t ascii_underscore = 95; + + return (ascii_value >= ascii_a && ascii_value <= ascii_z) + || (ascii_value >= ascii_A && ascii_value <= ascii_Z) + || ascii_value == ascii_underscore; +} + +auto is_mnemonic(const Token &token) -> bool { + // TODO: Move this to a separate header + const std::vector mnemonics = {"MOV", + "AND", "OR", "NAND", "NOR", "ADD", "SUB", + "JMP", "JEQ", "JLE", "JLEQ", "NOP", "JNEQ", "JGR", "JGEQ"}; + + return std::find(mnemonics.begin(), mnemonics.end(), static_cast(token)) + != mnemonics.end(); +} + +// ! Public Functions + +Lexer::Lexer(std::string_view input_string) + : input_string(input_string), position(input_string.begin()) {} + +auto Lexer::next() -> Token { + // Skip past everything that doesn't contain program information + while (is_whitespace(peek()) || peek() == ',' || peek() == '#') { + if (peek() == '#') { + // Eat whole comment, we can't decide if sth is a comment after eating # + comment(); + } else { + get(); + } + } + + if (position >= input_string.end()) { + return static_cast(Token::END); + } + + if (is_numeric(peek())) { + return number(); + } + + if (peek() == '[') { + return address(); + } + + if (is_alphabetical(peek())) { + const Token token = identifier_or_mnemonic(); + if (is_mnemonic(token)) { + return {Token::MNEMONIC, static_cast(token)}; + } + return {Token::IDENTIFIER, static_cast(token)}; + } + + return {Token::UNEXPECTED, std::string_view(position, position + 1)}; +} + +// ! Private Functions + +auto Lexer::peek() const -> char { + return *position; +} + +auto Lexer::get() -> char { + return *(position++); +} + +auto Lexer::identifier_or_mnemonic() -> Token { + const std::string_view::const_iterator begin = position; + while (peek() != ' ' && (is_alphabetical(peek()) || is_numeric(peek()))) { + get(); + } + const std::string_view::const_iterator end = position; + + // We don't know the type yet, so use UNEXPECTED + return {Token::UNEXPECTED, std::string_view(begin, end)}; +} + +auto Lexer::number() -> Token { + const std::string_view::const_iterator begin = position; + while (is_numeric(peek())) { + get(); + } + const std::string_view::const_iterator end = position; + + return {Token::NUMBER, std::string_view(begin, end)}; +} + +auto Lexer::address() -> Token { + get(); // Eat '[' + + const std::string_view::const_iterator begin = position; + while (is_numeric(peek())) { + get(); // Eat the address number + } + const std::string_view::const_iterator end = position; + + if (peek() != ']') { + throw "Lexer Error: Expected ']'!"; + } + get(); // Eat ']' + + return {Token::ADDRESS, std::string_view(begin, end)}; +} + +void Lexer::comment() { + // Eat the whole line + while (peek() != '\n') { + get(); + } +} diff --git a/src/lexer/Lexer.h b/src/lexer/Lexer.h new file mode 100644 index 0000000..78ea93d --- /dev/null +++ b/src/lexer/Lexer.h @@ -0,0 +1,46 @@ +// +// Created by christoph on 20.03.23. +// + +#ifndef LOGISIMASSEMBLER_LEXER_H +#define LOGISIMASSEMBLER_LEXER_H + +#include +#include +#include "Token.h" + +class Lexer { +public: + explicit Lexer(std::string_view input_string); + + Lexer(const Lexer ©) = delete; + + auto operator=(const Lexer ©) -> Lexer & = delete; + + Lexer(Lexer &&move) = delete; + + auto operator=(Lexer &&move) -> Lexer & = delete; + + ~Lexer() = default; + + auto next() -> Token; + +private: + [[nodiscard]] auto peek() const -> char; + + auto get() -> char; + + auto identifier_or_mnemonic() -> Token; + + auto number() -> Token; + + auto address() -> Token; + + void comment(); + +private: + std::string_view input_string; + std::string_view::const_iterator position; +}; + +#endif //LOGISIMASSEMBLER_LEXER_H diff --git a/src/lexer/Token.cpp b/src/lexer/Token.cpp new file mode 100644 index 0000000..fc51001 --- /dev/null +++ b/src/lexer/Token.cpp @@ -0,0 +1,48 @@ +// +// Created by christoph on 20.03.23. +// + +#include +#include +#include +#include "Token.h" + +Token::Token(Token::Type type) : type(type) {} + +Token::Token(Token::Type type, std::string_view lexeme) : type(type), lexeme(lexeme) {} + +auto Token::getType() const -> Token::Type { + return type; +} + +auto Token::getTypeName() const -> std::string { + return std::array {"MNEMONIC", + "IDENTIFIER", + "NUMBER", + "ADDRESS", + "END", + "UNEXPECTED"}[type]; +} + +auto Token::subtoken(uint8_t pos, uint8_t len) const -> Token { + return std::move(Token(type, lexeme.substr(pos, len))); +} + +Token::operator std::string_view() const { + return lexeme; +} + +Token::operator std::string() const { + return lexeme; +} + +// https://stackoverflow.com/questions/56634507/safely-convert-stdstring-view-to-int-like-stoi-or-atoi#answer-65675200 +Token::operator uint8_t() const { + uint8_t out; + const std::from_chars_result result = std::from_chars(lexeme.data(), lexeme.data() + lexeme.size(), out); + if (result.ec == std::errc::invalid_argument || result.ec == std::errc::result_out_of_range) + { + throw "Conversion Error: Can't convert Token to uint8_t!"; + } + return out; +} diff --git a/src/lexer/Token.h b/src/lexer/Token.h new file mode 100644 index 0000000..3296b82 --- /dev/null +++ b/src/lexer/Token.h @@ -0,0 +1,61 @@ +// +// Created by christoph on 20.03.23. +// + +#ifndef LOGISIMASSEMBLER_TOKEN_H +#define LOGISIMASSEMBLER_TOKEN_H + +#include +#include + +class Token { +public: + enum Type : uint8_t { + MNEMONIC, + IDENTIFIER, + NUMBER, + ADDRESS, // Using [] + + // TODO: Inline calculations + // PLUS, + // MINUS, + // ASTERISK, + // SLASH, + + END, + UNEXPECTED + }; + +public: + explicit Token(Type type); + + Token(Type type, std::string_view lexeme); + + Token(const Token ©) = default; + + auto operator=(const Token ©) -> Token & = default; + + Token(Token &&move) noexcept = default; + + auto operator=(Token &&move) noexcept -> Token & = default; + + ~Token() = default; + + [[nodiscard]] auto getType() const -> Type; + + [[nodiscard]] auto getTypeName() const -> std::string; + + [[nodiscard]] auto subtoken(uint8_t pos, uint8_t len) const -> Token; + + explicit operator std::string_view() const; + + explicit operator std::string() const; + + explicit operator uint8_t() const; + +private: + Type type; + std::string lexeme; +}; + +#endif //LOGISIMASSEMBLER_TOKEN_H diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..94e62fd --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,154 @@ +#include +#include +#include +#include +#include +#include +#include +#include "lexer/Lexer.h" +#include "ast/Node.h" +#include "parser/Parser.h" +#include "codegen/PrintObserver.h" +#include "codegen/CodegenObserver.h" + +namespace po = boost::program_options; + +auto read(const std::string& input_file, std::string &input_string) -> bool { + std::ifstream ifs; + ifs.open(input_file, std::ios_base::in); + if (!ifs) { + std::cout << "Failed to open input stream!" << std::endl; + return false; + } + + while (!ifs.eof()) { + std::string line; + getline(ifs, line); + input_string += line + '\n'; + } + + ifs.close(); + return true; +} + +auto lex(std::string_view input_string, std::vector &tokens) -> bool { + Lexer lexer(input_string); + while (true) { + Token token = lexer.next(); + std::cout << std::setw(10) << token.getTypeName() << ": " << static_cast(token) << std::endl; + + if (token.getType() == Token::UNEXPECTED) { + return false; + } + + tokens.push_back(std::move(token)); + + if (tokens.back().getType() == Token::END) { + return true; + } + } +} + +auto parse(std::vector &tokens) -> std::unique_ptr { + Parser parser(tokens); + return std::move(parser.parse()); +} + +auto write(const std::string &output_file, const std::vector &output_string) -> bool { + if (output_string.size() > 255) { + std::cout << "Program too large!" << std::endl; + return false; + } + + std::ofstream ofs; + ofs.open(output_file, std::ios_base::out); + if (!ofs) { + std::cout << "Failed to open output stream!" << std::endl; + return false; + } + + ofs << "v3.0 hex words addressed"; + for (uint32_t i = 0; i <= 255; ++i) { + if (i % 16 == 0) { + ofs << std::endl; + // Print Address + std::string address = (boost::format("%x") % i).str(); + ofs << (address == "0" ? "00" : address) << ": "; + } + + if (i < output_string.size()) { + ofs << output_string[i] << " "; + } else { + ofs << "00 "; + } + } + + ofs.flush(); + ofs.close(); + return true; +} + +auto main(int argc, char **argv) -> int { + // Argument parsing straight from the Boost manual: https://www.boost.org/doc/libs/1_60_0/doc/html/program_options/tutorial.html + po::options_description desc("Allowed options"); + desc.add_options() + ("help,h", "Show this help message") + ("input,i", po::value(), "Input file") + ("output,o", po::value(), "Output file"); + + po::variables_map vm; + po::store(po::parse_command_line(argc, argv, desc), vm); + po::notify(vm); + + if (vm.count("help")) { + std::cout << desc << std::endl; + return 1; + } + if (!vm.count("input")) { + std::cout << "Did not provide input file!" << std::endl; + return -1; + } + if (!vm.count("output")) { + std::cout << "Did not provide output file!" << std::endl; + return -1; + } + + std::cout << "Input File: " << vm["input"].as() << std::endl; + std::cout << "Output File: " << vm["output"].as() << std::endl; + + // Read the input file + std::string input_string; + if (!read(vm["input"].as(), input_string)) { + std::cout << "File Error: Couldn't read file!" << std::endl; + return -1; + } + + // Lexing + std::cout << "Lexing:" << std::endl; + std::vector tokens; + if (!lex(input_string, tokens)) { + std::cout << "Lexer Error: Unexpected Token!" << std::endl; + return -1; + } + + // Parsing + std::cout << "Parsing:" << std::endl; + const std::unique_ptr ast = parse(tokens); + PrintObserver(*ast).Observer::traverse(); // Print the AST + + // Codegen + std::cout << "Codegen:" << std::endl; + std::vector output_string; + CodegenObserver(*ast, output_string).Observer::traverse(); + for (const auto &instruction : output_string) { + std::cout << instruction << std::endl; + } + + // Write the output file + if (!write(vm["output"].as(), output_string)) { + std::cout << "File Error: Couldn't write file!" << std::endl; + return -1; + } + + return 0; +} diff --git a/src/parser/Parser.cpp b/src/parser/Parser.cpp new file mode 100644 index 0000000..6e00c61 --- /dev/null +++ b/src/parser/Parser.cpp @@ -0,0 +1,117 @@ +// +// Created by christoph on 20.03.23. +// + +#include "Parser.h" +#include "../ast/nodes/MovNode.h" +#include "../ast/nodes/ConstNode.h" +#include "../ast/nodes/AluNode.h" +#include "../ast/nodes/JumpNode.h" + +// ! Helper Functions + +// ! Public Functions + +Parser::Parser(const std::vector &tokens) : position(tokens.begin()) {} + +auto Parser::parse() -> std::unique_ptr { + while (peek().getType() != Token::END) { + if (peek().getType() != Token::MNEMONIC) { + throw "Parser Error: Expected Mnemonic!"; + } + + eaters[static_cast(peek())](*this); + } + + return std::move(ast); +} + +// ! Private Functions + +auto Parser::peek() const -> const Token & { + return *position; +} + +auto Parser::get() -> const Token & { + return *(position++); +} + +void Parser::mov() { + if (peek().getType() != Token::MNEMONIC || static_cast(peek()) != "MOV") { + throw "Parser Error: Expected 'MOV'!"; + } + get(); // Eat 'MOV' + + uint8_t source = 0; // Load from reg0 + if (peek().getType() == Token::NUMBER) { + ast->addChild(std::move(std::make_unique(static_cast(peek())))); // Load constant to reg0 + } else if (peek().getType() == Token::IDENTIFIER) { + if (static_cast(peek().subtoken(0, 3)) == "reg") { + source = static_cast(peek().subtoken(3, 1)); + } else if (static_cast(peek()) == "input") { + source = 6; + } + } else { + throw "Parser Error: Expected Constant or Register!"; + } + get(); // Eat source + + uint8_t target = 0; + if (peek().getType() == Token::IDENTIFIER) { + if (static_cast(peek().subtoken(0, 3)) == "reg") { + target = static_cast(peek().subtoken(3, 1)); + } else if (static_cast(peek()) == "output") { + target = 6; + } + } else { + throw "Parser Error: Expected Register!"; + } + get(); // Eat target + + if (source > 6 || target > 6) { + throw "Parser Error: Invalid Register!"; + } + + if (source != target) { + // This happens on e.g. MOV 10, reg0 + ast->addChild(std::move(std::make_unique(source, target))); + } +} + +void Parser::alu() { + std::map aluMap = {{"AND", AluNode::AND}, + {"OR", AluNode::OR}, + {"NAND", AluNode::NAND}, + {"NOR", AluNode::NOR}, + {"ADD", AluNode::ADD}, + {"SUB", AluNode::SUB}}; + + if (peek().getType() != Token::MNEMONIC) { + throw "Parser Error: Expected Mnemonic!"; + } + if (!aluMap.contains(static_cast(peek()))) { + throw "Parser Error: Invalid ALU operation!"; + } + + ast->addChild(std::move(std::make_unique(aluMap[static_cast(get())]))); // Eat alu +} + +void Parser::jmp() { + std::map jmpMap = {{"JMP", JumpNode::ALWAYS}, + {"JEQ", JumpNode::EQUAL_ZERO}, + {"JLE", JumpNode::LESS_ZERO}, + {"JLEQ", JumpNode::LESS_EQUAL_ZERO}, + {"NOP", JumpNode::NEVER}, // TODO: ? + {"JNEQ", JumpNode::NOT_ZERO}, + {"JGR", JumpNode::GREATER_ZERO}, + {"JGEQ", JumpNode::GREATER_EQUAL_ZERO}}; + + if (peek().getType() != Token::MNEMONIC) { + throw "Parser Error: Expected Mnemonic!"; + } + if (!jmpMap.contains(static_cast(peek()))) { + throw "Parser Error: Invalid JMP operation!"; + } + + ast->addChild(std::move(std::make_unique(jmpMap[static_cast(get())]))); // Eat jmp +} diff --git a/src/parser/Parser.h b/src/parser/Parser.h new file mode 100644 index 0000000..108841e --- /dev/null +++ b/src/parser/Parser.h @@ -0,0 +1,56 @@ +// +// Created by christoph on 20.03.23. +// + +#ifndef LOGISIMASSEMBLER_PARSER_H +#define LOGISIMASSEMBLER_PARSER_H + +#include +#include +#include +#include +#include "../lexer/Token.h" +#include "../ast/Node.h" +#include "../ast/nodes/RootNode.h" + +class Parser { +public: + Parser(const std::vector &tokens); + + Parser(std::vector &&tokens) = delete; + + auto parse() -> std::unique_ptr; + +private: + [[nodiscard]] auto peek() const -> const Token &; + + auto get() -> const Token &; + + void mov(); + + void alu(); + + void jmp(); + +private: + std::vector::const_iterator position; + std::unique_ptr ast = std::make_unique(); + + std::map> eaters = {{"MOV", &Parser::mov}, + {"AND", &Parser::alu}, + {"OR", &Parser::alu}, + {"NAND", &Parser::alu}, + {"NOR", &Parser::alu}, + {"ADD", &Parser::alu}, + {"SUB", &Parser::alu}, + {"JMP", &Parser::jmp}, + {"JEQ", &Parser::jmp}, + {"JLE", &Parser::jmp}, + {"JLEQ", &Parser::jmp}, + {"NOP", &Parser::jmp}, + {"JNEQ", &Parser::jmp}, + {"JGR", &Parser::jmp}, + {"JGEQ", &Parser::jmp}}; +}; + +#endif //LOGISIMASSEMBLER_PARSER_H