1
Files
logisim-assembler/src/lexer/Lexer.cpp
2023-03-20 20:00:34 +01:00

149 lines
3.9 KiB
C++

//
// Created by christoph on 20.03.23.
//
#include <algorithm>
#include <vector>
#include "Lexer.h"
// ! Helper Functions
auto is_whitespace(const char character) -> bool {
const auto ascii_value = static_cast<uint8_t>(character);
const uint8_t ascii_tab = 9;
const uint8_t ascii_cr = 13;
const uint8_t ascii_space = 32;
return (ascii_value >= ascii_tab && ascii_value <= ascii_cr)
|| ascii_value == ascii_space;
}
auto is_ignored(const char character) -> bool {
// TODO: Any other ignored characters that could happen in the program?
return character == ',';
}
auto is_numeric(const char character) -> bool {
const auto ascii_value = static_cast<uint8_t>(character);
const uint8_t ascii_zero = 48;
const uint8_t ascii_nine = 57;
return ascii_value >= ascii_zero && ascii_value <= ascii_nine;
}
auto is_alphabetical(const char character) -> bool {
const auto ascii_value = static_cast<uint8_t>(character);
const uint8_t ascii_a = 97;
const uint8_t ascii_A = 65;
const uint8_t ascii_z = 122;
const uint8_t ascii_Z = 90;
const uint8_t ascii_underscore = 95;
return (ascii_value >= ascii_a && ascii_value <= ascii_z)
|| (ascii_value >= ascii_A && ascii_value <= ascii_Z)
|| ascii_value == ascii_underscore;
}
auto is_mnemonic(const Token &token) -> bool {
// TODO: Add other mnemonics
const std::vector<std::string> mnemonics = {"MOV",
"ADD"};
return std::find(mnemonics.begin(), mnemonics.end(), static_cast<std::string_view>(token))
!= mnemonics.end();
}
// ! Public Functions
Lexer::Lexer(std::string_view input_string)
: input_string(input_string), position(input_string.begin()) {}
auto Lexer::next() -> Token {
// Skip past everything that doesn't contain program information
while (is_whitespace(peek()) || peek() == ',' || peek() == '#') {
if (peek() == '#') {
// Eat whole comment, we can't decide if sth is a comment after eating #
comment();
} else {
get();
}
}
if (position >= input_string.end()) {
return static_cast<Token>(Token::END);
}
if (is_numeric(peek())) {
return number();
}
if (peek() == '[') {
return address();
}
if (is_alphabetical(peek())) {
const Token token = identifier_or_mnemonic();
if (is_mnemonic(token)) {
return {Token::MNEMONIC, static_cast<std::string_view>(token)};
}
return {Token::IDENTIFIER, static_cast<std::string_view>(token)};
}
return {Token::UNEXPECTED, std::string_view(position, position + 1)};
}
// ! Private Functions
auto Lexer::peek() const -> char {
return *position;
}
auto Lexer::get() -> char {
return *(position++);
}
auto Lexer::identifier_or_mnemonic() -> Token {
const std::string_view::const_iterator begin = position;
while (peek() != ' ' && (is_alphabetical(peek()) || is_numeric(peek()))) {
get();
}
const std::string_view::const_iterator end = position;
// We don't know the type yet, so use UNEXPECTED
return {Token::UNEXPECTED, std::string_view(begin, end)};
}
auto Lexer::number() -> Token {
const std::string_view::const_iterator begin = position;
while (is_numeric(peek())) {
get();
}
const std::string_view::const_iterator end = position;
return {Token::NUMBER, std::string_view(begin, end)};
}
auto Lexer::address() -> Token {
get(); // Eat '['
const std::string_view::const_iterator begin = position;
while (is_numeric(peek())) {
get(); // Eat the address number
}
const std::string_view::const_iterator end = position;
if (peek() != ']') {
throw "Lexer Error: Expected ']'!";
}
get(); // Eat ']'
return {Token::ADDRESS, std::string_view(begin, end)};
}
void Lexer::comment() {
// Eat the whole line
while (peek() != '\n') {
get();
}
}