From ede4dea616168246f42edf2a2afaf17ab7a1c45e Mon Sep 17 00:00:00 2001 From: ala89 Date: Fri, 10 Nov 2023 17:35:33 +0100 Subject: [PATCH] Add interpreter --- Makefile | 4 +- src/include/input.h | 6 +++ src/include/interpreter.h | 8 +++ src/include/parser.h | 1 - src/include/tokenize.h | 14 ++++- src/interpreter.cpp | 105 +++++++++++++++++++++++++++++++++++--- src/main.cpp | 16 +++++- src/parser.cpp | 9 ++-- src/tokenize.cpp | 10 ++-- 9 files changed, 147 insertions(+), 26 deletions(-) diff --git a/Makefile b/Makefile index e57c6c3..668694e 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ CXX := g++ LD_CXXFLAGS = # Compilation flag -CXXFLAGS = -Wall -Wextra -g -O3 +CXXFLAGS = -Wall -Wextra -g -O3 -std=c++2a # Remove warnings about unused variables, functions, ... # -Wno-unused-parameter -Wno-unused-function -Wno-unused-variable -Wno-unused-but-set-variable # Compile with debug @@ -14,7 +14,7 @@ CXXFLAGS = -Wall -Wextra -g -O3 # See memory leaks and Incorrect Read/Write # -fsanitize=address -lasan -$(BUILDDIR)/main: $(SRCDIR)/main.cpp $(BUILDDIR)/input.o src/include/parser.h +$(BUILDDIR)/main: $(SRCDIR)/main.cpp $(BUILDDIR)/input.o $(BUILDDIR)/interpreter.o $(BUILDDIR)/parser.o $(BUILDDIR)/tokenize.o $(CXX) $^ -o $@ $(CXXFLAGS) $(LD_CXXFLAGS) diff --git a/src/include/input.h b/src/include/input.h index cb9c695..1964eda 100644 --- a/src/include/input.h +++ b/src/include/input.h @@ -1,6 +1,12 @@ #ifndef DEF_INPUT_H #define DEF_INPUT_H +#include +using namespace std; + +/* + Retrieves user input +*/ string get_input(); #endif \ No newline at end of file diff --git a/src/include/interpreter.h b/src/include/interpreter.h index 86f6c75..e4b5a64 100644 --- a/src/include/interpreter.h +++ b/src/include/interpreter.h @@ -1,6 +1,14 @@ #ifndef INTERPRETER_H #define INTERPRETER_H +#include +using namespace std; +using EvalResult = variant; + +/* + Evaluates the AST, returning the latest calculated value +*/ +EvalResult eval(Node &ast); #endif \ No newline at end of file diff
--git a/src/include/parser.h b/src/include/parser.h index bc37065..2b41879 100644 --- a/src/include/parser.h +++ b/src/include/parser.h @@ -56,7 +56,6 @@ enum class NodeType { Mod, // -> F % T UnaryMinus, // -> -F UnaryPlus, // -> +F - Parenthesis, // -> (Expr) Assignment // -> Identifier = Expr }; diff --git a/src/include/tokenize.h b/src/include/tokenize.h index a4a40f5..b6daf01 100644 --- a/src/include/tokenize.h +++ b/src/include/tokenize.h @@ -6,14 +6,24 @@ #include using namespace std; -enum class TokenType { Type, Identifier, Number, Plus, Minus, Star, Slash, Percent, Equal, Semicolon, LParenthese, RParenthese }; +enum class TokenType { Type, Identifier, Int, Plus, Minus, Star, Slash, Percent, Equal, Semicolon, LParenthese, RParenthese }; enum class Type { Int }; -using TokenData = variant; +using TokenData = variant; struct Token { TokenType type; TokenData data; }; +/* + Parses a string into a vector of tokens +*/ +vector tokenize(string str); + +/* + Formats a list of tokens and prints it +*/ +void _debug_print_tokens(vector tokens); + #endif \ No newline at end of file diff --git a/src/interpreter.cpp b/src/interpreter.cpp index 3cada2b..d41ff18 100644 --- a/src/interpreter.cpp +++ b/src/interpreter.cpp @@ -1,16 +1,105 @@ #include +#include +#include +#include #include "include/parser.h" #include "include/interpreter.h" using namespace std; -void eval(Node &ast) { +unordered_map memory; -} +EvalResult eval(Node &ast) { + if (ast.index() == 0) { + InnerNode node = get(ast); + switch (node.type) { + case NodeType::Prog: + eval(node.children[0]); + return eval(node.children[1]); + break; + case NodeType::Epsilon: + return {}; + break; + case NodeType::Plus: { + int e1 = get(eval(node.children[0])); + int e2 = get(eval(node.children[1])); + return e1 + e2; + } break; + case NodeType::Minus: { + int e1 = get(eval(node.children[0])); + int e2 = get(eval(node.children[1])); + return e1 - e2; + } break; + case NodeType::Mult: { + int e1 = 
get(eval(node.children[0])); + int e2 = get(eval(node.children[1])); + return e1 * e2; + } break; + case NodeType::Div: { + int e1 = get(eval(node.children[0])); + int e2 = get(eval(node.children[1])); + // if (e2 == 0) + return e1 / e2; + } break; + case NodeType::Mod: { + int e1 = get(eval(node.children[0])); + int e2 = get(eval(node.children[1])); + // if (e2 == 0) + return e1 % e2; + } break; + case NodeType::UnaryPlus: { + int e1 = get(eval(node.children[0])); + return +e1; + } break; + case NodeType::UnaryMinus: { + int e1 = get(eval(node.children[0])); + return -e1; + } break; + case NodeType::Declaration: { + Token typeTok = get(node.children[0]); + Token identifierTok = get(node.children[1]); + string identifier = get(identifierTok.data); -int main() { - Token oneTok = { - .type = TokenType::Number, - .data = 1.0 - }; - Node one = oneTok; + memory[identifier] = 0; + + return {}; + } break; + case NodeType::AssignedDeclaration: { + Token typeTok = get(node.children[0]); + Token identifierTok = get(node.children[1]); + string identifier = get(identifierTok.data); + int expr = get(eval(node.children[2])); + + memory[identifier] = expr; + + return expr; + } break; + case NodeType::Assignment: { + Token identifierTok = get(node.children[0]); + string identifier = get(identifierTok.data); + int expr = get(eval(node.children[1])); + + memory[identifier] = expr; + + return expr; + } break; + } + } + else { + Token token = get(ast); + switch (token.type) { + case TokenType::Int: { + return get(token.data); + } break; + case TokenType::Identifier: { + string identifier = get(token.data); + // if (!memory.contains(identifier)) return; + return memory[identifier]; + } break; + default: + throw; + break; + } + } + + return {}; } \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index fea2f16..e4382d4 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -2,9 +2,21 @@ using namespace std; #include "include/input.h" +#include "include/tokenize.h" #include 
"include/parser.h" - +#include "include/interpreter.h" int main(int argc, char* argv[]) { - return 0; + while (true) { + try { + string input = get_input(); + vector tokens = tokenize(input); + Node ast = parse(tokens); + EvalResult res = eval(ast); + cout << get(res) << endl; + } + catch (...) { // temp + cout << "err" << endl; + } + } } \ No newline at end of file diff --git a/src/parser.cpp b/src/parser.cpp index 57238f6..56f2588 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -271,7 +271,7 @@ ParseReturn parse_f(vector tokens) { throw new ParseException; switch (tokens.back().type) { - case TokenType::Number: { //* U -> Number + case TokenType::Int: { //* U -> Number Token number = tokens.back(); tokens.pop_back(); return { @@ -288,12 +288,9 @@ ParseReturn parse_f(vector tokens) { throw new ParseException; tokens.pop_back(); - InnerNode node = { - .type=NodeType::Parenthesis, - .children={ ret.node } - }; + return { - .node=node, + .node=ret.node, .tokens=tokens }; } diff --git a/src/tokenize.cpp b/src/tokenize.cpp index 3dffa99..d848438 100644 --- a/src/tokenize.cpp +++ b/src/tokenize.cpp @@ -9,14 +9,14 @@ regex NUMBER_REGEX ("\\d+(\\.\\d+)?"); regex TYPE_INT_REGEX ("int\\s"); regex IDENTIFIER_REGEX ("[A-Za-z_]\\w*"); -void print_tokens(vector tokens) { +void _debug_print_tokens(vector tokens) { for (Token token : tokens) { switch (token.type) { case TokenType::Type: cout << "Type(INT)"; break; - case TokenType::Number: - cout << "Number(" << get(token.data) << ")"; + case TokenType::Int: + cout << "Number(" << get(token.data) << ")"; break; case TokenType::Identifier: cout << "Identifier(" << get(token.data) << ")"; @@ -61,8 +61,8 @@ vector tokenize(string str) { smatch m; if (regex_search(str, m, NUMBER_REGEX, regex_constants::match_continuous)) { Token token = { - .type = TokenType::Number, - .data = stod(m.str()) + .type = TokenType::Int, + .data = stoi(m.str()) }; tokens.emplace_back(token); str.erase(0, m.str().length());