diff --git a/src/include/interpreter.h b/src/include/interpreter.h index e3e6795..0db1691 100644 --- a/src/include/interpreter.h +++ b/src/include/interpreter.h @@ -1,21 +1,12 @@ #ifndef INTERPRETER_H #define INTERPRETER_H -#include #include #include +#include "types.h" +#include "memory.h" using namespace std; -#include "tokenize.h" - -using EvalResult = variant; - -struct MemoryEntry { - EvalResult value { }; - bool assigned { false }; - Type type; -}; - class RuntimeError : public runtime_error { public: explicit RuntimeError(const string& message, CodePosition pos) @@ -27,8 +18,6 @@ public: /* Evaluates the AST, returning the latest calulated value */ -EvalResult eval(Node &ast); - -void _debug_flush_memory(void); +EvalResult eval(Node &ast, Memory& memory); #endif \ No newline at end of file diff --git a/src/include/memory.h b/src/include/memory.h new file mode 100644 index 0000000..7ed3d6f --- /dev/null +++ b/src/include/memory.h @@ -0,0 +1,18 @@ +#ifndef MEMORY_H +#define MEMORY_H + +#include +#include "types.h" +using namespace std; + +Memory new_memory(void); + +bool memory_contains(Memory& memory, string identifier); + +EvalResult memory_get(Memory& memory, string identifier); + +void memory_set(Memory& memory, string identifier, EvalResult value); + +void memory_update(Memory& memory, string identifier, EvalResult value); + +#endif \ No newline at end of file diff --git a/src/include/parser.h b/src/include/parser.h index 266ada7..082cb6d 100644 --- a/src/include/parser.h +++ b/src/include/parser.h @@ -7,109 +7,6 @@ #include "tokenize.h" using namespace std; -/** Grammar: -Prog -> Instruction Prog | Instruction - -Instruction -> Statement | ExprStatement; | Expr; | ; - -Statement -> - | { Prog } - | If (Expr) Instruction - | If (Expr) Instruction Else Instruction - -ExprStatement -> - | Type ParIdentifier = Expr // AssignedDeclaration - | Type ParIdentifier // Declaration - - -Expr -> - | T - | T + Expr - | T - Expr - -T -> - | U - | U * T - | U / T - | U % T - -U -> - | F - | - U - | + U - -F -> - | Number - - | ++ParIdentifier - | --ParIdentifier - - | ParIdentifier = Expr // Assignment - | ParIdentifier++ - | ParIdentifier-- - | ParIdentifier // This makes the grammar ambiguous but simpler to parse - - | (Expr) - -ParIdentifier -> - | Identifier - | (ParIdentifier) -*/ - -/** - * Type de Noeuds -*/ -enum class NodeType { - /* On ne créé pas de nouveau noeud -> ; Prog */ - Prog, // -> Instruction Prog - Epsilon, // -> ; - AssignedDeclaration, // -> Type Identifier = Expr - Declaration, // -> Type Identifier - Plus, // -> T + Expr - Minus, // -> T - Expr - Mult, // -> F * T - Div, // -> F / T - Mod, // -> F % T - UnaryMinus, // -> -F - UnaryPlus, // -> +F - Assignment, // -> Identifier = Expr - LIncr, // -> ++ParIdentifier - RIncr, // -> ParIdentifier++ - LDecr, // -> --ParIdentifier - RDecr, // -> ParIdentifier-- - If, // -> If (Expr) Instruction - IfElse, // -> If (Expr) Instruction Else Instruction - Bloc // -> { Prog } -}; - -struct InnerNode; - -/** - * InnerNode: noeud interne - * Token: feuille -*/ -using Node = variant; - -/** - * Noeud interne -*/ -struct InnerNode { - NodeType type; - vector children; - CodePosition pos; -}; - -// A Leaf is always corresponding to a Token - -/** - * Node: AST - * tokens: tokens pas encore parsés -*/ -struct ParseReturn { - Node node; - vector tokens; -}; - /** * Utilisé pour revenir en arrière quand quelque chose n'est pas reconnu */ @@ -181,7 +78,7 @@ ParseReturn parse_par_identifier(vector tokens); /** * Prints a tree for debugging it */ -void _debug_print_tree(const Node& node, int depth, const string& prefix); +void _debug_print_tree(const Node& node, int depth = 0, const string& prefix = ""); /** * Returns the CodePosition of a node diff --git a/src/include/tokenize.h b/src/include/tokenize.h index fad08ba..a8f3688 100644 --- a/src/include/tokenize.h +++ b/src/include/tokenize.h @@ -2,27 +2,11 @@ #define TOKENIZE_H #include -#include #include #include +#include "types.h" using namespace std; -enum class TokenType { Type, Identifier, Int, Plus, Minus, DoublePlus, DoubleMinus, DoubleEqual, Star, Slash, Percent, Equal, Semicolon, LParenthese, RParenthese, LCurlyBracket, RCurlyBracket, If, Else }; -enum class Type { Int }; - -using TokenData = variant; - -struct CodePosition { - int line; - int column; -}; - -struct Token { - TokenType type; - TokenData data { }; - CodePosition pos; -}; - class TokenError : public runtime_error { public: explicit TokenError(const string& message, CodePosition pos) diff --git a/src/include/types.h b/src/include/types.h new file mode 100644 index 0000000..f16947c --- /dev/null +++ b/src/include/types.h @@ -0,0 +1,146 @@ +#ifndef TYPES_H +#define TYPES_H + +#include +#include +#include +#include +#include +using namespace std; + +/** + * Tokens definition +*/ +enum class TokenType { Type, Identifier, Int, Plus, Minus, DoublePlus, DoubleMinus, DoubleEqual, Star, Slash, Percent, Equal, Semicolon, LParenthese, RParenthese, LCurlyBracket, RCurlyBracket, If, Else }; +enum class Type { Int }; + +using TokenData = variant; + +struct CodePosition { + int line; + int column; +}; + +struct Token { + TokenType type; + TokenData data { }; + CodePosition pos; +}; + +/** Grammar: +Prog -> Instruction Prog | Instruction + +Instruction -> Statement | ExprStatement; | Expr; | ; + +Statement -> + | { Prog } + | If (Expr) Instruction + | If (Expr) Instruction Else Instruction + +ExprStatement -> + | Type ParIdentifier = Expr // AssignedDeclaration + | Type ParIdentifier // Declaration + + +Expr -> + | T + | T + Expr + | T - Expr + +T -> + | U + | U * T + | U / T + | U % T + +U -> + | F + | - U + | + U + +F -> + | Number + + | ++ParIdentifier + | --ParIdentifier + + | ParIdentifier = Expr // Assignment + | ParIdentifier++ + | ParIdentifier-- + | ParIdentifier // This makes the grammar ambiguous but simpler to parse + + | (Expr) + +ParIdentifier -> + | Identifier + | (ParIdentifier) +*/ + +/** + * Type de Noeuds +*/ +enum class NodeType { + /* On ne créé pas de nouveau noeud -> ; Prog */ + Prog, // -> Instruction Prog + Epsilon, // -> ; + AssignedDeclaration, // -> Type Identifier = Expr + Declaration, // -> Type Identifier + Plus, // -> T + Expr + Minus, // -> T - Expr + Mult, // -> F * T + Div, // -> F / T + Mod, // -> F % T + UnaryMinus, // -> -F + UnaryPlus, // -> +F + Assignment, // -> Identifier = Expr + LIncr, // -> ++ParIdentifier + RIncr, // -> ParIdentifier++ + LDecr, // -> --ParIdentifier + RDecr, // -> ParIdentifier-- + If, // -> If (Expr) Instruction + IfElse, // -> If (Expr) Instruction Else Instruction + Bloc // -> { Prog } +}; + +struct InnerNode; + +/** + * InnerNode: noeud interne + * Token: feuille +*/ +using Node = variant; + +/** + * Noeud interne +*/ +struct InnerNode { + NodeType type; + vector children; + CodePosition pos; +}; + +// A Leaf is always corresponding to a Token + +/** + * Node: AST + * tokens: tokens pas encore parsés +*/ +struct ParseReturn { + Node node; + vector tokens; +}; + +/** + * Interpreter +*/ +using EvalResult = variant; + +struct Scope { + unordered_map vars; + int depth; + string name; +}; + +using Memory = list; + +#endif \ No newline at end of file diff --git a/src/interpreter.cpp b/src/interpreter.cpp index ebe22b1..6e4bb96 100644 --- a/src/interpreter.cpp +++ b/src/interpreter.cpp @@ -1,152 +1,181 @@ #include -#include -#include #include +#include #include "include/parser.h" #include "include/interpreter.h" +#include "include/memory.h" using namespace std; -unordered_map memory; - -void _debug_flush_memory(void) { - memory.clear(); -} - -EvalResult eval(Node &ast) { - if (ast.index() == 0) { +EvalResult eval(Node &ast, Memory &memory) { + if (holds_alternative(ast)) { InnerNode node = get(ast); switch (node.type) { case NodeType::Prog: - eval(node.children[0]); - return eval(node.children[1]); + eval(node.children[0], memory); + return eval(node.children[1], memory); break; case NodeType::Epsilon: return {}; break; + case NodeType::If: { + int cond = get(eval(node.children[0], memory)); + + if (cond) { + eval(node.children[1], memory); + } + + return {}; + } break; + case NodeType::IfElse: { + int cond = get(eval(node.children[0], memory)); + + if (cond) { + eval(node.children[1], memory); + } + else { + eval(node.children[2], memory); + } + + return {}; + } break; + case NodeType::Bloc: { + + } break; case NodeType::Plus: { - int e1 = get(eval(node.children[0])); - int e2 = get(eval(node.children[1])); + int e1 = get(eval(node.children[0], memory)); + int e2 = get(eval(node.children[1], memory)); return e1 + e2; } break; case NodeType::Minus: { - int e1 = get(eval(node.children[0])); - int e2 = get(eval(node.children[1])); + int e1 = get(eval(node.children[0], memory)); + int e2 = get(eval(node.children[1], memory)); return e1 - e2; } break; case NodeType::Mult: { - int e1 = get(eval(node.children[0])); - int e2 = get(eval(node.children[1])); + int e1 = get(eval(node.children[0], memory)); + int e2 = get(eval(node.children[1], memory)); return e1 * e2; } break; case NodeType::Div: { - int e1 = get(eval(node.children[0])); - int e2 = get(eval(node.children[1])); + int e1 = get(eval(node.children[0], memory)); + int e2 = get(eval(node.children[1], memory)); if (e2 == 0) throw RuntimeError("Division by 0", node.pos); return e1 / e2; } break; case NodeType::Mod: { - int e1 = get(eval(node.children[0])); - int e2 = get(eval(node.children[1])); + int e1 = get(eval(node.children[0], memory)); + int e2 = get(eval(node.children[1], memory)); if (e2 == 0) throw RuntimeError("Modulo by 0", node.pos); return e1 % e2; } break; case NodeType::UnaryPlus: { - int e1 = get(eval(node.children[0])); + int e1 = get(eval(node.children[0], memory)); return +e1; } break; case NodeType::UnaryMinus: { - int e1 = get(eval(node.children[0])); + int e1 = get(eval(node.children[0], memory)); return -e1; } break; case NodeType::Declaration: { Token typeTok = get(node.children[0]); Token identifierTok = get(node.children[1]); - Type type = get(typeTok.data); string identifier = get(identifierTok.data); - memory[identifier] = { - .type = type - }; + memory_set(memory, identifier, { }); return {}; } break; case NodeType::AssignedDeclaration: { Token typeTok = get(node.children[0]); Token identifierTok = get(node.children[1]); - Type type = get(typeTok.data); + // Type type = get(typeTok.data); string identifier = get(identifierTok.data); - EvalResult expr = eval(node.children[2]); + EvalResult value = eval(node.children[2], memory); - memory[identifier] = { - .value = expr, - .assigned = true, - .type = type, - }; + memory_set(memory, identifier, value); - return expr; + return value; } break; case NodeType::Assignment: { Token identifierTok = get(node.children[0]); string identifier = get(identifierTok.data); - EvalResult expr = eval(node.children[1]); + EvalResult value = eval(node.children[1], memory); - if (!memory.contains(identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos); + if (!memory_contains(memory, identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos); - memory[identifier].value = expr; - - return expr; + memory_update(memory, identifier, value); + + return value; } break; case NodeType::LIncr: { Token identifierTok = get(node.children[0]); string identifier = get(identifierTok.data); - if (!memory.contains(identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos); + if (!memory_contains(memory, identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos); - if (memory[identifier].type == Type::Int) { - memory[identifier].value = get(memory[identifier].value) + 1; + EvalResult value = memory_get(memory, identifier); + + if (holds_alternative(value)) { + memory_update(memory, identifier, get(value) + 1); + return memory_get(memory, identifier); + } + else if (holds_alternative(value)) { + throw RuntimeError("Accessing uninitialized identifier \""+identifier+"\"", identifierTok.pos); } - return memory[identifier].value; + return value; } case NodeType::RIncr: { Token identifierTok = get(node.children[0]); string identifier = get(identifierTok.data); - if (!memory.contains(identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos); + if (!memory_contains(memory, identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos); - if (memory[identifier].type == Type::Int) { - EvalResult res = memory[identifier].value; - memory[identifier].value = get(memory[identifier].value) + 1; - return res; + EvalResult value = memory_get(memory, identifier); + + if (holds_alternative(memory_get(memory, identifier))) { + memory_update(memory, identifier, get(value) + 1); + } + else if (holds_alternative(value)) { + throw RuntimeError("Accessing uninitialized identifier \""+identifier+"\"", identifierTok.pos); } - return memory[identifier].value; + return value; } case NodeType::LDecr: { Token identifierTok = get(node.children[0]); string identifier = get(identifierTok.data); - if (!memory.contains(identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos); + if (!memory_contains(memory, identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos); - if (memory[identifier].type == Type::Int) { - memory[identifier].value = get(memory[identifier].value) - 1; + EvalResult value = memory_get(memory, identifier); + + if (holds_alternative(value)) { + memory_update(memory, identifier, get(value) - 1); + return memory_get(memory, identifier); + } + else if (holds_alternative(value)) { + throw RuntimeError("Accessing uninitialized identifier \""+identifier+"\"", identifierTok.pos); } - return memory[identifier].value; + return value; } case NodeType::RDecr: { Token identifierTok = get(node.children[0]); string identifier = get(identifierTok.data); - if (!memory.contains(identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos); + if (!memory_contains(memory, identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos); - if (memory[identifier].type == Type::Int) { - EvalResult res = memory[identifier].value; - memory[identifier].value = get(memory[identifier].value) - 1; - return res; + EvalResult value = memory_get(memory, identifier); + + if (holds_alternative(memory_get(memory, identifier))) { + memory_update(memory, identifier, get(value) - 1); + } + else if (holds_alternative(value)) { + throw RuntimeError("Accessing uninitialized identifier \""+identifier+"\"", identifierTok.pos); } - return memory[identifier].value; + return value; } } } @@ -159,9 +188,13 @@ EvalResult eval(Node &ast) { case TokenType::Identifier: { string identifier = get(token.data); - if (!memory.contains(identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", token.pos); + if (!memory_contains(memory, identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", token.pos); + + EvalResult value = memory_get(memory, identifier); + + if (holds_alternative(value)) throw RuntimeError("Accessing uninitialized identifier \""+identifier+"\"", token.pos); - return memory[identifier].value; + return value; } break; default: throw; diff --git a/src/main.cpp b/src/main.cpp index 52c27ef..204d6d0 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -8,6 +8,7 @@ using namespace std; #include "include/parser.h" #include "include/tokenize.h" #include "include/interpreter.h" +#include "include/memory.h" int main(int argc, char* argv[]) { bool print_ast = false; @@ -23,6 +24,8 @@ int main(int argc, char* argv[]) { vector input; vector tokens; + Memory memory = new_memory(); + while (true) { try { int initial_line = input.size(); @@ -34,8 +37,14 @@ int main(int argc, char* argv[]) { if (print_ast) _debug_print_tree(ast, 0, ""); - EvalResult res = eval(ast); - cout << get(res) << endl; + EvalResult res = eval(ast, memory); + + if (holds_alternative(res)) { + cout << get(res) << endl; + } + else if (holds_alternative(res)) { + cout << get(res) << endl; + } } catch (const TokenError& e) { pretty_print_error(input, e.pos); diff --git a/src/memory.cpp b/src/memory.cpp new file mode 100644 index 0000000..20c9868 --- /dev/null +++ b/src/memory.cpp @@ -0,0 +1,48 @@ +#include +#include "include/memory.h" +using namespace std; + +Memory new_memory(void) { + Memory memory; + + memory.emplace_back(); + memory.back().depth = 0; + memory.back().name = "global"; + + return memory; +} + +bool memory_contains(Memory& memory, string identifier) { + for (auto rit = memory.rbegin(); rit != memory.rend(); ++rit) { + Scope& scope = *rit; + if (scope.vars.contains(identifier)) return true; + } + + return false; +} + +EvalResult memory_get(Memory& memory, string identifier) { + for (auto rit = memory.rbegin(); rit != memory.rend(); ++rit) { + Scope& scope = *rit; + if (scope.vars.contains(identifier)) return scope.vars[identifier]; + } + + throw; +} + +void memory_set(Memory& memory, string identifier, EvalResult value) { + Scope& top = memory.back(); + top.vars[identifier] = value; +} + +void memory_update(Memory& memory, string identifier, EvalResult value) { + for (auto rit = memory.rbegin(); rit != memory.rend(); ++rit) { + Scope& scope = *rit; + if (scope.vars.contains(identifier)) { + scope.vars[identifier] = value; + return; + } + } + + throw; +} \ No newline at end of file diff --git a/src/parser.cpp b/src/parser.cpp index c0773de..6e57e12 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -3,6 +3,7 @@ #include using namespace std; +#include "include/tokenize.h" #include "include/colors.h" #include "include/parser.h" @@ -16,7 +17,7 @@ const char* _node_names[] = { "Prog", "Epsilon", "AssignedDeclaration", "Declaration", "Plus", "Minus", "Mult", "Div", "Mod", "UnaryMinus", "UnaryPlus", "Assignment", "LIncr", "RIncr", "LDecr", "RDecr", "If", "IfElse", "Bloc" }; -void _debug_print_tree(const Node& node, int depth, const string& prefix = "") { +void _debug_print_tree(const Node& node, int depth, const string& prefix) { if (holds_alternative(node)) { const InnerNode& innerNode = get(node); diff --git a/test/expr_arithmetiques.cpp b/test/expr_arithmetiques.cpp index 2f8025f..4c8b373 100644 --- a/test/expr_arithmetiques.cpp +++ b/test/expr_arithmetiques.cpp @@ -1,13 +1,15 @@ #include "include/test.h" +#include "../src/include/memory.h" #include "../src/include/tokenize.h" #include "../src/include/parser.h" #include "../src/include/interpreter.h" int execute(string s) { + Memory memory = new_memory(); vector tokens = tokenize({ s }); Node ast = parse(tokens); - EvalResult res = eval(ast); + EvalResult res = eval(ast, memory); return get(res); } diff --git a/test/variables.cpp b/test/variables.cpp index e99946c..b3daff2 100644 --- a/test/variables.cpp +++ b/test/variables.cpp @@ -1,15 +1,15 @@ #include "include/test.h" +#include "../src/include/memory.h" #include "../src/include/tokenize.h" #include "../src/include/parser.h" #include "../src/include/interpreter.h" int execute(string s) { + Memory memory = new_memory(); vector tokens = tokenize({ s }); Node ast = parse(tokens); - EvalResult res = eval(ast); - - _debug_flush_memory(); + EvalResult res = eval(ast, memory); return get(res); }