Add scoped memory and centralize type dependencies

This commit is contained in:
ala89 2023-11-22 13:52:16 +01:00
parent 60eccf1c15
commit 24228f80f5
11 changed files with 332 additions and 205 deletions

View File

@ -1,21 +1,12 @@
#ifndef INTERPRETER_H #ifndef INTERPRETER_H
#define INTERPRETER_H #define INTERPRETER_H
#include <variant>
#include <string> #include <string>
#include <stdexcept> #include <stdexcept>
#include "types.h"
#include "memory.h"
using namespace std; using namespace std;
#include "tokenize.h"
using EvalResult = variant<monostate, int, double>;
struct MemoryEntry {
EvalResult value { };
bool assigned { false };
Type type;
};
class RuntimeError : public runtime_error { class RuntimeError : public runtime_error {
public: public:
explicit RuntimeError(const string& message, CodePosition pos) explicit RuntimeError(const string& message, CodePosition pos)
@ -27,8 +18,6 @@ public:
/* /*
Evaluates the AST, returning the latest calulated value Evaluates the AST, returning the latest calulated value
*/ */
EvalResult eval(Node &ast); EvalResult eval(Node &ast, Memory& memory);
void _debug_flush_memory(void);
#endif #endif

18
src/include/memory.h Normal file
View File

@ -0,0 +1,18 @@
#ifndef MEMORY_H
#define MEMORY_H
#include <string>
#include "types.h"
using namespace std;
/**
 * Creates a fresh interpreter memory.
 * The returned Memory is a list of scopes that is passed to every other
 * memory_* function and to eval().
 */
Memory new_memory(void);
/**
 * Tells whether `identifier` is present in at least one scope of `memory`.
 * Returns true if it is, false otherwise.
 */
bool memory_contains(Memory& memory, string identifier);
/**
 * Returns the value stored for `identifier`.
 * Scopes are searched from the last one in the list towards the first,
 * and the first match is returned.
 * Precondition: `identifier` exists in `memory` (check with memory_contains first).
 */
EvalResult memory_get(Memory& memory, string identifier);
/**
 * Stores `value` under `identifier` in the last scope of `memory`,
 * creating the variable there or overwriting the one that scope already holds.
 * Other scopes are not touched.
 */
void memory_set(Memory& memory, string identifier, EvalResult value);
/**
 * Overwrites the value of an already existing variable.
 * Scopes are searched from the last one in the list towards the first,
 * and only the first scope containing `identifier` is modified.
 * Precondition: `identifier` exists in `memory` (check with memory_contains first).
 */
void memory_update(Memory& memory, string identifier, EvalResult value);
#endif

View File

@ -7,109 +7,6 @@
#include "tokenize.h" #include "tokenize.h"
using namespace std; using namespace std;
/** Grammar:
Prog -> Instruction Prog | Instruction
Instruction -> Statement | ExprStatement; | Expr; | ;
Statement ->
| { Prog }
| If (Expr) Instruction
| If (Expr) Instruction Else Instruction
ExprStatement ->
| Type ParIdentifier = Expr // AssignedDeclaration
| Type ParIdentifier // Declaration
Expr ->
| T
| T + Expr
| T - Expr
T ->
| U
| U * T
| U / T
| U % T
U ->
| F
| - U
| + U
F ->
| Number
| ++ParIdentifier
| --ParIdentifier
| ParIdentifier = Expr // Assignment
| ParIdentifier++
| ParIdentifier--
| ParIdentifier // This makes the grammar ambiguous but simpler to parse
| (Expr)
ParIdentifier ->
| Identifier
| (ParIdentifier)
*/
/**
* Type de Noeuds
*/
enum class NodeType {
/* On ne créé pas de nouveau noeud -> ; Prog */
Prog, // -> Instruction Prog
Epsilon, // -> ;
AssignedDeclaration, // -> Type Identifier = Expr
Declaration, // -> Type Identifier
Plus, // -> T + Expr
Minus, // -> T - Expr
Mult, // -> F * T
Div, // -> F / T
Mod, // -> F % T
UnaryMinus, // -> -F
UnaryPlus, // -> +F
Assignment, // -> Identifier = Expr
LIncr, // -> ++ParIdentifier
RIncr, // -> ParIdentifier++
LDecr, // -> --ParIdentifier
RDecr, // -> ParIdentifier--
If, // -> If (Expr) Instruction
IfElse, // -> If (Expr) Instruction Else Instruction
Bloc // -> { Prog }
};
struct InnerNode;
/**
* InnerNode: noeud interne
* Token: feuille
*/
using Node = variant<InnerNode, Token>;
/**
* Noeud interne
*/
struct InnerNode {
NodeType type;
vector<Node> children;
CodePosition pos;
};
// A Leaf is always corresponding to a Token
/**
* Node: AST
* tokens: tokens pas encore parsés
*/
struct ParseReturn {
Node node;
vector<Token> tokens;
};
/** /**
* Utilisé pour revenir en arrière quand quelque chose n'est pas reconnu * Utilisé pour revenir en arrière quand quelque chose n'est pas reconnu
*/ */
@ -181,7 +78,7 @@ ParseReturn parse_par_identifier(vector<Token> tokens);
/** /**
* Prints a tree for debugging it * Prints a tree for debugging it
*/ */
void _debug_print_tree(const Node& node, int depth, const string& prefix); void _debug_print_tree(const Node& node, int depth = 0, const string& prefix = "");
/** /**
* Returns the CodePosition of a node * Returns the CodePosition of a node

View File

@ -2,27 +2,11 @@
#define TOKENIZE_H #define TOKENIZE_H
#include <vector> #include <vector>
#include <variant>
#include <string> #include <string>
#include <stdexcept> #include <stdexcept>
#include "types.h"
using namespace std; using namespace std;
enum class TokenType { Type, Identifier, Int, Plus, Minus, DoublePlus, DoubleMinus, DoubleEqual, Star, Slash, Percent, Equal, Semicolon, LParenthese, RParenthese, LCurlyBracket, RCurlyBracket, If, Else };
enum class Type { Int };
using TokenData = variant<int, string, Type>;
struct CodePosition {
int line;
int column;
};
struct Token {
TokenType type;
TokenData data { };
CodePosition pos;
};
class TokenError : public runtime_error { class TokenError : public runtime_error {
public: public:
explicit TokenError(const string& message, CodePosition pos) explicit TokenError(const string& message, CodePosition pos)

146
src/include/types.h Normal file
View File

@ -0,0 +1,146 @@
#ifndef TYPES_H
#define TYPES_H
#include <variant>
#include <string>
#include <unordered_map>
#include <list>
#include <vector>
using namespace std;
/**
* Token definitions
*/
/**
* Every kind of token the language distinguishes: the type keyword, identifiers,
* integer literals, operators, punctuation and the if / else keywords.
*/
enum class TokenType { Type, Identifier, Int, Plus, Minus, DoublePlus, DoubleMinus, DoubleEqual, Star, Slash, Percent, Equal, Semicolon, LParenthese, RParenthese, LCurlyBracket, RCurlyBracket, If, Else };
/**
* Data types a variable can be declared with. Only Int is defined for now.
*/
enum class Type { Int };
/**
* Payload carried by a token: an int, a string or a Type.
* The interpreter reads identifier names as string and declared types as Type;
* presumably Int tokens carry their value as int (confirm against the tokenizer).
*/
using TokenData = variant<int, string, Type>;
/**
* Position in the source text, attached to tokens, AST nodes and errors.
* NOTE(review): whether line / column are 0- or 1-based is not visible here; confirm against the tokenizer.
*/
struct CodePosition {
int line;
int column;
};
/**
* A single token. Tokens are also the leaves of the AST (see Node).
*/
struct Token {
TokenType type; // kind of token
TokenData data { }; // payload, value-initialized when the token carries none
CodePosition pos; // where the token was found in the source
};
/** Grammar:
Prog -> Instruction Prog | Instruction
Instruction -> Statement | ExprStatement; | Expr; | ;
Statement ->
| { Prog }
| If (Expr) Instruction
| If (Expr) Instruction Else Instruction
ExprStatement ->
| Type ParIdentifier = Expr // AssignedDeclaration
| Type ParIdentifier // Declaration
Expr ->
| T
| T + Expr
| T - Expr
T ->
| U
| U * T
| U / T
| U % T
U ->
| F
| - U
| + U
F ->
| Number
| ++ParIdentifier
| --ParIdentifier
| ParIdentifier = Expr // Assignment
| ParIdentifier++
| ParIdentifier--
| ParIdentifier // This makes the grammar ambiguous but simpler to parse
| (Expr)
ParIdentifier ->
| Identifier
| (ParIdentifier)
*/
/**
* Node types
* Each enumerator is annotated with the grammar production it stands for.
*/
enum class NodeType {
/* No new node is created for -> ; Prog */
Prog, // -> Instruction Prog
Epsilon, // -> ;
AssignedDeclaration, // -> Type ParIdentifier = Expr
Declaration, // -> Type ParIdentifier
Plus, // -> T + Expr
Minus, // -> T - Expr
Mult, // -> U * T
Div, // -> U / T
Mod, // -> U % T
UnaryMinus, // -> - U
UnaryPlus, // -> + U
Assignment, // -> ParIdentifier = Expr
LIncr, // -> ++ParIdentifier
RIncr, // -> ParIdentifier++
LDecr, // -> --ParIdentifier
RDecr, // -> ParIdentifier--
If, // -> If (Expr) Instruction
IfElse, // -> If (Expr) Instruction Else Instruction
Bloc // -> { Prog }
};
// Forward declaration: Node is defined in terms of InnerNode, which itself contains Nodes.
struct InnerNode;
/**
* A node of the AST.
* InnerNode: internal node
* Token: leaf
*/
using Node = variant<InnerNode, Token>;
/**
* Internal node of the AST
*/
struct InnerNode {
NodeType type; // production this node represents
vector<Node> children; // operands, in the order of the production (how many depends on type)
CodePosition pos; // source position, used when reporting runtime errors
};
// A leaf always corresponds to a Token
/**
* Result of a parsing step.
* node: the AST that was built
* tokens: tokens that have not been parsed yet
*/
struct ParseReturn {
Node node;
vector<Token> tokens;
};
/**
 * Interpreter
 */

/**
 * Result of evaluating a node: nothing (monostate), an int or a double.
 * monostate is also the value stored for a variable that has been declared
 * but not assigned yet.
 */
using EvalResult = variant<monostate, int, double>;

/**
 * One scope of variables.
 * Every member has a default initializer so that a default-constructed Scope
 * never exposes an indeterminate depth.
 */
struct Scope {
    unordered_map<string, EvalResult> vars { };  // identifier -> current value
    int depth { 0 };                             // nesting level; the global scope uses 0
    string name { };                             // label of the scope, e.g. "global"
};

/**
 * Interpreter memory: a list of scopes. Lookups walk it from the last
 * scope towards the first.
 */
using Memory = list<Scope>;
#endif

View File

@ -1,152 +1,181 @@
#include <vector> #include <vector>
#include <iostream>
#include <unordered_map>
#include <string> #include <string>
#include <iostream>
#include "include/parser.h" #include "include/parser.h"
#include "include/interpreter.h" #include "include/interpreter.h"
#include "include/memory.h"
using namespace std; using namespace std;
unordered_map<string, MemoryEntry> memory; EvalResult eval(Node &ast, Memory &memory) {
if (holds_alternative<InnerNode>(ast)) {
void _debug_flush_memory(void) {
memory.clear();
}
EvalResult eval(Node &ast) {
if (ast.index() == 0) {
InnerNode node = get<InnerNode>(ast); InnerNode node = get<InnerNode>(ast);
switch (node.type) { switch (node.type) {
case NodeType::Prog: case NodeType::Prog:
eval(node.children[0]); eval(node.children[0], memory);
return eval(node.children[1]); return eval(node.children[1], memory);
break; break;
case NodeType::Epsilon: case NodeType::Epsilon:
return {}; return {};
break; break;
case NodeType::If: {
int cond = get<int>(eval(node.children[0], memory));
if (cond) {
eval(node.children[1], memory);
}
return {};
} break;
case NodeType::IfElse: {
int cond = get<int>(eval(node.children[0], memory));
if (cond) {
eval(node.children[1], memory);
}
else {
eval(node.children[2], memory);
}
return {};
} break;
case NodeType::Bloc: {
} break;
case NodeType::Plus: { case NodeType::Plus: {
int e1 = get<int>(eval(node.children[0])); int e1 = get<int>(eval(node.children[0], memory));
int e2 = get<int>(eval(node.children[1])); int e2 = get<int>(eval(node.children[1], memory));
return e1 + e2; return e1 + e2;
} break; } break;
case NodeType::Minus: { case NodeType::Minus: {
int e1 = get<int>(eval(node.children[0])); int e1 = get<int>(eval(node.children[0], memory));
int e2 = get<int>(eval(node.children[1])); int e2 = get<int>(eval(node.children[1], memory));
return e1 - e2; return e1 - e2;
} break; } break;
case NodeType::Mult: { case NodeType::Mult: {
int e1 = get<int>(eval(node.children[0])); int e1 = get<int>(eval(node.children[0], memory));
int e2 = get<int>(eval(node.children[1])); int e2 = get<int>(eval(node.children[1], memory));
return e1 * e2; return e1 * e2;
} break; } break;
case NodeType::Div: { case NodeType::Div: {
int e1 = get<int>(eval(node.children[0])); int e1 = get<int>(eval(node.children[0], memory));
int e2 = get<int>(eval(node.children[1])); int e2 = get<int>(eval(node.children[1], memory));
if (e2 == 0) throw RuntimeError("Division by 0", node.pos); if (e2 == 0) throw RuntimeError("Division by 0", node.pos);
return e1 / e2; return e1 / e2;
} break; } break;
case NodeType::Mod: { case NodeType::Mod: {
int e1 = get<int>(eval(node.children[0])); int e1 = get<int>(eval(node.children[0], memory));
int e2 = get<int>(eval(node.children[1])); int e2 = get<int>(eval(node.children[1], memory));
if (e2 == 0) throw RuntimeError("Modulo by 0", node.pos); if (e2 == 0) throw RuntimeError("Modulo by 0", node.pos);
return e1 % e2; return e1 % e2;
} break; } break;
case NodeType::UnaryPlus: { case NodeType::UnaryPlus: {
int e1 = get<int>(eval(node.children[0])); int e1 = get<int>(eval(node.children[0], memory));
return +e1; return +e1;
} break; } break;
case NodeType::UnaryMinus: { case NodeType::UnaryMinus: {
int e1 = get<int>(eval(node.children[0])); int e1 = get<int>(eval(node.children[0], memory));
return -e1; return -e1;
} break; } break;
case NodeType::Declaration: { case NodeType::Declaration: {
Token typeTok = get<Token>(node.children[0]); Token typeTok = get<Token>(node.children[0]);
Token identifierTok = get<Token>(node.children[1]); Token identifierTok = get<Token>(node.children[1]);
Type type = get<Type>(typeTok.data);
string identifier = get<string>(identifierTok.data); string identifier = get<string>(identifierTok.data);
memory[identifier] = { memory_set(memory, identifier, { });
.type = type
};
return {}; return {};
} break; } break;
case NodeType::AssignedDeclaration: { case NodeType::AssignedDeclaration: {
Token typeTok = get<Token>(node.children[0]); Token typeTok = get<Token>(node.children[0]);
Token identifierTok = get<Token>(node.children[1]); Token identifierTok = get<Token>(node.children[1]);
Type type = get<Type>(typeTok.data); // Type type = get<Type>(typeTok.data);
string identifier = get<string>(identifierTok.data); string identifier = get<string>(identifierTok.data);
EvalResult expr = eval(node.children[2]); EvalResult value = eval(node.children[2], memory);
memory[identifier] = { memory_set(memory, identifier, value);
.value = expr,
.assigned = true,
.type = type,
};
return expr; return value;
} break; } break;
case NodeType::Assignment: { case NodeType::Assignment: {
Token identifierTok = get<Token>(node.children[0]); Token identifierTok = get<Token>(node.children[0]);
string identifier = get<string>(identifierTok.data); string identifier = get<string>(identifierTok.data);
EvalResult expr = eval(node.children[1]); EvalResult value = eval(node.children[1], memory);
if (!memory.contains(identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos); if (!memory_contains(memory, identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos);
memory[identifier].value = expr; memory_update(memory, identifier, value);
return expr; return value;
} break; } break;
case NodeType::LIncr: { case NodeType::LIncr: {
Token identifierTok = get<Token>(node.children[0]); Token identifierTok = get<Token>(node.children[0]);
string identifier = get<string>(identifierTok.data); string identifier = get<string>(identifierTok.data);
if (!memory.contains(identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos); if (!memory_contains(memory, identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos);
if (memory[identifier].type == Type::Int) { EvalResult value = memory_get(memory, identifier);
memory[identifier].value = get<int>(memory[identifier].value) + 1;
if (holds_alternative<int>(value)) {
memory_update(memory, identifier, get<int>(value) + 1);
return memory_get(memory, identifier);
}
else if (holds_alternative<monostate>(value)) {
throw RuntimeError("Accessing uninitialized identifier \""+identifier+"\"", identifierTok.pos);
} }
return memory[identifier].value; return value;
} }
case NodeType::RIncr: { case NodeType::RIncr: {
Token identifierTok = get<Token>(node.children[0]); Token identifierTok = get<Token>(node.children[0]);
string identifier = get<string>(identifierTok.data); string identifier = get<string>(identifierTok.data);
if (!memory.contains(identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos); if (!memory_contains(memory, identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos);
if (memory[identifier].type == Type::Int) { EvalResult value = memory_get(memory, identifier);
EvalResult res = memory[identifier].value;
memory[identifier].value = get<int>(memory[identifier].value) + 1; if (holds_alternative<int>(memory_get(memory, identifier))) {
return res; memory_update(memory, identifier, get<int>(value) + 1);
}
else if (holds_alternative<monostate>(value)) {
throw RuntimeError("Accessing uninitialized identifier \""+identifier+"\"", identifierTok.pos);
} }
return memory[identifier].value; return value;
} }
case NodeType::LDecr: { case NodeType::LDecr: {
Token identifierTok = get<Token>(node.children[0]); Token identifierTok = get<Token>(node.children[0]);
string identifier = get<string>(identifierTok.data); string identifier = get<string>(identifierTok.data);
if (!memory.contains(identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos); if (!memory_contains(memory, identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos);
if (memory[identifier].type == Type::Int) { EvalResult value = memory_get(memory, identifier);
memory[identifier].value = get<int>(memory[identifier].value) - 1;
if (holds_alternative<int>(value)) {
memory_update(memory, identifier, get<int>(value) - 1);
return memory_get(memory, identifier);
}
else if (holds_alternative<monostate>(value)) {
throw RuntimeError("Accessing uninitialized identifier \""+identifier+"\"", identifierTok.pos);
} }
return memory[identifier].value; return value;
} }
case NodeType::RDecr: { case NodeType::RDecr: {
Token identifierTok = get<Token>(node.children[0]); Token identifierTok = get<Token>(node.children[0]);
string identifier = get<string>(identifierTok.data); string identifier = get<string>(identifierTok.data);
if (!memory.contains(identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos); if (!memory_contains(memory, identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos);
if (memory[identifier].type == Type::Int) { EvalResult value = memory_get(memory, identifier);
EvalResult res = memory[identifier].value;
memory[identifier].value = get<int>(memory[identifier].value) - 1; if (holds_alternative<int>(memory_get(memory, identifier))) {
return res; memory_update(memory, identifier, get<int>(value) - 1);
}
else if (holds_alternative<monostate>(value)) {
throw RuntimeError("Accessing uninitialized identifier \""+identifier+"\"", identifierTok.pos);
} }
return memory[identifier].value; return value;
} }
} }
} }
@ -159,9 +188,13 @@ EvalResult eval(Node &ast) {
case TokenType::Identifier: { case TokenType::Identifier: {
string identifier = get<string>(token.data); string identifier = get<string>(token.data);
if (!memory.contains(identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", token.pos); if (!memory_contains(memory, identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", token.pos);
EvalResult value = memory_get(memory, identifier);
if (holds_alternative<monostate>(value)) throw RuntimeError("Accessing uninitialized identifier \""+identifier+"\"", token.pos);
return memory[identifier].value; return value;
} break; } break;
default: default:
throw; throw;

View File

@ -8,6 +8,7 @@ using namespace std;
#include "include/parser.h" #include "include/parser.h"
#include "include/tokenize.h" #include "include/tokenize.h"
#include "include/interpreter.h" #include "include/interpreter.h"
#include "include/memory.h"
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
bool print_ast = false; bool print_ast = false;
@ -23,6 +24,8 @@ int main(int argc, char* argv[]) {
vector<string> input; vector<string> input;
vector<Token> tokens; vector<Token> tokens;
Memory memory = new_memory();
while (true) { while (true) {
try { try {
int initial_line = input.size(); int initial_line = input.size();
@ -34,8 +37,14 @@ int main(int argc, char* argv[]) {
if (print_ast) if (print_ast)
_debug_print_tree(ast, 0, ""); _debug_print_tree(ast, 0, "");
EvalResult res = eval(ast); EvalResult res = eval(ast, memory);
cout << get<int>(res) << endl;
if (holds_alternative<int>(res)) {
cout << get<int>(res) << endl;
}
else if (holds_alternative<double>(res)) {
cout << get<double>(res) << endl;
}
} catch (const TokenError& e) { } catch (const TokenError& e) {
pretty_print_error(input, e.pos); pretty_print_error(input, e.pos);

48
src/memory.cpp Normal file
View File

@ -0,0 +1,48 @@
#include <iostream>
#include <stdexcept>
#include "include/memory.h"
using namespace std;
/**
 * Builds a memory holding a single scope: the global one
 * (depth 0, named "global", no variables).
 */
Memory new_memory(void) {
    Scope global_scope { };
    global_scope.depth = 0;
    global_scope.name = "global";

    Memory fresh;
    fresh.push_back(global_scope);
    return fresh;
}
/**
 * Tells whether `identifier` is known in any scope of `memory`.
 * Scopes are visited from the last one towards the first; the walk stops
 * at the first scope holding the identifier.
 */
bool memory_contains(Memory& memory, string identifier) {
    bool found = false;
    auto scope_it = memory.rbegin();
    while (!found && scope_it != memory.rend()) {
        found = scope_it->vars.find(identifier) != scope_it->vars.end();
        ++scope_it;
    }
    return found;
}
/**
 * Returns the value stored for `identifier`.
 * Scopes are searched from the last one in the list towards the first and
 * the first match wins.
 *
 * Throws out_of_range if no scope holds `identifier`. A bare `throw;` here
 * would terminate the program, because no exception is being handled at
 * that point.
 */
EvalResult memory_get(Memory& memory, string identifier) {
    for (auto rit = memory.rbegin(); rit != memory.rend(); ++rit) {
        // Single lookup: find() gives both the presence test and the value.
        auto entry = rit->vars.find(identifier);
        if (entry != rit->vars.end()) return entry->second;
    }

    throw out_of_range("memory_get: unknown identifier \"" + identifier + "\"");
}
/**
 * Stores `value` under `identifier` in the last scope of `memory`,
 * creating the entry or replacing the one that scope already has.
 */
void memory_set(Memory& memory, string identifier, EvalResult value) {
    memory.back().vars.insert_or_assign(identifier, value);
}
/**
 * Overwrites the value of an existing variable.
 * Scopes are searched from the last one in the list towards the first and
 * only the first scope holding `identifier` is modified.
 *
 * Throws out_of_range if no scope holds `identifier`. A bare `throw;` here
 * would terminate the program, because no exception is being handled at
 * that point.
 */
void memory_update(Memory& memory, string identifier, EvalResult value) {
    for (auto rit = memory.rbegin(); rit != memory.rend(); ++rit) {
        // Single lookup: reuse the iterator to write the new value.
        auto entry = rit->vars.find(identifier);
        if (entry != rit->vars.end()) {
            entry->second = value;
            return;
        }
    }

    throw out_of_range("memory_update: unknown identifier \"" + identifier + "\"");
}

View File

@ -3,6 +3,7 @@
#include <algorithm> #include <algorithm>
using namespace std; using namespace std;
#include "include/tokenize.h"
#include "include/colors.h" #include "include/colors.h"
#include "include/parser.h" #include "include/parser.h"
@ -16,7 +17,7 @@ const char* _node_names[] = {
"Prog", "Epsilon", "AssignedDeclaration", "Declaration", "Plus", "Minus", "Mult", "Div", "Mod", "Prog", "Epsilon", "AssignedDeclaration", "Declaration", "Plus", "Minus", "Mult", "Div", "Mod",
"UnaryMinus", "UnaryPlus", "Assignment", "LIncr", "RIncr", "LDecr", "RDecr", "If", "IfElse", "Bloc" "UnaryMinus", "UnaryPlus", "Assignment", "LIncr", "RIncr", "LDecr", "RDecr", "If", "IfElse", "Bloc"
}; };
void _debug_print_tree(const Node& node, int depth, const string& prefix = "") { void _debug_print_tree(const Node& node, int depth, const string& prefix) {
if (holds_alternative<InnerNode>(node)) { if (holds_alternative<InnerNode>(node)) {
const InnerNode& innerNode = get<InnerNode>(node); const InnerNode& innerNode = get<InnerNode>(node);

View File

@ -1,13 +1,15 @@
#include "include/test.h" #include "include/test.h"
#include "../src/include/memory.h"
#include "../src/include/tokenize.h" #include "../src/include/tokenize.h"
#include "../src/include/parser.h" #include "../src/include/parser.h"
#include "../src/include/interpreter.h" #include "../src/include/interpreter.h"
int execute(string s) { int execute(string s) {
Memory memory = new_memory();
vector<Token> tokens = tokenize({ s }); vector<Token> tokens = tokenize({ s });
Node ast = parse(tokens); Node ast = parse(tokens);
EvalResult res = eval(ast); EvalResult res = eval(ast, memory);
return get<int>(res); return get<int>(res);
} }

View File

@ -1,15 +1,15 @@
#include "include/test.h" #include "include/test.h"
#include "../src/include/memory.h"
#include "../src/include/tokenize.h" #include "../src/include/tokenize.h"
#include "../src/include/parser.h" #include "../src/include/parser.h"
#include "../src/include/interpreter.h" #include "../src/include/interpreter.h"
int execute(string s) { int execute(string s) {
Memory memory = new_memory();
vector<Token> tokens = tokenize({ s }); vector<Token> tokens = tokenize({ s });
Node ast = parse(tokens); Node ast = parse(tokens);
EvalResult res = eval(ast); EvalResult res = eval(ast, memory);
_debug_flush_memory();
return get<int>(res); return get<int>(res);
} }