Add scoped memory and centralize type dependencies

This commit is contained in:
ala89 2023-11-22 13:52:16 +01:00
parent 60eccf1c15
commit 24228f80f5
11 changed files with 332 additions and 205 deletions

View File

@ -1,21 +1,12 @@
#ifndef INTERPRETER_H
#define INTERPRETER_H
#include <variant>
#include <string>
#include <stdexcept>
#include "types.h"
#include "memory.h"
using namespace std;
#include "tokenize.h"
using EvalResult = variant<monostate, int, double>;
struct MemoryEntry {
EvalResult value { };
bool assigned { false };
Type type;
};
class RuntimeError : public runtime_error {
public:
explicit RuntimeError(const string& message, CodePosition pos)
@ -27,8 +18,6 @@ public:
/*
Evaluates the AST, returning the latest calculated value
*/
EvalResult eval(Node &ast);
void _debug_flush_memory(void);
EvalResult eval(Node &ast, Memory& memory);
#endif

18
src/include/memory.h Normal file
View File

@ -0,0 +1,18 @@
#ifndef MEMORY_H
#define MEMORY_H
#include <string>
#include "types.h"
using namespace std;
/**
 * Creates a memory made of a single scope, named "global", at depth 0.
 */
Memory new_memory(void);
/**
 * Tells whether `identifier` is stored in at least one scope of `memory`.
 */
bool memory_contains(Memory& memory, string identifier);
/**
 * Returns the value stored under `identifier`.
 * Scopes are scanned from the back of the list to the front and the first match
 * is returned, so an entry in a later scope hides one in an earlier scope.
 * The identifier must be present: check with memory_contains() beforehand.
 */
EvalResult memory_get(Memory& memory, string identifier);
/**
 * Stores `value` under `identifier` in the last scope of the list, creating the
 * entry or overwriting the one already present in that scope.
 * `memory` must hold at least one scope (new_memory() guarantees this).
 */
void memory_set(Memory& memory, string identifier, EvalResult value);
/**
 * Overwrites the value of an already stored `identifier`.
 * Scopes are scanned from the back of the list to the front and only the first
 * scope holding the identifier is modified.
 * The identifier must be present: check with memory_contains() beforehand.
 */
void memory_update(Memory& memory, string identifier, EvalResult value);
#endif

View File

@ -7,109 +7,6 @@
#include "tokenize.h"
using namespace std;
/** Grammar:
Prog -> Instruction Prog | Instruction
Instruction -> Statement | ExprStatement; | Expr; | ;
Statement ->
| { Prog }
| If (Expr) Instruction
| If (Expr) Instruction Else Instruction
ExprStatement ->
| Type ParIdentifier = Expr // AssignedDeclaration
| Type ParIdentifier // Declaration
Expr ->
| T
| T + Expr
| T - Expr
T ->
| U
| U * T
| U / T
| U % T
U ->
| F
| - U
| + U
F ->
| Number
| ++ParIdentifier
| --ParIdentifier
| ParIdentifier = Expr // Assignment
| ParIdentifier++
| ParIdentifier--
| ParIdentifier // This makes the grammar ambiguous but simpler to parse
| (Expr)
ParIdentifier ->
| Identifier
| (ParIdentifier)
*/
/**
* Type de Noeuds
*/
enum class NodeType {
/* On ne créé pas de nouveau noeud -> ; Prog */
Prog, // -> Instruction Prog
Epsilon, // -> ;
AssignedDeclaration, // -> Type Identifier = Expr
Declaration, // -> Type Identifier
Plus, // -> T + Expr
Minus, // -> T - Expr
Mult, // -> F * T
Div, // -> F / T
Mod, // -> F % T
UnaryMinus, // -> -F
UnaryPlus, // -> +F
Assignment, // -> Identifier = Expr
LIncr, // -> ++ParIdentifier
RIncr, // -> ParIdentifier++
LDecr, // -> --ParIdentifier
RDecr, // -> ParIdentifier--
If, // -> If (Expr) Instruction
IfElse, // -> If (Expr) Instruction Else Instruction
Bloc // -> { Prog }
};
struct InnerNode;
/**
* InnerNode: noeud interne
* Token: feuille
*/
using Node = variant<InnerNode, Token>;
/**
* Noeud interne
*/
struct InnerNode {
NodeType type;
vector<Node> children;
CodePosition pos;
};
// A Leaf is always corresponding to a Token
/**
* Node: AST
* tokens: tokens pas encore parsés
*/
struct ParseReturn {
Node node;
vector<Token> tokens;
};
/**
* Used to backtrack when something is not recognized
*/
@ -181,7 +78,7 @@ ParseReturn parse_par_identifier(vector<Token> tokens);
/**
* Prints a tree for debugging it
*/
void _debug_print_tree(const Node& node, int depth, const string& prefix);
void _debug_print_tree(const Node& node, int depth = 0, const string& prefix = "");
/**
* Returns the CodePosition of a node

View File

@ -2,27 +2,11 @@
#define TOKENIZE_H
#include <vector>
#include <variant>
#include <string>
#include <stdexcept>
#include "types.h"
using namespace std;
enum class TokenType { Type, Identifier, Int, Plus, Minus, DoublePlus, DoubleMinus, DoubleEqual, Star, Slash, Percent, Equal, Semicolon, LParenthese, RParenthese, LCurlyBracket, RCurlyBracket, If, Else };
enum class Type { Int };
using TokenData = variant<int, string, Type>;
struct CodePosition {
int line;
int column;
};
struct Token {
TokenType type;
TokenData data { };
CodePosition pos;
};
class TokenError : public runtime_error {
public:
explicit TokenError(const string& message, CodePosition pos)

146
src/include/types.h Normal file
View File

@ -0,0 +1,146 @@
#ifndef TYPES_H
#define TYPES_H
#include <variant>
#include <string>
#include <unordered_map>
#include <list>
#include <vector>
using namespace std;
/**
* Tokens definition
*/
/** Kinds of token found in the tokenizer output. */
enum class TokenType { Type, Identifier, Int, Plus, Minus, DoublePlus, DoubleMinus, DoubleEqual, Star, Slash, Percent, Equal, Semicolon, LParenthese, RParenthese, LCurlyBracket, RCurlyBracket, If, Else };
/** Value types of the language; Int is currently the only one. */
enum class Type { Int };
/**
 * Payload carried by a token.
 * The interpreter reads a Type from type tokens and a string (the name) from
 * identifier tokens. The int alternative is presumably the value of an Int
 * literal (TODO confirm against the tokenizer).
 */
using TokenData = variant<int, string, Type>;
/**
 * Location in the source text, attached to tokens, inner AST nodes and errors.
 * Neither member has a default initializer, so both must be set explicitly.
 * NOTE(review): whether line/column are 0- or 1-based is not visible here;
 * confirm in the tokenizer.
 */
struct CodePosition {
int line;
int column;
};
/**
 * A token; tokens are also the leaves of the AST (see Node).
 */
struct Token {
TokenType type; // kind of token
TokenData data { }; // payload; value-initialized (int alternative) when not set explicitly
CodePosition pos; // location of the token, used when reporting errors
};
/** Grammar:
Prog -> Instruction Prog | Instruction
Instruction -> Statement | ExprStatement; | Expr; | ;
Statement ->
| { Prog }
| If (Expr) Instruction
| If (Expr) Instruction Else Instruction
ExprStatement ->
| Type ParIdentifier = Expr // AssignedDeclaration
| Type ParIdentifier // Declaration
Expr ->
| T
| T + Expr
| T - Expr
T ->
| U
| U * T
| U / T
| U % T
U ->
| F
| - U
| + U
F ->
| Number
| ++ParIdentifier
| --ParIdentifier
| ParIdentifier = Expr // Assignment
| ParIdentifier++
| ParIdentifier--
| ParIdentifier // This makes the grammar ambiguous but simpler to parse
| (Expr)
ParIdentifier ->
| Identifier
| (ParIdentifier)
*/
/**
 * Kinds of inner AST node.
 * Each enumerator is annotated with the grammar production it stands for.
 * NOTE(review): the parser's `_node_names` table lists these names in the same
 * order and looks like it is indexed by NodeType; keep both in sync.
 */
enum class NodeType {
/* No new node is created for -> ; Prog */
Prog, // -> Instruction Prog
Epsilon, // -> ;
AssignedDeclaration, // -> Type Identifier = Expr
Declaration, // -> Type Identifier
Plus, // -> T + Expr
Minus, // -> T - Expr
Mult, // -> U * T
Div, // -> U / T
Mod, // -> U % T
UnaryMinus, // -> -U
UnaryPlus, // -> +U
Assignment, // -> Identifier = Expr
LIncr, // -> ++ParIdentifier
RIncr, // -> ParIdentifier++
LDecr, // -> --ParIdentifier
RDecr, // -> ParIdentifier--
If, // -> If (Expr) Instruction
IfElse, // -> If (Expr) Instruction Else Instruction
Bloc // -> { Prog }
};
// Forward declaration: Node refers to InnerNode, which itself stores Nodes.
struct InnerNode;
/**
 * A node of the AST:
 * InnerNode: inner node
 * Token: leaf
 */
using Node = variant<InnerNode, Token>;
/**
 * Inner node of the AST
 */
struct InnerNode {
NodeType type; // production this node stands for
vector<Node> children; // sub-trees; e.g. for IfElse: condition, then-branch, else-branch
CodePosition pos; // location reported by runtime errors raised for this node
};
// A leaf always corresponds to a Token
/**
 * Result of a parsing step.
 * node: the AST
 * tokens: the tokens that have not been parsed yet
 */
struct ParseReturn {
Node node;
vector<Token> tokens;
};
/**
* Interpreter
*/
/**
 * Value produced by evaluating a node.
 * monostate means "no value": eval() returns it for instructions such as
 * Epsilon, If or Declaration, and a declared but unassigned variable is
 * stored as monostate (reading it raises an "uninitialized" runtime error).
 */
using EvalResult = variant<monostate, int, double>;
/**
 * One scope of the interpreter memory.
 */
struct Scope {
unordered_map<string, EvalResult> vars; // values stored in this scope, by identifier
int depth; // 0 for the scope created by new_memory(); no default initializer
string name; // "global" for the scope created by new_memory()
};
/**
 * Interpreter memory: a list of scopes.
 * new_memory() creates it with the global scope as only element. The lookup
 * helpers scan from the back of the list to the front, and memory_set() writes
 * into the back element.
 */
using Memory = list<Scope>;
#endif

View File

@ -1,152 +1,181 @@
#include <vector>
#include <iostream>
#include <unordered_map>
#include <string>
#include <iostream>
#include "include/parser.h"
#include "include/interpreter.h"
#include "include/memory.h"
using namespace std;
unordered_map<string, MemoryEntry> memory;
void _debug_flush_memory(void) {
memory.clear();
}
EvalResult eval(Node &ast) {
if (ast.index() == 0) {
EvalResult eval(Node &ast, Memory &memory) {
if (holds_alternative<InnerNode>(ast)) {
InnerNode node = get<InnerNode>(ast);
switch (node.type) {
case NodeType::Prog:
eval(node.children[0]);
return eval(node.children[1]);
eval(node.children[0], memory);
return eval(node.children[1], memory);
break;
case NodeType::Epsilon:
return {};
break;
case NodeType::If: {
int cond = get<int>(eval(node.children[0], memory));
if (cond) {
eval(node.children[1], memory);
}
return {};
} break;
case NodeType::IfElse: {
int cond = get<int>(eval(node.children[0], memory));
if (cond) {
eval(node.children[1], memory);
}
else {
eval(node.children[2], memory);
}
return {};
} break;
case NodeType::Bloc: {
} break;
case NodeType::Plus: {
int e1 = get<int>(eval(node.children[0]));
int e2 = get<int>(eval(node.children[1]));
int e1 = get<int>(eval(node.children[0], memory));
int e2 = get<int>(eval(node.children[1], memory));
return e1 + e2;
} break;
case NodeType::Minus: {
int e1 = get<int>(eval(node.children[0]));
int e2 = get<int>(eval(node.children[1]));
int e1 = get<int>(eval(node.children[0], memory));
int e2 = get<int>(eval(node.children[1], memory));
return e1 - e2;
} break;
case NodeType::Mult: {
int e1 = get<int>(eval(node.children[0]));
int e2 = get<int>(eval(node.children[1]));
int e1 = get<int>(eval(node.children[0], memory));
int e2 = get<int>(eval(node.children[1], memory));
return e1 * e2;
} break;
case NodeType::Div: {
int e1 = get<int>(eval(node.children[0]));
int e2 = get<int>(eval(node.children[1]));
int e1 = get<int>(eval(node.children[0], memory));
int e2 = get<int>(eval(node.children[1], memory));
if (e2 == 0) throw RuntimeError("Division by 0", node.pos);
return e1 / e2;
} break;
case NodeType::Mod: {
int e1 = get<int>(eval(node.children[0]));
int e2 = get<int>(eval(node.children[1]));
int e1 = get<int>(eval(node.children[0], memory));
int e2 = get<int>(eval(node.children[1], memory));
if (e2 == 0) throw RuntimeError("Modulo by 0", node.pos);
return e1 % e2;
} break;
case NodeType::UnaryPlus: {
int e1 = get<int>(eval(node.children[0]));
int e1 = get<int>(eval(node.children[0], memory));
return +e1;
} break;
case NodeType::UnaryMinus: {
int e1 = get<int>(eval(node.children[0]));
int e1 = get<int>(eval(node.children[0], memory));
return -e1;
} break;
case NodeType::Declaration: {
Token typeTok = get<Token>(node.children[0]);
Token identifierTok = get<Token>(node.children[1]);
Type type = get<Type>(typeTok.data);
string identifier = get<string>(identifierTok.data);
memory[identifier] = {
.type = type
};
memory_set(memory, identifier, { });
return {};
} break;
case NodeType::AssignedDeclaration: {
Token typeTok = get<Token>(node.children[0]);
Token identifierTok = get<Token>(node.children[1]);
Type type = get<Type>(typeTok.data);
// Type type = get<Type>(typeTok.data);
string identifier = get<string>(identifierTok.data);
EvalResult expr = eval(node.children[2]);
EvalResult value = eval(node.children[2], memory);
memory[identifier] = {
.value = expr,
.assigned = true,
.type = type,
};
memory_set(memory, identifier, value);
return expr;
return value;
} break;
case NodeType::Assignment: {
Token identifierTok = get<Token>(node.children[0]);
string identifier = get<string>(identifierTok.data);
EvalResult expr = eval(node.children[1]);
EvalResult value = eval(node.children[1], memory);
if (!memory.contains(identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos);
if (!memory_contains(memory, identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos);
memory[identifier].value = expr;
return expr;
memory_update(memory, identifier, value);
return value;
} break;
case NodeType::LIncr: {
Token identifierTok = get<Token>(node.children[0]);
string identifier = get<string>(identifierTok.data);
if (!memory.contains(identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos);
if (!memory_contains(memory, identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos);
if (memory[identifier].type == Type::Int) {
memory[identifier].value = get<int>(memory[identifier].value) + 1;
EvalResult value = memory_get(memory, identifier);
if (holds_alternative<int>(value)) {
memory_update(memory, identifier, get<int>(value) + 1);
return memory_get(memory, identifier);
}
else if (holds_alternative<monostate>(value)) {
throw RuntimeError("Accessing uninitialized identifier \""+identifier+"\"", identifierTok.pos);
}
return memory[identifier].value;
return value;
}
case NodeType::RIncr: {
Token identifierTok = get<Token>(node.children[0]);
string identifier = get<string>(identifierTok.data);
if (!memory.contains(identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos);
if (!memory_contains(memory, identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos);
if (memory[identifier].type == Type::Int) {
EvalResult res = memory[identifier].value;
memory[identifier].value = get<int>(memory[identifier].value) + 1;
return res;
EvalResult value = memory_get(memory, identifier);
if (holds_alternative<int>(memory_get(memory, identifier))) {
memory_update(memory, identifier, get<int>(value) + 1);
}
else if (holds_alternative<monostate>(value)) {
throw RuntimeError("Accessing uninitialized identifier \""+identifier+"\"", identifierTok.pos);
}
return memory[identifier].value;
return value;
}
case NodeType::LDecr: {
Token identifierTok = get<Token>(node.children[0]);
string identifier = get<string>(identifierTok.data);
if (!memory.contains(identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos);
if (!memory_contains(memory, identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos);
if (memory[identifier].type == Type::Int) {
memory[identifier].value = get<int>(memory[identifier].value) - 1;
EvalResult value = memory_get(memory, identifier);
if (holds_alternative<int>(value)) {
memory_update(memory, identifier, get<int>(value) - 1);
return memory_get(memory, identifier);
}
else if (holds_alternative<monostate>(value)) {
throw RuntimeError("Accessing uninitialized identifier \""+identifier+"\"", identifierTok.pos);
}
return memory[identifier].value;
return value;
}
case NodeType::RDecr: {
Token identifierTok = get<Token>(node.children[0]);
string identifier = get<string>(identifierTok.data);
if (!memory.contains(identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos);
if (!memory_contains(memory, identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos);
if (memory[identifier].type == Type::Int) {
EvalResult res = memory[identifier].value;
memory[identifier].value = get<int>(memory[identifier].value) - 1;
return res;
EvalResult value = memory_get(memory, identifier);
if (holds_alternative<int>(memory_get(memory, identifier))) {
memory_update(memory, identifier, get<int>(value) - 1);
}
else if (holds_alternative<monostate>(value)) {
throw RuntimeError("Accessing uninitialized identifier \""+identifier+"\"", identifierTok.pos);
}
return memory[identifier].value;
return value;
}
}
}
@ -159,9 +188,13 @@ EvalResult eval(Node &ast) {
case TokenType::Identifier: {
string identifier = get<string>(token.data);
if (!memory.contains(identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", token.pos);
if (!memory_contains(memory, identifier)) throw RuntimeError("Unknown identifier \""+identifier+"\"", token.pos);
EvalResult value = memory_get(memory, identifier);
if (holds_alternative<monostate>(value)) throw RuntimeError("Accessing uninitialized identifier \""+identifier+"\"", token.pos);
return memory[identifier].value;
return value;
} break;
default:
throw;

View File

@ -8,6 +8,7 @@ using namespace std;
#include "include/parser.h"
#include "include/tokenize.h"
#include "include/interpreter.h"
#include "include/memory.h"
int main(int argc, char* argv[]) {
bool print_ast = false;
@ -23,6 +24,8 @@ int main(int argc, char* argv[]) {
vector<string> input;
vector<Token> tokens;
Memory memory = new_memory();
while (true) {
try {
int initial_line = input.size();
@ -34,8 +37,14 @@ int main(int argc, char* argv[]) {
if (print_ast)
_debug_print_tree(ast, 0, "");
EvalResult res = eval(ast);
cout << get<int>(res) << endl;
EvalResult res = eval(ast, memory);
if (holds_alternative<int>(res)) {
cout << get<int>(res) << endl;
}
else if (holds_alternative<double>(res)) {
cout << get<double>(res) << endl;
}
} catch (const TokenError& e) {
pretty_print_error(input, e.pos);

48
src/memory.cpp Normal file
View File

@ -0,0 +1,48 @@
#include <iostream>
#include <stdexcept>
#include "include/memory.h"
using namespace std;
/**
 * Builds a fresh memory: a scope list holding exactly one scope,
 * named "global", at depth 0.
 */
Memory new_memory(void) {
    Scope global_scope { };
    global_scope.depth = 0;
    global_scope.name = "global";

    Memory memory;
    memory.push_back(global_scope);
    return memory;
}
/**
 * Tells whether `identifier` is stored in at least one scope of `memory`.
 * Scopes are visited from the back of the list to the front.
 */
bool memory_contains(Memory& memory, string identifier) {
    auto scope_it = memory.rbegin();
    while (scope_it != memory.rend()) {
        if (scope_it->vars.find(identifier) != scope_it->vars.end()) {
            return true;
        }
        ++scope_it;
    }
    return false;
}
/**
 * Returns the value stored under `identifier`.
 *
 * Scopes are scanned from the back of the list to the front and the first
 * match wins, so an entry in a later scope hides one in an earlier scope.
 *
 * @param memory     scope list to search
 * @param identifier name to look up
 * @return a copy of the stored value
 * @throws std::out_of_range if no scope holds `identifier`
 */
EvalResult memory_get(Memory& memory, string identifier) {
    for (auto scope_it = memory.rbegin(); scope_it != memory.rend(); ++scope_it) {
        // One find() both tests for presence and gives access to the value.
        const auto entry = scope_it->vars.find(identifier);
        if (entry != scope_it->vars.end()) return entry->second;
    }

    // A bare `throw;` outside of a handler calls std::terminate; raise a
    // catchable exception instead so a missing identifier can be reported.
    throw std::out_of_range("memory_get: unknown identifier \"" + identifier + "\"");
}
/**
 * Stores `value` under `identifier` in the last scope of the list, creating
 * the entry or replacing the one already present in that scope.
 * `memory` must hold at least one scope.
 */
void memory_set(Memory& memory, string identifier, EvalResult value) {
    memory.back().vars.insert_or_assign(identifier, value);
}
/**
 * Overwrites the value of an already stored `identifier`.
 *
 * Scopes are scanned from the back of the list to the front and only the
 * first scope holding the identifier is modified.
 *
 * @param memory     scope list to search
 * @param identifier name whose value is replaced
 * @param value      new value
 * @throws std::out_of_range if no scope holds `identifier`
 */
void memory_update(Memory& memory, string identifier, EvalResult value) {
    for (auto scope_it = memory.rbegin(); scope_it != memory.rend(); ++scope_it) {
        // One find() both tests for presence and gives access to the slot.
        const auto entry = scope_it->vars.find(identifier);
        if (entry != scope_it->vars.end()) {
            entry->second = value;
            return;
        }
    }

    // A bare `throw;` outside of a handler calls std::terminate; raise a
    // catchable exception instead so a missing identifier can be reported.
    throw std::out_of_range("memory_update: unknown identifier \"" + identifier + "\"");
}

View File

@ -3,6 +3,7 @@
#include <algorithm>
using namespace std;
#include "include/tokenize.h"
#include "include/colors.h"
#include "include/parser.h"
@ -16,7 +17,7 @@ const char* _node_names[] = {
"Prog", "Epsilon", "AssignedDeclaration", "Declaration", "Plus", "Minus", "Mult", "Div", "Mod",
"UnaryMinus", "UnaryPlus", "Assignment", "LIncr", "RIncr", "LDecr", "RDecr", "If", "IfElse", "Bloc"
};
void _debug_print_tree(const Node& node, int depth, const string& prefix = "") {
void _debug_print_tree(const Node& node, int depth, const string& prefix) {
if (holds_alternative<InnerNode>(node)) {
const InnerNode& innerNode = get<InnerNode>(node);

View File

@ -1,13 +1,15 @@
#include "include/test.h"
#include "../src/include/memory.h"
#include "../src/include/tokenize.h"
#include "../src/include/parser.h"
#include "../src/include/interpreter.h"
int execute(string s) {
Memory memory = new_memory();
vector<Token> tokens = tokenize({ s });
Node ast = parse(tokens);
EvalResult res = eval(ast);
EvalResult res = eval(ast, memory);
return get<int>(res);
}

View File

@ -1,15 +1,15 @@
#include "include/test.h"
#include "../src/include/memory.h"
#include "../src/include/tokenize.h"
#include "../src/include/parser.h"
#include "../src/include/interpreter.h"
int execute(string s) {
Memory memory = new_memory();
vector<Token> tokens = tokenize({ s });
Node ast = parse(tokens);
EvalResult res = eval(ast);
_debug_flush_memory();
EvalResult res = eval(ast, memory);
return get<int>(res);
}