2023-10-27 17:07:58 +02:00
|
|
|
#ifndef DEF_PARSER_H
|
|
|
|
#define DEF_PARSER_H
|
|
|
|
|
2023-10-27 17:16:41 +02:00
|
|
|
#include <vector>
|
2023-11-10 13:42:53 +01:00
|
|
|
#include <variant>
|
2023-11-11 09:11:35 +01:00
|
|
|
#include <stdexcept>
|
2023-10-27 17:07:58 +02:00
|
|
|
#include "tokenize.h"
|
|
|
|
// NOTE(review): a using-directive at header scope is injected into every file
// that includes this header. It is kept as-is because removing it could break
// code that relies on unqualified std names.
using namespace std;
|
|
|
|
|
|
|
|
/** Grammar:
Prog -> Instruction Prog | Instruction

Instruction -> Statement | ExprStatement; | Expr; | ;

Statement ->
    | { Prog }
    | If (Expr) Instruction Else Instruction

ExprStatement ->
    | Type ParIdentifier = Expr // AssignedDeclaration
    | Type ParIdentifier // Declaration

Expr ->
    | T
    | T + Expr
    | T - Expr

T ->
    | U
    | U * T
    | U / T
    | U % T

U ->
    | F
    | - U
    | + U

F ->
    | Number
    | ++ParIdentifier
    | --ParIdentifier
    | ParIdentifier = Expr // Assignment
    | ParIdentifier++
    | ParIdentifier--
    | ParIdentifier // This makes the grammar ambiguous but simpler to parse
    | (Expr)

ParIdentifier ->
    | Identifier
    | (ParIdentifier)
*/
|
|
|
|
|
2023-11-10 16:56:50 +01:00
|
|
|
/**
 * Node types.
 *
 * Each enumerator is annotated with the grammar production it stands for.
 * Enumerators carry no explicit values, so they are numbered 0, 1, 2, ... in
 * declaration order.
 */
enum class NodeType {
    /* No new node is created for -> ; Prog */
    Prog,                // -> Instruction Prog
    Epsilon,             // -> ;
    AssignedDeclaration, // -> Type ParIdentifier = Expr
    Declaration,         // -> Type ParIdentifier
    Plus,                // -> T + Expr
    Minus,               // -> T - Expr
    Mult,                // -> U * T
    Div,                 // -> U / T
    Mod,                 // -> U % T
    UnaryMinus,          // -> - U
    UnaryPlus,           // -> + U
    Assignment,          // -> ParIdentifier = Expr
    LIncr,               // -> ++ParIdentifier
    RIncr,               // -> ParIdentifier++
    LDecr,               // -> --ParIdentifier
    RDecr                // -> ParIdentifier--
};
|
|
|
|
|
2023-11-10 13:42:53 +01:00
|
|
|
struct InnerNode;
|
|
|
|
|
2023-11-10 16:56:50 +01:00
|
|
|
/**
|
|
|
|
* InnerNode: noeud interne
|
|
|
|
* Token: feuille
|
|
|
|
*/
|
2023-11-10 13:42:53 +01:00
|
|
|
using Node = variant<InnerNode, Token>;
|
|
|
|
|
2023-11-10 16:56:50 +01:00
|
|
|
/**
|
|
|
|
* Noeud interne
|
|
|
|
*/
|
2023-10-27 17:16:41 +02:00
|
|
|
struct InnerNode {
|
2023-10-27 17:07:58 +02:00
|
|
|
NodeType type;
|
2023-10-27 17:16:41 +02:00
|
|
|
vector<Node> children;
|
2023-11-15 14:59:28 +01:00
|
|
|
CodePosition pos;
|
2023-10-27 17:07:58 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
// A leaf always corresponds to a Token
|
|
|
|
|
2023-11-10 16:56:50 +01:00
|
|
|
/**
|
|
|
|
* Node: AST
|
|
|
|
* tokens: tokens pas encore parsés
|
|
|
|
*/
|
|
|
|
struct ParseReturn {
|
|
|
|
Node node;
|
|
|
|
vector<Token> tokens;
|
|
|
|
};
|
|
|
|
|
|
|
|
/**
 * Used to backtrack when something is not recognised.
 *
 * what() is declared public so that the message can be read directly from a
 * ParseException, not only through a std::exception reference.
 */
class ParseException : public std::exception {
public:
    /** @return the fixed message "Parse Exception". */
    const char* what() const noexcept override {
        return "Parse Exception";
    }
};
|
|
|
|
|
2023-11-15 14:37:20 +01:00
|
|
|
class SyntaxError : public runtime_error {
|
2023-11-11 09:05:49 +01:00
|
|
|
public:
|
2023-11-15 14:37:20 +01:00
|
|
|
explicit SyntaxError(const string& message, CodePosition pos)
|
|
|
|
: runtime_error(message), pos(pos) {}
|
|
|
|
|
|
|
|
const CodePosition pos;
|
2023-11-11 09:05:49 +01:00
|
|
|
};
|
2023-11-10 16:56:50 +01:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Parse a list of tokens and return the associated AST
|
|
|
|
*/
|
|
|
|
Node parse(vector<Token> tokens);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Parse something derivated from Instruction
|
|
|
|
*/
|
|
|
|
ParseReturn parse_instruction(vector<Token> tokens);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Parse something derivated from Statement
|
|
|
|
*/
|
|
|
|
ParseReturn parse_statement(vector<Token> tokens);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Parse something derivated from ExprStatement
|
|
|
|
*/
|
|
|
|
ParseReturn parse_expr_statement(vector<Token> tokens);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Parse something derivated from Expr
|
|
|
|
*/
|
|
|
|
ParseReturn parse_expr(vector<Token> tokens);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Parse something derivated from T
|
|
|
|
*/
|
|
|
|
ParseReturn parse_t(vector<Token> tokens);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Parse something derivated from U
|
|
|
|
*/
|
|
|
|
ParseReturn parse_u(vector<Token> tokens);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Parse something derivated from F
|
|
|
|
*/
|
|
|
|
ParseReturn parse_f(vector<Token> tokens);
|
|
|
|
|
2023-11-14 17:00:34 +01:00
|
|
|
/**
|
|
|
|
* Parse something derivated from ParIdentifier
|
|
|
|
* (An identifier with 0+ parentheses around it)
|
|
|
|
*/
|
|
|
|
ParseReturn parse_par_identifier(vector<Token> tokens);
|
|
|
|
|
2023-11-10 19:04:24 +01:00
|
|
|
/**
|
|
|
|
* Prints a tree for debugging it
|
|
|
|
*/
|
|
|
|
void _debug_print_tree(const Node& node, int depth, const string& prefix);
|
|
|
|
|
2023-11-15 14:59:28 +01:00
|
|
|
/**
|
|
|
|
* Returns the CodePosition of a node
|
|
|
|
*/
|
|
|
|
CodePosition get_node_pos(Node node);
|
|
|
|
|
2023-10-27 17:07:58 +02:00
|
|
|
#endif
|