629 lines
18 KiB
C++
629 lines
18 KiB
C++
#include <vector>
|
|
#include <iostream>
|
|
#include <algorithm>
|
|
using namespace std;
|
|
|
|
#include "include/tokenize.h"
|
|
#include "include/colors.h"
|
|
#include "include/parser.h"
|
|
|
|
// Sentinel position (line -1, column -1). In this file it is attached to
// Epsilon nodes and to the "Input must not be empty" error, i.e. wherever no
// token position is available.
CodePosition null_pos = { .line = -1, .column = -1 };
|
|
|
|
|
|
// Printable names of the tree node kinds. _debug_print_tree looks a name up
// with int(NodeType), so the entries must stay in the same order as the
// NodeType enumerators (declared outside this file — verify when adding one).
const char* _node_names[] = {
    "Prog",
    "Epsilon",
    "AssignedDeclaration",
    "Declaration",
    "Plus",
    "Minus",
    "Mult",
    "Div",
    "Mod",
    "UnaryMinus",
    "UnaryPlus",
    "Assignment",
    "LIncr",
    "RIncr",
    "LDecr",
    "RDecr",
    "If",
    "IfElse",
    "Bloc"
};
|
|
void _debug_print_tree(const Node& node, int depth, const string& prefix) {
|
|
if (holds_alternative<InnerNode>(node)) {
|
|
const InnerNode& innerNode = get<InnerNode>(node);
|
|
|
|
cout << prefix << _node_names[int(innerNode.type)] << "\n";
|
|
|
|
string new_prefix = prefix;
|
|
size_t pos = new_prefix.find("└──");
|
|
while (pos != string::npos) {
|
|
new_prefix.replace(pos, 9, " ");
|
|
pos = new_prefix.find("└──", pos + 4);
|
|
}
|
|
|
|
pos = new_prefix.find("├──");
|
|
while (pos != string::npos) {
|
|
new_prefix.replace(pos, 9, "│ ");
|
|
pos = new_prefix.find("├──", pos + 6);
|
|
}
|
|
|
|
for (size_t i = 0; i < innerNode.children.size(); ++i) {
|
|
string childPrefix = (i == innerNode.children.size() - 1) ? "└── " : "├── ";
|
|
_debug_print_tree(innerNode.children[i], depth + 1, new_prefix + childPrefix);
|
|
}
|
|
} else {
|
|
const Token& token = get<Token>(node);
|
|
cout << prefix;
|
|
_debug_print_token(token);
|
|
cout << endl;
|
|
}
|
|
}
|
|
|
|
// Returns the source position stored in `node`: the `pos` member of the
// InnerNode when the node is an inner node, otherwise the `pos` of the Token.
CodePosition get_node_pos(Node node) {
    const bool is_leaf = !holds_alternative<InnerNode>(node);
    return is_leaf ? get<Token>(node).pos : get<InnerNode>(node).pos;
}
|
|
|
|
// Entry point of the parser: turns the token sequence into a syntax tree.
//
// Throws SyntaxError when `tokens` is empty ("Input must not be empty") or
// when parse_prog leaves tokens unconsumed ("Unable to parse", reported at the
// first token that could not be consumed).
Node parse(vector<Token> tokens) {
    if (tokens.empty())
        throw SyntaxError("Input must not be empty", null_pos);

    // The parse_* functions read the next token with back() and consume it
    // with pop_back(), so the input is stored in reverse order.
    reverse(tokens.begin(), tokens.end());

    ParseReturn result = parse_prog(tokens);

    if (!result.tokens.empty()) {
        CodePosition stuck_at = result.tokens.back().pos;
        throw SyntaxError("Unable to parse", stuck_at);
    }

    return result.node;
}
|
|
|
|
|
|
// Prog: parses as many consecutive instructions as possible.
//
// The result node is
//   - an Epsilon node when no non-empty instruction was parsed,
//   - the instruction itself when exactly one was parsed,
//   - otherwise a left-nested chain of Prog nodes {previous, instruction},
//     each positioned at its newest instruction.
// The returned tokens are the ones left after the last instruction that
// parsed successfully. This function never lets a ParseException escape.
ParseReturn parse_prog(vector<Token> tokens) {
    const auto is_epsilon = [](const Node& candidate) {
        return holds_alternative<InnerNode>(candidate)
            && get<InnerNode>(candidate).type == NodeType::Epsilon;
    };

    InnerNode empty_prog = {
        .type=NodeType::Epsilon,
        .children={},
        .pos=null_pos
    };
    Node program = empty_prog;

    try {
        while (!tokens.empty()) {
            ParseReturn step = parse_instruction(tokens);
            tokens = step.tokens;

            // Empty instructions (a lone ';') add nothing to the tree.
            if (is_epsilon(step.node))
                continue;

            // First real instruction: it replaces the placeholder Epsilon.
            if (is_epsilon(program)) {
                program = step.node;
                continue;
            }

            InnerNode chained = {
                .type=NodeType::Prog,
                .children={program, step.node},
                .pos=get_node_pos(step.node)
            };
            program = chained;
        }
    } catch (const ParseException&) {
        // Deliberately ignored: the first instruction that cannot be parsed
        // ends the program. The leftover tokens are handed back so the caller
        // can decide whether they are legitimate (e.g. a closing '}').
    }

    return {
        .node=program,
        .tokens=tokens
    };
}
|
|
|
|
|
|
// Instruction: tries the alternatives below in order and returns the first
// one that matches.
//
//   Instruction -> Statement
//   Instruction -> ExprStatement ;
//   Instruction -> Expr ;
//   Instruction -> ;                (yields an Epsilon node)
//
// Every alternative is attempted on the original, untouched `tokens`: a
// failed attempt must not leave the input partially consumed, otherwise
// something like `int a + 3;` would be accepted as the expression `+ 3`.
//
// Throws ParseException when no alternative matches. Exceptions that the
// `catch (const ParseException&)` handlers do not match propagate unchanged.
ParseReturn parse_instruction(vector<Token> tokens) {
    //* Instruction -> Statement
    try {
        return parse_statement(tokens);
    } catch (const ParseException&) {
        // Not a statement: fall through to the next alternative.
    }

    //* Instruction -> ExprStatement;
    try {
        ParseReturn ret = parse_expr_statement(tokens);

        // The emptiness test guards back(): the declaration may be the last
        // thing in the input.
        if (ret.tokens.empty() || ret.tokens.back().type != TokenType::Semicolon)
            throw ParseException();

        ret.tokens.pop_back();

        return {
            .node=ret.node,
            .tokens=ret.tokens
        };
    } catch (const ParseException&) {
        // Not a declaration followed by ';': fall through.
    }

    //* Instruction -> Expr;
    try {
        ParseReturn ret = parse_expr(tokens);

        if (ret.tokens.empty() || ret.tokens.back().type != TokenType::Semicolon)
            throw ParseException();

        ret.tokens.pop_back();

        return {
            .node=ret.node,
            .tokens=ret.tokens
        };
    } catch (const ParseException&) {
        // Not an expression followed by ';': fall through.
    }

    //* Instruction -> ;
    if (tokens.empty() || tokens.back().type != TokenType::Semicolon)
        throw ParseException();

    tokens.pop_back(); // Remove the ';' from the token list

    InnerNode epsilon_node = {
        .type=NodeType::Epsilon,
        .children={},
        .pos=null_pos
    };

    return {
        .node=epsilon_node,
        .tokens=tokens
    };
}
|
|
|
|
// Statement: parses an if / if-else construct or a braced block.
//
//   Statement -> If ( Expr ) Instruction
//   Statement -> If ( Expr ) Instruction Else Instruction
//   Statement -> { Prog }
//
// Throws ParseException when the input does not start a statement (fewer than
// two tokens, or a first token that is neither 'if' nor '{'), and SyntaxError
// once a statement has started but is malformed. A ParseException raised by
// parse_expr while reading the condition propagates unchanged.
ParseReturn parse_statement(vector<Token> tokens) {
    if (tokens.size() < 2) // 'If' / 'For' '(' at least
        throw ParseException();

    switch (tokens.back().type) {
        case TokenType::If: {
            CodePosition pos = tokens.back().pos;
            tokens.pop_back();

            // Safe: at least two tokens were present before the pop.
            if (tokens.back().type != TokenType::LParenthese) // Opening (
                throw SyntaxError("Missing '('", tokens.back().pos);
            tokens.pop_back();

            ParseReturn ret = parse_expr(tokens); // Expr

            // `tokens` cannot be empty here (parse_expr throws on empty
            // input), so it supplies the position when nothing follows Expr.
            if (ret.tokens.empty() || ret.tokens.back().type != TokenType::RParenthese) // Closing )
                throw SyntaxError(
                    "Missing ')'",
                    ret.tokens.empty() ? tokens.back().pos : ret.tokens.back().pos
                );

            ret.tokens.pop_back();
            tokens = ret.tokens;
            Node expr = ret.node;

            // A dependent instruction (after the condition or after 'else')
            // may not be a declaration.
            const auto is_declaration = [](const Node& candidate) {
                if (!holds_alternative<InnerNode>(candidate))
                    return false;
                const NodeType kind = get<InnerNode>(candidate).type;
                return kind == NodeType::AssignedDeclaration || kind == NodeType::Declaration;
            };

            // Errors about Instruction1 are reported at its first token, or at
            // the 'if' keyword when the input ends right after ')'.
            CodePosition then_pos = tokens.empty() ? pos : tokens.back().pos;

            try {
                ret = parse_instruction(tokens); // Instruction1
            } catch (const ParseException&) {
                throw SyntaxError("Invalid Syntax", then_pos);
            }

            if (is_declaration(ret.node))
                throw SyntaxError("a dependent statement may not be a declaration", then_pos);

            tokens = ret.tokens;
            Node instruction1 = ret.node;

            if (tokens.empty() || tokens.back().type != TokenType::Else) { //* -> If (Expr) Instruction
                InnerNode node = {
                    .type=NodeType::If,
                    .children={expr, instruction1},
                    .pos=pos
                };
                return {
                    .node=node,
                    .tokens=tokens
                };
            }

            //* -> If (Expr) Instruction Else Instruction
            CodePosition else_pos = tokens.back().pos; // the 'else' keyword itself
            tokens.pop_back(); // Else
            if (!tokens.empty())
                else_pos = tokens.back().pos; // first token of Instruction2

            // Same treatment as Instruction1, so both branches report errors
            // the same way.
            try {
                ret = parse_instruction(tokens); // Instruction2
            } catch (const ParseException&) {
                throw SyntaxError("Invalid Syntax", else_pos);
            }

            if (is_declaration(ret.node))
                throw SyntaxError("a dependent statement may not be a declaration", else_pos);

            tokens = ret.tokens;
            Node instruction2 = ret.node;

            InnerNode node = {
                .type=NodeType::IfElse,
                .children={expr, instruction1, instruction2},
                .pos=pos
            };
            return {
                .node=node,
                .tokens=tokens
            };
        }

        case TokenType::LCurlyBracket: {
            CodePosition pos = tokens.back().pos;
            tokens.pop_back(); // still non-empty: at least two tokens before

            ParseReturn ret = parse_prog(tokens);

            // No expression parsed, and the token after '{' is not a '}'
            if (holds_alternative<InnerNode>(ret.node) &&
                get<InnerNode>(ret.node).type == NodeType::Epsilon &&
                tokens.back().type != TokenType::RCurlyBracket)
                throw SyntaxError(
                    "Invalid Syntax. Missing ';' ?",
                    tokens.back().pos
                );

            if (ret.tokens.empty() || ret.tokens.back().type != TokenType::RCurlyBracket)
                throw SyntaxError(
                    "Missing '}'",
                    ret.tokens.empty() ? tokens.back().pos : ret.tokens.back().pos
                );

            tokens = ret.tokens;
            tokens.pop_back(); // '}'

            InnerNode node = {
                .type=NodeType::Bloc,
                .children={ret.node},
                .pos=pos
            };
            return {
                .node=node,
                .tokens=tokens
            };
        }

        default:
            throw ParseException();
    }
}
|
|
|
|
// ExprStatement: a variable declaration, with or without an initializer.
//
//   ExprStatement -> Type Identifier          (Declaration node)
//   ExprStatement -> Type Identifier = Expr   (AssignedDeclaration node)
//
// The trailing ';' is not consumed here. Throws ParseException when the input
// does not start with a Type token or when no identifier follows it;
// exceptions from parse_expr propagate unchanged.
ParseReturn parse_expr_statement(vector<Token> tokens) {
    const bool starts_with_type =
        !tokens.empty() && tokens.back().type == TokenType::Type;
    if (!starts_with_type)
        throw ParseException();

    Token type = tokens.back();
    tokens.pop_back();

    ParseReturn name = parse_par_identifier(tokens);
    if (!holds_alternative<Token>(name.node))
        throw ParseException(); // The parsing is incorrect

    Token identifier = get<Token>(name.node);
    vector<Token> rest = name.tokens;

    const bool has_initializer =
        !rest.empty() && rest.back().type == TokenType::Equal;

    if (has_initializer) {
        //* ExprStatement -> Type Identifier = Expr
        rest.pop_back(); // Drop the '='

        ParseReturn value = parse_expr(rest);

        InnerNode assigned = {
            .type=NodeType::AssignedDeclaration,
            .children={type, identifier, value.node},
            .pos=get_node_pos(value.node)
        };
        return {
            .node=assigned,
            .tokens=value.tokens
        };
    }

    //* ExprStatement -> Type Identifier
    InnerNode declared = {
        .type=NodeType::Declaration,
        .children={type, identifier},
        .pos=identifier.pos
    };
    return {
        .node=declared,
        .tokens=rest
    };
}
|
|
|
|
// Expr: one T followed by any number of "+ T" / "- T", folded into a
// left-nested tree of Plus / Minus nodes positioned at their operator.
//
// Throws ParseException on empty input or when the first T cannot be parsed.
// If an operator is not followed by a parsable T, that operator is left in the
// returned tokens and the tree built so far is returned.
ParseReturn parse_expr(vector<Token> tokens) {
    if (tokens.empty())
        throw ParseException();

    // At least 1 T
    ParseReturn first = parse_t(tokens);
    tokens = first.tokens;
    Node tree = first.node;

    //* We construct a tree
    while (!tokens.empty()) {
        const TokenType next = tokens.back().type;
        if (next != TokenType::Plus && next != TokenType::Minus)
            break;

        const NodeType kind =
            (next == TokenType::Plus) ? NodeType::Plus : NodeType::Minus;

        Token op = tokens.back();
        tokens.pop_back();

        try {
            ParseReturn rhs = parse_t(tokens);
            tokens = rhs.tokens;

            InnerNode combined = {
                .type=kind,
                .children={tree, rhs.node},
                .pos=op.pos
            };
            tree = combined;
        } catch (const ParseException&) {
            // No operand after the operator: put the operator back and stop.
            tokens.emplace_back(op);
            break;
        }
    }

    return {
        .node=tree,
        .tokens=tokens
    };
}
|
|
|
|
// T: one U followed by any number of "* U" / "/ U" / "% U", folded into a
// left-nested tree of Mult / Div / Mod nodes positioned at their operator.
//
// Throws ParseException on empty input or when the first U cannot be parsed.
// If an operator is not followed by a parsable U, that operator is left in the
// returned tokens and the tree built so far is returned.
ParseReturn parse_t(vector<Token> tokens) {
    if (tokens.empty())
        throw ParseException();

    // At least 1 U
    ParseReturn head = parse_u(tokens);
    tokens = head.tokens;
    Node acc = head.node;

    //* We construct a tree
    while (!tokens.empty()) {
        NodeType op_kind;
        const TokenType upcoming = tokens.back().type;
        if (upcoming == TokenType::Star) {
            op_kind = NodeType::Mult;
        } else if (upcoming == TokenType::Slash) {
            op_kind = NodeType::Div;
        } else if (upcoming == TokenType::Percent) {
            op_kind = NodeType::Mod;
        } else {
            break; // not a multiplicative operator: the term ends here
        }

        Token op_token = tokens.back();
        tokens.pop_back();

        try {
            ParseReturn operand = parse_u(tokens);
            tokens = operand.tokens;

            InnerNode joined = {
                .type=op_kind,
                .children={acc, operand.node},
                .pos=op_token.pos
            };
            acc = joined;
        } catch (const ParseException&) {
            // No operand after the operator: put the operator back and stop.
            tokens.emplace_back(op_token);
            break;
        }
    }

    return {
        .node=acc,
        .tokens=tokens
    };
}
|
|
|
|
// U: optional chain of unary signs in front of an F.
//
//   U -> - U    (UnaryMinus node)
//   U -> + U    (UnaryPlus node)
//   U -> F
//
// A unary node takes the position of its operand. Everything else, including
// empty input, is delegated to parse_f.
ParseReturn parse_u(vector<Token> tokens) {
    if (tokens.empty())
        return parse_f(tokens); //* U -> F

    NodeType sign;
    switch (tokens.back().type) {
        case TokenType::Minus: //* U -> - U
            sign = NodeType::UnaryMinus;
            break;
        case TokenType::Plus: //* U -> + U
            sign = NodeType::UnaryPlus;
            break;
        default: //* U -> F
            return parse_f(tokens);
    }

    tokens.pop_back(); // consume the sign

    ParseReturn operand = parse_u(tokens);

    InnerNode signed_node = {
        .type=sign,
        .children={ operand.node },
        .pos=get_node_pos(operand.node)
    };
    return {
        .node=signed_node,
        .tokens=operand.tokens
    };
}
|
|
|
|
// F: the atoms of an expression.
//
//   F -> Number
//   F -> ++ParIdentifier | --ParIdentifier     (LIncr / LDecr)
//   F -> ParIdentifier++ | ParIdentifier--     (RIncr / RDecr)
//   F -> ParIdentifier = Expr                  (Assignment)
//   F -> ParIdentifier
//   F -> ( Expr )
//
// Throws ParseException when the input is empty or matches none of the rules,
// and SyntaxError("Missing ')'") when a parenthesised expression is not
// closed.
ParseReturn parse_f(vector<Token> tokens) {
    if (tokens.size() == 0)
        throw ParseException();

    switch (tokens.back().type) {
        case TokenType::Int: { //* F -> Number
            Token number = tokens.back();
            tokens.pop_back();
            return {
                .node=number,
                .tokens=tokens
            };
        }

        case TokenType::DoublePlus: { //* F -> ++ParIdentifier
            tokens.pop_back();
            ParseReturn ret = parse_par_identifier(tokens);

            InnerNode node = {
                .type = NodeType::LIncr,
                .children = { ret.node },
                .pos=get_node_pos(ret.node)
            };
            return {
                .node=node,
                .tokens=ret.tokens
            };
        }

        case TokenType::DoubleMinus: { //* F -> --ParIdentifier
            tokens.pop_back();
            ParseReturn ret = parse_par_identifier(tokens);

            InnerNode node = {
                .type = NodeType::LDecr,
                .children = { ret.node },
                .pos=get_node_pos(ret.node)
            };
            return {
                .node=node,
                .tokens=ret.tokens
            };
        }

        default: {
            try { //* F -> ParIdentifier...
                ParseReturn ret = parse_par_identifier(tokens);

                if (ret.tokens.size() >= 1) {
                    switch (ret.tokens.back().type) {
                        case TokenType::DoublePlus: { //* F -> ParIdentifier++
                            ret.tokens.pop_back();
                            InnerNode node = {
                                .type = NodeType::RIncr,
                                .children = { ret.node },
                                .pos=get_node_pos(ret.node)
                            };
                            return {
                                .node=node,
                                .tokens=ret.tokens
                            };
                        }

                        case TokenType::DoubleMinus: { //* F -> ParIdentifier--
                            ret.tokens.pop_back();
                            InnerNode node = {
                                .type = NodeType::RDecr,
                                .children = { ret.node },
                                .pos=get_node_pos(ret.node)
                            };
                            return {
                                .node=node,
                                .tokens=ret.tokens
                            };
                        }

                        case TokenType::Equal: { //* F -> ParIdentifier = (Expr)
                            ret.tokens.pop_back();
                            ParseReturn ret_expr = parse_expr(ret.tokens);

                            InnerNode node = {
                                .type = NodeType::Assignment,
                                .children = { ret.node, ret_expr.node },
                                .pos=get_node_pos(ret_expr.node)
                            };
                            return {
                                .node=node,
                                .tokens=ret_expr.tokens
                            };
                        }

                        default:
                            break;
                    }
                }

                //* F -> ParIdentifier
                return ret;

            } catch (const ParseException&) { //* F -> (Expr)
                // `tokens` is still the original, non-empty input here.
                if (tokens.back().type != TokenType::LParenthese)
                    throw ParseException();

                tokens.pop_back();

                ParseReturn ret = parse_expr(tokens);

                // parse_expr rejects empty input, so `tokens` is non-empty and
                // supplies the error position when the input ends before ')'.
                // back() is never called on an empty vector.
                if (ret.tokens.empty() || ret.tokens.back().type != TokenType::RParenthese)
                    throw SyntaxError(
                        "Missing ')'",
                        ret.tokens.empty() ? tokens.back().pos : ret.tokens.back().pos
                    );

                ret.tokens.pop_back();

                return {
                    .node=ret.node,
                    .tokens=ret.tokens
                };
            }
        }
    }
}
|
|
|
|
|
|
// ParIdentifier: an identifier wrapped in any number of balanced parentheses.
//
//   ParIdentifier -> Identifier
//   ParIdentifier -> ( ParIdentifier )
//
// The returned node is the Identifier token itself; the parentheses leave no
// trace in the tree. Throws ParseException when the input is empty, does not
// start with an identifier or '(', or when a '(' is not matched by ')'.
ParseReturn parse_par_identifier(vector<Token> tokens) {
    if (tokens.empty())
        throw ParseException();

    if (tokens.back().type == TokenType::Identifier) {
        Token identifier = tokens.back();
        tokens.pop_back();

        return { //* ParIdentifier -> Identifier
            .node=identifier,
            .tokens=tokens
        };
    }

    if (tokens.back().type != TokenType::LParenthese)
        throw ParseException();

    tokens.pop_back();
    ParseReturn ret = parse_par_identifier(tokens);

    // The input may end right after the inner identifier (e.g. "(a"), so test
    // for emptiness before looking at back().
    if (ret.tokens.empty() || ret.tokens.back().type != TokenType::RParenthese)
        throw ParseException();

    ret.tokens.pop_back();

    return { //* ParIdentifier -> (ParIdentifier)
        .node=ret.node,
        .tokens=ret.tokens
    };
}