c-repl/src/parser.cpp

629 lines
18 KiB
C++

#include <vector>
#include <iostream>
#include <algorithm>
using namespace std;
#include "include/tokenize.h"
#include "include/colors.h"
#include "include/parser.h"
// Sentinel position (line -1, column -1) used in this file wherever no real
// source location applies: the empty-input error raised by parse() and the
// Epsilon nodes built by parse_prog() / parse_instruction().
CodePosition null_pos = {
.line = -1,
.column = -1
};
// Printable names for inner-node types; _debug_print_tree() indexes this
// table with int(innerNode.type).
// NOTE(review): presumably the order mirrors the NodeType enum declared in
// include/parser.h — confirm whenever an enumerator is added or reordered.
const char* _node_names[] = {
"Prog", "Epsilon", "AssignedDeclaration", "Declaration", "Plus", "Minus", "Mult", "Div", "Mod",
"UnaryMinus", "UnaryPlus", "Assignment", "LIncr", "RIncr", "LDecr", "RDecr", "If", "IfElse", "Bloc"
};
void _debug_print_tree(const Node& node, int depth, const string& prefix) {
if (holds_alternative<InnerNode>(node)) {
const InnerNode& innerNode = get<InnerNode>(node);
cout << prefix << _node_names[int(innerNode.type)] << "\n";
string new_prefix = prefix;
size_t pos = new_prefix.find("└──");
while (pos != string::npos) {
new_prefix.replace(pos, 9, " ");
pos = new_prefix.find("└──", pos + 4);
}
pos = new_prefix.find("├──");
while (pos != string::npos) {
new_prefix.replace(pos, 9, "");
pos = new_prefix.find("├──", pos + 6);
}
for (size_t i = 0; i < innerNode.children.size(); ++i) {
string childPrefix = (i == innerNode.children.size() - 1) ? "└── " : "├── ";
_debug_print_tree(innerNode.children[i], depth + 1, new_prefix + childPrefix);
}
} else {
const Token& token = get<Token>(node);
cout << prefix;
_debug_print_token(token);
cout << endl;
}
}
// Returns the source position stored in `node`: the `pos` member of the
// InnerNode when the variant holds one, otherwise the `pos` of the Token.
CodePosition get_node_pos(Node node) {
    if (const InnerNode* inner = get_if<InnerNode>(&node)) {
        return inner->pos;
    }
    const Token& leaf = get<Token>(node);
    return leaf.pos;
}
// Entry point of the parser.
//
// The token list is reversed first, so that the next token to consume is
// always tokens.back() for parse_prog().
//
// Returns the node produced by parse_prog().
// Throws SyntaxError("Input must not be empty") at null_pos for an empty
// token list, and SyntaxError("Unable to parse") at the position of the first
// token parse_prog() left unconsumed.
Node parse(vector<Token> tokens) {
    reverse(tokens.begin(), tokens.end());

    if (tokens.empty())
        throw SyntaxError("Input must not be empty", null_pos);

    ParseReturn result = parse_prog(tokens);
    if (!result.tokens.empty()) {
        // Whatever is left starts with the token that could not be parsed.
        CodePosition stuck_at = result.tokens.back().pos;
        throw SyntaxError("Unable to parse", stuck_at);
    }
    return result.node;
}
// Parses as many consecutive instructions as possible from the back of
// `tokens`.
//
// The instructions are chained into a left-nested tree of Prog nodes
// (Prog(Prog(i1, i2), i3) ...). Instructions that parse to an Epsilon node
// are dropped; if nothing else was parsed the result is a single Epsilon node
// positioned at null_pos.
//
// Parsing stops silently at the first ParseException: the tokens that were
// not consumed are handed back in the result so the caller can decide what to
// do with them.
ParseReturn parse_prog(vector<Token> tokens) {
    const auto is_epsilon = [](const Node& candidate) {
        return holds_alternative<InnerNode>(candidate)
            && get<InnerNode>(candidate).type == NodeType::Epsilon;
    };

    InnerNode nothing_yet = {
        .type=NodeType::Epsilon,
        .children=vector<Node>(),
        .pos=null_pos
    };
    Node program = nothing_yet;

    try {
        while (!tokens.empty()) {
            ParseReturn step = parse_instruction(tokens);
            tokens = step.tokens;

            if (is_epsilon(step.node))
                continue; // empty instruction: nothing to record

            if (is_epsilon(program)) {
                // First real instruction replaces the placeholder epsilon.
                program = step.node;
                continue;
            }

            InnerNode sequence = {
                .type=NodeType::Prog,
                .children={program, step.node},
                .pos=get_node_pos(step.node)
            };
            program = sequence;
        }
    } catch (const ParseException&) {
        // The next instruction does not parse: keep what we have so far.
    }

    return {
        .node=program,
        .tokens=tokens
    };
}
// Parses one instruction from the back of `tokens`, trying the productions
// below in order and moving to the next one when the current one raises a
// ParseException:
//
//   Instruction -> Statement
//   Instruction -> ExprStatement ;
//   Instruction -> Expr ;
//   Instruction -> ;              (yields an Epsilon node at null_pos)
//
// Returns the parsed node and the tokens left after it (the ';' included).
// Throws ParseException when no production matches. Any other exception
// raised by a sub-parser is not intercepted here.
//
// Fixes compared with the previous version:
//  - the "ExprStatement ;" alternative looked at tokens.back() without
//    checking for an empty list (undefined behaviour on e.g. `int x`);
//  - a failed alternative no longer leaves the shared token list modified,
//    so the following alternatives always restart from the original input
//    (previously `int x 5;` was silently accepted as `5;`).
ParseReturn parse_instruction(vector<Token> tokens) {
    //* Instruction -> Statement
    try {
        return parse_statement(tokens);
    } catch (const ParseException&) {
        // Not a statement: try the next production.
    }

    //* Instruction -> ExprStatement;
    try {
        ParseReturn ret = parse_expr_statement(tokens);
        if (!ret.tokens.empty() && ret.tokens.back().type == TokenType::Semicolon) {
            ret.tokens.pop_back(); // drop the ';'
            return {
                .node=ret.node,
                .tokens=ret.tokens
            };
        }
    } catch (const ParseException&) {
        // Not a declaration: try the next production.
    }

    //* Instruction -> Expr;
    try {
        ParseReturn ret = parse_expr(tokens);
        if (!ret.tokens.empty() && ret.tokens.back().type == TokenType::Semicolon) {
            ret.tokens.pop_back(); // drop the ';'
            return {
                .node=ret.node,
                .tokens=ret.tokens
            };
        }
    } catch (const ParseException&) {
        // Not an expression: try the last production.
    }

    //* Instruction -> ;
    if (tokens.empty() || tokens.back().type != TokenType::Semicolon)
        throw ParseException();
    tokens.pop_back(); // drop the ';'
    InnerNode epsilon_node = {
        .type=NodeType::Epsilon,
        .children=vector<Node>(),
        .pos=null_pos
    };
    return {
        .node=epsilon_node,
        .tokens=tokens
    };
}
// Parses a statement from the back of `tokens`:
//
//   Statement -> If ( Expr ) Instruction
//   Statement -> If ( Expr ) Instruction Else Instruction
//   Statement -> { Prog }
//
// Returns an If / IfElse / Bloc node positioned at the introducing token,
// together with the remaining tokens.
// Throws ParseException when the input has fewer than two tokens or does not
// start with `if` or `{`, and SyntaxError for a recognised statement that is
// malformed (missing parenthesis / brace, invalid or declaration body).
//
// Fixes compared with the previous version:
//  - when nothing follows `if (...)`, the error position was read from
//    tokens.back() on an empty list (undefined behaviour); the position of
//    the `if` keyword is used instead;
//  - `pos=...` inside the SyntaxError arguments was an accidental assignment
//    to the local variable, it is now a plain argument;
//  - a block containing only empty instructions (e.g. `{;}`) was rejected
//    with "Missing ';' ?" because the check looked at the tokens *before*
//    parsing; it now looks at the first token parse_prog() left unconsumed.
ParseReturn parse_statement(vector<Token> tokens) {
    if (tokens.size() < 2) // 'If' / 'For' '(' at least
        throw ParseException();

    switch (tokens.back().type) {
        case TokenType::If: {
            CodePosition pos = tokens.back().pos;
            tokens.pop_back();

            if (tokens.back().type != TokenType::LParenthese) // Opening (
                throw SyntaxError("Missing '('", tokens.back().pos);
            tokens.pop_back();

            ParseReturn ret = parse_expr(tokens); // Expr
            // parse_expr() succeeded, so `tokens` is not empty here.
            if (ret.tokens.empty() || ret.tokens.back().type != TokenType::RParenthese) // Closing )
                throw SyntaxError(
                    "Missing ')'",
                    ret.tokens.empty() ? tokens.back().pos : ret.tokens.back().pos
                );
            ret.tokens.pop_back();
            tokens = ret.tokens;
            Node expr = ret.node;

            // Position blamed for a bad body: its first token, or the `if`
            // itself when the input ends right after the ')'.
            CodePosition body_pos = tokens.empty() ? pos : tokens.back().pos;

            try {
                ret = parse_instruction(tokens); // Instruction1
            } catch (const ParseException&) {
                throw SyntaxError("Invalid Syntax", body_pos);
            }
            if (holds_alternative<InnerNode>(ret.node) &&
                (get<InnerNode>(ret.node).type == NodeType::AssignedDeclaration ||
                 get<InnerNode>(ret.node).type == NodeType::Declaration))
                throw SyntaxError(
                    "a dependent statement may not be a declaration",
                    body_pos
                );
            tokens = ret.tokens;
            Node instruction1 = ret.node;

            if (tokens.empty() || tokens.back().type != TokenType::Else) { //* -> If (Expr) Instruction
                InnerNode node = {
                    .type=NodeType::If,
                    .children={expr, instruction1},
                    .pos=pos
                };
                return {
                    .node=node,
                    .tokens=tokens
                };
            }

            //* -> If (Expr) Instruction Else Instruction
            tokens.pop_back(); // Else
            // NOTE(review): unlike Instruction1, a failure here propagates as
            // a ParseException and declarations are not rejected — kept as is.
            ret = parse_instruction(tokens); // Instruction2
            tokens = ret.tokens;
            Node instruction2 = ret.node;
            InnerNode node = {
                .type=NodeType::IfElse,
                .children={expr, instruction1, instruction2},
                .pos=pos
            };
            return {
                .node=node,
                .tokens=tokens
            };
        }

        case TokenType::LCurlyBracket: {
            CodePosition pos = tokens.back().pos;
            tokens.pop_back(); // at least one token remains (size >= 2 above)

            ParseReturn ret = parse_prog(tokens);

            const bool body_is_epsilon =
                holds_alternative<InnerNode>(ret.node) &&
                get<InnerNode>(ret.node).type == NodeType::Epsilon;
            // No instruction produced and the next unparsed token is not '}'.
            if (body_is_epsilon &&
                !ret.tokens.empty() &&
                ret.tokens.back().type != TokenType::RCurlyBracket)
                throw SyntaxError(
                    "Invalid Syntax. Missing ';' ?",
                    ret.tokens.back().pos
                );

            if (ret.tokens.empty() || ret.tokens.back().type != TokenType::RCurlyBracket)
                throw SyntaxError(
                    "Missing '}'",
                    ret.tokens.empty() ? tokens.back().pos : ret.tokens.back().pos
                );

            tokens = ret.tokens;
            tokens.pop_back(); // drop the '}'
            InnerNode node = {
                .type=NodeType::Bloc,
                .children={ret.node},
                .pos=pos
            };
            return {
                .node=node,
                .tokens=tokens
            };
        }

        default:
            throw ParseException();
    }
}
// Parses a declaration from the back of `tokens`:
//
//   ExprStatement -> Type Identifier
//   ExprStatement -> Type Identifier = Expr
//
// Returns a Declaration node (children: type, identifier; positioned at the
// identifier) or an AssignedDeclaration node (children: type, identifier,
// expression; positioned at the expression), plus the remaining tokens.
// Throws ParseException when the input does not start with a Type token or
// when what follows is not an identifier; exceptions from parse_expr() are
// not intercepted.
ParseReturn parse_expr_statement(vector<Token> tokens) {
    const bool starts_with_type =
        !tokens.empty() && tokens.back().type == TokenType::Type;
    if (!starts_with_type)
        throw ParseException();

    Token type_token = tokens.back();
    tokens.pop_back();

    ParseReturn name = parse_par_identifier(tokens);
    if (!holds_alternative<Token>(name.node))
        throw ParseException(); // The parsing is incorrect
    Token identifier = get<Token>(name.node);
    tokens = name.tokens;

    const bool has_initializer =
        !tokens.empty() && tokens.back().type == TokenType::Equal;

    if (has_initializer) {
        //* ExprStatement -> Type Identifier = Expr
        tokens.pop_back(); // remove the '='
        ParseReturn value = parse_expr(tokens);
        InnerNode declaration = {
            .type=NodeType::AssignedDeclaration,
            .children={type_token, identifier, value.node},
            .pos=get_node_pos(value.node)
        };
        return {
            .node=declaration,
            .tokens=value.tokens
        };
    }

    //* ExprStatement -> Type Identifier
    InnerNode declaration = {
        .type=NodeType::Declaration,
        .children={type_token, identifier},
        .pos=identifier.pos
    };
    return {
        .node=declaration,
        .tokens=tokens
    };
}
// Parses an additive expression from the back of `tokens`:
//
//   Expr -> T { (+ | -) T }
//
// Operands are combined left to right into Plus / Minus nodes positioned at
// their operator token. When the operand after an operator raises a
// ParseException, the operator is put back and the expression parsed so far
// is returned.
// Throws ParseException on empty input; exceptions raised while parsing the
// first T are not intercepted.
ParseReturn parse_expr(vector<Token> tokens) {
    if (tokens.empty())
        throw ParseException();

    // At least 1 T
    ParseReturn first = parse_t(tokens);
    tokens = first.tokens;
    Node tree = first.node;

    //* Fold the following "(+|-) T" pairs into the tree
    while (!tokens.empty()) {
        const auto next = tokens.back().type;
        NodeType kind;
        if (next == TokenType::Plus)
            kind = NodeType::Plus;
        else if (next == TokenType::Minus)
            kind = NodeType::Minus;
        else
            break; // not an additive operator: the expression ends here

        Token op = tokens.back();
        tokens.pop_back();
        try {
            ParseReturn rhs = parse_t(tokens);
            tokens = rhs.tokens;
            InnerNode combined = {
                .type=kind,
                .children={tree, rhs.node},
                .pos=op.pos
            };
            tree = combined;
        } catch (const ParseException&) {
            // No operand after the operator: hand the operator back.
            tokens.push_back(op);
            break;
        }
    }

    return {
        .node=tree,
        .tokens=tokens
    };
}
// Parses a multiplicative expression from the back of `tokens`:
//
//   T -> U { (* | / | %) U }
//
// Operands are combined left to right into Mult / Div / Mod nodes positioned
// at their operator token. When the operand after an operator raises a
// ParseException, the operator is put back and the term parsed so far is
// returned.
// Throws ParseException on empty input; exceptions raised while parsing the
// first U are not intercepted.
ParseReturn parse_t(vector<Token> tokens) {
    if (tokens.empty())
        throw ParseException();

    // At least 1 U
    ParseReturn first = parse_u(tokens);
    tokens = first.tokens;
    Node tree = first.node;

    //* Fold the following "(*|/|%) U" pairs into the tree
    while (!tokens.empty()) {
        const auto next = tokens.back().type;
        NodeType kind;
        if (next == TokenType::Star)
            kind = NodeType::Mult;
        else if (next == TokenType::Slash)
            kind = NodeType::Div;
        else if (next == TokenType::Percent)
            kind = NodeType::Mod;
        else
            break; // not a multiplicative operator: the term ends here

        Token op = tokens.back();
        tokens.pop_back();
        try {
            ParseReturn rhs = parse_u(tokens);
            tokens = rhs.tokens;
            InnerNode combined = {
                .type=kind,
                .children={tree, rhs.node},
                .pos=op.pos
            };
            tree = combined;
        } catch (const ParseException&) {
            // No operand after the operator: hand the operator back.
            tokens.push_back(op);
            break;
        }
    }

    return {
        .node=tree,
        .tokens=tokens
    };
}
// Parses a unary expression from the back of `tokens`:
//
//   U -> - U
//   U -> + U
//   U -> F
//
// A leading '-' / '+' yields a UnaryMinus / UnaryPlus node wrapping the
// recursively parsed operand and positioned at that operand. Anything else
// (including an empty token list) is delegated to parse_f().
ParseReturn parse_u(vector<Token> tokens) {
    const bool negated = !tokens.empty() && tokens.back().type == TokenType::Minus;
    const bool plussed = !tokens.empty() && tokens.back().type == TokenType::Plus;

    //* U -> F
    if (!negated && !plussed)
        return parse_f(tokens);

    //* U -> - U   |   U -> + U
    tokens.pop_back();
    ParseReturn operand = parse_u(tokens);
    InnerNode unary = {
        .type=(negated ? NodeType::UnaryMinus : NodeType::UnaryPlus),
        .children={ operand.node },
        .pos=get_node_pos(operand.node)
    };
    return {
        .node=unary,
        .tokens=operand.tokens
    };
}
// Parses a factor from the back of `tokens`:
//
//   F -> Number
//   F -> ++ParIdentifier | --ParIdentifier      (LIncr / LDecr)
//   F -> ParIdentifier++ | ParIdentifier--      (RIncr / RDecr)
//   F -> ParIdentifier = Expr                   (Assignment)
//   F -> ParIdentifier
//   F -> ( Expr )
//
// Returns the parsed node and the remaining tokens.
// Throws ParseException when the input is empty or matches no production,
// and SyntaxError("Missing ')'") for a parenthesised expression that is not
// closed.
//
// Fixes compared with the previous version:
//  - for an unclosed "( Expr" that runs to the end of the input, the error
//    position was read from tokens.back() on an empty list (undefined
//    behaviour); it now falls back to the first token after the '(', like
//    the equivalent check in parse_statement();
//  - removed a stray ';' and merged the duplicated ++/-- branches.
ParseReturn parse_f(vector<Token> tokens) {
    if (tokens.empty())
        throw ParseException();

    switch (tokens.back().type) {
        case TokenType::Int: { //* F -> Number
            Token number = tokens.back();
            tokens.pop_back();
            return {
                .node=number,
                .tokens=tokens
            };
        }
        case TokenType::DoublePlus:    //* F -> ++ParIdentifier
        case TokenType::DoubleMinus: { //* F -> --ParIdentifier
            const NodeType kind = tokens.back().type == TokenType::DoublePlus
                ? NodeType::LIncr
                : NodeType::LDecr;
            tokens.pop_back();
            ParseReturn target = parse_par_identifier(tokens);
            InnerNode node = {
                .type=kind,
                .children={ target.node },
                .pos=get_node_pos(target.node)
            };
            return {
                .node=node,
                .tokens=target.tokens
            };
        }
        default:
            break;
    }

    try { //* F -> ParIdentifier...
        ParseReturn ret = parse_par_identifier(tokens);
        if (!ret.tokens.empty()) {
            switch (ret.tokens.back().type) {
                case TokenType::DoublePlus:    //* F -> ParIdentifier++
                case TokenType::DoubleMinus: { //* F -> ParIdentifier--
                    const NodeType kind = ret.tokens.back().type == TokenType::DoublePlus
                        ? NodeType::RIncr
                        : NodeType::RDecr;
                    ret.tokens.pop_back();
                    InnerNode node = {
                        .type=kind,
                        .children={ ret.node },
                        .pos=get_node_pos(ret.node)
                    };
                    return {
                        .node=node,
                        .tokens=ret.tokens
                    };
                }
                case TokenType::Equal: { //* F -> ParIdentifier = Expr
                    ret.tokens.pop_back();
                    // A ParseException raised here is handled by the catch
                    // below, exactly like a failed ParIdentifier.
                    ParseReturn ret_expr = parse_expr(ret.tokens);
                    InnerNode node = {
                        .type=NodeType::Assignment,
                        .children={ ret.node, ret_expr.node },
                        .pos=get_node_pos(ret_expr.node)
                    };
                    return {
                        .node=node,
                        .tokens=ret_expr.tokens
                    };
                }
                default:
                    break;
            }
        }
        //* F -> ParIdentifier
        return ret;
    } catch (const ParseException&) { //* F -> (Expr)
        // `tokens` is untouched by the attempt above and is not empty.
        if (tokens.back().type != TokenType::LParenthese)
            throw ParseException();
        tokens.pop_back();

        ParseReturn inner = parse_expr(tokens);
        // parse_expr() succeeded, so `tokens` still holds at least the first
        // token of the expression: use it when the input ends before ')'.
        if (inner.tokens.empty() || inner.tokens.back().type != TokenType::RParenthese)
            throw SyntaxError(
                "Missing ')'",
                inner.tokens.empty() ? tokens.back().pos : inner.tokens.back().pos
            );
        inner.tokens.pop_back();
        return {
            .node=inner.node,
            .tokens=inner.tokens
        };
    }
}
// Parses an identifier wrapped in any number of parentheses from the back of
// `tokens`:
//
//   ParIdentifier -> Identifier
//   ParIdentifier -> ( ParIdentifier )
//
// Returns the identifier token as node (the parentheses leave no trace in
// the tree) and the remaining tokens.
// Throws ParseException when the input is empty, starts with anything other
// than an identifier or '(', or when a '(' is not matched by a ')'.
//
// Fix compared with the previous version: after the recursive call the
// closing ')' was looked up with tokens.back() without checking that any
// token was left (undefined behaviour on input such as `(x`).
ParseReturn parse_par_identifier(vector<Token> tokens) {
    if (tokens.empty())
        throw ParseException();

    if (tokens.back().type == TokenType::Identifier) {
        Token identifier = tokens.back();
        tokens.pop_back();
        return { //* ParIdentifier -> Identifier
            .node=identifier,
            .tokens=tokens
        };
    }

    if (tokens.back().type != TokenType::LParenthese)
        throw ParseException();
    tokens.pop_back(); // drop the '('

    ParseReturn inner = parse_par_identifier(tokens);
    if (inner.tokens.empty() || inner.tokens.back().type != TokenType::RParenthese)
        throw ParseException();
    inner.tokens.pop_back(); // drop the ')'

    return { //* ParIdentifier -> (ParIdentifier)
        .node=inner.node,
        .tokens=inner.tokens
    };
}