c-repl/src/parser.cpp
2023-12-15 14:11:44 +01:00

848 lines
25 KiB
C++

#include <vector>
#include <iostream>
#include <algorithm>
using namespace std;
#include "include/tokenize.h"
#include "include/colors.h"
#include "include/parser.h"
#include "include/utils.h"
// Sentinel position (line -1, column -1) for nodes and errors that have no
// source location: it is stored in the shared epsilon node and passed to the
// EmptyInput error raised by parse().
CodePosition null_pos = {
.line = -1,
.column = -1
};
// Printable names of the AST node kinds. _debug_print_tree indexes this table
// with the integer value of an InnerNode's NodeType.
// NOTE(review): the order presumably mirrors the NodeType enum declaration —
// confirm against the header whenever a node kind is added or reordered.
const char* _debug_ast_node_names[] = {
"Prog", "Epsilon", "AssignedDeclaration", "Declaration", "Plus", "Minus", "Mult", "Div", "Mod",
"UnaryMinus", "UnaryPlus", "Neg", "Assignment", "LIncr", "RIncr", "LDecr", "RDecr", "If", "IfElse",
"For", "While", "Bloc", "Lt", "Gt", "Leq", "Geq", "Eq", "Neq", "Land", "Lor", "Comma"
};
void _debug_print_tree(const Node& node, int depth, const string& prefix) {
if (holds_alternative<InnerNode>(node)) {
const InnerNode& innerNode = get<InnerNode>(node);
cout << prefix << _debug_ast_node_names[int(innerNode.type)] << "\n";
string new_prefix = prefix;
size_t pos = new_prefix.find("└──");
while (pos != string::npos) {
new_prefix.replace(pos, 9, " ");
pos = new_prefix.find("└──", pos + 4);
}
pos = new_prefix.find("├──");
while (pos != string::npos) {
new_prefix.replace(pos, 9, "");
pos = new_prefix.find("├──", pos + 6);
}
for (size_t i = 0; i < innerNode.children.size(); ++i) {
string childPrefix = (i == innerNode.children.size() - 1) ? "└── " : "├── ";
_debug_print_tree(innerNode.children[i], depth + 1, new_prefix + childPrefix);
}
} else {
const Token& token = get<Token>(node);
cout << prefix;
_debug_print_token(token);
cout << endl;
}
}
/**
 * Entry point of the parser: turns a token list (in source order) into an AST.
 *
 * @param tokens Tokens in the order they appear in the input.
 * @return Root node produced by parse_prog.
 * @throws SyntaxError EmptyInput (at null_pos) when there are no tokens, or
 *         InvalidSyntax at the first token parse_prog could not consume.
 */
Node parse(vector<Token> tokens) {
    // The sub-parsers read and pop from the back of the vector, so the first
    // source token has to end up last.
    reverse(tokens.begin(), tokens.end());

    if (tokens.empty())
        throw SyntaxError(ErrorType::EmptyInput, null_pos);

    ParseReturn result = parse_prog(tokens);
    if (!result.tokens.empty()) {
        // Leftover tokens mean the program stopped being parseable here.
        CodePosition failure_pos = result.tokens.back().pos;
        throw SyntaxError(ErrorType::InvalidSyntax, failure_pos);
    }
    return result.node;
}
// Empty child list used to initialise the epsilon node below.
vector<Node> children;
// Shared "nothing parsed" node (type Epsilon, no children, position null_pos).
// The parsers return it for empty instructions (a lone ';') and for omitted
// clauses of a for statement; parse_prog starts from it and drops it as soon
// as a real instruction is parsed.
InnerNode epsilon_node = {
.type=NodeType::Epsilon,
.children=children,
.pos=null_pos
};
/**
 * Parses a sequence of instructions: Prog -> Instruction*.
 *
 * Instructions are read until the token list is exhausted or
 * parse_instruction raises a ParseException; in the latter case the exception
 * is absorbed and the tokens that could not be parsed are handed back to the
 * caller, which decides whether they are acceptable (for example a closing
 * '}' after a block body).
 *
 * @param tokens Remaining tokens, next token at the back.
 * @return The accumulated tree (the epsilon node when nothing but empty
 *         instructions was parsed; otherwise a left-nested chain of Prog
 *         nodes, or the single instruction itself) and the unconsumed tokens.
 */
ParseReturn parse_prog(vector<Token> tokens) {
    const auto is_epsilon = [](const Node& candidate) {
        return holds_alternative<InnerNode>(candidate)
            && get<InnerNode>(candidate).type == NodeType::Epsilon;
    };

    Node program = epsilon_node;
    try {
        while (!tokens.empty()) {
            ParseReturn parsed = parse_instruction(tokens);
            tokens = parsed.tokens;

            // Empty instructions contribute nothing to the tree.
            if (is_epsilon(parsed.node))
                continue;

            // The first real instruction replaces the placeholder epsilon.
            if (is_epsilon(program)) {
                program = parsed.node;
                continue;
            }

            InnerNode sequence = {
                .type=NodeType::Prog,
                .children={program, parsed.node},
                .pos=get_node_pos(parsed.node)
            };
            program = sequence;
        }
    } catch (const ParseException&) {
        // Stop at the first instruction that cannot be parsed and return
        // what has been built so far together with the remaining tokens.
    }

    return {
        .node=program,
        .tokens=tokens
    };
}
/**
 * Parses a single instruction by trying the alternatives in order:
 *
 *   Instruction -> Statement
 *   Instruction -> ExprStatement ;
 *   Instruction -> Expr ;
 *   Instruction -> ;
 *
 * Every alternative is attempted on the untouched input token list: a failed
 * attempt never leaks its partially consumed tokens into the next one.
 *
 * @param tokens Remaining tokens, next token at the back.
 * @return The parsed node (the epsilon node for a lone ';') and the tokens
 *         left after the instruction.
 * @throws ParseException when no alternative matches.
 */
ParseReturn parse_instruction(vector<Token> tokens) {
    // True when `rest` starts with the ';' that must close the instruction.
    const auto ends_with_semicolon = [](const vector<Token>& rest) {
        return !rest.empty() && rest.back().type == TokenType::Semicolon;
    };

    //* Instruction -> Statement
    try {
        return parse_statement(tokens);
    } catch (const ParseException&) {
        // Not a statement: try the next alternative.
    }

    //* Instruction -> ExprStatement;
    try {
        ParseReturn ret = parse_expr_statement(tokens);
        if (ends_with_semicolon(ret.tokens)) {
            ret.tokens.pop_back();
            return ret;
        }
    } catch (const ParseException&) {
        // Not a declaration: try the next alternative.
    }

    //* Instruction -> Expr;
    try {
        ParseReturn ret = parse_expr(tokens);
        if (ends_with_semicolon(ret.tokens)) {
            ret.tokens.pop_back();
            return ret;
        }
    } catch (const ParseException&) {
        // Not an expression: only the empty instruction is left.
    }

    //* Instruction -> ;
    if (!ends_with_semicolon(tokens))
        throw ParseException();
    tokens.pop_back(); // Drop the ';' from the token list
    return {
        .node=epsilon_node,
        .tokens=tokens
    };
}
ParseReturn parse_statement(vector<Token> tokens) {
if (tokens.size() < 2)
throw ParseException();
switch (tokens.back().type) {
case TokenType::Break:
case TokenType::Continue: {
Token token = tokens.back();
tokens.pop_back();
if (tokens.back().type != TokenType::Semicolon)
throw SyntaxError(ErrorType::ExpectedSemicolon, tokens.back().pos);
tokens.pop_back();
return {
.node=token,
.tokens=tokens
};
}
case TokenType::While:
case TokenType::If: {
CodePosition pos = tokens.back().pos;
NodeType type;
switch (tokens.back().type) {
case TokenType::If:
type = NodeType::If;
break;
case TokenType::While:
type = NodeType::While;
break;
default:
break; // Impossible
}
tokens.pop_back();
if (tokens.back().type != TokenType::LParenthese) // Opening (
throw SyntaxError(ErrorType::ExceptedLParen, tokens.back().pos);
tokens.pop_back();
ParseReturn ret = parse_expr(tokens); // Expr
int nb_tok = ret.tokens.size(); // Closing )
if (nb_tok == 0 || ret.tokens.back().type != TokenType::RParenthese)
throw SyntaxError(
ErrorType::ExpectedRParen,
nb_tok == 0 ? tokens.back().pos : ret.tokens.back().pos
);
ret.tokens.pop_back();
tokens = ret.tokens;
Node expr = ret.node;
try {
ret = parse_instruction(tokens); // Instruction1
} catch (const ParseException& pex) {
throw SyntaxError(
ErrorType::InvalidSyntax,
pos=tokens.back().pos
);
}
if (holds_alternative<InnerNode>(ret.node) &&
( get<InnerNode>(ret.node).type == NodeType::AssignedDeclaration ||
get<InnerNode>(ret.node).type == NodeType::Declaration ) )
throw SyntaxError(
ErrorType::DependentDeclaration,
pos=tokens.back().pos
);
tokens = ret.tokens;
Node instruction1 = ret.node;
if (tokens.size() == 0 || tokens.back().type != TokenType::Else) { //* -> If (Expr) Instruction
InnerNode node = {
.type=type,
.children={expr, instruction1},
.pos=pos
};
return {
.node=node,
.tokens=tokens
};
}
tokens.pop_back(); // Else
ret = parse_instruction(tokens); // Instruction2
tokens = ret.tokens;
Node instruction2 = ret.node;
InnerNode node = {
.type=NodeType::IfElse,
.children={expr, instruction1, instruction2},
.pos=pos
};
return {
.node=node,
.tokens=tokens
};
}
case TokenType::For: {
CodePosition pos = tokens.back().pos;
tokens.pop_back();
if (tokens.back().type != TokenType::LParenthese) // Opening (
throw SyntaxError(ErrorType::ExceptedLParen, tokens.back().pos);
tokens.pop_back();
ParseReturn ret1;
if (tokens.size() >= 1 && tokens.back().type == TokenType::Semicolon) {
ret1 = {
.node=epsilon_node,
.tokens=tokens
};
} else {
try {
ret1 = parse_expr_statement(tokens);
} catch (const ParseException& pex) {
ret1 = parse_expr(tokens);
}
}
int nb_tok = ret1.tokens.size(); // First ;
if (nb_tok == 0 || ret1.tokens.back().type != TokenType::Semicolon)
throw SyntaxError(
ErrorType::ExpectedSemicolon,
nb_tok == 0 ? tokens.back().pos : ret1.tokens.back().pos
);
tokens = ret1.tokens;
tokens.pop_back();
ParseReturn ret2;
if (tokens.size() >= 1 && tokens.back().type == TokenType::Semicolon) {
ret2 = {
.node=epsilon_node,
.tokens=tokens
};
} else {
ret2 = parse_expr(tokens);
}
nb_tok = ret2.tokens.size(); // Second ;
if (nb_tok == 0 || ret2.tokens.back().type != TokenType::Semicolon)
throw SyntaxError(
ErrorType::ExpectedSemicolon,
nb_tok == 0 ? tokens.back().pos : ret2.tokens.back().pos
);
tokens = ret2.tokens;
tokens.pop_back();
ParseReturn ret3;
if (tokens.size() >= 1 && tokens.back().type == TokenType::RParenthese) {
ret3 = {
.node=epsilon_node,
.tokens=tokens
};
} else {
ret3 = parse_expr(tokens);
}
nb_tok = ret3.tokens.size(); // Closing )
if (nb_tok == 0 || ret3.tokens.back().type != TokenType::RParenthese)
throw SyntaxError(
ErrorType::ExpectedRParen,
nb_tok == 0 ? tokens.back().pos : ret3.tokens.back().pos
);
tokens = ret3.tokens;
tokens.pop_back();
ParseReturn ret_instruction = parse_instruction(tokens);
tokens = ret_instruction.tokens;
InnerNode node = {
.type=NodeType::For,
.children={ret1.node, ret2.node, ret3.node, ret_instruction.node},
.pos=pos
};
return {
.node=node,
.tokens=tokens
};
}
case TokenType::LCurlyBracket: {
CodePosition pos = tokens.back().pos;
tokens.pop_back();
ParseReturn ret = parse_prog(tokens);
if ( // No expression parsed, the next token is not a '}'
holds_alternative<InnerNode>(ret.node) &&
get<InnerNode>(ret.node).type == NodeType::Epsilon &&
tokens.back().type != TokenType::RCurlyBracket
)
throw SyntaxError(
ErrorType::InvalidSyntax,
tokens.back().pos
);
int nb_tok = ret.tokens.size();
if (nb_tok == 0 || ret.tokens.back().type != TokenType::RCurlyBracket)
throw SyntaxError(
ErrorType::ExpectedRCurlyBracket,
nb_tok == 0 ? tokens.back().pos : ret.tokens.back().pos
);
tokens = ret.tokens;
tokens.pop_back();
InnerNode node = {
.type=NodeType::Bloc,
.children={ret.node},
.pos=pos
};
return {
.node=node,
.tokens=tokens
};
}
default:
throw ParseException();
}
}
/**
 * Parses a declaration, with or without an initialiser:
 *
 *   ExprStatement -> Type Identifier
 *   ExprStatement -> Type Identifier = Expr
 *
 * The type is an Identifier token; the declared name may be wrapped in
 * parentheses (it is read with parse_par_identifier).
 *
 * @param tokens Remaining tokens, next token at the back.
 * @return A Declaration node {type, identifier} positioned at the identifier,
 *         or an AssignedDeclaration node {type, identifier, expr} positioned
 *         at the initialiser expression, plus the unconsumed tokens.
 * @throws ParseException when the input does not start with
 *         `Identifier ParIdentifier`.
 */
ParseReturn parse_expr_statement(vector<Token> tokens) {
    if (tokens.empty() || tokens.back().type != TokenType::Identifier)
        throw ParseException();
    Token type = tokens.back();
    tokens.pop_back();

    ParseReturn name = parse_par_identifier(tokens);
    tokens = name.tokens;
    if (!holds_alternative<Token>(name.node))
        throw ParseException(); // The parsing is incorrect
    Token identifier = get<Token>(name.node);

    const bool has_initializer =
        !tokens.empty() && tokens.back().type == TokenType::Equal;

    if (has_initializer) {
        //* ExprStatement -> Type Identifier = Expr
        tokens.pop_back(); // Drop the '='
        ParseReturn init = parse_expr(tokens);
        InnerNode node = {
            .type=NodeType::AssignedDeclaration,
            .children={type, identifier, init.node},
            .pos=get_node_pos(init.node)
        };
        return {
            .node=node,
            .tokens=init.tokens
        };
    }

    //* ExprStatement -> Type Identifier
    InnerNode node = {
        .type=NodeType::Declaration,
        .children={type, identifier},
        .pos=identifier.pos
    };
    return {
        .node=node,
        .tokens=tokens
    };
}
/**
 * Parses a comma expression: Expr -> Comp (, Comp)*.
 *
 * The result is a left-nested chain of Comma nodes, each positioned at its
 * comma token. When the operand after a comma cannot be parsed, that comma
 * is left in the returned token list and parsing stops there.
 *
 * @param tokens Remaining tokens, next token at the back.
 * @return The expression tree and the unconsumed tokens.
 * @throws ParseException when `tokens` is empty or no first operand can be
 *         parsed.
 */
ParseReturn parse_expr(vector<Token> tokens) {
    if (tokens.empty())
        throw ParseException();

    // At least one operand
    ParseReturn first = parse_comp(tokens);
    tokens = first.tokens;
    Node tree = first.node;

    //* Fold every further ", Comp" into the tree
    while (!tokens.empty() && tokens.back().type == TokenType::Comma) {
        Token comma = tokens.back();
        // Work on a copy without the comma so that `tokens` still holds it
        // if the right operand turns out to be unparseable.
        vector<Token> after_comma(tokens.begin(), tokens.end() - 1);
        try {
            ParseReturn rhs = parse_comp(after_comma);
            InnerNode joined = {
                .type=NodeType::Comma,
                .children={tree, rhs.node},
                .pos=comma.pos
            };
            tokens = rhs.tokens;
            tree = joined;
        } catch (const ParseException&) {
            break;
        }
    }

    return {
        .node=tree,
        .tokens=tokens
    };
}
// Parses one left-associative binary level: Operand (Operator Operand)*.
// `parse_operand` parses the next-tighter level; `lookup_operator` returns
// true and sets the node type when a token is an operator of this level.
// When the operand after an operator cannot be parsed, the operator is left
// in the returned token list and parsing of this level stops.
template <typename OperandParser, typename OperatorLookup>
static ParseReturn parse_comp_left_assoc(vector<Token> tokens,
                                         OperandParser parse_operand,
                                         OperatorLookup lookup_operator) {
    if (tokens.empty())
        throw ParseException();

    // At least one operand
    ParseReturn first = parse_operand(tokens);
    tokens = first.tokens;
    Node node = first.node;

    while (!tokens.empty()) {
        NodeType type = NodeType::Epsilon;
        if (!lookup_operator(tokens.back().type, type))
            break;

        Token op = tokens.back();
        // Parse on a copy without the operator so that `tokens` is intact
        // if the right operand is missing.
        vector<Token> after_op(tokens.begin(), tokens.end() - 1);
        try {
            ParseReturn rhs = parse_operand(after_op);
            InnerNode combined = {
                .type=type,
                .children={node, rhs.node},
                .pos=op.pos
            };
            tokens = rhs.tokens;
            node = combined;
        } catch (const ParseException&) {
            break;
        }
    }

    return {
        .node=node,
        .tokens=tokens
    };
}

//* Relational -> Sum ((< | > | <= | >=) Sum)*
static ParseReturn parse_comp_relational(vector<Token> tokens) {
    return parse_comp_left_assoc(tokens, parse_sum,
        [](TokenType token, NodeType& type) {
            switch (token) {
                case TokenType::Lt:  type = NodeType::Lt;  return true;
                case TokenType::Gt:  type = NodeType::Gt;  return true;
                case TokenType::Leq: type = NodeType::Leq; return true;
                case TokenType::Geq: type = NodeType::Geq; return true;
                default: return false;
            }
        });
}

//* Equality -> Relational ((== | !=) Relational)*
static ParseReturn parse_comp_equality(vector<Token> tokens) {
    return parse_comp_left_assoc(tokens, parse_comp_relational,
        [](TokenType token, NodeType& type) {
            switch (token) {
                case TokenType::DoubleEqual: type = NodeType::Eq;  return true;
                case TokenType::NotEqual:    type = NodeType::Neq; return true;
                default: return false;
            }
        });
}

//* LogicalAnd -> Equality (&& Equality)*
static ParseReturn parse_comp_logical_and(vector<Token> tokens) {
    return parse_comp_left_assoc(tokens, parse_comp_equality,
        [](TokenType token, NodeType& type) {
            if (token != TokenType::Land)
                return false;
            type = NodeType::Land;
            return true;
        });
}

/**
 * Parses comparison and logical operators with C precedence, from loosest to
 * tightest binding:
 *
 *   Comp       -> LogicalAnd (|| LogicalAnd)*
 *   LogicalAnd -> Equality (&& Equality)*
 *   Equality   -> Relational ((== | !=) Relational)*
 *   Relational -> Sum ((< | > | <= | >=) Sum)*
 *
 * All levels are left-associative, so `a < b && c < d` yields
 * Land(Lt(a, b), Lt(c, d)). Each binary node is positioned at its operator.
 *
 * @param tokens Remaining tokens, next token at the back.
 * @return The expression tree and the unconsumed tokens.
 * @throws ParseException when `tokens` is empty or no first operand can be
 *         parsed.
 */
ParseReturn parse_comp(vector<Token> tokens) {
    return parse_comp_left_assoc(tokens, parse_comp_logical_and,
        [](TokenType token, NodeType& type) {
            if (token != TokenType::Lor)
                return false;
            type = NodeType::Lor;
            return true;
        });
}
/**
 * Parses additive expressions: Sum -> Term ((+ | -) Term)*.
 *
 * The result is left-associative; each Plus/Minus node is positioned at its
 * operator token. When the operand after an operator cannot be parsed, the
 * operator is left in the returned token list and parsing stops there.
 *
 * @param tokens Remaining tokens, next token at the back.
 * @return The expression tree and the unconsumed tokens.
 * @throws ParseException when `tokens` is empty or no first term can be
 *         parsed.
 */
ParseReturn parse_sum(vector<Token> tokens) {
    if (tokens.empty())
        throw ParseException();

    // At least one Term
    ParseReturn first = parse_term(tokens);
    tokens = first.tokens;
    Node tree = first.node;

    //* Fold every further "(+|-) Term" into the tree
    while (!tokens.empty()) {
        const TokenType op_kind = tokens.back().type;
        const bool is_plus = (op_kind == TokenType::Plus);
        if (!is_plus && op_kind != TokenType::Minus)
            break;

        Token op = tokens.back();
        // Keep `tokens` untouched until the right operand is known to parse.
        vector<Token> after_op(tokens.begin(), tokens.end() - 1);
        try {
            ParseReturn rhs = parse_term(after_op);
            InnerNode combined = {
                .type=is_plus ? NodeType::Plus : NodeType::Minus,
                .children={tree, rhs.node},
                .pos=op.pos
            };
            tokens = rhs.tokens;
            tree = combined;
        } catch (const ParseException&) {
            break;
        }
    }

    return {
        .node=tree,
        .tokens=tokens
    };
}
/**
 * Parses multiplicative expressions: Term -> Unary ((* | / | %) Unary)*.
 *
 * The result is left-associative; each Mult/Div/Mod node is positioned at its
 * operator token. When the operand after an operator cannot be parsed, the
 * operator is left in the returned token list and parsing stops there.
 *
 * @param tokens Remaining tokens, next token at the back.
 * @return The expression tree and the unconsumed tokens.
 * @throws ParseException when `tokens` is empty or no first operand can be
 *         parsed.
 */
ParseReturn parse_term(vector<Token> tokens) {
    if (tokens.empty())
        throw ParseException();

    // Maps an operator token to its node kind; false for any other token.
    const auto as_term_operator = [](TokenType token, NodeType& kind) {
        if (token == TokenType::Star) {
            kind = NodeType::Mult;
        } else if (token == TokenType::Slash) {
            kind = NodeType::Div;
        } else if (token == TokenType::Percent) {
            kind = NodeType::Mod;
        } else {
            return false;
        }
        return true;
    };

    // At least one Unary
    ParseReturn first = parse_unary(tokens);
    tokens = first.tokens;
    Node tree = first.node;

    //* Fold every further "(*|/|%) Unary" into the tree
    for (;;) {
        if (tokens.empty())
            break;
        NodeType kind;
        if (!as_term_operator(tokens.back().type, kind))
            break;

        Token op = tokens.back();
        // Keep `tokens` untouched until the right operand is known to parse.
        vector<Token> after_op(tokens.begin(), tokens.end() - 1);
        try {
            ParseReturn rhs = parse_unary(after_op);
            InnerNode combined = {
                .type=kind,
                .children={tree, rhs.node},
                .pos=op.pos
            };
            tokens = rhs.tokens;
            tree = combined;
        } catch (const ParseException&) {
            break;
        }
    }

    return {
        .node=tree,
        .tokens=tokens
    };
}
/**
 * Parses prefix operators:
 *
 *   Unary -> - Unary | + Unary | ! Unary | Val
 *
 * @param tokens Remaining tokens, next token at the back.
 * @return A UnaryMinus/UnaryPlus/Neg node wrapping its operand (positioned at
 *         the operand), or whatever parse_val returns when no prefix operator
 *         is present, plus the unconsumed tokens.
 * @throws ParseException propagated from parse_val (including on empty input).
 */
ParseReturn parse_unary(vector<Token> tokens) {
    // With no token left, parse_val reports the failure.
    if (tokens.empty())
        return parse_val(tokens);

    NodeType kind;
    switch (tokens.back().type) {
        case TokenType::Minus: //* Unary -> - Unary
            kind = NodeType::UnaryMinus;
            break;
        case TokenType::Plus: //* Unary -> + Unary
            kind = NodeType::UnaryPlus;
            break;
        case TokenType::Not: //* Unary -> ! Unary
            kind = NodeType::Neg;
            break;
        default: //* Unary -> Val
            return parse_val(tokens);
    }

    tokens.pop_back(); // Drop the operator
    ParseReturn operand = parse_unary(tokens);
    InnerNode wrapped = {
        .type=kind,
        .children={ operand.node },
        .pos=get_node_pos(operand.node)
    };
    return {
        .node=wrapped,
        .tokens=operand.tokens
    };
}
/**
 * Parses a primary value:
 *
 *   Val -> Number
 *   Val -> ++ParIdentifier | --ParIdentifier
 *   Val -> ParIdentifier++ | ParIdentifier--
 *   Val -> ParIdentifier = Comp
 *   Val -> ParIdentifier
 *   Val -> ( Expr )
 *
 * The right-hand side of an assignment is parsed with parse_comp rather than
 * parse_expr so that the comma operator binds looser than '=', as in C:
 * `a = 1, b = 2` is Comma(Assignment(a, 1), Assignment(b, 2)).
 *
 * @param tokens Remaining tokens, next token at the back.
 * @return The value node and the unconsumed tokens.
 * @throws ParseException when `tokens` is empty or does not start a value.
 * @throws SyntaxError ExpectedRParen when a parenthesised expression is not
 *         closed.
 */
ParseReturn parse_val(vector<Token> tokens) {
    if (tokens.empty())
        throw ParseException();

    // Wraps a parsed operand in a one-child node positioned at the operand.
    const auto wrap = [](NodeType type, const ParseReturn& operand) -> ParseReturn {
        InnerNode node = {
            .type=type,
            .children={ operand.node },
            .pos=get_node_pos(operand.node)
        };
        return {
            .node=node,
            .tokens=operand.tokens
        };
    };

    switch (tokens.back().type) {
        case TokenType::Litteral: { //* Val -> Number
            Token number = tokens.back();
            tokens.pop_back();
            return {
                .node=number,
                .tokens=tokens
            };
        }
        case TokenType::DoublePlus: //* Val -> ++ParIdentifier
            tokens.pop_back();
            return wrap(NodeType::LIncr, parse_par_identifier(tokens));
        case TokenType::DoubleMinus: //* Val -> --ParIdentifier
            tokens.pop_back();
            return wrap(NodeType::LDecr, parse_par_identifier(tokens));
        default:
            break;
    }

    try { //* Val -> ParIdentifier...
        ParseReturn ret = parse_par_identifier(tokens);
        if (ret.tokens.empty())
            return ret; //* Val -> ParIdentifier (end of input)

        switch (ret.tokens.back().type) {
            case TokenType::DoublePlus: //* Val -> ParIdentifier++
                ret.tokens.pop_back();
                return wrap(NodeType::RIncr, ret);
            case TokenType::DoubleMinus: //* Val -> ParIdentifier--
                ret.tokens.pop_back();
                return wrap(NodeType::RDecr, ret);
            case TokenType::Equal: { //* Val -> ParIdentifier = Comp
                ret.tokens.pop_back();
                ParseReturn rhs = parse_comp(ret.tokens);
                InnerNode node = {
                    .type=NodeType::Assignment,
                    .children={ ret.node, rhs.node },
                    .pos=get_node_pos(rhs.node)
                };
                return {
                    .node=node,
                    .tokens=rhs.tokens
                };
            }
            default: //* Val -> ParIdentifier
                return ret;
        }
    } catch (const ParseException&) {
        // Not an identifier-based value (or its assignment has no right-hand
        // side): fall back to a parenthesised expression below. `tokens` is
        // still the untouched input at this point.
    }

    //* Val -> (Expr)
    if (tokens.back().type != TokenType::LParenthese)
        throw ParseException();
    tokens.pop_back();

    ParseReturn ret = parse_expr(tokens);
    if (ret.tokens.empty() || ret.tokens.back().type != TokenType::RParenthese)
        // When the input ends before ')', there is no offending token to
        // point at: report the first token of the parenthesised expression
        // (`tokens` is non-empty here because parse_expr succeeded on it).
        throw SyntaxError(
            ErrorType::ExpectedRParen,
            ret.tokens.empty() ? tokens.back().pos : ret.tokens.back().pos
        );
    ret.tokens.pop_back();
    return ret;
}
/**
 * Parses an identifier wrapped in any number of balanced parentheses:
 *
 *   ParIdentifier -> Identifier
 *   ParIdentifier -> ( ParIdentifier )
 *
 * @param tokens Remaining tokens, next token at the back.
 * @return The identifier token as node (parentheses leave no trace in the
 *         tree) and the unconsumed tokens.
 * @throws ParseException when the input is empty, does not start with an
 *         identifier or '(', or a '(' is not matched by ')' — including when
 *         the input ends before the closing parenthesis.
 */
ParseReturn parse_par_identifier(vector<Token> tokens) {
    if (tokens.empty())
        throw ParseException();

    if (tokens.back().type == TokenType::Identifier) {
        Token identifier = tokens.back();
        tokens.pop_back();
        return { //* ParIdentifier -> Identifier
            .node=identifier,
            .tokens=tokens
        };
    }

    if (tokens.back().type != TokenType::LParenthese)
        throw ParseException();
    tokens.pop_back();

    ParseReturn inner = parse_par_identifier(tokens);
    // The token list may be exhausted here (e.g. "(x" at end of input), so
    // check for emptiness before looking at the next token.
    if (inner.tokens.empty() || inner.tokens.back().type != TokenType::RParenthese)
        throw ParseException();
    inner.tokens.pop_back();
    return inner; //* ParIdentifier -> (ParIdentifier)
}