#include #include #include using namespace std; #include "include/tokenize.h" #include "include/colors.h" #include "include/parser.h" #include "include/config.h" #include "include/utils.h" CodePosition null_pos = { .line = -1, .column = -1 }; const char* _debug_ast_node_names[] = { "Prog", "Epsilon", "AssignedDeclaration", "Declaration", "Plus", "Minus", "Mult", "Div", "Mod", "UnaryMinus", "UnaryPlus", "Neg", "Assignment", "LIncr", "RIncr", "LDecr", "RDecr", "If", "IfElse", "For", "While", "Bloc", "Lt", "Gt", "Leq", "Geq", "Eq", "Neq", "Land", "Lor", "Comma", "FunctionPrototype", "FunctionDeclaration", "FunctionCall", "FunctionArgs", "Return" }; void _debug_print_tree(const Node& node, int depth, const string& prefix) { if (holds_alternative(node)) { const InnerNode& innerNode = get(node); cout << prefix << _debug_ast_node_names[int(innerNode.type)] << "\n"; string new_prefix = prefix; size_t pos = new_prefix.find("└──"); while (pos != string::npos) { new_prefix.replace(pos, 9, " "); pos = new_prefix.find("└──", pos + 4); } pos = new_prefix.find("├──"); while (pos != string::npos) { new_prefix.replace(pos, 9, "│ "); pos = new_prefix.find("├──", pos + 6); } for (size_t i = 0; i < innerNode.children.size(); ++i) { string childPrefix = (i == innerNode.children.size() - 1) ? "└── " : "├── "; _debug_print_tree(innerNode.children[i], depth + 1, new_prefix + childPrefix); } } else { const Token& token = get(node); cout << prefix; _debug_print_token(token); cout << endl; } } Node parse(vector tokens) { reverse(tokens.begin(), tokens.end()); if (tokens.size() == 0) { throw SyntaxError(ErrorType::EmptyInput, null_pos); } ParseReturn ret = parse_prog(tokens); if (ret.tokens.size() != 0) { CodePosition pos = ret.tokens.back().pos; throw SyntaxError(ErrorType::InvalidSyntax, pos); } return ret.node; } vector children; InnerNode epsilon_node = { .type=NodeType::Epsilon, .children=children, .pos=null_pos }; ParseReturn parse_prog(vector tokens) { Node node = epsilon_node; try { while (tokens.size() != 0) { ParseReturn ret = parse_instruction(tokens); tokens = ret.tokens; if (!holds_alternative(ret.node) || get(ret.node).type != NodeType::Epsilon) { if (holds_alternative(node) && get(node).type == NodeType::Epsilon) { node = ret.node; // Remove base epsilon node } else { InnerNode new_node = { .type=NodeType::Prog, .children={node, ret.node}, .pos=get_node_pos(ret.node) }; node = new_node; } } } } catch (const ParseException& pex) {} return { .node=node, .tokens=tokens }; } ParseReturn parse_instruction(vector tokens) { try { //* Instruction -> Statement ParseReturn ret = parse_statement(tokens); return ret; } catch (const ParseException& pex) { try { //* Instruction -> ExprStatement; ParseReturn ret = parse_expr_statement(tokens); tokens = ret.tokens; if (tokens.back().type != TokenType::Semicolon) throw ParseException(); tokens.pop_back(); return { .node=ret.node, .tokens=tokens }; } catch (const ParseException& pex) { try { //* Instruction -> Expr; ParseReturn ret = parse_expr(tokens); tokens = ret.tokens; if (tokens.size() < 1 || tokens.back().type != TokenType::Semicolon) throw ParseException(); tokens.pop_back(); return { .node=ret.node, .tokens=tokens }; } catch (const ParseException& pex) { //* Instruction -> ; if (tokens.size() < 1 || tokens.back().type != TokenType::Semicolon) throw ParseException(); tokens.pop_back(); // On enlève le ';' de la liste de tokens return { .node=epsilon_node, .tokens=tokens }; } } } } ParseReturn parse_statement(vector tokens) { if (tokens.size() < 2) throw ParseException(); switch (tokens.back().type) { case TokenType::Break: case TokenType::Continue: { Token token = tokens.back(); tokens.pop_back(); if (tokens.back().type != TokenType::Semicolon) throw SyntaxError(ErrorType::ExpectedSemicolon, tokens.back().pos); tokens.pop_back(); return { .node=token, .tokens=tokens }; } case TokenType::Return: { CodePosition pos = tokens.back().pos; tokens.pop_back(); vector children; if (tokens.back().type != TokenType::Semicolon) { ParseReturn ret = parse_expr(tokens); tokens = ret.tokens; if (tokens.back().type != TokenType::Semicolon) throw SyntaxError(ErrorType::ExpectedSemicolon, tokens.back().pos); children = {ret.node}; } tokens.pop_back(); InnerNode node = { .type=NodeType::Return, .children=children, .pos=pos }; return { .node=node, .tokens=tokens }; } case TokenType::While: case TokenType::If: { CodePosition pos = tokens.back().pos; NodeType type; switch (tokens.back().type) { case TokenType::If: type = NodeType::If; break; case TokenType::While: type = NodeType::While; break; default: break; // Impossible } tokens.pop_back(); if (tokens.back().type != TokenType::LParenthese) // Opening ( throw SyntaxError(ErrorType::ExceptedLParen, tokens.back().pos); tokens.pop_back(); ParseReturn ret = parse_expr(tokens); // Expr int nb_tok = ret.tokens.size(); // Closing ) if (nb_tok == 0 || ret.tokens.back().type != TokenType::RParenthese) throw SyntaxError( ErrorType::ExpectedRParen, nb_tok == 0 ? tokens.back().pos : ret.tokens.back().pos ); ret.tokens.pop_back(); tokens = ret.tokens; Node expr = ret.node; try { ret = parse_instruction(tokens); // Instruction1 } catch (const ParseException& pex) { throw SyntaxError( ErrorType::InvalidSyntax, pos=tokens.back().pos ); } if (holds_alternative(ret.node) && ( get(ret.node).type == NodeType::AssignedDeclaration || get(ret.node).type == NodeType::Declaration ) ) throw SyntaxError( ErrorType::DependentDeclaration, pos=tokens.back().pos ); tokens = ret.tokens; Node instruction1 = ret.node; if (tokens.size() == 0 || tokens.back().type != TokenType::Else) { //* -> If (Expr) Instruction InnerNode node = { .type=type, .children={expr, instruction1}, .pos=pos }; return { .node=node, .tokens=tokens }; } tokens.pop_back(); // Else ret = parse_instruction(tokens); // Instruction2 tokens = ret.tokens; Node instruction2 = ret.node; InnerNode node = { .type=NodeType::IfElse, .children={expr, instruction1, instruction2}, .pos=pos }; return { .node=node, .tokens=tokens }; } case TokenType::For: { CodePosition pos = tokens.back().pos; tokens.pop_back(); if (tokens.back().type != TokenType::LParenthese) // Opening ( throw SyntaxError(ErrorType::ExceptedLParen, tokens.back().pos); tokens.pop_back(); ParseReturn ret1; if (tokens.size() >= 1 && tokens.back().type == TokenType::Semicolon) { ret1 = { .node=epsilon_node, .tokens=tokens }; } else { try { ret1 = parse_expr_statement(tokens); } catch (const ParseException& pex) { ret1 = parse_expr(tokens); } } int nb_tok = ret1.tokens.size(); // First ; if (nb_tok == 0 || ret1.tokens.back().type != TokenType::Semicolon) throw SyntaxError( ErrorType::ExpectedSemicolon, nb_tok == 0 ? tokens.back().pos : ret1.tokens.back().pos ); tokens = ret1.tokens; tokens.pop_back(); ParseReturn ret2; if (tokens.size() >= 1 && tokens.back().type == TokenType::Semicolon) { ret2 = { .node=epsilon_node, .tokens=tokens }; } else { ret2 = parse_expr(tokens); } nb_tok = ret2.tokens.size(); // Second ; if (nb_tok == 0 || ret2.tokens.back().type != TokenType::Semicolon) throw SyntaxError( ErrorType::ExpectedSemicolon, nb_tok == 0 ? tokens.back().pos : ret2.tokens.back().pos ); tokens = ret2.tokens; tokens.pop_back(); ParseReturn ret3; if (tokens.size() >= 1 && tokens.back().type == TokenType::RParenthese) { ret3 = { .node=epsilon_node, .tokens=tokens }; } else { ret3 = parse_expr(tokens); } nb_tok = ret3.tokens.size(); // Closing ) if (nb_tok == 0 || ret3.tokens.back().type != TokenType::RParenthese) throw SyntaxError( ErrorType::ExpectedRParen, nb_tok == 0 ? tokens.back().pos : ret3.tokens.back().pos ); tokens = ret3.tokens; tokens.pop_back(); ParseReturn ret_instruction = parse_instruction(tokens); tokens = ret_instruction.tokens; InnerNode node = { .type=NodeType::For, .children={ret1.node, ret2.node, ret3.node, ret_instruction.node}, .pos=pos }; return { .node=node, .tokens=tokens }; } case TokenType::LCurlyBracket: { CodePosition pos = tokens.back().pos; tokens.pop_back(); ParseReturn ret = parse_prog(tokens); if ( // No expression parsed, the next token is not a '}' holds_alternative(ret.node) && get(ret.node).type == NodeType::Epsilon && tokens.back().type != TokenType::RCurlyBracket ) throw SyntaxError( ErrorType::InvalidSyntax, tokens.back().pos ); int nb_tok = ret.tokens.size(); if (nb_tok == 0 || ret.tokens.back().type != TokenType::RCurlyBracket) throw SyntaxError( ErrorType::ExpectedRCurlyBracket, nb_tok == 0 ? tokens.back().pos : ret.tokens.back().pos ); tokens = ret.tokens; tokens.pop_back(); InnerNode node = { .type=NodeType::Bloc, .children={ret.node}, .pos=pos }; return { .node=node, .tokens=tokens }; } case TokenType::Identifier: { //* Type Token type = tokens.back(); tokens.pop_back(); //* ParIdentifier ParseReturn ret = parse_par_identifier(tokens); tokens = ret.tokens; if (!holds_alternative(ret.node)) throw ParseException(); // The parsing is incorrect Token identifier = get(ret.node); // LPar if (tokens.size() < 1 || tokens.back().type != TokenType::LParenthese) throw ParseException(); tokens.pop_back(); //* Args ParseReturn args_ret = parse_args(tokens); // RPar if (args_ret.tokens.size() < 1 || args_ret.tokens.back().type != TokenType::RParenthese) throw SyntaxError(ErrorType::ExpectedRParen, tokens.back().pos, {}); tokens = args_ret.tokens; tokens.pop_back(); if (tokens.size() < 1) throw SyntaxError(ErrorType::ExpectedSemicolon, identifier.pos, {}); if (tokens.back().type == TokenType::Semicolon) { //* -> Type ParIdentifier ( Args ) ; tokens.pop_back(); InnerNode node = { .type=NodeType::FunctionPrototype, .children={type, identifier, args_ret.node}, .pos=identifier.pos }; return { .node=node, .tokens=tokens }; } //* LCurly if (tokens.size() < 1 || tokens.back().type != TokenType::LCurlyBracket) throw SyntaxError(ErrorType::ExpectedLCurlyBracket, identifier.pos, {}); tokens.pop_back(); ParseReturn ret_prog = parse_prog(tokens); tokens = ret_prog.tokens; //* RCurly if (tokens.size() < 1 || tokens.back().type != TokenType::RCurlyBracket) throw SyntaxError(ErrorType::ExpectedRCurlyBracket, identifier.pos, {}); tokens.pop_back(); InnerNode node = { .type=NodeType::FunctionDeclaration, .children={type, identifier, args_ret.node, ret_prog.node}, .pos=identifier.pos }; return { .node=node, .tokens=tokens }; } default: throw ParseException(); } } ParseReturn parse_expr_statement(vector tokens) { if (tokens.size() < 1 || tokens.back().type != TokenType::Identifier) throw ParseException(); Token type = tokens.back(); tokens.pop_back(); ParseReturn ret = parse_par_identifier(tokens); tokens = ret.tokens; if (!holds_alternative(ret.node)) throw ParseException(); // The parsing is incorrect Token identifier = get(ret.node); //* ExprStatement -> Type Identifier if (tokens.size() < 1 || tokens.back().type != TokenType::Equal) { InnerNode node = { .type=NodeType::Declaration, .children={type, identifier}, .pos=identifier.pos }; return { .node=node, .tokens=tokens }; } //* ExprStatement -> Type Identifier = Expr // On retire le '=' tokens.pop_back(); ret = parse_expr(tokens); InnerNode node = { .type=NodeType::AssignedDeclaration, .children={type, identifier, ret.node}, .pos=get_node_pos(ret.node) }; return { .node=node, .tokens=ret.tokens }; } ParseReturn parse_expr(vector tokens) { if (tokens.size() == 0) throw ParseException(); // At least 1 Term ParseReturn ret = parse_comp(tokens); tokens = ret.tokens; Node node = ret.node; //* We construct a tree while (tokens.size() != 0 && tokens.back().type == TokenType::Comma) { Token last_token; try { last_token = tokens.back(); tokens.pop_back(); ParseReturn ret = parse_comp(tokens); tokens = ret.tokens; InnerNode new_node = { .type=NodeType::Comma, .children={node, ret.node}, .pos=last_token.pos }; node = new_node; } catch (const ParseException& pex) { tokens.emplace_back(last_token); return { .node=node, .tokens=tokens }; } } return { .node=node, .tokens=tokens }; } ParseReturn parse_args(vector tokens) { vector nodes; CodePosition pos = tokens.back().pos; while (tokens.size() != 0 && tokens.back().type != TokenType::RParenthese) { if (tokens.back().type != TokenType::Identifier) throw ParseException(); Token type = tokens.back(); tokens.pop_back(); // TODO: if type is void, return ParseReturn ret = parse_par_identifier(tokens); tokens = ret.tokens; if (!holds_alternative(ret.node)) throw ParseException(); // The parsing is incorrect Token identifier = get(ret.node); InnerNode node = { .type=NodeType::Declaration, .children={type, identifier}, .pos=identifier.pos }; nodes.push_back(node); if (tokens.back().type == TokenType::Comma) { tokens.pop_back(); } else { break; } } InnerNode node = { .type=NodeType::FunctionArgs, .children=nodes, .pos=pos }; return { .node=node, .tokens=tokens }; } ParseReturn parse_comp(vector tokens) { if (tokens.size() == 0) throw ParseException(); // At least 1 Sum ParseReturn ret = parse_sum(tokens); tokens = ret.tokens; Node node = ret.node; //* We construct a tree while (tokens.size() != 0) { NodeType type; switch (tokens.back().type) { case TokenType::DoubleEqual: type = NodeType::Eq; break; case TokenType::NotEqual: type = NodeType::Neq; break; case TokenType::Lt: type = NodeType::Lt; break; case TokenType::Gt: type = NodeType::Gt; break; case TokenType::Leq: type = NodeType::Leq; break; case TokenType::Geq: type = NodeType::Geq; break; case TokenType::Land: type = NodeType::Land; break; case TokenType::Lor: type = NodeType::Lor; break; default: return { .node=node, .tokens=tokens }; } Token last_token; try { last_token = tokens.back(); tokens.pop_back(); ParseReturn ret = parse_sum(tokens); tokens = ret.tokens; InnerNode new_node = { .type=type, .children={node, ret.node}, .pos=last_token.pos }; node = new_node; } catch (const ParseException& pex) { tokens.emplace_back(last_token); return { .node=node, .tokens=tokens }; } } return { .node=node, .tokens=tokens }; } ParseReturn parse_sum(vector tokens) { if (tokens.size() == 0) throw ParseException(); // At least 1 Term ParseReturn ret = parse_term(tokens); tokens = ret.tokens; Node node = ret.node; //* We construct a tree while (tokens.size() != 0) { NodeType type; switch (tokens.back().type) { case (TokenType::Plus): type = NodeType::Plus; break; case (TokenType::Minus): type = NodeType::Minus; break; default: return { .node=node, .tokens=tokens }; } Token last_token; try { last_token = tokens.back(); tokens.pop_back(); ParseReturn ret = parse_term(tokens); tokens = ret.tokens; InnerNode new_node = { .type=type, .children={node, ret.node}, .pos=last_token.pos }; node = new_node; } catch (const ParseException& pex) { tokens.emplace_back(last_token); return { .node=node, .tokens=tokens }; } } return { .node=node, .tokens=tokens }; } ParseReturn parse_term(vector tokens) { if (tokens.size() == 0) throw ParseException(); // At least 1 Unary ParseReturn ret = parse_unary(tokens); tokens = ret.tokens; Node node = ret.node; //* We construct a tree while (tokens.size() != 0) { NodeType type; switch(tokens.back().type) { case (TokenType::Star): type = NodeType::Mult; break; case (TokenType::Slash): type = NodeType::Div; break; case (TokenType::Percent): type = NodeType::Mod; break; default: return { .node=node, .tokens=tokens }; } Token last_token; try { last_token = tokens.back(); tokens.pop_back(); ParseReturn ret = parse_unary(tokens); tokens = ret.tokens; InnerNode new_node = { .type=type, .children={node, ret.node}, .pos=last_token.pos }; node = new_node; } catch (const ParseException& pex) { tokens.emplace_back(last_token); return { .node=node, .tokens=tokens }; } } return { .node=node, .tokens=tokens }; } ParseReturn parse_unary(vector tokens) { if (tokens.size() > 0) { NodeType type; switch (tokens.back().type) { case TokenType::Minus: //* Unary -> - Unary type = NodeType::UnaryMinus; break; case TokenType::Plus: //* Unary -> + Unary type = NodeType::UnaryPlus; break; case TokenType::Not: //* Unary -> ! Unary type = NodeType::Neg; break; default: type = NodeType::Epsilon; // No valid element found } if (type != NodeType::Epsilon) { tokens.pop_back(); ParseReturn ret = parse_unary(tokens); InnerNode node = { .type=type, .children={ ret.node }, .pos=get_node_pos(ret.node) }; return { .node=node, .tokens=ret.tokens }; } } //* Unary -> Val return parse_val(tokens); } ParseReturn parse_val(vector tokens) { if (tokens.size() == 0) throw ParseException(); switch (tokens.back().type) { case TokenType::Litteral: { //* Val -> Number Token number = tokens.back(); tokens.pop_back(); return { .node=number, .tokens=tokens };; } case TokenType::DoublePlus: { //* Val -> ++ParIdentifier tokens.pop_back(); ParseReturn ret = parse_par_identifier(tokens); InnerNode node = { .type = NodeType::LIncr, .children = { ret.node }, .pos=get_node_pos(ret.node) }; return { .node=node, .tokens=ret.tokens }; } case TokenType::DoubleMinus: { //* Val -> --ParIdentifier tokens.pop_back(); ParseReturn ret = parse_par_identifier(tokens); InnerNode node = { .type = NodeType::LDecr, .children = { ret.node }, .pos=get_node_pos(ret.node) }; return { .node=node, .tokens=ret.tokens }; } default: { try { //* Val -> ParIdentifier... ParseReturn ret = parse_par_identifier(tokens); if (ret.tokens.size() >= 1) { switch (ret.tokens.back().type){ case TokenType::DoublePlus: { //* Val -> ParIdentifier++ ret.tokens.pop_back(); InnerNode node = { .type = NodeType::RIncr, .children = { ret.node }, .pos=get_node_pos(ret.node) }; return { .node=node, .tokens=ret.tokens }; } case TokenType::DoubleMinus: { //* Val -> ParIdentifier-- ret.tokens.pop_back(); InnerNode node = { .type = NodeType::RDecr, .children = { ret.node }, .pos=get_node_pos(ret.node) }; return { .node=node, .tokens=ret.tokens }; } case TokenType::LParenthese: { ret.tokens.pop_back(); ParseReturn ret_expr = parse_expr(ret.tokens); if (ret_expr.tokens.size() < 1 || ret_expr.tokens.back().type != TokenType::RParenthese) throw SyntaxError( ErrorType::ExpectedRParen, ret_expr.tokens.size() < 1 ? ret.tokens.back().pos : ret_expr.tokens.back().pos, {} ); ret_expr.tokens.pop_back(); InnerNode node = { .type = NodeType::FunctionCall, .children = { ret.node, ret_expr.node }, .pos=get_node_pos(ret.node) }; return { .node=node, .tokens=ret_expr.tokens }; } case TokenType::Equal: { //* Val -> ParIdentifier = (Expr) ret.tokens.pop_back(); ParseReturn ret_expr = parse_expr(ret.tokens); InnerNode node = { .type = NodeType::Assignment, .children = { ret.node, ret_expr.node }, .pos=get_node_pos(ret_expr.node) }; return { .node=node, .tokens=ret_expr.tokens }; } default: break; } } //* Val -> ParIdentifier return ret; } catch (const ParseException& pex) { //* Val -> (Expr) if (tokens.back().type != TokenType::LParenthese) throw ParseException(); tokens.pop_back(); ParseReturn ret = parse_expr(tokens); tokens=ret.tokens; if (tokens.size() < 1 || tokens.back().type != TokenType::RParenthese) throw SyntaxError(ErrorType::ExpectedRParen, tokens.back().pos); tokens.pop_back(); return { .node=ret.node, .tokens=tokens }; } } } } ParseReturn parse_par_identifier(vector tokens) { if (tokens.size() < 1) throw ParseException(); if (tokens.back().type == TokenType::Identifier) { Token identifier = tokens.back(); tokens.pop_back(); return { //* ParIdentifier -> Identifier .node=identifier, .tokens=tokens }; } #ifndef PAR_IDENTIFIER throw ParseException(); #else if (tokens.back().type != TokenType::LParenthese) throw ParseException(); tokens.pop_back(); ParseReturn ret = parse_par_identifier(tokens); tokens = ret.tokens; if (tokens.back().type != TokenType::RParenthese) throw ParseException(); tokens.pop_back(); return { //* ParIdentifier -> (ParIdentifier) .node=ret.node, .tokens=tokens }; #endif }