diff --git a/src/include/parser.h b/src/include/parser.h index 479ea6e..6608a2c 100644 --- a/src/include/parser.h +++ b/src/include/parser.h @@ -14,7 +14,9 @@ Instruction -> Statement | ExprStatement; | Expr; | ; Statement -> | { Prog } + | If (Expr) Instruction | If (Expr) Instruction Else Instruction + ExprStatement -> | Type ParIdentifier = Expr // AssignedDeclaration | Type ParIdentifier // Declaration @@ -74,7 +76,9 @@ enum class NodeType { LIncr, // -> ++ParIdentifier RIncr, // -> ParIdentifier++ LDecr, // -> --ParIdentifier - RDecr // -> ParIdentifier-- + RDecr, // -> ParIdentifier-- + If, // -> If (Expr) Instruction + IfElse // -> If (Expr) Instruction Else Instruction }; struct InnerNode; @@ -127,6 +131,11 @@ public: */ Node parse(vector tokens); +/** + * Parse something derived from Prog +*/ +ParseReturn parse_prog(vector tokens); + /** * Parse something derivated from Instruction */ diff --git a/src/parser.cpp b/src/parser.cpp index 8cd42fb..e5cc4c1 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -12,7 +12,7 @@ CodePosition null_pos = { }; -const char* _node_names[] = {"Prog", "Epsilon", "AssignedDeclaration", "Declaration", "Plus", "Minus", "Mult", "Div", "Mod", "UnaryMinus", "UnaryPlus", "Assignment", "LIncr", "RIncr", "LDecr", "RDecr"}; +const char* _node_names[] = {"Prog", "Epsilon", "AssignedDeclaration", "Declaration", "Plus", "Minus", "Mult", "Div", "Mod", "UnaryMinus", "UnaryPlus", "Assignment", "LIncr", "RIncr", "LDecr", "RDecr", "If", "IfElse"}; void _debug_print_tree(const Node& node, int depth, const string& prefix = "") { if (holds_alternative(node)) { const InnerNode& innerNode = get(node); @@ -56,13 +56,26 @@ Node parse(vector tokens) { if (tokens.size() == 0) { throw SyntaxError("Input must not be empty", null_pos); } - - // At least 1 instruction - ParseReturn ret = parse_instruction(tokens); - tokens = ret.tokens; + ParseReturn ret = parse_prog(tokens); - Node node = ret.node; + if (ret.tokens.size() != 0) { + CodePosition pos = 
ret.tokens.back().pos; + throw SyntaxError("Unable to parse", pos); + } + + return ret.node; +} + + +ParseReturn parse_prog(vector tokens) { + vector children; + InnerNode epsilon_node = { + .type=NodeType::Epsilon, + .children=children, + .pos=null_pos + }; + Node node = epsilon_node; try { while (tokens.size() != 0) { @@ -70,21 +83,27 @@ Node parse(vector tokens) { tokens = ret.tokens; if (!holds_alternative(ret.node) || get(ret.node).type != NodeType::Epsilon) { - InnerNode new_node = { - .type=NodeType::Prog, - .children={node, ret.node}, - .pos=get_node_pos(ret.node) - }; - node = new_node; + if (holds_alternative(node) && get(node).type == NodeType::Epsilon) { + node = ret.node; // Remove base epsilon node + } else { + InnerNode new_node = { + .type=NodeType::Prog, + .children={node, ret.node}, + .pos=get_node_pos(ret.node) + }; + node = new_node; + } } } - return node; - } catch (const ParseException& pex) { - CodePosition pos = tokens.back().pos; - throw SyntaxError("Unable to parse", pos); - } + } catch (const ParseException& pex) {} + + return { + .node=node, + .tokens=tokens + }; } + ParseReturn parse_instruction(vector tokens) { try { //* Instruction -> Statement ParseReturn ret = parse_statement(tokens); @@ -140,9 +159,95 @@ ParseReturn parse_instruction(vector tokens) { } ParseReturn parse_statement(vector tokens) { - (void)tokens; - // Aucune règle - throw ParseException(); + if (tokens.size() < 2) // 'If' / 'For' '(' at least + throw ParseException(); + + switch (tokens.back().type) { + case TokenType::If: { + CodePosition pos = tokens.back().pos; + + tokens.pop_back(); + if (tokens.back().type != TokenType::LParenthese) // Opening ( + throw SyntaxError("Missing '('", tokens.back().pos); + + tokens.pop_back(); + ParseReturn ret = parse_expr(tokens); // Expr + + int nb_tok = ret.tokens.size(); // Closing ) + if (nb_tok == 0 || ret.tokens.back().type != TokenType::RParenthese) + throw SyntaxError( + "Missing ')'", + nb_tok == 0 ? 
tokens.back().pos : ret.tokens.back().pos + ); + + ret.tokens.pop_back(); + tokens = ret.tokens; + Node expr = ret.node; + + ret = parse_instruction(tokens); // Instruction1 + tokens = ret.tokens; + Node instruction1 = ret.node; + + if (tokens.size() == 0 || tokens.back().type != TokenType::Else) { //* -> If (Expr) Instruction + InnerNode node = { + .type=NodeType::If, + .children={expr, instruction1}, + .pos=pos + }; + return { + .node=node, + .tokens=tokens + }; + } + + tokens.pop_back(); // Else + ret = parse_instruction(tokens); // Instruction2 + tokens = ret.tokens; + Node instruction2 = ret.node; + + InnerNode node = { + .type=NodeType::IfElse, + .children={expr, instruction1, instruction2}, + .pos=pos + }; + return { + .node=node, + .tokens=tokens + }; + } + case TokenType::LCurlyBracket: { + tokens.pop_back(); + + ParseReturn ret = parse_prog(tokens); + if ( // No expression parsed, the next token is not a '}' + holds_alternative(ret.node) && + get(ret.node).type == NodeType::Epsilon && + tokens.back().type != TokenType::RCurlyBracket + ) + throw SyntaxError( + "Invalid Syntax. Missing ';' ?", + tokens.back().pos + ); + + int nb_tok = ret.tokens.size(); + if (nb_tok == 0 || ret.tokens.back().type != TokenType::RCurlyBracket) + throw SyntaxError( + "Missing '}'", + nb_tok == 0 ? 
tokens.back().pos : ret.tokens.back().pos + ); + + tokens = ret.tokens; + tokens.pop_back(); + + return { + .node=ret.node, + .tokens=tokens + }; + + } + default: + throw ParseException(); + } } ParseReturn parse_expr_statement(vector tokens) { diff --git a/src/tokenize.cpp b/src/tokenize.cpp index 537d68f..4bb204e 100644 --- a/src/tokenize.cpp +++ b/src/tokenize.cpp @@ -109,6 +109,16 @@ vector tokenize(vector input, int initial_line) { tokens.emplace_back(token); j += m.str().length(); } + else if (str.starts_with("if")) { + Token token = { .type = TokenType::If, .pos = pos }; + tokens.emplace_back(token); + j += 2; + } + else if (str.starts_with("else")) { + Token token = { .type = TokenType::Else, .pos = pos }; + tokens.emplace_back(token); + j += 4; + } else if (regex_search(str, m, IDENTIFIER_REGEX, regex_constants::match_continuous)) { Token token = { .type = TokenType::Identifier, @@ -188,16 +198,6 @@ vector tokenize(vector input, int initial_line) { tokens.emplace_back(token); j += 1; } - else if (str.starts_with("if")) { - Token token = { .type = TokenType::If, .pos = pos }; - tokens.emplace_back(token); - j += 2; - } - else if (str.starts_with("else")) { - Token token = { .type = TokenType::Else, .pos = pos }; - tokens.emplace_back(token); - j += 4; - } else if (isspace(str[0])) { j += 1; }