diff --git a/src/include/parser.h b/src/include/parser.h index e95cccb..f9d07de 100644 --- a/src/include/parser.h +++ b/src/include/parser.h @@ -14,8 +14,8 @@ Instruction -> Statement | ExprStatement; | Expr; | ; Statement -> // Rien pour l'instant, mais "for", "if" etc ExprStatement -> - | Type Identifier = Expr // AssignedDeclaration - | Type Identifier // Declaration + | Type ParIdentifier = Expr // AssignedDeclaration + | Type ParIdentifier // Declaration Expr -> @@ -35,10 +35,21 @@ U -> | + U F -> - | (Expr) - | Identifier | Number - | Identifier = Expr // Assignment + + | ++ParIdentifier + | --ParIdentifier + + | ParIdentifier = Expr // Assignment + | ParIdentifier++ + | ParIdentifier-- + | ParIdentifier // This makes the grammar ambiguous but simpler to parse + + | (Expr) + +ParIdentifier -> + | Identifier + | (ParIdentifier) */ /** @@ -57,7 +68,11 @@ enum class NodeType { Mod, // -> F % T UnaryMinus, // -> -F UnaryPlus, // -> +F - Assignment // -> Identifier = Expr + Assignment, // -> ParIdentifier = Expr + LIncr, // -> ++ParIdentifier + RIncr, // -> ParIdentifier++ + LDecr, // -> --ParIdentifier + RDecr // -> ParIdentifier-- }; struct InnerNode; @@ -142,6 +157,12 @@ ParseReturn parse_u(vector tokens); */ ParseReturn parse_f(vector tokens); +/** + * Parse something derived from ParIdentifier + * (An identifier with 0+ parentheses around it) +*/ +ParseReturn parse_par_identifier(vector tokens); + /** * Prints a tree for debugging it */ diff --git a/src/include/tokenize.h b/src/include/tokenize.h index d0ceacc..15a1fe8 100644 --- a/src/include/tokenize.h +++ b/src/include/tokenize.h @@ -6,7 +6,7 @@ #include using namespace std; -enum class TokenType { Type, Identifier, Int, Plus, Minus, Star, Slash, Percent, Equal, Semicolon, LParenthese, RParenthese }; +enum class TokenType { Type, Identifier, Int, Plus, Minus, DoublePlus, DoubleMinus, Star, Slash, Percent, Equal, Semicolon, LParenthese, RParenthese }; enum class Type { Int }; using TokenData = variant; diff
--git a/src/parser.cpp b/src/parser.cpp index 30ea639..0d9e9aa 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -7,7 +7,7 @@ using namespace std; #include "include/parser.h" -const char* _node_names[] = {"Prog", "Epsilon", "AssignedDeclaration", "Declaration", "Plus", "Minus", "Mult", "Div", "Mod", "UnaryMinus", "UnaryPlus", "Assignment"}; +const char* _node_names[] = {"Prog", "Epsilon", "AssignedDeclaration", "Declaration", "Plus", "Minus", "Mult", "Div", "Mod", "UnaryMinus", "UnaryPlus", "Assignment", "LIncr", "RIncr", "LDecr", "RDecr"}; void _debug_print_tree(const Node& node, int depth, const string& prefix = "") { if (holds_alternative(node)) { const InnerNode& innerNode = get(node); @@ -135,11 +135,13 @@ ParseReturn parse_expr_statement(vector tokens) { Token type = tokens.back(); tokens.pop_back(); - if (tokens.size() < 1 || tokens.back().type != TokenType::Identifier) - throw ParseException(); + ParseReturn ret = parse_par_identifier(tokens); + tokens = ret.tokens; - Token identifier = tokens.back(); - tokens.pop_back(); + if (!holds_alternative(ret.node)) + throw ParseException(); // The parsing is incorrect + + Token identifier = get(ret.node); //* ExprStatement -> Type Identifier if (tokens.size() < 1 || tokens.back().type != TokenType::Equal) { @@ -157,7 +159,7 @@ ParseReturn parse_expr_statement(vector tokens) { // On retire le '=' tokens.pop_back(); - ParseReturn ret = parse_expr(tokens); + ret = parse_expr(tokens); InnerNode node = { .type=NodeType::AssignedDeclaration, @@ -325,7 +327,7 @@ ParseReturn parse_f(vector tokens) { throw ParseException(); switch (tokens.back().type) { - case TokenType::Int: { //* U -> Number + case TokenType::Int: { //* F -> Number Token number = tokens.back(); tokens.pop_back(); return { @@ -333,45 +335,131 @@ ParseReturn parse_f(vector tokens) { .tokens=tokens };; } - case TokenType::LParenthese: { //* U -> Identifier - tokens.pop_back(); - ParseReturn ret = parse_expr(tokens); - tokens=ret.tokens; - - if
(tokens.size() < 1 || tokens.back().type != TokenType::RParenthese) - throw SyntaxError("Missing ')'"); - + case TokenType::DoublePlus: { //* F -> ++ParIdentifier tokens.pop_back(); + ParseReturn ret = parse_par_identifier(tokens); + InnerNode node = { + .type = NodeType::LIncr, + .children = { ret.node } + }; return { - .node=ret.node, - .tokens=tokens + .node=node, + .tokens=ret.tokens }; } - case TokenType::Identifier: { //* U -> Identifier... - Token identifier = tokens.back(); + case TokenType::DoubleMinus: { //* F -> --ParIdentifier tokens.pop_back(); + ParseReturn ret = parse_par_identifier(tokens); + + InnerNode node = { + .type = NodeType::LDecr, + .children = { ret.node } + }; + return { + .node=node, + .tokens=ret.tokens + }; + } + default: { + try { //* F -> ParIdentifier... + ParseReturn ret = parse_par_identifier(tokens); + + if (ret.tokens.size() >= 1) { + switch (ret.tokens.back().type){ + case TokenType::DoublePlus: { //* F -> ParIdentifier++ + ret.tokens.pop_back(); + InnerNode node = { + .type = NodeType::RIncr, + .children = { ret.node } + }; + return { + .node=node, + .tokens=ret.tokens + }; + } + case TokenType::DoubleMinus: { //* F -> ParIdentifier-- + ret.tokens.pop_back(); + InnerNode node = { + .type = NodeType::RDecr, + .children = { ret.node } + }; + return { + .node=node, + .tokens=ret.tokens + }; + } + case TokenType::Equal: { //* F -> ParIdentifier = Expr + ret.tokens.pop_back(); + ParseReturn ret_expr = parse_expr(ret.tokens); + + InnerNode node = { + .type = NodeType::Assignment, + .children = { ret.node, ret_expr.node } + }; + return { + .node=node, + .tokens=ret_expr.tokens + }; + } + default: + break; + } + } + //* F -> ParIdentifier + return ret; + + } catch (const ParseException& pex) { //* F -> (Expr) + if (tokens.back().type != TokenType::LParenthese) + throw ParseException(); - if (tokens.size() > 0 && tokens.back().type == TokenType::Equal) { //* U -> Identifier = Expr tokens.pop_back(); ParseReturn ret =
parse_expr(tokens); + tokens=ret.tokens; + + if (tokens.size() < 1 || tokens.back().type != TokenType::RParenthese) + throw SyntaxError("Missing ')'"); + + tokens.pop_back(); - InnerNode node = { - .type = NodeType::Assignment, - .children = { identifier, ret.node } - }; return { - .node=node, - .tokens=ret.tokens + .node=ret.node, + .tokens=tokens }; } - - return { //* U -> Identifier - .node=identifier, - .tokens=tokens - }; } - default: - throw ParseException(); } +} + + +ParseReturn parse_par_identifier(vector tokens) { + if (tokens.size() < 1) + throw ParseException(); + + if (tokens.back().type == TokenType::Identifier) { + Token identifier = tokens.back(); + tokens.pop_back(); + + return { //* ParIdentifier -> Identifier + .node=identifier, + .tokens=tokens + }; + } + + if (tokens.back().type != TokenType::LParenthese) + throw ParseException(); + + tokens.pop_back(); + ParseReturn ret = parse_par_identifier(tokens); + tokens = ret.tokens; + + if (tokens.size() < 1 || tokens.back().type != TokenType::RParenthese) // Input may end before the closing ')'; back() on an empty vector is undefined behaviour + throw ParseException(); + + tokens.pop_back(); + + return { //* ParIdentifier -> (ParIdentifier) + .node=ret.node, + .tokens=tokens + }; } \ No newline at end of file diff --git a/src/tokenize.cpp b/src/tokenize.cpp index a562361..3253986 100644 --- a/src/tokenize.cpp +++ b/src/tokenize.cpp @@ -26,6 +26,12 @@ void _debug_print_token(Token token) { case TokenType::Minus: cout << "-"; break; + case TokenType::DoublePlus: + cout << "++"; + break; + case TokenType::DoubleMinus: + cout << "--"; + break; case TokenType::Star: cout << "*"; break; @@ -87,6 +93,16 @@ vector tokenize(string str) { tokens.emplace_back(token); str.erase(0, m.str().length()); } + else if (str.size() >= 2 && str[0] == '+' && str[1] == '+') { + Token token = { .type = TokenType::DoublePlus }; + tokens.emplace_back(token); + str.erase(0, 2); + } + else if (str.size() >= 2 && str[0] == '-' && str[1] == '-') { + Token token = { .type = TokenType::DoubleMinus }; + tokens.emplace_back(token); +
str.erase(0, 2); + } else if (str[0] == '+') { Token token = { .type = TokenType::Plus }; tokens.emplace_back(token);