Added ParIdentifier, ++, --

(tokens & ast for now)
This commit is contained in:
augustin64 2023-11-14 17:00:34 +01:00
parent 0b79ee7822
commit fcd0c34885
4 changed files with 165 additions and 40 deletions

View File

@ -14,8 +14,8 @@ Instruction -> Statement | ExprStatement; | Expr; | ;
Statement -> // Rien pour l'instant, mais "for", "if" etc
ExprStatement ->
| Type Identifier = Expr // AssignedDeclaration
| Type Identifier // Declaration
| Type ParIdentifier = Expr // AssignedDeclaration
| Type ParIdentifier // Declaration
Expr ->
@ -35,10 +35,21 @@ U ->
| + U
F ->
| (Expr)
| Identifier
| Number
| Identifier = Expr // Assignment
| ++ParIdentifier
| --ParIdentifier
| ParIdentifier = Expr // Assignment
| ParIdentifier++
| ParIdentifier--
| ParIdentifier // This makes the grammar ambiguous but simpler to parse
| (Expr)
ParIdentifier ->
| Identifier
| (ParIdentifier)
*/
/**
@ -57,7 +68,11 @@ enum class NodeType {
Mod, // -> F % T
UnaryMinus, // -> -F
UnaryPlus, // -> +F
Assignment // -> Identifier = Expr
Assignment, // -> Identifier = Expr
LIncr, // -> ++ParIdentifier
RIncr, // -> ParIdentifier++
LDecr, // -> --ParIdentifier
RDecr // -> ParIdentifier--
};
struct InnerNode;
@ -142,6 +157,12 @@ ParseReturn parse_u(vector<Token> tokens);
*/
ParseReturn parse_f(vector<Token> tokens);
/**
 * Parse something derived from ParIdentifier
 * (an identifier with 0+ parentheses around it):
 *   ParIdentifier -> Identifier | (ParIdentifier)
 *
 * @param tokens tokens still to be parsed; the next one to read is tokens.back()
 * @return the identifier token as the node, plus the tokens left once the
 *         identifier and its surrounding parentheses have been consumed
 * @throws ParseException when the tokens do not start with a ParIdentifier
 */
ParseReturn parse_par_identifier(vector<Token> tokens);
/**
* Prints a tree for debugging it
*/

View File

@ -6,7 +6,7 @@
#include <string>
using namespace std;
enum class TokenType { Type, Identifier, Int, Plus, Minus, Star, Slash, Percent, Equal, Semicolon, LParenthese, RParenthese };
// Kinds of token handled by the tokenizer and the parser.
enum class TokenType {
    Type,
    Identifier,
    Int,
    Plus,         // "+"
    Minus,        // "-"
    DoublePlus,   // "++"
    DoubleMinus,  // "--"
    Star,         // "*"
    Slash,
    Percent,
    Equal,
    Semicolon,
    LParenthese,
    RParenthese
};
enum class Type { Int };
using TokenData = variant<int, string, Type>;

View File

@ -7,7 +7,7 @@ using namespace std;
#include "include/parser.h"
const char* _node_names[] = {"Prog", "Epsilon", "AssignedDeclaration", "Declaration", "Plus", "Minus", "Mult", "Div", "Mod", "UnaryMinus", "UnaryPlus", "Assignment"};
// Printable names for the AST node kinds.
// NOTE(review): presumably indexed by NodeType value, so the entries must stay
// in the same order as that enum — confirm against the parser header.
const char* _node_names[] = {
    "Prog",
    "Epsilon",
    "AssignedDeclaration",
    "Declaration",
    "Plus",
    "Minus",
    "Mult",
    "Div",
    "Mod",
    "UnaryMinus",
    "UnaryPlus",
    "Assignment",
    "LIncr",
    "RIncr",
    "LDecr",
    "RDecr"
};
void _debug_print_tree(const Node& node, int depth, const string& prefix = "") {
if (holds_alternative<InnerNode>(node)) {
const InnerNode& innerNode = get<InnerNode>(node);
@ -135,11 +135,13 @@ ParseReturn parse_expr_statement(vector<Token> tokens) {
Token type = tokens.back();
tokens.pop_back();
if (tokens.size() < 1 || tokens.back().type != TokenType::Identifier)
throw ParseException();
ParseReturn ret = parse_par_identifier(tokens);
tokens = ret.tokens;
Token identifier = tokens.back();
tokens.pop_back();
if (!holds_alternative<Token>(ret.node))
throw ParseException(); // The parsing is incorrect
Token identifier = get<Token>(ret.node);
//* ExprStatement -> Type Identifier
if (tokens.size() < 1 || tokens.back().type != TokenType::Equal) {
@ -157,7 +159,7 @@ ParseReturn parse_expr_statement(vector<Token> tokens) {
// On retire le '='
tokens.pop_back();
ParseReturn ret = parse_expr(tokens);
ret = parse_expr(tokens);
InnerNode node = {
.type=NodeType::AssignedDeclaration,
@ -325,7 +327,7 @@ ParseReturn parse_f(vector<Token> tokens) {
throw ParseException();
switch (tokens.back().type) {
case TokenType::Int: { //* U -> Number
case TokenType::Int: { //* F -> Number
Token number = tokens.back();
tokens.pop_back();
return {
@ -333,45 +335,131 @@ ParseReturn parse_f(vector<Token> tokens) {
.tokens=tokens
};;
}
case TokenType::LParenthese: { //* U -> Identifier
tokens.pop_back();
ParseReturn ret = parse_expr(tokens);
tokens=ret.tokens;
if (tokens.size() < 1 || tokens.back().type != TokenType::RParenthese)
throw SyntaxError("Missing ')'");
case TokenType::DoublePlus: { //* F -> ++ParIdentifier
tokens.pop_back();
ParseReturn ret = parse_par_identifier(tokens);
InnerNode node = {
.type = NodeType::LIncr,
.children = { ret.node }
};
return {
.node=ret.node,
.tokens=tokens
.node=node,
.tokens=ret.tokens
};
}
case TokenType::Identifier: { //* U -> Identifier...
Token identifier = tokens.back();
case TokenType::DoubleMinus: { //* F -> --ParIdentifier
tokens.pop_back();
ParseReturn ret = parse_par_identifier(tokens);
InnerNode node = {
.type = NodeType::LDecr,
.children = { ret.node }
};
return {
.node=node,
.tokens=ret.tokens
};
}
default: {
try { //* F -> ParIdentifier...
ParseReturn ret = parse_par_identifier(tokens);
if (ret.tokens.size() >= 1) {
switch (ret.tokens.back().type){
case TokenType::DoublePlus: { //* F -> ParIdentifier++
ret.tokens.pop_back();
InnerNode node = {
.type = NodeType::RIncr,
.children = { ret.node }
};
return {
.node=node,
.tokens=ret.tokens
};
}
case TokenType::DoubleMinus: { //* F -> ParIdentifier--
ret.tokens.pop_back();
InnerNode node = {
.type = NodeType::RDecr,
.children = { ret.node }
};
return {
.node=node,
.tokens=ret.tokens
};
}
case TokenType::Equal: { //* F -> ParIdentifier = (Expr)
ret.tokens.pop_back();
ParseReturn ret_expr = parse_expr(ret.tokens);
InnerNode node = {
.type = NodeType::Assignment,
.children = { ret.node, ret_expr.node }
};
return {
.node=node,
.tokens=ret_expr.tokens
};
}
default:
break;
}
}
//* F -> ParIdentifier
return ret;
} catch (const ParseException& pex) { //* F -> (Expr)
if (tokens.back().type != TokenType::LParenthese)
throw ParseException();
if (tokens.size() > 0 && tokens.back().type == TokenType::Equal) { //* U -> Identifier = Expr
tokens.pop_back();
ParseReturn ret = parse_expr(tokens);
tokens=ret.tokens;
if (tokens.size() < 1 || tokens.back().type != TokenType::RParenthese)
throw SyntaxError("Missing ')'");
tokens.pop_back();
InnerNode node = {
.type = NodeType::Assignment,
.children = { identifier, ret.node }
};
return {
.node=node,
.tokens=ret.tokens
.node=ret.node,
.tokens=tokens
};
}
return { //* U -> Identifier
.node=identifier,
.tokens=tokens
};
}
default:
throw ParseException();
}
}
/**
 * Parse a ParIdentifier: an identifier wrapped in zero or more pairs of
 * parentheses.
 *
 *   ParIdentifier -> Identifier | (ParIdentifier)
 *
 * The next token to read is always tokens.back().
 *
 * @param tokens tokens still to be parsed (taken by value)
 * @return the identifier token as the node, plus the tokens left once the
 *         identifier and its surrounding parentheses have been consumed
 * @throws ParseException when the tokens do not form a ParIdentifier,
 *         including when the input ends before a closing ')'
 */
ParseReturn parse_par_identifier(vector<Token> tokens) {
    if (tokens.empty())
        throw ParseException();

    if (tokens.back().type == TokenType::Identifier) {
        Token identifier = tokens.back();
        tokens.pop_back();
        return { //* ParIdentifier -> Identifier
            .node=identifier,
            .tokens=tokens
        };
    }

    if (tokens.back().type != TokenType::LParenthese)
        throw ParseException();

    // Drop the '(' and parse what it encloses
    tokens.pop_back();
    ParseReturn ret = parse_par_identifier(tokens);
    tokens = ret.tokens;

    // The inner parse may have consumed every remaining token (e.g. "(a"),
    // and calling back() on an empty vector is undefined behaviour: check
    // for emptiness before peeking at the expected ')'.
    if (tokens.empty() || tokens.back().type != TokenType::RParenthese)
        throw ParseException();

    tokens.pop_back();
    return { //* ParIdentifier -> (ParIdentifier)
        .node=ret.node,
        .tokens=tokens
    };
}

View File

@ -26,6 +26,12 @@ void _debug_print_token(Token token) {
case TokenType::Minus:
cout << "-";
break;
case TokenType::DoublePlus:
cout << "++";
break;
case TokenType::DoubleMinus:
cout << "--";
break;
case TokenType::Star:
cout << "*";
break;
@ -87,6 +93,16 @@ vector<Token> tokenize(string str) {
tokens.emplace_back(token);
str.erase(0, m.str().length());
}
else if (str.size() >= 2 && str[0] == '+' && str[1] == '+') {
Token token = { .type = TokenType::DoublePlus };
tokens.emplace_back(token);
str.erase(0, 2);
}
else if (str.size() >= 2 && str[0] == '-' && str[1] == '-') {
Token token = { .type = TokenType::DoubleMinus };
tokens.emplace_back(token);
str.erase(0, 2);
}
else if (str[0] == '+') {
Token token = { .type = TokenType::Plus };
tokens.emplace_back(token);