Add conditional statements parsing

This commit is contained in:
augustin64 2023-11-16 14:09:32 +01:00
parent 9a4d993b4b
commit 1e0d372fc6
3 changed files with 145 additions and 31 deletions

View File

@ -14,7 +14,9 @@ Instruction -> Statement | ExprStatement; | Expr; | ;
Statement ->
| { Prog }
| If (Expr) Instruction
| If (Expr) Instruction Else Instruction
ExprStatement ->
| Type ParIdentifier = Expr // AssignedDeclaration
| Type ParIdentifier // Declaration
@ -74,7 +76,9 @@ enum class NodeType {
LIncr, // -> ++ParIdentifier
RIncr, // -> ParIdentifier++
LDecr, // -> --ParIdentifier
RDecr // -> ParIdentifier--
RDecr, // -> ParIdentifier--
If, // -> If (Expr) Instruction
IfElse // -> If (Expr) Instruction Else Instruction
};
struct InnerNode;
@ -127,6 +131,11 @@ public:
*/
Node parse(vector<Token> tokens);
/**
* Parse something derived from Prog
*/
ParseReturn parse_prog(vector<Token> tokens);
/**
* Parse something derived from Instruction
*/

View File

@ -12,7 +12,7 @@ CodePosition null_pos = {
};
const char* _node_names[] = {"Prog", "Epsilon", "AssignedDeclaration", "Declaration", "Plus", "Minus", "Mult", "Div", "Mod", "UnaryMinus", "UnaryPlus", "Assignment", "LIncr", "RIncr", "LDecr", "RDecr"};
const char* _node_names[] = {"Prog", "Epsilon", "AssignedDeclaration", "Declaration", "Plus", "Minus", "Mult", "Div", "Mod", "UnaryMinus", "UnaryPlus", "Assignment", "LIncr", "RIncr", "LDecr", "RDecr", "If", "IfElse"};
void _debug_print_tree(const Node& node, int depth, const string& prefix = "") {
if (holds_alternative<InnerNode>(node)) {
const InnerNode& innerNode = get<InnerNode>(node);
@ -57,12 +57,25 @@ Node parse(vector<Token> tokens) {
throw SyntaxError("Input must not be empty", null_pos);
}
ParseReturn ret = parse_prog(tokens);
// At least 1 instruction
ParseReturn ret = parse_instruction(tokens);
tokens = ret.tokens;
if (ret.tokens.size() != 0) {
CodePosition pos = ret.tokens.back().pos;
throw SyntaxError("Unable to parse", pos);
}
Node node = ret.node;
return ret.node;
}
ParseReturn parse_prog(vector<Token> tokens) {
vector<Node> children;
InnerNode epsilon_node = {
.type=NodeType::Epsilon,
.children=children,
.pos=null_pos
};
Node node = epsilon_node;
try {
while (tokens.size() != 0) {
@ -70,6 +83,9 @@ Node parse(vector<Token> tokens) {
tokens = ret.tokens;
if (!holds_alternative<InnerNode>(ret.node) || get<InnerNode>(ret.node).type != NodeType::Epsilon) {
if (holds_alternative<InnerNode>(node) && get<InnerNode>(node).type == NodeType::Epsilon) {
node = ret.node; // Remove base epsilon node
} else {
InnerNode new_node = {
.type=NodeType::Prog,
.children={node, ret.node},
@ -78,13 +94,16 @@ Node parse(vector<Token> tokens) {
node = new_node;
}
}
return node;
} catch (const ParseException& pex) {
CodePosition pos = tokens.back().pos;
throw SyntaxError("Unable to parse", pos);
}
} catch (const ParseException& pex) {}
return {
.node=node,
.tokens=tokens
};
}
ParseReturn parse_instruction(vector<Token> tokens) {
try { //* Instruction -> Statement
ParseReturn ret = parse_statement(tokens);
@ -140,9 +159,95 @@ ParseReturn parse_instruction(vector<Token> tokens) {
}
/**
 * Parse something derived from Statement.
 *
 * Two productions are handled:
 *   Statement -> If (Expr) Instruction [Else Instruction]
 *   Statement -> { Prog }
 *
 * The token vector is consumed from its back: `tokens.back()` is the next
 * token to read and `pop_back()` consumes it.
 *
 * Returns the parsed node together with the tokens that remain to be consumed.
 * Throws ParseException when no Statement rule applies to the next token, and
 * SyntaxError when a rule was recognised but one of its delimiters is missing.
 */
ParseReturn parse_statement(vector<Token> tokens) {
    if (tokens.size() < 2) // 'If' / 'For' '(' at least
        throw ParseException();

    switch (tokens.back().type) {
        case TokenType::If: {
            CodePosition pos = tokens.back().pos;
            tokens.pop_back();

            // At least one token is left here thanks to the size check above.
            if (tokens.back().type != TokenType::LParenthese) // Opening (
                throw SyntaxError("Missing '('", tokens.back().pos);

            // Keep the position of '(' : once it is popped `tokens` may be
            // empty, and calling back() on an empty vector is undefined.
            CodePosition lparen_pos = tokens.back().pos;
            tokens.pop_back();

            ParseReturn ret = parse_expr(tokens); // Expr

            // Closing )
            if (ret.tokens.empty() || ret.tokens.back().type != TokenType::RParenthese) {
                // Report the offending token when there is one; otherwise the
                // first token of the expression; otherwise the '(' itself.
                CodePosition err_pos = !ret.tokens.empty()
                    ? ret.tokens.back().pos
                    : (!tokens.empty() ? tokens.back().pos : lparen_pos);
                throw SyntaxError("Missing ')'", err_pos);
            }
            ret.tokens.pop_back();

            tokens = ret.tokens;
            Node expr = ret.node;

            ret = parse_instruction(tokens); // Instruction1
            tokens = ret.tokens;
            Node instruction1 = ret.node;

            if (tokens.size() == 0 || tokens.back().type != TokenType::Else) { //* -> If (Expr) Instruction
                InnerNode node = {
                    .type=NodeType::If,
                    .children={expr, instruction1},
                    .pos=pos
                };

                return {
                    .node=node,
                    .tokens=tokens
                };
            }

            //* -> If (Expr) Instruction Else Instruction
            tokens.pop_back(); // Else

            ret = parse_instruction(tokens); // Instruction2
            tokens = ret.tokens;
            Node instruction2 = ret.node;

            InnerNode node = {
                .type=NodeType::IfElse,
                .children={expr, instruction1, instruction2},
                .pos=pos
            };

            return {
                .node=node,
                .tokens=tokens
            };
        }
        case TokenType::LCurlyBracket: {
            // `tokens` keeps at least one element after this pop (size check
            // above), so the back() calls below are safe.
            tokens.pop_back();

            ParseReturn ret = parse_prog(tokens);

            if ( // No expression parsed, the next token is not a '}'
                holds_alternative<InnerNode>(ret.node) &&
                get<InnerNode>(ret.node).type == NodeType::Epsilon &&
                tokens.back().type != TokenType::RCurlyBracket
            )
                throw SyntaxError(
                    "Invalid Syntax. Missing ';' ?",
                    tokens.back().pos
                );

            if (ret.tokens.empty() || ret.tokens.back().type != TokenType::RCurlyBracket)
                throw SyntaxError(
                    "Missing '}'",
                    ret.tokens.empty() ? tokens.back().pos : ret.tokens.back().pos
                );

            tokens = ret.tokens;
            tokens.pop_back(); // Closing }

            return {
                .node=ret.node,
                .tokens=tokens
            };
        }
        default:
            // No Statement rule starts with this token
            throw ParseException();
    }
}
ParseReturn parse_expr_statement(vector<Token> tokens) {

View File

@ -109,6 +109,16 @@ vector<Token> tokenize(vector<string> input, int initial_line) {
tokens.emplace_back(token);
j += m.str().length();
}
else if (str.starts_with("if")) {
Token token = { .type = TokenType::If, .pos = pos };
tokens.emplace_back(token);
j += 2;
}
else if (str.starts_with("else")) {
Token token = { .type = TokenType::Else, .pos = pos };
tokens.emplace_back(token);
j += 4;
}
else if (regex_search(str, m, IDENTIFIER_REGEX, regex_constants::match_continuous)) {
Token token = {
.type = TokenType::Identifier,
@ -188,16 +198,6 @@ vector<Token> tokenize(vector<string> input, int initial_line) {
tokens.emplace_back(token);
j += 1;
}
else if (str.starts_with("if")) {
Token token = { .type = TokenType::If, .pos = pos };
tokens.emplace_back(token);
j += 2;
}
else if (str.starts_with("else")) {
Token token = { .type = TokenType::Else, .pos = pos };
tokens.emplace_back(token);
j += 4;
}
else if (isspace(str[0])) {
j += 1;
}