Add conditional statements parsing

This commit is contained in:
augustin64 2023-11-16 14:09:32 +01:00
parent 9a4d993b4b
commit 1e0d372fc6
3 changed files with 145 additions and 31 deletions

View File

@ -14,7 +14,9 @@ Instruction -> Statement | ExprStatement; | Expr; | ;
Statement -> Statement ->
| { Prog } | { Prog }
| If (Expr) Instruction
| If (Expr) Instruction Else Instruction | If (Expr) Instruction Else Instruction
ExprStatement -> ExprStatement ->
| Type ParIdentifier = Expr // AssignedDeclaration | Type ParIdentifier = Expr // AssignedDeclaration
| Type ParIdentifier // Declaration | Type ParIdentifier // Declaration
@ -74,7 +76,9 @@ enum class NodeType {
LIncr, // -> ++ParIdentifier LIncr, // -> ++ParIdentifier
RIncr, // -> ParIdentifier++ RIncr, // -> ParIdentifier++
LDecr, // -> --ParIdentifier LDecr, // -> --ParIdentifier
RDecr // -> ParIdentifier-- RDecr, // -> ParIdentifier--
If, // -> If (Expr) Instruction
IfElse // -> If (Expr) Instruction Else Instruction
}; };
struct InnerNode; struct InnerNode;
@ -127,6 +131,11 @@ public:
*/ */
Node parse(vector<Token> tokens); Node parse(vector<Token> tokens);
/**
* Parse something derived from Prog
*/
ParseReturn parse_prog(vector<Token> tokens);
/** /**
* Parse something derived from Instruction * Parse something derived from Instruction
*/ */

View File

@ -12,7 +12,7 @@ CodePosition null_pos = {
}; };
const char* _node_names[] = {"Prog", "Epsilon", "AssignedDeclaration", "Declaration", "Plus", "Minus", "Mult", "Div", "Mod", "UnaryMinus", "UnaryPlus", "Assignment", "LIncr", "RIncr", "LDecr", "RDecr"}; const char* _node_names[] = {"Prog", "Epsilon", "AssignedDeclaration", "Declaration", "Plus", "Minus", "Mult", "Div", "Mod", "UnaryMinus", "UnaryPlus", "Assignment", "LIncr", "RIncr", "LDecr", "RDecr", "If", "IfElse"};
void _debug_print_tree(const Node& node, int depth, const string& prefix = "") { void _debug_print_tree(const Node& node, int depth, const string& prefix = "") {
if (holds_alternative<InnerNode>(node)) { if (holds_alternative<InnerNode>(node)) {
const InnerNode& innerNode = get<InnerNode>(node); const InnerNode& innerNode = get<InnerNode>(node);
@ -57,12 +57,25 @@ Node parse(vector<Token> tokens) {
throw SyntaxError("Input must not be empty", null_pos); throw SyntaxError("Input must not be empty", null_pos);
} }
ParseReturn ret = parse_prog(tokens);
// At least 1 instruction if (ret.tokens.size() != 0) {
ParseReturn ret = parse_instruction(tokens); CodePosition pos = ret.tokens.back().pos;
tokens = ret.tokens; throw SyntaxError("Unable to parse", pos);
}
Node node = ret.node; return ret.node;
}
ParseReturn parse_prog(vector<Token> tokens) {
vector<Node> children;
InnerNode epsilon_node = {
.type=NodeType::Epsilon,
.children=children,
.pos=null_pos
};
Node node = epsilon_node;
try { try {
while (tokens.size() != 0) { while (tokens.size() != 0) {
@ -70,21 +83,27 @@ Node parse(vector<Token> tokens) {
tokens = ret.tokens; tokens = ret.tokens;
if (!holds_alternative<InnerNode>(ret.node) || get<InnerNode>(ret.node).type != NodeType::Epsilon) { if (!holds_alternative<InnerNode>(ret.node) || get<InnerNode>(ret.node).type != NodeType::Epsilon) {
InnerNode new_node = { if (holds_alternative<InnerNode>(node) && get<InnerNode>(node).type == NodeType::Epsilon) {
.type=NodeType::Prog, node = ret.node; // Remove base epsilon node
.children={node, ret.node}, } else {
.pos=get_node_pos(ret.node) InnerNode new_node = {
}; .type=NodeType::Prog,
node = new_node; .children={node, ret.node},
.pos=get_node_pos(ret.node)
};
node = new_node;
}
} }
} }
return node; } catch (const ParseException& pex) {}
} catch (const ParseException& pex) {
CodePosition pos = tokens.back().pos; return {
throw SyntaxError("Unable to parse", pos); .node=node,
} .tokens=tokens
};
} }
ParseReturn parse_instruction(vector<Token> tokens) { ParseReturn parse_instruction(vector<Token> tokens) {
try { //* Instruction -> Statement try { //* Instruction -> Statement
ParseReturn ret = parse_statement(tokens); ParseReturn ret = parse_statement(tokens);
@ -140,9 +159,95 @@ ParseReturn parse_instruction(vector<Token> tokens) {
} }
/**
 * Parse something derived from Statement:
 *   Statement -> { Prog }
 *              | If (Expr) Instruction
 *              | If (Expr) Instruction Else Instruction
 *
 * The token vector is consumed from its end: tokens.back() is the next
 * token to read and pop_back() consumes it.
 *
 * Returns the parsed node together with the tokens left unconsumed.
 * Throws ParseException when no Statement rule applies, and SyntaxError
 * when a rule has been recognised but is malformed.
 */
ParseReturn parse_statement(vector<Token> tokens) {
    if (tokens.size() < 2) // 'If' / 'For' '(' at least
        throw ParseException();

    switch (tokens.back().type) {
        case TokenType::If: {
            const CodePosition pos = tokens.back().pos;
            tokens.pop_back();

            if (tokens.back().type != TokenType::LParenthese) // Opening (
                throw SyntaxError("Missing '('", tokens.back().pos);

            // Remembered so an error can still be located if nothing follows the '('
            const CodePosition lparen_pos = tokens.back().pos;
            tokens.pop_back();

            ParseReturn ret = parse_expr(tokens); // Expr

            // Closing )
            if (ret.tokens.empty() || ret.tokens.back().type != TokenType::RParenthese) {
                // Never call back() on an empty vector: fall back step by step
                // from the remaining tokens, to the pre-parse tokens, to the '('.
                CodePosition err_pos = lparen_pos;
                if (!ret.tokens.empty())
                    err_pos = ret.tokens.back().pos;
                else if (!tokens.empty())
                    err_pos = tokens.back().pos;
                throw SyntaxError("Missing ')'", err_pos);
            }
            ret.tokens.pop_back();

            tokens = ret.tokens;
            Node expr = ret.node;

            ret = parse_instruction(tokens); // Instruction1
            tokens = ret.tokens;
            Node instruction1 = ret.node;

            if (tokens.empty() || tokens.back().type != TokenType::Else) { //* -> If (Expr) Instruction
                InnerNode node = {
                    .type=NodeType::If,
                    .children={expr, instruction1},
                    .pos=pos
                };
                return {
                    .node=node,
                    .tokens=tokens
                };
            }

            //* -> If (Expr) Instruction Else Instruction
            tokens.pop_back(); // Else
            ret = parse_instruction(tokens); // Instruction2
            tokens = ret.tokens;
            Node instruction2 = ret.node;

            InnerNode node = {
                .type=NodeType::IfElse,
                .children={expr, instruction1, instruction2},
                .pos=pos
            };
            return {
                .node=node,
                .tokens=tokens
            };
        }
        case TokenType::LCurlyBracket: {
            tokens.pop_back(); // size() >= 2 was checked above, so tokens is still non-empty

            ParseReturn ret = parse_prog(tokens);

            const bool parsed_nothing =
                holds_alternative<InnerNode>(ret.node) &&
                get<InnerNode>(ret.node).type == NodeType::Epsilon;

            // Nothing was parsed and the next *remaining* token is not a '}'.
            // The check looks at ret.tokens (what parse_prog left), not at the
            // pre-parse tokens, so a block made only of empty instructions is
            // not rejected (assuming parse_instruction yields Epsilon for ';').
            if (parsed_nothing &&
                !ret.tokens.empty() &&
                ret.tokens.back().type != TokenType::RCurlyBracket)
                throw SyntaxError(
                    "Invalid Syntax. Missing ';' ?",
                    ret.tokens.back().pos
                );

            if (ret.tokens.empty() || ret.tokens.back().type != TokenType::RCurlyBracket)
                throw SyntaxError(
                    "Missing '}'",
                    ret.tokens.empty() ? tokens.back().pos : ret.tokens.back().pos
                );

            tokens = ret.tokens;
            tokens.pop_back(); // Closing }

            return {
                .node=ret.node,
                .tokens=tokens
            };
        }
        default:
            throw ParseException();
    }
}
ParseReturn parse_expr_statement(vector<Token> tokens) { ParseReturn parse_expr_statement(vector<Token> tokens) {

View File

@ -109,6 +109,16 @@ vector<Token> tokenize(vector<string> input, int initial_line) {
tokens.emplace_back(token); tokens.emplace_back(token);
j += m.str().length(); j += m.str().length();
} }
else if (str.starts_with("if")) {
Token token = { .type = TokenType::If, .pos = pos };
tokens.emplace_back(token);
j += 2;
}
else if (str.starts_with("else")) {
Token token = { .type = TokenType::Else, .pos = pos };
tokens.emplace_back(token);
j += 4;
}
else if (regex_search(str, m, IDENTIFIER_REGEX, regex_constants::match_continuous)) { else if (regex_search(str, m, IDENTIFIER_REGEX, regex_constants::match_continuous)) {
Token token = { Token token = {
.type = TokenType::Identifier, .type = TokenType::Identifier,
@ -188,16 +198,6 @@ vector<Token> tokenize(vector<string> input, int initial_line) {
tokens.emplace_back(token); tokens.emplace_back(token);
j += 1; j += 1;
} }
else if (str.starts_with("if")) {
Token token = { .type = TokenType::If, .pos = pos };
tokens.emplace_back(token);
j += 2;
}
else if (str.starts_with("else")) {
Token token = { .type = TokenType::Else, .pos = pos };
tokens.emplace_back(token);
j += 4;
}
else if (isspace(str[0])) { else if (isspace(str[0])) {
j += 1; j += 1;
} }