Add conditional statements parsing
This commit is contained in:
parent
9a4d993b4b
commit
1e0d372fc6
@ -14,7 +14,9 @@ Instruction -> Statement | ExprStatement; | Expr; | ;
|
||||
|
||||
Statement ->
|
||||
| { Prog }
|
||||
| If (Expr) Instruction
|
||||
| If (Expr) Instruction Else Instruction
|
||||
|
||||
ExprStatement ->
|
||||
| Type ParIdentifier = Expr // AssignedDeclaration
|
||||
| Type ParIdentifier // Declaration
|
||||
@ -74,7 +76,9 @@ enum class NodeType {
|
||||
LIncr, // -> ++ParIdentifier
|
||||
RIncr, // -> ParIdentifier++
|
||||
LDecr, // -> --ParIdentifier
|
||||
RDecr // -> ParIdentifier--
|
||||
RDecr, // -> ParIdentifier--
|
||||
If, // -> If (Expr) Instruction
|
||||
IfElse // -> If (Expr) Instruction Else Instruction
|
||||
};
|
||||
|
||||
struct InnerNode;
|
||||
@ -127,6 +131,11 @@ public:
|
||||
*/
|
||||
Node parse(vector<Token> tokens);
|
||||
|
||||
/**
|
||||
* Parse something derived from Prog
|
||||
*/
|
||||
ParseReturn parse_prog(vector<Token> tokens);
|
||||
|
||||
/**
|
||||
* Parse something derived from Instruction
|
||||
*/
|
||||
|
145
src/parser.cpp
145
src/parser.cpp
@ -12,7 +12,7 @@ CodePosition null_pos = {
|
||||
};
|
||||
|
||||
|
||||
const char* _node_names[] = {"Prog", "Epsilon", "AssignedDeclaration", "Declaration", "Plus", "Minus", "Mult", "Div", "Mod", "UnaryMinus", "UnaryPlus", "Assignment", "LIncr", "RIncr", "LDecr", "RDecr"};
|
||||
const char* _node_names[] = {"Prog", "Epsilon", "AssignedDeclaration", "Declaration", "Plus", "Minus", "Mult", "Div", "Mod", "UnaryMinus", "UnaryPlus", "Assignment", "LIncr", "RIncr", "LDecr", "RDecr", "If", "IfElse"};
|
||||
void _debug_print_tree(const Node& node, int depth, const string& prefix = "") {
|
||||
if (holds_alternative<InnerNode>(node)) {
|
||||
const InnerNode& innerNode = get<InnerNode>(node);
|
||||
@ -56,13 +56,26 @@ Node parse(vector<Token> tokens) {
|
||||
if (tokens.size() == 0) {
|
||||
throw SyntaxError("Input must not be empty", null_pos);
|
||||
}
|
||||
|
||||
|
||||
// At least 1 instruction
|
||||
ParseReturn ret = parse_instruction(tokens);
|
||||
tokens = ret.tokens;
|
||||
ParseReturn ret = parse_prog(tokens);
|
||||
|
||||
Node node = ret.node;
|
||||
if (ret.tokens.size() != 0) {
|
||||
CodePosition pos = ret.tokens.back().pos;
|
||||
throw SyntaxError("Unable to parse", pos);
|
||||
}
|
||||
|
||||
return ret.node;
|
||||
}
|
||||
|
||||
|
||||
ParseReturn parse_prog(vector<Token> tokens) {
|
||||
vector<Node> children;
|
||||
InnerNode epsilon_node = {
|
||||
.type=NodeType::Epsilon,
|
||||
.children=children,
|
||||
.pos=null_pos
|
||||
};
|
||||
Node node = epsilon_node;
|
||||
|
||||
try {
|
||||
while (tokens.size() != 0) {
|
||||
@ -70,21 +83,27 @@ Node parse(vector<Token> tokens) {
|
||||
tokens = ret.tokens;
|
||||
|
||||
if (!holds_alternative<InnerNode>(ret.node) || get<InnerNode>(ret.node).type != NodeType::Epsilon) {
|
||||
InnerNode new_node = {
|
||||
.type=NodeType::Prog,
|
||||
.children={node, ret.node},
|
||||
.pos=get_node_pos(ret.node)
|
||||
};
|
||||
node = new_node;
|
||||
if (holds_alternative<InnerNode>(node) && get<InnerNode>(node).type == NodeType::Epsilon) {
|
||||
node = ret.node; // Remove base epsilon node
|
||||
} else {
|
||||
InnerNode new_node = {
|
||||
.type=NodeType::Prog,
|
||||
.children={node, ret.node},
|
||||
.pos=get_node_pos(ret.node)
|
||||
};
|
||||
node = new_node;
|
||||
}
|
||||
}
|
||||
}
|
||||
return node;
|
||||
} catch (const ParseException& pex) {
|
||||
CodePosition pos = tokens.back().pos;
|
||||
throw SyntaxError("Unable to parse", pos);
|
||||
}
|
||||
} catch (const ParseException& pex) {}
|
||||
|
||||
return {
|
||||
.node=node,
|
||||
.tokens=tokens
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
ParseReturn parse_instruction(vector<Token> tokens) {
|
||||
try { //* Instruction -> Statement
|
||||
ParseReturn ret = parse_statement(tokens);
|
||||
@ -140,9 +159,95 @@ ParseReturn parse_instruction(vector<Token> tokens) {
|
||||
}
|
||||
|
||||
ParseReturn parse_statement(vector<Token> tokens) {
|
||||
(void)tokens;
|
||||
// No rule
|
||||
throw ParseException();
|
||||
if (tokens.size() < 2) // 'If' / 'For' '(' at least
|
||||
throw ParseException();
|
||||
|
||||
switch (tokens.back().type) {
|
||||
case TokenType::If: {
|
||||
CodePosition pos = tokens.back().pos;
|
||||
|
||||
tokens.pop_back();
|
||||
if (tokens.back().type != TokenType::LParenthese) // Opening (
|
||||
throw SyntaxError("Missing '('", tokens.back().pos);
|
||||
|
||||
tokens.pop_back();
|
||||
ParseReturn ret = parse_expr(tokens); // Expr
|
||||
|
||||
int nb_tok = ret.tokens.size(); // Closing )
|
||||
if (nb_tok == 0 || ret.tokens.back().type != TokenType::RParenthese)
|
||||
throw SyntaxError(
|
||||
"Missing ')'",
|
||||
nb_tok == 0 ? tokens.back().pos : ret.tokens.back().pos
|
||||
);
|
||||
|
||||
ret.tokens.pop_back();
|
||||
tokens = ret.tokens;
|
||||
Node expr = ret.node;
|
||||
|
||||
ret = parse_instruction(tokens); // Instruction1
|
||||
tokens = ret.tokens;
|
||||
Node instruction1 = ret.node;
|
||||
|
||||
if (tokens.size() == 0 || tokens.back().type != TokenType::Else) { //* -> If (Expr) Instruction
|
||||
InnerNode node = {
|
||||
.type=NodeType::If,
|
||||
.children={expr, instruction1},
|
||||
.pos=pos
|
||||
};
|
||||
return {
|
||||
.node=node,
|
||||
.tokens=tokens
|
||||
};
|
||||
}
|
||||
|
||||
tokens.pop_back(); // Else
|
||||
ret = parse_instruction(tokens); // Instruction2
|
||||
tokens = ret.tokens;
|
||||
Node instruction2 = ret.node;
|
||||
|
||||
InnerNode node = {
|
||||
.type=NodeType::IfElse,
|
||||
.children={expr, instruction1, instruction2},
|
||||
.pos=pos
|
||||
};
|
||||
return {
|
||||
.node=node,
|
||||
.tokens=tokens
|
||||
};
|
||||
}
|
||||
case TokenType::LCurlyBracket: {
|
||||
tokens.pop_back();
|
||||
|
||||
ParseReturn ret = parse_prog(tokens);
|
||||
if ( // No expression parsed, the next token is not a '}'
|
||||
holds_alternative<InnerNode>(ret.node) &&
|
||||
get<InnerNode>(ret.node).type == NodeType::Epsilon &&
|
||||
tokens.back().type != TokenType::RCurlyBracket
|
||||
)
|
||||
throw SyntaxError(
|
||||
"Invalid Syntax. Missing ';' ?",
|
||||
tokens.back().pos
|
||||
);
|
||||
|
||||
int nb_tok = ret.tokens.size();
|
||||
if (nb_tok == 0 || ret.tokens.back().type != TokenType::RCurlyBracket)
|
||||
throw SyntaxError(
|
||||
"Missing '}'",
|
||||
nb_tok == 0 ? tokens.back().pos : ret.tokens.back().pos
|
||||
);
|
||||
|
||||
tokens = ret.tokens;
|
||||
tokens.pop_back();
|
||||
|
||||
return {
|
||||
.node=ret.node,
|
||||
.tokens=tokens
|
||||
};
|
||||
|
||||
}
|
||||
default:
|
||||
throw ParseException();
|
||||
}
|
||||
}
|
||||
|
||||
ParseReturn parse_expr_statement(vector<Token> tokens) {
|
||||
|
@ -109,6 +109,16 @@ vector<Token> tokenize(vector<string> input, int initial_line) {
|
||||
tokens.emplace_back(token);
|
||||
j += m.str().length();
|
||||
}
|
||||
else if (str.starts_with("if")) {
|
||||
Token token = { .type = TokenType::If, .pos = pos };
|
||||
tokens.emplace_back(token);
|
||||
j += 2;
|
||||
}
|
||||
else if (str.starts_with("else")) {
|
||||
Token token = { .type = TokenType::Else, .pos = pos };
|
||||
tokens.emplace_back(token);
|
||||
j += 4;
|
||||
}
|
||||
else if (regex_search(str, m, IDENTIFIER_REGEX, regex_constants::match_continuous)) {
|
||||
Token token = {
|
||||
.type = TokenType::Identifier,
|
||||
@ -188,16 +198,6 @@ vector<Token> tokenize(vector<string> input, int initial_line) {
|
||||
tokens.emplace_back(token);
|
||||
j += 1;
|
||||
}
|
||||
else if (str.starts_with("if")) {
|
||||
Token token = { .type = TokenType::If, .pos = pos };
|
||||
tokens.emplace_back(token);
|
||||
j += 2;
|
||||
}
|
||||
else if (str.starts_with("else")) {
|
||||
Token token = { .type = TokenType::Else, .pos = pos };
|
||||
tokens.emplace_back(token);
|
||||
j += 4;
|
||||
}
|
||||
else if (isspace(str[0])) {
|
||||
j += 1;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user