Add more lexer tests
This commit is contained in:
parent
c61b26e692
commit
fc7b3d440a
@ -25,6 +25,11 @@ vector<Token> tokenize(vector<string> str, int initial_line=0);
|
||||
*/
|
||||
void _debug_print_token(Token token);
|
||||
|
||||
/*
|
||||
Returns the name of a TokenType
|
||||
*/
|
||||
string _debug_print_token_type(TokenType type);
|
||||
|
||||
/*
|
||||
Formats a list of tokens and prints it
|
||||
*/
|
||||
|
@ -6,7 +6,7 @@
|
||||
using namespace std;
|
||||
|
||||
regex NUMBER_REGEX ("\\d+(\\.\\d+)?");
|
||||
regex TYPE_INT_REGEX ("int\\s");
|
||||
regex TYPE_INT_REGEX ("int(\\s|$)");
|
||||
regex IDENTIFIER_REGEX ("[A-Za-z_]\\w*");
|
||||
|
||||
void _debug_print_token(Token token) {
|
||||
@ -95,6 +95,39 @@ void _debug_print_token(Token token) {
|
||||
}
|
||||
}
|
||||
|
||||
/*
Returns the name of a TokenType
(falls back to "Unknown" when the value is not listed in the table below)
*/
string _debug_print_token_type(TokenType type) {
    // One row per token type that has a printable name
    struct NamedType {
        TokenType kind;
        const char* label;
    };

    static const NamedType NAMES[] = {
        { TokenType::Type,          "Type" },
        { TokenType::Identifier,    "Identifier" },
        { TokenType::Int,           "Int" },
        { TokenType::Plus,          "Plus" },
        { TokenType::Minus,         "Minus" },
        { TokenType::DoublePlus,    "DoublePlus" },
        { TokenType::DoubleMinus,   "DoubleMinus" },
        { TokenType::DoubleEqual,   "DoubleEqual" },
        { TokenType::Land,          "Land" },
        { TokenType::Lor,           "Lor" },
        { TokenType::Lt,            "Lt" },
        { TokenType::Gt,            "Gt" },
        { TokenType::Leq,           "Leq" },
        { TokenType::Geq,           "Geq" },
        { TokenType::NotEqual,      "NotEqual" },
        { TokenType::Not,           "Not" },
        { TokenType::Star,          "Star" },
        { TokenType::Slash,         "Slash" },
        { TokenType::Percent,       "Percent" },
        { TokenType::Equal,         "Equal" },
        { TokenType::Semicolon,     "Semicolon" },
        { TokenType::LParenthese,   "LParenthese" },
        { TokenType::RParenthese,   "RParenthese" },
        { TokenType::LCurlyBracket, "LCurlyBracket" },
        { TokenType::RCurlyBracket, "RCurlyBracket" },
        { TokenType::If,            "If" },
        { TokenType::Else,          "Else" },
    };

    // Linear scan: the table is small and this is a debug helper
    for (const NamedType& entry : NAMES) {
        if (entry.kind == type) {
            return entry.label;
        }
    }

    return "Unknown";
}
|
||||
|
||||
void _debug_print_tokens(vector<Token> tokens) {
|
||||
for (Token token : tokens) {
|
||||
_debug_print_token(token);
|
||||
@ -168,16 +201,6 @@ vector<Token> tokenize(vector<string> input, int initial_line) {
|
||||
tokens.emplace_back(token);
|
||||
j += 2;
|
||||
}
|
||||
else if (str.starts_with("<")) {
|
||||
Token token = { .type = TokenType::Lt, .pos = pos };
|
||||
tokens.emplace_back(token);
|
||||
j += 1;
|
||||
}
|
||||
else if (str.starts_with(">")) {
|
||||
Token token = { .type = TokenType::Gt, .pos = pos };
|
||||
tokens.emplace_back(token);
|
||||
j += 1;
|
||||
}
|
||||
else if (str.starts_with("<=")) {
|
||||
Token token = { .type = TokenType::Leq, .pos = pos };
|
||||
tokens.emplace_back(token);
|
||||
@ -193,6 +216,16 @@ vector<Token> tokenize(vector<string> input, int initial_line) {
|
||||
tokens.emplace_back(token);
|
||||
j += 2;
|
||||
}
|
||||
else if (str.starts_with("<")) {
|
||||
Token token = { .type = TokenType::Lt, .pos = pos };
|
||||
tokens.emplace_back(token);
|
||||
j += 1;
|
||||
}
|
||||
else if (str.starts_with(">")) {
|
||||
Token token = { .type = TokenType::Gt, .pos = pos };
|
||||
tokens.emplace_back(token);
|
||||
j += 1;
|
||||
}
|
||||
else if (str.starts_with("!")) {
|
||||
Token token = { .type = TokenType::Not, .pos = pos };
|
||||
tokens.emplace_back(token);
|
||||
|
@ -9,6 +9,26 @@ using namespace std;
|
||||
int main() {
|
||||
_TEST_PRESENTATION("Lexer");
|
||||
|
||||
/* All tokens */
|
||||
vector<string> inputs = {
|
||||
"int", "a", "=", "x", "++", "--", "==", "&&", "||", "<", ">", "<=", ">=", "!=", "!", "*", "/", "%", "=", ";", "(", ")", "{", "}", "if", "else"
|
||||
};
|
||||
|
||||
vector<TokenType> expectedTypes = {
|
||||
TokenType::Type, TokenType::Identifier, TokenType::Equal, TokenType::Identifier,
|
||||
TokenType::DoublePlus, TokenType::DoubleMinus, TokenType::DoubleEqual, TokenType::Land,
|
||||
TokenType::Lor, TokenType::Lt, TokenType::Gt, TokenType::Leq, TokenType::Geq, TokenType::NotEqual,
|
||||
TokenType::Not, TokenType::Star, TokenType::Slash, TokenType::Percent, TokenType::Equal,
|
||||
TokenType::Semicolon, TokenType::LParenthese, TokenType::RParenthese, TokenType::LCurlyBracket,
|
||||
TokenType::RCurlyBracket, TokenType::If, TokenType::Else
|
||||
};
|
||||
|
||||
for (size_t i = 0; i < inputs.size(); i++) {
|
||||
vector<Token> tokens = tokenize({ inputs[i] });
|
||||
_TEST_ASSERT(tokens.size() == 1, _debug_print_token_type(expectedTypes[i]).c_str(), false);
|
||||
_TEST_ASSERT(tokens[0].type == expectedTypes[i], _debug_print_token_type(expectedTypes[i]).c_str(), true);
|
||||
}
|
||||
|
||||
/* Complex input */
|
||||
{
|
||||
string input = "int a = x+++7;";
|
||||
|
Loading…
Reference in New Issue
Block a user