Add more lexer tests

This commit is contained in:
ala89 2023-11-24 10:21:58 +01:00
parent c61b26e692
commit fc7b3d440a
3 changed files with 69 additions and 11 deletions

View File

@ -25,6 +25,11 @@ vector<Token> tokenize(vector<string> str, int initial_line=0);
*/
void _debug_print_token(Token token);
/*
Returns the name of a TokenType as a string. Despite the "_debug_print_"
prefix, nothing is written to any output stream; the caller decides what to
do with the text. Values without a dedicated case yield "Unknown".
*/
string _debug_print_token_type(TokenType type);
/*
Formats a list of tokens and prints it
*/

View File

@ -6,7 +6,7 @@
using namespace std;
regex NUMBER_REGEX ("\\d+(\\.\\d+)?");
regex TYPE_INT_REGEX ("int\\s");
regex TYPE_INT_REGEX ("int(\\s|$)");
regex IDENTIFIER_REGEX ("[A-Za-z_]\\w*");
void _debug_print_token(Token token) {
@ -95,6 +95,39 @@ void _debug_print_token(Token token) {
}
}
/*
Maps a TokenType to the spelling of its enumerator.
The text is returned, not printed. Any value that has no dedicated case
below keeps the fallback label "Unknown".
*/
string _debug_print_token_type(TokenType type) {
    // Start from the fallback so the switch only has to overwrite it.
    const char* label = "Unknown";

    switch (type) {
        case TokenType::Type:          label = "Type";          break;
        case TokenType::Identifier:    label = "Identifier";    break;
        case TokenType::Int:           label = "Int";           break;
        case TokenType::Plus:          label = "Plus";          break;
        case TokenType::Minus:         label = "Minus";         break;
        case TokenType::DoublePlus:    label = "DoublePlus";    break;
        case TokenType::DoubleMinus:   label = "DoubleMinus";   break;
        case TokenType::DoubleEqual:   label = "DoubleEqual";   break;
        case TokenType::Land:          label = "Land";          break;
        case TokenType::Lor:           label = "Lor";           break;
        case TokenType::Lt:            label = "Lt";            break;
        case TokenType::Gt:            label = "Gt";            break;
        case TokenType::Leq:           label = "Leq";           break;
        case TokenType::Geq:           label = "Geq";           break;
        case TokenType::NotEqual:      label = "NotEqual";      break;
        case TokenType::Not:           label = "Not";           break;
        case TokenType::Star:          label = "Star";          break;
        case TokenType::Slash:         label = "Slash";         break;
        case TokenType::Percent:       label = "Percent";       break;
        case TokenType::Equal:         label = "Equal";         break;
        case TokenType::Semicolon:     label = "Semicolon";     break;
        case TokenType::LParenthese:   label = "LParenthese";   break;
        case TokenType::RParenthese:   label = "RParenthese";   break;
        case TokenType::LCurlyBracket: label = "LCurlyBracket"; break;
        case TokenType::RCurlyBracket: label = "RCurlyBracket"; break;
        case TokenType::If:            label = "If";            break;
        case TokenType::Else:          label = "Else";          break;
        default:                                                break;
    }

    return label;
}
void _debug_print_tokens(vector<Token> tokens) {
for (Token token : tokens) {
_debug_print_token(token);
@ -168,16 +201,6 @@ vector<Token> tokenize(vector<string> input, int initial_line) {
tokens.emplace_back(token);
j += 2;
}
else if (str.starts_with("<")) {
Token token = { .type = TokenType::Lt, .pos = pos };
tokens.emplace_back(token);
j += 1;
}
else if (str.starts_with(">")) {
Token token = { .type = TokenType::Gt, .pos = pos };
tokens.emplace_back(token);
j += 1;
}
else if (str.starts_with("<=")) {
Token token = { .type = TokenType::Leq, .pos = pos };
tokens.emplace_back(token);
@ -193,6 +216,16 @@ vector<Token> tokenize(vector<string> input, int initial_line) {
tokens.emplace_back(token);
j += 2;
}
else if (str.starts_with("<")) {
Token token = { .type = TokenType::Lt, .pos = pos };
tokens.emplace_back(token);
j += 1;
}
else if (str.starts_with(">")) {
Token token = { .type = TokenType::Gt, .pos = pos };
tokens.emplace_back(token);
j += 1;
}
else if (str.starts_with("!")) {
Token token = { .type = TokenType::Not, .pos = pos };
tokens.emplace_back(token);

View File

@ -9,6 +9,26 @@ using namespace std;
int main() {
_TEST_PRESENTATION("Lexer");
/* All tokens */
vector<string> inputs = {
"int", "a", "=", "x", "++", "--", "==", "&&", "||", "<", ">", "<=", ">=", "!=", "!", "*", "/", "%", "=", ";", "(", ")", "{", "}", "if", "else"
};
vector<TokenType> expectedTypes = {
TokenType::Type, TokenType::Identifier, TokenType::Equal, TokenType::Identifier,
TokenType::DoublePlus, TokenType::DoubleMinus, TokenType::DoubleEqual, TokenType::Land,
TokenType::Lor, TokenType::Lt, TokenType::Gt, TokenType::Leq, TokenType::Geq, TokenType::NotEqual,
TokenType::Not, TokenType::Star, TokenType::Slash, TokenType::Percent, TokenType::Equal,
TokenType::Semicolon, TokenType::LParenthese, TokenType::RParenthese, TokenType::LCurlyBracket,
TokenType::RCurlyBracket, TokenType::If, TokenType::Else
};
for (size_t i = 0; i < inputs.size(); i++) {
vector<Token> tokens = tokenize({ inputs[i] });
_TEST_ASSERT(tokens.size() == 1, _debug_print_token_type(expectedTypes[i]).c_str(), false);
_TEST_ASSERT(tokens[0].type == expectedTypes[i], _debug_print_token_type(expectedTypes[i]).c_str(), true);
}
/* Complex input */
{
string input = "int a = x+++7;";