diff --git a/src/include/tokenize.h b/src/include/tokenize.h index a8f3688..145a0d8 100644 --- a/src/include/tokenize.h +++ b/src/include/tokenize.h @@ -25,6 +25,11 @@ vector tokenize(vector str, int initial_line=0); */ void _debug_print_token(Token token); +/* + Returns the name of a TokenType +*/ +string _debug_print_token_type(TokenType type); + /* Formats a list of tokens and prints it */ diff --git a/src/tokenize.cpp b/src/tokenize.cpp index b18b855..7f6a96a 100644 --- a/src/tokenize.cpp +++ b/src/tokenize.cpp @@ -6,7 +6,7 @@ using namespace std; regex NUMBER_REGEX ("\\d+(\\.\\d+)?"); -regex TYPE_INT_REGEX ("int\\s"); +regex TYPE_INT_REGEX ("int(\\s|$)"); regex IDENTIFIER_REGEX ("[A-Za-z_]\\w*"); void _debug_print_token(Token token) { @@ -95,6 +95,39 @@ void _debug_print_token(Token token) { } } +string _debug_print_token_type(TokenType type) { + switch (type) { + case TokenType::Type: return "Type"; + case TokenType::Identifier: return "Identifier"; + case TokenType::Int: return "Int"; + case TokenType::Plus: return "Plus"; + case TokenType::Minus: return "Minus"; + case TokenType::DoublePlus: return "DoublePlus"; + case TokenType::DoubleMinus: return "DoubleMinus"; + case TokenType::DoubleEqual: return "DoubleEqual"; + case TokenType::Land: return "Land"; + case TokenType::Lor: return "Lor"; + case TokenType::Lt: return "Lt"; + case TokenType::Gt: return "Gt"; + case TokenType::Leq: return "Leq"; + case TokenType::Geq: return "Geq"; + case TokenType::NotEqual: return "NotEqual"; + case TokenType::Not: return "Not"; + case TokenType::Star: return "Star"; + case TokenType::Slash: return "Slash"; + case TokenType::Percent: return "Percent"; + case TokenType::Equal: return "Equal"; + case TokenType::Semicolon: return "Semicolon"; + case TokenType::LParenthese: return "LParenthese"; + case TokenType::RParenthese: return "RParenthese"; + case TokenType::LCurlyBracket: return "LCurlyBracket"; + case TokenType::RCurlyBracket: return "RCurlyBracket"; + case TokenType::If: return "If"; + case TokenType::Else: return "Else"; + default: return "Unknown"; + } +} + void _debug_print_tokens(vector tokens) { for (Token token : tokens) { _debug_print_token(token); @@ -168,16 +201,6 @@ vector tokenize(vector input, int initial_line) { tokens.emplace_back(token); j += 2; } - else if (str.starts_with("<")) { - Token token = { .type = TokenType::Lt, .pos = pos }; - tokens.emplace_back(token); - j += 1; - } - else if (str.starts_with(">")) { - Token token = { .type = TokenType::Gt, .pos = pos }; - tokens.emplace_back(token); - j += 1; - } else if (str.starts_with("<=")) { Token token = { .type = TokenType::Leq, .pos = pos }; tokens.emplace_back(token); @@ -193,6 +216,16 @@ vector tokenize(vector input, int initial_line) { tokens.emplace_back(token); j += 2; } + else if (str.starts_with("<")) { + Token token = { .type = TokenType::Lt, .pos = pos }; + tokens.emplace_back(token); + j += 1; + } + else if (str.starts_with(">")) { + Token token = { .type = TokenType::Gt, .pos = pos }; + tokens.emplace_back(token); + j += 1; + } else if (str.starts_with("!")) { Token token = { .type = TokenType::Not, .pos = pos }; tokens.emplace_back(token); diff --git a/test/tokenize.cpp b/test/tokenize.cpp index bc4edc7..4c19967 100644 --- a/test/tokenize.cpp +++ b/test/tokenize.cpp @@ -9,6 +9,26 @@ using namespace std; int main() { _TEST_PRESENTATION("Lexer"); + /* All tokens */ + vector inputs = { + "int", "a", "=", "x", "++", "--", "==", "&&", "||", "<", ">", "<=", ">=", "!=", "!", "*", "/", "%", "=", ";", "(", ")", "{", "}", "if", "else" + }; + + vector expectedTypes = { + TokenType::Type, TokenType::Identifier, TokenType::Equal, TokenType::Identifier, + TokenType::DoublePlus, TokenType::DoubleMinus, TokenType::DoubleEqual, TokenType::Land, + TokenType::Lor, TokenType::Lt, TokenType::Gt, TokenType::Leq, TokenType::Geq, TokenType::NotEqual, + TokenType::Not, TokenType::Star, TokenType::Slash, TokenType::Percent, TokenType::Equal, + TokenType::Semicolon, TokenType::LParenthese, TokenType::RParenthese, TokenType::LCurlyBracket, + TokenType::RCurlyBracket, TokenType::If, TokenType::Else + }; + + for (size_t i = 0; i < inputs.size(); i++) { + vector tokens = tokenize({ inputs[i] }); + _TEST_ASSERT(tokens.size() == 1, _debug_print_token_type(expectedTypes[i]).c_str(), false); + _TEST_ASSERT(tokens[0].type == expectedTypes[i], _debug_print_token_type(expectedTypes[i]).c_str(), true); + } + /* Complex input */ { string input = "int a = x+++7;";