#include #include #include #include #include "include/tokenize.h" using namespace std; regex NUMBER_REGEX ("\\d+(\\.\\d+)?"); regex TYPE_INT_REGEX ("int\\s"); regex IDENTIFIER_REGEX ("[A-Za-z_]\\w*"); void _debug_print_token(Token token) { switch (token.type) { case TokenType::Type: cout << "Type(INT)"; break; case TokenType::Int: cout << "Number(" << get(token.data) << ")"; break; case TokenType::Identifier: cout << "Identifier(" << get(token.data) << ")"; break; case TokenType::Plus: cout << "+"; break; case TokenType::Minus: cout << "-"; break; case TokenType::Star: cout << "*"; break; case TokenType::Slash: cout << "/"; break; case TokenType::Percent: cout << "%"; break; case TokenType::Equal: cout << "="; break; case TokenType::Semicolon: cout << ";"; break; case TokenType::LParenthese: cout << "("; break; case TokenType::RParenthese: cout << ")"; break; } } void _debug_print_tokens(vector tokens) { for (Token token : tokens) { _debug_print_token(token); cout << " "; } cout << endl; } vector tokenize(string str) { vector tokens; while (str.size() > 0) { smatch m; if (regex_search(str, m, NUMBER_REGEX, regex_constants::match_continuous)) { Token token = { .type = TokenType::Int, .data = stoi(m.str()) }; tokens.emplace_back(token); str.erase(0, m.str().length()); } else if (regex_search(str, m, TYPE_INT_REGEX, regex_constants::match_continuous)) { Token token = { .type = TokenType::Type, .data = Type::Int }; tokens.emplace_back(token); str.erase(0, m.str().length()); } else if (regex_search(str, m, IDENTIFIER_REGEX, regex_constants::match_continuous)) { Token token = { .type = TokenType::Identifier, .data = m.str() }; tokens.emplace_back(token); str.erase(0, m.str().length()); } else if (str[0] == '+') { Token token = { .type = TokenType::Plus }; tokens.emplace_back(token); str.erase(0, 1); } else if (str[0] == '-') { Token token = { .type = TokenType::Minus }; tokens.emplace_back(token); str.erase(0, 1); } else if (str[0] == '*') { Token token = { .type = TokenType::Star }; tokens.emplace_back(token); str.erase(0, 1); } else if (str[0] == '/') { Token token = { .type = TokenType::Slash }; tokens.emplace_back(token); str.erase(0, 1); } else if (str[0] == '%') { Token token = { .type = TokenType::Percent }; tokens.emplace_back(token); str.erase(0, 1); } else if (str[0] == '=') { Token token = { .type = TokenType::Equal }; tokens.emplace_back(token); str.erase(0, 1); } else if (str[0] == ';') { Token token = { .type = TokenType::Semicolon }; tokens.emplace_back(token); str.erase(0, 1); } else if (str[0] == '(') { Token token = { .type = TokenType::LParenthese }; tokens.emplace_back(token); str.erase(0, 1); } else if (str[0] == ')') { Token token = { .type = TokenType::RParenthese }; tokens.emplace_back(token); str.erase(0, 1); } else if (isspace(str[0]) || str[0] == '\0') { str.erase(0, 1); } else { cerr << "Unknown token: \"" << str << "\"" << endl; break; } } return tokens; }