Add code position to tokens

ala89 2023-11-15 14:31:11 +01:00
parent 87bcd0d064
commit cb3444fcf3
5 changed files with 105 additions and 92 deletions
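
The hunks below attach a CodePosition to every Token through a new .pos field, but the header that declares these types is not among the hunks shown. A minimal sketch of what include/tokenize.h presumably declares after this commit; TokenType and Type are the repository's existing enums, and the exact field types (the variant payload in particular) are assumptions reconstructed from how the fields are used below:

    // Sketch only -- inferred from usage in tokenize.cpp, not taken
    // from the actual header diff, which is not shown on this page.
    struct CodePosition {
        int line;    // index of the line in the input vector<string>
        int column;  // character offset within that line
    };

    struct Token {
        TokenType type;                             // member order matters for the
        std::variant<int, Type, std::string> data;  // designated initializers below
        CodePosition pos;                           // new in this commit
    };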


@@ -14,7 +14,7 @@ TESTS_OBJ = $(TESTS_SRC:$(TEST_SRCDIR)/%.cpp=$(BUILDDIR)/test-%)
 LD_CXXFLAGS =
 # Compilation flag
-CXXFLAGS = -Wall -Wextra -g -O3 -std=c++2a
+CXXFLAGS = -Wall -Wextra -g -O0 -std=c++2a
 # Remove warnings about unused variables, functions, ...
 # -Wno-unused-parameter -Wno-unused-function -Wno-unused-variable -Wno-unused-but-set-variable
 # Compile with debug


@ -1,6 +1,7 @@
#include <regex> #include <regex>
#include <vector> #include <vector>
#include <iostream> #include <iostream>
#include <string>
#include "include/tokenize.h" #include "include/tokenize.h"
using namespace std; using namespace std;
@@ -63,97 +64,109 @@ void _debug_print_tokens(vector<Token> tokens) {
     cout << endl;
 }
 
-vector<Token> tokenize(string str) {
+vector<Token> tokenize(vector<string> input) {
     vector<Token> tokens;
 
-    while (str.size() > 0) {
+    for (int i = 0; i < int(input.size()); i++) {
+        string line = input[i];
+        int j = 0;
+
+        while (j < int(line.length())) {
+            string str = line.substr(j, string::npos);
+            CodePosition pos = { .line = i, .column = j };
             smatch m;
 
             if (regex_search(str, m, NUMBER_REGEX, regex_constants::match_continuous)) {
                 Token token = {
                     .type = TokenType::Int,
-                .data = stoi(m.str())
+                    .data = stoi(m.str()),
+                    .pos = pos
                 };
                 tokens.emplace_back(token);
-            str.erase(0, m.str().length());
+                j += m.str().length();
             }
             else if (regex_search(str, m, TYPE_INT_REGEX, regex_constants::match_continuous)) {
                 Token token = {
                     .type = TokenType::Type,
-                .data = Type::Int
+                    .data = Type::Int,
+                    .pos = pos
                 };
                 tokens.emplace_back(token);
-            str.erase(0, m.str().length());
+                j += m.str().length();
             }
             else if (regex_search(str, m, IDENTIFIER_REGEX, regex_constants::match_continuous)) {
                 Token token = {
                     .type = TokenType::Identifier,
-                .data = m.str()
+                    .data = m.str(),
+                    .pos = pos
                 };
                 tokens.emplace_back(token);
-            str.erase(0, m.str().length());
+                j += m.str().length();
             }
-        else if (str.size() >= 2 && str[0] == '+' && str[1] == '+') {
-            Token token = { .type = TokenType::DoublePlus };
+            else if (str.length() >= 2 && str[0] == '+' && str[1] == '+') {
+                Token token = { .type = TokenType::DoublePlus, .pos = pos };
                 tokens.emplace_back(token);
-            str.erase(0, 2);
+                j += 2;
             }
-        else if (str.size() >= 2 && str[0] == '-' && str[1] == '-') {
-            Token token = { .type = TokenType::DoubleMinus };
+            else if (str.length() >= 2 && str[0] == '-' && str[1] == '-') {
+                Token token = { .type = TokenType::DoubleMinus, .pos = pos };
                 tokens.emplace_back(token);
-            str.erase(0, 2);
+                j += 2;
             }
             else if (str[0] == '+') {
-            Token token = { .type = TokenType::Plus };
+                Token token = { .type = TokenType::Plus, .pos = pos };
                 tokens.emplace_back(token);
-            str.erase(0, 1);
+                j += 1;
             }
             else if (str[0] == '-') {
-            Token token = { .type = TokenType::Minus };
+                Token token = { .type = TokenType::Minus, .pos = pos };
                 tokens.emplace_back(token);
-            str.erase(0, 1);
+                j += 1;
             }
             else if (str[0] == '*') {
-            Token token = { .type = TokenType::Star };
+                Token token = { .type = TokenType::Star, .pos = pos };
                 tokens.emplace_back(token);
-            str.erase(0, 1);
+                j += 1;
             }
             else if (str[0] == '/') {
-            Token token = { .type = TokenType::Slash };
+                Token token = { .type = TokenType::Slash, .pos = pos };
                 tokens.emplace_back(token);
-            str.erase(0, 1);
+                j += 1;
             }
             else if (str[0] == '%') {
-            Token token = { .type = TokenType::Percent };
+                Token token = { .type = TokenType::Percent, .pos = pos };
                 tokens.emplace_back(token);
-            str.erase(0, 1);
+                j += 1;
             }
             else if (str[0] == '=') {
-            Token token = { .type = TokenType::Equal };
+                Token token = { .type = TokenType::Equal, .pos = pos };
                 tokens.emplace_back(token);
-            str.erase(0, 1);
+                j += 1;
             }
             else if (str[0] == ';') {
-            Token token = { .type = TokenType::Semicolon };
+                Token token = { .type = TokenType::Semicolon, .pos = pos };
                 tokens.emplace_back(token);
-            str.erase(0, 1);
+                j += 1;
             }
             else if (str[0] == '(') {
-            Token token = { .type = TokenType::LParenthese };
+                Token token = { .type = TokenType::LParenthese, .pos = pos };
                 tokens.emplace_back(token);
-            str.erase(0, 1);
+                j += 1;
             }
             else if (str[0] == ')') {
-            Token token = { .type = TokenType::RParenthese };
+                Token token = { .type = TokenType::RParenthese, .pos = pos };
                 tokens.emplace_back(token);
-            str.erase(0, 1);
+                j += 1;
             }
             else if (isspace(str[0]) || str[0] == '\0') {
-            str.erase(0, 1);
+                j += 1;
             }
             else {
                 throw TokenError("Unknown token {}");
             }
         }
+    }
 
     return tokens;
 }
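
With this change tokenize consumes the source as a vector of lines instead of one string, advancing a cursor j through each line so every token records where it starts instead of the input being destructively erased. A small usage sketch; this main is illustrative and not part of the repository:

    #include <iostream>
    #include <vector>
    #include <string>
    #include "include/tokenize.h"
    using namespace std;

    int main() {
        // One element per source line; line and column indices are 0-based.
        vector<string> input = { "int a = 1;", "a++;" };

        for (const Token& t : tokenize(input))
            cout << "token at " << t.pos.line << ":" << t.pos.column << endl;
    }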


@@ -5,7 +5,7 @@
 #include "../src/include/interpreter.h"
 
 int execute(string s) {
-    vector<Token> tokens = tokenize(s);
+    vector<Token> tokens = tokenize({ s });
     Node ast = parse(tokens);
     EvalResult res = eval(ast);


@@ -13,7 +13,7 @@ int main() {
     {
         string input = "int a = x+++7;";
-        vector<Token> tokens = tokenize(input);
+        vector<Token> tokens = tokenize({ input });
 
         _TEST_ASSERT(
             _TEST_NO_EXCEPTION(tokens.size() == 8),
@@ -97,7 +97,7 @@ int main() {
         string input = "int a = 10 @;";
 
         _TEST_ASSERT(
-            _TEST_IS_EXCEPTION(tokenize(input), TokenError),
+            _TEST_IS_EXCEPTION(tokenize({ input }), TokenError),
            "Token invalide",
            true
        )
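
None of the updated tests assert on the new pos field itself; a follow-up test could, in the same style. A hypothetical example, reusing the _TEST_ASSERT macro shape visible above:

    {
        string input = "int a = 7;";
        vector<Token> tokens = tokenize({ input });

        // Token 3 is the literal 7, which starts at line 0, column 8.
        _TEST_ASSERT(
            _TEST_NO_EXCEPTION(tokens[3].pos.line == 0 && tokens[3].pos.column == 8),
            "Token position",
            true
        )
    }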


@@ -5,7 +5,7 @@
 #include "../src/include/interpreter.h"
 
 int execute(string s) {
-    vector<Token> tokens = tokenize(s);
+    vector<Token> tokens = tokenize({ s });
     Node ast = parse(tokens);
     EvalResult res = eval(ast);