Add code position to tokens

This commit is contained in:
ala89 2023-11-15 14:31:11 +01:00
parent 87bcd0d064
commit cb3444fcf3
5 changed files with 105 additions and 92 deletions

View File

@ -14,7 +14,7 @@ TESTS_OBJ = $(TESTS_SRC:$(TEST_SRCDIR)/%.cpp=$(BUILDDIR)/test-%)
LD_CXXFLAGS =
# Compilation flag
CXXFLAGS = -Wall -Wextra -g -O3 -std=c++2a
CXXFLAGS = -Wall -Wextra -g -O0 -std=c++2a
# Remove warnings about unused variables, functions, ...
# -Wno-unused-parameter -Wno-unused-function -Wno-unused-variable -Wno-unused-but-set-variable
# Compile with debug

View File

@ -1,6 +1,7 @@
#include <regex>
#include <vector>
#include <iostream>
#include <string>
#include "include/tokenize.h"
using namespace std;
@ -63,95 +64,107 @@ void _debug_print_tokens(vector<Token> tokens) {
cout << endl;
}
vector<Token> tokenize(string str) {
vector<Token> tokenize(vector<string> input) {
vector<Token> tokens;
while (str.size() > 0) {
smatch m;
if (regex_search(str, m, NUMBER_REGEX, regex_constants::match_continuous)) {
Token token = {
.type = TokenType::Int,
.data = stoi(m.str())
};
tokens.emplace_back(token);
str.erase(0, m.str().length());
}
else if (regex_search(str, m, TYPE_INT_REGEX, regex_constants::match_continuous)) {
Token token = {
.type = TokenType::Type,
.data = Type::Int
};
tokens.emplace_back(token);
str.erase(0, m.str().length());
}
else if (regex_search(str, m, IDENTIFIER_REGEX, regex_constants::match_continuous)) {
Token token = {
.type = TokenType::Identifier,
.data = m.str()
};
tokens.emplace_back(token);
str.erase(0, m.str().length());
}
else if (str.size() >= 2 && str[0] == '+' && str[1] == '+') {
Token token = { .type = TokenType::DoublePlus };
tokens.emplace_back(token);
str.erase(0, 2);
}
else if (str.size() >= 2 && str[0] == '-' && str[1] == '-') {
Token token = { .type = TokenType::DoubleMinus };
tokens.emplace_back(token);
str.erase(0, 2);
}
else if (str[0] == '+') {
Token token = { .type = TokenType::Plus };
tokens.emplace_back(token);
str.erase(0, 1);
}
else if (str[0] == '-') {
Token token = { .type = TokenType::Minus };
tokens.emplace_back(token);
str.erase(0, 1);
}
else if (str[0] == '*') {
Token token = { .type = TokenType::Star };
tokens.emplace_back(token);
str.erase(0, 1);
}
else if (str[0] == '/') {
Token token = { .type = TokenType::Slash };
tokens.emplace_back(token);
str.erase(0, 1);
}
else if (str[0] == '%') {
Token token = { .type = TokenType::Percent };
tokens.emplace_back(token);
str.erase(0, 1);
}
else if (str[0] == '=') {
Token token = { .type = TokenType::Equal };
tokens.emplace_back(token);
str.erase(0, 1);
}
else if (str[0] == ';') {
Token token = { .type = TokenType::Semicolon };
tokens.emplace_back(token);
str.erase(0, 1);
}
else if (str[0] == '(') {
Token token = { .type = TokenType::LParenthese };
tokens.emplace_back(token);
str.erase(0, 1);
}
else if (str[0] == ')') {
Token token = { .type = TokenType::RParenthese };
tokens.emplace_back(token);
str.erase(0, 1);
}
else if (isspace(str[0]) || str[0] == '\0') {
str.erase(0, 1);
}
else {
throw TokenError("Unknown token {}");
// Scan the input one line at a time. `i` is the 0-based index of the current
// line in `input`, `j` the 0-based column of the next character to consume.
for (int i = 0; i < int(input.size()); i++) {
    // Bind by reference: the line is only read, so no per-line copy is needed.
    const string& line = input[i];
    int j = 0;
    while (j < int(line.length())) {
        // Build the position inside the loop so that each token records the
        // column it actually starts at. Building it once per line (before the
        // loop) would stamp column 0 on every token of that line.
        CodePosition pos = { .line = i, .column = j };
        // Remaining, not yet consumed part of the current line.
        string str = line.substr(j, string::npos);
        smatch m;
        // match_continuous anchors every regex at the start of `str`.
        // Order matters: the type keyword regex is tried before the generic
        // identifier regex.
        if (regex_search(str, m, NUMBER_REGEX, regex_constants::match_continuous)) {
            Token token = {
                .type = TokenType::Int,
                .data = stoi(m.str()),
                .pos = pos
            };
            tokens.emplace_back(token);
            j += static_cast<int>(m.length());
        }
        else if (regex_search(str, m, TYPE_INT_REGEX, regex_constants::match_continuous)) {
            Token token = {
                .type = TokenType::Type,
                .data = Type::Int,
                .pos = pos
            };
            tokens.emplace_back(token);
            j += static_cast<int>(m.length());
        }
        else if (regex_search(str, m, IDENTIFIER_REGEX, regex_constants::match_continuous)) {
            Token token = {
                .type = TokenType::Identifier,
                .data = m.str(),
                .pos = pos
            };
            tokens.emplace_back(token);
            j += static_cast<int>(m.length());
        }
        // Two-character operators are tested before their one-character
        // prefixes so that "++" is not split into two Plus tokens.
        else if (str.length() >= 2 && str[0] == '+' && str[1] == '+') {
            Token token = { .type = TokenType::DoublePlus, .pos = pos };
            tokens.emplace_back(token);
            j += 2;
        }
        else if (str.length() >= 2 && str[0] == '-' && str[1] == '-') {
            Token token = { .type = TokenType::DoubleMinus, .pos = pos };
            tokens.emplace_back(token);
            j += 2;
        }
        else if (str[0] == '+') {
            Token token = { .type = TokenType::Plus, .pos = pos };
            tokens.emplace_back(token);
            j += 1;
        }
        else if (str[0] == '-') {
            Token token = { .type = TokenType::Minus, .pos = pos };
            tokens.emplace_back(token);
            j += 1;
        }
        else if (str[0] == '*') {
            Token token = { .type = TokenType::Star, .pos = pos };
            tokens.emplace_back(token);
            j += 1;
        }
        else if (str[0] == '/') {
            Token token = { .type = TokenType::Slash, .pos = pos };
            tokens.emplace_back(token);
            j += 1;
        }
        else if (str[0] == '%') {
            Token token = { .type = TokenType::Percent, .pos = pos };
            tokens.emplace_back(token);
            j += 1;
        }
        else if (str[0] == '=') {
            Token token = { .type = TokenType::Equal, .pos = pos };
            tokens.emplace_back(token);
            j += 1;
        }
        else if (str[0] == ';') {
            Token token = { .type = TokenType::Semicolon, .pos = pos };
            tokens.emplace_back(token);
            j += 1;
        }
        else if (str[0] == '(') {
            Token token = { .type = TokenType::LParenthese, .pos = pos };
            tokens.emplace_back(token);
            j += 1;
        }
        else if (str[0] == ')') {
            Token token = { .type = TokenType::RParenthese, .pos = pos };
            tokens.emplace_back(token);
            j += 1;
        }
        // Whitespace and embedded NUL characters are skipped without
        // producing a token. The cast is required: passing a negative char
        // (e.g. a non-ASCII byte) to isspace is undefined behaviour.
        else if (isspace(static_cast<unsigned char>(str[0])) || str[0] == '\0') {
            j += 1;
        }
        else {
            // NOTE(review): the "{}" placeholder is never filled in; the
            // message is kept as-is because the TokenError constructor is not
            // visible here. Consider including `pos` in the message.
            throw TokenError("Unknown token {}");
        }
    }
}

View File

@ -5,7 +5,7 @@
#include "../src/include/interpreter.h"
int execute(string s) {
vector<Token> tokens = tokenize(s);
vector<Token> tokens = tokenize({ s });
Node ast = parse(tokens);
EvalResult res = eval(ast);

View File

@ -13,7 +13,7 @@ int main() {
{
string input = "int a = x+++7;";
vector<Token> tokens = tokenize(input);
vector<Token> tokens = tokenize({ input });
_TEST_ASSERT(
_TEST_NO_EXCEPTION(tokens.size() == 8),
@ -97,7 +97,7 @@ int main() {
string input = "int a = 10 @;";
_TEST_ASSERT(
_TEST_IS_EXCEPTION(tokenize(input), TokenError),
_TEST_IS_EXCEPTION(tokenize({ input }), TokenError),
"Token invalide",
true
)

View File

@ -5,7 +5,7 @@
#include "../src/include/interpreter.h"
int execute(string s) {
vector<Token> tokens = tokenize(s);
vector<Token> tokens = tokenize({ s });
Node ast = parse(tokens);
EvalResult res = eval(ast);