Add static analysis

This commit is contained in:
augustin64 2023-12-08 15:59:45 +01:00
parent 6f71314f3f
commit 0935dff3a5
9 changed files with 298 additions and 16 deletions

View File

@ -0,0 +1,267 @@
#include <iostream>
#include "include/utils.h"
#include "include/analysis.h"
/**
 * Tells whether an analysis result can be used where a bool is expected.
 * Placeholder implementation: the argument is ignored and every result is accepted.
 */
bool bool_castable(AnalysisResult type) {
    static_cast<void>(type); // intentionally unused for now
    return true;
}
/**
 * Checks that two analysis results can be compared with each other.
 * Both results must hold a Type, and each of those types must be Int or Double.
 * Throws TypeError("Incomparable values") at `pos` otherwise.
 */
void check_comparable(AnalysisResult res1, AnalysisResult res2, CodePosition pos) {
    // A result is an acceptable operand when it carries a numeric type
    const auto is_numeric = [](const AnalysisResult &res) {
        if (holds_alternative<monostate>(res))
            return false;
        const Type held = get<Type>(res);
        return held == Type::Int || held == Type::Double;
    };

    if (!is_numeric(res1) || !is_numeric(res2))
        throw TypeError("Incomparable values", pos);
}
/**
 * Placeholder implementation: ignores both operand results and the position,
 * and always answers Type::Int.
 */
Type get_cast(AnalysisResult type1, AnalysisResult type2, CodePosition pos) {
    static_cast<void>(type1);
    static_cast<void>(type2);
    static_cast<void>(pos);
    return Type::Int;
}
/**
 * Returns true when `type` is Type::Int or Type::Double, false for any other type.
 */
bool is_arithmetic_type(Type type) {
    return type == Type::Int || type == Type::Double;
}
/**
 * Converts a type name to the matching Type:
 * "int" gives Type::Int, "double" gives Type::Double.
 * Throws TypeError("Unknown type") at `pos` for any other name.
 */
Type string_to_type(string s, CodePosition pos) {
    const bool names_int = (s == "int");
    const bool names_double = (s == "double");

    if (!names_int && !names_double)
        throw TypeError("Unknown type", pos);

    return names_int ? Type::Int : Type::Double;
}
AnalysisResult analyze(Node &ast, Memory &memory) {
if (holds_alternative<Token>(ast)) {
Token token = get<Token>(ast);
switch (token.type) {
case TokenType::Litteral: {
if (holds_alternative<int>(token.data)) {
return Type::Int;
} else if (holds_alternative<double>(token.data)) {
return Type::Double;
}
}
throw;
break;
case TokenType::Identifier: {
string identifier = get<string>(token.data);
if (!memory.contains(identifier))
throw RuntimeError("Unknown identifier \""+identifier+"\"", token.pos);
return memory.get(identifier).type;
}
throw;
break;
default:
throw;
}
} else {
InnerNode node = get<InnerNode>(ast);
switch (node.type) {
case NodeType::Prog:
analyze(node.children[0], memory);
analyze(node.children[1], memory);
return {};
break;
case NodeType::Epsilon:
return {};
break;
case NodeType::If:
case NodeType::IfElse: {
if (!bool_castable(analyze(node.children[0], memory))) {
throw TypeError("Can't find an explicit cast to bool", get_node_pos(node.children[0]));
}
analyze(node.children[1], memory);
if (node.type == NodeType::IfElse)
analyze(node.children[2], memory);
return {};
} break;
case NodeType::While: {
if (!bool_castable(analyze(node.children[0], memory))) {
throw TypeError("Can't find a cast to bool", get_node_pos(node.children[0]));
}
analyze(node.children[1], memory);
return {};
} break;
case NodeType::For: {
memory.add_scope(ScopeType::For);
analyze(node.children[0], memory);
if (!bool_castable(analyze(node.children[1], memory))) {
throw TypeError("Can't find an explicit cast to bool", get_node_pos(node.children[1]));
}
analyze(node.children[2], memory);
analyze(node.children[3], memory);
memory.remove_scope();
return {};
} break;
case NodeType::Bloc: {
memory.add_scope(ScopeType::Block);
analyze(node.children[0], memory);
memory.remove_scope();
return {};
} break;
case NodeType::Lor:
case NodeType::Land: {
if (!bool_castable(analyze(node.children[0], memory))) {
throw TypeError("Can't find an explicit cast to bool", get_node_pos(node.children[0]));
}
if (!bool_castable(analyze(node.children[1], memory))) {
throw TypeError("Can't find an explicit cast to bool", get_node_pos(node.children[1]));
}
return Type::Int;
} break;
case NodeType::Neg: {
if (!bool_castable(analyze(node.children[0], memory))) {
throw TypeError("Can't find an explicit cast to bool", get_node_pos(node.children[0]));
}
return Type::Int;
} break;
case NodeType::Lt:
case NodeType::Gt:
case NodeType::Leq:
case NodeType::Geq: {
AnalysisResult res1 = analyze(node.children[0], memory);
AnalysisResult res2 = analyze(node.children[1], memory);
check_comparable(res1, res2, node.pos);
return Type::Int;
} break;
case NodeType::Eq:
case NodeType::Neq:
case NodeType::Plus:
case NodeType::Minus:
case NodeType::Mult:
case NodeType::Div: {
AnalysisResult res1 = analyze(node.children[0], memory);
AnalysisResult res2 = analyze(node.children[1], memory);
return get_cast(res1, res2, node.pos);
} break;
case NodeType::Mod: {
AnalysisResult e1 = analyze(node.children[0], memory);
AnalysisResult e2 = analyze(node.children[1], memory);
if (holds_alternative<monostate>(e1) || get<Type>(e1) != Type::Int) {
throw TypeError("Expression must have integral type", get_node_pos(node.children[0]));
}
if (holds_alternative<monostate>(e2) || get<Type>(e2) != Type::Int) {
throw TypeError("Expression must have integral type", get_node_pos(node.children[1]));
}
return Type::Int;
} break;
case NodeType::UnaryPlus:
case NodeType::UnaryMinus: {
AnalysisResult res = analyze(node.children[0], memory);
if (holds_alternative<monostate>(res) || !is_arithmetic_type(get<Type>(res))) {
throw TypeError("Expressions must have arithmetic type", get_node_pos(node.children[1]));
}
} break;
case NodeType::Declaration: {
Token token = get<Token>(node.children[0]);
string identifier = get<string>(token.data);
if (memory.contains(identifier))
throw TypeError("Already defined identifier \""+identifier+"\"", token.pos);
Type type = string_to_type(get<string>(token.data), token.pos);
memory.declare(identifier, type);
Token typedToken = {
.type=TokenType::Identifier,
.data=type,
.pos=token.pos
};
node.children[0] = typedToken;
return {};
} break;
case NodeType::AssignedDeclaration: {
Token token = get<Token>(node.children[0]);
string identifier = get<string>(token.data);
if (memory.contains(identifier))
throw TypeError("Already defined identifier \""+identifier+"\"", token.pos);
Type type = string_to_type(get<string>(token.data), token.pos);
memory.declare(identifier, type);
cout << "declared" << endl;
Token typedToken = {
.type=TokenType::Identifier,
.data=type,
.pos=token.pos
};
node.children[0] = typedToken;
get_cast(type, analyze(node.children[2], memory), get_node_pos(node));
return type;
} break;
case NodeType::Assignment: {
Token identifierTok = get<Token>(node.children[0]);
string identifier = get<string>(identifierTok.data);
if (!memory.contains(identifier))
throw TypeError("Unknown identifier \""+identifier+"\"", identifierTok.pos);
Type type = memory.get(identifier).type;
AnalysisResult res = analyze(node.children[1], memory);
get_cast(type, res, get_node_pos(node.children[1]));
return type;
} break;
case NodeType::LIncr:
case NodeType::RIncr:
case NodeType::LDecr:
case NodeType::RDecr: {
Token identifierTok = get<Token>(node.children[0]);
string identifier = get<string>(identifierTok.data);
if (!memory.contains(identifier))
throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos);
return memory.get(identifier).type;
}
}
}
throw;
}

View File

@ -5,6 +5,8 @@
#include "memory.h" #include "memory.h"
using namespace std; using namespace std;
void analyze(Node &ast, Memory<int> &memory); using AnalysisResult = variant<Type, monostate>;
AnalysisResult analyze(Node &ast, Memory &memory);
#endif #endif

View File

@ -82,9 +82,4 @@ ParseReturn parse_par_identifier(vector<Token> tokens);
*/ */
void _debug_print_tree(const Node& node, int depth = 0, const string& prefix = ""); void _debug_print_tree(const Node& node, int depth = 0, const string& prefix = "");
/**
* Returns the CodePosition of a node
*/
CodePosition get_node_pos(Node node);
#endif #endif

View File

@ -20,7 +20,7 @@ void _debug_print_token(Token token);
/* /*
Returns the name of a TokenType Returns the name of a TokenType
*/ */
string _debug_print_token_type(TokenType type); string _debug_get_token_type_name(TokenType type);
/* /*
Formats a list of tokens and prints it Formats a list of tokens and prints it

12
src/include/utils.h Normal file
View File

@ -0,0 +1,12 @@
#ifndef DEF_UTILS_H
#define DEF_UTILS_H
#include "types.h"
using namespace std;
/**
 * Returns the CodePosition of a node:
 * the `pos` of the InnerNode when `node` holds one, the `pos` of the Token otherwise.
 * The node is taken by value.
 */
CodePosition get_node_pos(Node node);
#endif

View File

@ -6,9 +6,10 @@ using namespace std;
#include "include/errors.h" #include "include/errors.h"
#include "include/colors.h" #include "include/colors.h"
#include "include/parser.h" #include "include/parser.h"
#include "include/tokenize.h"
#include "include/interpreter.h"
#include "include/memory.h" #include "include/memory.h"
#include "include/tokenize.h"
#include "include/analysis.h"
#include "include/interpreter.h"
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
bool print_ast = false; bool print_ast = false;
@ -33,6 +34,8 @@ int main(int argc, char* argv[]) {
tokens = tokenize(input, initial_line); tokens = tokenize(input, initial_line);
Node ast = parse(tokens); Node ast = parse(tokens);
Memory type_mem = memory;
analyze(ast, type_mem);
if (print_ast) if (print_ast)
_debug_print_tree(ast, 0, ""); _debug_print_tree(ast, 0, "");

View File

@ -6,6 +6,7 @@ using namespace std;
#include "include/tokenize.h" #include "include/tokenize.h"
#include "include/colors.h" #include "include/colors.h"
#include "include/parser.h" #include "include/parser.h"
#include "include/utils.h"
CodePosition null_pos = { CodePosition null_pos = {
.line = -1, .line = -1,
@ -49,11 +50,6 @@ void _debug_print_tree(const Node& node, int depth, const string& prefix) {
} }
} }
CodePosition get_node_pos(Node node) {
if (holds_alternative<InnerNode>(node))
return get<InnerNode>(node).pos;
return get<Token>(node).pos;
}
Node parse(vector<Token> tokens) { Node parse(vector<Token> tokens) {
reverse(tokens.begin(), tokens.end()); reverse(tokens.begin(), tokens.end());

7
src/utils.cpp Normal file
View File

@ -0,0 +1,7 @@
#include "include/utils.h"
/**
 * Returns the CodePosition of a node:
 * the position stored in the InnerNode when `node` holds one,
 * the position of the Token otherwise.
 */
CodePosition get_node_pos(Node node) {
    if (const InnerNode *inner = get_if<InnerNode>(&node))
        return inner->pos;
    return get<Token>(node).pos;
}

View File

@ -26,8 +26,8 @@ int main() {
for (size_t i = 0; i < inputs.size(); i++) { for (size_t i = 0; i < inputs.size(); i++) {
vector<Token> tokens = tokenize({ inputs[i] }); vector<Token> tokens = tokenize({ inputs[i] });
_TEST_ASSERT(tokens.size() == 1, _debug_print_token_type(expectedTypes[i]).c_str(), false); _TEST_ASSERT(tokens.size() == 1, _debug_get_token_type_name(expectedTypes[i]).c_str(), false);
_TEST_ASSERT(tokens[0].type == expectedTypes[i], _debug_print_token_type(expectedTypes[i]).c_str(), true); _TEST_ASSERT(tokens[0].type == expectedTypes[i], _debug_get_token_type_name(expectedTypes[i]).c_str(), true);
} }
/* Complex input */ /* Complex input */