From 0935dff3a515e0734b8c36301f7f7bf0f4ad72ba Mon Sep 17 00:00:00 2001 From: augustin64 Date: Fri, 8 Dec 2023 15:59:45 +0100 Subject: [PATCH] Add static analysis --- src/analysis.cpp | 267 +++++++++++++++++++++++++++++++++++++++++ src/include/analysis.h | 4 +- src/include/parser.h | 5 - src/include/tokenize.h | 2 +- src/include/utils.h | 12 ++ src/main.cpp | 7 +- src/parser.cpp | 6 +- src/utils.cpp | 7 ++ test/tokenize.cpp | 4 +- 9 files changed, 298 insertions(+), 16 deletions(-) create mode 100644 src/include/utils.h create mode 100644 src/utils.cpp diff --git a/src/analysis.cpp b/src/analysis.cpp index e69de29..8cd938a 100644 --- a/src/analysis.cpp +++ b/src/analysis.cpp @@ -0,0 +1,267 @@ +#include +#include "include/utils.h" +#include "include/analysis.h" + +bool bool_castable(AnalysisResult type) { + (void)type; + return true; +} + +void check_comparable(AnalysisResult res1, AnalysisResult res2, CodePosition pos) { + if (holds_alternative(res1) || holds_alternative(res2)) { + throw TypeError("Incomparable values", pos); + } + + Type type1 = get(res1); + Type type2 = get(res2); + + + switch (type1) { + case Type::Int: + case Type::Double: { + switch (type2) { + case Type::Int: + case Type::Double: { + return; + } + default: + throw TypeError("Incomparable values", pos); + } + } + default: + throw TypeError("Incomparable values", pos); + } +} + +Type get_cast(AnalysisResult type1, AnalysisResult type2, CodePosition pos) { + (void)type1; (void)type2; (void)pos; + return Type::Int; +} + +bool is_arithmetic_type(Type type) { + switch (type) { + case Type::Int: + case Type::Double: + return true; + default: + return false; + } +} + +Type string_to_type(string s, CodePosition pos) { + if (s == "int") + return Type::Int; + if (s == "double") + return Type::Double; + + throw TypeError("Unknown type", pos); +} + +AnalysisResult analyze(Node &ast, Memory &memory) { + if (holds_alternative(ast)) { + Token token = get(ast); + switch (token.type) { + case 
TokenType::Litteral: { + if (holds_alternative(token.data)) { + return Type::Int; + } else if (holds_alternative(token.data)) { + return Type::Double; + } + } + throw; + break; + case TokenType::Identifier: { + string identifier = get(token.data); + + if (!memory.contains(identifier)) + throw RuntimeError("Unknown identifier \""+identifier+"\"", token.pos); + + return memory.get(identifier).type; + } + throw; + break; + default: + throw; + } + } else { + InnerNode node = get(ast); + switch (node.type) { + case NodeType::Prog: + analyze(node.children[0], memory); + analyze(node.children[1], memory); + return {}; + break; + case NodeType::Epsilon: + return {}; + break; + case NodeType::If: + case NodeType::IfElse: { + if (!bool_castable(analyze(node.children[0], memory))) { + throw TypeError("Can't find an explicit cast to bool", get_node_pos(node.children[0])); + } + + analyze(node.children[1], memory); + if (node.type == NodeType::IfElse) + analyze(node.children[2], memory); + + return {}; + } break; + case NodeType::While: { + if (!bool_castable(analyze(node.children[0], memory))) { + throw TypeError("Can't find a cast to bool", get_node_pos(node.children[0])); + } + analyze(node.children[1], memory); + + return {}; + } break; + case NodeType::For: { + memory.add_scope(ScopeType::For); + + analyze(node.children[0], memory); + + if (!bool_castable(analyze(node.children[1], memory))) { + throw TypeError("Can't find an explicit cast to bool", get_node_pos(node.children[1])); + } + + analyze(node.children[2], memory); + analyze(node.children[3], memory); + + memory.remove_scope(); + + return {}; + } break; + case NodeType::Bloc: { + memory.add_scope(ScopeType::Block); + analyze(node.children[0], memory); + memory.remove_scope(); + return {}; + } break; + case NodeType::Lor: + case NodeType::Land: { + if (!bool_castable(analyze(node.children[0], memory))) { + throw TypeError("Can't find an explicit cast to bool", get_node_pos(node.children[0])); + } + if 
(!bool_castable(analyze(node.children[1], memory))) { + throw TypeError("Can't find an explicit cast to bool", get_node_pos(node.children[1])); + } + + return Type::Int; + } break; + case NodeType::Neg: { + if (!bool_castable(analyze(node.children[0], memory))) { + throw TypeError("Can't find an explicit cast to bool", get_node_pos(node.children[0])); + } + + return Type::Int; + } break; + case NodeType::Lt: + case NodeType::Gt: + case NodeType::Leq: + case NodeType::Geq: { + AnalysisResult res1 = analyze(node.children[0], memory); + AnalysisResult res2 = analyze(node.children[1], memory); + + check_comparable(res1, res2, node.pos); + + return Type::Int; + } break; + case NodeType::Eq: + case NodeType::Neq: + case NodeType::Plus: + case NodeType::Minus: + case NodeType::Mult: + case NodeType::Div: { + AnalysisResult res1 = analyze(node.children[0], memory); + AnalysisResult res2 = analyze(node.children[1], memory); + + return get_cast(res1, res2, node.pos); + } break; + case NodeType::Mod: { + AnalysisResult e1 = analyze(node.children[0], memory); + AnalysisResult e2 = analyze(node.children[1], memory); + + if (holds_alternative(e1) || get(e1) != Type::Int) { + throw TypeError("Expression must have integral type", get_node_pos(node.children[0])); + } + if (holds_alternative(e2) || get(e2) != Type::Int) { + throw TypeError("Expression must have integral type", get_node_pos(node.children[1])); + } + + return Type::Int; + } break; + case NodeType::UnaryPlus: + case NodeType::UnaryMinus: { + AnalysisResult res = analyze(node.children[0], memory); + + if (holds_alternative(res) || !is_arithmetic_type(get(res))) { + throw TypeError("Expressions must have arithmetic type", get_node_pos(node.children[1])); + } + } break; + case NodeType::Declaration: { + Token token = get(node.children[0]); + string identifier = get(token.data); + if (memory.contains(identifier)) + throw TypeError("Already defined identifier \""+identifier+"\"", token.pos); + + Type type = 
string_to_type(get(token.data), token.pos); + memory.declare(identifier, type); + + Token typedToken = { + .type=TokenType::Identifier, + .data=type, + .pos=token.pos + }; + node.children[0] = typedToken; + + return {}; + } break; + case NodeType::AssignedDeclaration: { + Token token = get(node.children[0]); + string identifier = get(token.data); + if (memory.contains(identifier)) + throw TypeError("Already defined identifier \""+identifier+"\"", token.pos); + + Type type = string_to_type(get(token.data), token.pos); + memory.declare(identifier, type); + cout << "declared" << endl; + + Token typedToken = { + .type=TokenType::Identifier, + .data=type, + .pos=token.pos + }; + node.children[0] = typedToken; + + get_cast(type, analyze(node.children[2], memory), get_node_pos(node)); + + return type; + } break; + case NodeType::Assignment: { + Token identifierTok = get(node.children[0]); + string identifier = get(identifierTok.data); + if (!memory.contains(identifier)) + throw TypeError("Unknown identifier \""+identifier+"\"", identifierTok.pos); + + Type type = memory.get(identifier).type; + AnalysisResult res = analyze(node.children[1], memory); + + get_cast(type, res, get_node_pos(node.children[1])); + + return type; + } break; + case NodeType::LIncr: + case NodeType::RIncr: + case NodeType::LDecr: + case NodeType::RDecr: { + Token identifierTok = get(node.children[0]); + string identifier = get(identifierTok.data); + + if (!memory.contains(identifier)) + throw RuntimeError("Unknown identifier \""+identifier+"\"", identifierTok.pos); + + return memory.get(identifier).type; + } + } + } + throw; +} \ No newline at end of file diff --git a/src/include/analysis.h b/src/include/analysis.h index ded5f95..1de657b 100644 --- a/src/include/analysis.h +++ b/src/include/analysis.h @@ -5,6 +5,8 @@ #include "memory.h" using namespace std; -void analyze(Node &ast, Memory &memory); +using AnalysisResult = variant; + +AnalysisResult analyze(Node &ast, Memory &memory); #endif \ No 
newline at end of file diff --git a/src/include/parser.h b/src/include/parser.h index 3711e50..ad3f31c 100644 --- a/src/include/parser.h +++ b/src/include/parser.h @@ -82,9 +82,4 @@ ParseReturn parse_par_identifier(vector tokens); */ void _debug_print_tree(const Node& node, int depth = 0, const string& prefix = ""); -/** - * Returns the CodePosition of a node -*/ -CodePosition get_node_pos(Node node); - #endif \ No newline at end of file diff --git a/src/include/tokenize.h b/src/include/tokenize.h index 55bc8a9..9a66f66 100644 --- a/src/include/tokenize.h +++ b/src/include/tokenize.h @@ -20,7 +20,7 @@ void _debug_print_token(Token token); /* Returns the name of a TokenType */ -string _debug_print_token_type(TokenType type); +string _debug_get_token_type_name(TokenType type); /* Formats a list of tokens and prints it diff --git a/src/include/utils.h b/src/include/utils.h new file mode 100644 index 0000000..d318134 --- /dev/null +++ b/src/include/utils.h @@ -0,0 +1,12 @@ +#ifndef DEF_UTILS_H +#define DEF_UTILS_H + +#include "types.h" +using namespace std; + +/** + * Returns the CodePosition of a node +*/ +CodePosition get_node_pos(Node node); + +#endif \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 82fc7a8..8371508 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -6,9 +6,10 @@ using namespace std; #include "include/errors.h" #include "include/colors.h" #include "include/parser.h" -#include "include/tokenize.h" -#include "include/interpreter.h" #include "include/memory.h" +#include "include/tokenize.h" +#include "include/analysis.h" +#include "include/interpreter.h" int main(int argc, char* argv[]) { bool print_ast = false; @@ -33,6 +34,8 @@ int main(int argc, char* argv[]) { tokens = tokenize(input, initial_line); Node ast = parse(tokens); + Memory type_mem = memory; + analyze(ast, type_mem); if (print_ast) _debug_print_tree(ast, 0, ""); diff --git a/src/parser.cpp b/src/parser.cpp index eb42b2c..d1a3c9b 100644 --- a/src/parser.cpp +++ 
b/src/parser.cpp @@ -6,6 +6,7 @@ using namespace std; #include "include/tokenize.h" #include "include/colors.h" #include "include/parser.h" +#include "include/utils.h" CodePosition null_pos = { .line = -1, @@ -49,11 +50,6 @@ void _debug_print_tree(const Node& node, int depth, const string& prefix) { } } -CodePosition get_node_pos(Node node) { - if (holds_alternative(node)) - return get(node).pos; - return get(node).pos; -} Node parse(vector tokens) { reverse(tokens.begin(), tokens.end()); diff --git a/src/utils.cpp b/src/utils.cpp new file mode 100644 index 0000000..641f442 --- /dev/null +++ b/src/utils.cpp @@ -0,0 +1,7 @@ +#include "include/utils.h" + +/* Returns the pos member of whichever alternative node currently holds. node is taken by value. NOTE(review): the template arguments of holds_alternative/get are missing from this copy of the patch - restore them from the repository before applying. */CodePosition get_node_pos(Node node) { + if (holds_alternative(node)) + return get(node).pos; + return get(node).pos; +} \ No newline at end of file diff --git a/test/tokenize.cpp b/test/tokenize.cpp index 73bd85c..f799e5f 100644 --- a/test/tokenize.cpp +++ b/test/tokenize.cpp @@ -26,8 +26,8 @@ int main() { for (size_t i = 0; i < inputs.size(); i++) { vector tokens = tokenize({ inputs[i] }); - _TEST_ASSERT(tokens.size() == 1, _debug_print_token_type(expectedTypes[i]).c_str(), false); - _TEST_ASSERT(tokens[0].type == expectedTypes[i], _debug_print_token_type(expectedTypes[i]).c_str(), true); + _TEST_ASSERT(tokens.size() == 1, _debug_get_token_type_name(expectedTypes[i]).c_str(), false); + _TEST_ASSERT(tokens[0].type == expectedTypes[i], _debug_get_token_type_name(expectedTypes[i]).c_str(), true); } /* Complex input */