From 035bc05637a351cd29f20c29f76589d5af3796f8 Mon Sep 17 00:00:00 2001
From: augustin64
Date: Fri, 11 Oct 2024 15:28:27 +0200
Subject: [PATCH] Initial commit

---
 .gitignore       |   3 ++
 README.md        |   3 ++
 cemantix.py      | 131 +++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |   2 +
 4 files changed, 139 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 cemantix.py
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a7e2360
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+.venv
+**/__pycache__
+*.bin
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..32085dc
--- /dev/null
+++ b/README.md
@@ -0,0 +1,3 @@
+# [Cemantix](https://cemantix.certitudes.org/) CLI (fully local)
+
+Download models at [Jean-Philippe Fauconnier's website](https://fauconnier.github.io/#data) (`frWac_non_lem_no_postag_no_phrase_200_skip_cut100.bin` recommended)
diff --git a/cemantix.py b/cemantix.py
new file mode 100644
index 0000000..37e5037
--- /dev/null
+++ b/cemantix.py
@@ -0,0 +1,131 @@
+from gensim.models import KeyedVectors
+from colorama import Fore, Back, Style
+import argparse
+import random
+
+
+def _pick_word(model):
+    """Return a random vocabulary entry that contains no underscore.
+
+    The scan starts at a random index and wraps around the end of the
+    vocabulary, so it can never index past the last entry.
+
+    Raises:
+        ValueError: if the vocabulary is empty or every entry contains "_".
+    """
+    keys = model.index_to_key
+    if not keys:
+        raise ValueError("model vocabulary is empty")
+    # randrange excludes its upper bound; randint(0, len(keys)) could
+    # return len(keys), which is not a valid index.
+    start = random.randrange(len(keys))
+    for offset in range(len(keys)):
+        candidate = keys[(start + offset) % len(keys)]
+        if "_" not in candidate:
+            return candidate
+    raise ValueError("no vocabulary entry without '_' found")
+
+
+def cemantix(model, word=None, topn=1000):
+    """Run one interactive guessing game in the terminal.
+
+    Args:
+        model: word vectors exposing ``index_to_key``, ``most_similar``
+            and ``distance`` (``main`` passes a gensim ``KeyedVectors``).
+        word: target word; a random vocabulary entry is drawn when None.
+        topn: number of nearest neighbours of the target that get a rank.
+
+    Returns:
+        The number of distinct guesses once the target is found, or -1
+        if the player quits with Ctrl-C / Ctrl-D.
+    """
+    if word is None:
+        word = _pick_word(model)
+
+    # The closest neighbour gets rank `topn`, the next `topn - 1`, etc.
+    # A dict gives O(1) rank lookups instead of list.index on each guess.
+    neighbours = model.most_similar(word, topn=topn)
+    ranks = {w: topn - i for i, (w, _) in enumerate(neighbours)}
+
+    def formatted_status(guesses, last=None):
+        """Return the guess table, one rank/score/word line per guess."""
+        lines = []
+        for w, temp, rank in guesses:
+            line = ""
+            if rank is not None:
+                line += Back.RED + Fore.BLACK + Style.BRIGHT + str(rank)
+                line += Style.RESET_ALL
+            line += "\t"
+            # `temp` is a distance score (lower is closer), so the
+            # far-away guesses are the ones shown in blue.
+            if temp >= 75:
+                line += Fore.BLUE
+            line += str(temp) + Style.RESET_ALL + "\t"
+            if last == w:
+                # Highlight the guess that was just entered.
+                line += Style.BRIGHT + Back.WHITE + Fore.BLACK
+            line += w + Style.RESET_ALL
+            lines.append(line)
+        return "\n".join(lines)
+
+    # (word, score, rank) tuples sorted by descending score, so the
+    # closest guess is printed last.
+    guesses = []
+    seen = set()  # words already guessed, to avoid duplicate rows
+    prompt = Style.BRIGHT + "Your guess > " + Style.RESET_ALL
+
+    while True:
+        try:
+            guess = input(prompt).strip()
+        except (EOFError, KeyboardInterrupt):
+            print("The word was " + Style.BRIGHT + word + Style.RESET_ALL)
+            print("Goodbye!")
+            return -1
+        try:
+            # Model distance to the target scaled by 100, two decimals.
+            dist = round(round(model.distance(word, guess), 4) * 100, 2)
+        except KeyError:
+            print(Fore.RED + "Key not present" + Style.RESET_ALL)
+            continue
+
+        if guess not in seen:
+            seen.add(guess)
+            guesses.append((guess, dist, ranks.get(guess)))
+            guesses.sort(key=lambda entry: -entry[1])
+        print(chr(27) + "[2J")  # ANSI "erase display" sequence
+        print(formatted_status(guesses, last=guess))
+        if guess == word:
+            print(
+                Fore.GREEN + "Correct!" + Style.RESET_ALL
+                + f" {len(guesses)} tries."
+            )
+            return len(guesses)
+
+
+def main():
+    """Parse the command line, load the word vectors and start a game."""
+    parser = argparse.ArgumentParser(description="local infinite cemantix")
+    parser.add_argument("-w", "--word", dest="word", action="store",
+                        help="Specify goal word")
+    parser.add_argument("-m", "--model", dest="model", action="store",
+                        default="frWac_non_lem_no_postag_no_phrase_200_skip_cut100.bin",
+                        help="Specify model to use")
+    args = parser.parse_args()
+
+    try:
+        model = KeyedVectors.load_word2vec_format(
+            args.model,
+            binary=True,
+            unicode_errors="ignore"
+        )
+    except FileNotFoundError:
+        parser.error(f"model file not found: {args.model}")
+    # Reject an unknown goal word up front instead of crashing with a
+    # KeyError inside the game.
+    if args.word is not None and args.word not in model:
+        parser.error(f"goal word {args.word!r} is not in the model vocabulary")
+    cemantix(model, word=args.word)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..dbab0f3
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+colorama
+gensim