From 553d14dfbab32ac64ae924e8d9732c1409b42514 Mon Sep 17 00:00:00 2001 From: augustin64 Date: Wed, 11 Dec 2024 15:01:37 +0100 Subject: [PATCH] Add auto-solver --- README.md | 7 +- src/cemantix.py | 206 ++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 170 insertions(+), 43 deletions(-) diff --git a/README.md b/README.md index ff9cf2f..9a508e5 100644 --- a/README.md +++ b/README.md @@ -9,11 +9,16 @@ python src/cemantix.py -m > # clear() to remove words that are not useful ``` +Additional arguments: +- `-w` `--word` specify the target word to guess (local server only) +- `-r` `--remote` use [cemantix.certitudes.org](https://cemantix.certitudes.org) instead of the local server +- `-s` `--solver` solve automatically instead of asking the user to guess + # Models Les modèles de base viennent du [site de Jean-Philippe Fauconnier](https://fauconnier.github.io/#data) -Le modèle recommandé est disponible [ici](https://gitea.augustin64.fr/augustin64/cemantix-cli/releases/download/v1.0.0/selected_word2vec_model.bin) et est une modification de `frWac_non_lem_no_postag_no_phrase_200_skip_cut100.bin`. +Les modèles recommandés sont disponibles [ici](https://gitea.augustin64.fr/augustin64/cemantix-cli/releases/tag/v1.0.0/). Les mots qui ne sont pas dans le dictionnaire, les verbes conjugués et autres ont été supprimés. 
Il est possible de recréer ce modèle avec `src/generate_wordlist.py` et `src/restrict_model.py` et [ce dictionnaire français](https://github.com/hbenbel/French-Dictionary) diff --git a/src/cemantix.py b/src/cemantix.py index fb5dbaf..6e43cc0 100644 --- a/src/cemantix.py +++ b/src/cemantix.py @@ -1,3 +1,4 @@ +from dataclasses import dataclass from gensim.models import KeyedVectors from colorama import Fore, Back, Style import argparse @@ -10,7 +11,17 @@ import time import numpy as np np.seterr(divide='ignore', invalid='ignore') -class Server(): +@dataclass +class Command: + word: str + +@dataclass +class Guess: + word: str + +Input = Command | Guess + +class Server: inverse_order = False def __init__(self): pass @@ -24,14 +35,14 @@ class Server(): def get_temp(self, guess): pass - def _help(self): + def _help(self, rk): raise NotImplementedError def _reveal_word(self): raise NotImplementedError class LocalServer(Server): - inverse_order = True + inverse_order = False def __init__(self, word=None, file="models/selected_word2vec_model.bin"): self.model = KeyedVectors.load_word2vec_format( file, @@ -49,6 +60,8 @@ class LocalServer(Server): self.model.index_to_key[random.randint(0, len(self.model))] for _ in range(k) ] + if None in base_words: + continue complete_list = base_words.copy() for word in base_words: @@ -57,7 +70,7 @@ class LocalServer(Server): rk_words = self.model.rank_by_centrality(complete_list) self.word = rk_words[random.randint(0,5)%len(rk_words)][1] - self.nearest = [word]+[i[0] for i in self.model.most_similar(self.word, topn=1000)] + self.nearest = [self.word]+[i[0] for i in self.model.most_similar(self.word, topn=1000)] def get_rank(self, guess): if guess not in self.nearest: @@ -65,7 +78,7 @@ class LocalServer(Server): return 1000 - self.nearest.index(guess) def get_temp(self, guess): - return round(self.model.distance(self.word, guess)*100, 2) + return round(100*(1-self.model.distance(self.word, guess)), 2) def _help(self, rk): return 
self.nearest[rk] @@ -106,16 +119,45 @@ class CemantixServer(Server): if guess not in self.words: self._try(guess) - return round(self.words[guess]["score"]*100, 2) + return self.words[guess]["score"]*100 -def cemantix(server: Server): - server.init_word() +class Client: + inverse_order = False + def __init__(self): + pass - guesses = [] # guess, temp, rank - def formatted_status(guesses, last=None): + def guess(self) -> str: + raise NotImplementedError + + def answer_guess(self, word, temp, dist): + raise NotImplementedError + + def correct(self, word): + pass + + def reveal(self, word): + pass + + def unknow_word(self, word): + print(Fore.RED+"Key not present"+Style.RESET_ALL) + + def _clear(self): + pass + + def _help(self, word): + pass + + def _best_rank(self): + raise NotImplementedError + +class UserClient(Client): + def __init__(self): + self.guesses = [] # guess, temp, rank + + def formatted_status(self, last=None): text = "" - for w, temp, rank in guesses: + for w, temp, rank in self.guesses: if rank is not None: text += Back.RED+Fore.BLACK+Style.BRIGHT+str(rank)+Style.RESET_ALL text += "\t" @@ -126,37 +168,119 @@ def cemantix(server: Server): text += Style.BRIGHT+Back.WHITE+Fore.BLACK text += w + Style.RESET_ALL+"\n" return text[:-1] + + def guess(self): + try: + guess = input(Style.BRIGHT+"Your guess > "+Style.RESET_ALL).strip() + readline.add_history(guess) + except (EOFError, KeyboardInterrupt): + raise EOFError - def tried(word, guessed): - return (word in [i[0] for i in guessed]) + if guess.endswith("()"): + return Command(guess[:-2]) + return Guess(guess) - def interpret_command(cmd, guesses): + def answer_guess(self, guess, dist, rank): + if guess not in (i[0] for i in self.guesses): + self.guesses.append((guess, dist, rank)) + self.guesses.sort(key=lambda x:-x[1] if self.inverse_order else x[1]) + print(chr(27) + "[2J") + print(self.formatted_status(last=guess)) + + def reveal(self, word): + print("The word was "+Style.BRIGHT+word+Style.RESET_ALL) 
+ + def correct(self, guess): + time.sleep(1) + print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {len(self.guesses)} tries.") + return len(self.guesses) + + def _clear(self): + self.guesses = [g for g in self.guesses if g[1] <= 75.] + + def _best_rank(self): + return max([rk for _, _, rk in self.guesses if rk is not None]+[749]) + + def _help(self, word): + print("Maybe try "+Back.YELLOW+Fore.BLACK+word+Style.RESET_ALL) + + +class AutoClient(Client): + def __init__(self, file="models/selected_word2vec_model.bin"): + self.model = KeyedVectors.load_word2vec_format( + file, + binary=True, + unicode_errors="ignore" + ) + self.dictionary = [ + self.model.index_to_key[i] for i in range(len(self.model)) + if self.model.index_to_key[i] is not None + ] + self.num_guesses = 0 + + def guess(self): + if len(self.dictionary) == 0: + raise EOFError + if len(self.dictionary) < 20: + return Guess(random.choice(self.dictionary)) + pos = -int(len(self.dictionary)/10) + return Guess(random.choice( + [w[1] for w in self.model.rank_by_centrality(self.dictionary)[pos:]] + )) + + def answer_guess(self, guess, dist, rank): + def cem(score): + return 100*(1-score) + self.num_guesses += 1 + old_count = len(self.dictionary) + + self.dictionary.remove(guess) + self.dictionary = [ + w for w in self.dictionary if abs(cem(self.model.distance(guess, w)) - dist) <= 0.01 + ] + + print(f"[{self.num_guesses}] Guessing {guess}: {old_count} => {len(self.dictionary)}") + + def reveal(self, word): + print("The word was "+Style.BRIGHT+word+Style.RESET_ALL) + + def correct(self, guess): + print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {self.num_guesses} tries.") + return self.num_guesses + + def unknow_word(self, word): + print(Fore.RED+"Key not present"+Style.RESET_ALL) + self.dictionary.remove(word) + + +def cemantix(server: Server, client: Client): + server.init_word() + client.inverse_order = server.inverse_order + + def interpret_command(cmd): match cmd: case "clear": - guesses = [g for g in guesses if 
g[1] <= 75.] + client._clear() case "help": try: - best_rk = max([rk for _, _, rk in guesses if rk is not None]+[749]) - print("Maybe try "+Back.YELLOW+Fore.BLACK+server._help(999-best_rk) - +Style.RESET_ALL) + client._help(server._help(999-client._best_rank())) except NotImplementedError: print(Fore.RED+"No help available"+Style.RESET_ALL) case _: print(Fore.RED+"Unknown command"+Style.RESET_ALL) - return guesses - - while True: + guess = None try: - guess = input(Style.BRIGHT+"Your guess > "+Style.RESET_ALL).strip() - readline.add_history(guess) - if guess.endswith("()"): - guesses = interpret_command(guess[:-2], guesses) - continue - except (EOFError, KeyboardInterrupt): + match client.guess(): + case Command(cmd): + interpret_command(cmd) + continue + case Guess(word): + guess = word + except EOFError: try: - print("The word was "+Style.BRIGHT+server._reveal_word()+Style.RESET_ALL) + client.reveal(server._reveal_word()) except NotImplementedError: pass print("Goodbye!") @@ -164,18 +288,13 @@ def cemantix(server: Server): try: dist = server.get_temp(guess) except KeyError: - print(Fore.RED+"Key not present"+Style.RESET_ALL) + client.unknow_word(guess) continue - - if not tried(guess, guesses): - guesses.append((guess, dist, server.get_rank(guess))) - guesses.sort(key=lambda x:-x[1] if server.inverse_order else x[1]) - print(chr(27) + "[2J") - print(formatted_status(guesses, last=guess)) - if server.get_rank(guess) == 1000: - time.sleep(1) - print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {len(guesses)} tries.") - return len(guesses) + + rank = server.get_rank(guess) + client.answer_guess(guess, dist, rank) + if rank == 1000: + return client.correct(guess) @@ -190,12 +309,15 @@ def main(): parser.add_argument("-r", "--remote", dest="remote", action="store_true", default=False, help="Use cemantix.certitudes.org instead of local model") + parser.add_argument("-s", "--solver", dest="auto_solver", action="store_true", + default=False, + help="Use auto solver") args = 
parser.parse_args() - if args.remote: - return cemantix(CemantixServer()) - return cemantix(LocalServer(word=args.word, file=args.model)) + client = UserClient() if not args.auto_solver else AutoClient(file=args.model) + server = LocalServer(word=args.word, file=args.model) if not args.remote else CemantixServer() + return cemantix(server, client) if __name__ == "__main__": main()