Add auto-solver

2024-12-11 15:01:37 +01:00 · 2024-12-11 15:01:37 +01:00 · 553d14dfba
commit 553d14dfba
parent 550e47cb2c
2 changed files with 170 additions and 43 deletions
--- a/README.md
+++ b/README.md
@ -9,11 +9,16 @@ python src/cemantix.py -m <model.bin>
 > # clear() to remove words that are not useful
 ```
 Additional arguments:
 - `-w` `--word` specify an objective word (local only)
 - `-r` `--remote` use [cemantix.certitudes.org](https://cemantix.certitudes.org) instead of local server
 - `-s` `--solver` solve automatically instead of asking the user to do it
 # Models
 Les modèles de base viennent du [site de Jean-Philippe Fauconnier](https://fauconnier.github.io/#data)
-Le modèle recommandé est disponible [ici](https://gitea.augustin64.fr/augustin64/cemantix-cli/releases/download/v1.0.0/selected_word2vec_model.bin) et est une modification de `frWac_non_lem_no_postag_no_phrase_200_skip_cut100.bin`.
+Les modèles recommandés sont disponibles [ici](https://gitea.augustin64.fr/augustin64/cemantix-cli/releases/tag/v1.0.0/).
 Les mots qui ne sont pas dans le dictionnaire, les verbes conjugués et autres ont été supprimés.
 Il est possible de recréer ce modèle avec `src/generate_wordlist.py` et `src/restrict_model.py` et [ce dictionnaire français](https://github.com/hbenbel/French-Dictionary)
--- a/src/cemantix.py
+++ b/src/cemantix.py
@ -1,3 +1,4 @@
 from dataclasses import dataclass
 from gensim.models import KeyedVectors
 from colorama import Fore, Back, Style
 import argparse
@ -10,7 +11,17 @@ import time
 import numpy as np
 np.seterr(divide='ignore', invalid='ignore')
-class Server():
+@dataclass
 class Command:
    word: str
@dataclass
 class Guess:
    word: str
 Input = Command | Guess
 class Server:
    inverse_order = False
    def __init__(self):
        pass
@ -24,14 +35,14 @@ class Server():
    def get_temp(self, guess):
        pass
-    def _help(self):
+    def _help(self, rk):
        raise NotImplementedError
    def _reveal_word(self):
        raise NotImplementedError
 class LocalServer(Server):
-    inverse_order = True
+    inverse_order = False
    def __init__(self, word=None, file="models/selected_word2vec_model.bin"):
        self.model = KeyedVectors.load_word2vec_format(
            file,
@ -49,6 +60,8 @@ class LocalServer(Server):
                self.model.index_to_key[random.randint(0, len(self.model))]
                for _ in range(k)
            ]
            if None in base_words:
                continue
            complete_list = base_words.copy()
            for word in base_words:
@ -57,7 +70,7 @@ class LocalServer(Server):
            rk_words = self.model.rank_by_centrality(complete_list)
            self.word = rk_words[random.randint(0,5)%len(rk_words)][1]
-            self.nearest = [word]+[i[0] for i in self.model.most_similar(self.word, topn=1000)]
+            self.nearest = [self.word]+[i[0] for i in self.model.most_similar(self.word, topn=1000)]
    def get_rank(self, guess):
        if guess not in self.nearest:
@ -65,7 +78,7 @@ class LocalServer(Server):
        return 1000 - self.nearest.index(guess)
    def get_temp(self, guess):
-        return round(self.model.distance(self.word, guess)*100, 2)
+        return round(100*(1-self.model.distance(self.word, guess)), 2)
    def _help(self, rk):
        return self.nearest[rk]
@ -106,16 +119,45 @@ class CemantixServer(Server):
        if guess not in self.words:
            self._try(guess)
-        return round(self.words[guess]["score"]*100, 2)
+        return self.words[guess]["score"]*100
-def cemantix(server: Server):
+class Client:
-    server.init_word()
+    inverse_order = False
    def __init__(self):
        pass
-    guesses = [] # guess, temp, rank
+    def guess(self) -> str:
-    def formatted_status(guesses, last=None):
+        raise NotImplementedError
    def answer_guess(self, word, temp, dist):
        raise NotImplementedError
    def correct(self, word):
        pass
    def reveal(self, word):
        pass
    def unknow_word(self, word):
        print(Fore.RED+"Key not present"+Style.RESET_ALL)
    def _clear(self):
        pass
    def _help(self, word):
        pass
    def _best_rank(self):
        raise NotImplementedError
 class UserClient(Client):
    def __init__(self):
        self.guesses = [] # guess, temp, rank
    def formatted_status(self, last=None):
        text = ""
-        for w, temp, rank in guesses:
+        for w, temp, rank in self.guesses:
            if rank is not None:
                text += Back.RED+Fore.BLACK+Style.BRIGHT+str(rank)+Style.RESET_ALL
            text += "\t"
@ -127,36 +169,118 @@ def cemantix(server: Server):
            text += w + Style.RESET_ALL+"\n"
        return text[:-1]
-    def tried(word, guessed):
+    def guess(self):
-        return (word in [i[0] for i in guessed])
+        try:
            guess = input(Style.BRIGHT+"Your guess > "+Style.RESET_ALL).strip()
            readline.add_history(guess)
        except (EOFError, KeyboardInterrupt):
            raise EOFError
-    def interpret_command(cmd, guesses):
+        if guess.endswith("()"):
            return Command(guess[:-2])
        return Guess(guess)
    def answer_guess(self, guess, dist, rank):
        if guess not in (i[0] for i in self.guesses):
            self.guesses.append((guess, dist, rank))
            self.guesses.sort(key=lambda x:-x[1] if self.inverse_order else x[1])
        print(chr(27) + "[2J")
        print(self.formatted_status(last=guess))
    def reveal(self, word):
        print("The word was "+Style.BRIGHT+word+Style.RESET_ALL)
    def correct(self, guess):
        time.sleep(1)
        print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {len(self.guesses)} tries.")
        return len(self.guesses)
    def _clear(self):
        self.guesses = [g for g in self.guesses if g[1] <= 75.]
    def _best_rank(self):
        return max([rk for _, _, rk in self.guesses if rk is not None]+[749])
    def _help(self, word):
        print("Maybe try "+Back.YELLOW+Fore.BLACK+word+Style.RESET_ALL)
 class AutoClient(Client):
    """Client that plays the game automatically instead of prompting a user.

    It keeps a list of candidate words taken from a word2vec model and,
    after every answered guess, drops the candidates whose similarity to the
    guessed word does not match the temperature reported for that guess.
    """
    def __init__(self, file="models/selected_word2vec_model.bin"):
        """Load the model and build the initial candidate list.

        file: path to a binary word2vec-format model, loaded with
              KeyedVectors.load_word2vec_format.
        """
        self.model = KeyedVectors.load_word2vec_format(
            file,
            binary=True,
            unicode_errors="ignore"
        )
        # Candidate words still considered possible: every non-None key of
        # the model to start with.
        self.dictionary = [
            self.model.index_to_key[i] for i in range(len(self.model))
            if self.model.index_to_key[i] is not None
        ]
        # Number of guesses answered so far (see answer_guess).
        self.num_guesses = 0
    def guess(self):
        """Pick the next word to try and return it wrapped in a Guess.

        Raises EOFError when no candidate is left.
        """
        if len(self.dictionary) == 0:
            raise EOFError
        # With fewer than 20 candidates, pick one at random without ranking.
        if len(self.dictionary) < 20:
            return Guess(random.choice(self.dictionary))
        # Otherwise rank all candidates and pick at random among the last
        # tenth of the ranking; w[1] is the word of each ranking entry.
        # NOTE(review): presumably the tail holds the least central words
        # (gensim documents the ranking as sorted by descending score) —
        # confirm this is the intended end of the list.
        pos = -int(len(self.dictionary)/10)
        return Guess(random.choice(
            [w[1] for w in self.model.rank_by_centrality(self.dictionary)[pos:]]
        ))
    def answer_guess(self, guess, dist, rank):
        """Use the answer for `guess` to narrow down the candidate list.

        guess: the word that was tried.
        dist:  the temperature reported for that word.
        rank:  the rank reported for that word (not used here).
        """
        def cem(score):
            # Convert a model distance to the 100*(1-distance) scale that
            # `dist` is compared against below.
            return  100*(1-score)
        self.num_guesses += 1
        old_count = len(self.dictionary)
        # NOTE(review): list.remove raises ValueError if `guess` is not in
        # the candidate list.
        self.dictionary.remove(guess) 
        # Keep only the words whose converted distance to the guessed word
        # is within 0.01 of the reported temperature.
        self.dictionary = [
            w for w in self.dictionary if abs(cem(self.model.distance(guess, w)) - dist) <= 0.01
        ]
        print(f"[{self.num_guesses}] Guessing {guess}: {old_count} => {len(self.dictionary)}")
    def reveal(self, word):
        """Print the word that was to be found."""
        print("The word was "+Style.BRIGHT+word+Style.RESET_ALL)
    def correct(self, guess):
        """Print the success message and return the number of guesses made."""
        print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {self.num_guesses} tries.")
        return self.num_guesses
    def unknow_word(self, word):
        """Report a word the server does not know and drop it from the candidates."""
        print(Fore.RED+"Key not present"+Style.RESET_ALL)
        self.dictionary.remove(word)
 def cemantix(server: Server, client: Client):
    server.init_word()
    client.inverse_order = server.inverse_order
    def interpret_command(cmd):
        match cmd:
            case "clear":
-                guesses = [g for g in guesses if g[1] <= 75.]
+                client._clear()
            case "help":
                try:
-                    best_rk = max([rk for _, _, rk in guesses if rk is not None]+[749])
+                    client._help(server._help(999-client._best_rank()))
                    print("Maybe try "+Back.YELLOW+Fore.BLACK+server._help(999-best_rk)
                        +Style.RESET_ALL)
                except NotImplementedError:
                    print(Fore.RED+"No help available"+Style.RESET_ALL)
            case _:
                print(Fore.RED+"Unknown command"+Style.RESET_ALL)
        return guesses
    while True:
        guess = None
        try:
-            guess = input(Style.BRIGHT+"Your guess > "+Style.RESET_ALL).strip()
+            match client.guess():
-            readline.add_history(guess)
+                case Command(cmd):
-            if guess.endswith("()"):
+                    interpret_command(cmd)
-                guesses = interpret_command(guess[:-2], guesses)
+                    continue
-                continue
+                case Guess(word):
-        except (EOFError, KeyboardInterrupt):
+                    guess = word
        except EOFError:
            try:
-                print("The word was "+Style.BRIGHT+server._reveal_word()+Style.RESET_ALL)
+                client.reveal(server._reveal_word())
            except NotImplementedError:
                pass
            print("Goodbye!")
@ -164,18 +288,13 @@ def cemantix(server: Server):
        try:
            dist = server.get_temp(guess)
        except KeyError:
-            print(Fore.RED+"Key not present"+Style.RESET_ALL)
+            client.unknow_word(guess)
            continue
-        if not tried(guess, guesses):
+        rank = server.get_rank(guess)
-            guesses.append((guess, dist, server.get_rank(guess)))
+        client.answer_guess(guess, dist, rank)
-            guesses.sort(key=lambda x:-x[1] if server.inverse_order else x[1])
+        if rank == 1000:
-        print(chr(27) + "[2J")
+            return client.correct(guess)
        print(formatted_status(guesses, last=guess))
        if server.get_rank(guess) == 1000:
            time.sleep(1)
            print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {len(guesses)} tries.")
            return len(guesses)
@ -190,12 +309,15 @@ def main():
    parser.add_argument("-r", "--remote", dest="remote", action="store_true",
                        default=False,
                        help="Use cemantix.certitudes.org instead of local model")
    parser.add_argument("-s", "--solver", dest="auto_solver", action="store_true",
                        default=False,
                        help="Use auto solver")
    args = parser.parse_args()
-    if args.remote:
+    client = UserClient() if not args.auto_solver else AutoClient(file=args.model)
-        return cemantix(CemantixServer())
+    server = LocalServer(word=args.word, file=args.model) if not args.remote else CemantixServer()
-    return cemantix(LocalServer(word=args.word, file=args.model))
+    return cemantix(server, client)
 if __name__ == "__main__":
    main()