return guess once solved

Remove json requirement
Add auto-solver
2024-12-12 09:20:42 +01:00 · 2024-12-11 17:58:23 +01:00 · 2024-12-11 15:01:37 +01:00 · 2024-12-06 11:24:25 +01:00 · 2024-12-04 11:47:20 +01:00 · 2024-10-16 10:27:39 +02:00
3 changed files with 282 additions and 63 deletions
--- a/README.md
+++ b/README.md
@ -3,17 +3,22 @@
 # Basic use
 ```bash
-python cemantix.py -m <model.bin>
+python src/cemantix.py -m <model.bin>
 > # input your guess
 > # help() to get a hint
 > # clear() to remove words that are not useful
 ```
 Additional arguments:
 - `-w` `--word` specify an objective word (local only)
 - `-r` `--remote` use [cemantix.certitudes.org](https://cemantix.certitudes.org) instead of local server
 - `-s` `--solver` solve automatically instead of asking the user to do it
 # Models
 Les modèles de base viennent du [site de Jean-Philippe Fauconnier](https://fauconnier.github.io/#data)
-Le modèle recommandé est disponible [ici]() et est une modification de `frWac_non_lem_no_postag_no_phrase_200_skip_cut100.bin`.
+Les modèles recommandés sont disponibles [ici](https://gitea.augustin64.fr/augustin64/cemantix-cli/releases/tag/v1.0.0/).
 Les mots qui ne sont pas dans le dictionnaire, les verbes conjugués et autres ont été supprimés.
 Il est possible de recréer ce modèle avec `src/generate_wordlist.py` et `src/restrict_model.py` et [ce dictionnaire français](https://github.com/hbenbel/French-Dictionary)
--- a/requirements.txt
+++ b/requirements.txt
@ -1,2 +1,3 @@
 colorama
 gensim
 requests
--- a/src/cemantix.py
+++ b/src/cemantix.py
@ -1,39 +1,163 @@
 from dataclasses import dataclass
 from gensim.models import KeyedVectors
 from colorama import Fore, Back, Style
 import argparse
 import requests
 import readline
 import random
 import json
 import time
 import numpy as np
 np.seterr(divide='ignore', invalid='ignore')
-def random_word(model, k=5, dist=100):
+@dataclass
-    base_words = [
+class Command:
-        model.index_to_key[random.randint(0, len(model))]
+    word: str
        for _ in range(k)
    ]
-    complete_list = base_words.copy()
+@dataclass
-    for word in base_words:
+class Guess:
-        complete_list += [i[0] for i in model.most_similar(word, topn=dist)]
+    word: str
-    rk_words = model.rank_by_centrality(complete_list)
+Input = Command | Guess
-    return rk_words[random.randint(0,5)][1]
+
 class Server:
    """Base interface for a game backend that rates guesses.

    The default hooks do nothing and return ``None``; the two cheat hooks
    (``_help`` and ``_reveal_word``) raise ``NotImplementedError`` so that a
    backend which cannot provide them is detected by the game loop.
    """

    # The game loop copies this flag onto the client before play starts.
    inverse_order = False

    def __init__(self):
        """Create the backend; the base class keeps no state."""

    def init_word(self):
        """Prepare the secret word. No-op in the base class."""
        return None

    def get_rank(self, guess):
        """Return the rank of ``guess``; the base class returns ``None``."""
        return None

    def get_temp(self, guess):
        """Return the temperature of ``guess``; the base class returns ``None``."""
        return None

    def _help(self, rk):
        """Return a hint word for index ``rk``.

        Raises:
            NotImplementedError: always, unless a subclass overrides it.
        """
        raise NotImplementedError

    def _reveal_word(self):
        """Return the secret word.

        Raises:
            NotImplementedError: always, unless a subclass overrides it.
        """
        raise NotImplementedError
 class LocalServer(Server):
    inverse_order = False
    def __init__(self, word=None, file="models/selected_word2vec_model.bin"):
        self.model = KeyedVectors.load_word2vec_format(
            file,
            binary=True,
            unicode_errors="ignore"
        )
        self.word = word
        self.nearest = []
-def cemantix(model, word=None):
+    def init_word(self, k=1, dist=100):
-    while word is None or len(word) < 5 or '-' in word or '_' in word:
+        while (self.word is None or len(self.word) < 5
-        word = random_word(model, k=1, dist=1) # augment numbers to try a "smooth selection"
+               or '-' in self.word or '_' in self.word):
            base_words = [
                self.model.index_to_key[random.randint(0, len(self.model))]
                for _ in range(k)
            ]
            if None in base_words:
                continue
-    nearest = [word]+[i[0] for i in model.most_similar(word, topn=1000)]
+            complete_list = base_words.copy()
-    guesses = [] # guess, temp, rank
+            for word in base_words:
-    def get_rank(guess):
+                complete_list += [i[0] for i in self.model.most_similar(word, topn=dist)]
-        if guess not in nearest:
+
            rk_words = self.model.rank_by_centrality(complete_list)
            self.word = rk_words[random.randint(0,5)%len(rk_words)][1]
            self.nearest = [self.word]+[i[0] for i in self.model.most_similar(self.word, topn=1000)]
    def get_rank(self, guess):
        if guess not in self.nearest:
            return None
-        return 1000 - nearest.index(guess)
+        return 1000 - self.nearest.index(guess)
-    def formatted_status(guesses, last=None):
+    def get_temp(self, guess):
        return round(100*(1-self.model.distance(self.word, guess)), 2)
    def _help(self, rk):
        return self.nearest[rk]
    def _reveal_word(self):
        return self.word
 class CemantixServer(Server):
    """Backend that rates guesses through cemantix.certitudes.org.

    Each reply is cached in ``self.words`` so a given word is submitted to
    the remote service at most once.
    """

    def __init__(self):
        # word -> {"score": float, "percentile": value} ("percentile" is only
        # stored when the reply contains one).
        self.words = {}
        # Latest "solvers" and "num" values reported by the service.
        self.solvers = None
        self.num = None

    def _try(self, word):
        """Submit ``word`` to the remote service and cache its reply.

        Raises:
            requests.HTTPError: the service answered with an error status.
            requests.Timeout: the service did not answer in time.
            KeyError: the reply lacks "solvers", "num" or "score"; the game
                loop catches ``KeyError`` and reports the word as unknown.
        """
        url = 'https://cemantix.certitudes.org/score'
        headers = {"Origin": "https://cemantix.certitudes.org"}
        # Need some additional cookies to be a valid client
        # Without a timeout, requests may block forever on a stalled server.
        r = requests.post(url, headers=headers, data={'word': word}, timeout=10)
        # An explicit check instead of ``assert``: asserts vanish under -O.
        r.raise_for_status()
        data = r.json()

        # Parse everything first so a malformed reply leaves the state untouched.
        solvers = int(data["solvers"])
        num = int(data["num"])
        entry = {"score": float(data["score"])}
        if "percentile" in data:
            entry["percentile"] = data["percentile"]

        self.solvers = solvers
        self.num = num
        self.words[word] = entry

    def get_rank(self, guess):
        """Return the cached "percentile" of ``guess``, or ``None`` if absent.

        Queries the service first when ``guess`` has not been seen yet.
        """
        if guess not in self.words:
            self._try(guess)
        return self.words[guess].get("percentile")

    def get_temp(self, guess):
        """Return the score of ``guess`` multiplied by 100.

        Queries the service first when ``guess`` has not been seen yet.
        """
        if guess not in self.words:
            self._try(guess)
        return self.words[guess]["score"]*100
 class Client:
    """Base interface for a player, human or automatic.

    ``guess``, ``answer_guess`` and ``_best_rank`` must be provided by
    subclasses; the remaining hooks are optional and do nothing by default.
    """

    # Overwritten by the game loop with the server's own flag.
    inverse_order = False

    def __init__(self):
        """Create the player; the base class keeps no state."""

    def guess(self) -> str:
        """Produce the next input.

        Subclasses in this file return ``Guess`` or ``Command`` objects.

        Raises:
            NotImplementedError: always, unless a subclass overrides it.
        """
        raise NotImplementedError

    def answer_guess(self, word, temp, dist):
        """Receive the server's rating for ``word``.

        Raises:
            NotImplementedError: always, unless a subclass overrides it.
        """
        raise NotImplementedError

    def correct(self, word):
        """Hook called when ``word`` is the solution. No-op by default."""
        return None

    def reveal(self, word):
        """Hook called to show the solution on exit. No-op by default."""
        return None

    def unknow_word(self, word):
        """Tell the player that the server does not know ``word``."""
        print(f"{Fore.RED}Key not present{Style.RESET_ALL}")

    def _clear(self):
        """Handle the ``clear`` command. No-op by default."""
        return None

    def _help(self, word):
        """Handle the ``help`` command with hint ``word``. No-op by default."""
        return None

    def _best_rank(self):
        """Return the best rank reached so far.

        Raises:
            NotImplementedError: always, unless a subclass overrides it.
        """
        raise NotImplementedError
 class UserClient(Client):
    def __init__(self):
        self.guesses = [] # guess, temp, rank
    def formatted_status(self, last=None):
        text = ""
-        for w, temp, rank in guesses:
+        for w, temp, rank in self.guesses:
            if rank is not None:
                text += Back.RED+Fore.BLACK+Style.BRIGHT+str(rank)+Style.RESET_ALL
            text += "\t"
@ -44,49 +168,134 @@ def cemantix(model, word=None):
                text += Style.BRIGHT+Back.WHITE+Fore.BLACK
            text += w + Style.RESET_ALL+"\n"
        return text[:-1]
-
+    
-    def tried(word, guessed):
+    def guess(self):
        return word in [i[0] for i in guessed]
    def interpret_command(cmd, guesses):
        match cmd:
            case "clear":
                guesses = [g for g in guesses if g[1] <= 75.]
            case "help":
                best_rk = max([rk for _, _, rk in guesses if rk is not None]+[749])
                print("Maybe try "+Back.YELLOW+Fore.BLACK+nearest[999-best_rk]+Style.RESET_ALL)
            case _:
                print(Fore.RED+"Unknown command"+Style.RESET_ALL)
        return guesses
    while True:
        try:
            guess = input(Style.BRIGHT+"Your guess > "+Style.RESET_ALL).strip()
            readline.add_history(guess)
            if guess.endswith("()"):
                guesses = interpret_command(guess[:-2], guesses)
                continue
        except (EOFError, KeyboardInterrupt):
-            print("The word was "+Style.BRIGHT+word+Style.RESET_ALL)
+            raise EOFError
        if guess.endswith("()"):
            return Command(guess[:-2])
        return Guess(guess)
    def answer_guess(self, guess, dist, rank):
        if guess not in (i[0] for i in self.guesses):
            self.guesses.append((guess, dist, rank))
            self.guesses.sort(key=lambda x:-x[1] if self.inverse_order else x[1])
        print(chr(27) + "[2J")
        print(self.formatted_status(last=guess))
    def reveal(self, word):
        print("The word was "+Style.BRIGHT+word+Style.RESET_ALL)
    def correct(self, guess):
        time.sleep(1)
        print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {len(self.guesses)} tries.")
        return len(self.guesses)
    def _clear(self):
        self.guesses = [g for g in self.guesses if g[1] <= 75.]
    def _best_rank(self):
        return max([rk for _, _, rk in self.guesses if rk is not None]+[749])
    def _help(self, word):
        print("Maybe try "+Back.YELLOW+Fore.BLACK+word+Style.RESET_ALL)
 class AutoClient(Client):
    """Automatic player that prunes a candidate list after every answer.

    It loads its own word2vec model and keeps in ``self.dictionary`` only the
    words whose temperature relative to each past guess matches the value the
    server reported.
    """

    def __init__(self, file="models/selected_word2vec_model.bin"):
        """Load the model from ``file`` and start with every known word."""
        self.model = KeyedVectors.load_word2vec_format(
            file, binary=True, unicode_errors="ignore"
        )
        keys = self.model.index_to_key
        # Some vocabulary slots may hold None; those are not playable words.
        self.dictionary = [
            keys[idx] for idx in range(len(self.model)) if keys[idx] is not None
        ]
        self.num_guesses = 0

    def guess(self):
        """Pick the next word to try.

        Returns:
            A ``Guess`` wrapping one remaining candidate.

        Raises:
            EOFError: no candidate is left, which ends the game loop.
        """
        candidates = self.dictionary
        if not candidates:
            raise EOFError
        if len(candidates) < 20:
            # Few words left: any of them is as good a pick as another.
            return Guess(random.choice(candidates))
        # Draw from the last tenth of the centrality ranking
        # (presumably the least central words - TODO confirm ordering).
        tail_start = -int(len(candidates)/10)
        ranking = self.model.rank_by_centrality(candidates)
        pool = [entry[1] for entry in ranking[tail_start:]]
        return Guess(random.choice(pool))

    def answer_guess(self, guess, dist, rank):
        """Drop every candidate inconsistent with the server's answer.

        Args:
            guess: the word that was just tried.
            dist: temperature reported by the server for ``guess``.
            rank: rank reported by the server (not used here).
        """
        self.num_guesses += 1
        before = len(self.dictionary)
        self.dictionary.remove(guess)
        survivors = []
        for candidate in self.dictionary:
            # Temperature the candidate would yield if it were the solution.
            expected = 100*(1-self.model.distance(guess, candidate))
            if abs(expected - dist) <= 0.01:
                survivors.append(candidate)
        self.dictionary = survivors
        print(f"[{self.num_guesses}] Guessing {guess}: {before} => {len(self.dictionary)}")

    def reveal(self, word):
        """Print the solution."""
        print(f"The word was {Style.BRIGHT}{word}{Style.RESET_ALL}")

    def correct(self, guess):
        """Print the success message and return the number of guesses made."""
        print(f"{Fore.GREEN}Correct!{Style.RESET_ALL} {self.num_guesses} tries.")
        return self.num_guesses

    def unknow_word(self, word):
        """Report ``word`` as unknown to the server and stop considering it."""
        print(f"{Fore.RED}Key not present{Style.RESET_ALL}")
        self.dictionary.remove(word)
 def cemantix(server: Server, client: Client):
    server.init_word()
    client.inverse_order = server.inverse_order
    def interpret_command(cmd):
        match cmd:
            case "clear":
                client._clear()
            case "help":
                try:
                    client._help(server._help(999-client._best_rank()))
                except NotImplementedError:
                    print(Fore.RED+"No help available"+Style.RESET_ALL)
            case _:
                print(Fore.RED+"Unknown command"+Style.RESET_ALL)
    while True:
        guess = None
        try:
            match client.guess():
                case Command(cmd):
                    interpret_command(cmd)
                    continue
                case Guess(word):
                    guess = word
        except EOFError:
            try:
                client.reveal(server._reveal_word())
            except NotImplementedError:
                pass
            print("Goodbye!")
            return -1
        try:
-            dist = round(round(model.distance(word, guess), 4)*100, 2)
+            dist = server.get_temp(guess)
        except KeyError:
-            print(Fore.RED+"Key not present"+Style.RESET_ALL)
+            client.unknow_word(guess)
            continue
-        
+       
-        if not tried(guess, guesses):
+        rank = server.get_rank(guess)
-            guesses.append((guess, dist, get_rank(guess)))
+        client.answer_guess(guess, dist, rank)
-            guesses.sort(key=lambda x:-x[1])
+        if rank == 1000:
-        print(chr(27) + "[2J")
+            client.correct(guess)
-        print(formatted_status(guesses, last=guess))
+            return guess
        if guess == word:
            time.sleep(1)
            print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {len(guesses)} tries.")
            return len(guesses)
@ -96,16 +305,20 @@ def main():
    parser.add_argument("-w", "--word", dest="word", action="store",
                        help="Specify goal word")
    parser.add_argument("-m", "--model", dest="model", action="store",
-                        default="frWac_non_lem_no_postag_no_phrase_200_skip_cut100.bin",
+                        default="models/selected_word2vec_model.bin",
                        help="Specify model to use")
    parser.add_argument("-r", "--remote", dest="remote", action="store_true",
                        default=False,
                        help="Use cemantix.certitudes.org instead of local model")
    parser.add_argument("-s", "--solver", dest="auto_solver", action="store_true",
                        default=False,
                        help="Use auto solver")
    args = parser.parse_args()
-    model = KeyedVectors.load_word2vec_format(
+    client = UserClient() if not args.auto_solver else AutoClient(file=args.model)
-        args.model,
+    server = LocalServer(word=args.word, file=args.model) if not args.remote else CemantixServer()
-        binary=True,
+    return cemantix(server, client)
        unicode_errors="ignore"
    )
    cemantix(model, word=args.word)
 if __name__ == "__main__":
    main()
Author	SHA1	Message	Date
augustin64	c197661571	return guess once solved	2024-12-12 09:20:42 +01:00
augustin64	48ff7bb93e	Remove `json` requirement	2024-12-11 17:58:23 +01:00
augustin64	553d14dfba	Add auto-solver	2024-12-11 15:01:37 +01:00
augustin64	550e47cb2c	Add remote server	2024-12-06 11:24:25 +01:00
augustin64	44df60fca2	Implement LocalServer	2024-12-04 11:47:20 +01:00
augustin64	a2d46b906b	Update according to precedent changes	2024-10-16 10:27:39 +02:00
augustin64	8a6b0d0963	Actualiser README.md	2024-10-14 13:36:40 +02:00