return guess once solved

Remove json requirement
Add auto-solver
2024-12-12 09:20:42 +01:00 · 2024-12-11 17:58:23 +01:00 · 2024-12-11 15:01:37 +01:00 · 2024-12-06 11:24:25 +01:00 · 2024-12-04 11:47:20 +01:00 · 2024-10-16 10:27:39 +02:00
3 changed files with 282 additions and 63 deletions
--- a/README.md
+++ b/README.md
@@ -3,17 +3,22 @@
 # Basic use

 ```bash
-python cemantix.py -m <model.bin>
+python src/cemantix.py -m <model.bin>
 > # input your guess
 > # help() to get an hint
 > # clear() to remove words that are not useful
 ```

+Additional arguments:
+- `-w` `--word` specify an objective word (local only)
+- `-r` `--remote` use [cemantix.certitudes.org](https://cemantix.certitudes.org) instead of local server
+- `-s` `--solver` solve automatically instead of asking user to do it
+
 # Models

 Les modèles de base viennent du [site de Jean-Philippe Fauconnier](https://fauconnier.github.io/#data)

-Le modèle recommandé est disponible [ici]() et est une modification de `frWac_non_lem_no_postag_no_phrase_200_skip_cut100.bin`.
+Les modèles recommandés sont disponibles [ici](https://gitea.augustin64.fr/augustin64/cemantix-cli/releases/tag/v1.0.0/).
 Les mots qui ne sont pas dans le dictionnaire, les verbes conjugués et autres ont été supprimés.

 Il est possible de recréer ce modèle avec `src/generate_wordlist.py` et `src/restrict_model.py` et [ce dictionnaire français](https://github.com/hbenbel/French-Dictionary)
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
 colorama
 gensim
+requests
--- a/src/cemantix.py
+++ b/src/cemantix.py
@@ -1,39 +1,163 @@
+from dataclasses import dataclass
 from gensim.models import KeyedVectors
 from colorama import Fore, Back, Style
 import argparse
+import requests
 import readline
 import random
+import json
 import time

+import numpy as np
+np.seterr(divide='ignore', invalid='ignore')

-def random_word(model, k=5, dist=100):
-    base_words = [
-        model.index_to_key[random.randint(0, len(model))]
-        for _ in range(k)
-    ]
+@dataclass
+class Command:
+    word: str

-    complete_list = base_words.copy()
-    for word in base_words:
-        complete_list += [i[0] for i in model.most_similar(word, topn=dist)]
+@dataclass
+class Guess:
+    word: str

-    rk_words = model.rank_by_centrality(complete_list)
-    return rk_words[random.randint(0,5)][1]
+Input = Command | Guess
+
+class Server:
+    inverse_order = False
+    def __init__(self):
+        pass
+
+    def init_word(self):
+        pass
+
+    def get_rank(self, guess):
+        pass
+
+    def get_temp(self, guess):
+        pass
+
+    def _help(self, rk):
+        raise NotImplementedError
+
+    def _reveal_word(self):
+        raise NotImplementedError
+
+class LocalServer(Server):
+    inverse_order = False
+    def __init__(self, word=None, file="models/selected_word2vec_model.bin"):
+        self.model = KeyedVectors.load_word2vec_format(
+            file,
+            binary=True,
+            unicode_errors="ignore"
+        )
+        self.word = word
+        self.nearest = []


-def cemantix(model, word=None):
-    while word is None or len(word) < 5 or '-' in word or '_' in word:
-        word = random_word(model, k=1, dist=1) # augment numbers to try a "smooth selection"
+    def init_word(self, k=1, dist=100):
+        while (self.word is None or len(self.word) < 5
+               or '-' in self.word or '_' in self.word):
+            base_words = [
+                self.model.index_to_key[random.randint(0, len(self.model))]
+                for _ in range(k)
+            ]
+            if None in base_words:
+                continue

-    nearest = [word]+[i[0] for i in model.most_similar(word, topn=1000)]
-    guesses = [] # guess, temp, rank
-    def get_rank(guess):
-        if guess not in nearest:
+            complete_list = base_words.copy()
+            for word in base_words:
+                complete_list += [i[0] for i in self.model.most_similar(word, topn=dist)]
+
+            rk_words = self.model.rank_by_centrality(complete_list)
+
+            self.word = rk_words[random.randint(0,5)%len(rk_words)][1]
+            self.nearest = [self.word]+[i[0] for i in self.model.most_similar(self.word, topn=1000)]
+
+    def get_rank(self, guess):
+        if guess not in self.nearest:
            return None
-        return 1000 - nearest.index(guess)
+        return 1000 - self.nearest.index(guess)

-    def formatted_status(guesses, last=None):
+    def get_temp(self, guess):
+        return round(100*(1-self.model.distance(self.word, guess)), 2)
+
+    def _help(self, rk):
+        return self.nearest[rk]
+
+    def _reveal_word(self):
+        return self.word
+
+class CemantixServer(Server):
+    def __init__(self):
+        self.words = {}
+        self.solvers = None
+        self.num = None
+
+    def _try(self, word):
+        url = 'https://cemantix.certitudes.org/score'
+        headers = {"Origin": "https://cemantix.certitudes.org"}
+        data = {'word': word}
+        # Need some additional cookies to be a valid client
+        r = requests.post(url, headers=headers, data=data)
+
+        assert r.ok
+
+        data = json.loads(r.text)
+        self.solvers = int(data["solvers"])
+        self.num = int(data["num"])
+        self.words[word] = {"score" : float(data["score"])}
+        if "percentile" in data:
+            self.words[word]["percentile"] = data["percentile"]
+
+
+    def get_rank(self, guess):
+        if guess not in self.words:
+            self._try(guess)
+
+        return self.words[guess].get("percentile", None)
+
+    def get_temp(self, guess):
+        if guess not in self.words:
+            self._try(guess)
+
+        return self.words[guess]["score"]*100
+
+
+class Client:
+    inverse_order = False
+    def __init__(self):
+        pass
+
+    def guess(self) -> str:
+        raise NotImplementedError
+
+    def answer_guess(self, word, temp, dist):
+        raise NotImplementedError
+
+    def correct(self, word):
+        pass
+
+    def reveal(self, word):
+        pass
+
+    def unknow_word(self, word):
+        print(Fore.RED+"Key not present"+Style.RESET_ALL)
+
+    def _clear(self):
+        pass
+
+    def _help(self, word):
+        pass
+
+    def _best_rank(self):
+        raise NotImplementedError
+
+class UserClient(Client):
+    def __init__(self):
+        self.guesses = [] # guess, temp, rank
+
+    def formatted_status(self, last=None):
        text = ""
-        for w, temp, rank in guesses:
+        for w, temp, rank in self.guesses:
            if rank is not None:
                text += Back.RED+Fore.BLACK+Style.BRIGHT+str(rank)+Style.RESET_ALL
            text += "\t"
@@ -45,48 +169,133 @@ def cemantix(model, word=None):
            text += w + Style.RESET_ALL+"\n"
        return text[:-1]
    
-    def tried(word, guessed):
-        return word in [i[0] for i in guessed]
-
-    def interpret_command(cmd, guesses):
-        match cmd:
-            case "clear":
-                guesses = [g for g in guesses if g[1] <= 75.]
-            case "help":
-                best_rk = max([rk for _, _, rk in guesses if rk is not None]+[749])
-                print("Maybe try "+Back.YELLOW+Fore.BLACK+nearest[999-best_rk]+Style.RESET_ALL)
-            case _:
-                print(Fore.RED+"Unknown command"+Style.RESET_ALL)
-
-        return guesses
-
-
-    while True:
+    def guess(self):
        try:
            guess = input(Style.BRIGHT+"Your guess > "+Style.RESET_ALL).strip()
            readline.add_history(guess)
-            if guess.endswith("()"):
-                guesses = interpret_command(guess[:-2], guesses)
-                continue
        except (EOFError, KeyboardInterrupt):
-            print("The word was "+Style.BRIGHT+word+Style.RESET_ALL)
+            raise EOFError
+
+        if guess.endswith("()"):
+            return Command(guess[:-2])
+        return Guess(guess)
+
+    def answer_guess(self, guess, dist, rank):
+        if guess not in (i[0] for i in self.guesses):
+            self.guesses.append((guess, dist, rank))
+            self.guesses.sort(key=lambda x:-x[1] if self.inverse_order else x[1])
+        print(chr(27) + "[2J")
+        print(self.formatted_status(last=guess))
+
+    def reveal(self, word):
+        print("The word was "+Style.BRIGHT+word+Style.RESET_ALL)
+
+    def correct(self, guess):
+        time.sleep(1)
+        print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {len(self.guesses)} tries.")
+        return len(self.guesses)
+
+    def _clear(self):
+        self.guesses = [g for g in self.guesses if g[1] <= 75.]
+
+    def _best_rank(self):
+        return max([rk for _, _, rk in self.guesses if rk is not None]+[749])
+
+    def _help(self, word):
+        print("Maybe try "+Back.YELLOW+Fore.BLACK+word+Style.RESET_ALL)
+
+
+class AutoClient(Client):
+    def __init__(self, file="models/selected_word2vec_model.bin"):
+        self.model = KeyedVectors.load_word2vec_format(
+            file,
+            binary=True,
+            unicode_errors="ignore"
+        )
+        self.dictionary = [
+            self.model.index_to_key[i] for i in range(len(self.model))
+            if self.model.index_to_key[i] is not None
+        ]
+        self.num_guesses = 0
+
+    def guess(self):
+        if len(self.dictionary) == 0:
+            raise EOFError
+        if len(self.dictionary) < 20:
+            return Guess(random.choice(self.dictionary))
+        pos = -int(len(self.dictionary)/10)
+        return Guess(random.choice(
+            [w[1] for w in self.model.rank_by_centrality(self.dictionary)[pos:]]
+        ))
+
+    def answer_guess(self, guess, dist, rank):
+        def cem(score):
+            return  100*(1-score)
+        self.num_guesses += 1
+        old_count = len(self.dictionary)
+
+        self.dictionary.remove(guess) 
+        self.dictionary = [
+            w for w in self.dictionary if abs(cem(self.model.distance(guess, w)) - dist) <= 0.01
+        ]
+
+        print(f"[{self.num_guesses}] Guessing {guess}: {old_count} => {len(self.dictionary)}")
+
+    def reveal(self, word):
+        print("The word was "+Style.BRIGHT+word+Style.RESET_ALL)
+
+    def correct(self, guess):
+        print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {self.num_guesses} tries.")
+        return self.num_guesses
+
+    def unknow_word(self, word):
+        print(Fore.RED+"Key not present"+Style.RESET_ALL)
+        self.dictionary.remove(word)
+
+
+def cemantix(server: Server, client: Client):
+    server.init_word()
+    client.inverse_order = server.inverse_order
+
+    def interpret_command(cmd):
+        match cmd:
+            case "clear":
+                client._clear()
+            case "help":
+                try:
+                    client._help(server._help(999-client._best_rank()))
+                except NotImplementedError:
+                    print(Fore.RED+"No help available"+Style.RESET_ALL)
+            case _:
+                print(Fore.RED+"Unknown command"+Style.RESET_ALL)
+
+    while True:
+        guess = None
+        try:
+            match client.guess():
+                case Command(cmd):
+                    interpret_command(cmd)
+                    continue
+                case Guess(word):
+                    guess = word
+        except EOFError:
+            try:
+                client.reveal(server._reveal_word())
+            except NotImplementedError:
+                pass
            print("Goodbye!")
            return -1
        try:
-            dist = round(round(model.distance(word, guess), 4)*100, 2)
+            dist = server.get_temp(guess)
        except KeyError:
-            print(Fore.RED+"Key not present"+Style.RESET_ALL)
+            client.unknow_word(guess)
            continue
       
-        if not tried(guess, guesses):
-            guesses.append((guess, dist, get_rank(guess)))
-            guesses.sort(key=lambda x:-x[1])
-        print(chr(27) + "[2J")
-        print(formatted_status(guesses, last=guess))
-        if guess == word:
-            time.sleep(1)
-            print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {len(guesses)} tries.")
-            return len(guesses)
+        rank = server.get_rank(guess)
+        client.answer_guess(guess, dist, rank)
+        if rank == 1000:
+            client.correct(guess)
+            return guess



@@ -96,16 +305,20 @@ def main():
    parser.add_argument("-w", "--word", dest="word", action="store",
                        help="Specify goal word")
    parser.add_argument("-m", "--model", dest="model", action="store",
-                        default="frWac_non_lem_no_postag_no_phrase_200_skip_cut100.bin",
+                        default="models/selected_word2vec_model.bin",
                        help="Specify model to use")
+    parser.add_argument("-r", "--remote", dest="remote", action="store_true",
+                        default=False,
+                        help="Use cemantix.certitudes.org instead of local model")
+    parser.add_argument("-s", "--solver", dest="auto_solver", action="store_true",
+                        default=False,
+                        help="Use auto solver")
+
    args = parser.parse_args()

-    model = KeyedVectors.load_word2vec_format(
-        args.model,
-        binary=True,
-        unicode_errors="ignore"
-    )
-    cemantix(model, word=args.word)
+    client = UserClient() if not args.auto_solver else AutoClient(file=args.model)
+    server = LocalServer(word=args.word, file=args.model) if not args.remote else CemantixServer()
+    return cemantix(server, client)

 if __name__ == "__main__":
    main()
Author	SHA1	Message	Date
augustin64	c197661571	return guess once solved	2024-12-12 09:20:42 +01:00
augustin64	48ff7bb93e	Remove `json` requirement	2024-12-11 17:58:23 +01:00
augustin64	553d14dfba	Add auto-solver	2024-12-11 15:01:37 +01:00
augustin64	550e47cb2c	Add remote server	2024-12-06 11:24:25 +01:00
augustin64	44df60fca2	Implement LocalServer	2024-12-04 11:47:20 +01:00
augustin64	a2d46b906b	Update according to precedent changes	2024-10-16 10:27:39 +02:00
augustin64	8a6b0d0963	Actualiser README.md	2024-10-14 13:36:40 +02:00