Add auto-solver

This commit is contained in:
augustin64 2024-12-11 15:01:37 +01:00
parent 550e47cb2c
commit 553d14dfba
2 changed files with 170 additions and 43 deletions

View File

@ -9,11 +9,16 @@ python src/cemantix.py -m <model.bin>
> # clear() to remove words that are not useful > # clear() to remove words that are not useful
``` ```
Additional arguments:
- `-w` `--word` specify the target word to find (local server only)
- `-r` `--remote` use [cemantix.certitudes.org](https://cemantix.certitudes.org) instead of the local server
- `-s` `--solver` let the auto-solver play instead of asking the user for guesses
# Models # Models
Les modèles de base viennent du [site de Jean-Philippe Fauconnier](https://fauconnier.github.io/#data) Les modèles de base viennent du [site de Jean-Philippe Fauconnier](https://fauconnier.github.io/#data)
Le modèle recommandé est disponible [ici](https://gitea.augustin64.fr/augustin64/cemantix-cli/releases/download/v1.0.0/selected_word2vec_model.bin) et est une modification de `frWac_non_lem_no_postag_no_phrase_200_skip_cut100.bin`. Les modèles recommandés sont disponibles [ici](https://gitea.augustin64.fr/augustin64/cemantix-cli/releases/tag/v1.0.0/).
Les mots qui ne sont pas dans le dictionnaire, les verbes conjugués et autres ont été supprimés. Les mots qui ne sont pas dans le dictionnaire, les verbes conjugués et autres ont été supprimés.
Il est possible de recréer ce modèle avec `src/generate_wordlist.py` et `src/restrict_model.py` et [ce dictionnaire français](https://github.com/hbenbel/French-Dictionary) Il est possible de recréer ce modèle avec `src/generate_wordlist.py` et `src/restrict_model.py` et [ce dictionnaire français](https://github.com/hbenbel/French-Dictionary)

View File

@ -1,3 +1,4 @@
from dataclasses import dataclass
from gensim.models import KeyedVectors from gensim.models import KeyedVectors
from colorama import Fore, Back, Style from colorama import Fore, Back, Style
import argparse import argparse
@ -10,7 +11,17 @@ import time
import numpy as np import numpy as np
np.seterr(divide='ignore', invalid='ignore') np.seterr(divide='ignore', invalid='ignore')
class Server(): @dataclass
class Command:
word: str
@dataclass
class Guess:
    """Client input carrying a word to be scored by the server.

    The game loop destructures it with ``case Guess(word)`` and submits
    ``word`` to the server.
    """

    # The guessed word, exactly as produced by the client.
    word: str
Input = Command | Guess
class Server:
inverse_order = False inverse_order = False
def __init__(self): def __init__(self):
pass pass
@ -24,14 +35,14 @@ class Server():
def get_temp(self, guess): def get_temp(self, guess):
pass pass
def _help(self): def _help(self, rk):
raise NotImplementedError raise NotImplementedError
def _reveal_word(self): def _reveal_word(self):
raise NotImplementedError raise NotImplementedError
class LocalServer(Server): class LocalServer(Server):
inverse_order = True inverse_order = False
def __init__(self, word=None, file="models/selected_word2vec_model.bin"): def __init__(self, word=None, file="models/selected_word2vec_model.bin"):
self.model = KeyedVectors.load_word2vec_format( self.model = KeyedVectors.load_word2vec_format(
file, file,
@ -49,6 +60,8 @@ class LocalServer(Server):
self.model.index_to_key[random.randint(0, len(self.model))] self.model.index_to_key[random.randint(0, len(self.model))]
for _ in range(k) for _ in range(k)
] ]
if None in base_words:
continue
complete_list = base_words.copy() complete_list = base_words.copy()
for word in base_words: for word in base_words:
@ -57,7 +70,7 @@ class LocalServer(Server):
rk_words = self.model.rank_by_centrality(complete_list) rk_words = self.model.rank_by_centrality(complete_list)
self.word = rk_words[random.randint(0,5)%len(rk_words)][1] self.word = rk_words[random.randint(0,5)%len(rk_words)][1]
self.nearest = [word]+[i[0] for i in self.model.most_similar(self.word, topn=1000)] self.nearest = [self.word]+[i[0] for i in self.model.most_similar(self.word, topn=1000)]
def get_rank(self, guess): def get_rank(self, guess):
if guess not in self.nearest: if guess not in self.nearest:
@ -65,7 +78,7 @@ class LocalServer(Server):
return 1000 - self.nearest.index(guess) return 1000 - self.nearest.index(guess)
def get_temp(self, guess): def get_temp(self, guess):
return round(self.model.distance(self.word, guess)*100, 2) return round(100*(1-self.model.distance(self.word, guess)), 2)
def _help(self, rk): def _help(self, rk):
return self.nearest[rk] return self.nearest[rk]
@ -106,16 +119,45 @@ class CemantixServer(Server):
if guess not in self.words: if guess not in self.words:
self._try(guess) self._try(guess)
return round(self.words[guess]["score"]*100, 2) return self.words[guess]["score"]*100
def cemantix(server: Server): class Client:
server.init_word() inverse_order = False
def __init__(self):
pass
guesses = [] # guess, temp, rank def guess(self) -> str:
def formatted_status(guesses, last=None): raise NotImplementedError
def answer_guess(self, word, temp, dist):
    """Receive the server's verdict for a guess; subclasses must override.

    NOTE(review): the game loop calls this positionally as
    ``answer_guess(guess, dist, rank)``, so ``temp`` receives the
    temperature and ``dist`` receives the rank. The subclasses name these
    parameters ``(guess, dist, rank)``.

    Raises:
        NotImplementedError: always, in the base class.
    """
    raise NotImplementedError()
def correct(self, word):
    """Hook called when ``word`` is the answer (rank 1000); no-op here.

    The game loop returns whatever this method returns; the base
    implementation returns ``None``.
    """
    return None
def reveal(self, word):
    """Hook called with the secret word when the game is abandoned; no-op here."""
    return None
def unknow_word(self, word):
    """Report that the server does not know ``word``.

    Called by the game loop when scoring the guess raises ``KeyError``.
    """
    message = Fore.RED+"Key not present"+Style.RESET_ALL
    print(message)
def _clear(self):
pass
def _help(self, word):
pass
def _best_rank(self):
raise NotImplementedError
class UserClient(Client):
def __init__(self):
self.guesses = [] # guess, temp, rank
def formatted_status(self, last=None):
text = "" text = ""
for w, temp, rank in guesses: for w, temp, rank in self.guesses:
if rank is not None: if rank is not None:
text += Back.RED+Fore.BLACK+Style.BRIGHT+str(rank)+Style.RESET_ALL text += Back.RED+Fore.BLACK+Style.BRIGHT+str(rank)+Style.RESET_ALL
text += "\t" text += "\t"
@ -126,37 +168,119 @@ def cemantix(server: Server):
text += Style.BRIGHT+Back.WHITE+Fore.BLACK text += Style.BRIGHT+Back.WHITE+Fore.BLACK
text += w + Style.RESET_ALL+"\n" text += w + Style.RESET_ALL+"\n"
return text[:-1] return text[:-1]
def guess(self):
try:
guess = input(Style.BRIGHT+"Your guess > "+Style.RESET_ALL).strip()
readline.add_history(guess)
except (EOFError, KeyboardInterrupt):
raise EOFError
def tried(word, guessed): if guess.endswith("()"):
return (word in [i[0] for i in guessed]) return Command(guess[:-2])
return Guess(guess)
def interpret_command(cmd, guesses): def answer_guess(self, guess, dist, rank):
if guess not in (i[0] for i in self.guesses):
self.guesses.append((guess, dist, rank))
self.guesses.sort(key=lambda x:-x[1] if self.inverse_order else x[1])
print(chr(27) + "[2J")
print(self.formatted_status(last=guess))
def reveal(self, word):
    """Print the secret ``word``, highlighted, when the game is abandoned."""
    highlighted = Style.BRIGHT+word+Style.RESET_ALL
    print("The word was "+highlighted)
def correct(self, guess):
    """Announce the win and return the number of distinct guesses made.

    Args:
        guess: the winning word (unused here).

    Returns:
        The length of ``self.guesses``.
    """
    # Short pause before the success message is printed.
    time.sleep(1)
    tries = len(self.guesses)
    print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {tries} tries.")
    return tries
def _clear(self):
self.guesses = [g for g in self.guesses if g[1] <= 75.]
def _best_rank(self):
return max([rk for _, _, rk in self.guesses if rk is not None]+[749])
def _help(self, word):
    """Print the hint ``word`` suggested by the server, highlighted."""
    hint = Back.YELLOW+Fore.BLACK+word+Style.RESET_ALL
    print("Maybe try "+hint)
class AutoClient(Client):
    """Client that solves the game automatically by elimination.

    It keeps a list of candidate words taken from a word2vec model. After
    each answer it keeps only the candidates whose locally computed score
    against the guess matches the temperature reported by the server.
    """

    def __init__(self, file="models/selected_word2vec_model.bin", tolerance=0.01):
        """Load the model and start with its whole vocabulary as candidates.

        Args:
            file: path to a binary word2vec model.
            tolerance: maximum absolute difference allowed between the
                local score and the reported temperature for a candidate
                to be kept. The default matches the previously hard-coded
                value.
        """
        self.model = KeyedVectors.load_word2vec_format(
            file,
            binary=True,
            unicode_errors="ignore",
        )
        # Candidates still compatible with every answer received so far.
        self.dictionary = [
            word for word in self.model.index_to_key if word is not None
        ]
        self.tolerance = tolerance
        self.num_guesses = 0

    def guess(self):
        """Pick the next word to try.

        Returns:
            A ``Guess`` wrapping the chosen candidate.

        Raises:
            EOFError: when no candidate is left; the game loop treats this
                as the end of the session.
        """
        if not self.dictionary:
            raise EOFError

        if len(self.dictionary) < 20:
            return Guess(random.choice(self.dictionary))

        # Choose among the last tenth of the centrality ranking.
        # NOTE(review): presumably the least central candidates; confirm
        # against gensim's ordering for rank_by_centrality.
        tail = len(self.dictionary) // 10
        ranked = self.model.rank_by_centrality(self.dictionary)
        return Guess(random.choice([word for _, word in ranked[-tail:]]))

    def answer_guess(self, guess, dist, rank):
        """Filter the candidates using the temperature reported for ``guess``.

        Args:
            guess: the word that was scored.
            dist: the temperature reported by the server for ``guess``.
            rank: the rank reported by the server (unused here).
        """
        self.num_guesses += 1
        old_count = len(self.dictionary)
        # Excluding the guess in the filter replaces list.remove(), which
        # raised ValueError when the guess was no longer a candidate.
        self.dictionary = [
            w for w in self.dictionary
            if w != guess
            and abs(100*(1-self.model.distance(guess, w)) - dist) <= self.tolerance
        ]
        print(f"[{self.num_guesses}] Guessing {guess}: {old_count} => {len(self.dictionary)}")

    def reveal(self, word):
        """Print the secret ``word`` when the game is abandoned."""
        print("The word was "+Style.BRIGHT+word+Style.RESET_ALL)

    def correct(self, guess):
        """Announce the win and return the number of scored guesses."""
        print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {self.num_guesses} tries.")
        return self.num_guesses

    def unknow_word(self, word):
        """Report a word the server rejects and drop it from the candidates."""
        print(Fore.RED+"Key not present"+Style.RESET_ALL)
        try:
            self.dictionary.remove(word)
        except ValueError:
            # Already eliminated: nothing to drop.
            pass
def cemantix(server: Server, client: Client):
server.init_word()
client.inverse_order = server.inverse_order
def interpret_command(cmd):
match cmd: match cmd:
case "clear": case "clear":
guesses = [g for g in guesses if g[1] <= 75.] client._clear()
case "help": case "help":
try: try:
best_rk = max([rk for _, _, rk in guesses if rk is not None]+[749]) client._help(server._help(999-client._best_rank()))
print("Maybe try "+Back.YELLOW+Fore.BLACK+server._help(999-best_rk)
+Style.RESET_ALL)
except NotImplementedError: except NotImplementedError:
print(Fore.RED+"No help available"+Style.RESET_ALL) print(Fore.RED+"No help available"+Style.RESET_ALL)
case _: case _:
print(Fore.RED+"Unknown command"+Style.RESET_ALL) print(Fore.RED+"Unknown command"+Style.RESET_ALL)
return guesses
while True: while True:
guess = None
try: try:
guess = input(Style.BRIGHT+"Your guess > "+Style.RESET_ALL).strip() match client.guess():
readline.add_history(guess) case Command(cmd):
if guess.endswith("()"): interpret_command(cmd)
guesses = interpret_command(guess[:-2], guesses) continue
continue case Guess(word):
except (EOFError, KeyboardInterrupt): guess = word
except EOFError:
try: try:
print("The word was "+Style.BRIGHT+server._reveal_word()+Style.RESET_ALL) client.reveal(server._reveal_word())
except NotImplementedError: except NotImplementedError:
pass pass
print("Goodbye!") print("Goodbye!")
@ -164,18 +288,13 @@ def cemantix(server: Server):
try: try:
dist = server.get_temp(guess) dist = server.get_temp(guess)
except KeyError: except KeyError:
print(Fore.RED+"Key not present"+Style.RESET_ALL) client.unknow_word(guess)
continue continue
if not tried(guess, guesses): rank = server.get_rank(guess)
guesses.append((guess, dist, server.get_rank(guess))) client.answer_guess(guess, dist, rank)
guesses.sort(key=lambda x:-x[1] if server.inverse_order else x[1]) if rank == 1000:
print(chr(27) + "[2J") return client.correct(guess)
print(formatted_status(guesses, last=guess))
if server.get_rank(guess) == 1000:
time.sleep(1)
print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {len(guesses)} tries.")
return len(guesses)
@ -190,12 +309,15 @@ def main():
parser.add_argument("-r", "--remote", dest="remote", action="store_true", parser.add_argument("-r", "--remote", dest="remote", action="store_true",
default=False, default=False,
help="Use cemantix.certitudes.org instead of local model") help="Use cemantix.certitudes.org instead of local model")
parser.add_argument("-s", "--solver", dest="auto_solver", action="store_true",
default=False,
help="Use auto solver")
args = parser.parse_args() args = parser.parse_args()
if args.remote: client = UserClient() if not args.auto_solver else AutoClient(file=args.model)
return cemantix(CemantixServer()) server = LocalServer(word=args.word, file=args.model) if not args.remote else CemantixServer()
return cemantix(LocalServer(word=args.word, file=args.model)) return cemantix(server, client)
if __name__ == "__main__": if __name__ == "__main__":
main() main()