Compare commits

..

7 Commits
v1.0.0 ... main

Author SHA1 Message Date
c197661571 return guess once solved 2024-12-12 09:20:42 +01:00
48ff7bb93e Remove json requirement 2024-12-11 17:58:23 +01:00
553d14dfba Add auto-solver 2024-12-11 15:01:37 +01:00
550e47cb2c Add remote server 2024-12-06 11:24:25 +01:00
44df60fca2 Implement LocalServer 2024-12-04 11:47:20 +01:00
a2d46b906b Update according to precedent changes 2024-10-16 10:27:39 +02:00
8a6b0d0963 Actualiser README.md 2024-10-14 13:36:40 +02:00
3 changed files with 282 additions and 63 deletions

View File

@ -3,17 +3,22 @@
# Basic use # Basic use
```bash ```bash
python cemantix.py -m <model.bin> python src/cemantix.py -m <model.bin>
> # input your guess > # input your guess
> # help() to get a hint > # help() to get a hint
> # clear() to remove words that are not useful > # clear() to remove words that are not useful
``` ```
Additional arguments:
- `-w` `--word` specify the goal word to guess (local server only)
- `-r` `--remote` use [cemantix.certitudes.org](https://cemantix.certitudes.org) instead of local server
- `-s` `--solver` let the auto-solver play instead of asking the user for guesses
# Models # Models
Les modèles de base viennent du [site de Jean-Philippe Fauconnier](https://fauconnier.github.io/#data) Les modèles de base viennent du [site de Jean-Philippe Fauconnier](https://fauconnier.github.io/#data)
Le modèle recommandé est disponible [ici]() et est une modification de `frWac_non_lem_no_postag_no_phrase_200_skip_cut100.bin`. Les modèles recommandés sont disponibles [ici](https://gitea.augustin64.fr/augustin64/cemantix-cli/releases/tag/v1.0.0/).
Les mots qui ne sont pas dans le dictionnaire, les verbes conjugués et autres ont été supprimés. Les mots qui ne sont pas dans le dictionnaire, les verbes conjugués et autres ont été supprimés.
Il est possible de recréer ce modèle avec `src/generate_wordlist.py` et `src/restrict_model.py` et [ce dictionnaire français](https://github.com/hbenbel/French-Dictionary) Il est possible de recréer ce modèle avec `src/generate_wordlist.py` et `src/restrict_model.py` et [ce dictionnaire français](https://github.com/hbenbel/French-Dictionary)

View File

@ -1,2 +1,3 @@
colorama colorama
gensim gensim
requests

View File

@ -1,39 +1,163 @@
from dataclasses import dataclass
from gensim.models import KeyedVectors from gensim.models import KeyedVectors
from colorama import Fore, Back, Style from colorama import Fore, Back, Style
import argparse import argparse
import requests
import readline import readline
import random import random
import json
import time import time
import numpy as np
np.seterr(divide='ignore', invalid='ignore')
def random_word(model, k=5, dist=100): @dataclass
base_words = [ class Command:
model.index_to_key[random.randint(0, len(model))] word: str
for _ in range(k)
]
complete_list = base_words.copy() @dataclass
for word in base_words: class Guess:
complete_list += [i[0] for i in model.most_similar(word, topn=dist)] word: str
rk_words = model.rank_by_centrality(complete_list) Input = Command | Guess
return rk_words[random.randint(0,5)][1]
class Server:
    """Interface a word-scoring backend exposes to the game loop.

    The base implementation knows nothing: the scoring hooks return ``None``
    and the hint/reveal hooks raise ``NotImplementedError``.
    """

    # The game loop copies this flag onto the client before play starts.
    inverse_order = False

    def __init__(self):
        """Create a backend that holds no state."""

    def init_word(self):
        """Prepare the word to guess; nothing to do in the base class."""
        return None

    def get_rank(self, guess):
        """Return the rank of *guess*; the base class always gives ``None``."""
        return None

    def get_temp(self, guess):
        """Return the score of *guess*; the base class always gives ``None``."""
        return None

    def _help(self, rk):
        """Return a hint word for position *rk*.

        Raises:
            NotImplementedError: always, unless a subclass overrides it.
        """
        raise NotImplementedError

    def _reveal_word(self):
        """Return the word to guess.

        Raises:
            NotImplementedError: always, unless a subclass overrides it.
        """
        raise NotImplementedError
class LocalServer(Server):
inverse_order = False
def __init__(self, word=None, file="models/selected_word2vec_model.bin"):
self.model = KeyedVectors.load_word2vec_format(
file,
binary=True,
unicode_errors="ignore"
)
self.word = word
self.nearest = []
def cemantix(model, word=None): def init_word(self, k=1, dist=100):
while word is None or len(word) < 5 or '-' in word or '_' in word: while (self.word is None or len(self.word) < 5
word = random_word(model, k=1, dist=1) # augment numbers to try a "smooth selection" or '-' in self.word or '_' in self.word):
base_words = [
self.model.index_to_key[random.randint(0, len(self.model))]
for _ in range(k)
]
if None in base_words:
continue
nearest = [word]+[i[0] for i in model.most_similar(word, topn=1000)] complete_list = base_words.copy()
guesses = [] # guess, temp, rank for word in base_words:
def get_rank(guess): complete_list += [i[0] for i in self.model.most_similar(word, topn=dist)]
if guess not in nearest:
rk_words = self.model.rank_by_centrality(complete_list)
self.word = rk_words[random.randint(0,5)%len(rk_words)][1]
self.nearest = [self.word]+[i[0] for i in self.model.most_similar(self.word, topn=1000)]
def get_rank(self, guess):
if guess not in self.nearest:
return None return None
return 1000 - nearest.index(guess) return 1000 - self.nearest.index(guess)
def formatted_status(guesses, last=None): def get_temp(self, guess):
return round(100*(1-self.model.distance(self.word, guess)), 2)
def _help(self, rk):
return self.nearest[rk]
def _reveal_word(self):
return self.word
class CemantixServer(Server):
    """Backend that scores guesses through cemantix.certitudes.org.

    Every successful answer is cached in ``self.words``, so a given word is
    submitted to the remote service at most once per instance.
    """

    SCORE_URL = 'https://cemantix.certitudes.org/score'
    ORIGIN = "https://cemantix.certitudes.org"
    # Seconds to wait for the service; without a timeout `requests` may block
    # forever on a stalled connection.
    TIMEOUT = 10

    def __init__(self):
        # word -> {"score": float, optionally "percentile": value from server}
        self.words = {}
        # Last "solvers" / "num" values reported by the service.
        self.solvers = None
        self.num = None

    def _try(self, word):
        """Submit *word* to the service and cache its answer.

        Raises:
            requests.HTTPError: the service answered with an error status.
            KeyError: the reply lacks "solvers", "num" or "score". The game
                loop catches ``KeyError`` and reports the word as unknown
                (presumably how the service answers for such words - confirm).
        """
        # Need some additional cookies to be a valid client
        response = requests.post(
            self.SCORE_URL,
            headers={"Origin": self.ORIGIN},
            data={'word': word},
            timeout=self.TIMEOUT,
        )
        # Explicit check instead of `assert`, which disappears under `python -O`.
        response.raise_for_status()
        payload = json.loads(response.text)

        # Parse everything first so a malformed reply leaves the instance
        # state untouched.
        solvers = int(payload["solvers"])
        num = int(payload["num"])
        entry = {"score": float(payload["score"])}
        if "percentile" in payload:
            entry["percentile"] = payload["percentile"]

        self.solvers = solvers
        self.num = num
        self.words[word] = entry

    def get_rank(self, guess):
        """Return the "percentile" reported for *guess*, or ``None`` if absent."""
        if guess not in self.words:
            self._try(guess)
        return self.words[guess].get("percentile", None)

    def get_temp(self, guess):
        """Return the score reported for *guess*, multiplied by 100."""
        if guess not in self.words:
            self._try(guess)
        return self.words[guess]["score"]*100
class Client:
    """Interface of a player (human or automatic) driven by the game loop.

    Hooks that must produce a value raise ``NotImplementedError``; purely
    informational hooks do nothing by default.
    """

    # Overwritten by the game loop with the server's own flag.
    inverse_order = False

    def __init__(self):
        """Create a player that holds no state."""

    def guess(self) -> str:
        """Produce the next input; subclasses must override this."""
        raise NotImplementedError

    def answer_guess(self, word, temp, dist):
        """Receive the server's answer to a guess; subclasses must override."""
        raise NotImplementedError

    def correct(self, word):
        """Called once *word* turned out to be the solution; no-op here."""
        return None

    def reveal(self, word):
        """Called with the solution when the player gives up; no-op here."""
        return None

    def unknow_word(self, word):
        """Tell the player that the server could not score *word*."""
        print(f"{Fore.RED}Key not present{Style.RESET_ALL}")

    def _clear(self):
        """Handle the ``clear`` command; no-op here."""
        return None

    def _help(self, word):
        """Handle the ``help`` command with hint *word*; no-op here."""
        return None

    def _best_rank(self):
        """Return the best rank reached so far; subclasses must override."""
        raise NotImplementedError
class UserClient(Client):
def __init__(self):
self.guesses = [] # guess, temp, rank
def formatted_status(self, last=None):
text = "" text = ""
for w, temp, rank in guesses: for w, temp, rank in self.guesses:
if rank is not None: if rank is not None:
text += Back.RED+Fore.BLACK+Style.BRIGHT+str(rank)+Style.RESET_ALL text += Back.RED+Fore.BLACK+Style.BRIGHT+str(rank)+Style.RESET_ALL
text += "\t" text += "\t"
@ -44,49 +168,134 @@ def cemantix(model, word=None):
text += Style.BRIGHT+Back.WHITE+Fore.BLACK text += Style.BRIGHT+Back.WHITE+Fore.BLACK
text += w + Style.RESET_ALL+"\n" text += w + Style.RESET_ALL+"\n"
return text[:-1] return text[:-1]
def tried(word, guessed): def guess(self):
return word in [i[0] for i in guessed]
def interpret_command(cmd, guesses):
match cmd:
case "clear":
guesses = [g for g in guesses if g[1] <= 75.]
case "help":
best_rk = max([rk for _, _, rk in guesses if rk is not None]+[749])
print("Maybe try "+Back.YELLOW+Fore.BLACK+nearest[999-best_rk]+Style.RESET_ALL)
case _:
print(Fore.RED+"Unknown command"+Style.RESET_ALL)
return guesses
while True:
try: try:
guess = input(Style.BRIGHT+"Your guess > "+Style.RESET_ALL).strip() guess = input(Style.BRIGHT+"Your guess > "+Style.RESET_ALL).strip()
readline.add_history(guess) readline.add_history(guess)
if guess.endswith("()"):
guesses = interpret_command(guess[:-2], guesses)
continue
except (EOFError, KeyboardInterrupt): except (EOFError, KeyboardInterrupt):
print("The word was "+Style.BRIGHT+word+Style.RESET_ALL) raise EOFError
if guess.endswith("()"):
return Command(guess[:-2])
return Guess(guess)
def answer_guess(self, guess, dist, rank):
if guess not in (i[0] for i in self.guesses):
self.guesses.append((guess, dist, rank))
self.guesses.sort(key=lambda x:-x[1] if self.inverse_order else x[1])
print(chr(27) + "[2J")
print(self.formatted_status(last=guess))
def reveal(self, word):
print("The word was "+Style.BRIGHT+word+Style.RESET_ALL)
def correct(self, guess):
time.sleep(1)
print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {len(self.guesses)} tries.")
return len(self.guesses)
def _clear(self):
self.guesses = [g for g in self.guesses if g[1] <= 75.]
def _best_rank(self):
return max([rk for _, _, rk in self.guesses if rk is not None]+[749])
def _help(self, word):
print("Maybe try "+Back.YELLOW+Fore.BLACK+word+Style.RESET_ALL)
class AutoClient(Client):
    """Automatic player that narrows a candidate list with a word2vec model.

    After every answer, only the candidates whose own score against the
    guessed word matches the score reported by the server are kept.
    """

    def __init__(self, file="models/selected_word2vec_model.bin"):
        """Load the model stored in *file* and start with its whole vocabulary."""
        self.model = KeyedVectors.load_word2vec_format(
            file,
            binary=True,
            unicode_errors="ignore"
        )
        # Iterate the vocabulary directly rather than indexing it by position.
        self.dictionary = [
            word for word in self.model.index_to_key if word is not None
        ]
        self.num_guesses = 0

    def guess(self):
        """Pick the next word to try.

        Returns:
            A ``Guess``: any remaining candidate when fewer than 20 are left,
            otherwise a random word from the last tenth of the model's
            centrality ranking (presumably the least central candidates -
            confirm against gensim's ordering).

        Raises:
            EOFError: no candidate is left.
        """
        if not self.dictionary:
            raise EOFError
        if len(self.dictionary) < 20:
            return Guess(random.choice(self.dictionary))

        pos = -int(len(self.dictionary)/10)
        ranked = self.model.rank_by_centrality(self.dictionary)
        return Guess(random.choice([entry[1] for entry in ranked[pos:]]))

    def answer_guess(self, guess, dist, rank):
        """Drop every candidate inconsistent with the score *dist* of *guess*.

        A candidate survives when ``100*(1-distance(guess, candidate))`` is
        within 0.01 of *dist*. The guessed word itself is always dropped; it
        is filtered out rather than removed with ``list.remove`` so a guess
        missing from the list cannot raise ``ValueError``.
        """
        self.num_guesses += 1
        old_count = len(self.dictionary)
        distance = self.model.distance
        self.dictionary = [
            w for w in self.dictionary
            if w != guess and abs(100*(1-distance(guess, w)) - dist) <= 0.01
        ]
        print(f"[{self.num_guesses}] Guessing {guess}: {old_count} => {len(self.dictionary)}")

    def reveal(self, word):
        """Print the solution *word*."""
        print("The word was "+Style.BRIGHT+word+Style.RESET_ALL)

    def correct(self, guess):
        """Print the success message and return the number of guesses made."""
        print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {self.num_guesses} tries.")
        return self.num_guesses

    def unknow_word(self, word):
        """Report *word* as unknown to the server and stop considering it."""
        # The base class prints the "Key not present" message.
        super().unknow_word(word)
        # Filtering tolerates a word that is already absent from the list.
        self.dictionary = [w for w in self.dictionary if w != word]
def cemantix(server: Server, client: Client):
server.init_word()
client.inverse_order = server.inverse_order
def interpret_command(cmd):
match cmd:
case "clear":
client._clear()
case "help":
try:
client._help(server._help(999-client._best_rank()))
except NotImplementedError:
print(Fore.RED+"No help available"+Style.RESET_ALL)
case _:
print(Fore.RED+"Unknown command"+Style.RESET_ALL)
while True:
guess = None
try:
match client.guess():
case Command(cmd):
interpret_command(cmd)
continue
case Guess(word):
guess = word
except EOFError:
try:
client.reveal(server._reveal_word())
except NotImplementedError:
pass
print("Goodbye!") print("Goodbye!")
return -1 return -1
try: try:
dist = round(round(model.distance(word, guess), 4)*100, 2) dist = server.get_temp(guess)
except KeyError: except KeyError:
print(Fore.RED+"Key not present"+Style.RESET_ALL) client.unknow_word(guess)
continue continue
if not tried(guess, guesses): rank = server.get_rank(guess)
guesses.append((guess, dist, get_rank(guess))) client.answer_guess(guess, dist, rank)
guesses.sort(key=lambda x:-x[1]) if rank == 1000:
print(chr(27) + "[2J") client.correct(guess)
print(formatted_status(guesses, last=guess)) return guess
if guess == word:
time.sleep(1)
print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {len(guesses)} tries.")
return len(guesses)
@ -96,16 +305,20 @@ def main():
parser.add_argument("-w", "--word", dest="word", action="store", parser.add_argument("-w", "--word", dest="word", action="store",
help="Specify goal word") help="Specify goal word")
parser.add_argument("-m", "--model", dest="model", action="store", parser.add_argument("-m", "--model", dest="model", action="store",
default="frWac_non_lem_no_postag_no_phrase_200_skip_cut100.bin", default="models/selected_word2vec_model.bin",
help="Specify model to use") help="Specify model to use")
parser.add_argument("-r", "--remote", dest="remote", action="store_true",
default=False,
help="Use cemantix.certitudes.org instead of local model")
parser.add_argument("-s", "--solver", dest="auto_solver", action="store_true",
default=False,
help="Use auto solver")
args = parser.parse_args() args = parser.parse_args()
model = KeyedVectors.load_word2vec_format( client = UserClient() if not args.auto_solver else AutoClient(file=args.model)
args.model, server = LocalServer(word=args.word, file=args.model) if not args.remote else CemantixServer()
binary=True, return cemantix(server, client)
unicode_errors="ignore"
)
cemantix(model, word=args.word)
if __name__ == "__main__": if __name__ == "__main__":
main() main()