Compare commits
7 Commits
Author | SHA1 | Date | |
---|---|---|---|
c197661571 | |||
48ff7bb93e | |||
553d14dfba | |||
550e47cb2c | |||
44df60fca2 | |||
a2d46b906b | |||
8a6b0d0963 |
@ -3,17 +3,22 @@
|
||||
# Basic use
|
||||
|
||||
```bash
|
||||
python cemantix.py -m <model.bin>
|
||||
python src/cemantix.py -m <model.bin>
|
||||
> # input your guess
|
||||
> # help() to get a hint
|
||||
> # clear() to remove words that are not useful
|
||||
```
|
||||
|
||||
Additional arguments:
|
||||
- `-w` `--word` specify an objective word (local only)
|
||||
- `-r` `--remote` use [cemantix.certitudes.org](https://cemantix.certitudes.org) instead of local server
|
||||
- `-s` `--solver` solve automatically instead of asking user to do it
|
||||
|
||||
# Models
|
||||
|
||||
Les modèles de base viennent du [site de Jean-Philippe Fauconnier](https://fauconnier.github.io/#data)
|
||||
|
||||
Le modèle recommandé est disponible [ici]() et est une modification de `frWac_non_lem_no_postag_no_phrase_200_skip_cut100.bin`.
|
||||
Les modèles recommandés sont disponibles [ici](https://gitea.augustin64.fr/augustin64/cemantix-cli/releases/tag/v1.0.0/).
|
||||
Les mots qui ne sont pas dans le dictionnaire, les verbes conjugués et autres ont été supprimés.
|
||||
|
||||
Il est possible de recréer ce modèle avec `src/generate_wordlist.py` et `src/restrict_model.py` et [ce dictionnaire français](https://github.com/hbenbel/French-Dictionary)
|
||||
|
@ -1,2 +1,3 @@
|
||||
colorama
|
||||
gensim
|
||||
requests
|
||||
|
331
src/cemantix.py
331
src/cemantix.py
@ -1,39 +1,163 @@
|
||||
from dataclasses import dataclass
|
||||
from gensim.models import KeyedVectors
|
||||
from colorama import Fore, Back, Style
|
||||
import argparse
|
||||
import requests
|
||||
import readline
|
||||
import random
|
||||
import json
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
np.seterr(divide='ignore', invalid='ignore')
|
||||
|
||||
def random_word(model, k=5, dist=100):
|
||||
base_words = [
|
||||
model.index_to_key[random.randint(0, len(model))]
|
||||
for _ in range(k)
|
||||
]
|
||||
@dataclass
|
||||
class Command:
|
||||
word: str
|
||||
|
||||
complete_list = base_words.copy()
|
||||
for word in base_words:
|
||||
complete_list += [i[0] for i in model.most_similar(word, topn=dist)]
|
||||
@dataclass
|
||||
class Guess:
|
||||
word: str
|
||||
|
||||
rk_words = model.rank_by_centrality(complete_list)
|
||||
return rk_words[random.randint(0,5)][1]
|
||||
Input = Command | Guess
|
||||
|
||||
class Server:
    """Base interface for a game back-end that scores guesses.

    The scoring methods defined here do nothing and return ``None``; the
    optional hooks ``_help`` and ``_reveal_word`` raise
    ``NotImplementedError`` so that callers can detect a back-end that
    does not support them.
    """

    # Copied onto the client by ``cemantix`` before the game starts.
    inverse_order = False

    def __init__(self):
        """Create a server with no state."""
        return None

    def init_word(self):
        """Prepare the word to find; no-op in the base class."""
        return None

    def get_rank(self, guess):
        """Return the rank of *guess*; the base class always returns None."""
        return None

    def get_temp(self, guess):
        """Return the score of *guess*; the base class always returns None."""
        return None

    def _help(self, rk):
        """Return a hint word for position *rk*; unsupported by default."""
        raise NotImplementedError

    def _reveal_word(self):
        """Return the word to find; unsupported by default."""
        raise NotImplementedError
|
||||
|
||||
class LocalServer(Server):
    """Game server that scores guesses against a local word2vec model."""

    inverse_order = False

    def __init__(self, word=None, file="models/selected_word2vec_model.bin"):
        """Load the model stored in *file* and remember the requested *word*.

        *word* may be None, in which case ``init_word`` picks one.
        """
        self.model = KeyedVectors.load_word2vec_format(
            file,
            binary=True,
            unicode_errors="ignore"
        )
        self.word = word
        # Filled by ``init_word``: the word itself followed by its neighbours.
        self.nearest = []

    @staticmethod
    def _is_acceptable(word):
        """Tell whether *word* may be used as the word to find."""
        return (word is not None and len(word) >= 5
                and '-' not in word and '_' not in word)

    def init_word(self, k=1, dist=100):
        """Choose the word to find (if needed) and build its neighbour list.

        While the current word is missing or rejected, *k* random
        vocabulary entries are drawn, each is extended with its *dist*
        most similar words, and one of the first six entries returned by
        ``rank_by_centrality`` for that pool becomes the new candidate.

        The neighbour list is built once, after the loop, so that a valid
        word supplied to the constructor also gets one; without it
        ``get_rank`` would return None for every guess.

        Raises:
            KeyError: presumably, if a word supplied to the constructor is
                absent from the model (raised by ``most_similar``) --
                TODO confirm.
        """
        while not self._is_acceptable(self.word):
            # randrange excludes the upper bound; randint(0, len) could
            # index one past the end of the vocabulary.
            base_words = [
                self.model.index_to_key[random.randrange(len(self.model))]
                for _ in range(k)
            ]
            if None in base_words:
                continue

            complete_list = base_words.copy()
            for base in base_words:
                complete_list += [
                    hit[0] for hit in self.model.most_similar(base, topn=dist)
                ]

            rk_words = self.model.rank_by_centrality(complete_list)
            self.word = rk_words[random.randint(0, 5) % len(rk_words)][1]

        self.nearest = [self.word] + [
            hit[0] for hit in self.model.most_similar(self.word, topn=1000)
        ]

    def get_rank(self, guess):
        """Return 1000 minus the position of *guess* in the neighbour list.

        The word to find sits at position 0 and therefore ranks 1000.
        Returns None when *guess* is not in the list.
        """
        try:
            return 1000 - self.nearest.index(guess)
        except ValueError:
            return None

    def get_temp(self, guess):
        """Return ``100 * (1 - distance)`` to the word, rounded to 2 places."""
        return round(100*(1-self.model.distance(self.word, guess)), 2)

    def _help(self, rk):
        """Return the neighbour stored at index *rk*."""
        return self.nearest[rk]

    def _reveal_word(self):
        """Return the word to find."""
        return self.word
|
||||
|
||||
class CemantixServer(Server):
    """Server that forwards guesses to cemantix.certitudes.org."""

    # Seconds to wait for the remote server before giving up.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Start with an empty cache of already scored words."""
        self.words = {}
        self.solvers = None
        self.num = None

    def _try(self, word):
        """Submit *word* to the remote server and cache its score.

        Raises:
            requests.HTTPError: if the server answers with an error status.
            KeyError: if the reply lacks ``solvers``, ``num`` or ``score``.
                The ``cemantix`` loop catches KeyError and reports the
                word as unknown, so this exception must keep propagating.
        """
        url = 'https://cemantix.certitudes.org/score'
        headers = {"Origin": "https://cemantix.certitudes.org"}
        data = {'word': word}
        # Need some additional cookies to be a valid client
        r = requests.post(url, headers=headers, data=data,
                          timeout=self.REQUEST_TIMEOUT)

        # Explicit check instead of ``assert``, which is removed under -O.
        r.raise_for_status()

        data = json.loads(r.text)
        solvers = int(data["solvers"])
        num = int(data["num"])
        entry = {"score": float(data["score"])}
        if "percentile" in data:
            entry["percentile"] = data["percentile"]

        # Update state only once the whole reply has been parsed.
        self.solvers = solvers
        self.num = num
        self.words[word] = entry

    def _entry(self, guess):
        """Return the cached result for *guess*, querying the server if needed."""
        if guess not in self.words:
            self._try(guess)
        return self.words[guess]

    def get_rank(self, guess):
        """Return the ``percentile`` reported for *guess*, or None if absent."""
        return self._entry(guess).get("percentile", None)

    def get_temp(self, guess):
        """Return the score reported for *guess* multiplied by 100."""
        return self._entry(guess)["score"]*100
|
||||
|
||||
|
||||
class Client:
    """Base interface for whatever proposes guesses during a game.

    ``guess``, ``answer_guess`` and ``_best_rank`` must be provided by
    subclasses; the remaining hooks default to doing nothing.
    """

    # Overwritten by ``cemantix`` with the server's value.
    inverse_order = False

    def __init__(self):
        """Create a client with no state."""
        return None

    def guess(self) -> str:
        """Produce the next input; must be overridden."""
        raise NotImplementedError

    def answer_guess(self, word, temp, dist):
        """Receive the server's answer to a guess; must be overridden."""
        raise NotImplementedError

    def correct(self, word):
        """Called when *word* is the right answer; no-op by default."""
        return None

    def reveal(self, word):
        """Called with the solution when the game is abandoned; no-op."""
        return None

    def unknow_word(self, word):
        """Report that the server could not score *word*."""
        print(f"{Fore.RED}Key not present{Style.RESET_ALL}")

    def _clear(self):
        """Handle the ``clear`` command; no-op by default."""
        return None

    def _help(self, word):
        """Handle a hint *word* from the server; no-op by default."""
        return None

    def _best_rank(self):
        """Return the best rank obtained so far; unsupported by default."""
        raise NotImplementedError
|
||||
|
||||
class UserClient(Client):
|
||||
def __init__(self):
|
||||
self.guesses = [] # guess, temp, rank
|
||||
|
||||
def formatted_status(self, last=None):
|
||||
text = ""
|
||||
for w, temp, rank in guesses:
|
||||
for w, temp, rank in self.guesses:
|
||||
if rank is not None:
|
||||
text += Back.RED+Fore.BLACK+Style.BRIGHT+str(rank)+Style.RESET_ALL
|
||||
text += "\t"
|
||||
@ -45,48 +169,133 @@ def cemantix(model, word=None):
|
||||
text += w + Style.RESET_ALL+"\n"
|
||||
return text[:-1]
|
||||
|
||||
def tried(word, guessed):
|
||||
return word in [i[0] for i in guessed]
|
||||
|
||||
def interpret_command(cmd, guesses):
|
||||
match cmd:
|
||||
case "clear":
|
||||
guesses = [g for g in guesses if g[1] <= 75.]
|
||||
case "help":
|
||||
best_rk = max([rk for _, _, rk in guesses if rk is not None]+[749])
|
||||
print("Maybe try "+Back.YELLOW+Fore.BLACK+nearest[999-best_rk]+Style.RESET_ALL)
|
||||
case _:
|
||||
print(Fore.RED+"Unknown command"+Style.RESET_ALL)
|
||||
|
||||
return guesses
|
||||
|
||||
|
||||
while True:
|
||||
def guess(self):
|
||||
try:
|
||||
guess = input(Style.BRIGHT+"Your guess > "+Style.RESET_ALL).strip()
|
||||
readline.add_history(guess)
|
||||
if guess.endswith("()"):
|
||||
guesses = interpret_command(guess[:-2], guesses)
|
||||
continue
|
||||
except (EOFError, KeyboardInterrupt):
|
||||
print("The word was "+Style.BRIGHT+word+Style.RESET_ALL)
|
||||
raise EOFError
|
||||
|
||||
if guess.endswith("()"):
|
||||
return Command(guess[:-2])
|
||||
return Guess(guess)
|
||||
|
||||
def answer_guess(self, guess, dist, rank):
|
||||
if guess not in (i[0] for i in self.guesses):
|
||||
self.guesses.append((guess, dist, rank))
|
||||
self.guesses.sort(key=lambda x:-x[1] if self.inverse_order else x[1])
|
||||
print(chr(27) + "[2J")
|
||||
print(self.formatted_status(last=guess))
|
||||
|
||||
def reveal(self, word):
|
||||
print("The word was "+Style.BRIGHT+word+Style.RESET_ALL)
|
||||
|
||||
def correct(self, guess):
|
||||
time.sleep(1)
|
||||
print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {len(self.guesses)} tries.")
|
||||
return len(self.guesses)
|
||||
|
||||
def _clear(self):
|
||||
self.guesses = [g for g in self.guesses if g[1] <= 75.]
|
||||
|
||||
def _best_rank(self):
|
||||
return max([rk for _, _, rk in self.guesses if rk is not None]+[749])
|
||||
|
||||
def _help(self, word):
|
||||
print("Maybe try "+Back.YELLOW+Fore.BLACK+word+Style.RESET_ALL)
|
||||
|
||||
|
||||
class AutoClient(Client):
    """Client that solves the game by elimination with a local model.

    It keeps a list of candidate words and, after each answer, retains
    only those whose score relative to the guess matches the score the
    server returned.
    """

    def __init__(self, file="models/selected_word2vec_model.bin"):
        """Load the model in *file*; every non-None key is a candidate."""
        self.model = KeyedVectors.load_word2vec_format(
            file,
            binary=True,
            unicode_errors="ignore"
        )
        self.dictionary = [
            key for key in self.model.index_to_key if key is not None
        ]
        self.num_guesses = 0

    def guess(self):
        """Return the next ``Guess``.

        With fewer than 20 candidates one is drawn at random; otherwise
        the draw is restricted to the last tenth of the list returned by
        ``rank_by_centrality``.

        Raises:
            EOFError: when no candidate is left.
        """
        if not self.dictionary:
            raise EOFError
        if len(self.dictionary) < 20:
            return Guess(random.choice(self.dictionary))

        pos = -(len(self.dictionary) // 10)
        ranked = self.model.rank_by_centrality(self.dictionary)
        return Guess(random.choice([entry[1] for entry in ranked[pos:]]))

    def answer_guess(self, guess, dist, rank):
        """Drop *guess* and every candidate inconsistent with score *dist*.

        A candidate is kept when ``100 * (1 - distance(guess, candidate))``
        is within 0.01 of *dist*. *rank* is not used.
        """
        self.num_guesses += 1
        old_count = len(self.dictionary)

        # Excluding the guess in the filter avoids the ValueError that
        # ``list.remove`` raises when the word is not in the list.
        self.dictionary = [
            w for w in self.dictionary
            if w != guess
            and abs(100*(1-self.model.distance(guess, w)) - dist) <= 0.01
        ]

        print(f"[{self.num_guesses}] Guessing {guess}: {old_count} => {len(self.dictionary)}")

    def reveal(self, word):
        """Print the solution."""
        print("The word was "+Style.BRIGHT+word+Style.RESET_ALL)

    def correct(self, guess):
        """Print the success message and return the number of guesses."""
        print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {self.num_guesses} tries.")
        return self.num_guesses

    def unknow_word(self, word):
        """Report *word* as unknown and stop considering it."""
        super().unknow_word(word)
        if word in self.dictionary:
            self.dictionary.remove(word)
|
||||
|
||||
|
||||
def cemantix(server: Server, client: Client):
    """Run one game between *server* and *client*.

    The client is asked for input until the server reports rank 1000
    for a guess. Commands (``clear``, ``help``) are handled without
    contacting the server.

    Returns the winning guess, or -1 when the client raises EOFError.
    """
    server.init_word()
    client.inverse_order = server.inverse_order

    def interpret_command(cmd):
        """Execute the command named *cmd* on behalf of the client."""
        if cmd == "clear":
            client._clear()
        elif cmd == "help":
            try:
                hint = server._help(999-client._best_rank())
                client._help(hint)
            except NotImplementedError:
                # Either side may not support hints.
                print(Fore.RED+"No help available"+Style.RESET_ALL)
        else:
            print(Fore.RED+"Unknown command"+Style.RESET_ALL)

    while True:
        guess = None
        try:
            move = client.guess()
            if isinstance(move, Command):
                interpret_command(move.word)
                continue
            if isinstance(move, Guess):
                guess = move.word
        except EOFError:
            try:
                client.reveal(server._reveal_word())
            except NotImplementedError:
                # The server cannot disclose the word; leave quietly.
                pass
            print("Goodbye!")
            return -1

        try:
            dist = server.get_temp(guess)
        except KeyError:
            # The server does not know this word.
            client.unknow_word(guess)
            continue

        rank = server.get_rank(guess)
        client.answer_guess(guess, dist, rank)
        if rank == 1000:
            client.correct(guess)
            return guess
|
||||
|
||||
|
||||
|
||||
@ -96,16 +305,20 @@ def main():
|
||||
parser.add_argument("-w", "--word", dest="word", action="store",
|
||||
help="Specify goal word")
|
||||
parser.add_argument("-m", "--model", dest="model", action="store",
|
||||
default="frWac_non_lem_no_postag_no_phrase_200_skip_cut100.bin",
|
||||
default="models/selected_word2vec_model.bin",
|
||||
help="Specify model to use")
|
||||
parser.add_argument("-r", "--remote", dest="remote", action="store_true",
|
||||
default=False,
|
||||
help="Use cemantix.certitudes.org instead of local model")
|
||||
parser.add_argument("-s", "--solver", dest="auto_solver", action="store_true",
|
||||
default=False,
|
||||
help="Use auto solver")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
model = KeyedVectors.load_word2vec_format(
|
||||
args.model,
|
||||
binary=True,
|
||||
unicode_errors="ignore"
|
||||
)
|
||||
cemantix(model, word=args.word)
|
||||
client = UserClient() if not args.auto_solver else AutoClient(file=args.model)
|
||||
server = LocalServer(word=args.word, file=args.model) if not args.remote else CemantixServer()
|
||||
return cemantix(server, client)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
Loading…
Reference in New Issue
Block a user