Compare commits

..

No commits in common. "550e47cb2c06d326e5215a255e7cfaf0f1119b3e" and "a2d46b906bed910ad42b7a05ec0c7c34ee9acf13" have entirely different histories.

2 changed files with 33 additions and 125 deletions

View File

@ -1,4 +1,2 @@
colorama colorama
gensim gensim
json
requests

View File

@ -1,118 +1,36 @@
from gensim.models import KeyedVectors from gensim.models import KeyedVectors
from colorama import Fore, Back, Style from colorama import Fore, Back, Style
import argparse import argparse
import requests
import readline import readline
import random import random
import json
import time import time
import numpy as np
np.seterr(divide='ignore', invalid='ignore')
class Server(): def random_word(model, k=5, dist=100):
inverse_order = False base_words = [
def __init__(self): model.index_to_key[random.randint(0, len(model))]
pass for _ in range(k)
]
def init_word(self): complete_list = base_words.copy()
pass for word in base_words:
complete_list += [i[0] for i in model.most_similar(word, topn=dist)]
def get_rank(self, guess): rk_words = model.rank_by_centrality(complete_list)
pass return rk_words[random.randint(0,5)%len(rk_words)][1]
def get_temp(self, guess):
pass
def _help(self):
raise NotImplementedError
def _reveal_word(self):
raise NotImplementedError
class LocalServer(Server):
inverse_order = True
def __init__(self, word=None, file="models/selected_word2vec_model.bin"):
self.model = KeyedVectors.load_word2vec_format(
file,
binary=True,
unicode_errors="ignore"
)
self.word = word
self.nearest = []
def init_word(self, k=1, dist=100): def cemantix(model, word=None):
while (self.word is None or len(self.word) < 5 while word is None or len(word) < 5 or '-' in word or '_' in word:
or '-' in self.word or '_' in self.word): word = random_word(model, k=1, dist=0) # augment dist for a "smoother selection"
base_words = [
self.model.index_to_key[random.randint(0, len(self.model))]
for _ in range(k)
]
complete_list = base_words.copy()
for word in base_words:
complete_list += [i[0] for i in self.model.most_similar(word, topn=dist)]
rk_words = self.model.rank_by_centrality(complete_list)
self.word = rk_words[random.randint(0,5)%len(rk_words)][1]
self.nearest = [word]+[i[0] for i in self.model.most_similar(self.word, topn=1000)]
def get_rank(self, guess):
if guess not in self.nearest:
return None
return 1000 - self.nearest.index(guess)
def get_temp(self, guess):
return round(self.model.distance(self.word, guess)*100, 2)
def _help(self, rk):
return self.nearest[rk]
def _reveal_word(self):
return self.word
class CemantixServer(Server):
    """Backend that scores guesses through the cemantix.certitudes.org API.

    Every distinct word is submitted at most once; the parsed reply is kept
    in ``self.words`` so later rank/temperature lookups for the same word do
    not trigger another request.
    """

    SCORE_URL = 'https://cemantix.certitudes.org/score'
    ORIGIN = 'https://cemantix.certitudes.org'
    # Seconds to wait for the server. Without a timeout a stalled connection
    # would block the interactive game loop indefinitely.
    TIMEOUT = 10

    def __init__(self):
        # word -> {"score": float, "percentile": value as returned (optional)}
        self.words = {}
        # Most recent "solvers" / "num" values reported by the server.
        self.solvers = None
        self.num = None

    def _try(self, word):
        """Submit *word* to the remote API and cache its score.

        Raises:
            requests.HTTPError: the server answered with an error status.
            KeyError: the JSON reply lacks "solvers", "num" or "score"
                (presumably the reply for a word the server does not
                know -- TODO confirm against the live API).
        """
        headers = {"Origin": self.ORIGIN}
        # Need some additional cookies to be a valid client
        r = requests.post(
            self.SCORE_URL,
            headers=headers,
            data={'word': word},
            timeout=self.TIMEOUT,
        )
        # Explicit check instead of `assert r.ok`: asserts vanish under -O.
        r.raise_for_status()
        data = r.json()

        self.solvers = int(data["solvers"])
        self.num = int(data["num"])
        entry = {"score": float(data["score"])}
        if "percentile" in data:
            entry["percentile"] = data["percentile"]
        self.words[word] = entry

    def _lookup(self, guess):
        """Return the cached entry for *guess*, querying the server if needed."""
        if guess not in self.words:
            self._try(guess)
        return self.words[guess]

    def get_rank(self, guess):
        """Return the "percentile" reported for *guess*, or None if absent."""
        return self._lookup(guess).get("percentile", None)

    def get_temp(self, guess):
        """Return the score of *guess* scaled by 100 and rounded to 2 decimals."""
        return round(self._lookup(guess)["score"]*100, 2)
def cemantix(server: Server):
server.init_word()
nearest = [word]+[i[0] for i in model.most_similar(word, topn=1000)]
guesses = [] # guess, temp, rank guesses = [] # guess, temp, rank
def get_rank(guess):
if guess not in nearest:
return None
return 1000 - nearest.index(guess)
def formatted_status(guesses, last=None): def formatted_status(guesses, last=None):
text = "" text = ""
for w, temp, rank in guesses: for w, temp, rank in guesses:
@ -128,19 +46,15 @@ def cemantix(server: Server):
return text[:-1] return text[:-1]
def tried(word, guessed): def tried(word, guessed):
return (word in [i[0] for i in guessed]) return word in [i[0] for i in guessed]
def interpret_command(cmd, guesses): def interpret_command(cmd, guesses):
match cmd: match cmd:
case "clear": case "clear":
guesses = [g for g in guesses if g[1] <= 75.] guesses = [g for g in guesses if g[1] <= 75.]
case "help": case "help":
try: best_rk = max([rk for _, _, rk in guesses if rk is not None]+[749])
best_rk = max([rk for _, _, rk in guesses if rk is not None]+[749]) print("Maybe try "+Back.YELLOW+Fore.BLACK+nearest[999-best_rk]+Style.RESET_ALL)
print("Maybe try "+Back.YELLOW+Fore.BLACK+server._help(999-best_rk)
+Style.RESET_ALL)
except NotImplementedError:
print(Fore.RED+"No help available"+Style.RESET_ALL)
case _: case _:
print(Fore.RED+"Unknown command"+Style.RESET_ALL) print(Fore.RED+"Unknown command"+Style.RESET_ALL)
@ -155,24 +69,21 @@ def cemantix(server: Server):
guesses = interpret_command(guess[:-2], guesses) guesses = interpret_command(guess[:-2], guesses)
continue continue
except (EOFError, KeyboardInterrupt): except (EOFError, KeyboardInterrupt):
try: print("The word was "+Style.BRIGHT+word+Style.RESET_ALL)
print("The word was "+Style.BRIGHT+server._reveal_word()+Style.RESET_ALL)
except NotImplementedError:
pass
print("Goodbye!") print("Goodbye!")
return -1 return -1
try: try:
dist = server.get_temp(guess) dist = round(round(model.distance(word, guess), 4)*100, 2)
except KeyError: except KeyError:
print(Fore.RED+"Key not present"+Style.RESET_ALL) print(Fore.RED+"Key not present"+Style.RESET_ALL)
continue continue
if not tried(guess, guesses): if not tried(guess, guesses):
guesses.append((guess, dist, server.get_rank(guess))) guesses.append((guess, dist, get_rank(guess)))
guesses.sort(key=lambda x:-x[1] if server.inverse_order else x[1]) guesses.sort(key=lambda x:-x[1])
print(chr(27) + "[2J") print(chr(27) + "[2J")
print(formatted_status(guesses, last=guess)) print(formatted_status(guesses, last=guess))
if server.get_rank(guess) == 1000: if guess == word:
time.sleep(1) time.sleep(1)
print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {len(guesses)} tries.") print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {len(guesses)} tries.")
return len(guesses) return len(guesses)
@ -187,15 +98,14 @@ def main():
parser.add_argument("-m", "--model", dest="model", action="store", parser.add_argument("-m", "--model", dest="model", action="store",
default="models/selected_word2vec_model.bin", default="models/selected_word2vec_model.bin",
help="Specify model to use") help="Specify model to use")
parser.add_argument("-r", "--remote", dest="remote", action="store_true",
default=False,
help="Use cemantix.certitudes.org instead of local model")
args = parser.parse_args() args = parser.parse_args()
if args.remote: model = KeyedVectors.load_word2vec_format(
return cemantix(CemantixServer()) args.model,
return cemantix(LocalServer(word=args.word, file=args.model)) binary=True,
unicode_errors="ignore"
)
cemantix(model, word=args.word)
if __name__ == "__main__": if __name__ == "__main__":
main() main()