Compare commits

..

2 Commits

Author SHA1 Message Date
550e47cb2c Add remote server 2024-12-06 11:24:25 +01:00
44df60fca2 Implement LocalServer 2024-12-04 11:47:20 +01:00
2 changed files with 124 additions and 32 deletions

View File

@ -1,2 +1,4 @@
colorama colorama
gensim gensim
json
requests

View File

@ -1,36 +1,118 @@
from gensim.models import KeyedVectors from gensim.models import KeyedVectors
from colorama import Fore, Back, Style from colorama import Fore, Back, Style
import argparse import argparse
import requests
import readline import readline
import random import random
import json
import time import time
import numpy as np
np.seterr(divide='ignore', invalid='ignore')
class Server():
    """Common interface for the back ends that score guesses.

    Subclasses override the scoring methods. The two underscore-prefixed
    hooks are optional: a back end that cannot provide them leaves the
    base implementation in place, which raises ``NotImplementedError``.
    """

    # When True, the game loop sorts guesses on the negated temperature.
    inverse_order = False

    def __init__(self):
        pass

    def init_word(self):
        """Prepare the secret word. No-op for back ends with nothing to do."""
        pass

    def get_rank(self, guess):
        """Return the rank of *guess*; the base implementation returns None."""
        pass

    def get_temp(self, guess):
        """Return the temperature of *guess*; the base returns None."""
        pass

    def _help(self, rk=None):
        """Return a hint word for rank index *rk*.

        The parameter is accepted (and optional) so that calling the hook
        with a rank on a back end that does not override it raises
        ``NotImplementedError`` instead of ``TypeError``.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise NotImplementedError

    def _reveal_word(self):
        """Return the secret word.

        Raises:
            NotImplementedError: always, in the base class.
        """
        raise NotImplementedError
class LocalServer(Server):
    """Back end that scores guesses with a local word2vec model."""

    # Makes the game loop sort guesses on the negated temperature.
    inverse_order = True

    def __init__(self, word=None, file="models/selected_word2vec_model.bin"):
        """Load the binary word2vec model at *file*.

        Args:
            word: secret word to use, or None to draw one in ``init_word``.
            file: path to a word2vec model in binary format.
        """
        super().__init__()
        self.model = KeyedVectors.load_word2vec_format(
            file,
            binary=True,
            unicode_errors="ignore",
        )
        self.word = word
        self.nearest = []

    def init_word(self, k=1, dist=100):
        """Pick the secret word if needed and cache its nearest neighbours.

        A word is redrawn while it is missing, shorter than five characters,
        or contains ``-`` or ``_``.

        Args:
            k: number of random seed words drawn per attempt.
            dist: number of neighbours of each seed added to the candidates.
        """
        while (self.word is None or len(self.word) < 5
               or '-' in self.word or '_' in self.word):
            # randrange excludes the upper bound; randint(0, len) could
            # index one past the end of the vocabulary.
            seeds = [
                self.model.index_to_key[random.randrange(len(self.model))]
                for _ in range(k)
            ]
            candidates = list(seeds)
            for seed in seeds:
                candidates += [
                    hit[0] for hit in self.model.most_similar(seed, topn=dist)
                ]
            ranked = self.model.rank_by_centrality(candidates)
            self.word = ranked[random.randint(0, 5) % len(ranked)][1]
        # The secret word sits at index 0 so that it gets rank 1000.
        self.nearest = [self.word] + [
            hit[0] for hit in self.model.most_similar(self.word, topn=1000)
        ]

    def get_rank(self, guess):
        """Return 1000 minus the index of *guess* in the neighbour list.

        Returns None when *guess* is not among the cached neighbours.
        """
        try:
            return 1000 - self.nearest.index(guess)
        except ValueError:
            return None

    def get_temp(self, guess):
        """Return the model distance to the secret word, times 100.

        The value is rounded to two decimals. Presumably raises ``KeyError``
        for a word missing from the model, which the game loop catches.
        """
        return round(self.model.distance(self.word, guess)*100, 2)

    def _help(self, rk):
        """Return the cached neighbour at index *rk*."""
        return self.nearest[rk]

    def _reveal_word(self):
        """Return the secret word."""
        return self.word
class CemantixServer(Server):
    """Back end that scores guesses through cemantix.certitudes.org."""

    _ORIGIN = "https://cemantix.certitudes.org"
    _SCORE_URL = 'https://cemantix.certitudes.org/score'
    # Seconds to wait for the server before giving up instead of hanging.
    _TIMEOUT = 10

    def __init__(self):
        super().__init__()
        # Cache of answers: word -> {"score": float, "percentile": ...}.
        self.words = {}
        self.solvers = None
        self.num = None

    def _try(self, word):
        """Submit *word* to the remote server and cache its answer.

        Raises:
            requests.HTTPError: if the server answers with an error status.
            requests.Timeout: if no answer arrives in time.
            KeyError: if the payload lacks "solvers", "num" or "score".
                The game loop reports this as an unknown word.
        """
        headers = {"Origin": self._ORIGIN}
        # Need some additional cookies to be a valid client
        r = requests.post(
            self._SCORE_URL,
            headers=headers,
            data={'word': word},
            timeout=self._TIMEOUT,
        )
        # An explicit check: `assert` would be stripped under `python -O`.
        r.raise_for_status()
        data = r.json()

        # Parse everything first so a bad payload leaves the state untouched.
        solvers = int(data["solvers"])
        num = int(data["num"])
        entry = {"score": float(data["score"])}
        if "percentile" in data:
            entry["percentile"] = data["percentile"]

        self.solvers = solvers
        self.num = num
        self.words[word] = entry

    def _lookup(self, guess):
        """Return the cached answer for *guess*, querying the server once."""
        if guess not in self.words:
            self._try(guess)
        return self.words[guess]

    def get_rank(self, guess):
        """Return the "percentile" reported for *guess*, or None if absent."""
        return self._lookup(guess).get("percentile", None)

    def get_temp(self, guess):
        """Return the reported score times 100, rounded to two decimals."""
        return round(self._lookup(guess)["score"]*100, 2)
def cemantix(server: Server):
server.init_word()
guesses = [] # guess, temp, rank
def formatted_status(guesses, last=None): def formatted_status(guesses, last=None):
text = "" text = ""
for w, temp, rank in guesses: for w, temp, rank in guesses:
@ -46,15 +128,19 @@ def cemantix(model, word=None):
return text[:-1] return text[:-1]
def tried(word, guessed): def tried(word, guessed):
return word in [i[0] for i in guessed] return (word in [i[0] for i in guessed])
def interpret_command(cmd, guesses): def interpret_command(cmd, guesses):
match cmd: match cmd:
case "clear": case "clear":
guesses = [g for g in guesses if g[1] <= 75.] guesses = [g for g in guesses if g[1] <= 75.]
case "help": case "help":
try:
best_rk = max([rk for _, _, rk in guesses if rk is not None]+[749]) best_rk = max([rk for _, _, rk in guesses if rk is not None]+[749])
print("Maybe try "+Back.YELLOW+Fore.BLACK+nearest[999-best_rk]+Style.RESET_ALL) print("Maybe try "+Back.YELLOW+Fore.BLACK+server._help(999-best_rk)
+Style.RESET_ALL)
except NotImplementedError:
print(Fore.RED+"No help available"+Style.RESET_ALL)
case _: case _:
print(Fore.RED+"Unknown command"+Style.RESET_ALL) print(Fore.RED+"Unknown command"+Style.RESET_ALL)
@ -69,21 +155,24 @@ def cemantix(model, word=None):
guesses = interpret_command(guess[:-2], guesses) guesses = interpret_command(guess[:-2], guesses)
continue continue
except (EOFError, KeyboardInterrupt): except (EOFError, KeyboardInterrupt):
print("The word was "+Style.BRIGHT+word+Style.RESET_ALL) try:
print("The word was "+Style.BRIGHT+server._reveal_word()+Style.RESET_ALL)
except NotImplementedError:
pass
print("Goodbye!") print("Goodbye!")
return -1 return -1
try: try:
dist = round(round(model.distance(word, guess), 4)*100, 2) dist = server.get_temp(guess)
except KeyError: except KeyError:
print(Fore.RED+"Key not present"+Style.RESET_ALL) print(Fore.RED+"Key not present"+Style.RESET_ALL)
continue continue
if not tried(guess, guesses): if not tried(guess, guesses):
guesses.append((guess, dist, get_rank(guess))) guesses.append((guess, dist, server.get_rank(guess)))
guesses.sort(key=lambda x:-x[1]) guesses.sort(key=lambda x:-x[1] if server.inverse_order else x[1])
print(chr(27) + "[2J") print(chr(27) + "[2J")
print(formatted_status(guesses, last=guess)) print(formatted_status(guesses, last=guess))
if guess == word: if server.get_rank(guess) == 1000:
time.sleep(1) time.sleep(1)
print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {len(guesses)} tries.") print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {len(guesses)} tries.")
return len(guesses) return len(guesses)
@ -98,14 +187,15 @@ def main():
parser.add_argument("-m", "--model", dest="model", action="store", parser.add_argument("-m", "--model", dest="model", action="store",
default="models/selected_word2vec_model.bin", default="models/selected_word2vec_model.bin",
help="Specify model to use") help="Specify model to use")
parser.add_argument("-r", "--remote", dest="remote", action="store_true",
default=False,
help="Use cemantix.certitudes.org instead of local model")
args = parser.parse_args() args = parser.parse_args()
model = KeyedVectors.load_word2vec_format( if args.remote:
args.model, return cemantix(CemantixServer())
binary=True, return cemantix(LocalServer(word=args.word, file=args.model))
unicode_errors="ignore"
)
cemantix(model, word=args.word)
if __name__ == "__main__": if __name__ == "__main__":
main() main()