cemantix-cli/cemantix.py

112 lines
3.6 KiB
Python

from gensim.models import KeyedVectors
from colorama import Fore, Back, Style
import argparse
import readline
import random
import time
def random_word(model, k=5, dist=100):
    """Pick a random word that is central to a randomly sampled neighbourhood.

    ``k`` seed words are drawn uniformly from the vocabulary, each seed is
    extended with its ``dist`` most similar words, the pooled candidates are
    ranked with ``model.rank_by_centrality`` and one of the (at most) six
    best-ranked words is returned at random.

    Args:
        model: word-vector model exposing ``index_to_key``,
            ``most_similar`` and ``rank_by_centrality`` (``main`` passes a
            gensim ``KeyedVectors``).
        k: number of seed words to draw from the vocabulary.
        dist: number of neighbours added to the pool for every seed word.

    Returns:
        The selected word.
    """
    # random.choice can never index past the end of the vocabulary, unlike
    # randint(0, len(model)) whose upper bound is inclusive.
    base_words = [random.choice(model.index_to_key) for _ in range(k)]

    complete_list = list(base_words)
    for word in base_words:
        complete_list.extend(
            entry[0] for entry in model.most_similar(word, topn=dist)
        )

    # Each ranked entry carries the word as its second item.
    rk_words = model.rank_by_centrality(complete_list)

    # Small pools (e.g. k=1, dist=1) hold fewer than six candidates, so the
    # pick is clamped to the entries that actually exist.
    pick = random.randrange(min(6, len(rk_words)))
    return rk_words[pick][1]
def cemantix(model, word=None):
while word is None or len(word) < 5 or '-' in word or '_' in word:
word = random_word(model, k=1, dist=1) # augment numbers to try a "smooth selection"
nearest = [word]+[i[0] for i in model.most_similar(word, topn=1000)]
guesses = [] # guess, temp, rank
def get_rank(guess):
if guess not in nearest:
return None
return 1000 - nearest.index(guess)
def formatted_status(guesses, last=None):
text = ""
for w, temp, rank in guesses:
if rank is not None:
text += Back.RED+Fore.BLACK+Style.BRIGHT+str(rank)+Style.RESET_ALL
text += "\t"
if temp >= 75:
text += Fore.BLUE
text += str(temp) + Style.RESET_ALL + "\t"
if last == w:
text += Style.BRIGHT+Back.WHITE+Fore.BLACK
text += w + Style.RESET_ALL+"\n"
return text[:-1]
def tried(word, guessed):
return word in [i[0] for i in guessed]
def interpret_command(cmd, guesses):
match cmd:
case "clear":
guesses = [g for g in guesses if g[1] <= 75.]
case "help":
best_rk = max([rk for _, _, rk in guesses if rk is not None]+[749])
print("Maybe try "+Back.YELLOW+Fore.BLACK+nearest[999-best_rk]+Style.RESET_ALL)
case _:
print(Fore.RED+"Unknown command"+Style.RESET_ALL)
return guesses
while True:
try:
guess = input(Style.BRIGHT+"Your guess > "+Style.RESET_ALL).strip()
readline.add_history(guess)
if guess.endswith("()"):
guesses = interpret_command(guess[:-2], guesses)
continue
except (EOFError, KeyboardInterrupt):
print("The word was "+Style.BRIGHT+word+Style.RESET_ALL)
print("Goodbye!")
return -1
try:
dist = round(round(model.distance(word, guess), 4)*100, 2)
except KeyError:
print(Fore.RED+"Key not present"+Style.RESET_ALL)
continue
if not tried(guess, guesses):
guesses.append((guess, dist, get_rank(guess)))
guesses.sort(key=lambda x:-x[1])
print(chr(27) + "[2J")
print(formatted_status(guesses, last=guess))
if guess == word:
time.sleep(1)
print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {len(guesses)} tries.")
return len(guesses)
def main():
    """Parse the command line, load the word vectors and start a game.

    ``-w/--word`` sets the goal word and ``-m/--model`` the path of the
    binary word2vec file to load.
    """
    cli = argparse.ArgumentParser(description="local infinite cemantix")
    cli.add_argument(
        "-w", "--word",
        dest="word",
        action="store",
        help="Specify goal word",
    )
    cli.add_argument(
        "-m", "--model",
        dest="model",
        action="store",
        default="frWac_non_lem_no_postag_no_phrase_200_skip_cut100.bin",
        help="Specify model to use",
    )
    options = cli.parse_args()

    vectors = KeyedVectors.load_word2vec_format(
        options.model, binary=True, unicode_errors="ignore"
    )
    cemantix(vectors, word=options.word)


# Start a game only when the file is executed as a script.
if __name__ == "__main__":
    main()