cemantix-cli/cemantix.py
2024-10-11 16:06:44 +02:00

87 lines
2.7 KiB
Python

from gensim.models import KeyedVectors
from colorama import Fore, Back, Style
import argparse
import readline
import random
import time
def _pick_secret_word(model):
    """Return a random vocabulary word containing neither '_' nor '-'.

    A random start index is drawn, then the vocabulary is scanned forward,
    wrapping around at the end, until an eligible word is found.

    Args:
        model: object exposing ``index_to_key``, an indexable sequence of
            vocabulary words.

    Returns:
        The selected word.

    Raises:
        ValueError: if the vocabulary is empty or every entry contains
            '_' or '-'.
    """
    vocab = model.index_to_key
    size = len(vocab)
    if size == 0:
        raise ValueError("model vocabulary is empty")
    # randrange excludes its upper bound. randint(0, size) may return `size`
    # itself, which indexes one past the end of the vocabulary.
    start = random.randrange(size)
    for offset in range(size):
        # Wrap around so that skipping ineligible words near the end of the
        # vocabulary cannot run off the list.
        candidate = vocab[(start + offset) % size]
        if '_' not in candidate and '-' not in candidate:
            return candidate
    raise ValueError("model vocabulary has no word without '_' or '-'")


def _format_status(guesses, last=None):
    """Render the guess table, one ``rank<TAB>score<TAB>word`` row per guess.

    Args:
        guesses: iterable of ``(word, score, rank)`` tuples, rendered in the
            given order; ``rank`` may be None, in which case the rank cell is
            left empty.
        last: the most recent guess; its row's word is highlighted.

    Returns:
        The rows joined by newlines (empty string when there are no guesses).
    """
    rows = []
    for guessed, score, rank in guesses:
        rank_cell = ""
        if rank is not None:
            rank_cell = (Back.RED + Fore.BLACK + Style.BRIGHT
                         + str(rank) + Style.RESET_ALL)
        # Scores of 75 and above are printed in blue.
        score_cell = ((Fore.BLUE if score >= 75 else "")
                      + str(score) + Style.RESET_ALL)
        highlight = Style.BRIGHT + Back.WHITE + Fore.BLACK if last == guessed else ""
        word_cell = highlight + guessed + Style.RESET_ALL
        rows.append("\t".join((rank_cell, score_cell, word_cell)))
    return "\n".join(rows)


def cemantix(model, word=None):
    """Run one interactive guessing game on the terminal.

    The player types guesses until the secret word is found. After each
    known guess the screen is cleared and the table of all distinct guesses
    is reprinted, sorted by descending score, where the score is
    ``model.distance(word, guess)`` rounded and scaled by 100. Guesses that
    are among the secret's 999 most similar words (or the secret itself)
    also show a rank: 1000 for the secret, 999 for its closest neighbour,
    and so on downwards.

    Args:
        model: word-vector model providing ``index_to_key``,
            ``most_similar(word, topn=...)`` and ``distance(a, b)``.
        word: the secret word; when None, a random vocabulary word without
            '_' or '-' is chosen.

    Returns:
        The number of distinct known guesses once the secret is found, or
        -1 if the player quits with Ctrl-D / Ctrl-C.

    Raises:
        ValueError: if no secret word can be picked from the vocabulary.
        Whatever ``model.most_similar`` raises for the secret word is
        propagated unchanged.
    """
    if word is None:
        word = _pick_secret_word(model)

    nearest = [word] + [entry[0] for entry in model.most_similar(word, topn=999)]
    # Precompute word -> rank once. setdefault keeps the first (best)
    # position should a word ever appear twice, matching list.index().
    ranks = {}
    for position, neighbour in enumerate(nearest):
        ranks.setdefault(neighbour, 1000 - position)

    guesses = []
    seen = set()  # words already recorded in `guesses`
    while True:
        try:
            guess = input(Style.BRIGHT+"Your guess > "+Style.RESET_ALL).strip()
            readline.add_history(guess)
        except (EOFError, KeyboardInterrupt):
            # Player gave up: reveal the secret and report the abort.
            print("The word was "+Style.BRIGHT+word+Style.RESET_ALL)
            print("Goodbye!")
            return -1

        try:
            dist = round(round(model.distance(word, guess), 4)*100, 2)
        except KeyError:
            # The guess is not in the model's vocabulary.
            print(Fore.RED+"Key not present"+Style.RESET_ALL)
            continue

        if guess not in seen:
            seen.add(guess)
            guesses.append((guess, dist, ranks.get(guess)))
            guesses.sort(key=lambda entry: -entry[1])

        # ANSI "erase display" sequence, then the refreshed table.
        print(chr(27) + "[2J")
        print(_format_status(guesses, last=guess))

        if guess == word:
            time.sleep(1)
            print(Fore.GREEN+"Correct!"+Style.RESET_ALL+f" {len(guesses)} tries.")
            return len(guesses)
def main():
    """Parse the command line, load the word vectors and start a game.

    Options:
        -w / --word:  goal word to use (left to ``cemantix`` when omitted).
        -m / --model: path of the word2vec-format binary file to load.
    """
    cli = argparse.ArgumentParser(description="local infinite cemantix")

    # (flags, keyword settings) for every supported option.
    option_specs = (
        (
            ("-w", "--word"),
            {"dest": "word", "action": "store", "help": "Specify goal word"},
        ),
        (
            ("-m", "--model"),
            {
                "dest": "model",
                "action": "store",
                "default": "frWac_non_lem_no_postag_no_phrase_200_skip_cut100.bin",
                "help": "Specify model to use",
            },
        ),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)

    options = cli.parse_args()

    # Undecodable bytes in the vocabulary are ignored while loading.
    vectors = KeyedVectors.load_word2vec_format(
        options.model, binary=True, unicode_errors="ignore"
    )
    cemantix(vectors, word=options.word)
if __name__ == "__main__":
    # Start a game only when executed as a script, not when imported.
    main()