-
Notifications
You must be signed in to change notification settings - Fork 9
/
wordle_tree.py
222 lines (187 loc) · 7.31 KB
/
wordle_tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
''' Wordle tree builder
Best tree has average length of 3.4211
Takes 3-5 hours to complete.
(Or about an hour if you override the first guess,
uncomment lines 81-82 for that)
'''
import time
import os.path
import math
import hashlib
import numpy as np
import wordle
def generate_the_matrix(puzzle_words, guessing_words, possible_answers):
''' Generate the main matrix of answers: all guessing words X
puzzle words: an answer number in the cell.
'''
matrix = np.zeros((len(puzzle_words), len(guessing_words)), dtype=np.uint8)
for i, correct_word in enumerate(puzzle_words.word_list):
for j, guess_word in enumerate(guessing_words.word_list):
guess = wordle.Guess(guess_word, correct_word)
matrix[i][j] = possible_answers[tuple(guess.result)]
return matrix
def get_filename(puzzle_words, guessing_words, possible_answers):
''' Hash three input objects, keep last 8 digits
'''
h = hashlib.new('sha256')
h.update(str(puzzle_words.word_list).encode("utf-8"))
h.update(str(guessing_words.word_list).encode("utf-8"))
h.update(str(frozenset(possible_answers.items())).encode("utf-8"))
hashstr = h.hexdigest()
return f"wordle_matrix_{hashstr[:8]}.npy"
def get_the_matrix(puzzle_words, guessing_words, possible_answers):
''' Load the matrix if saved version exists.
If not, generate, save, return.
Matrix saved as "wordle_matrix_[last 6 digits of hash].npy"
Hash is generated from all three inputs
'''
filename = get_filename(puzzle_words, guessing_words, possible_answers)
if os.path.exists(filename):
matrix = np.load(filename)
else:
print("Generating the cross-check file (takes a couple of minutes)")
matrix = generate_the_matrix(puzzle_words, guessing_words, possible_answers)
np.save(filename, matrix)
return matrix
def generate_all_possible_answers():
''' Generate all posible answers. and put them om dictionary
like this: {(0,0,0,0,0): 0 ..., (2,2,2,2,2): 242}
'''
out = {}
for i in range(3**5):
a_mask = tuple([(i // 3**j) % 3 for j in range(5)])
out[a_mask] = i
return out
def get_distribution(word_ns, guess_word_n, matrix):
''' get list of sizes of resulting wordlist, after splitting
word_list by guess guess_word
'''
answers = {i:0 for i in range(243)}
for n in word_ns:
answers[matrix[n][guess_word_n]] += 1
# remove zeros
non_zero_sizes = []
for count in answers.values():
if count != 0:
non_zero_sizes.append(count)
return non_zero_sizes
def score_distribution(distribution):
''' Return a score of distribution
Update this one to test other strategies
'''
return len(distribution)
def get_top_guesses(word_ns, ignore_ns, guess_words_ns, matrix):
''' Return top "tops" distributions with highest scores
'''
# First, can the list be broken by one if the words in it?
if len(word_ns)<500:
for guess_word in word_ns:
distribution = get_distribution(word_ns, guess_word, matrix)
if distribution.count(1) == len(distribution):
return [guess_word]
# Override the first guess, use SALET
#if len(word_ns) == 2315:
# return [10183]
# Number of bests to check.
# Minimum parameters for best result are: 3,5,10,20
if len(word_ns) > 300:
options = 3
elif len(word_ns) >= 10:
options = 10
else:
options = 20
best_score = [None for _ in range(options)]
best_n = [None for _ in range(options)]
for guess_n in guess_words_ns:
if guess_n in ignore_ns:
continue
distribution = get_distribution(word_ns, guess_n, matrix)
score = score_distribution(distribution)
# Find the best one
for i in range(options):
if best_n[i] is None or score > best_score[i]:
best_score.insert(i, score)
best_n.insert(i, guess_n)
del best_score[options]
del best_n[options]
#print (best_n, best_score)
break
return best_n
def get_valid_results(word_ns, guess_word_n, matrix):
''' Return list of (answer, resulting_list) that are valid for this
initial list and guess
'''
answers = {i:[] for i in range(243)}
for n in word_ns:
answers[matrix[n][guess_word_n]].append(n)
# remove empty ones
out = {}
for answer, words in answers.items():
if len(words) > 0:
out[answer] = words
return out
def result_length(result):
''' Len of this result (sum of the 2nd levels lenths)
'''
count = 0
for line in result:
count += len(line)
return count
def add_node(word_ns, guess_words_ns, matrix, previous_guesses):
''' main recursive function
'''
final_result = None
print (f"Starting new node for the list of {len(word_ns)}")
best_guesses = get_top_guesses(word_ns, previous_guesses, guess_words_ns, matrix)
print (f"Best are: {best_guesses}")
for i, best_guess in enumerate(best_guesses):
out = []
#print (f"Attempt {i}. Best word is: {best_guess}")
answers = get_valid_results(word_ns, best_guess, matrix)
for answer, new_list in answers.items():
if len(new_list) == 1 or answer == 242:
out.append(list(previous_guesses))
if answer != 242:
out[-1].append(best_guess)
out[-1].append(new_list[0])
else:
#print (f"After asnswer {answer} still a list of " +
# f"{len(new_list)}")
out += add_node(new_list, guess_words_ns, matrix,
tuple(list(previous_guesses) + [best_guess]))
if final_result is None or result_length(out) < result_length(final_result):
final_result = out
return final_result
def result_to_text(result):
''' Convert those numbers back to words
'''
out = ""
for line in result:
correct_word = guessing_words.word_list[line[-1]]
for word in line:
guess_word = guessing_words.word_list[word]
out += guess_word + "\t"
guess = wordle.Guess(guess_word, correct_word)
guess_txt = "".join(str(c) for c in guess.result)
out += guess_txt + "\t"
out += "\n"
return out
def main():
'''
'''
puzzle_words = wordle.WordList("words-guess.txt")
guessing_words = wordle.WordList("words-guess.txt", "words-all.txt")
possible_answers = generate_all_possible_answers()
matrix = get_the_matrix(puzzle_words, guessing_words, possible_answers)
guess_words_ns = [n for n in range(len(guessing_words))]
word_ns = [n for n in range(len(puzzle_words))]
prev = ()
result = add_node(word_ns, guess_words_ns, matrix, prev)
print (result[:10])
with open("results.txt", "w") as fs:
fs.write(result_to_text(result))
print ("Ave:", result_length(result)/2315)
if __name__ == "__main__":
t = time.time()
main()
print (time.time()-t)