forked from ttop/wordle_starting_guess
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalyze.py
105 lines (80 loc) · 3.18 KB
/
analyze.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import re
import math
def load_words(file_to_open='common_3000.txt'):
    """Load the candidate word list from a whitespace-separated text file.

    The default word list comes from
    https://www.ef.edu/english-resources/english-vocabulary/top-3000-words/

    Args:
        file_to_open: Path of the word file to read. Defaults to
            ``common_3000.txt``, resolved against the current working
            directory.

    Returns:
        A set holding each distinct whitespace-delimited token in the file.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
    """
    # Explicit encoding so the result does not depend on the platform locale.
    with open(file_to_open, encoding='utf-8') as word_file:
        return set(word_file.read().split())
def score_word(word, usage_freq, position_freq):
    """Return the score of a five-letter *word*.

    Each of the first five letters earns its overall usage frequency plus
    three times its frequency at that position. When a letter has already
    appeared earlier in the word, its points are divided by four times the
    number of earlier appearances and rounded down, so repeated letters
    contribute progressively less.

    Args:
        word: The word to score; positions 0-4 are read.
        usage_freq: Mapping of letter -> overall occurrence count.
        position_freq: Mapping of position (0-4) -> mapping of
            letter -> occurrence count at that position.

    Returns:
        The sum of the per-letter points.
    """
    occurrences = {}
    total = 0
    for idx in range(5):
        ch = word[idx]
        points = usage_freq[ch] + (position_freq[idx][ch] * 3)
        earlier = occurrences.get(ch, 0)
        if ch in occurrences:
            # Penalise repeats: 1/4 on the second use, 1/8 on the third, ...
            points = math.floor(points / (earlier * 4))
        occurrences[ch] = earlier + 1
        total += points
    return total
def print_frequency_count(letter_count):
    """Print every letter with its count, highest count first.

    Args:
        letter_count: Mapping of letter -> occurrence count.
    """
    print('Frequency count:\n')
    ranked = sorted(
        letter_count.items(), key=lambda pair: pair[1], reverse=True)
    for letter, count in ranked:
        print(': '.join((letter, str(count))))
def get_five_letter_words(wordlist):
    """Return the lower-cased five-letter words found in *wordlist*.

    A word is eligible only when it consists of exactly five ASCII letters
    (a-z, A-Z). Anything else, such as a word containing an apostrophe, a
    digit or an accented character, is skipped so that the frequency
    tables, which are keyed on a-z only, never meet an unknown character.

    Prints the number of eligible words as a side effect.

    Args:
        wordlist: Iterable of candidate word strings.

    Returns:
        A list of the eligible words, lower-cased, in iteration order.
    """
    # The whole word must be letters. Testing only the first character
    # (re.match on a negated class) let words like "don't" slip through.
    five_letters = re.compile(r'[a-zA-Z]{5}')
    eligible_words = [
        word.lower() for word in wordlist if five_letters.fullmatch(word)
    ]
    print(str(len(eligible_words)) + ' five-letter words')
    return eligible_words
def get_usage_frequency(wordlist):
    """Count how often each letter a-z occurs across all of *wordlist*.

    Args:
        wordlist: Iterable of lower-case words.

    Returns:
        A dict with one entry per letter a-z (zero when the letter is
        absent) giving its total number of occurrences.

    Raises:
        KeyError: If a word contains a character outside a-z.
    """
    tally = dict.fromkeys('abcdefghijklmnopqrstuvwxyz', 0)
    for ch in (char for entry in wordlist for char in entry):
        tally[ch] += 1
    return tally
def get_position_frequency(wordlist):
    """Count, per letter position 0-4, how often each letter appears there.

    Args:
        wordlist: Iterable of lower-case words at least five letters long.

    Returns:
        A dict mapping each position 0-4 to a dict of letter -> count,
        with every letter a-z present (zero when never seen there).

    Raises:
        IndexError: If a word is shorter than five characters.
        KeyError: If one of the first five characters is outside a-z.
    """
    tables = {
        slot: dict.fromkeys('abcdefghijklmnopqrstuvwxyz', 0)
        for slot in range(5)
    }
    for entry in wordlist:
        # Dicts iterate in insertion order, so slots are visited 0 to 4.
        for slot, counts in tables.items():
            counts[entry[slot]] += 1
    return tables
if __name__ == '__main__':
    # Score every eligible five-letter word from the word list and print
    # the highest-scoring ones as candidate starting guesses.
    eligible_words = get_five_letter_words(load_words())
    usage_frequency = get_usage_frequency(eligible_words)
    position_frequency = get_position_frequency(eligible_words)
    word_scores = {
        word: score_word(word, usage_frequency, position_frequency)
        for word in eligible_words
    }
    scored_rank = sorted(word_scores, key=word_scores.get, reverse=True)
    max_to_display = 30
    # Slice instead of indexing so a word list with fewer than
    # max_to_display eligible words does not raise IndexError.
    for word in scored_rank[:max_to_display]:
        print(word.upper() + ": " + str(word_scores[word]))