Skip to content

Commit e217985

Browse files
Peter JohnsonPeter Johnson
authored andcommitted
add shannon 1-gram letters
1 parent 98d7d36 commit e217985

File tree

10 files changed

+159
-13
lines changed

10 files changed

+159
-13
lines changed

.DS_Store

0 Bytes
Binary file not shown.

evaluation_function/.DS_Store

0 Bytes
Binary file not shown.

evaluation_function/dev.json

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"basic_nn": {
3+
"answer": "1.0",
4+
"response": "1.0",
5+
"model": "basic_nn",
6+
"refresh": false
7+
},
8+
"shannon_letters_single": {
9+
"answer": "2.0",
10+
"response": "3.0",
11+
"model": "shannon_letters_single",
12+
"uniform": false,
13+
"word_count": "random"
14+
}
15+
}

evaluation_function/dev.py

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
import sys
1+
import sys, argparse, json, os
2+
from pathlib import Path
23

34
from lf_toolkit.shared.params import Params
45

@@ -7,17 +8,33 @@
78
def dev():
89
"""Run the evaluation function from the command line for development purposes.
910
10-
Usage: python -m evaluation_function.dev <answer> <response>
11+
Usage:
12+
poetry run python -m evaluation_function.dev --config dev.json --case basic_nn
13+
14+
(Change the case as desired, and ensure the dev.json is up to date with your needs)
15+
1116
"""
12-
if len(sys.argv) < 3:
13-
print("Usage: python -m evaluation_function.dev <answer> <response>")
14-
return
15-
16-
answer = sys.argv[1]
17-
response = sys.argv[2]
18-
model = sys.argv[3] if len(sys.argv) > 3 else "basic_nn"
19-
refresh = sys.argv[4].lower() == "true" if len(sys.argv) >= 4 else False
20-
params = Params(model=model, refresh=refresh)
17+
18+
BASE_DIR = Path(__file__).resolve().parent
19+
20+
parser = argparse.ArgumentParser()
21+
parser.add_argument("--config", required=True, help="Path to JSON config")
22+
parser.add_argument("--case", help="Case inside the config file")
23+
args = parser.parse_args()
24+
25+
config_path = (BASE_DIR / args.config).resolve()
26+
with open(config_path) as f:
27+
all_config = json.load(f)
28+
29+
if args.case not in all_config: # extract config for the relevant case
30+
raise ValueError(f"Case '{args.case}' not found in {args.config}")
31+
32+
config = all_config[args.case]
33+
34+
# Separate out required fields
35+
answer = config.pop("answer")
36+
response = config.pop("response")
37+
params = Params(**config)
2138

2239
result = evaluation_function(answer, response, params)
2340

evaluation_function/evaluation.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,10 @@ def evaluation_function(
3131
to output the evaluation response.
3232
"""
3333

34-
model_name = getattr(params, "model", "basic_nn") # default
34+
#model_name = getattr(params, "model", "basic_nn") # default
35+
model_name = params.get("model", "basic_nn") # default
36+
37+
print(params)
3538
try:
3639
model = getattr(models, model_name) # e.g. models.basic_nn
3740
except AttributeError:
2 KB
Binary file not shown.
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
from . import basic_nn
2+
from . import shannon_letters_single
23

3-
__all__ = ["basic_nn"]
4+
__all__ = ["basic_nn", "shannon_letters_single"]
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import random
2+
import csv
3+
import os
4+
from pathlib import Path
5+
from io import StringIO
6+
import re
7+
8+
from lf_toolkit.evaluation import Result, Params
9+
10+
# Setup paths for saving/loading model and data.
# BASE_DIR is the directory that contains this module.
BASE_DIR = Path(__file__).resolve().parent
# MODEL_DIR is taken from the MODEL_DIR environment variable when it is set;
# otherwise it falls back to a "storage" folder next to this module.
MODEL_DIR = Path(os.environ.get("MODEL_DIR", BASE_DIR / "storage"))
# Import-time side effect: create the directory (and any parents) if missing.
MODEL_DIR.mkdir(parents=True, exist_ok=True)
# CSV frequency tables read by the module-level FrequencyData objects.
LETTERS_PATH = MODEL_DIR / "norvig_letter_frequencies.csv"
WORD_LENGTHS_PATH = MODEL_DIR / "norvig_word_length_frequencies.csv"
16+
17+
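# Relative frequencies of letters in general English plain text, from "Cryptological Mathematics" by Robert Edward Lewand:
# https://web.archive.org/web/20080708193159/http://pages.central.edu/emp/LintonT/classes/spring01/cryptography/letterfreq.html
# NOTE(review): the CSV files loaded by this module are named norvig_*.csv; confirm that this citation matches the data actually shipped.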
19+
20+
def csv_to_lists(filename: str) -> list:
    """Read a two-column frequency CSV into a list of ``[key, weight]`` pairs.

    The first row is treated as a header and discarded. Blank rows are
    skipped; every other row must hold exactly a key and a numeric value.

    Args:
        filename: Path of the CSV file (a ``str`` or any path-like object
            accepted by ``open``).

    Returns:
        A list of ``[key, float(value)]`` lists in file order. The list is
        empty when the file has no data rows or is completely empty.

    Raises:
        ValueError: If a non-blank data row does not have exactly two
            columns, or its value cannot be converted to ``float``.
    """
    frequencies = []
    with open(filename, newline='', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile)
        # Supplying a default keeps a zero-byte file from raising StopIteration.
        next(reader, None)  # Skip header row
        for row in reader:
            # csv.reader yields [] for blank lines (e.g. trailing newlines).
            if not row:
                continue
            key, value = row
            frequencies.append([key, float(value)])
    return frequencies
28+
29+
class FrequencyData:
30+
def __init__(self, filename: str = None):
31+
self.tokens = []
32+
self.weights = []
33+
if filename:
34+
data = csv_to_lists(filename)
35+
self.tokens = [row[0] for row in data]
36+
self.weights = [row[1] for row in data]
37+
38+
# Uniform alphabet: 'A'..'Z' followed by the space character, every symbol
# carrying the same weight.
uniform_letters = FrequencyData()
uniform_letters.tokens = [chr(code) for code in range(ord('A'), ord('Z') + 1)] + [' ']
uniform_letters.weights = [1 for _ in range(27)]

# Distributions loaded from the CSV tables in MODEL_DIR (read at import time).
letters = FrequencyData(LETTERS_PATH)
word_lengths = FrequencyData(WORD_LENGTHS_PATH)
44+
45+
def generate_string(uniform=False, word_count=5) -> str:
    """Build a space-separated string of randomly generated "words".

    For each word a length is drawn from ``word_lengths`` and that many
    symbols are then drawn independently, with replacement.

    Args:
        uniform: When truthy, symbols come from ``uniform_letters`` (which
            also contains the space character, so a "word" may itself hold
            spaces); otherwise they come from the weighted ``letters`` table.
        word_count: Number of words to generate.

    Returns:
        The generated words joined by single spaces.
    """
    source = uniform_letters if uniform else letters
    words = []
    for _ in range(word_count):
        # Tokens read from the CSV are strings, hence the int() conversion.
        drawn = random.choices(word_lengths.tokens, weights=word_lengths.weights, k=1)
        length = int(drawn[0])
        symbols = random.choices(source.tokens, weights=source.weights, k=length)
        words.append(''.join(symbols))
    return ' '.join(words)
55+
56+
def run(response, answer, params: Params) -> Result:
    """Generate a random letter string and return it as feedback.

    ``response`` and ``answer`` are accepted but not used; the result is
    always marked correct.

    Args:
        response: Unused.
        answer: Unused.
        params: Mapping-like options. ``"word_count"`` (default 10) is the
            number of words, or the string ``"random"`` to pick a count
            between 3 and 15 inclusive. ``"uniform"`` (default False) is
            passed through to ``generate_string``.

    Returns:
        A ``Result`` with ``is_correct=True`` and a single ``"general"``
        feedback item holding the generated string.
    """
    requested = params.get("word_count", 10)
    word_count = random.randint(3, 15) if requested == "random" else requested
    use_uniform = params.get("uniform", False)
    text = generate_string(uniform=use_uniform, word_count=word_count)
    return Result(is_correct=True, feedback_items=[("general", text)])
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
Letter,Percent
2+
E,12.49
3+
T,9.28
4+
A,8.04
5+
O,7.64
6+
I,7.57
7+
N,7.23
8+
S,6.51
9+
R,6.28
10+
H,5.05
11+
L,4.07
12+
D,3.82
13+
C,3.34
14+
U,2.73
15+
M,2.51
16+
F,2.4
17+
P,2.14
18+
G,1.87
19+
W,1.68
20+
Y,1.66
21+
B,1.48
22+
V,1.05
23+
K,0.54
24+
X,0.23
25+
J,0.16
26+
Q,0.12
27+
Z,0.09
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
WordLength,Percent
2+
1,2.998
3+
2,17.343
4+
3,21.093
5+
4,17.966
6+
5,14.906
7+
6,12.57
8+
7,8.498
9+
8,5.188
10+
9,3.018
11+
10,1.669
12+
11,0.848
13+
12,0.436
14+
13,0.212
15+
14,0.098
16+
15,0.046
17+
16,0.021
18+
17,0.01
19+
18,0.005
20+
19,0.002
21+
20,0.001

0 commit comments

Comments
 (0)