Skip to content

Commit da236ea

Browse files
Peter Johnson
authored and committed
Plain pickle approach
1 parent 42acbc4 commit da236ea

File tree

2 files changed

+4
-5
lines changed

2 files changed

+4
-5
lines changed

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
*.pkl.bz2 filter=lfs diff=lfs merge=lfs -text
2+
*.pkl filter=lfs diff=lfs merge=lfs -text

evaluation_function/models/shannon_words_ngram.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,13 @@ def log(msg):
2828
# If creating when deployed:
2929
#FILE = Path(tempfile.gettempdir()) / "ngram_counts.pkl"
3030
# If creating locally, to be copied when deployed:
31-
FILE = MODEL_DIR / "ngram_counts.pkl.bz2"
31+
FILE = MODEL_DIR / "ngram_counts.pkl"
3232

3333
def get_counts(n=3, dev=False):
3434
print(f"Loading/building n-gram counts for n={n}...")
3535
if os.path.exists(FILE):
36-
size = os.path.getsize(FILE)
37-
raise RuntimeError(f"Found {FILE}, size={size} bytes")
3836
try:
39-
with bz2.BZ2File(FILE, "rb") as f:
37+
with open(FILE, "rb") as f:
4038
cache = pickle.load(f)
4139
if not isinstance(cache, dict):
4240
raise RuntimeError(f"Loaded cache is {type(cache)}, not dict — contents: {str(cache)[:300]}")
@@ -52,7 +50,7 @@ def get_counts(n=3, dev=False):
5250
print(f"Building n={n} counts...")
5351
cache[n] = build_counts(n, START, END) # only works if NLTK corpora are available
5452
print(f"Saving n-gram counts to {FILE}...")
55-
with bz2.BZ2File(FILE, "wb") as f:
53+
with open(FILE, "wb") as f:
5654
pickle.dump(cache,f)
5755
except Exception as e:
5856
raise RuntimeError(f"Failed to rebuild or save n-gram counts {e}")

0 commit comments

Comments (0)