Skip to content

Commit

Permalink
Merge pull request facebookresearch#168 from huangruizhe/patch-1
Browse files Browse the repository at this point in the history
Update make_kn_lm.py
  • Loading branch information
danpovey authored Jan 3, 2022
2 parents ea8af0e + 298faab commit 4314309
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions icefall/shared/make_kn_lm.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,9 @@ def cal_discounting_constants(self):
n1 += stat[1]
n2 += stat[2]
assert n1 + 2 * n2 > 0
self.d.append(n1 * 1.0 / (n1 + 2 * n2))
self.d.append(max(0.1, n1 * 1.0) / (n1 + 2 * n2)) # We floor the numerator with max(0.1, n1) to avoid a zero discounting constant D when n1 = 0,
# which can happen if the number of symbols is small.
# Otherwise, a zero discounting constant can cause division by zero when computing BOW.

def cal_f(self):
# f(a_z) is a probability distribution of word sequence a_z.
Expand Down Expand Up @@ -241,7 +243,11 @@ def cal_bow(self):
for u in a_counts_for_hist.word_to_count.keys(): # Should be careful here: what is Z1
sum_z1_f_z += _counts_for_hist.word_to_f[u]

counts_for_hist.word_to_bow[w] = (1.0 - sum_z1_f_a_z) / (1.0 - sum_z1_f_z)
if sum_z1_f_z < 1:
# assert sum_z1_f_a_z < 1
counts_for_hist.word_to_bow[w] = (1.0 - sum_z1_f_a_z) / (1.0 - sum_z1_f_z)
else:
counts_for_hist.word_to_bow[w] = None

def print_raw_counts(self, info_string):
# these are useful for debug.
Expand Down

0 comments on commit 4314309

Please sign in to comment.