-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
54 lines (40 loc) · 1.39 KB
/
main.py
File metadata and controls
54 lines (40 loc) · 1.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import hmm as HiddenMarkov
import gc
import utils as utls
import sys
# Run configuration.
#
# Usage: python main.py [datasetFile] [outFile]
#
# Both positional arguments are optional. When an argument is omitted the
# corresponding default below is used instead of failing with IndexError.
# testDataSize is the number of leading entries of the model's error data set
# that get corrected and evaluated.
testDataSize = 200
datasetFile = sys.argv[1] if len(sys.argv) > 1 else "dataset.txt"
outFile = sys.argv[2] if len(sys.argv) > 2 else "out.txt"
# Build the model from the dataset file; all construction work is done by hmm.HMM.
print("initializing hmm...")
hiddenMarkovModel = HiddenMarkov.HMM(datasetFile)

print("Correcting the sentences...")
results = []
# Only the first testDataSize entries of the model's error data set are processed.
data = hiddenMarkovModel.errorFullDataSet[:testDataSize]
dataLength = len(data)
for sentenceIndex, sentence in enumerate(data):
    # Store the Viterbi output for this entry at the same position as its input.
    results.append(hiddenMarkovModel.viterbi(sentence))
    # Force a garbage collection on every 100th entry (index 0 included).
    if sentenceIndex % 100 == 0:
        gc.collect()
# Evaluation: accumulate the per-sentence counts over every (input, output) pair.
# utls.evaluateSentence returns an indexable pair; as used here, index 0 is
# added to the mistyped-word total and index 1 to the correctly-estimated total.
correctEstimatedWordCount = 0
wrongTypedWordCount = 0
for original, corrected in zip(data, results):
    counts = utls.evaluateSentence(original, corrected)
    correctEstimatedWordCount += counts[1]
    wrongTypedWordCount += counts[0]
print(wrongTypedWordCount)
print(correctEstimatedWordCount)
# Accuracy is the share of mistyped words that were estimated correctly.
# With no mistyped words the ratio is undefined; report 0.0 rather than
# aborting with ZeroDivisionError before any output is written.
if wrongTypedWordCount:
    acc = correctEstimatedWordCount / wrongTypedWordCount
else:
    acc = 0.0
# Write the evaluation summary followed by one Viterbi output per line.
# The with-block guarantees the file is closed even if a write raises
# (for example when a result is not a string).
with open(outFile, 'w') as out:
    out.write("------- Evaluation -------\n")
    out.write("Accuracy : " + str(acc))
    out.write("\nTotal miss typed word count is : " + str(wrongTypedWordCount))
    out.write("\nTotal correct estimated word count is : " + str(correctEstimatedWordCount))
    out.write("\n\nSentences after viterbi:")
    for result in results:
        out.write("\n" + result)
    out.write("\n----EOF----")