Skip to content

Commit 3b55176

Browse files
unknown
authored and committed
Added test harness
1 parent c80557d commit 3b55176

File tree

2 files changed

+71
-13
lines changed

2 files changed

+71
-13
lines changed

classify.py

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,15 @@ class Classify(Mode):
88
RARE_WORD_PROB = 0.5
99
EXCLUSIVE_WORD_PROB = 0.99
1010

11-
def validate(self, args):
12-
if len(args) != 5:
13-
raise ValueError('Usage: %s classify <file> <doctype> <doctype>' % args[0])
14-
15-
file_contents = None
11+
def validate_file_name(self, file_name):
1612
try:
17-
file_contents = open(args[2], 'r').read()
13+
file_contents = open(file_name, 'r').read()
14+
self.words = text_to_list(file_contents)
15+
1816
except Exception as e:
19-
raise ValueError('Unable to read specified file "%s", the error message was: %s' % (args[2], e))
20-
21-
doctype1 = args[3]
22-
doctype2 = args[4]
17+
raise ValueError('Unable to read specified file "%s", the error message was: %s' % (file_name, e))
2318

19+
def validate_doctypes(self, doctype1, doctype2):
2420
if doctype1 == doctype2:
2521
raise ValueError('Please enter two different doctypes')
2622

@@ -33,7 +29,13 @@ def validate(self, args):
3329

3430
self.doctype1 = doctype1
3531
self.doctype2 = doctype2
36-
self.words = text_to_list(file_contents)
32+
33+
def validate(self, args):
    """Validate the full command line for the classify mode.

    ``args`` must hold exactly five entries; entry 2 is the file to
    classify and entries 3 and 4 are the two doctypes to compare.

    Raises:
        ValueError: with a usage message when the argument count is wrong.
            Errors raised by ``validate_file_name`` and
            ``validate_doctypes`` propagate unchanged.
    """
    expected_arg_count = 5
    if len(args) != expected_arg_count:
        usage = 'Usage: %s classify <file> <doctype> <doctype>' % args[0]
        raise ValueError(usage)

    file_name, first_doctype, second_doctype = args[2:5]

    # The file is validated before the doctypes, as in the original order.
    self.validate_file_name(file_name)
    self.validate_doctypes(first_doctype, second_doctype)
3739

3840
def p_for_word(self, db, word):
3941
total_word_count = self.doctype1_word_count + self.doctype2_word_count
@@ -62,8 +64,7 @@ def p_from_list(self, l):
6264

6365
return p_product / (p_product + p_inverse_product)
6466

65-
66-
def execute(self):
67+
def calculate(self):
6768
pl = []
6869
db = Db()
6970

@@ -80,5 +81,11 @@ def execute(self):
8081

8182
result = self.p_from_list(pl)
8283

84+
return result
85+
86+
def execute(self):
    """Run the classification, report it on stdout and return it.

    Delegates the computation to ``self.calculate()`` and prints the
    probability formatted to two decimal places, naming ``self.doctype1``
    and ``self.doctype2``.

    Returns:
        The value returned by ``self.calculate()``.
    """
    result = self.calculate()

    # Parenthesised single-argument print: same output under the Python 2
    # print statement and the Python 3 print function.
    print('Probability that document is %s rather than %s is %1.2f' % (self.doctype1, self.doctype2, result))

    return result

testharness.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
from __future__ import division
2+
import sys
3+
import os
4+
from classify import Classify
5+
from db import Db
6+
7+
# Single Classify instance shared by check_file() and the __main__ block below.
classifier = Classify()
8+
9+
def is_doctype_valid(doctype):
    """Return True when the database reports a positive word count for *doctype*."""
    word_count = Db().get_words_count(doctype)
    return word_count > 0
11+
12+
def check_file(f):
    """Classify a single file and return its score in a one-element list.

    Loads *f* into the shared ``classifier`` via ``validate_file_name``,
    computes the score with ``calculate()`` and echoes it to stdout.

    Returns:
        ``[result]`` - a list so that callers such as ``check_dir`` can
        concatenate the results of several files.

    Raises:
        Whatever ``classifier.validate_file_name`` raises for *f*.
    """
    classifier.validate_file_name(f)
    result = classifier.calculate()
    # Parenthesised form prints identically on Python 2 and also parses
    # on Python 3, unlike the bare print statement.
    print(result)
    return [result]
17+
18+
def check_dir(d):
    """Classify every ``.txt`` entry directly inside directory *d*.

    Entries are visited in the order ``os.listdir`` yields them; names
    that do not end in ``.txt`` are ignored.

    Returns:
        A flat list with the scores of all files that were checked.
    """
    collected = []
    text_names = (name for name in os.listdir(d) if name.endswith(".txt"))
    for name in text_names:
        collected.extend(check_file(os.path.join(d, name)))

    return collected
25+
26+
def show_results(results):
    """Print how many documents were tested and their mean score.

    Args:
        results: list of numeric scores, one per tested document.

    An empty list (for example a directory without any ``.txt`` files)
    is reported explicitly instead of failing with ZeroDivisionError.
    """
    result_count = len(results)
    print('Tested with %s document%s' % (result_count, '' if result_count == 1 else 's'))

    if not result_count:
        # Nothing to average; avoid dividing by zero.
        print('No results to report')
        return

    # float() keeps the mean correct even without the file-level
    # `from __future__ import division` on Python 2.
    mean = sum(results) / float(result_count)
    print('Result was %1.2f (1 is good, 0 is bad)' % (mean,))
30+
31+
if __name__ == '__main__':
    # Command-line entry point: score one file, or every .txt file in a
    # directory, against an expected doctype and an alternative doctype.
    usage = 'Usage: %s <file> <expected doctype> <other doctype>' % sys.argv[0]

    if len(sys.argv) != 4:
        raise ValueError(usage)

    input_file, doctype_expected, doctype_other = sys.argv[1:4]

    # Validate the doctypes once up front; check_file() only loads file
    # contents into the shared classifier.
    classifier.validate_doctypes(doctype_expected, doctype_other)

    if os.path.isfile(input_file):
        results = check_file(input_file)
    elif os.path.isdir(input_file):
        results = check_dir(input_file)
    else:
        raise ValueError("Unable to find file/directory '%s'\n%s" % (input_file, usage))

    show_results(results)

0 commit comments

Comments
 (0)