Skip to content

Commit 07896c3

Browse files
unknown, unknown
authored and committed
refactoring, fixes, validation
1 parent 3b55176 commit 07896c3

File tree

7 files changed

+51
-30
lines changed

7 files changed

+51
-30
lines changed

bayes.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ def register_mode(mode_class):
2828

2929
mode = modes[mode_name]()
3030
mode.validate(args)
31-
mode.execute()
31+
mode.output(mode.execute())
3232

3333
except Exception as ex:
3434
print ex

classify.py

Lines changed: 16 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -8,15 +8,24 @@ class Classify(Mode):
88
RARE_WORD_PROB = 0.5
99
EXCLUSIVE_WORD_PROB = 0.99
1010

11-
def validate_file_name(self, file_name):
11+
def set_text(self, text):
12+
words = text_to_list(text)
13+
14+
if not len(words):
15+
raise ValueError('Text did not contain any valid words')
16+
17+
self.words = words
18+
return self
19+
20+
def set_file_name(self, file_name):
1221
try:
1322
file_contents = open(file_name, 'r').read()
14-
self.words = text_to_list(file_contents)
23+
return self.set_text(file_contents)
1524

1625
except Exception as e:
1726
raise ValueError('Unable to read specified file "%s", the error message was: %s' % (file_name, e))
1827

19-
def validate_doctypes(self, doctype1, doctype2):
28+
def set_doctypes(self, doctype1, doctype2):
2029
if doctype1 == doctype2:
2130
raise ValueError('Please enter two different doctypes')
2231

@@ -34,8 +43,8 @@ def validate(self, args):
3443
if len(args) != 5:
3544
raise ValueError('Usage: %s classify <file> <doctype> <doctype>' % args[0])
3645

37-
self.validate_file_name(args[2])
38-
self.validate_doctypes(args[3], args[4])
46+
self.set_file_name(args[2])
47+
self.set_doctypes(args[3], args[4])
3948

4049
def p_for_word(self, db, word):
4150
total_word_count = self.doctype1_word_count + self.doctype2_word_count
@@ -64,7 +73,7 @@ def p_from_list(self, l):
6473

6574
return p_product / (p_product + p_inverse_product)
6675

67-
def calculate(self):
76+
def execute(self):
6877
pl = []
6978
db = Db()
7079

@@ -83,9 +92,5 @@ def calculate(self):
8392

8493
return result
8594

86-
def execute(self):
87-
result = self.calculate()
88-
95+
def output(self, result):
8996
print 'Probability that document is %s rather than %s is %1.2f' % (self.doctype1, self.doctype2, result)
90-
91-
return result

learn.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,4 +36,7 @@ def execute(self):
3636
d = list_to_dict(l)
3737
db.update_word_counts(d, self.doc_type)
3838
db.update_doctype_count(self.count, self.doc_type)
39+
return self.count
3940

41+
def output(self, _):
42+
print "Processed %s documents of type '%s'" % (self.count, self.doc_type)

mode.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,5 +5,5 @@ def validate(self):
55
def execute(self):
66
raise NotImplementedError()
77

8-
def name(self):
8+
def output(self):
99
raise NotImplementedError()

reset.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3,14 +3,13 @@
33
from db import Db
44

55
class Reset(Mode):
6-
def name(self):
7-
return 'reset'
8-
96
def validate(self, args):
107
if len(args) != 2:
118
raise ValueError('Usage: %s reset' % args[0])
129

1310
def execute(self):
1411
Db().reset()
15-
print 'Reset Complete'
1612
Status().execute()
13+
14+
def output(self, _):
15+
print 'Reset Complete'

status.py

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2,17 +2,23 @@
22
from mode import Mode
33

44
class Status(Mode):
5-
def name(self):
6-
return 'status'
75

86
def validate(self, args):
97
if len(args) != 2:
108
raise ValueError('Usage: %s status' % args[0])
119

1210
def execute(self):
1311
db = Db()
12+
return db.get_doctype_counts().items()
13+
14+
def output(self, results):
1415
bar = '=' * 40
15-
print 'Status:\n'
16-
for doctype, count in db.get_doctype_counts().items():
17-
print doctype, ': ', count
16+
print '%s\nStatus:\n%s\n' % (bar, bar)
17+
18+
if results:
19+
for doctype, count in results:
20+
print '%s: %s' % (doctype, count)
21+
else:
22+
print 'No data'
1823

24+
print '\n%s' % bar

testharness.py

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -10,23 +10,31 @@ def is_doctype_valid(doctype):
1010
return Db().get_words_count(doctype) > 0
1111

1212
def check_file(f):
13-
classifier.validate_file_name(f)
14-
result = classifier.calculate()
15-
print result
16-
return [result]
13+
results = []
14+
for line in open(f, 'r').readlines():
15+
try:
16+
classifier.set_text(line)
17+
results += [classifier.execute()]
18+
except ValueError:
19+
pass
20+
21+
return results
1722

1823
def check_dir(d):
1924
results = []
2025
for f in os.listdir(d):
21-
if f.endswith(".txt"):
26+
if f.endswith(".js"):
2227
results += check_file(os.path.join(d,f))
2328

2429
return results
2530

2631
def show_results(results):
2732
result_count = len(results)
28-
print 'Tested with %s document%s' % (result_count, '' if result_count == 1 else 's')
29-
print 'Result was %1.2f (1 is good, 0 is bad)' % (sum(results) / result_count,)
33+
if result_count:
34+
print 'Tested with %s document%s' % (result_count, '' if result_count == 1 else 's')
35+
print 'Result was %1.2f (0 = %s, 1 = %s)' % (sum(results) / result_count, doctype_other, doctype_expected)
36+
else :
37+
print 'No documents found'
3038

3139
if __name__ == '__main__':
3240
usage = 'Usage: %s <file> <expected doctype> <other doctype>' % sys.argv[0]
@@ -38,7 +46,7 @@ def show_results(results):
3846
doctype_expected = sys.argv[2]
3947
doctype_other = sys.argv[3]
4048

41-
classifier.validate_doctypes(doctype_expected, doctype_other)
49+
classifier.set_doctypes(doctype_expected, doctype_other)
4250

4351
results = None
4452
if os.path.isfile(input_file):

0 commit comments

Comments
 (0)