@@ -8,19 +8,15 @@ class Classify(Mode):
8
8
RARE_WORD_PROB = 0.5
9
9
EXCLUSIVE_WORD_PROB = 0.99
10
10
11
- def validate (self , args ):
12
- if len (args ) != 5 :
13
- raise ValueError ('Usage: %s classify <file> <doctype> <doctype>' % args [0 ])
14
-
15
- file_contents = None
11
def validate_file_name(self, file_name):
    """Read *file_name* and store its tokenized contents on ``self.words``.

    The file is opened in text mode, read in full, and the text is passed
    to ``text_to_list``; whatever that helper returns is assigned to
    ``self.words``.

    Args:
        file_name: Path of the document to load.

    Raises:
        ValueError: If anything goes wrong while opening, reading or
            tokenizing the file. The original error message is embedded
            in the ValueError text.
    """
    try:
        # Use a context manager so the handle is closed deterministically,
        # even when read() fails part-way through.
        with open(file_name, 'r') as handle:
            file_contents = handle.read()
        self.words = text_to_list(file_contents)
    except Exception as e:
        # Callers only expect ValueError from the validate_* methods, so
        # every failure is funnelled into that single exception type.
        raise ValueError('Unable to read specified file "%s", the error message was: %s' % (file_name, e))
23
18
19
+ def validate_doctypes (self , doctype1 , doctype2 ):
24
20
if doctype1 == doctype2 :
25
21
raise ValueError ('Please enter two different doctypes' )
26
22
@@ -33,7 +29,13 @@ def validate(self, args):
33
29
34
30
self .doctype1 = doctype1
35
31
self .doctype2 = doctype2
36
- self .words = text_to_list (file_contents )
32
+
33
def validate(self, args):
    """Validate the command-line arguments for the classify mode.

    Exactly five arguments are required. ``args[0]`` is used as the
    program name in the usage message, ``args[2]`` is handed to
    ``validate_file_name`` and ``args[3]``/``args[4]`` are handed to
    ``validate_doctypes``. ``args[1]`` is not inspected here (presumably
    the mode name -- confirm against the caller).

    Args:
        args: Sequence of command-line arguments.

    Raises:
        ValueError: If the argument count is not five, or if either of
            the delegated validation steps rejects its input.
    """
    if len(args) != 5:
        # An empty argument list must still yield the usage ValueError
        # instead of an IndexError from args[0].
        program = args[0] if args else '<program>'
        raise ValueError('Usage: %s classify <file> <doctype> <doctype>' % program)

    file_name = args[2]
    doctype1 = args[3]
    doctype2 = args[4]

    self.validate_file_name(file_name)
    self.validate_doctypes(doctype1, doctype2)
37
39
38
40
def p_for_word (self , db , word ):
39
41
total_word_count = self .doctype1_word_count + self .doctype2_word_count
@@ -62,8 +64,7 @@ def p_from_list(self, l):
62
64
63
65
return p_product / (p_product + p_inverse_product )
64
66
65
-
66
- def execute (self ):
67
+ def calculate (self ):
67
68
pl = []
68
69
db = Db ()
69
70
@@ -80,5 +81,11 @@ def execute(self):
80
81
81
82
result = self .p_from_list (pl )
82
83
84
+ return result
85
+
86
def execute(self):
    """Run the classification, print a one-line summary and return the result.

    Delegates the computation to ``self.calculate()`` and reports the
    value together with ``self.doctype1`` and ``self.doctype2``.

    Returns:
        The value returned by ``self.calculate()``, formatted in the
        printed message with ``%1.2f``.
    """
    result = self.calculate()

    # A single parenthesised argument prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print('Probability that document is %s rather than %s is %1.2f' % (self.doctype1, self.doctype2, result))

    return result
0 commit comments