@@ -17,12 +17,14 @@ class Bayes
1717 # auto_categorize: false When true, enables ability to dynamically declare a category
1818 # enable_threshold: false When true, enables a threshold requirement for classification
1919 # threshold: 0.0 Default threshold, only used when enabled
20+ # enable_stemmer: true When false, disables word stemming
2021 def initialize ( *args )
2122 @categories = { }
2223 options = { language : 'en' ,
2324 auto_categorize : false ,
2425 enable_threshold : false ,
25- threshold : 0.0
26+ threshold : 0.0 ,
27+ enable_stemmer : true
2628 }
2729 args . flatten . each do |arg |
2830 if arg . is_a? ( Hash )
@@ -40,6 +42,7 @@ def initialize(*args)
4042 @auto_categorize = options [ :auto_categorize ]
4143 @enable_threshold = options [ :enable_threshold ]
4244 @threshold = options [ :threshold ]
45+ @enable_stemmer = options [ :enable_stemmer ]
4346 end
4447
4548 # Provides a general training method for all categories specified in Bayes#new
@@ -61,7 +64,7 @@ def train(category, text)
6164 end
6265
6366 @category_counts [ category ] += 1
64- Hasher . word_hash ( text , @language ) . each do |word , count |
67+ Hasher . word_hash ( text , @language , @enable_stemmer ) . each do |word , count |
6568 @categories [ category ] [ word ] += count
6669 @category_word_count [ category ] += count
6770 @total_words += count
@@ -78,7 +81,7 @@ def train(category, text)
7881 def untrain ( category , text )
7982 category = CategoryNamer . prepare_name ( category )
8083 @category_counts [ category ] -= 1
81- Hasher . word_hash ( text , @language ) . each do |word , count |
84+ Hasher . word_hash ( text , @language , @enable_stemmer ) . each do |word , count |
8285 next if @total_words < 0
8386 orig = @categories [ category ] [ word ] || 0
8487 @categories [ category ] [ word ] -= count
@@ -98,7 +101,7 @@ def untrain(category, text)
98101 # The largest of these scores (the one closest to 0) is the one picked out by #classify
99102 def classifications ( text )
100103 score = { }
101- word_hash = Hasher . word_hash ( text , @language )
104+ word_hash = Hasher . word_hash ( text , @language , @enable_stemmer )
102105 training_count = @category_counts . values . reduce ( :+ ) . to_f
103106 @categories . each do |category , category_words |
104107 score [ category . to_s ] = 0
@@ -155,6 +158,16 @@ def threshold_disabled?
155158 !@enable_threshold
156159 end
157160
161+ # Is word stemming enabled? Returns @enable_stemmer, which initialize reads from options[:enable_stemmer] (default: true).
162+ def stemmer_enabled?
163+ @enable_stemmer
164+ end
165+
166+ # Is word stemming disabled? Returns the boolean negation of @enable_stemmer (true when it is false or nil).
167+ def stemmer_disabled?
168+ !@enable_stemmer
169+ end
170+
158171 # Provides training and untraining methods for the categories specified in Bayes#new
159172 # For example:
160173 # b = ClassifierReborn::Bayes.new 'This', 'That', 'the_other'
0 commit comments