11
11
12
12
#include < maths/CPrior.h>
13
13
14
+ #include < boost/optional.hpp>
14
15
#include < boost/unordered_map.hpp>
15
16
16
17
#include < cstddef>
18
+ #include < string>
17
19
#include < vector>
18
20
19
21
namespace ml
@@ -49,12 +51,18 @@ class MATHS_EXPORT CNaiveBayesFeatureDensity
49
51
// ! Persist state by passing information to \p inserter.
50
52
virtual void acceptPersistInserter (core::CStatePersistInserter &inserter) const = 0;
51
53
54
+ // ! Set the data type.
55
+ virtual void dataType (maths_t ::EDataType dataType) = 0;
56
+
52
57
// ! Add the value \p x.
53
58
virtual void add (const TDouble1Vec &x) = 0;
54
59
55
60
// ! Compute the log value of the density function at \p x.
56
61
virtual double logValue (const TDouble1Vec &x) const = 0;
57
62
63
+ // ! Compute the log of the density function's value at its mode.
64
+ virtual double logMaximumValue () const = 0;
65
+
58
66
// ! Age out old values from the density to account for \p time passing.
59
67
virtual void propagateForwardsByTime (double time) = 0;
60
68
@@ -69,6 +77,9 @@ class MATHS_EXPORT CNaiveBayesFeatureDensity
69
77
70
78
// ! Get a checksum for this object.
71
79
virtual uint64_t checksum (uint64_t seed) const = 0;
80
+
81
+ // ! Get a human readable description of the class density function.
82
+ virtual std::string print () const = 0;
72
83
};
73
84
74
85
// ! \brief An implementation of the class conditional density function
@@ -77,7 +88,7 @@ class MATHS_EXPORT CNaiveBayesFeatureDensityFromPrior final : public CNaiveBayes
77
88
{
78
89
public:
79
90
CNaiveBayesFeatureDensityFromPrior () = default ;
80
- CNaiveBayesFeatureDensityFromPrior (CPrior &prior);
91
+ CNaiveBayesFeatureDensityFromPrior (const CPrior &prior);
81
92
82
93
// ! Create and return a clone.
83
94
// !
@@ -97,6 +108,12 @@ class MATHS_EXPORT CNaiveBayesFeatureDensityFromPrior final : public CNaiveBayes
97
108
// ! Compute the log value of the density function at \p x.
98
109
virtual double logValue (const TDouble1Vec &x) const ;
99
110
111
+ // ! Compute the log of the density function's value at its mode.
112
+ virtual double logMaximumValue () const ;
113
+
114
+ // ! Set the data type.
115
+ virtual void dataType (maths_t ::EDataType dataType);
116
+
100
117
// ! Age out old values from the density to account for \p time passing.
101
118
virtual void propagateForwardsByTime (double time);
102
119
@@ -112,6 +129,9 @@ class MATHS_EXPORT CNaiveBayesFeatureDensityFromPrior final : public CNaiveBayes
112
129
// ! Get a checksum for this object.
113
130
virtual uint64_t checksum (uint64_t seed) const ;
114
131
132
+ // ! Get a human readable description of the class density function.
133
+ virtual std::string print () const ;
134
+
115
135
private:
116
136
using TPriorPtr = boost::shared_ptr<CPrior>;
117
137
@@ -128,16 +148,24 @@ class MATHS_EXPORT CNaiveBayes
128
148
using TDoubleSizePrVec = std::vector<TDoubleSizePr>;
129
149
using TDouble1Vec = core::CSmallVector<double , 1 >;
130
150
using TDouble1VecVec = std::vector<TDouble1Vec>;
151
+ using TOptionalDouble = boost::optional<double >;
131
152
132
153
public:
133
154
explicit CNaiveBayes (const CNaiveBayesFeatureDensity &exemplar,
134
- double decayRate = 0.0 );
155
+ double decayRate = 0.0 ,
156
+ TOptionalDouble minMaxLogLikelihoodToUseFeature = TOptionalDouble());
135
157
CNaiveBayes (const SDistributionRestoreParams &params,
136
158
core::CStateRestoreTraverser &traverser);
137
159
138
160
// ! Persist state by passing information to \p inserter.
139
161
void acceptPersistInserter (core::CStatePersistInserter &inserter) const ;
140
162
163
+ // ! Efficiently swap the contents of this and \p other.
164
+ void swap (CNaiveBayes &other);
165
+
166
+ // ! Check if any training data has been added.
167
+ bool initialized () const ;
168
+
141
169
// ! This can be used to optionally seed the class counts
142
170
// ! with \p counts. These are added on to data class counts
143
171
// ! to compute the class posterior probabilities.
@@ -153,11 +181,14 @@ class MATHS_EXPORT CNaiveBayes
153
181
// ! for that feature.
154
182
void addTrainingDataPoint (std::size_t label, const TDouble1VecVec &x);
155
183
184
+ // ! Set the data type.
185
+ void dataType (maths_t ::EDataType dataType);
186
+
156
187
// ! Age out old values from the class conditional densities
157
188
// ! to account for \p time passing.
158
189
void propagateForwardsByTime (double time);
159
190
160
- // ! Get the top \p n class probabilities for \p features .
191
+ // ! Get the top \p n class probabilities for \p x .
161
192
// !
162
193
// ! \param[in] n The number of class probabilities to estimate.
163
194
// ! \param[in] x The feature values.
@@ -167,6 +198,23 @@ class MATHS_EXPORT CNaiveBayes
167
198
TDoubleSizePrVec highestClassProbabilities (std::size_t n,
168
199
const TDouble1VecVec &x) const ;
169
200
201
+ // ! Get the probability of the class labeled \p label for \p x.
202
+ // !
203
+ // ! \param[in] label The label of the class of interest.
204
+ // ! \param[in] x The feature values.
205
+ // ! \note \p x size should be equal to the number of features.
206
+ // ! A missing feature is indicated by passing an empty vector
207
+ // ! for that feature.
208
+ double classProbability (std::size_t label, const TDouble1VecVec &x) const ;
209
+
210
+ // ! Get the probabilities of all the classes for \p x.
211
+ // !
212
+ // ! \param[in] x The feature values.
213
+ // ! \note \p x size should be equal to the number of features.
214
+ // ! A missing feature is indicated by passing an empty vector
215
+ // ! for that feature.
216
+ TDoubleSizePrVec classProbabilities (const TDouble1VecVec &x) const ;
217
+
170
218
// ! Debug the memory used by this object.
171
219
void debugMemoryUsage (core::CMemoryUsage::TMemoryUsagePtr mem) const ;
172
220
@@ -176,6 +224,9 @@ class MATHS_EXPORT CNaiveBayes
176
224
// ! Get a checksum for this object.
177
225
uint64_t checksum (uint64_t seed = 0 ) const ;
178
226
227
+ // ! Get a human readable description of the classifier.
228
+ std::string print () const ;
229
+
179
230
private:
180
231
using TFeatureDensityPtr = boost::shared_ptr<CNaiveBayesFeatureDensity>;
181
232
using TFeatureDensityPtrVec = std::vector<TFeatureDensityPtr>;
@@ -212,6 +263,13 @@ class MATHS_EXPORT CNaiveBayes
212
263
bool validate (const TDouble1VecVec &x) const ;
213
264
214
265
private:
266
+ // ! It is not always appropriate to use features with very low
267
+ // ! probability in all classes to discriminate: the class choice
268
+ // ! will be very sensitive to the underlying conditional density
269
+ // ! model. This is a cutoff (for the minimum maximum class log
270
+ // ! likelihood) in order to use a feature.
271
+ TOptionalDouble m_MinMaxLogLikelihoodToUseFeature;
272
+
215
273
// ! Controls the rate at which data are aged out.
216
274
double m_DecayRate;
217
275
0 commit comments