Skip to content

Commit

Permalink
Merge pull request #406 from htm-community/tm_anomaly
Browse files Browse the repository at this point in the history
TM anomaly additional fixes
  • Loading branch information
breznak authored May 5, 2019
2 parents a3a8ea8 + e73b54b commit 41f6e08
Show file tree
Hide file tree
Showing 18 changed files with 194 additions and 435 deletions.
6 changes: 5 additions & 1 deletion API_CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,12 @@ longer accept a synapse permanence threshold argument. PR #305

* SDRClassifier class is replaced by `Classifier` and `Predictor` classes.

* Anomaly class removed as obsolete; use `TM.anomaly`, which is simpler to use, and `MovingAverage` when you need to emulate
running averages. Internally the code still uses `computeRawAnomalyScore()`, but there's no need to call it directly. `AnomalyLikelihood`
is still available and can be used in addition to TM.getAnomalyScore(). PR #406

* TemporalMemory::getPredictiveCells() now returns a SDR. This ensures more convenient API and that the SDR object has correct
dimensions matching TM. use TM.getPredictiveCells().getSparse() to obtain the sparse vector as before.
dimensions matching TM. Use `TM.getPredictiveCells().getSparse()` to obtain the sparse vector as before. PR #437, #442

* TemporalMemory `compute()` and `activateCells()` now use only SDR variants, old overloads with C-style arrays removed. Bindings and
tests also updated.
3 changes: 3 additions & 0 deletions bindings/py/cpp_src/bindings/algorithms/py_TemporalMemory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,9 @@ using namespace nupic::algorithms::connections;

py_HTM.def_property_readonly("extra", [](const HTM_t &self) { return self.extra; } );

py_HTM.def_property_readonly("anomaly", [](const HTM_t &self) { return self.anomaly; },
"Anomaly score updated with each TM::compute() call. "
);
}

} // namespace nupic_ext
32 changes: 12 additions & 20 deletions src/examples/hotgym/HelloSPTP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@

#include "HelloSPTP.hpp"

#include "nupic/algorithms/Anomaly.hpp"
#include "nupic/algorithms/TemporalMemory.hpp"
#include "nupic/algorithms/SpatialPooler.hpp"
#include "nupic/encoders/RandomDistributedScalarEncoder.hpp"
#include "nupic/algorithms/AnomalyLikelihood.hpp"

#include "nupic/types/Sdr.hpp"
#include "nupic/utils/Random.hpp"
Expand All @@ -44,8 +44,7 @@ using Encoder = nupic::encoders::RandomDistributedScalarEncoder;
using EncoderParameters = nupic::encoders::RDSE_Parameters;
using nupic::algorithms::spatial_pooler::SpatialPooler;
using TM = nupic::algorithms::temporal_memory::TemporalMemory;
using nupic::algorithms::anomaly::Anomaly;
using nupic::algorithms::anomaly::AnomalyMode;
using nupic::algorithms::anomaly::AnomalyLikelihood;


// work-load
Expand All @@ -63,7 +62,7 @@ Real64 BenchmarkHotgym::run(UInt EPOCHS, bool useSPlocal, bool useSPglobal, bool
std::cout << "EPOCHS = " << EPOCHS << std::endl;


// initialize SP, TM, Anomaly, AnomalyLikelihood
// initialize SP, TM, AnomalyLikelihood
tInit.start();
EncoderParameters encParams;
encParams.sparsity = 0.2f; //20% of the encoding are active bits (1's)
Expand All @@ -80,8 +79,7 @@ Real64 BenchmarkHotgym::run(UInt EPOCHS, bool useSPlocal, bool useSPglobal, bool

TM tm(vector<UInt>{COLS}, CELLS);

Anomaly an(5, AnomalyMode::PURE);
Anomaly anLikelihood(5, AnomalyMode::LIKELIHOOD);
AnomalyLikelihood anLikelihood;
tInit.stop();

// data for processing input
Expand All @@ -90,8 +88,7 @@ Real64 BenchmarkHotgym::run(UInt EPOCHS, bool useSPlocal, bool useSPglobal, bool
SDR outSPlocal(spLocal.getColumnDimensions()); //for SPlocal
SDR outSP(vector<UInt>{COLS});
SDR outTM(spGlobal.getColumnDimensions());
Real res = 0.0; //for anomaly:
SDR prevPred_(outTM.dimensions); //holds T-1 TM.predictive cells
Real an = 0.0f, anLikely = 0.0f; //for anomaly:

// Start a stopwatch timer
printf("starting: %d iterations.", EPOCHS);
Expand Down Expand Up @@ -125,7 +122,6 @@ Real64 BenchmarkHotgym::run(UInt EPOCHS, bool useSPlocal, bool useSPglobal, bool
tSPglob.stop();
}
outSP = outSPglobal; //toggle if local/global SP is used further down the chain (TM, Anomaly)
NTA_CHECK(outSP == outSPglobal);

// TM
if(useTM) {
Expand All @@ -138,22 +134,19 @@ Real64 BenchmarkHotgym::run(UInt EPOCHS, bool useSPlocal, bool useSPglobal, bool


//Anomaly (pure x likelihood)
tAn.start();
res = an.compute(outSP /*active*/, prevPred_ /*prev predicted*/);
tAn.stop();

an = tm.anomaly;
tAnLikelihood.start();
anLikelihood.compute(outSP /*active*/, prevPred_ /*prev predicted*/);
anLikelihood.anomalyProbability(an); //FIXME AnLikelihood is 0.0, probably not working correctly
tAnLikelihood.stop();

prevPred_ = outTM; //to be used as predicted T-1

// print
if (e == EPOCHS - 1) {
tAll.stop();

cout << "Epoch = " << e << endl;
cout << "Anomaly = " << res << endl;
cout << "Anomaly = " << an << endl;
cout << "Anomaly (Likelihood) = " << anLikely << endl;
cout << "SP (g)= " << outSP << endl;
cout << "SP (l)= " << outSPlocal <<endl;
cout << "TM= " << outTM << endl;
Expand All @@ -164,7 +157,6 @@ Real64 BenchmarkHotgym::run(UInt EPOCHS, bool useSPlocal, bool useSPglobal, bool
if(useSPlocal) cout << "SP (l):\t" << tSPloc.getElapsed()*1.0f << endl;
if(useSPglobal) cout << "SP (g):\t" << tSPglob.getElapsed() << endl;
if(useTM) cout << "TM:\t" << tTM.getElapsed() << endl;
cout << "AN:\t" << tAn.getElapsed() << endl;
cout << "AN:\t" << tAnLikelihood.getElapsed() << endl;

// check deterministic SP, TM output
Expand Down Expand Up @@ -192,16 +184,16 @@ Real64 BenchmarkHotgym::run(UInt EPOCHS, bool useSPlocal, bool useSPglobal, bool
};
goldTM.setSparse(deterministicTM);

const float goldAn = 0.920001f;
const float goldAn = 0.8f;

if(EPOCHS == 5000) { //these hand-written values are only valid for EPOCHS = 5000 (default), but not for debug and custom runs.
NTA_CHECK(input == goldEnc) << "Deterministic output of Encoder failed!\n" << input << "should be:\n" << goldEnc;
NTA_CHECK(outSPglobal == goldSP) << "Deterministic output of SP (g) failed!\n" << outSP << "should be:\n" << goldSP;
NTA_CHECK(outSPlocal == goldSPlocal) << "Deterministic output of SP (l) failed!\n" << outSPlocal << "should be:\n" << goldSPlocal;
#ifndef _MSC_VER //FIXME deterministic checks fail on Windows
NTA_CHECK(outTM == goldTM) << "Deterministic output of TM failed!\n" << outTM << "should be:\n" << goldTM;
NTA_CHECK(static_cast<UInt>(res *10000) == static_cast<UInt>(goldAn *10000)) //compare to 4 decimal places
<< "Deterministic output of Anomaly failed! " << res << "should be: " << goldAn;
NTA_CHECK(static_cast<UInt>(an *10000.0f) == static_cast<UInt>(goldAn *10000.0f)) //compare to 4 decimal places
<< "Deterministic output of Anomaly failed! " << an << "should be: " << goldAn;
#endif
}

Expand Down
4 changes: 1 addition & 3 deletions src/examples/hotgym/HelloSPTP.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@ class BenchmarkHotgym {
);

//timers
Timer tInit, tAll, tRng, tEnc, tSPloc, tSPglob, tTM,
tAn, tAnLikelihood;

Timer tInit, tAll, tRng, tEnc, tSPloc, tSPglob, tTM, tAnLikelihood;
};

} //-ns
Expand Down
85 changes: 2 additions & 83 deletions src/nupic/algorithms/Anomaly.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,8 @@
* ---------------------------------------------------------------------
*/

#include <algorithm>
#include <iterator>
#include <numeric>
#include <set>
#include <vector>

#include "nupic/algorithms/Anomaly.hpp"
#include "nupic/utils/Log.hpp"
#include "nupic/types/Sdr.hpp"

using namespace std;
using namespace nupic;
Expand All @@ -42,92 +35,18 @@ namespace anomaly {
/**
 * Raw anomaly score: the fraction of active bits in `active` that are not
 * also set in `predicted`.
 *
 * @param active     SDR of the currently active bits
 * @param predicted  SDR of the bits that were predicted; must have the same
 *                   dimensions as `active`
 * @return 0.0 when nothing is active, otherwise a value in [0, 1]
 */
Real computeRawAnomalyScore(const SDR& active,
                            const SDR& predicted) {
  NTA_ASSERT(active.dimensions == predicted.dimensions);

  // With no active bits there is nothing that could be unpredicted; this
  // also keeps the division below away from zero.
  const auto numActive = active.getSum();
  if (numActive == 0) {
    return 0.0f;
  }

  NTA_CHECK(active.dimensions == predicted.dimensions);

  // Bits that are both active and predicted.
  SDR overlap(active.dimensions);
  overlap.intersection(active, predicted);

  // Share of the active bits that were missed by the prediction.
  const auto numUnpredicted = numActive - overlap.getSum();
  return numUnpredicted / Real(numActive);
}

/**
 * Raw anomaly score on index lists: the fraction of entries of `active`
 * that do not appear in `predicted`.
 *
 * Note: both vectors are sorted in place as a side effect.
 *
 * @param active     indices that are currently active
 * @param predicted  indices that were predicted
 * @return 0.0 when `active` is empty, otherwise (active - matched) / active
 */
Real computeRawAnomalyScore(std::vector<UInt>& active,
                            std::vector<UInt>& predicted)
{
  // Nothing active: define the score as 0 and avoid dividing by zero.
  if (active.empty()) {
    return 0.0f;
  }

  // set_intersection needs sorted ranges.
  std::sort(active.begin(), active.end());
  std::sort(predicted.begin(), predicted.end());

  std::vector<UInt> matched;
  std::set_intersection(active.begin(), active.end(),
                        predicted.begin(), predicted.end(),
                        std::back_inserter(matched));

  const auto numMissed = active.size() - matched.size();
  return static_cast<Real>(numMissed) / active.size();
}

}}} // End namespace

/**
 * Build an anomaly scorer.
 *
 * @param slidingWindowSize       window of the moving average applied to the
 *                                score; 0 leaves averaging disabled
 * @param mode                    how the score is derived from the raw anomaly
 * @param binaryAnomalyThreshold  must lie in [0, 1]; checked below
 */
Anomaly::Anomaly(UInt slidingWindowSize, AnomalyMode mode, Real binaryAnomalyThreshold)
  : mode_(mode),
    binaryThreshold_(binaryAnomalyThreshold)
{
  NTA_CHECK(binaryAnomalyThreshold >= 0 && binaryAnomalyThreshold <= 1) << "binaryAnomalyThreshold must be within [0.0,1.0]";

  // The averager is only allocated on demand; a null pointer means "off".
  const bool averagingEnabled = slidingWindowSize > 0;
  if (averagingEnabled) {
    movingAverage_.reset(new nupic::util::MovingAverage(slidingWindowSize));
  }
}


/**
 * SDR convenience overload: forwards the sparse index lists of both SDRs
 * to the vector-based compute().
 */
Real Anomaly::compute(const SDR& active, const SDR& predicted, int timestamp)
{
  auto &activeIndices    = active.getSparse();
  auto &predictedIndices = predicted.getSparse();
  return compute(activeIndices, predictedIndices, timestamp);
}

/**
 * Compute the anomaly score for one step.
 *
 * Pipeline: raw score -> mode-specific transform -> optional moving
 * average -> optional binarisation against binaryThreshold_.
 *
 * @param active     active indices (forwarded to computeRawAnomalyScore)
 * @param predicted  predicted indices (forwarded to computeRawAnomalyScore)
 * @param timestamp  forwarded to the likelihood model in the LIKELIHOOD and
 *                   WEIGHTED modes
 * @return the resulting score
 */
Real Anomaly::compute(vector<UInt>& active, vector<UInt>& predicted, int timestamp)
{
  Real rawScore = computeRawAnomalyScore(active, predicted);

  // PURE (and any unrecognised mode) reports the raw score unchanged.
  Real result = rawScore;
  if (mode_ == AnomalyMode::LIKELIHOOD) {
    Real probability = likelihood_.anomalyProbability(rawScore, timestamp);
    result = 1 - probability;
  } else if (mode_ == AnomalyMode::WEIGHTED) {
    Real probability = likelihood_.anomalyProbability(rawScore, timestamp);
    result = rawScore * (1 - probability);
  }

  // Smoothing is applied only when an averager was created.
  if (movingAverage_) {
    result = movingAverage_->compute(result);
  }

  // A non-zero threshold turns the score into a hard 0/1 decision.
  if (binaryThreshold_) {
    if (result >= binaryThreshold_) {
      result = 1.0f;
    } else {
      result = 0.0f;
    }
  }

  return result;
}

/**
 * Two Anomaly objects are equal when mode, binary threshold, moving-average
 * state (both absent, or both present and equal) and likelihood state match.
 */
bool Anomaly::operator==(const Anomaly &a) const {
  if (mode_ != a.mode_ || binaryThreshold_ != a.binaryThreshold_) {
    return false;
  }

  // The averager is optional: presence must agree before contents are compared.
  const bool hasAverage      = (movingAverage_ != nullptr);
  const bool otherHasAverage = (a.movingAverage_ != nullptr);
  if (hasAverage != otherHasAverage) {
    return false;
  }
  if (hasAverage && *movingAverage_ != *a.movingAverage_) {
    return false;
  }

  return !(likelihood_ != a.likelihood_);
}

105 changes: 6 additions & 99 deletions src/nupic/algorithms/Anomaly.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,7 @@
#ifndef NUPIC_ALGORITHMS_ANOMALY_HPP
#define NUPIC_ALGORITHMS_ANOMALY_HPP

#include <memory> // Needed for smart pointer templates
#include <vector>
#include <nupic/types/Serializable.hpp>
#include <nupic/algorithms/AnomalyLikelihood.hpp>
#include <nupic/types/Types.hpp>
#include <nupic/utils/MovingAverage.hpp> // Needed for for smart pointer templates
#include <nupic/types/Sdr.hpp> // sdr::SDR

namespace nupic {
Expand All @@ -40,104 +35,16 @@ namespace anomaly {
* Computes the raw anomaly score.
*
* The raw anomaly score is the fraction of active columns not predicted.
* Do not use these methods directly, these are for testing and internal implementation.
* Use `TM.anomaly` (+AnomalyLikelihood, MovingAverage for more specific needs).
*
* @param activeColumns: array of active column indices
* @param prevPredictedColumns: array of columns indices predicted in
* prev step
* @param active: SDR with the active columns (not cells) from the current step (T)
* @param predicted: SDR with the columns that were predicted in the previous step (T-1)
* @return anomaly score 0..1 (Real32)
*/
Real32 computeRawAnomalyScore(std::vector<UInt> &active,
std::vector<UInt> &predicted);
Real32 computeRawAnomalyScore(const sdr::SDR& active,
const sdr::SDR& predicted);

Real32 computeRawAnomalyScore(const sdr::SDR& active, const sdr::SDR& predicted);

/// Selects how Anomaly::compute() turns the raw anomaly score into its result.
enum class AnomalyMode {
  PURE,        ///< report the raw anomaly score as is
  LIKELIHOOD,  ///< report a score derived from the anomaly likelihood model
  WEIGHTED     ///< report the raw score weighted by the likelihood-derived term
};

/**
 * Utility class for generating anomaly scores in different ways.
 *
 * Combines the raw anomaly score with an optional likelihood model, an
 * optional moving average and an optional binary threshold. Serializable
 * through the save_ar()/load_ar() pair below.
 */
class Anomaly : public Serializable {
public:
/**
 * Construct an anomaly scorer.
 *
 * Supported modes (AnomalyMode::):
 *   PURE       - the raw anomaly score as computed by computeRawAnomalyScore;
 *                Real32 0..1 where 1 = totally unexpected
 *   LIKELIHOOD - uses the AnomalyLikelihood class on top of the raw anomaly
 *                score; the result is (1 - anomalyProbability)
 *   WEIGHTED   - the raw anomaly score multiplied by
 *                (1 - anomalyProbability)
 *
 * @param slidingWindowSize (optional) - how many elements are averaged;
 *        enables a moving average on the final anomaly score; int >= 0,
 *        0 (the default) disables averaging
 * @param mode (optional) - (enum) how to compute the anomaly, see the list
 *        of modes above; defaults to PURE
 * @param binaryAnomalyThreshold (optional) - must be within [0,1]; when
 *        non-zero the anomaly score is discretized to 1/0
 *        (1 iff >= binaryAnomalyThreshold). The transformation is applied
 *        after the moving average is computed. 0 (the default) disables it.
 */
Anomaly(UInt slidingWindowSize = 0, AnomalyMode mode = AnomalyMode::PURE,
Real32 binaryAnomalyThreshold = 0);

/**
 * Compute the anomaly score, based on the percent of active columns that
 * were not predicted.
 *
 * @param active: array of active column indices
 * @param predicted: array of column indices predicted in prev step (T-1)
 * @param timestamp: (optional) date timestamp when the sample occurred
 *        (used in anomaly-likelihood), -1 defaults to using iteration step
 * @return the computed anomaly score; Real32 0..1
 */
Real compute(std::vector<UInt> &active,
std::vector<UInt> &predicted,
int timestamp = -1);

// Same as above, taking the active and predicted columns as SDRs.
Real compute(const sdr::SDR &active,
const sdr::SDR &predicted,
int timestamp = -1);


CerealAdapter;

// Serialize: writes a "name" tag followed by the mode, the binary threshold,
// the likelihood model and the (possibly null) moving average.
template<class Archive>
void save_ar(Archive & ar) const {
std::string name("Anomaly");
ar(CEREAL_NVP(name),
CEREAL_NVP(mode_),
CEREAL_NVP(binaryThreshold_),
CEREAL_NVP(likelihood_),
CEREAL_NVP(movingAverage_));
}
// Deserialize: fields are read in the same order save_ar() writes them.
// The "name" tag is read into a local and not used further.
template<class Archive>
void load_ar(Archive & ar) {
std::string name;
ar(CEREAL_NVP(name),
CEREAL_NVP(mode_),
CEREAL_NVP(binaryThreshold_));
ar(CEREAL_NVP(likelihood_));
ar(CEREAL_NVP(movingAverage_));
}



// Equality over all members; operator!= is defined as its negation.
bool operator==(const Anomaly &a) const;
inline bool operator!=(const Anomaly &a) const
{ return not ((*this) == a); }

private:
// How compute() derives the final score from the raw anomaly score.
AnomalyMode mode_;
// Threshold for discretizing the score to 1/0; 0 means disabled.
Real32 binaryThreshold_;
// Optional smoothing of the score; null when slidingWindowSize was 0.
std::unique_ptr<nupic::util::MovingAverage> movingAverage_;
// Likelihood model used by the LIKELIHOOD and WEIGHTED modes.
AnomalyLikelihood likelihood_; //TODO which params/how pass them to constructor?
};
}}} //end-ns

#endif // NUPIC_ALGORITHMS_ANOMALY_HPP
3 changes: 2 additions & 1 deletion src/nupic/algorithms/AnomalyLikelihood.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,13 @@ There are 3 ways to use the code:
- using the raw individual functions.
**/

using namespace std;

namespace nupic {
namespace algorithms {
namespace anomaly {

using namespace std;

struct DistributionParams {
DistributionParams(std::string name, Real mean, Real variance, Real stdev) :
name(name),mean(mean), variance(variance), stdev(stdev) {}
Expand Down
Loading

0 comments on commit 41f6e08

Please sign in to comment.