diff --git a/src/examples/mnist/MNIST_SP.cpp b/src/examples/mnist/MNIST_SP.cpp
index 4f39fa09bd..35c46fae1b 100644
--- a/src/examples/mnist/MNIST_SP.cpp
+++ b/src/examples/mnist/MNIST_SP.cpp
@@ -31,16 +31,32 @@
 #include <htm/algorithms/SpatialPooler.hpp>
 #include <htm/algorithms/SDRClassifier.hpp>
 #include <htm/utils/SdrMetrics.hpp>
+#include <htm/os/Timer.hpp>
 
 #include <mnist/mnist_reader.hpp> // MNIST data itself + read methods, namespace mnist::
 #include <mnist/mnist_utils.hpp>  // mnist::binarize_dataset
 
-namespace examples {
 
 using namespace std;
 using namespace htm;
 
 class MNIST {
+/**
+ * RESULTS:
+ *
+ * Order :	score			: column dim	: #pass : time(s): git commit	: comment
+ * -------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ * 1/Score: 97.11% (289 / 10000 wrong)	: 28x28x16	: 4	: 557	: 1f0187fc6 	: epochs help, at cost of time 
+ *
+ * 2/Score: 96.56% (344 / 10000 wrong)	: 28x28x16	: 1	: 142	: 3ccadc6d6  
+ *
+ * 3/Score: 96.1% (390 / 10000 wrong).  : 28x28x30 	: 1  	: 256	: 454f7a9d8 
+ *
+ * others/
+ * Score: 95.35% (465 / 10000 wrong)	: 28x28x16	: 2	: 125	: 		: smaller boosting (2.0)
+ * 	 -- this will be my working model, reasonable performance/speed ratio
+ *
+ */
 
   private:
     SpatialPooler sp;
@@ -51,31 +67,31 @@ class MNIST {
 
   public:
     UInt verbosity = 1;
-    const UInt train_dataset_iterations = 1u;
+    const UInt train_dataset_iterations = 2u; //epochs somewhat help, at linear time
 
 
 void setup() {
 
-  input.initialize({28 * 28});
-  columns.initialize({10 * 1000});
+  input.initialize({28, 28,1}); 
+  columns.initialize({28, 28, 8}); //1D vs 2D no big difference, 2D seems more natural for the problem. Speed-----, Results+++++++++; #columns HIGHEST impact. 
   sp.initialize(
     /* inputDimensions */             input.dimensions,
     /* columnDimensions */            columns.dimensions,
-    /* potentialRadius */             999999u, // No topology, all to all connections.
-    /* potentialPct */                0.65f,
-    /* globalInhibition */            true,
-    /* localAreaDensity */            0.05f,  // % active bits
+    /* potentialRadius */             7, // with 2D topology, a radius of 7 gives a 15x15 potential area, which is circa 25% of the input area. Slightly better than 999999, aka "no topology, all-to-all connections"
+    /* potentialPct */                0.1f, //we have only 10 classes, and << #columns. So we want to force each col to specialize. Cca 0.3 w "7" above, or very small (0.1) for "no topology". Cannot be too small due to internal checks. Speed++
+    /* globalInhibition */            true, //Speed+++++++; SDR quality-- (global does have active nearby cols, which we want to avoid (local)); Results+-0
+    /* localAreaDensity */            0.1f,  // % active bits
     /* numActiveColumnsPerInhArea */  -1,
     /* stimulusThreshold */           6u,
-    /* synPermInactiveDec */          0.005f,
-    /* synPermActiveInc */            0.014f,
-    /* synPermConnected */            0.1f,
-    /* minPctOverlapDutyCycles */     0.001f,
+    /* synPermInactiveDec */          0.002f, //FIXME inactive decay permanence plays NO role, investigate! (slightly better w/o it)
+    /* synPermActiveInc */            0.14f, //takes upto 5x steps to get dis/connected
+    /* synPermConnected */            0.5f, //no difference, let's leave at 0.5 in the middle
+    /* minPctOverlapDutyCycles */     0.2f, //speed of re-learning?
     /* dutyCyclePeriod */             1402,
-    /* boostStrength */               7.8f, // Boosting does help
-    /* seed */                        93u,
+    /* boostStrength */               2.0f, // Boosting does help, but entropy is high, on MNIST it does not matter, for learning with TM prefer boosting off (=0.0), or "neutral"=1.0
+    /* seed */                        4u,
     /* spVerbosity */                 1u,
-    /* wrapAround */                  false); // No topology, turn off wrapping
+    /* wrapAround */                  true); // does not matter (helps slightly)
 
   // Save the connections to file for postmortem analysis.
   ofstream dump("mnist_sp_initial.connections", ofstream::binary | ofstream::trunc | ofstream::out);
@@ -99,6 +115,8 @@ void train() {
   Metrics inputStats(input,    1402);
   Metrics columnStats(columns, 1402);
 
+  Timer tTrain(true);
+
   for(auto epoch = 0u; epoch < train_dataset_iterations; epoch++) {
     NTA_INFO << "epoch " << epoch;
     // Shuffle the training data.
@@ -120,11 +138,15 @@ void train() {
       if( verbosity && (++i % 1000 == 0) ) cout << "." << flush;
     }
     if( verbosity ) cout << endl;
-  }
+  
   cout << "epoch ended" << endl;
   cout << "inputStats "  << inputStats << endl;
   cout << "columnStats " << columnStats << endl;
   cout << sp << endl;
+  }
+  
+  tTrain.stop();
+  cout << "MNIST train time: " << tTrain.getElapsed() << endl; 
 
   // Save the connections to file for postmortem analysis.
   ofstream dump("mnist_sp_learned.connections", ofstream::binary | ofstream::trunc | ofstream::out);
@@ -153,14 +175,15 @@ void test() {
     if( verbosity && i % 1000 == 0 ) cout << "." << flush;
   }
   if( verbosity ) cout << endl;
-  cout << "Score: " << 100.0 * score / n_samples << "% " << endl;
+  cout << "===========RESULTs=================" << endl;
+  cout << "Score: " << 100.0 * score / n_samples << "% ("<< (n_samples - score) << " / " << n_samples << " wrong). "   << endl;
+  cout << "SDR example: " << columns << endl;
 }
 
 };  // End class MNIST
-}   // End namespace examples
 
 int main(int argc, char **argv) {
-  examples::MNIST m;
+  MNIST m;
   m.setup();
   m.train();
   m.test();
diff --git a/src/htm/algorithms/SpatialPooler.cpp b/src/htm/algorithms/SpatialPooler.cpp
index e2015f3a24..5ee4a46ac6 100644
--- a/src/htm/algorithms/SpatialPooler.cpp
+++ b/src/htm/algorithms/SpatialPooler.cpp
@@ -432,7 +432,7 @@ void SpatialPooler::initialize(
   overlapDutyCycles_.assign(numColumns_, 0); //TODO make all these sparse or rm to reduce footprint
   activeDutyCycles_.assign(numColumns_, 0);
   minOverlapDutyCycles_.assign(numColumns_, 0.0);
-  boostFactors_.assign(numColumns_, 1);
+  boostFactors_.assign(numColumns_, 1.0); //1 is neutral value for boosting
   overlaps_.resize(numColumns_);
   boostedOverlaps_.resize(numColumns_);
 
@@ -493,6 +493,10 @@ void SpatialPooler::compute(const SDR &input, const bool learn, SDR &active) {
 
 void SpatialPooler::boostOverlaps_(const vector<SynapseIdx> &overlaps, //TODO use Eigen sparse vector here
                                    vector<Real> &boosted) const {
+  if(boostStrength_ < htm::Epsilon) { //boost ~ 0.0, we can skip these computations, just copy the data
+    boosted.assign(overlaps.begin(), overlaps.end());
+    return;
+  }
   for (UInt i = 0; i < numColumns_; i++) {
     boosted[i] = overlaps[i] * boostFactors_[i];
   }
@@ -748,6 +752,16 @@ void SpatialPooler::updateBoostFactors_() {
 }
 
 
+/** Update the boost factor of column `i`: exp((targetDensity - actualDensity[i]) * boost),
+ *  i.e. > 1 when the column is less active than the target, < 1 when it is more active.
+ *  Disabled boosting (boost ~ 0) stores the neutral factor 1.0 without calling exp(). */
+static void applyBoosting_(const UInt i, const Real targetDensity,
+                           const vector<Real> &actualDensity,
+                           const Real boost, vector<Real> &output) {
+  output[i] = (boost < htm::Epsilon) ? 1.0f // neutral (== exp(0)); never leave a stale factor behind
+                                     : exp((targetDensity - actualDensity[i]) * boost);
+}
+
 void SpatialPooler::updateBoostFactorsGlobal_() {
   Real targetDensity;
   if (numActiveColumnsPerInhArea_ > 0) {
@@ -760,9 +774,9 @@ void SpatialPooler::updateBoostFactorsGlobal_() {
   } else {
     targetDensity = localAreaDensity_;
   }
-
-  for (UInt i = 0; i < numColumns_; ++i) {
-    boostFactors_[i] = exp((targetDensity - activeDutyCycles_[i]) * boostStrength_);
+  
+  for (UInt i = 0; i < numColumns_; ++i) { 
+    applyBoosting_(i, targetDensity, activeDutyCycles_, boostStrength_, boostFactors_);
   }
 }
 
@@ -785,8 +799,7 @@ void SpatialPooler::updateBoostFactorsLocal_() {
     }
 
     const Real targetDensity = localActivityDensity / numNeighbors;
-    boostFactors_[i] =
-        exp((targetDensity - activeDutyCycles_[i]) * boostStrength_);
+    applyBoosting_(i, targetDensity, activeDutyCycles_, boostStrength_, boostFactors_);
   }
 }
 
@@ -807,7 +820,7 @@ void SpatialPooler::calculateOverlap_(const SDR &input,
 
 
 void SpatialPooler::inhibitColumns_(const vector<Real> &overlaps,
-                                    vector<UInt> &activeColumns) const {
+                                    vector<CellIdx> &activeColumns) const {
   Real density = localAreaDensity_;
   if (numActiveColumnsPerInhArea_ > 0) {
     UInt inhibitionArea =
diff --git a/src/htm/algorithms/SpatialPooler.hpp b/src/htm/algorithms/SpatialPooler.hpp
index e035106340..3f2d1cbe33 100644
--- a/src/htm/algorithms/SpatialPooler.hpp
+++ b/src/htm/algorithms/SpatialPooler.hpp
@@ -107,7 +107,7 @@ class SpatialPooler : public Serializable
         that each column can potentially be connected to every input
         bit. This parameter defines a square (or hyper square) area: a
         column will have a max square potential pool with sides of
-        length (2 * potentialRadius + 1).
+        length `(2 * potentialRadius + 1)`, rounded to fit into each dimension.
 
   @param potentialPct The percent of the inputs, within a column's
         potential radius, that a column can be connected to. If set to
@@ -186,7 +186,8 @@ class SpatialPooler : public Serializable
         likely to oscillate.
 
   @param boostStrength A number greater or equal than 0, used to
-        control boosting strength. No boosting is applied if it is set to 0.
+        control boosting strength.
+        No boosting is applied if it is set to 0.0 (which also runs faster, because the boosting computations are skipped).
         The strength of boosting increases as a function of boostStrength.
         Boosting encourages columns to have similar activeDutyCycles as their
         neighbors, which will lead to more efficient use of columns. However,
@@ -917,7 +918,7 @@ class SpatialPooler : public Serializable
      columns.
   */
   void inhibitColumns_(const vector<Real> &overlaps,
-                       vector<UInt> &activeColumns) const;
+                       vector<CellIdx> &activeColumns) const;
 
   /**
      Perform global inhibition.
diff --git a/src/htm/algorithms/TemporalMemory.cpp b/src/htm/algorithms/TemporalMemory.cpp
index 6f88bd7f06..526944e0da 100644
--- a/src/htm/algorithms/TemporalMemory.cpp
+++ b/src/htm/algorithms/TemporalMemory.cpp
@@ -427,13 +427,13 @@ void TemporalMemory::activateCells(const SDR &activeColumns, const bool learn) {
   const auto columnForSegment = [&](Segment segment) {
     return connections.cellForSegment(segment) / cellsPerColumn_;
   };
-  const auto identity = [](const UInt a) {return a;}; //TODO use std::identity when c++20
+  const auto identity = [](const ElemSparse a) {return a;}; //TODO use std::identity when c++20
 
   for (auto &&columnData : groupBy( //group by columns, and convert activeSegments & matchingSegments to cols. 
            sparse, identity,
            activeSegments_, columnForSegment,
            matchingSegments_, columnForSegment)) {
-    UInt column;
+    CellIdx column;
     vector<Segment>::const_iterator activeColumnsBegin, activeColumnsEnd, 
 	       columnActiveSegmentsBegin, columnActiveSegmentsEnd, 
          columnMatchingSegmentsBegin, columnMatchingSegmentsEnd;
@@ -598,9 +598,12 @@ SDR TemporalMemory::cellsToColumns(const SDR& cells) const {
   auto correctDims = getColumnDimensions(); //nD column dimensions (eg 10x100)
   correctDims.push_back(static_cast<CellIdx>(getCellsPerColumn())); //add n+1-th dimension for cellsPerColumn (eg. 10x100x8)
 
-  NTA_CHECK(cells.dimensions == correctDims) 
+  NTA_CHECK(cells.dimensions.size() == correctDims.size()) 
 	  << "cells.dimensions must match TM's (column dims x cellsPerColumn) ";
 
+  for(size_t i = 0; i<correctDims.size(); i++) 
+	  NTA_CHECK(correctDims[i] == cells.dimensions[i]);
+
   SDR cols(getColumnDimensions());
   auto& dense = cols.getDense();
   for(const auto cell : cells.getSparse()) {
diff --git a/src/htm/types/Sdr.cpp b/src/htm/types/Sdr.cpp
index 5231156483..16290694ca 100644
--- a/src/htm/types/Sdr.cpp
+++ b/src/htm/types/Sdr.cpp
@@ -318,7 +318,7 @@ namespace htm {
         NTA_CHECK( ( 1 + fractionNoise) * getSparsity() <= 1. );
 
         const UInt num_move_bits = (UInt) std::round( fractionNoise * getSum() );
-        const vector<UInt> turn_off = rng.sample(getSparse(), num_move_bits);
+        const auto& turn_off = rng.sample(getSparse(), num_move_bits);
 
         auto& dns = getDense();
 
diff --git a/src/htm/utils/GroupBy.hpp b/src/htm/utils/GroupBy.hpp
index 00d9f9894d..41e897256d 100644
--- a/src/htm/utils/GroupBy.hpp
+++ b/src/htm/utils/GroupBy.hpp
@@ -77,7 +77,7 @@ static KeyType minFrontKey(KeyType frontrunner, Iterator0 begin0,
   KeyType ret = frontrunner;
 
   if (begin0 != end0) {
-    ret = std::min(ret, keyFn0(*begin0));
+    ret = std::min(ret, static_cast<KeyType>(keyFn0(*begin0)));
   }
 
   if (begin1 != end1) {
diff --git a/src/test/unit/algorithms/SpatialPoolerTest.cpp b/src/test/unit/algorithms/SpatialPoolerTest.cpp
index 68a1cb8472..ea1e26d7ab 100644
--- a/src/test/unit/algorithms/SpatialPoolerTest.cpp
+++ b/src/test/unit/algorithms/SpatialPoolerTest.cpp
@@ -1676,6 +1676,7 @@ TEST(SpatialPoolerTest, getOverlaps) {
 
   vector<Real> boostFactors = {1.0f, 2.0f, 3.0f};
   sp.setBoostFactors(boostFactors.data());
+  sp.setBoostStrength(0.0f); //default, effectively disables boosting
 
   SDR input( {5}); 
   input.setDense(vector<UInt>{1, 1, 1, 1, 1});
@@ -1683,13 +1684,27 @@ TEST(SpatialPoolerTest, getOverlaps) {
   activeColumns.setDense(vector<UInt>{0, 0, 0});
   sp.compute(input, true, activeColumns);
 
+  //overlaps (not boosted)
   const auto &overlaps = sp.getOverlaps();
   const vector<SynapseIdx> expectedOverlaps = {0, 3, 5};
   EXPECT_EQ(expectedOverlaps, overlaps);
 
-  const vector<Real> &boostedOverlaps = sp.getBoostedOverlaps();
-  const vector<Real> expectedBoostedOverlaps = {0.0f, 6.0f, 15.0f};
-  EXPECT_EQ(expectedBoostedOverlaps, boostedOverlaps);
+  //boosted overlaps, but boost strength=0.0
+  const auto& boostedOverlaps = sp.getBoostedOverlaps();
+  const vector<Real> expectedBoostedOverlaps = {0.0f, 3.0f, 5.0f}; //same as orig above (but float)
+  EXPECT_EQ(expectedBoostedOverlaps, boostedOverlaps) << "SP with boost strength " << sp.getBoostStrength() << " must not change boosting ";
+
+  //boosted overlaps, but boost strength=2.0
+  //recompute
+  sp.setBoostFactors(boostFactors.data());
+  sp.setBoostStrength(2.0f);
+  
+  activeColumns.setDense(vector<UInt>{0, 0, 0});
+  sp.compute(input, true, activeColumns);
+
+  const auto& boostedOverlaps2 = sp.getBoostedOverlaps();
+  const vector<Real> expectedBoostedOverlaps2 = {0.0f, 6.0f, 15.0f};
+  EXPECT_EQ(expectedBoostedOverlaps2, boostedOverlaps2) << "SP with boost strength " << sp.getBoostStrength() << " must change boosting ";
 }
 
 TEST(SpatialPoolerTest, ZeroOverlap_NoStimulusThreshold_GlobalInhibition) {