From f3a58a49943dcd8c2388e95a37729de5bd1220c5 Mon Sep 17 00:00:00 2001 From: Marek Otahal Date: Fri, 20 Sep 2019 12:15:44 +0200 Subject: [PATCH] Classifier: use map to allow sparse categories use an unordered_map internally for the per-bit weights_ (keyed by category), plus a list of the categories seen so far, instead of dense vectors. This allows us to have sparse {1,2,999} categories (=3 total). Instead, with a dense vector the weights would have to cover 1000 category slots (indices 0..999)! --- src/htm/algorithms/SDRClassifier.cpp | 32 +++++++++++++++------------- src/htm/algorithms/SDRClassifier.hpp | 8 +++---- 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/src/htm/algorithms/SDRClassifier.cpp b/src/htm/algorithms/SDRClassifier.cpp index d731e1dccf..bb76289e2f 100644 --- a/src/htm/algorithms/SDRClassifier.cpp +++ b/src/htm/algorithms/SDRClassifier.cpp @@ -39,7 +39,7 @@ void Classifier::initialize(const Real alpha) NTA_CHECK(alpha > 0.0f); alpha_ = alpha; dimensions_ = 0; - numCategories_ = 0u; + categories_.clear(); weights_.clear(); } @@ -47,15 +47,16 @@ void Classifier::initialize(const Real alpha) PDF Classifier::infer(const SDR & pattern) const { // Check input dimensions, or if this is the first time the Classifier is used and dimensions // are unset, return zeroes. - NTA_CHECK( dimensions_ != 0 ) + NTA_CHECK( not categories_.empty() ) << "Classifier: must call `learn` before `infer`."; NTA_ASSERT(pattern.size == dimensions_) << "Input SDR does not match previously seen size!"; // Accumulate feed forward input. 
- PDF probabilities( numCategories_, 0.0f ); + PDF probabilities( categories_.size(), 0.0f ); for( const auto bit : pattern.getSparse() ) { - for( size_t i = 0; i < numCategories_; i++ ) { - probabilities[i] += weights_[bit][i]; + for( size_t i=0u; i< categories_.size(); i++) { + const auto category = categories_.at(i); + probabilities[i] += weights_.at(bit).at(category); // needs .at() instead of [] because of the infer() const } } @@ -72,19 +73,20 @@ void Classifier::learn(const SDR &pattern, const vector &categoryIdxList) if( dimensions_ == 0 ) { dimensions_ = pattern.size; while( weights_.size() < pattern.size ) { - const auto initialEmptyWeights = PDF( numCategories_, 0.0f ); + std::unordered_map initialEmptyWeights; weights_.push_back( initialEmptyWeights ); } } NTA_ASSERT(pattern.size == dimensions_) << "Input SDR does not match previously seen size!"; // Check if this is a new category & resize the weights table to hold it. - const size_t maxCategoryIdx = *max_element(categoryIdxList.cbegin(), categoryIdxList.cend()); - if( maxCategoryIdx >= numCategories_ ) { - numCategories_ = maxCategoryIdx + 1; - for( auto & vec : weights_ ) { - while( vec.size() < numCategories_ ) { - vec.push_back( 0.0f ); + for (const auto cat: categoryIdxList) { + const bool alreadyInCategories = std::find(categories_.cbegin(), categories_.cend(), cat) != categories_.cend(); + if( not alreadyInCategories ) { + categories_.push_back(cat); + //update existing inner weights: set new cat's weight to zero + for( auto & mapp : weights_ ) { + mapp.insert({cat, 0.0f}); } } } @@ -92,8 +94,8 @@ void Classifier::learn(const SDR &pattern, const vector &categoryIdxList) // Compute errors and update weights. 
const auto& error = calculateError_(categoryIdxList, pattern); for( const auto& bit : pattern.getSparse() ) { - for(size_t i = 0u; i < numCategories_; i++) { - weights_[bit][i] += alpha_ * error[i]; + for(const auto cat: categories_) { + weights_[bit][cat] += alpha_ * error[cat]; } } } @@ -106,7 +108,7 @@ std::vector Classifier::calculateError_(const std::vector &categor auto likelihoods = infer(pattern); // Compute target likelihoods - PDF targetDistribution(numCategories_ + 1u, 0.0f); + PDF targetDistribution(categories_.size() + 1u, 0.0f); for( size_t i = 0u; i < categoryIdxList.size(); i++ ) { targetDistribution[categoryIdxList[i]] = 1.0f / categoryIdxList.size(); } diff --git a/src/htm/algorithms/SDRClassifier.hpp b/src/htm/algorithms/SDRClassifier.hpp index 99358d2d2d..bf566e081b 100644 --- a/src/htm/algorithms/SDRClassifier.hpp +++ b/src/htm/algorithms/SDRClassifier.hpp @@ -146,25 +146,25 @@ class Classifier : public Serializable { ar(cereal::make_nvp("alpha", alpha_), cereal::make_nvp("dimensions", dimensions_), - cereal::make_nvp("numCategories", numCategories_), + cereal::make_nvp("categories", categories_), cereal::make_nvp("weights", weights_)); } template void load_ar(Archive & ar) - { ar( alpha_, dimensions_, numCategories_, weights_ ); } + { ar( alpha_, dimensions_, categories_, weights_ ); } private: Real alpha_; UInt dimensions_; - size_t numCategories_; + std::vector categories_; /** * 2D map used to store the data. * Use as: weights_[ input-bit ][ category-index ] * Real64 (not just Real) so the computations do not lose precision. */ - std::vector> weights_; + std::vector> weights_; // Helper function to compute the error signal for learning. std::vector calculateError_(const std::vector &bucketIdxList,