Commit 12e89a6

dangleptr and heng authored

Fix the wrong usage for LRUCache (#2267)

* Fix the wrong usage for LRUCache
* Overwrite the old value when calling insert
* Address comments
* Address liuyu's comments
* Address critical27's comments

Co-authored-by: heng <heng.chen@vesoft.com>

1 parent 0f8ce2e · commit 12e89a6

4 files changed: +93 −77 lines changed

src/common/base/ConcurrentLRUCache.h (+10 −6)
```diff
@@ -206,19 +206,23 @@ class LRU {
     }
 
     void insert(key_type&& key, value_type&& value) {
-        typename map_type::iterator i = map_.find(key);
-        if (i == map_.end()) {
+        typename map_type::iterator it = map_.find(key);
+        if (it == map_.end()) {
             // insert item into the cache, but first check if it is full
             if (size() >= capacity_) {
                 VLOG(3) << "Size:" << size() << ", capacity " << capacity_;
                 // cache is full, evict the least recently used item
                 evict();
             }
             VLOG(3) << "Insert key " << key << ", val " << value;
-            // insert the new item
             list_.push_front(key);
             map_.emplace(std::forward<key_type>(key),
                          std::make_tuple(std::forward<value_type>(value), list_.begin()));
+        } else {
+            // Overwrite the value
+            std::get<0>(it->second) = std::move(value);
+            typename list_type::iterator j = std::get<1>(it->second);
+            list_.splice(list_.begin(), list_, j);
         }
     }
 
@@ -296,9 +300,9 @@ class LRU {
     map_type map_;
     list_type list_;
     size_t capacity_;
-    std::atomic_uint64_t total_{0};
-    std::atomic_uint64_t hits_{0};
-    std::atomic_uint64_t evicts_{0};
+    uint64_t total_{0};
+    uint64_t hits_{0};
+    uint64_t evicts_{0};
 };
 
 }  // namespace nebula
```
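
The behavioral fix: `insert()` previously did nothing when the key was already in the map, so a repeated `insert()` silently kept the stale value. It now overwrites the value and splices the entry to the front of the list, i.e. the MRU position. The counters also drop `std::atomic`, presumably because each `LRU` bucket inside `ConcurrentLRUCache` is already serialized by its own lock. A minimal single-threaded sketch of the insert-or-overwrite pattern (illustrative only, not the Nebula class; `SimpleLRU` and its members are invented for this example):

```cpp
#include <cstdint>
#include <iostream>
#include <list>
#include <string>
#include <tuple>
#include <unordered_map>
#include <utility>

// Minimal single-threaded LRU showing the insert-or-overwrite pattern.
// The real ConcurrentLRUCache shards keys across lock-guarded buckets;
// this sketch keeps only the list/map bookkeeping the patch touches.
template <typename K, typename V>
class SimpleLRU {
public:
    explicit SimpleLRU(size_t capacity) : capacity_(capacity) {}

    void insert(K key, V value) {
        auto it = map_.find(key);
        if (it == map_.end()) {
            if (map_.size() >= capacity_) {
                // Cache is full: evict the least recently used entry (list tail).
                map_.erase(list_.back());
                list_.pop_back();
            }
            list_.push_front(key);
            map_.emplace(std::move(key),
                         std::make_tuple(std::move(value), list_.begin()));
        } else {
            // The fix: overwrite the value and promote the entry to MRU.
            std::get<0>(it->second) = std::move(value);
            list_.splice(list_.begin(), list_, std::get<1>(it->second));
        }
    }

    const V* get(const K& key) {
        auto it = map_.find(key);
        if (it == map_.end()) {
            return nullptr;
        }
        list_.splice(list_.begin(), list_, std::get<1>(it->second));
        return &std::get<0>(it->second);
    }

private:
    std::list<K> list_;  // MRU at front, LRU at back
    std::unordered_map<K, std::tuple<V, typename std::list<K>::iterator>> map_;
    size_t capacity_;
};

int main() {
    SimpleLRU<int32_t, std::string> cache(2);
    cache.insert(10, "ten");
    cache.insert(10, "ten_v1");           // now overwrites instead of a no-op
    std::cout << *cache.get(10) << "\n";  // prints "ten_v1"
    return 0;
}
```

`list::splice` is O(1) and does not invalidate list iterators, which is why the iterator stored in the map stays valid after the entry is promoted.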

src/common/base/test/ConcurrentLRUCacheTest.cpp (+15 −0)
```diff
@@ -139,6 +139,21 @@ TEST(ConcurrentLRUCacheTest, MultiThreadsTest) {
     EXPECT_EQ(10000, cache.total());
 }
 
+TEST(ConcurrentLRUCacheTest, OverwriteTest) {
+    ConcurrentLRUCache<int32_t, std::string> cache(1024);
+    cache.insert(10, "ten");
+    {
+        auto v = cache.get(10);
+        EXPECT_TRUE(v.ok());
+        EXPECT_EQ("ten", v.value());
+    }
+    cache.insert(10, "ten_v1");
+    {
+        auto v = cache.get(10);
+        EXPECT_TRUE(v.ok());
+        EXPECT_EQ("ten_v1", v.value());
+    }
+}
 
 }  // namespace nebula
```

src/storage/mutate/AddVerticesProcessor.cpp (+66 −69)
```diff
@@ -30,78 +30,77 @@ void AddVerticesProcessor::process(const cpp2::AddVerticesRequest& req) {
     }
 
     CHECK_NOTNULL(kvstore_);
-    if (indexes_.empty()) {
-        std::for_each(partVertices.begin(), partVertices.end(), [&](auto& pv) {
-            auto partId = pv.first;
-            const auto& vertices = pv.second;
-            std::vector<kvstore::KV> data;
-            std::for_each(vertices.begin(), vertices.end(), [&](auto& v) {
-                const auto& tags = v.get_tags();
-                std::for_each(tags.begin(), tags.end(), [&](auto& tag) {
-                    VLOG(3) << "PartitionID: " << partId << ", VertexID: " << v.get_id()
-                            << ", TagID: " << tag.get_tag_id() << ", TagVersion: " << version;
-                    auto key = NebulaKeyUtils::vertexKey(partId, v.get_id(),
-                                                         tag.get_tag_id(), version);
-                    data.emplace_back(std::move(key), std::move(tag.get_props()));
-                    if (FLAGS_enable_vertex_cache && vertexCache_ != nullptr) {
-                        vertexCache_->evict(std::make_pair(v.get_id(), tag.get_tag_id()));
-                        VLOG(3) << "Evict cache for vId " << v.get_id()
-                                << ", tagId " << tag.get_tag_id();
-                    }
-                });
+    std::unordered_set<std::pair<VertexID, TagID>> uniqueIDs;
+    uniqueIDs.reserve(128);
+
+    std::for_each(partVertices.begin(), partVertices.end(), [&](auto& pv) {
+        std::vector<kvstore::KV> data;
+        data.reserve(128);
+        std::vector<std::tuple<VertexID, TagID, std::string>> cacheData;
+        if (FLAGS_enable_vertex_cache && vertexCache_ != nullptr) {
+            cacheData.reserve(128);
+        }
+        auto partId = pv.first;
+        const auto& vertices = pv.second;
+
+        uniqueIDs.clear();
+        std::for_each(vertices.rbegin(), vertices.rend(), [&](auto& v) {
+            const auto& tags = v.get_tags();
+            std::for_each(tags.begin(), tags.end(), [&](auto& tag) {
+                auto uniqueKey = std::make_pair(v.get_id(), tag.get_tag_id());
+                if (uniqueIDs.find(uniqueKey) != uniqueIDs.end()) {
+                    return;
+                }
+
+                VLOG(3) << "PartitionID: " << partId << ", VertexID: " << v.get_id()
+                        << ", TagID: " << tag.get_tag_id() << ", TagVersion: " << version;
+                auto key = NebulaKeyUtils::vertexKey(partId, v.get_id(),
+                                                     tag.get_tag_id(), version);
+                if (FLAGS_enable_vertex_cache && vertexCache_ != nullptr) {
+                    cacheData.emplace_back(v.get_id(), tag.get_tag_id(), tag.get_props());
+                }
+                data.emplace_back(std::move(key), std::move(tag.get_props()));
+                uniqueIDs.emplace(uniqueKey);
             });
-            doPut(spaceId_, partId, std::move(data));
-        });
-    } else {
-        std::for_each(partVertices.begin(), partVertices.end(), [&](auto &pv) {
-            auto partId = pv.first;
-            auto atomic = [version, partId, vertices = std::move(pv.second), this]()
-                          -> folly::Optional<std::string> {
-                return addVertices(version, partId, vertices);
-            };
-            auto callback = [partId, this](kvstore::ResultCode code) {
-                handleAsync(spaceId_, partId, code);
-            };
-            this->kvstore_->asyncAtomicOp(spaceId_, partId, atomic, callback);
         });
-    }
-}
 
-std::string AddVerticesProcessor::addVertices(int64_t version, PartitionID partId,
-                                              const std::vector<cpp2::Vertex>& vertices) {
-    std::unique_ptr<kvstore::BatchHolder> batchHolder = std::make_unique<kvstore::BatchHolder>();
-    /*
-     * Define the map newIndexes to avoid inserting duplicate vertex.
-     * This map means :
-     * map<vertex_unique_key, prop_value> ,
-     * -- vertex_unique_key is only used as the unique key , for example:
-     * insert below vertices in the same request:
-     * kv(part1_vid1_tag1 , v1)
-     * kv(part1_vid1_tag1 , v2)
-     * kv(part1_vid1_tag1 , v3)
-     * kv(part1_vid1_tag1 , v4)
-     *
-     * Ultimately, kv(part1_vid1_tag1 , v4) . It's just what I need.
-     */
-    std::map<std::string, std::string> newVertices;
-    std::for_each(vertices.begin(), vertices.end(), [&](auto& v) {
-        auto vId = v.get_id();
-        const auto& tags = v.get_tags();
-        std::for_each(tags.begin(), tags.end(), [&](auto& tag) {
-            auto tagId = tag.get_tag_id();
-            auto prop = tag.get_props();
-            VLOG(3) << "PartitionID: " << partId << ", VertexID: " << vId
-                    << ", TagID: " << tagId << ", TagVersion: " << version;
-            auto key = NebulaKeyUtils::vertexKey(partId, vId, tagId, version);
-            newVertices[key] = std::move(prop);
-            if (FLAGS_enable_vertex_cache && this->vertexCache_ != nullptr) {
-                this->vertexCache_->evict(std::make_pair(vId, tagId));
-                VLOG(3) << "Evict cache for vId " << vId << ", tagId " << tagId;
+        auto callback = [partId,
+                         this,
+                         cacheData = std::move(cacheData)] (kvstore::ResultCode code) mutable {
+            if (FLAGS_enable_vertex_cache
+                    && vertexCache_ != nullptr
+                    && code == kvstore::ResultCode::SUCCEEDED) {
+                for (auto&& tup : cacheData) {
+                    vertexCache_->insert(std::make_pair(std::get<0>(tup),
+                                                        std::get<1>(tup)),
+                                         std::move(std::get<2>(tup)));
+                }
             }
-        });
+            handleAsync(spaceId_, partId, code);
+        };
+        if (indexes_.empty()) {
+            this->kvstore_->asyncMultiPut(spaceId_,
+                                          partId,
+                                          std::move(data),
+                                          std::move(callback));
+        } else {
+            auto atomicOp = [partId, data = std::move(data), this]() mutable
+                            -> folly::Optional<std::string> {
+                return addVerticesWithIndex(partId, std::move(data));
+            };
+
+            this->kvstore_->asyncAtomicOp(spaceId_,
+                                          partId,
+                                          std::move(atomicOp),
+                                          std::move(callback));
+        }
     });
+}
 
-    for (auto& v : newVertices) {
+std::string AddVerticesProcessor::addVerticesWithIndex(PartitionID partId,
+                                                       std::vector<kvstore::KV>&& data) {
+    std::unique_ptr<kvstore::BatchHolder> batchHolder = std::make_unique<kvstore::BatchHolder>();
+    for (auto& v : data) {
         std::string val;
         RowReader nReader = RowReader::getEmptyRowReader();
         auto tagId = NebulaKeyUtils::getTagId(v.first);
@@ -150,9 +149,7 @@ std::string AddVerticesProcessor::addVertices(int64_t version, PartitionID partI
         /*
          * step 3 , Insert new vertex data
          */
-        auto key = v.first;
-        auto prop = v.second;
-        batchHolder->put(std::move(key), std::move(prop));
+        batchHolder->put(std::move(v.first), std::move(v.second));
     }
     return encodeBatchValue(batchHolder->getBatch());
 }
```
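
Two things change in `process()`. First, the vertex cache is now filled in the completion callback, and only when the write returns `SUCCEEDED`, instead of being evicted up front; a failed put can no longer leave the cache out of sync with the store, and this is exactly why `LRUCache::insert()` above had to learn to overwrite. Second, deduplication of repeated `(VertexID, TagID)` pairs moves into `process()` for both paths: the batch is walked in reverse with a seen-set, so exactly the last occurrence of each pair survives, preserving the old `std::map` "last write wins" semantics. A standalone sketch of the reverse-iteration dedup (types simplified; `Record`, `dedupLastWins`, and `PairHash` are invented for this example — a `std::unordered_set` over a `std::pair` needs a hash, which the real code presumably supplies elsewhere):

```cpp
#include <cstdint>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

// Illustrative aliases; the real VertexID/TagID come from Nebula's thrift types.
using VertexID = int64_t;
using TagID = int32_t;

// std::unordered_set<std::pair<...>> has no standard hash; supply one here.
struct PairHash {
    size_t operator()(const std::pair<VertexID, TagID>& p) const {
        return std::hash<VertexID>()(p.first) * 31 ^ std::hash<TagID>()(p.second);
    }
};

struct Record {
    VertexID vid;
    TagID tag;
    std::string props;
};

// Keep only the LAST occurrence of each (vid, tag): iterate in reverse and
// skip keys already seen, as the patched process() does.
std::vector<Record> dedupLastWins(const std::vector<Record>& batch) {
    std::unordered_set<std::pair<VertexID, TagID>, PairHash> seen;
    std::vector<Record> out;
    for (auto it = batch.rbegin(); it != batch.rend(); ++it) {
        if (!seen.emplace(it->vid, it->tag).second) {
            continue;  // an occurrence later in the batch already won
        }
        out.push_back(*it);
    }
    return out;
}

int main() {
    std::vector<Record> batch = {
        {1, 1, "v1"}, {1, 1, "v2"}, {1, 1, "v3"}, {1, 1, "v4"},
    };
    for (const auto& r : dedupLastWins(batch)) {
        std::cout << r.vid << "_" << r.tag << " -> " << r.props << "\n";
    }
    // Prints "1_1 -> v4": the last write wins, matching the old map semantics.
    return 0;
}
```

As in the patched code, the surviving records come out in reverse request order; for a batched put the ordering within the batch should not matter.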

src/storage/mutate/AddVerticesProcessor.h (+2 −2)
```diff
@@ -39,8 +39,8 @@ class AddVerticesProcessor : public BaseProcessor<cpp2::ExecResponse> {
         , indexMan_(indexMan)
         , vertexCache_(cache) {}
 
-    std::string addVertices(int64_t version, PartitionID partId,
-                            const std::vector<cpp2::Vertex>& vertices);
+    std::string addVerticesWithIndex(PartitionID partId,
+                                     std::vector<kvstore::KV>&& data);
 
     std::string findObsoleteIndex(PartitionID partId,
                                   VertexID vId,
```
