graph: interface: allocator: remove monitor

oneapi-src · Sep 29, 2024 · f178d84 · f178d84
1 parent c4b2695
commit f178d84
Show file tree

Hide file tree

Showing 4 changed files with 3 additions and 300 deletions.
diff --git a/src/graph/interface/allocator.cpp b/src/graph/interface/allocator.cpp
@@ -139,67 +139,3 @@ dnnl_graph_ocl_interop_make_engine_from_cache_blob_with_allocator(
     return status::success;
 }
 #endif
-
-void dnnl_graph_allocator::monitor_t::record_allocate(
-        const void *buf, size_t size, dnnl_graph_allocator::mem_type_t type) {
-    const auto persistent = dnnl_graph_allocator::mem_type_t::persistent;
-    const auto temp = dnnl_graph_allocator::mem_type_t::temp;
-    if (type == persistent) {
-        persist_mem_ += size;
-        persist_mem_infos_.emplace(buf, mem_info_t {size, persistent});
-    } else if (type == temp) {
-        auto tid = std::this_thread::get_id();
-        temp_mem_[tid] += size;
-        if (peak_temp_mem_[tid] < temp_mem_[tid])
-            peak_temp_mem_[tid] = temp_mem_[tid];
-        temp_mem_infos_[tid].emplace(buf, mem_info_t {size, temp});
-    } else {
-        // we didn't use output type buffer now.
-        assertm(0, "we didn't use output type buffer now");
-    }
-}
-
-void dnnl_graph_allocator::monitor_t::record_deallocate(const void *buf) {
-    bool is_persist = persist_mem_infos_.find(buf) != persist_mem_infos_.end();
-    if (is_persist) {
-        auto persist_pos = persist_mem_infos_.find(buf);
-        persist_mem_ -= persist_pos->second.size_;
-        persist_mem_infos_.erase(persist_pos);
-    } else {
-        auto tid = std::this_thread::get_id();
-        auto temp_pos = temp_mem_infos_[tid].find(buf);
-        if (temp_pos != temp_mem_infos_[tid].end()) {
-            temp_mem_[tid] -= temp_pos->second.size_;
-        }
-    }
-}
-
-void dnnl_graph_allocator::monitor_t::reset_peak_temp_memory() {
-    auto tid = std::this_thread::get_id();
-    rw_mutex_.lock_write();
-    peak_temp_mem_[tid] = 0;
-    rw_mutex_.unlock_write();
-}
-
-size_t dnnl_graph_allocator::monitor_t::get_peak_temp_memory() {
-    auto tid = std::this_thread::get_id();
-    rw_mutex_.lock_read();
-    size_t ret = peak_temp_mem_.at(tid);
-    rw_mutex_.unlock_read();
-    return ret;
-}
-
-size_t dnnl_graph_allocator::monitor_t::get_total_persist_memory() {
-    rw_mutex_.lock_read();
-    size_t size = persist_mem_;
-    rw_mutex_.unlock_read();
-    return size;
-}
-
-void dnnl_graph_allocator::monitor_t::lock_write() {
-    rw_mutex_.lock_write();
-}
-
-void dnnl_graph_allocator::monitor_t::unlock_write() {
-    rw_mutex_.unlock_write();
-}
diff --git a/src/graph/interface/allocator.hpp b/src/graph/interface/allocator.hpp
@@ -125,86 +125,25 @@ struct dnnl_graph_allocator final : public dnnl::impl::graph::utils::id_t {
         mem_attr_t &operator=(const mem_attr_t &other) = default;
     };
 
-    struct mem_info_t {
-        mem_info_t(size_t size, mem_type_t type) : size_(size), type_(type) {}
-        size_t size_;
-        mem_type_t type_;
-    };
-
-    struct monitor_t {
-    private:
-        size_t persist_mem_ = 0;
-
-        std::unordered_map<const void *, mem_info_t> persist_mem_infos_;
-
-        std::unordered_map<std::thread::id, size_t> temp_mem_;
-        std::unordered_map<std::thread::id, size_t> peak_temp_mem_;
-        std::unordered_map<std::thread::id,
-                std::unordered_map<const void *, mem_info_t>>
-                temp_mem_infos_;
-
-        // Since the memory operation will be performed from multiple threads,
-        // so we use the rw lock to guarantee the thread safety of the global
-        // persistent memory monitoring.
-        dnnl::impl::utils::rw_mutex_t rw_mutex_;
-
-    public:
-        void record_allocate(const void *buf, size_t size, mem_type_t type);
-
-        void record_deallocate(const void *buf);
-
-        void reset_peak_temp_memory();
-
-        size_t get_peak_temp_memory();
-
-        size_t get_total_persist_memory();
-
-        void lock_write();
-        void unlock_write();
-    };
-
     void *allocate(size_t size, mem_attr_t attr = {}) const {
-#ifndef NDEBUG
-        monitor_.lock_write();
-        void *buffer = host_malloc_(size, attr.alignment_);
-        monitor_.record_allocate(buffer, size, attr.type_);
-        monitor_.unlock_write();
-#else
         void *buffer = host_malloc_(size, attr.alignment_);
-#endif
         return buffer;
     }
 
 #ifdef DNNL_WITH_SYCL
     void *allocate(size_t size, const ::sycl::device &dev,
             const ::sycl::context &ctx, mem_attr_t attr = {}) const {
-#ifndef NDEBUG
-        monitor_.lock_write();
         void *buffer = sycl_malloc_(size, attr.alignment_,
                 static_cast<const void *>(&dev),
                 static_cast<const void *>(&ctx));
-        monitor_.record_allocate(buffer, size, attr.type_);
-        monitor_.unlock_write();
-#else
-        void *buffer = sycl_malloc_(size, attr.alignment_,
-                static_cast<const void *>(&dev),
-                static_cast<const void *>(&ctx));
-#endif
         return buffer;
     }
 #endif
 
 #if DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL
     void *allocate(size_t size, cl_device_id dev, cl_context ctx,
             mem_attr_t attr = {}) const {
-#ifndef NDEBUG
-        monitor_.lock_write();
-        void *buffer = ocl_malloc_(size, attr.alignment_, dev, ctx);
-        monitor_.record_allocate(buffer, size, attr.type_);
-        monitor_.unlock_write();
-#else
         void *buffer = ocl_malloc_(size, attr.alignment_, dev, ctx);
-#endif
         return buffer;
     }
 #endif
@@ -237,34 +176,16 @@ struct dnnl_graph_allocator final : public dnnl::impl::graph::utils::id_t {
 #endif
 
     void deallocate(void *buffer) const {
-        if (buffer) {
-#ifndef NDEBUG
-            monitor_.lock_write();
-            monitor_.record_deallocate(buffer);
-            host_free_(buffer);
-            monitor_.unlock_write();
-#else
-            host_free_(buffer);
-#endif
-        }
+        if (buffer) { host_free_(buffer); }
     }
 
 #ifdef DNNL_WITH_SYCL
     void deallocate(void *buffer, const ::sycl::device &dev,
             const ::sycl::context &ctx, ::sycl::event deps) const {
         if (buffer) {
-#ifndef NDEBUG
-            monitor_.lock_write();
-            monitor_.record_deallocate(buffer);
             sycl_free_(buffer, static_cast<const void *>(&dev),
                     static_cast<const void *>(&ctx),
                     static_cast<void *>(&deps));
-            monitor_.unlock_write();
-#else
-            sycl_free_(buffer, static_cast<const void *>(&dev),
-                    static_cast<const void *>(&ctx),
-                    static_cast<void *>(&deps));
-#endif
         }
     }
 #endif
@@ -273,21 +194,12 @@ struct dnnl_graph_allocator final : public dnnl::impl::graph::utils::id_t {
     void deallocate(void *buffer, cl_device_id dev, cl_context ctx,
             cl_event deps) const {
         if (buffer) {
-#ifndef NDEBUG
-            monitor_.lock_write();
-            monitor_.record_deallocate(buffer);
             ocl_free_(buffer, dev, ctx, deps);
-            monitor_.unlock_write();
-#else
-            ocl_free_(buffer, dev, ctx, deps);
-#endif
             buffer = nullptr;
         }
     }
 #endif
 
-    monitor_t &get_monitor() { return monitor_; }
-
 private:
     dnnl_graph_host_allocate_f host_malloc_ {
             dnnl::impl::graph::utils::cpu_allocator_t::malloc};
@@ -308,8 +220,6 @@ struct dnnl_graph_allocator final : public dnnl::impl::graph::utils::id_t {
     dnnl_graph_ocl_deallocate_f ocl_free_ {
             dnnl::impl::graph::utils::ocl_allocator_t::free};
 #endif
-
-    mutable monitor_t monitor_;
 };
 
 #endif
diff --git a/src/graph/interface/partition.cpp b/src/graph/interface/partition.cpp
@@ -322,27 +322,13 @@ status_t DNNL_API dnnl_graph_compiled_partition_execute(
 
     if (get_verbose(dnnl::impl::verbose_t::exec_profile,
                 dnnl::impl::component_t::graph)) {
-#ifndef NDEBUG
-        allocator_t *alloc = reinterpret_cast<allocator_t *>(
-                compiled_partition->get_engine()->get_allocator());
-        allocator_t::monitor_t &monitor = alloc->get_monitor();
-        monitor.reset_peak_temp_memory();
-#endif
         stream->wait();
         double start_ms = dnnl::impl::get_msec();
         CHECK(compiled_partition->execute(stream, ins, outs));
         stream->wait();
         double duration_ms = dnnl::impl::get_msec() - start_ms;
-#ifndef NDEBUG
-        VFORMAT(start_ms, graph, exec, VERBOSE_profile, "%s,%g,%zu,%s,%zu,%zu",
-                compiled_partition->info(), duration_ms, alloc->id(),
-                utils::thread_id_to_str(std::this_thread::get_id()).c_str(),
-                monitor.get_total_persist_memory(),
-                monitor.get_peak_temp_memory());
-#else
         VPROF(start_ms, graph, exec, VERBOSE_profile,
                 compiled_partition->info(), duration_ms);
-#endif
     } else {
         CHECK(compiled_partition->execute(stream, ins, outs));
     }
@@ -375,36 +361,8 @@ status_t DNNL_API dnnl_graph_sycl_interop_compiled_partition_execute(
     for (size_t i = 0; i < num_outputs; ++i) {
         outs.emplace_back(**(outputs + i));
     }
-#ifndef NDEBUG
-    if (get_verbose(dnnl::impl::verbose_t::exec_profile,
-                dnnl::impl::component_t::graph)) {
-        allocator_t *alloc = reinterpret_cast<allocator_t *>(
-                compiled_partition->get_engine()->get_allocator());
-        allocator_t::monitor_t &monitor = alloc->get_monitor();
-        monitor.reset_peak_temp_memory();
-        stream->wait();
-        double start_ms = dnnl::impl::get_msec();
-        if (deps != nullptr) {
-            const auto &sycl_deps = *(const std::vector<::sycl::event> *)deps;
-            CHECK(compiled_partition->execute_sycl(stream, ins, outs, sycl_deps,
-                    static_cast<::sycl::event *>(sycl_event)));
-        } else {
-            CHECK(compiled_partition->execute_sycl(stream, ins, outs, {},
-                    static_cast<::sycl::event *>(sycl_event)));
-        }
-        stream->wait();
-        double duration_ms = dnnl::impl::get_msec() - start_ms;
-        VFORMAT(start_ms, graph, exec, VERBOSE_profile, "%s,%g,%zu,%s,%zu,%zu",
-                compiled_partition->info(), duration_ms, alloc->id(),
-                utils::thread_id_to_str(std::this_thread::get_id()).c_str(),
-                monitor.get_total_persist_memory(),
-                monitor.get_peak_temp_memory());
-    } else if (get_verbose(dnnl::impl::verbose_t::exec_profile,
-                       dnnl::impl::component_t::graph)) {
-#else
     if (get_verbose(dnnl::impl::verbose_t::exec_profile,
                 dnnl::impl::component_t::graph)) {
-#endif
         stream->wait();
         double start_ms = dnnl::impl::get_msec();
         if (deps != nullptr) {
@@ -465,36 +423,9 @@ status_t DNNL_API dnnl_graph_ocl_interop_compiled_partition_execute(
     for (size_t i = 0; i < num_outputs; ++i) {
         outs.emplace_back(**(outputs + i));
     }
-#ifndef NDEBUG
-    if (get_verbose(dnnl::impl::verbose_t::exec_profile,
-                dnnl::impl::component_t::graph)) {
-        allocator_t *alloc = reinterpret_cast<allocator_t *>(
-                compiled_partition->get_engine()->get_allocator());
-        allocator_t::monitor_t &monitor = alloc->get_monitor();
-        monitor.reset_peak_temp_memory();
-        stream->wait();
-        double start_ms = dnnl::impl::get_msec();
-        if (deps != nullptr) {
-            std::vector<cl_event> ocl_deps(deps, deps + ndeps);
-            CHECK(compiled_partition->execute_ocl(
-                    stream, ins, outs, ocl_deps, ocl_event));
-        } else {
-            CHECK(compiled_partition->execute_ocl(
-                    stream, ins, outs, {}, ocl_event));
-        }
-        stream->wait();
-        double duration_ms = dnnl::impl::get_msec() - start_ms;
-        VFORMAT(start_ms, graph, exec, VERBOSE_profile, "%s,%g,%zu,%s,%zu,%zu",
-                compiled_partition->info(), duration_ms, alloc->id(),
-                utils::thread_id_to_str(std::this_thread::get_id()).c_str(),
-                monitor.get_total_persist_memory(),
-                monitor.get_peak_temp_memory());
-    } else if (get_verbose(dnnl::impl::verbose_t::exec_profile,
-                       dnnl::impl::component_t::graph)) {
-#else
+
     if (get_verbose(dnnl::impl::verbose_t::exec_profile,
                 dnnl::impl::component_t::graph)) {
-#endif
         stream->wait();
         double start_ms = dnnl::impl::get_msec();
         if (deps != nullptr) {