Skip to content

Commit

Permalink
graph: interface: allocator: remove monitor
Browse files Browse the repository at this point in the history
  • Loading branch information
TaoLv committed Sep 29, 2024
1 parent c4b2695 commit f178d84
Show file tree
Hide file tree
Showing 4 changed files with 3 additions and 300 deletions.
64 changes: 0 additions & 64 deletions src/graph/interface/allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,67 +139,3 @@ dnnl_graph_ocl_interop_make_engine_from_cache_blob_with_allocator(
return status::success;
}
#endif

void dnnl_graph_allocator::monitor_t::record_allocate(
const void *buf, size_t size, dnnl_graph_allocator::mem_type_t type) {
const auto persistent = dnnl_graph_allocator::mem_type_t::persistent;
const auto temp = dnnl_graph_allocator::mem_type_t::temp;
if (type == persistent) {
persist_mem_ += size;
persist_mem_infos_.emplace(buf, mem_info_t {size, persistent});
} else if (type == temp) {
auto tid = std::this_thread::get_id();
temp_mem_[tid] += size;
if (peak_temp_mem_[tid] < temp_mem_[tid])
peak_temp_mem_[tid] = temp_mem_[tid];
temp_mem_infos_[tid].emplace(buf, mem_info_t {size, temp});
} else {
// we didn't use output type buffer now.
assertm(0, "we didn't use output type buffer now");
}
}

void dnnl_graph_allocator::monitor_t::record_deallocate(const void *buf) {
bool is_persist = persist_mem_infos_.find(buf) != persist_mem_infos_.end();
if (is_persist) {
auto persist_pos = persist_mem_infos_.find(buf);
persist_mem_ -= persist_pos->second.size_;
persist_mem_infos_.erase(persist_pos);
} else {
auto tid = std::this_thread::get_id();
auto temp_pos = temp_mem_infos_[tid].find(buf);
if (temp_pos != temp_mem_infos_[tid].end()) {
temp_mem_[tid] -= temp_pos->second.size_;
}
}
}

void dnnl_graph_allocator::monitor_t::reset_peak_temp_memory() {
auto tid = std::this_thread::get_id();
rw_mutex_.lock_write();
peak_temp_mem_[tid] = 0;
rw_mutex_.unlock_write();
}

size_t dnnl_graph_allocator::monitor_t::get_peak_temp_memory() {
auto tid = std::this_thread::get_id();
rw_mutex_.lock_read();
size_t ret = peak_temp_mem_.at(tid);
rw_mutex_.unlock_read();
return ret;
}

size_t dnnl_graph_allocator::monitor_t::get_total_persist_memory() {
rw_mutex_.lock_read();
size_t size = persist_mem_;
rw_mutex_.unlock_read();
return size;
}

void dnnl_graph_allocator::monitor_t::lock_write() {
rw_mutex_.lock_write();
}

void dnnl_graph_allocator::monitor_t::unlock_write() {
rw_mutex_.unlock_write();
}
92 changes: 1 addition & 91 deletions src/graph/interface/allocator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,86 +125,25 @@ struct dnnl_graph_allocator final : public dnnl::impl::graph::utils::id_t {
mem_attr_t &operator=(const mem_attr_t &other) = default;
};

struct mem_info_t {
mem_info_t(size_t size, mem_type_t type) : size_(size), type_(type) {}
size_t size_;
mem_type_t type_;
};

struct monitor_t {
private:
size_t persist_mem_ = 0;

std::unordered_map<const void *, mem_info_t> persist_mem_infos_;

std::unordered_map<std::thread::id, size_t> temp_mem_;
std::unordered_map<std::thread::id, size_t> peak_temp_mem_;
std::unordered_map<std::thread::id,
std::unordered_map<const void *, mem_info_t>>
temp_mem_infos_;

// Since the memory operation will be performed from multiple threads,
// so we use the rw lock to guarantee the thread safety of the global
// persistent memory monitoring.
dnnl::impl::utils::rw_mutex_t rw_mutex_;

public:
void record_allocate(const void *buf, size_t size, mem_type_t type);

void record_deallocate(const void *buf);

void reset_peak_temp_memory();

size_t get_peak_temp_memory();

size_t get_total_persist_memory();

void lock_write();
void unlock_write();
};

void *allocate(size_t size, mem_attr_t attr = {}) const {
#ifndef NDEBUG
monitor_.lock_write();
void *buffer = host_malloc_(size, attr.alignment_);
monitor_.record_allocate(buffer, size, attr.type_);
monitor_.unlock_write();
#else
void *buffer = host_malloc_(size, attr.alignment_);
#endif
return buffer;
}

#ifdef DNNL_WITH_SYCL
void *allocate(size_t size, const ::sycl::device &dev,
const ::sycl::context &ctx, mem_attr_t attr = {}) const {
#ifndef NDEBUG
monitor_.lock_write();
void *buffer = sycl_malloc_(size, attr.alignment_,
static_cast<const void *>(&dev),
static_cast<const void *>(&ctx));
monitor_.record_allocate(buffer, size, attr.type_);
monitor_.unlock_write();
#else
void *buffer = sycl_malloc_(size, attr.alignment_,
static_cast<const void *>(&dev),
static_cast<const void *>(&ctx));
#endif
return buffer;
}
#endif

#if DNNL_GPU_RUNTIME == DNNL_RUNTIME_OCL
void *allocate(size_t size, cl_device_id dev, cl_context ctx,
mem_attr_t attr = {}) const {
#ifndef NDEBUG
monitor_.lock_write();
void *buffer = ocl_malloc_(size, attr.alignment_, dev, ctx);
monitor_.record_allocate(buffer, size, attr.type_);
monitor_.unlock_write();
#else
void *buffer = ocl_malloc_(size, attr.alignment_, dev, ctx);
#endif
return buffer;
}
#endif
Expand Down Expand Up @@ -237,34 +176,16 @@ struct dnnl_graph_allocator final : public dnnl::impl::graph::utils::id_t {
#endif

void deallocate(void *buffer) const {
if (buffer) {
#ifndef NDEBUG
monitor_.lock_write();
monitor_.record_deallocate(buffer);
host_free_(buffer);
monitor_.unlock_write();
#else
host_free_(buffer);
#endif
}
if (buffer) { host_free_(buffer); }
}

#ifdef DNNL_WITH_SYCL
void deallocate(void *buffer, const ::sycl::device &dev,
const ::sycl::context &ctx, ::sycl::event deps) const {
if (buffer) {
#ifndef NDEBUG
monitor_.lock_write();
monitor_.record_deallocate(buffer);
sycl_free_(buffer, static_cast<const void *>(&dev),
static_cast<const void *>(&ctx),
static_cast<void *>(&deps));
monitor_.unlock_write();
#else
sycl_free_(buffer, static_cast<const void *>(&dev),
static_cast<const void *>(&ctx),
static_cast<void *>(&deps));
#endif
}
}
#endif
Expand All @@ -273,21 +194,12 @@ struct dnnl_graph_allocator final : public dnnl::impl::graph::utils::id_t {
void deallocate(void *buffer, cl_device_id dev, cl_context ctx,
cl_event deps) const {
if (buffer) {
#ifndef NDEBUG
monitor_.lock_write();
monitor_.record_deallocate(buffer);
ocl_free_(buffer, dev, ctx, deps);
monitor_.unlock_write();
#else
ocl_free_(buffer, dev, ctx, deps);
#endif
buffer = nullptr;
}
}
#endif

monitor_t &get_monitor() { return monitor_; }

private:
dnnl_graph_host_allocate_f host_malloc_ {
dnnl::impl::graph::utils::cpu_allocator_t::malloc};
Expand All @@ -308,8 +220,6 @@ struct dnnl_graph_allocator final : public dnnl::impl::graph::utils::id_t {
dnnl_graph_ocl_deallocate_f ocl_free_ {
dnnl::impl::graph::utils::ocl_allocator_t::free};
#endif

mutable monitor_t monitor_;
};

#endif
71 changes: 1 addition & 70 deletions src/graph/interface/partition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -322,27 +322,13 @@ status_t DNNL_API dnnl_graph_compiled_partition_execute(

if (get_verbose(dnnl::impl::verbose_t::exec_profile,
dnnl::impl::component_t::graph)) {
#ifndef NDEBUG
allocator_t *alloc = reinterpret_cast<allocator_t *>(
compiled_partition->get_engine()->get_allocator());
allocator_t::monitor_t &monitor = alloc->get_monitor();
monitor.reset_peak_temp_memory();
#endif
stream->wait();
double start_ms = dnnl::impl::get_msec();
CHECK(compiled_partition->execute(stream, ins, outs));
stream->wait();
double duration_ms = dnnl::impl::get_msec() - start_ms;
#ifndef NDEBUG
VFORMAT(start_ms, graph, exec, VERBOSE_profile, "%s,%g,%zu,%s,%zu,%zu",
compiled_partition->info(), duration_ms, alloc->id(),
utils::thread_id_to_str(std::this_thread::get_id()).c_str(),
monitor.get_total_persist_memory(),
monitor.get_peak_temp_memory());
#else
VPROF(start_ms, graph, exec, VERBOSE_profile,
compiled_partition->info(), duration_ms);
#endif
} else {
CHECK(compiled_partition->execute(stream, ins, outs));
}
Expand Down Expand Up @@ -375,36 +361,8 @@ status_t DNNL_API dnnl_graph_sycl_interop_compiled_partition_execute(
for (size_t i = 0; i < num_outputs; ++i) {
outs.emplace_back(**(outputs + i));
}
#ifndef NDEBUG
if (get_verbose(dnnl::impl::verbose_t::exec_profile,
dnnl::impl::component_t::graph)) {
allocator_t *alloc = reinterpret_cast<allocator_t *>(
compiled_partition->get_engine()->get_allocator());
allocator_t::monitor_t &monitor = alloc->get_monitor();
monitor.reset_peak_temp_memory();
stream->wait();
double start_ms = dnnl::impl::get_msec();
if (deps != nullptr) {
const auto &sycl_deps = *(const std::vector<::sycl::event> *)deps;
CHECK(compiled_partition->execute_sycl(stream, ins, outs, sycl_deps,
static_cast<::sycl::event *>(sycl_event)));
} else {
CHECK(compiled_partition->execute_sycl(stream, ins, outs, {},
static_cast<::sycl::event *>(sycl_event)));
}
stream->wait();
double duration_ms = dnnl::impl::get_msec() - start_ms;
VFORMAT(start_ms, graph, exec, VERBOSE_profile, "%s,%g,%zu,%s,%zu,%zu",
compiled_partition->info(), duration_ms, alloc->id(),
utils::thread_id_to_str(std::this_thread::get_id()).c_str(),
monitor.get_total_persist_memory(),
monitor.get_peak_temp_memory());
} else if (get_verbose(dnnl::impl::verbose_t::exec_profile,
dnnl::impl::component_t::graph)) {
#else
if (get_verbose(dnnl::impl::verbose_t::exec_profile,
dnnl::impl::component_t::graph)) {
#endif
stream->wait();
double start_ms = dnnl::impl::get_msec();
if (deps != nullptr) {
Expand Down Expand Up @@ -465,36 +423,9 @@ status_t DNNL_API dnnl_graph_ocl_interop_compiled_partition_execute(
for (size_t i = 0; i < num_outputs; ++i) {
outs.emplace_back(**(outputs + i));
}
#ifndef NDEBUG
if (get_verbose(dnnl::impl::verbose_t::exec_profile,
dnnl::impl::component_t::graph)) {
allocator_t *alloc = reinterpret_cast<allocator_t *>(
compiled_partition->get_engine()->get_allocator());
allocator_t::monitor_t &monitor = alloc->get_monitor();
monitor.reset_peak_temp_memory();
stream->wait();
double start_ms = dnnl::impl::get_msec();
if (deps != nullptr) {
std::vector<cl_event> ocl_deps(deps, deps + ndeps);
CHECK(compiled_partition->execute_ocl(
stream, ins, outs, ocl_deps, ocl_event));
} else {
CHECK(compiled_partition->execute_ocl(
stream, ins, outs, {}, ocl_event));
}
stream->wait();
double duration_ms = dnnl::impl::get_msec() - start_ms;
VFORMAT(start_ms, graph, exec, VERBOSE_profile, "%s,%g,%zu,%s,%zu,%zu",
compiled_partition->info(), duration_ms, alloc->id(),
utils::thread_id_to_str(std::this_thread::get_id()).c_str(),
monitor.get_total_persist_memory(),
monitor.get_peak_temp_memory());
} else if (get_verbose(dnnl::impl::verbose_t::exec_profile,
dnnl::impl::component_t::graph)) {
#else

if (get_verbose(dnnl::impl::verbose_t::exec_profile,
dnnl::impl::component_t::graph)) {
#endif
stream->wait();
double start_ms = dnnl::impl::get_msec();
if (deps != nullptr) {
Expand Down
Loading

0 comments on commit f178d84

Please sign in to comment.