Skip to content

Commit 636998d

Browse files
committed
Merge pull request #1 from borisfom/caffe-0.14-cnmem
Caffe 0.14 cnmem
2 parents 848bfda + 36eaec8 commit 636998d

File tree

13 files changed

+263
-313
lines changed

13 files changed

+263
-313
lines changed

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[submodule "cnmem"]
2+
path = cnmem
3+
url = https://github.com/NVIDIA/cnmem.git

Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,12 +301,16 @@ endif
301301
# cuDNN acceleration configuration.
302302
ifeq ($(USE_CUDNN), 1)
303303
LIBRARIES += cudnn
304+
INCLUDE_DIRS += ${CUDNN_DIR}/include
305+
LIBRARY_DIRS += ${CUDNN_DIR}/install/cuda/lib64
304306
COMMON_FLAGS += -DUSE_CUDNN
305307
endif
306308

307309
# cuMEM integration
308310
ifeq ($(USE_CNMEM), 1)
309311
LIBRARIES += cnmem
312+
LIBRARY_DIRS += ${CNMEM_DIR}/build
313+
INCLUDE_DIRS += ${CNMEM_DIR}/include
310314
COMMON_FLAGS += -DUSE_CNMEM
311315
endif
312316

cnmem

Submodule cnmem added at e817a7a

include/caffe/CuMem.hpp

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#ifndef CAFFE_CUMEM_HPP_
2+
#define CAFFE_CUMEM_HPP_
3+
4+
#include "common.hpp"
5+
6+
#ifdef USE_CNMEM
7+
// CNMEM integration
8+
#include <cnmem.h>
9+
#endif
10+
11+
namespace caffe {
12+
13+
class CuMem {
14+
public:
15+
#ifndef CPU_ONLY
16+
static void mallocGPU(void **ptr, size_t size,
17+
cudaStream_t stream = cudaStreamDefault);
18+
static void freeGPU(void *ptr, cudaStream_t = cudaStreamDefault);
19+
static void registerStream(cudaStream_t stream);
20+
#endif
21+
22+
static bool usingPool() {
23+
return using_pool_;
24+
}
25+
26+
static void getInfo(size_t *free_mem, size_t *used_mem);
27+
28+
private:
29+
static void init(const std::vector<int>& gpus_, bool use_pool=true);
30+
static void destroy();
31+
32+
friend class CuMemActivator;
33+
static bool using_pool_;
34+
static bool initialized_;
35+
36+
37+
};
38+
39+
class CuMemActivator {
40+
public:
41+
explicit CuMemActivator(const std::vector<int>& gpus)
42+
: using_pool_(false) {
43+
if (gpus.size() > 0) {
44+
#ifdef USE_CNMEM
45+
using_pool_ = true;
46+
#endif
47+
CuMem::init(gpus, using_pool_);
48+
}
49+
}
50+
~CuMemActivator() {
51+
if (using_pool_) {
52+
CuMem::destroy();
53+
}
54+
}
55+
private:
56+
int using_pool_;
57+
};
58+
59+
} // namespace caffe
60+
61+
# endif

include/caffe/common.hpp

Lines changed: 7 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,6 @@
1818

1919
#include "caffe/util/device_alternate.hpp"
2020

21-
#ifdef USE_CNMEM
22-
// cuMEM integration
23-
#include <cnmem.h>
24-
#endif
25-
2621
// gflags 2.1 issue: namespace google was changed to gflags without warning.
2722
// Luckily we will be able to use GFLAGS_GFLAGS_H_ to detect if it is version
2823
// 2.1. If yes, we will add a temporary solution to redirect the namespace.
@@ -70,6 +65,12 @@ private:\
7065
// is executed we will see a fatal log.
7166
#define NOT_IMPLEMENTED LOG(FATAL) << "Not Implemented Yet"
7267

68+
#include "CuMem.hpp"
69+
70+
// bfomitchev: temporary, for better merge
71+
#define MemoryHandler CuMem
72+
#define MemoryHandlerActivator CuMemActivator
73+
7374
// See PR #1236
7475
namespace cv { class Mat; }
7576

@@ -184,67 +185,7 @@ class Caffe {
184185
DISABLE_COPY_AND_ASSIGN(Caffe);
185186
};
186187

187-
class MemoryHandler {
188-
public:
189-
static MemoryHandler& Get();
190-
#ifndef CPU_ONLY
191-
static void mallocGPU(void **ptr, size_t size,
192-
cudaStream_t stream = cudaStreamDefault);
193-
static void freeGPU(void *ptr, cudaStream_t = cudaStreamDefault);
194-
static void registerStream(cudaStream_t stream);
195-
#endif
196-
static void setGPUs(const std::vector<int>& gpus) { Get().gpus_ = gpus; }
197-
static void usePool() { Get().using_pool_ = true; }
198-
static bool usingPool() {
199-
#ifdef USE_CNMEM
200-
return Get().using_pool_;
201-
#else
202-
return false;
203-
#endif
204-
}
205-
static void getInfo(size_t *free_mem, size_t *used_mem);
206-
static void destroy();
207-
~MemoryHandler() { }
208-
209-
private:
210-
MemoryHandler() : using_pool_(false), initialized_(false) {}
211-
static void Init();
212-
// static void Destroy();
213-
#ifndef CPU_ONLY
214-
void allocate_memory(void **ptr, size_t size, cudaStream_t stream);
215-
void free_memory(void *ptr, cudaStream_t stream);
216-
#endif
217-
DISABLE_COPY_AND_ASSIGN(MemoryHandler);
218-
219-
bool using_pool_;
220-
bool initialized_;
221-
std::vector<int> gpus_;
222-
};
223-
224-
class MemoryHandlerActivator {
225-
public:
226-
explicit MemoryHandlerActivator(const std::vector<int>& gpus)
227-
: using_pool_(false) {
228-
if (gpus.size() > 0) {
229-
using_pool_ = true;
230-
MemoryHandler::usePool();
231-
MemoryHandler::setGPUs(gpus);
232-
#ifndef CPU_ONLY
233-
void* temp;
234-
MemoryHandler::mallocGPU(&temp, 4);
235-
MemoryHandler::freeGPU(temp);
236-
#endif
237-
}
238-
}
239-
~MemoryHandlerActivator() {
240-
if (using_pool_) {
241-
MemoryHandler::destroy();
242-
}
243-
}
244-
private:
245-
int using_pool_;
246-
};
247-
248188
} // namespace caffe
249189

250190
#endif // CAFFE_COMMON_HPP_
191+

include/caffe/util/device_alternate.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,8 @@ void classname<Dtype>::funcname##_##gpu(const vector<Blob<Dtype>*>& top, \
7474
CHECK_EQ(status, CNMEM_STATUS_SUCCESS) << " " \
7575
<< cnmemGetErrorString(status); \
7676
} while (0)
77-
77+
#else
78+
#define CNMEM_CHECK(condition)
7879
#endif
7980

8081
// CUDA: grid stride looping

src/caffe/CuMem.cpp

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#include "caffe/common.hpp"
2+
#include "caffe/CuMem.hpp"
3+
4+
#include <boost/thread.hpp>
5+
6+
namespace caffe {
7+
8+
bool CuMem::using_pool_ = false;
9+
bool CuMem::initialized_ = false;
10+
11+
using namespace boost;
12+
13+
#ifndef CNMEM_CHECK
14+
# define CNMEM_CHECK(x)
15+
#endif
16+
17+
#ifndef CPU_ONLY // CPU-only Caffe.
18+
19+
void CuMem::mallocGPU(void **ptr, size_t size, cudaStream_t stream) {
20+
CHECK(initialized_);
21+
if (using_pool_) {
22+
CNMEM_CHECK(cnmemMalloc(ptr, size, stream));
23+
} else {
24+
CUDA_CHECK(cudaMalloc(ptr, size));
25+
}
26+
}
27+
28+
29+
void CuMem::freeGPU(void *ptr, cudaStream_t stream) {
30+
CHECK(initialized_);
31+
if (using_pool_) {
32+
CNMEM_CHECK(cnmemFree(ptr, stream));
33+
} else {
34+
CUDA_CHECK(cudaFree(ptr));
35+
}
36+
}
37+
38+
void CuMem::registerStream(cudaStream_t stream) {
39+
CHECK(initialized_);
40+
if (using_pool_) {
41+
CNMEM_CHECK(cnmemRegisterStream(stream));
42+
}
43+
}
44+
45+
void CuMem::destroy() {
46+
CHECK(initialized_);
47+
CNMEM_CHECK(cnmemFinalize());
48+
initialized_ = false;
49+
using_pool_ = false;
50+
}
51+
52+
void CuMem::init(const std::vector<int>& gpus, bool use_pool) {
53+
CHECK(!initialized_);
54+
#ifdef USE_CNMEM
55+
if (false /* use_pool */) {
56+
using_pool_ = true;
57+
cnmemDevice_t *devs = new cnmemDevice_t[gpus.size()];
58+
59+
int initial_device;
60+
CUDA_CHECK(cudaGetDevice(&initial_device));
61+
62+
for (int i = 0; i < gpus.size(); i++) {
63+
CUDA_CHECK(cudaSetDevice(gpus[i]));
64+
65+
devs[i].device = gpus[i];
66+
67+
size_t free_mem, used_mem;
68+
CUDA_CHECK(cudaMemGetInfo(&free_mem, &used_mem));
69+
70+
devs[i].size = size_t(0.95*free_mem);
71+
devs[i].numStreams = 0;
72+
devs[i].streams = NULL;
73+
}
74+
CNMEM_CHECK(cnmemInit(gpus.size(), devs, CNMEM_FLAGS_DEFAULT));
75+
initialized_ = true;
76+
77+
CUDA_CHECK(cudaSetDevice(initial_device));
78+
79+
delete [] devs;
80+
}
81+
#endif
82+
initialized_ = true;
83+
std::cout << "CuMem initialized" <<
84+
(using_pool_ ? " with CNMEM pool.\n" : " with CUDA allocator.\n");
85+
}
86+
87+
void CuMem::getInfo(size_t *free_mem, size_t *total_mem) {
88+
if (using_pool_) {
89+
CNMEM_CHECK(cnmemMemGetInfo(free_mem, total_mem, cudaStreamDefault));
90+
} else {
91+
CUDA_CHECK(cudaMemGetInfo(free_mem, total_mem));
92+
}
93+
}
94+
95+
}
96+
97+
#endif // CPU_ONLY
98+

0 commit comments

Comments
 (0)