#ifndef CUDA_WRAPPER_HPP_
#define CUDA_WRAPPER_HPP_

#include <cstdlib>        // exit
#include <iostream>

#include <cuda_runtime.h> // cudaError_t, cudaGetErrorString, cudaMalloc, ...
// CudaCall(ans): wrap a CUDA runtime call that returns a cudaError_t.
// When compiled with -Ddebug, the result is checked and a diagnostic
// (error string, file, line) is printed before exiting; otherwise the
// call runs unchecked.
#ifdef debug
#define CudaCall(ans) { CudaAssert((ans), __FILE__, __LINE__); }
inline void CudaAssert(cudaError_t code, const char *file, int line, bool abort = true)
{
    if (code != cudaSuccess)
    {
        std::cerr << "GPUassert: " << cudaGetErrorString(code)
                  << " " << file << " " << line << std::endl;
        if (abort)
            exit(code);
    }
}
#else
#define CudaCall(ans) { ans; }
#endif
// CudaLaunch(...): launch a kernel. When compiled with -Ddebug, the launch
// error is checked (cudaPeekAtLastError) and the device is synchronized so
// that asynchronous failures surface at the launch site. The macro is
// variadic because the commas inside the <<<grid, block>>> configuration
// would otherwise split the macro argument during preprocessing.
#ifdef debug
#define CudaLaunch(...) \
{ \
    __VA_ARGS__; \
    CudaCall(cudaPeekAtLastError()); \
    CudaCall(cudaDeviceSynchronize()); \
}
#else
#define CudaLaunch(...) \
{ \
    __VA_ARGS__; \
}
#endif
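// Usage sketch for CudaLaunch (the kernel `scale` and the device pointer
// d_x are hypothetical): pass the full launch expression, including the
// <<<...>>> configuration, as the macro argument.
//
//   __global__ void scale(float *x, float s, int n)
//   {
//       int i = blockIdx.x * blockDim.x + threadIdx.x;
//       if (i < n)
//           x[i] *= s;
//   }
//
//   int n = 1 << 20;
//   int nthreads = 256;
//   int nblocks  = (n + nthreads - 1) / nthreads; // ceil(n / nthreads)
//   CudaLaunch(scale<<<nblocks, nthreads>>>(d_x, 2.0f, n));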
// Device and pinned-host allocation; `ptr` is assigned in place.
#define CudaMalloc(ptr, size) \
{ \
    CudaCall(cudaMalloc((void**)&ptr, size)); \
}
#define CudaMallocHost(ptr, size) \
{ \
    CudaCall(cudaMallocHost((void**)&ptr, size)); \
}
// Blocking and asynchronous host<->device copies; sizes are in bytes.
#define CudaMemcpyHtoD(dest, src, size) \
    CudaCall(cudaMemcpy(dest, src, size, cudaMemcpyHostToDevice))
#define CudaMemcpyDtoH(dest, src, size) \
    CudaCall(cudaMemcpy(dest, src, size, cudaMemcpyDeviceToHost))
#define CudaMemcpyAsyncHtoD(dest, src, size, stream) \
    CudaCall(cudaMemcpyAsync(dest, src, size, cudaMemcpyHostToDevice, stream))
#define CudaMemcpyAsyncDtoH(dest, src, size, stream) \
    CudaCall(cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToHost, stream))
// Null-guarded release of device / pinned-host memory; the pointer is
// reset to nullptr afterwards so a repeated free becomes a no-op.
#define CudaFree(ptr) \
{ \
    if (ptr != nullptr) \
        CudaCall(cudaFree(ptr)); \
    ptr = nullptr; \
}
#define CudaFreeHost(ptr) \
{ \
    if (ptr != nullptr) \
        CudaCall(cudaFreeHost(ptr)); \
    ptr = nullptr; \
}
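// Round-trip sketch combining the allocation, copy, and free wrappers
// (h_a, d_a, and n are hypothetical names):
//
//   int n = 1024;
//   float *h_a = nullptr, *d_a = nullptr;
//   CudaMallocHost(h_a, n * sizeof(float));      // pinned host buffer
//   CudaMalloc(d_a, n * sizeof(float));          // device buffer
//   CudaMemcpyHtoD(d_a, h_a, n * sizeof(float));
//   // ... launch kernels on d_a ...
//   CudaMemcpyDtoH(h_a, d_a, n * sizeof(float));
//   CudaFree(d_a);
//   CudaFreeHost(h_a);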
// Checked wrappers for synchronization, memset, host-memory pinning,
// and device selection.
#define CudaDeviceSynchronize() \
    CudaCall(cudaDeviceSynchronize())
#define CudaMemset(src, val, size) \
{ \
    CudaCall(cudaMemset(src, val, size)); \
}
#define CudaHostRegister(ptr, size, flag) \
{ \
    CudaCall(cudaHostRegister(ptr, size, flag)); \
}
#define CudaHostUnregister(ptr) \
{ \
    CudaCall(cudaHostUnregister(ptr)); \
}
#define CudaSetDevice(id) \
{ \
    CudaCall(cudaSetDevice(id)); \
}
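// Sketch of pinning an existing host allocation for an async copy (buf,
// d_buf, and stream are hypothetical; cudaHostRegisterDefault is the
// portable flag; assumes <vector> is included):
//
//   std::vector<float> buf(1 << 20);
//   CudaHostRegister(buf.data(), buf.size() * sizeof(float), cudaHostRegisterDefault);
//   CudaMemcpyAsyncHtoD(d_buf, buf.data(), buf.size() * sizeof(float), stream);
//   CudaDeviceSynchronize();   // copy must finish before unregistering
//   CudaHostUnregister(buf.data());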
// CUDA kernel configuration constants: tile sizes, warp size, and common
// thread-block sizes. MAX_GRIDDIM is the 65535 limit that applies to the
// y/z grid dimensions (and to x on very old devices).
#define TILESIZE01 32
#define TILESIZE02 16
#define TILESIZE03 8
#define TILESIZE04 4
#define TILESIZE05 2
#define WARPSIZE 32
#define BLOCKDIM01 64
#define BLOCKDIM02 128
#define BLOCKDIM03 256
#define BLOCKDIM04 512
#define MAX_BLOCKDIM 1024
#define MAX_GRIDDIM 65535
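// Sketch of sizing a 1-D launch from these constants (nelems is a
// hypothetical element count): clamp the grid at MAX_GRIDDIM and let each
// thread stride over the remainder (a grid-stride loop) when the input is
// larger.
//
//   long nelems = 100000000;
//   int nthreads  = BLOCKDIM03;                          // 256
//   long nblocks  = (nelems + nthreads - 1) / nthreads;  // ceil division
//   if (nblocks > MAX_GRIDDIM)
//       nblocks = MAX_GRIDDIM;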
// Numeric limits matching the graph's floating-point width: roughly
// FLT_MAX / FLT_EPSILON for 32-bit builds, DBL_MAX / DBL_EPSILON otherwise.
#ifdef USE_32BIT_GRAPH
#define MAX_FLOAT 1.0E+38
#define EPS 1.19E-7
#else
#define MAX_FLOAT 1.0E+308
#define EPS 2.22E-16
#endif
// Comparison functors for coordinate pairs (int2 with USE_32BIT_GRAPH,
// longlong2 otherwise): less_int2 orders pairs lexicographically by (x, y);
// equal_int2 tests componentwise equality. Both are callable from host and
// device code.
#ifdef USE_32BIT_GRAPH
struct less_int2
{
    __host__ __device__ bool operator()(const int2& a, const int2& b) const
    {
        return (a.x != b.x) ? (a.x < b.x) : (a.y < b.y);
    }
};
struct equal_int2
{
    __host__ __device__ bool operator()(const int2& a, const int2& b) const
    {
        return (a.x != b.x) ? false : (a.y == b.y);
    }
};
#else
struct less_int2
{
    __host__ __device__ bool operator()(const longlong2& a, const longlong2& b) const
    {
        return (a.x != b.x) ? (a.x < b.x) : (a.y < b.y);
    }
};
struct equal_int2
{
    __host__ __device__ bool operator()(const longlong2& a, const longlong2& b) const
    {
        return (a.x != b.x) ? false : (a.y == b.y);
    }
};
#endif
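// These functors are shaped like Thrust sort/unique predicates; a minimal
// sketch of that use (an assumption; this header does not itself reference
// Thrust). d_edges and ne are hypothetical, the build is 32-bit, and
// <thrust/sort.h>, <thrust/unique.h>, and <thrust/execution_policy.h> are
// assumed included:
//
//   int2 *d_edges;  // device array of ne coordinate pairs, allocated elsewhere
//   thrust::sort(thrust::device, d_edges, d_edges + ne, less_int2());
//   int2 *end = thrust::unique(thrust::device, d_edges, d_edges + ne, equal_int2());
//   long n_unique = end - d_edges;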
#endif // CUDA_WRAPPER_HPP_