Skip to content

Commit 4dd32c4

Browse files
committed
add 12_6
1 parent 07f8ce0 commit 4dd32c4

File tree

6 files changed

+821
-0
lines changed

6 files changed

+821
-0
lines changed
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
#include "cuda_runtime.h"
2+
#include "device_launch_parameters.h"
3+
4+
#include "DS_timer.h"
5+
#include <stdio.h>
6+
#include <stdlib.h>
7+
#include <string.h>
8+
9+
#define NUM_BLOCK 10240
10+
#define NUM_T_IN_B 512
11+
12+
/*
 * Baseline kernel without any synchronization.
 *
 * Every launched thread performs a plain (non-atomic) read-modify-write on the
 * single int that `a` points to. Concurrent threads can overwrite each other's
 * update, so the final value is not expected to equal the number of threads.
 *
 * a : pointer to the shared counter.
 */
__global__ void threadCounting_noSync(int* a)
{
	*a += 1;
}
16+
17+
/*
 * Counts threads with one atomic increment per thread.
 *
 * Each launched thread atomically adds 1 to the int that `a` points to, so no
 * update is lost; all threads of the grid target the same address.
 *
 * a : pointer to the shared counter.
 */
__global__ void threadCounting_atomicGlobal(int* a)
{
	const int increment = 1;
	atomicAdd(a, increment);
}
21+
22+
/*
 * Counts threads with a per-block partial count in shared memory.
 *
 * Threads first accumulate into a block-local __shared__ counter, then the
 * thread with threadIdx.x == 0 adds the block's total to `*a` with a single
 * atomic. Only threadIdx.x is inspected, i.e. the leader test is written for
 * a one-dimensional block.
 *
 * a : pointer to the counter that receives one atomic add per block.
 */
__global__ void threadCounting_atomicShared(int* a)
{
	__shared__ int blockCount;
	const bool isLeader = (threadIdx.x == 0);

	// The leader clears the block counter before anyone increments it.
	if (isLeader) {
		blockCount = 0;
	}
	__syncthreads();

	// Every thread of the block registers itself in shared memory.
	atomicAdd(&blockCount, 1);
	__syncthreads();

	// After the barrier the block total is complete; publish it once.
	if (isLeader) {
		atomicAdd(a, blockCount);
	}
}
36+
37+
// Aborts with a readable message when a CUDA runtime call reports an error.
static void checkCudaCall(cudaError_t status, const char* what)
{
	if (status != cudaSuccess) {
		fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
		exit(EXIT_FAILURE);
	}
}

/*
 * Launches the three thread-counting kernels (no synchronization, atomic on
 * the output counter, atomic on a per-block shared counter), times each one
 * with DS_timer and prints the count every kernel produced.
 *
 * Each kernel gets its own device counter, zero-initialised before use.
 * Returns 0 on success; exits with EXIT_FAILURE if any CUDA call fails.
 */
int main(void) {
	DS_timer timer(10);
	timer.setTimerName(0, (char*)"No Sync.");
	timer.setTimerName(1, (char*)"AtomicGlobal");
	timer.setTimerName(2, (char*)"AtomicShared");

	int a = 0;
	int* d1 = NULL;
	int* d2 = NULL;
	int* d3 = NULL;

	//cudaSetDevice(1);

	// Clear the whole int: a byte count of 0 would leave the counter uninitialised.
	checkCudaCall(cudaMalloc((void**)&d1, sizeof(int)), "cudaMalloc(d1)");
	checkCudaCall(cudaMemset(d1, 0, sizeof(int)), "cudaMemset(d1)");

	checkCudaCall(cudaMalloc((void**)&d2, sizeof(int)), "cudaMalloc(d2)");
	checkCudaCall(cudaMemset(d2, 0, sizeof(int)), "cudaMemset(d2)");

	checkCudaCall(cudaMalloc((void**)&d3, sizeof(int)), "cudaMalloc(d3)");
	checkCudaCall(cudaMemset(d3, 0, sizeof(int)), "cudaMemset(d3)");

	// warm-up
	threadCounting_noSync<<<NUM_BLOCK, NUM_T_IN_B>>>(d1);
	checkCudaCall(cudaGetLastError(), "warm-up launch");
	checkCudaCall(cudaDeviceSynchronize(), "warm-up sync");

	// Discard what the warm-up wrote so the printed value reflects one launch only.
	checkCudaCall(cudaMemset(d1, 0, sizeof(int)), "cudaMemset(d1) after warm-up");

	timer.onTimer(0);
	threadCounting_noSync<<<NUM_BLOCK, NUM_T_IN_B>>>(d1);
	checkCudaCall(cudaGetLastError(), "threadCounting_noSync launch");
	checkCudaCall(cudaDeviceSynchronize(), "threadCounting_noSync sync");
	timer.offTimer(0);

	checkCudaCall(cudaMemcpy(&a, d1, sizeof(int), cudaMemcpyDeviceToHost), "cudaMemcpy(d1)");
	printf("[No Sync.] # of threads = %d\n", a);

	timer.onTimer(1);
	threadCounting_atomicGlobal<<<NUM_BLOCK, NUM_T_IN_B>>>(d2);
	checkCudaCall(cudaGetLastError(), "threadCounting_atomicGlobal launch");
	checkCudaCall(cudaDeviceSynchronize(), "threadCounting_atomicGlobal sync");
	timer.offTimer(1);

	checkCudaCall(cudaMemcpy(&a, d2, sizeof(int), cudaMemcpyDeviceToHost), "cudaMemcpy(d2)");
	printf("[AtomicGlobal] # of threads = %d\n", a);

	timer.onTimer(2);
	threadCounting_atomicShared<<<NUM_BLOCK, NUM_T_IN_B>>>(d3);
	checkCudaCall(cudaGetLastError(), "threadCounting_atomicShared launch");
	checkCudaCall(cudaDeviceSynchronize(), "threadCounting_atomicShared sync");
	timer.offTimer(2);

	checkCudaCall(cudaMemcpy(&a, d3, sizeof(int), cudaMemcpyDeviceToHost), "cudaMemcpy(d3)");
	printf("[AtomicShared] # of threads = %d\n", a);

	checkCudaCall(cudaFree(d1), "cudaFree(d1)");
	checkCudaCall(cudaFree(d2), "cudaFree(d2)");
	checkCudaCall(cudaFree(d3), "cudaFree(d3)");

	timer.printTimer();

	return 0;
}
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
#pragma once
2+
3+
#include <stdio.h>
4+
#include <stdlib.h>
5+
#include <string.h>
6+
#include <iostream>
7+
8+
#define OS_WINDOWS 0
9+
#define OS_LINUX 1
10+
11+
#ifdef _WIN32
12+
#define _TARGET_OS OS_WINDOWS
13+
#else
14+
#ifndef nullptr
15+
#define nullptr NULL
16+
#endif
17+
#define _TARGET_OS OS_LINUX
18+
#endif
19+
20+
/************************************************************************/
21+
/* OS dependent functions */
22+
/************************************************************************/
23+
#if _TARGET_OS == OS_WINDOWS
24+
// #define _SPRINT sprintf_s
25+
#define _STRTOK strtok_s
26+
27+
#define EXIT_WIHT_KEYPRESS {std::cout << "Press any key to exit..."; getchar(); exit(0);}
28+
29+
#define SPLIT_PATH(_path,_result) \
30+
_splitpath_s(_path, _result.drive, 255, _result.dir, 255, _result.filename, 255, _result.ext, 255)
31+
32+
33+
#elif _TARGET_OS == OS_LINUX
34+
#include <libgen.h>
35+
#include <inttypes.h>
36+
37+
#define _STRTOK strtok_r
38+
39+
#define EXIT_WIHT_KEYPRESS {std::cout << "Program was terminated!"; exit(0);}
40+
41+
#define sprintf_s sprintf
42+
#define scanf_s scanf
43+
#define fprintf_s fprintf
44+
45+
#define __int64 int64_t
46+
47+
#define fopen_s(fp, name, mode) (*fp = fopen(name, mode))
48+
49+
#endif
50+
51+
/************************************************************************/
52+
/* Defines */
53+
/************************************************************************/
54+
55+
// *********** data size
56+
#define _1K_ 1024
57+
#define _1M_ (_1K_*_1K_)
58+
#define _1G_ (_1M_*_1K_)
59+
60+
#define CHAR_STRING_SIZE 255
61+
62+
/************************************************************************/
63+
/* Type definitions */
64+
/************************************************************************/
65+
typedef unsigned int UINT ;
66+
67+
/************************************************************************/
68+
/* Macro functions */
69+
/************************************************************************/
70+
#define DS_MEM_DELETE(a) \
71+
if (a != NULL) { \
72+
delete a ; \
73+
a = NULL ; \
74+
}
75+
76+
#define DS_MEM_DELETE_ARRAY(a) \
77+
if (a != NULL) { \
78+
delete [] a ; \
79+
a = NULL ; \
80+
}
81+
82+
#define RANGE_MIN 0
83+
#define RANGE_MAX 1
84+
85+
#define MATCHED_STRING 0
86+
87+
#ifndef VTK_RANGE_MIN
88+
#define VTK_RANGE_MIN 0
89+
#define VTK_RANGE_MAX 1
90+
#endif
91+
92+
// Print
93+
#define PRINT_LINE_INFO printf("%s, line %d", __FILE__, __LINE__)
94+
#define PRINT_ERROR_MSG(_msg) {PRINT_LINE_INFO; printf(" at "); printf(_msg);}
95+
96+
// Single loops
97+
#define LOOP_I(a) for(int i=0; i<a; i++)
98+
#define LOOP_J(a) for(int j=0; j<a; j++)
99+
#define LOOP_K(a) for(int k=0; k<a; k++)
100+
#define LOOP_INDEX(index, end) for (int index = 0 ; index < end ; index++)
101+
#define LOOP_INDEX_START_END(index, start, end) for (int index = start ; index < end ; index++)
102+
103+
// Multiple loops
104+
#define LOOP_J_I(b, a) LOOP_J(b) LOOP_I(a)
105+
#define LOOP_K_J_I(c,b,a) for(int k=0; k<c; k++) LOOP_J_I(b,a)
106+
107+
//
108+
#ifndef SWAP
109+
// Exchanges the values of `a` and `b` through one temporary copy.
// T must be copy-constructible and copy-assignable.
template<class T>
void SWAP(T &a, T &b)
{
	T held = a;	// remember the first value before it is overwritten
	a = b;
	b = held;
}
115+
#endif
116+
117+
//
118+
#ifndef MIN
// Smaller of two values. Both arguments are parenthesised so that expression
// arguments (e.g. `x & mask`) keep their own precedence. Arguments may be
// evaluated more than once, so avoid side effects such as `i++`.
#define MIN(a,b) ((a) > (b) ? (b) : (a))
#endif
121+
122+
#ifndef MAX
// Larger of two values. Both arguments are parenthesised so that expression
// arguments (e.g. `x & mask`) keep their own precedence. Arguments may be
// evaluated more than once, so avoid side effects such as `i++`.
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
125+
126+
// Index converter
127+
128+
// Conversions between a flat index and (x, y) coordinates of a row-major
// grid that is _W elements wide. All arguments are parenthesised so that
// expression arguments such as `i + 1` are evaluated as a unit.

// Column of flat index _ID.
#define INDEX2X(_ID,_W) ((_ID)%(_W))
// Row of flat index _ID.
#define INDEX2Y(_ID,_W) ((_ID)/(_W))
// Stores the column and row of flat index _ID into _X and _Y.
#define INDEX2ID(_ID,_X,_Y,_W) {(_X)=INDEX2X(_ID,_W);(_Y)=INDEX2Y(_ID,_W);}
// Flat index of coordinate (_X, _Y).
#define ID2INDEX(_W,_X,_Y) ((_Y)*(_W)+(_X))
// Element offset of _target from _base, both viewed as arrays of _type.
#define PTR2ID(_type, _target, _base) ((_type*)(_target) - (_type*)(_base))
133+
134+
// Memory allocation and release
135+
#ifndef SAFE_DELETE
136+
#define SAFE_DELETE(p) {if(p!=NULL) delete p; p=NULL;}
137+
#endif
138+
139+
#ifndef SAFE_DELETE_ARR
140+
#define SAFE_DELETE_ARR(p) {if(p!=NULL) delete [] p; p=NULL;}
141+
#endif
142+
143+
#define SAFE_NEW(p, type, size) {\
144+
try {p = new type[size];} \
145+
catch(std::bad_alloc& exc) \
146+
{ printf("[%s, line %d] fail to memory allocation - %.2f MB requested\n", __FILE__, __LINE__, (float)(sizeof(type)*size)/_1M_); \
147+
EXIT_WIHT_KEYPRESS }\
148+
}
149+
150+
// Zero-fills the first `size` elements of the buffer that `*p` points to.
// Does nothing when `*p` is NULL. With the default `size` of 0 no bytes are
// written. `p` itself must be a valid pointer.
template<class T>
void memsetZero(T** p, long long size = 0)
{
	if (*p == NULL) {
		return;
	}
	memset(*p, 0, sizeof(T)*size);
}
155+
156+
// Allocates an array of `size` elements of T with new[], stores it in `*p`
// and clears it to zero bytes with memset.
//
// p        : receives the new array; the caller releases it with delete[].
// size     : number of elements to allocate.
// memUsage : optional running total; when non-NULL the number of bytes
//            allocated here is added to it.
template<class T>
void allocNinitMem(T** p, long long size, double *memUsage = NULL)
{
	T* fresh = new T[size];
	memset(fresh, 0, sizeof(T)*size);
	*p = fresh;

	if (memUsage == NULL) {
		return;
	}
	*memUsage += sizeof(T)*size;
}
166+
167+
// Copies _size elements of _type from _src to _dst.
// When either pointer is null, an error with file and line is printed and the
// program exits with status -1. The error branch is braced so that exit() only
// runs on that path, and the pointers are printed with %p.
#define SAFE_MEMCPY(_dst, _src, _type, _size){ \
	if((_dst) == nullptr || (_src) == nullptr ) { \
		printf("[%s, line %d] fail to memcpy (dst = %p, src = %p)\n", __FILE__, __LINE__, (const void*)(_dst), (const void*)(_src)); \
		exit(-1); \
	} \
	memcpy((_dst), (_src), sizeof(_type)*(_size));\
}
173+
174+
// VTK related
175+
#ifndef SAFE_DELETE_VTK
176+
#define SAFE_DELETE_VTK(p) {if(p!=NULL) p->Delete(); p=NULL;}
177+
#endif
178+
179+
#ifndef VTK_IS_NOERROR
180+
//#include "DS_common_def.h"
181+
#define VTK_IS_NOERROR(p) (p->GetErrorCode()==vtkErrorCode::NoError ? true : false)
182+
#endif
183+
184+
/************************************************************************/
185+
/* Data structures */
186+
/************************************************************************/
187+
typedef struct {
188+
std::string input;
189+
std::string output;
190+
} nameMatch;
191+
192+
typedef struct {
193+
char drive[255];
194+
char dir[255];
195+
char filename[255];
196+
char ext[255];
197+
} filePathSplit;

0 commit comments

Comments
 (0)