merge reduce.cu

Merge branch 'master' of github.com:cyberaide/biostatistics Conflicts: GPUMapReduce/reduce.cu
ThomasLecat · Sep 17, 2012 · ea2dc39 · ea2dc39
2 parents 220b161 + 764990c
commit ea2dc39
Show file tree

Hide file tree

Showing 37 changed files with 12,808 additions and 2,017 deletions.
diff --git a/GPUMapReduce/Global.h b/GPUMapReduce/Global.h
@@ -1,27 +1,76 @@
+
 /*	
 	Copyright 2012 The Trustees of Indiana University.  All rights reserved.
 	CGL MapReduce Framework on GPUs and CPUs
-	Code Name: Panda 0.1
+	
+	Code Name: Panda 
+	
 	File: Global.h 
-	Time: 2012-07-01 
+	First Version:		2012-07-01 V0.1
+	Current Version:	2012-09-01 V0.3	
+	Last Updates:		2012-09-02
+
 	Developer: Hui Li (lihui@indiana.edu)
 
 	This is the source code for Panda, a MapReduce runtime on GPUs and CPUs.
- 
+
  */
 
 #ifndef __GLOBAL_H__
 #define __GLOBAL_H__
 
+
+#define MATRIX_BLOCK_SIZE 64
+/* CHECK_BANK_CONFLICTS picks the expansion of the AS/BS accessors: non-zero routes them through cutilBankChecker, 0 uses plain 2-D indexing. */
+#define CHECK_BANK_CONFLICTS 0
+#if CHECK_BANK_CONFLICTS
+#define AS(i, j) cutilBankChecker(((float*)&As[0][0]), (BLOCK_SIZE * (i) + (j)))	/* i and j are parenthesised so expression arguments keep their grouping */
+#define BS(i, j) cutilBankChecker(((float*)&Bs[0][0]), (BLOCK_SIZE * (i) + (j)))	/* NOTE(review): BLOCK_SIZE is not defined in the visible part of this header (only MATRIX_BLOCK_SIZE is) - confirm which constant is intended */
+#else
+#define AS(i, j) As[i][j]
+#define BS(i, j) Bs[i][j]
+#endif
+
+
+/* Host-callable map declaration with C linkage. cpu_context (and gpu_context below) are not declared in the visible part of this header - NOTE(review): presumably an earlier include must provide them; confirm. */
+extern "C"
+void cpu_map(void *KEY, void*VAL, int keySize, int valSize, cpu_context *d_g_state, int map_task_idx);
+
+/* Device function comparing two buffers, each passed as pointer plus length. NOTE(review): the meaning of the int result is not visible here - confirm against the definition. */
+extern "C"
+__device__ int compare(const void *d_a, int len_a, const void *d_b, int len_b);
+/* Device-side map declaration: same parameter list as cpu_map except that it takes a gpu_context. */
+extern "C"
+__device__ void map2(void *KEY, void*VAL, int keySize, int valSize, gpu_context *d_g_state, int map_task_idx);
+
+
+//extern "C" void cpu_matrix(float *A, float *B, float *C, int wide, int start_row_id, int end);
+/* MM_KEY_T: key record made of six float pointers (matrix1..3 and h_matrix1..3); it takes the place of the removed WC_KEY_T. */
 typedef struct
 {
-	char* file; 
-} WC_KEY_T;
 
-typedef __align__(16) struct
-{
-	int line_offset;
-	int line_size;
-} WC_VAL_T;
+        float* matrix1;
+        float* matrix2;
+		float* matrix3;
+		/* NOTE(review): the h_ prefix presumably marks host-side counterparts of matrix1..3 - confirm against the code that fills this struct. */
+		float* h_matrix1;
+		float* h_matrix2;
+		float* h_matrix3;
+
+		//int test;
+
+} MM_KEY_T;
+/* MM_VAL_T: value record of four ints. NOTE(review): the removed WC_VAL_T was declared __align__(16); this struct carries no alignment attribute - confirm that is intended. */
+typedef struct
+{		
+        int row;
+        int col;
+		/* The two fields below are disabled; their own comments name them thread block size and matrix block size. */
+		//int tbz;//thread block size
+		//int mbz;//matrix block size
+		/* NOTE(review): row_dim/col_dim presumably give an extent that goes with row/col - confirm against callers. */
+        int row_dim;
+        int col_dim;
+} MM_VAL_T;
 
 #endif
diff --git a/GPUMapReduce/Makefile b/GPUMapReduce/Makefile
@@ -1,35 +1,37 @@
 ################################################################################
-# Build script for MPI/CUDA implementation of PandaSort on Delta
-# lihui@indiana.edu   5/28/2012
+# Makefile for Panda source code
+# lihui@indiana.edu   7/01/2012
 ################################################################################
 
-all:mpi_cuda
+all:panda_application	# default goal; panda_application is the link rule near the end of this Makefile
 
 CC	= g++
 NVCC	= nvcc
 
-NVCCFLAGS += --ptxas-options=-v
-NVCCFLAGS += -Xcompiler -fopenmp
-INCLUDE   = -I. -I/opt/cuda/include/ -I/opt/NVIDIA_GPU_Computing_SDK/C/common/inc/
-LIBPATH =  -L./ -L/opt/cuda/lib64/ -L/opt/NVIDIA_GPU_Computing_SDK/C/lib/
+#NVCCFLAGS += --ptxas-options=-v
+NVCCFLAGS += -Xcompiler -O
+# NOTE(review): no rule visible in this diff expands $(NVCCFLAGS), so the flag above does not reach any compile line; the paths below are hard-coded to a CUDA 4.1 install under /sw/keeneland.
+INCLUDE   = -I. -I/sw/keeneland/cuda/4.1/linux_binary/include/ -I/opt/NVIDIA_GPU_Computing_SDK/4.1.28/C/common/inc/
+LIBPATH =  -L./ -L/sw/keeneland/cuda/4.1/linux_binary/lib64/ -L/opt/NVIDIA_GPU_Computing_SDK/4.1.28/C/lib/
 
 .c.o:
 	$(CC) -c $<
-#PandaLib.cu PandaInc.h PandaUtils.cpp PandaScan.cu PandaSort.cu
-OBJ		= main.o PandaLib.o PandaUtils.o PandaScan.o PandaSort.o
+# Objects linked into the final binary: PandaSched.o takes the place of PandaScan.o and matrixutil.o is new in this change.
+OBJ		= main.o PandaLib.o PandaUtils.o PandaSched.o PandaSort.o matrixutil.o
 
 PandaLib.o: PandaLib.cu
-	$(NVCC) -arch=sm_20 -c PandaLib.cu $(INCLUDE) -lcudart
+	$(NVCC) -O -arch=sm_20 -c PandaLib.cu $(INCLUDE) -lcudart
 PandaUtils.o: PandaUtils.cu
-	$(NVCC) -arch=sm_20 -c PandaUtils.cu $(INCLUDE) -lcudart 
-PandaScan.o: PandaScan.cu
-	nvcc -arch=sm_20 -c PandaScan.cu -o PandaScan.o $(INCLUDE) -lcudart
+	$(NVCC) -O -arch=sm_20 -c PandaUtils.cu $(INCLUDE) -lcudart 
+PandaSched.o: PandaSched.cu	# NOTE(review): this recipe and the PandaSort.o one call nvcc literally and omit -O, unlike the PandaLib.o/PandaUtils.o rules - confirm that is deliberate
+	nvcc -arch=sm_20 -c PandaSched.cu -o PandaSched.o $(INCLUDE) -lcudart
 PandaSort.o: PandaSort.cu
 	nvcc -arch=sm_20 -c PandaSort.cu -o PandaSort.o $(INCLUDE) -lcudart 
-
+matrixutil.o: matrixutil.cpp	# the only object here compiled with g++ instead of nvcc
+	g++ -O -o matrixutil.o -c matrixutil.cpp
 main.o: main.cu
-	nvcc -arch=sm_20 -c main.cu -o main.o $(INCLUDE) -lcudart
-mpi_cuda: $(OBJ)
-	nvcc -arch=sm_20 -o cuda_kmeans $(OBJ) $(INCLUDE) $(LIBPATH) -lcudart -lcutil_x86_64
+	nvcc -O -arch=sm_20 -c main.cu -o main.o $(INCLUDE) -lcudart
+panda_application: $(OBJ)	# links every object in OBJ into panda_matrixMul_1d; clean below must remove that same file name
+	nvcc -O -arch=sm_20 -o panda_matrixMul_1d $(OBJ) $(INCLUDE) $(LIBPATH) -lcudart 
 clean:
-	rm *.o cuda_kmeans
+	rm -f *.o panda_matrixMul_1d