IBMSparkGPU
diff --git a/‎lbfgs/Makefile
Lines changed: 64 additions & 0 deletions b/‎lbfgs/Makefile
Lines changed: 64 additions & 0 deletions
diff --git a/‎lbfgs/README
Lines changed: 31 additions & 0 deletions b/‎lbfgs/README
Lines changed: 31 additions & 0 deletions
diff --git a/‎lbfgs/include/cuda_checking.h
Lines changed: 43 additions & 0 deletions b/‎lbfgs/include/cuda_checking.h
Lines changed: 43 additions & 0 deletions
diff --git a/‎lbfgs/include/lbfgs.h
Lines changed: 102 additions & 0 deletions b/‎lbfgs/include/lbfgs.h
Lines changed: 102 additions & 0 deletions
diff --git a/‎lbfgs/include/opt_cuda.h
Lines changed: 85 additions & 0 deletions b/‎lbfgs/include/opt_cuda.h
Lines changed: 85 additions & 0 deletions
diff --git a/‎lbfgs/spark/.gitignore
Lines changed: 1 addition & 0 deletions b/‎lbfgs/spark/.gitignore
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1,64 @@
+# Root of the CUDA toolkit; its include/ and lib64/ directories are used below.
+CUDA := /usr/local/cuda
+
+# CUDA sources under src/ and the objects built next to them.
+CUDASRC := $(wildcard src/*.cu)
+CUDAOBJ := $(patsubst %.cu,%.o,$(CUDASRC))
+# Host C++ sources (upper-case .C suffix) under src/.
+CPPSRC := $(wildcard src/*.C)
+CPPOBJ := $(patsubst %.C,%.o,$(CPPSRC))
+# C sources under spark/ (built with the JNI include paths set in INCLUDES).
+JNISRC := $(wildcard spark/*.c)
+JNIOBJ := $(patsubst %.c,%.o,$(JNISRC))
+# Utility source from the parent directory; its object is written to this directory.
+UTILSRC := ../utilities.cu
+UTILOBJ := utilities.o
+
+# nvcc code-generation options, one per GPU target; CUDAFLAGS picks sm_21 and sm_35.
+GENCODE_SM20 := -gencode arch=compute_20,code=sm_20
+GENCODE_SM21 := -gencode arch=compute_20,code=sm_21
+GENCODE_SM30 := -gencode arch=compute_30,code=sm_30
+GENCODE_SM35 := -gencode arch=compute_35,code=sm_35
+CUDAFLAGS := $(GENCODE_SM21) $(GENCODE_SM35)
+
+# Header search path: parent dir, this dir, CUDA, and the JDK's JNI headers (needs JAVA_HOME).
+INCLUDES := -I.. -I./. -I$(CUDA)/include -I$(JAVA_HOME)/include -I$(JAVA_HOME)/include/linux
+
+# Ordering matters: CUDAFLAGS must END with -Xcompiler and CXXFLAGS must START with -fPIC.
+# nvcc then sees the pair "-Xcompiler -fPIC", while gcc/g++ see a plain -fPIC.
+CUDAFLAGS += -Xcompiler
+CXXFLAGS := -fPIC -g -shared -m64 -O3 $(INCLUDES)
+
+# Extra link-time search path and libraries for the CUDA runtime, cuBLAS and pthreads.
+LDFLAGS += -L$(CUDA)/lib64 -lcudart -lcublas -lpthread
+# File name of the shared library produced by the link rule.
+SHARED_LIBRARY := libGPULBFGS.so
+
+.PHONY: default lbfgs clean all
+default: lbfgs
+# The four goals named in .PHONY are commands, not files, so they always run when requested.
+spark/%.o: spark/%.c
+	gcc $(CXXFLAGS) -c $< -o $@
+
+src/%.o: src/%.C
+	g++ $(CXXFLAGS) -c $< -o $@
+# nvcc forwards -fPIC to the host compiler only because CUDAFLAGS ends in -Xcompiler.
+src/%.o: src/%.cu
+	nvcc  $(CUDAFLAGS) $(CXXFLAGS) -c $< -o $@
+
+$(UTILOBJ): $(UTILSRC)
+	nvcc  $(CUDAFLAGS) $(CXXFLAGS) -c $< -o $@
+# Link every object into the shared library; $^ is the prerequisite list, in order.
+$(SHARED_LIBRARY): $(CUDAOBJ) $(CPPOBJ) $(JNIOBJ) $(UTILOBJ)
+	g++ -shared -o $@ $^ $(LDFLAGS)
+# Writes src/lbfgs rather than a file called lbfgs, so this link step runs on every invocation.
+lbfgs: $(SHARED_LIBRARY)
+	g++  -g $< -o src/lbfgs -lm
+# Remove everything the rules above produce.
+clean:
+	$(RM) src/lbfgs src/*.o $(SHARED_LIBRARY) $(CPPOBJ) $(CUDAOBJ) $(JNIOBJ) $(UTILOBJ)
+# Clean rebuild, then run; $(MAKE) hands command-line flags and the jobserver to the sub-makes.
+all:
+	$(MAKE) clean
+	$(MAKE)
+	./run.sh
@@ -0,0 +1,31 @@
+This is a CUDA kernel which implements the LBFGS algorithm.
+It can be used stand-alone, or by Spark.  The JNI code to 
+integrate with Spark is under the spark subdirectory, and 
+changes to Spark itself to call the kernel are located in 
+Spark-MLlib: org/apache/spark/mllib/optimization/LBFGS.scala
+
+This code currently compiles and runs, executing the GPU 
+code by copying all data to the driver and executing 
+the LBFGS algorithm there.
+
+To test and verify, perform the following steps:
+
+1) Build this library by typing "make" in this directory.
+
+2) Build Spark from the Spark-MLlib repository.  An example command to do this, run from the
+Spark-MLlib folder, is:
+"./make-distribution.sh -Pyarn -Phadoop-2.6 -Dhadoop.version=2.6.3".  Refer to the
+Spark documentation for more details: https://spark.apache.org/docs/latest/building-spark.html
+
+3) Set the configuration property "spark.mllib.LBFGS.useGPU=true", and specify the
+argument "--driver-library-path=/path/to/CUDA-MLlib/lbfgs" when running spark-submit.
+Setting "spark.mllib.LBFGS.useGPU=false", or not setting it at all, disables the GPU path and runs the
+stock Spark code.
+
+4) Run your program.  An example program that comes with Spark and uses LBFGS is
+"mllib.LBFGSExample".  Here is a full example command that runs that program locally:
+
+spark-submit -v --master local[*] --conf spark.mllib.LBFGS.useGPU=true \
+--driver-library-path=/path/to/CUDA-MLlib/lbfgs \
+--class org.apache.spark.examples.mllib.LBFGSExample \
+/path/to/Spark-MLlib/dist/lib/spark-examples-2.0.0-SNAPSHOT-hadoop2.6.3.jar
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CUDA_CHECKING_H_
+#define CUDA_CHECKING_H_
+
+#include <stdio.h>   /* fprintf, stderr */
+#include <stdlib.h>  /* exit */
+#include <string>
+#include <cuda.h>
+#include <cublas_v2.h>
+
+/* Supplies the message text printed for a failing status; defined outside this header. */
+extern const char *cublasGetErrorString(cublasStatus_t e);
+/* Wrap a cuBLAS call's status so a failure is reported with the caller's file and line. */
+#define checkCublasErrors(err)    __cublasCheckError( err, __FILE__, __LINE__ )
+inline void __cublasCheckError( cublasStatus_t err, const char *file, const int line )
+{
+#ifdef CUBLAS_ERROR_CHECK  /* without this macro the function body is empty and nothing is checked */
+    if ( CUBLAS_STATUS_SUCCESS != err )
+    {
+        fprintf( stderr, "CUBLAS call failed at %s:%i : %s\n",
+                 file, line, cublasGetErrorString( err ) );
+        exit( -1 );  /* a failed call terminates the whole process */
+    }
+#endif
+}
+#endif /* CUDA_CHECKING_H_ */
+
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LBFGS_H
+#define LBFGS_H
+#include "cublas_v2.h"
+#ifdef __cplusplus
+#include <stdio.h>   /* printf, used by HALT */
+#include <stdlib.h>  /* exit, used by HALT */
+#include <stdint.h>
+/* Print the source location and exit(-1).  The spaces around __FILE__ are required: C++11 reads "..."__FILE__ as a literal suffix. */
+#define HALT {printf("HALT: \"" __FILE__ "\", line %d.\n", __LINE__); exit(-1);}
+#define FALSE (0!=0)
+#define TRUE (1!=0)
+#define DEBUG  /* NOTE(review): defined unconditionally for every C++ includer; its effect lies outside this header */
+
+
+
+/* Objective callbacks; per the original note these must be provided by the user. */
+double function_to_be_minimized(double        *input_vector,
+                                double        *device_X,
+                                double        *device_Y,
+                                double         regulerization_parameter,
+                                int            N,
+                                int            dimension,
+                                cublasHandle_t cublasHandle);
+
+void gradient_of_function_to_be_minimized(double        *output_gradient,
+                                          double        *input_vector,
+                                          double        *device_X,
+                                          double        *device_Y,
+                                          double         regulerization_parameter,
+                                          int            N,
+                                          int            dimension,
+                                          cublasHandle_t cublasHandle);
+
+void initialize_from_file(const char *file_name, int *n, int *dim,
+                          double **deviceX, double **deviceY);
+
+/* Minimization by the LBFGS algorithm. */
+double lbfgs_from_file(const char *file_name);
+
+/* End of the C++-only section; the declarations below are also visible to C. */
+#endif
+
+/* C linkage under C++.  Note the order: Y precedes X for the host arrays, X precedes Y for the device pointers. */
+#ifdef __cplusplus
+extern "C"
+#endif
+void initialize_from_arrays(double *  givenY,
+                       double *  givenX,
+                       double ** deviceX,
+                       double ** deviceY,
+                       int   givenDimension,
+                       int   givenN);
+
+#ifdef __cplusplus
+extern "C"   /* gives the symbol C linkage when this header is compiled as C++ */
+#endif
+void lbfgs(double *minimizing_vector,        /* presumably out: the minimizer found — TODO confirm */
+           double *minimum,                  /* presumably out: objective value there — TODO confirm */
+           double  convergenceTol,
+           int     maxIterations,
+           double *device_X,
+           double *device_Y,
+           double  regulerization_parameter, /* spelling differs from lbfgs_from_arrays' regularization_parameter */
+           int     N,
+           int     dimension,
+           double *loss_history_array,
+           int     loss_history_array_size);
+
+double lbfgs_from_file(const char *file_name);  /* second declaration; this one is also visible to C, with no extern "C" */
+/* Array-based entry point, given C linkage under C++. */
+#ifdef __cplusplus
+extern "C"
+#endif
+double lbfgs_from_arrays(double *Y,
+                         double *X,
+                         double *YX,                       /* NOTE(review): meaning not evident from this header */
+                         double  convergenceTol,
+                         double  regularization_parameter,
+                         double *minimizing_vector,
+                         double *loss_history_array,
+                         int     loss_history_array_size,
+                         int     numSamples,
+                         int     numFeatures,
+                         int     maxIterations);
+#endif /* LBFGS_H */
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OPT_CUDA_H_
+#define OPT_CUDA_H_
+/* Guard named after this file; names starting with an underscore and a capital are reserved. */
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <cublas_v2.h>
+#include "utilities.h"
+
+double *allocateDeviceMemory(int len);  /* presumably len counts doubles, not bytes — TODO confirm */
+void copyFromHostToDevice(double *h_region, double *d_region, int len);  /* host pointer is the first argument */
+void copyFromDeviceToHost(double *h_region, double *d_region, int len);  /* host pointer is still first here */
+void copyFromDeviceToDevice(double *dest, double *src, int len);
+/* Only freeDeviceMemory is given C linkage under C++; the four helpers above keep the default linkage. */
+#ifdef __cplusplus
+extern "C"
+#endif
+void freeDeviceMemory(double *region);
+
+
+/* Vector and matrix helpers.  NOTE(review): the h_ prefix suggests host pointers and d_
+ * device pointers — confirm against the definitions, which are not in this header. */
+void cuda_set_vector_to_zero(double *h_vec, int n);
+
+/* Going by the name: h_vec = h_vec1 + alpha * h_vec2.  The role of a1 is not evident here. */
+void cuda_vec_equals_vec1_plus_alpha_times_vec2(double *h_vec, double *h_vec1, double alpha,
+                                                double *a1, double *h_vec2, int numElements);
+
+/* Going by the name: h_vectorOut = h_matrixIn * h_vectorIn; storage order is not stated here. */
+void cuda_matrix_times_vector(const double *h_matrixIn, int rows, int cols,
+                              const double *h_vectorIn, double *h_vectorOut);
+
+/* Presumably scales h_vec in place: there is no separate output argument. */
+void cuda_mult_vector_by_number(double *h_vec, double alpha, int numElements);
+
+void cuda_vec_equals_minus_vec1(double *h_vec, double *h_vec1, int numElements);
+
+double cuda_euclidean_norm(const double *h_vec, int numElements);
+
+/* Returns void: unlike cuda_euclidean_norm, the result presumably arrives through d_answer. */
+void cuda_dot_product(const double   *h_vec1,
+                      const double   *h_vec2,
+                      double         *d_answer,
+                      int             numElements,
+                      cublasHandle_t  cublasHandle);
+
+/* Same parameter list as function_to_be_minimized in lbfgs.h, under a cuda_ prefix. */
+double cuda_function_to_be_minimized(double         *h_input_vector,
+                                     double         *x, double *y,
+                                     double          regularization_parameter,
+                                     int             N,
+                                     int             dimension,
+                                     cublasHandle_t  cublasHandle);
+
+/* Same parameter list as gradient_of_function_to_be_minimized in lbfgs.h; returns void. */
+void cuda_gradient_of_function_to_be_minimized(double         *h_output_gradient,
+                                               double         *h_input_vector,
+                                               double         *x, double *y,
+                                               double          regularization_parameter,
+                                               int             N,
+                                               int             dimension,
+                                               cublasHandle_t  cublasHandle);
+int  getGPUCount();        /* GPU enumeration and selection helpers, defined outside this header */
+void setGPUDevice(int id);
+int  getCurrentGPU();
+
+#endif /* include guard */
+
@@ -0,0 +1 @@
+/NativeLBFGS.o