Skip to content

Commit 26f330e

Browse files
committed
initial checkin
1 parent 0b18cf4 commit 26f330e

15 files changed

+2154
-0
lines changed

lbfgs/Makefile

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
# Root of the CUDA toolkit installation (headers and lib64 are taken from here).
CUDA := /usr/local/cuda

# Source lists are discovered by globbing; each object list mirrors its
# source list with the extension swapped for .o.
CUDASRC := $(wildcard src/*.cu)
CUDAOBJ := $(patsubst %.cu,%.o,$(CUDASRC))

CPPSRC := $(wildcard src/*.C)
CPPOBJ := $(patsubst %.C,%.o,$(CPPSRC))

JNISRC := $(wildcard spark/*.c)
JNIOBJ := $(patsubst %.c,%.o,$(JNISRC))

# Shared utility source from the parent directory, compiled into this directory.
UTILSRC := ../utilities.cu
UTILOBJ := utilities.o

# Code-generation flags for several GPU architectures.
# Only SM21 and SM35 are actually passed to nvcc below.
GENCODE_SM20 := -gencode arch=compute_20,code=sm_20
GENCODE_SM21 := -gencode arch=compute_20,code=sm_21
GENCODE_SM30 := -gencode arch=compute_30,code=sm_30
GENCODE_SM35 := -gencode arch=compute_35,code=sm_35

INCLUDES := -I.. -I./. -I$(CUDA)/include -I$(JAVA_HOME)/include -I$(JAVA_HOME)/include/linux

# CUDAFLAGS must END with -Xcompiler and CXXFLAGS must START with -fPIC:
# nvcc is invoked with "$(CUDAFLAGS) $(CXXFLAGS)" and needs the pair
# "-Xcompiler -fPIC", whereas gcc/g++ only get $(CXXFLAGS) and want plain -fPIC.
CUDAFLAGS := $(GENCODE_SM21) $(GENCODE_SM35) -Xcompiler
CXXFLAGS := -fPIC -g -shared -m64 -O3 $(INCLUDES)

# Extend the linker flags so the CUDA runtime libraries are found and linked.
LDFLAGS += -L$(CUDA)/lib64 -lcudart -lcublas -lpthread

# Name of the shared library produced by this Makefile.
SHARED_LIBRARY := libGPULBFGS.so
36+
37+
38+
# Command-style targets: none of these names is a file produced by a recipe.
.PHONY: default lbfgs clean all

# First rule in the file, hence the default goal.
default: lbfgs

# JNI glue (plain C).
spark/%.o: spark/%.c
	gcc $(CXXFLAGS) -c $< -o $@

# Host-side C++ sources.
src/%.o: src/%.C
	g++ $(CXXFLAGS) -c $< -o $@

# CUDA sources; CUDAFLAGS ends with -Xcompiler so that the leading -fPIC of
# CXXFLAGS is forwarded to the host compiler.
src/%.o: src/%.cu
	nvcc $(CUDAFLAGS) $(CXXFLAGS) -c $< -o $@

# Shared utilities live one directory up but are compiled into this directory.
$(UTILOBJ): $(UTILSRC)
	nvcc $(CUDAFLAGS) $(CXXFLAGS) -c $< -o $@

# Link every object into the shared library. The prerequisite list is the
# single source of truth for what gets linked ($^), so the two cannot drift.
$(SHARED_LIBRARY): $(CUDAOBJ) $(CPPOBJ) $(JNIOBJ) $(UTILOBJ)
	g++ -shared -o $@ $^ $(LDFLAGS)

# "lbfgs" is kept as the public target name; the file actually produced is
# src/lbfgs, which gets its own rule so it is only relinked when the library
# changes.
lbfgs: src/lbfgs

src/lbfgs: $(SHARED_LIBRARY)
	g++ -g $(SHARED_LIBRARY) -o $@ -lm

# Remove everything this Makefile builds.
clean:
	rm -f src/lbfgs src/*.o $(SHARED_LIBRARY) $(CPPOBJ) $(CUDAOBJ) $(JNIOBJ) $(UTILOBJ)

# Full rebuild followed by a run. $(MAKE) (rather than a literal "make")
# propagates command-line flags such as -j and -n to the sub-makes.
all:
	$(MAKE) clean
	$(MAKE)
	./run.sh

lbfgs/README

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
This is a CUDA kernel which implements the LBFGS algorithm.
2+
It can be used stand-alone, or by Spark. The JNI code to
3+
integrate with Spark is under the spark subdirectory, and
4+
changes to Spark itself to call the kernel are located in
5+
Spark-MLlib: org/apache/spark/mllib/optimization/LBFGS.scala
6+
7+
This code currently compiles and runs, executing the GPU
8+
code by copying all data to the driver and executing
9+
the LBFGS algorithm there.
10+
11+
To test and verify, perform the following steps:
12+
13+
1) Build this library by typing "make" in this directory.
14+
15+
2) Build Spark from the Spark-MLlib repository. An example command to do this from the
16+
Spark-MLlib folder is:
17+
"./make-distribution.sh -Pyarn -Phadoop-2.6 -Dhadoop.version=2.6.3". Refer to the
18+
Spark documentation here for more details: https://spark.apache.org/docs/latest/building-spark.html
19+
20+
3) Set the configuration property "spark.mllib.LBFGS.useGPU=true", and specify the
21+
argument "--driver-library-path=/path/to/CUDA-MLlib/lbfgs" when running spark-submit.
22+
Setting "spark.mllib.LBFGS.useGPU=false" or not setting it at all will disable the GPU, and run stock
23+
spark code.
24+
25+
4) Run your program. An example program that comes with spark and uses LBFGS is
26+
"mllib.LBFGSExample". Here's an example full command to run that program locally:
27+
28+
spark-submit -v --master local[*] --conf spark.mllib.LBFGS.useGPU=true
29+
--driver-library-path=/path/to/CUDA-MLlib/lbfgs
30+
--class org.apache.spark.examples.mllib.LBFGSExample
31+
/path/to/Spark-MLlib/dist/lib/spark-examples-2.0.0-SNAPSHOT-hadoop2.6.3.jar

lbfgs/include/cuda_checking.h

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
#ifndef CUDA_CHECKING_H_
#define CUDA_CHECKING_H_

#include <stdio.h>   /* fprintf, stderr -- used by __cublasCheckError */
#include <stdlib.h>  /* exit */
#include <string>
#include <cuda.h>
#include <cublas_v2.h>

/* Maps a cuBLAS status code to a printable string; defined outside this header. */
extern const char *cublasGetErrorString(cublasStatus_t e);

/*
 * checkCublasErrors(err): check the status of a cuBLAS call, recording the
 * file and line of the call site.
 *
 * The check is only active when CUBLAS_ERROR_CHECK is defined at compile
 * time. In that case a status other than CUBLAS_STATUS_SUCCESS prints a
 * diagnostic to stderr and terminates the process with exit(-1). Without
 * CUBLAS_ERROR_CHECK the function is a no-op.
 */
#define checkCublasErrors(err) __cublasCheckError( err, __FILE__, __LINE__ )
inline void __cublasCheckError( cublasStatus_t err, const char *file, const int line )
{
#ifdef CUBLAS_ERROR_CHECK
    if ( CUBLAS_STATUS_SUCCESS != err )
    {
        fprintf( stderr, "CUBLAS call failed at %s:%i : %s\n",
                 file, line, cublasGetErrorString( err ) );
        exit( -1 );
    }
#else
    /* Checking disabled: silence unused-parameter warnings. */
    (void) err;
    (void) file;
    (void) line;
#endif
}

#endif
43+

lbfgs/include/lbfgs.h

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
#ifndef LBFGS_H
#define LBFGS_H
#include "cublas_v2.h"

/* ------------------------------------------------------------------ */
/* C++-only section: helper macros and declarations with C++ linkage.  */
/* ------------------------------------------------------------------ */
#ifdef __cplusplus
#include <stdio.h>   /* printf -- used by HALT */
#include <stdlib.h>  /* exit   -- used by HALT */
#include <stdint.h>

/*
 * Print the current file/line and terminate with exit(-1).
 * The spaces around __FILE__ are required: since C++11 a string literal
 * immediately followed by an identifier is parsed as a user-defined
 * literal suffix instead of being concatenated.
 */
#define HALT {printf("HALT: \"" __FILE__ "\", line %d.\n", __LINE__); exit(-1);}
#define FALSE (0!=0)
#define TRUE (1!=0)

#define DEBUG

/* The following functions must be provided by user. */

/* Objective function to be minimized. */
double
function_to_be_minimized(
    double * input_vector,
    double *device_X, double *device_Y,
    double regulerization_parameter, int N, int dimension,
    cublasHandle_t cublasHandle);

/* Gradient of the objective; the result is written to output_gradient. */
void
gradient_of_function_to_be_minimized(
    double * output_gradient, double * input_vector,
    double *device_X, double *device_Y,
    double regulerization_parameter, int N, int dimension,
    cublasHandle_t cublasHandle);

extern void
initialize_from_file(const char *file_name, int *n, int *dim, double **deviceX, double **deviceY);

#endif /* __cplusplus */

/* ------------------------------------------------------------------ */
/* Entry points visible to both C and C++ translation units.           */
/* Those marked extern "C" have C linkage when compiled as C++.        */
/* ------------------------------------------------------------------ */

#ifdef __cplusplus
extern "C"
#endif
void initialize_from_arrays(double * givenY,
                            double * givenX,
                            double ** deviceX,
                            double ** deviceY,
                            int givenDimension,
                            int givenN);

/* Minimization by LBFGS algorithm */

#ifdef __cplusplus
extern "C"
#endif
void lbfgs(double * minimizing_vector,
           double * minimum,
           double convergenceTol,
           int maxIterations,
           double * device_X,
           double * device_Y,
           double regulerization_parameter,
           int N,
           int dimension,
           double * loss_history_array,
           int loss_history_array_size);

/* Declared once here; it has no extern "C" wrapper, matching the original. */
extern double lbfgs_from_file(const char *file_name);

#ifdef __cplusplus
extern "C"
#endif
double lbfgs_from_arrays(double *Y,
                         double *X,
                         double *YX,
                         double convergenceTol,
                         double regularization_parameter,
                         double *minimizing_vector,
                         double *loss_history_array,
                         int loss_history_array_size,
                         int numSamples,
                         int numFeatures,
                         int maxIterations);
#endif

lbfgs/include/opt_cuda.h

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
18+
#ifndef _KERNELS_H_
#define _KERNELS_H_

#include <stdio.h>
#include <stdlib.h>

#include <cublas_v2.h>
#include "utilities.h"

/*
 * Memory helpers. Judging by the names these allocate device buffers and
 * copy between host and device -- NOTE(review): the unit of len (elements
 * or bytes) is not visible from this header; confirm against the
 * implementation.
 */
double *allocateDeviceMemory(int len);
void copyFromHostToDevice(double *h_region, double *d_region, int len);
void copyFromDeviceToHost(double *h_region, double *d_region, int len);
void copyFromDeviceToDevice(double *dest, double *src, int len);

/* The only declaration in this header given C linkage when compiled as C++. */
#ifdef __cplusplus
extern "C"
#endif
void freeDeviceMemory(double *region);

/* Vector and matrix primitives. */
void cuda_set_vector_to_zero(double * h_vec, int n);

void cuda_vec_equals_vec1_plus_alpha_times_vec2(
    double * h_vec,
    double * h_vec1,
    double alpha,
    double * a1,
    double * h_vec2,
    int numElements);

void cuda_matrix_times_vector(
    const double *h_matrixIn, int rows, int cols,
    const double *h_vectorIn,
    double *h_vectorOut);

void cuda_mult_vector_by_number(
    double * h_vec,
    double alpha,
    int numElements);

void cuda_vec_equals_minus_vec1(
    double * h_vec,
    double * h_vec1,
    int numElements);

double cuda_euclidean_norm(const double * h_vec, int numElements);

/* The result is delivered through d_answer rather than a return value. */
void cuda_dot_product(
    const double * h_vec1,
    const double * h_vec2,
    double * d_answer,
    int numElements,
    cublasHandle_t cublasHandle);

/* Objective function and its gradient. */
double cuda_function_to_be_minimized(
    double * h_input_vector,
    double * x,
    double * y,
    double regularization_parameter,
    int N,
    int dimension,
    cublasHandle_t cublasHandle);

void cuda_gradient_of_function_to_be_minimized(
    double * h_output_gradient,
    double * h_input_vector,
    double * x,
    double * y,
    double regularization_parameter,
    int N,
    int dimension,
    cublasHandle_t cublasHandle);

/* GPU device enumeration and selection. */
int getGPUCount();
void setGPUDevice(int id);
int getCurrentGPU();

#endif
85+

lbfgs/spark/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/NativeLBFGS.o

0 commit comments

Comments
 (0)