Skip to content

Commit

Permalink
Add cuda rtc module (#8017)
Browse files Browse the repository at this point in the history
* Add cuda rtc module

* add to docs

* commit fix

* fix

* fix

* Update Jenkinsfile
  • Loading branch information
piiswrong authored Sep 26, 2017
1 parent fa7c654 commit a36bf57
Show file tree
Hide file tree
Showing 18 changed files with 774 additions and 396 deletions.
1 change: 0 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,6 @@ if(USE_CUDA)
endif()
list(APPEND SOURCE ${cuda_objs} ${CUDA})
add_definitions(-DMXNET_USE_CUDA=1)
add_definitions(-DMXNET_USE_NVRTC=1)
if(CUDA_LIBRARY_PATH)
if(IS_CONTAINER_BUILD)
# In case of building on a production-like build container which may not have Cuda installed
Expand Down
4 changes: 4 additions & 0 deletions Jenkinsfile
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,11 @@ try {
init_git()
unpack_lib('gpu')
timeout(time: max_time, unit: 'MINUTES') {
try {
sh "${docker_run} gpu ./perl-package/test.sh"
} catch (exc) {
error "Perl GPU test failed."
}
}
}
}
Expand Down
9 changes: 1 addition & 8 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ ALL_DEP = $(OBJ) $(EXTRA_OBJ) $(PLUGIN_OBJ) $(LIB_DEP)
ifeq ($(USE_CUDA), 1)
CFLAGS += -I$(ROOTDIR)/cub
ALL_DEP += $(CUOBJ) $(EXTRA_CUOBJ) $(PLUGIN_CUOBJ)
LDFLAGS += -lcuda -lcufft
LDFLAGS += -lcuda -lcufft -lnvrtc
SCALA_PKG_PROFILE := $(SCALA_PKG_PROFILE)-gpu
else
SCALA_PKG_PROFILE := $(SCALA_PKG_PROFILE)-cpu
Expand All @@ -281,13 +281,6 @@ endif
# For quick compile test, used smaller subset
ALLX_DEP= $(ALL_DEP)

ifeq ($(USE_NVRTC), 1)
LDFLAGS += -lnvrtc
CFLAGS += -DMXNET_USE_NVRTC=1
else
CFLAGS += -DMXNET_USE_NVRTC=0
endif

build/src/%.o: src/%.cc
@mkdir -p $(@D)
$(CXX) -std=c++11 -c $(CFLAGS) -MMD -c $< -o $@
Expand Down
9 changes: 9 additions & 0 deletions docs/api/python/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,12 @@ imported by running:
metric/metric.md
```

## Run-Time Compilation API

```eval_rst
.. toctree::
:maxdepth: 1
rtc/rtc.md
```
29 changes: 29 additions & 0 deletions docs/api/python/rtc/rtc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Run-Time Compilation API

```eval_rst
.. currentmodule:: mxnet.rtc
```

## Overview

The RTC package contains tools for compiling and running CUDA code from the
Python frontend. The compiled kernels can be used stand-alone or combined with
`autograd.Function` or `operator.CustomOpProp` to support differentiation.

```eval_rst
.. autosummary::
:nosignatures:
mxnet.rtc
```

## API Reference

<script type="text/javascript" src='../../_static/js/auto_module_index.js'></script>

```eval_rst
.. automodule:: mxnet.rtc
:members:
```

<script>auto_index("api-reference");</script>
57 changes: 57 additions & 0 deletions include/mxnet/c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ typedef void *KVStoreHandle;
typedef void *RecordIOHandle;
/*! \brief handle to MXRtc*/
typedef void *RtcHandle;
/*! \brief handle to rtc cuda module*/
typedef void *CudaModuleHandle;
/*! \brief handle to rtc cuda kernel*/
typedef void *CudaKernelHandle;

typedef void (*ExecutorMonitorCallback)(const char*,
NDArrayHandle,
Expand Down Expand Up @@ -1922,6 +1926,59 @@ MXNET_DLL int MXCustomOpRegister(const char* op_type, CustomOpPropCreator creato
MXNET_DLL int MXCustomFunctionRecord(int num_inputs, NDArrayHandle *inputs,
int num_outputs, NDArrayHandle *outputs,
struct MXCallbackList *callbacks);
/*!
* \brief create cuda rtc module
* \param source cuda source code
* \param num_options number of compiler flags
* \param options compiler flags
* \param num_exports number of exported function names
* \param exports exported function names
* \param out handle to created module
* \note NOTE(review): the return-value convention is not documented here;
*       presumably 0 on success as for other MXNET_DLL entry points - confirm.
*/
MXNET_DLL int MXRtcCudaModuleCreate(const char* source, int num_options,
const char** options, int num_exports,
const char** exports, CudaModuleHandle *out);
/*!
* \brief delete cuda rtc module
* \param handle handle to cuda module
*/
MXNET_DLL int MXRtcCudaModuleFree(CudaModuleHandle handle);
/*!
* \brief get kernel from module
* \param handle handle to cuda module
* \param name name of kernel function
* \param num_args number of arguments
* \param is_ndarray whether argument is ndarray
* \param is_const whether argument is constant
* \param arg_types data type of arguments
* \param out created kernel
* \note NOTE(review): is_ndarray, is_const and arg_types are presumably arrays
*       with one entry per kernel argument (num_args entries) - confirm
*       against the implementation.
*/
MXNET_DLL int MXRtcCudaKernelCreate(CudaModuleHandle handle, const char* name,
int num_args, int* is_ndarray, int* is_const,
int* arg_types, CudaKernelHandle *out);
/*!
* \brief delete kernel
* \param handle handle to previously created kernel
*/
MXNET_DLL int MXRtcCudaKernelFree(CudaKernelHandle handle);
/*!
* \brief launch cuda kernel
* \param handle handle to kernel
* \param dev_id (GPU) device id
* \param args pointer to arguments
* \param grid_dim_x grid dimension x
* \param grid_dim_y grid dimension y
* \param grid_dim_z grid dimension z
* \param block_dim_x block dimension x
* \param block_dim_y block dimension y
* \param block_dim_z block dimension z
* \param shared_mem size of dynamically allocated shared memory
*        (NOTE(review): presumably in bytes - confirm)
*/
MXNET_DLL int MXRtcCudaKernelCall(CudaKernelHandle handle, int dev_id, void** args,
mx_uint grid_dim_x, mx_uint grid_dim_y,
mx_uint grid_dim_z, mx_uint block_dim_x,
mx_uint block_dim_y, mx_uint block_dim_z,
mx_uint shared_mem);

#ifdef __cplusplus
}
Expand Down
107 changes: 0 additions & 107 deletions include/mxnet/mxrtc.h

This file was deleted.

Loading

0 comments on commit a36bf57

Please sign in to comment.