relu backprop
CubeFlix committed Jul 6, 2023
1 parent b2c4d81 commit c5c87d6
Showing 5 changed files with 67 additions and 5 deletions.
7 changes: 7 additions & 0 deletions matricies/Matrix.cpp
@@ -143,6 +143,13 @@ namespace Oliver {
         }
     }
 
+    void Matrix::heaviside(int device) {
+        cudaError_t cudaError = cudaHeaviside(this, device);
+        if (cudaError != cudaSuccess) {
+            throw CUDAException(cudaError);
+        }
+    }
+
     void Matrix::addBias(Matrix* x, int device) {
         // Ensure the dimensions match.
         if (m_cols != x->cols()) {
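The wrapper follows the same pattern as the other element-wise operations in Matrix.cpp: it forwards to the CUDA entry point and throws a CUDAException on failure. Below is a minimal host-side usage sketch, not part of the commit, assuming only the Matrix constructor, buf(), and heaviside(int device) declared in Matrix.h further down, and a single GPU exposed as device 0:

#include "Matrix.h"

int main() {
    // Fill a 2x2 matrix through its raw host buffer.
    Oliver::Matrix m(2, 2);
    float vals[4] = { -1.5f, 0.0f, 2.0f, -0.25f };
    for (int i = 0; i < 4; i++) {
        m.buf()[i] = vals[i];
    }

    // In-place Heaviside step on device 0: negative entries become 0, positive entries become 1.
    m.heaviside(0);
    return 0;
}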
5 changes: 3 additions & 2 deletions matricies/Matrix.h
@@ -27,8 +27,8 @@ namespace Oliver {
         Matrix(unsigned const int rows, unsigned const int cols);
         ~Matrix();
 
-        unsigned const int rows();
-        unsigned const int cols();
+        unsigned int rows();
+        unsigned int cols();
         float* buf();
 
         void add(Matrix *x, int device);
@@ -44,6 +44,7 @@ namespace Oliver {
         void inv(int device);
         void exp(int device);
         void log(int device);
+        void heaviside(int device);
         void addBias(Matrix* x, int device);
         void max(float x, int device);
 
54 changes: 54 additions & 0 deletions matricies/MatrixKernels.cu
@@ -758,6 +758,60 @@ namespace Oliver {
         return cudaStatus;
     }
 
+    // Matrix heaviside kernel.
+    __global__ void heavisideKernel(float* a, const unsigned int rows, const unsigned int cols) {
+        int rowIdx = threadIdx.y + blockIdx.y * blockDim.y;
+        int colIdx = threadIdx.x + blockIdx.x * blockDim.x;
+
+        if (rowIdx < rows && colIdx < cols) {
+            int i = colIdx + rowIdx * cols;
+            a[i] = step(0.0, a[i]);
+        }
+    }
+
+    // GPU matrix heaviside function.
+    cudaError_t cudaHeaviside(Matrix* a, int device) {
+        float* dev_a = 0;
+        size_t s = a->rows() * a->cols() * sizeof(float);
+        cudaError_t cudaStatus;
+
+        // Move data to the device.
+        cudaStatus = cudaSetDevice(device);
+        if (cudaStatus != cudaSuccess) {
+            goto Error;
+        }
+        cudaStatus = cudaAllocCopy((void**)&dev_a, a->buf(), s);
+        if (cudaStatus != cudaSuccess) {
+            goto Error;
+        }
+
+        // Calculate the thread and block dimensions.
+        const dim3 threadsPerBlock(BLOCK_SIZE, BLOCK_SIZE);
+        const dim3 blocksPerGrid((a->cols() + BLOCK_SIZE - 1) / BLOCK_SIZE, (a->rows() + BLOCK_SIZE - 1) / BLOCK_SIZE);
+
+        // Run the kernel.
+        heavisideKernel<<<blocksPerGrid, threadsPerBlock>>>(dev_a, a->rows(), a->cols());
+
+        // Clean up.
+        cudaStatus = cudaGetLastError();
+        if (cudaStatus != cudaSuccess) {
+            goto Error;
+        }
+        cudaStatus = cudaDeviceSynchronize();
+        if (cudaStatus != cudaSuccess) {
+            goto Error;
+        }
+        cudaStatus = cudaMemcpy(a->buf(), dev_a, s, cudaMemcpyDeviceToHost);
+        if (cudaStatus != cudaSuccess) {
+            goto Error;
+        }
+
+    Error:
+        cudaFree(dev_a);
+
+        return cudaStatus;
+    }
+
     // Matrix add bias kernel.
     __global__ void addBiasKernel(float* a, const float* b, const unsigned int rows, const unsigned int cols) {
         int rowIdx = threadIdx.y + blockIdx.y * blockDim.y;
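heavisideKernel calls a step helper that is not part of this diff and is not provided by CUDA's device math library, so it is presumably defined earlier in MatrixKernels.cu. A minimal sketch of what such a helper could look like, an assumption rather than the repository's actual definition:

// Hypothetical device helper: step(edge, x) returns 0 when x < edge and 1 otherwise,
// so step(0.0, a[i]) computes the Heaviside step of a[i] (taking H(0) = 1).
__device__ inline float step(float edge, float x) {
    return x < edge ? 0.0f : 1.0f;
}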
1 change: 1 addition & 0 deletions matricies/MatrixKernels.cuh
@@ -34,6 +34,7 @@ namespace Oliver {
     cudaError_t cudaInv(Matrix* a, int device);
     cudaError_t cudaExp(Matrix* a, int device);
     cudaError_t cudaLog(Matrix* a, int device);
+    cudaError_t cudaHeaviside(Matrix* a, int device);
     cudaError_t cudaAddBias(Matrix* a, Matrix* b, int device);
     cudaError_t cudaScalarMax(Matrix* a, float b, int device);
     cudaError_t cudaTranspose(Matrix* a, Matrix* out, int device);
5 changes: 2 additions & 3 deletions matricies/RELULayer.cpp
@@ -67,10 +67,9 @@ namespace Oliver {
             throw NetworkException("invalid input, previous gradient, and output gradient matrix sizes for layer");
         }
 
-        // Calculate max(x, 0)/max(x, 0) * grad.
+        // Calculate heaviside(input) * grad.
         memcpy(prevGrad->buf(), m_inputCache->buf(), m_inputCache->rows() * m_inputCache->cols() * sizeof(float));
-        prevGrad->max(0.0, device);
-        prevGrad->div(prevGrad, device);
+        prevGrad->heaviside(device);
         prevGrad->mul(outputGrad, device);
 
         delete m_inputCache;
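With this change the backward pass computes prevGrad = heaviside(input) * outputGrad elementwise, i.e. the ReLU gradient: 1 where the cached input is positive, 0 where it is negative. The replaced max(x, 0)/max(x, 0) formulation evaluates to 0/0 (NaN) wherever the input is not positive. A CPU reference of the new computation, illustrative only and using a hypothetical helper name:

#include <cstddef>

// reluBackwardReference: hypothetical CPU-side check of the gradient step above.
// prevGrad[i] = heaviside(input[i]) * outputGrad[i], taking H(0) = 1.
void reluBackwardReference(const float* input, const float* outputGrad,
                           float* prevGrad, std::size_t n) {
    for (std::size_t i = 0; i < n; i++) {
        float h = (input[i] < 0.0f) ? 0.0f : 1.0f;
        prevGrad[i] = h * outputGrad[i];
    }
}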
