Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated lib kernel unit tests #1599

Open
wants to merge 46 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
c0bf108
test_utils refactor, local_cpu_allocator
oOTigger Jul 11, 2024
47ad0d8
test utils modification, cast, reverse, and replicate cpu kernels
oOTigger Jul 12, 2024
921fe65
combine kernel
oOTigger Jul 14, 2024
4ca67aa
combine kernels .h file
oOTigger Jul 14, 2024
86edf2e
Implementations for methods for machine_views and associated modules …
Marsella8 Jul 19, 2024
d9af610
test utils logic cleanup, reverse cpu_kernel pedagogical implementatio…
oOTigger Jul 31, 2024
64034a5
cpu_kernel's refactor, generic tensor accessor indexing
oOTigger Oct 8, 2024
0304f17
accessor.h formatting
oOTigger Oct 8, 2024
7c3ff87
mk_runtime_error formatting
oOTigger Oct 8, 2024
65d7804
reverse_kernels include
oOTigger Oct 8, 2024
7c5fb1f
test_utils refactor and clarity
oOTigger Oct 15, 2024
8188afe
formatting
oOTigger Oct 15, 2024
a13255b
comment removal reverse_kernels
oOTigger Oct 15, 2024
7ed5624
Issue #1435, tests for managed stream and handle
oOTigger Oct 16, 2024
c1758c0
#1435 formatting
oOTigger Oct 16, 2024
54b3888
#1409 issue, change datatype for linear kernels away from void *
oOTigger Oct 16, 2024
5b5c2f6
R & W accessor changes, minimize code bloat
oOTigger Nov 5, 2024
ddae367
code formatting and refactor
oOTigger Nov 16, 2024
507df4a
issue #1502 & issue #1540
oOTigger Nov 22, 2024
c64a55c
format check
oOTigger Nov 22, 2024
a091652
branch merge and test fixes
oOTigger Jan 28, 2025
f19df3a
Merge remote-tracking branch 'origin/master' into cpu-kernels-tests-v2
oOTigger Jan 29, 2025
8860adf
build issues
oOTigger Jan 29, 2025
7b74acc
Add AWS linux AMI to runs-on for testing (#1589)
lockshaw Jan 30, 2025
8cdc677
Pin runs-on images (#1590)
lockshaw Jan 30, 2025
209db7e
GPU CI Fix (Pin runs-on GPU image) (#1588)
lockshaw Jan 31, 2025
0d2ffdb
Merge substitution-builder (#1575)
victorli2002 Feb 1, 2025
fe339eb
test_utils refactor, local_cpu_allocator
oOTigger Jul 11, 2024
2e2ae13
test utils modification, cast, reverse, and replicate cpu kernels
oOTigger Jul 12, 2024
6c30466
combine kernel
oOTigger Jul 14, 2024
5b5c591
test utils logic cleanup, reverse cpu_kernel pedagogical implementatio…
oOTigger Jul 31, 2024
f0432c3
cpu_kernel's refactor, generic tensor accessor indexing
oOTigger Oct 8, 2024
74d186d
test_utils refactor and clarity
oOTigger Oct 15, 2024
f95d9da
R & W accessor changes, minimize code bloat
oOTigger Nov 5, 2024
8c8bc75
issue #1502 & issue #1540
oOTigger Nov 22, 2024
c00ab84
branch merge and test fixes
oOTigger Jan 28, 2025
bc4b659
merge
oOTigger Feb 5, 2025
3146712
Merge remote-tracking branch 'origin/master' into cpu-kernels-tests
oOTigger Feb 5, 2025
e71b6d7
build after merge
oOTigger Feb 5, 2025
311caf8
kernel issues
oOTigger Feb 8, 2025
157407d
managed stream / handle test case fix
oOTigger Feb 10, 2025
338fc8d
Merge remote-tracking branch 'origin/master' into cpu-kernels-tests
oOTigger Feb 10, 2025
f73e7a1
accessor, array_shape, copy_tensor_accessor, datatype_dispatch, alloc…
oOTigger Feb 25, 2025
4fc0475
remove . files
oOTigger Feb 25, 2025
8b72dcd
format issues
oOTigger Feb 25, 2025
2914494
merge w/ master
oOTigger Feb 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
#1409 issue, change datatype for linear kernels away from void *
  • Loading branch information
oOTigger committed Jan 22, 2025
commit 54b3888eb36776eb3d99901463777c4d592ee064
22 changes: 11 additions & 11 deletions lib/kernels/include/kernels/linear_kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,23 +50,23 @@ bool use_activation(Activation activation);

void forward_kernel(ffStream_t stream,
LinearPerDeviceState const &m,
void const *input_ptr,
void *output_ptr,
void const *filter_ptr,
void const *bias_ptr,
float const *input_ptr,
float *output_ptr,
float const *filter_ptr,
float const *bias_ptr,
int in_dim,
int out_dim,
int batch_size);

void backward_kernel(ffStream_t stream,
LinearPerDeviceState const &m,
void const *input_ptr,
void *input_grad_ptr,
void const *output_ptr,
void *output_grad_ptr,
void const *kernel_ptr,
void *kernel_grad_ptr,
void *bias_ptr,
float const *input_ptr,
float *input_grad_ptr,
float const *output_ptr,
float *output_grad_ptr,
float const *kernel_ptr,
float *kernel_grad_ptr,
float *bias_ptr,
int in_dim,
int out_dim,
int batch_size);
Expand Down
76 changes: 41 additions & 35 deletions lib/kernels/src/cuda/ops/linear_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,10 @@ LinearPerDeviceState init_kernel(PerDeviceFFHandle handle,

void forward_kernel(cudaStream_t stream,
LinearPerDeviceState const &m,
void const *input_ptr,
void *output_ptr,
void const *weight_ptr,
void const *bias_ptr,
float const *input_ptr,
float *output_ptr,
float const *weight_ptr,
float const *bias_ptr,
int in_dim,
int out_dim,
int batch_size) {
Expand All @@ -135,14 +135,14 @@ void forward_kernel(cudaStream_t stream,
batch_size,
in_dim,
&alpha,
weight_ptr,
(void *)weight_ptr,
weight_type,
in_dim,
input_ptr,
(void *)input_ptr,
input_type,
in_dim,
&beta,
output_ptr,
(void *)output_ptr,
output_type,
out_dim,
compute_type,
Expand All @@ -156,14 +156,14 @@ void forward_kernel(cudaStream_t stream,
batch_size,
1,
&alpha,
bias_ptr,
(void *)bias_ptr,
weight_type,
1,
m.one_ptr,
(void *)m.one_ptr,
CUDA_R_32F,
1,
&alpha,
output_ptr,
(void *)output_ptr,
output_type,
out_dim,
compute_type,
Expand All @@ -174,10 +174,10 @@ void forward_kernel(cudaStream_t stream,
m.actiDesc,
&alpha,
m.outputTensor,
output_ptr,
(void *)output_ptr,
&beta,
m.outputTensor,
output_ptr));
(void *)output_ptr));
} else if (m.activation == Activation::GELU) {
size_t elements = size_t_from_int(out_dim) * size_t_from_int(batch_size);
constexpr float B = 0.7978845608028654f; // sqrt(2.0/M_PI)
Expand All @@ -191,13 +191,13 @@ void forward_kernel(cudaStream_t stream,

void backward_kernel(cudaStream_t stream,
LinearPerDeviceState const &m,
void const *input_ptr,
void *input_grad_ptr,
void const *output_ptr,
void *output_grad_ptr,
void const *kernel_ptr,
void *kernel_grad_ptr,
void *bias_grad_ptr,
float const *input_ptr,
float *input_grad_ptr,
float const *output_ptr,
float *output_grad_ptr,
float const *kernel_ptr,
float *kernel_grad_ptr,
float *bias_grad_ptr,
int in_dim,
int out_dim,
int batch_size) {
Expand All @@ -216,11 +216,17 @@ void backward_kernel(cudaStream_t stream,
int output_size = out_dim * batch_size;
if (m.activation.has_value()) {
if (m.activation == Activation::RELU) {
relu_backward_kernel(
m.output_type, output_grad_ptr, output_ptr, output_size, stream);
relu_backward_kernel(m.output_type,
(void *)output_grad_ptr,
(void *)output_ptr,
output_size,
stream);
} else if (m.activation == Activation::SIGMOID) {
sigmoid_backward_kernel(
m.output_type, output_grad_ptr, output_ptr, output_size, stream);
sigmoid_backward_kernel(m.output_type,
(void *)output_grad_ptr,
(void *)output_ptr,
output_size,
stream);
} else {
// TODO: only support relu and sigmoid for now
assert(false && "Unsupported activation for Linear");
Expand All @@ -235,14 +241,14 @@ void backward_kernel(cudaStream_t stream,
out_dim,
batch_size,
&alpha,
input_ptr,
(void *)input_ptr,
input_type,
in_dim,
output_grad_ptr,
(void *)output_grad_ptr,
output_type,
out_dim,
&alpha,
kernel_grad_ptr,
(void *)kernel_grad_ptr,
weight_type,
in_dim,
compute_type,
Expand All @@ -261,12 +267,12 @@ void backward_kernel(cudaStream_t stream,
in_dim,
out_dim,
&alpha,
(float *)kernel_grad_ptr,
kernel_grad_ptr,
in_dim,
&lambda,
(float *)kernel_ptr,
kernel_ptr,
in_dim,
(float *)kernel_grad_ptr,
kernel_grad_ptr,
in_dim));
} else {
assert(false && "Only L2 regularization is supported");
Expand All @@ -284,14 +290,14 @@ void backward_kernel(cudaStream_t stream,
out_dim,
batch_size,
&alpha,
m.one_ptr,
(void *)m.one_ptr,
CUDA_R_32F,
1,
output_grad_ptr,
(void *)output_grad_ptr,
output_type,
out_dim,
&alpha,
bias_grad_ptr,
(void *)bias_grad_ptr,
weight_type,
1,
compute_type,
Expand All @@ -307,14 +313,14 @@ void backward_kernel(cudaStream_t stream,
batch_size,
out_dim,
&alpha,
kernel_ptr,
(void *)kernel_ptr,
weight_type,
in_dim,
output_grad_ptr,
(void *)output_grad_ptr,
output_type,
out_dim,
&alpha,
input_grad_ptr,
(void *)input_grad_ptr,
input_type,
in_dim,
compute_type,
Expand Down
14 changes: 7 additions & 7 deletions lib/local-execution/src/ops/linear.cc
Original file line number Diff line number Diff line change
Expand Up @@ -148,13 +148,13 @@ static std::optional<float>
profiling,
"[Linear] backward_time = {:.2lf}ms\n",
per_device_state,
(void *)input.get_float_ptr(),
(void *)input_grad.get_float_ptr(),
(void *)output.get_float_ptr(),
(void *)output_grad.get_float_ptr(),
(void *)weight.get_float_ptr(),
(void *)weight_grad.get_float_ptr(),
(void *)bias_ptr,
input.get_float_ptr(),
(float *)input_grad.get_float_ptr(),
output.get_float_ptr(),
(float *)output_grad.get_float_ptr(),
weight.get_float_ptr(),
(float *)weight_grad.get_float_ptr(),
(float *)bias_ptr,
in_dim,
out_dim,
batch_size);
Expand Down