Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion pygpu/_elemwise.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ cdef extern from "gpuarray/elemwise.h":

cdef int GE_BROADCAST
cdef int GE_NOCOLLAPSE
cdef int GE_PADSHAPE


cdef class arg:
Expand Down Expand Up @@ -193,9 +194,19 @@ cdef class GpuElemwise:
def __call__(self, *args, **kwargs):
cdef unsigned int i
cdef int err
cdef int flags

flags = 0
if kwargs.pop('broadcast', True):
flags |= GE_BROADCAST
if kwargs.pop('padshape', True):
flags |= GE_PADSHAPE

if len(kwargs) != 0:
raise TypeError("Unknown keyword argument: %s" % list(kwargs.keys())[0])

for i, arg in enumerate(args):
self._setarg(i, arg)
err = GpuElemwise_call(self.ge, self.callbuf, GE_BROADCAST if kwargs.get('broadcast', True) else 0)
err = GpuElemwise_call(self.ge, self.callbuf, flags)
if err != GA_NO_ERROR:
raise get_exc(err)("Could not call GpuElemwise")
5 changes: 5 additions & 0 deletions src/gpuarray/elemwise.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,11 @@ GPUARRAY_PUBLIC int GpuElemwise_call(GpuElemwise *ge, void **args, int flags);
*/
#define GE_NOCOLLAPSE 0x0200

/**
* Allow implicit left-padding of shape with dimensions of size 1.
*
* When this flag is passed to GpuElemwise_call(), array arguments may
* have fewer dimensions than the array argument with the most
* dimensions; the missing leading dimensions are treated as having
* size 1.  Without this flag, array arguments with differing numbers
* of dimensions are rejected with GA_VALUE_ERROR.
*
* A padded dimension still has to match the corresponding dimension
* of the other arguments, so it is normally combined with
* GE_BROADCAST to let the size-1 dimensions be broadcast.  Outputs
* cannot be broadcast.
*/
#define GE_PADSHAPE 0x0400

/**
* @}
*/
Expand Down
36 changes: 27 additions & 9 deletions src/gpuarray_elemwise.c
Original file line number Diff line number Diff line change
Expand Up @@ -276,14 +276,21 @@ static int check_basic(GpuElemwise *ge, void **args, int flags,
GpuArray *a = NULL, *v;
unsigned int i, j, p, num_arrays = 0, nd = 0, nnd;
int call32 = 1;
unsigned int nd_i = 0;
size_t v_dim_j = 0;

/* Go through the list and grab some info */
for (i = 0; i < ge->n; i++) {
if (is_array(ge->args[i])) {
nd_i = ((GpuArray *)args[i])->nd;
if (num_arrays == 0)
nd = ((GpuArray *)args[i])->nd;
else if (((GpuArray *)args[i])->nd != nd)
return error_fmt(ctx->err, GA_VALUE_ERROR, "Arg %u has differing nd = %u", i, ((GpuArray *)args[i])->nd);
nd = nd_i;
else if (nd_i != nd) {
if (flags & GE_PADSHAPE)
nd = nd_i > nd ? nd_i : nd;
else
return error_fmt(ctx->err, GA_VALUE_ERROR, "Arg %u has differing nd = %u", i, nd_i);
}
++num_arrays;
if (a == NULL && is_output(ge->args[i]))
a = (GpuArray *)args[i];
Expand All @@ -301,15 +308,19 @@ static int check_basic(GpuElemwise *ge, void **args, int flags,
return error_sys(ctx->err, "ge_grow");
}

/* Now we know that all array arguments have the same number of
/* Now we know that all array arguments have at most nd
dimensions and that the expected output size is the size of a */

/* And copy their initial values in */
memcpy(ge->dims, a->dimensions, nd*sizeof(size_t));
p = 0;
for (i = 0; i < ge->n; i++) {
if (is_array(ge->args[i])) {
memcpy(ge->strides[p], ((GpuArray *)args[i])->strides, nd*sizeof(ssize_t));
/* Left-pad strides with zero on implicitly broadcasted dimensions */
memset(ge->strides[p], 0, nd*sizeof(ssize_t));
nd_i = ((GpuArray *)args[i])->nd;
memcpy((char *)(ge->strides[p]) + (nd - nd_i)*sizeof(ssize_t),
((GpuArray *)args[i])->strides, nd_i*sizeof(ssize_t));
p++;
}
}
Expand All @@ -326,16 +337,23 @@ static int check_basic(GpuElemwise *ge, void **args, int flags,
for (i = 0; i < ge->n; i++) {
if (is_array(ge->args[i])) {
v = (GpuArray *)args[i];
if (ge->dims[j] != v->dimensions[j]) {
nd_i = v->nd;
/* Pad shape with 1 if needed for implicitly broadcasted dimensions
and shift if needed */
if (j < nd - nd_i)
v_dim_j = 1;
else
v_dim_j = v->dimensions[j - (nd - nd_i)];
if (ge->dims[j] != v_dim_j) {
/* We can't broadcast outputs */
if (ISCLR(flags, GE_BROADCAST) || is_output(ge->args[i]) ||
v->dimensions[j] != 1) {
return error_fmt(ctx->err, GA_VALUE_ERROR, "Mismatched dimension %u for input %u (expected %" SPREFIX "u got %" SPREFIX "u)", j, i, ge->dims[j], v->dimensions[j]);
v_dim_j != 1) {
return error_fmt(ctx->err, GA_VALUE_ERROR, "Mismatched dimension %u for input %u (expected %" SPREFIX "u got %" SPREFIX "u)", j, i, ge->dims[j], v_dim_j);
}
}
/* If the dimension is 1 set the strides to 0 regardless since
it won't change anything in the non-broadcast case. */
if (v->dimensions[j] == 1) {
if (v_dim_j == 1) {
ge->strides[p][j] = 0;
}
call32 &= v->offset < ADDR32_MAX;
Expand Down
69 changes: 69 additions & 0 deletions tests/check_elemwise.c
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,7 @@ START_TEST(test_basic_offset) {
/* Simulate indexing */
a.offset = 12;
a.dimensions[1] = 3;
GpuArray_fix_flags(&a);

ga_assert_ok(GpuArray_write(&a, data1, sizeof(data1)));

Expand Down Expand Up @@ -563,6 +564,73 @@ START_TEST(test_basic_broadcast) {
}
END_TEST

/*
 * Check GE_PADSHAPE: a 1-d input `a` (3 elements) is combined with a
 * 2-d input `b` of shape (2, 1) into a 2-d output `c` of shape (2, 3).
 * The call must fail with GA_VALUE_ERROR when GE_PADSHAPE is not given
 * and must succeed, producing c[i][j] = a[j] + b[i], when
 * GE_BROADCAST | GE_PADSHAPE are given.
 */
START_TEST(test_basic_padshape) {
GpuArray a;
GpuArray b;
GpuArray c;

GpuElemwise *ge;

static const uint32_t data1[3] = {1, 2, 3};
static const uint32_t data2[2] = {4, 5};
/* Host-side buffer receiving the 2x3 result. */
uint32_t data3[6] = {0};

/* Reused for all three arrays; only the leading entries that each
   array needs are set before each GpuArray_empty() call. */
size_t dims[2];

gpuelemwise_arg args[3] = {{0}};
void *rargs[3];

/* a: one dimension of size 3 (dims[1] is not set yet and is not
   needed for a 1-d array). */
dims[0] = 3;

ga_assert_ok(GpuArray_empty(&a, ctx, GA_UINT, 1, dims, GA_C_ORDER));
ga_assert_ok(GpuArray_write(&a, data1, sizeof(data1)));

/* b: shape (2, 1), allocated in F order. */
dims[0] = 2;
dims[1] = 1;

ga_assert_ok(GpuArray_empty(&b, ctx, GA_UINT, 2, dims, GA_F_ORDER));
ga_assert_ok(GpuArray_write(&b, data2, sizeof(data2)));

/* c: shape (2, 3) output, allocated in C order. */
dims[0] = 2;
dims[1] = 3;

ga_assert_ok(GpuArray_empty(&c, ctx, GA_UINT, 2, dims, GA_C_ORDER));

/* Two read-only inputs and one write-only output, all GA_UINT. */
args[0].name = "a";
args[0].typecode = GA_UINT;
args[0].flags = GE_READ;

args[1].name = "b";
args[1].typecode = GA_UINT;
args[1].flags = GE_READ;

args[2].name = "c";
args[2].typecode = GA_UINT;
args[2].flags = GE_WRITE;

/* Empty preamble, 3 arguments.  NOTE(review): the trailing `2, 0`
   are presumably the nd hint and the flags -- confirm against
   GpuElemwise_new() in elemwise.h. */
ge = GpuElemwise_new(ctx, "", "c = a + b", 3, args, 2, 0);

ck_assert_ptr_ne(ge, NULL);

rargs[0] = &a;
rargs[1] = &b;
rargs[2] = &c;

/* Without GE_PADSHAPE the 1-d `a` next to 2-d arrays must be
   rejected. */
ck_assert_int_eq(GpuElemwise_call(ge, rargs, GE_NOCOLLAPSE), GA_VALUE_ERROR);

/* With padding and broadcasting enabled the same call must succeed. */
ga_assert_ok(GpuElemwise_call(ge, rargs, GE_NOCOLLAPSE | GE_BROADCAST | GE_PADSHAPE));

ga_assert_ok(GpuArray_read(data3, sizeof(data3), &c));

/* Row 0: {1, 2, 3} + 4; row 1: {1, 2, 3} + 5. */
ck_assert_int_eq(data3[0], 5);
ck_assert_int_eq(data3[1], 6);
ck_assert_int_eq(data3[2], 7);
ck_assert_int_eq(data3[3], 6);
ck_assert_int_eq(data3[4], 7);
ck_assert_int_eq(data3[5], 8);
}
END_TEST

START_TEST(test_basic_collapse) {
GpuArray a;
GpuArray b;
Expand Down Expand Up @@ -755,6 +823,7 @@ Suite *get_suite(void) {
tcase_add_test(tc, test_basic_offset);
tcase_add_test(tc, test_basic_remove1);
tcase_add_test(tc, test_basic_broadcast);
tcase_add_test(tc, test_basic_padshape);
tcase_add_test(tc, test_basic_collapse);
tcase_add_test(tc, test_basic_neg_strides);
tcase_add_test(tc, test_basic_0);
Expand Down