Skip to content

Commit efaa614

Browse files
committed
Generalize CLKernel.setArg (velocity voodoo) + fix size of vector types of arity 3 (must be treated as size 4 as per section 6.1.5 of spec)
1 parent 95281fb commit efaa614

File tree

3 files changed

+36
-48
lines changed

3 files changed

+36
-48
lines changed

Core/src/main/velocity/com/nativelibs4java/opencl/CLKernel.java

Lines changed: 26 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -231,55 +231,37 @@ public void setArg(int i, Pointer<?> ptr) {
231231
setKernelArg(i, ptr.getValidBytes(), ptr);
232232
}
233233

234+
#foreach ($prim in $primitives)
234235
/**
236+
* Sets a vector type value as the arg at index {@code i}.
237+
*
238+
* For instance, to pass a {@code ${prim.Name}3}, one must pass a {@code ${prim.Name}[]} of length {@code 3}.
239+
*
235240
#documentCallsFunction("clSetKernelArg")
241+
* @param i index of the argument in the kernel's parameters list
242+
* @param arg array of ${prim.Name}s (of length 2, 3, 4, 8 or 16, which must match the arity of the OpenCL vector type used by the kernel).
236243
*/
237-
public void setArg(int i, float[] arg) {
238-
setKernelArg(i, arg.length * 4, arg.length <= MAX_TMP_ITEMS ? localPointer.setFloats(arg) : pointerToFloats(arg));
244+
public void setArg(int i, ${prim.Name}[] arg) {
245+
// Validate the array length as a vector arity: 3 is widened to 4, and any length other than
// 2, 3, 4, 8 or 16 makes normalizeVectorTypeArity throw IllegalArgumentException.
int length = normalizeVectorTypeArity(arg.length);
246+
// The byte size handed to setKernelArg is computed from the normalized arity, not from arg.length.
// When the normalized arity is at most MAX_TMP_ITEMS the values are written through localPointer;
// otherwise a pointer is obtained from the pointerTo...s helper.
// NOTE(review): for arity 3 the size covers 4 elements while the pointer is built from the
// 3-element array; confirm the pointed-to memory is large enough for the 4th element.
setKernelArg(i, length * ${prim.Size}, length <= MAX_TMP_ITEMS ? localPointer.set${prim.CapName}s(arg) : pointerTo${prim.CapName}s(arg));
239247
}
240-
/**
241-
#documentCallsFunction("clSetKernelArg")
242-
*/
243-
public void setArg(int i, int[] arg) {
244-
setKernelArg(i, arg.length * 4, arg.length <= MAX_TMP_ITEMS ? localPointer.setInts(arg) : pointerToInts(arg));
245-
}
246-
/**
247-
#documentCallsFunction("clSetKernelArg")
248-
*/
249-
public void setArg(int i, double[] arg) {
250-
setKernelArg(i, arg.length * 8, arg.length <= MAX_TMP_ITEMS ? localPointer.setDoubles(arg) : pointerToDoubles(arg));
251-
}
252-
/**
253-
#documentCallsFunction("clSetKernelArg")
254-
*/
255-
public void setArg(int i, long[] arg) {
256-
setKernelArg(i, arg.length * 8, arg.length <= MAX_TMP_ITEMS ? localPointer.setLongs(arg) : pointerToLongs(arg));
257-
}
258-
/**
259-
#documentCallsFunction("clSetKernelArg")
260-
*/
261-
public void setArg(int i, short[] arg) {
262-
setKernelArg(i, arg.length * 2, arg.length <= MAX_TMP_ITEMS ? localPointer.setShorts(arg) : pointerToShorts(arg));
263-
}
264-
/**
265-
#documentCallsFunction("clSetKernelArg")
266-
*/
267-
public void setArg(int i, byte[] arg) {
268-
setKernelArg(i, arg.length, arg.length <= MAX_TMP_ITEMS ? localPointer.setBytes(arg) : pointerToBytes(arg));
269-
}
270-
/**
271-
#documentCallsFunction("clSetKernelArg")
272-
*/
273-
public void setArg(int i, boolean[] arg) {
274-
setKernelArg(i, arg.length, arg.length <= MAX_TMP_ITEMS ? localPointer.setBooleans(arg) : pointerToBooleans(arg));
275-
}
276-
/**
277-
#documentCallsFunction("clSetKernelArg")
278-
*/
279-
public void setArg(int i, char[] arg) {
280-
setKernelArg(i, arg.length * 2, arg.length <= MAX_TMP_ITEMS ? localPointer.setChars(arg) : pointerToChars(arg));
248+
#end
249+
250+
private static int normalizeVectorTypeArity(int length) {
251+
switch (length) {
252+
case 3:
253+
// cl_<prim>3 is identical in size, alignment and behavior to cl_<prim>4. See section 6.1.5
254+
return 4;
255+
case 2:
256+
case 4:
257+
case 8:
258+
case 16:
259+
return length;
260+
default:
261+
throw new IllegalArgumentException("Invalid vector type length: " + length + " (valid vector type sizes are 2, 3, 4, 8, 16)");
262+
}
281263
}
282-
264+
283265
/**
284266
#documentCallsFunction("clSetKernelArg")
285267
*/

Core/src/main/velocity/com/nativelibs4java/opencl/ReusablePointer.java

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,14 @@ final class ReusablePointer {
1212

1313
public ReusablePointer(long bytesCapacity) {
1414
this.bytesCapacity = bytesCapacity;
15-
this.pointer = Pointer.allocateBytes(bytesCapacity).withoutValidityInformation();
15+
this.pointer = allocateAlignedBytes(bytesCapacity).withoutValidityInformation();
1616
}
17+
18+
static Pointer<?> allocateAlignedBytes(int count) {
19+
// Allocate memory aligned to 128 bytes to match alignment of cl_double16.
20+
return Pointer.allocateAlignedBytes(null /* io */, count, 128 /* alignment */, null /* beforeDeallocation */);
21+
}
22+
1723
public Pointer<Integer> pointerToInts(int... values) {
1824
if (values == null)
1925
return null;
@@ -48,7 +54,7 @@ public <T> Pointer<T> allocatedBytes(int needed) {
4854
if (needed == 0)
4955
return null;
5056
if (needed > bytesCapacity) {
51-
return (Pointer)Pointer.allocateBytes(needed);
57+
return (Pointer)allocateAlignedBytes(needed);
5258
} else {
5359
return (Pointer)pointer;
5460
}
@@ -59,4 +65,4 @@ public <T> Pointer<T> allocatedSizeTs(int needed) {
5965
public <T> Pointer<T> allocatedInts(int needed) {
6066
return allocatedBytes(needed * 4);
6167
}
62-
}
68+
}

Core/src/test/java/com/nativelibs4java/opencl/KernelTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ public <T> Object testArrayArg(String type, Object array, Class<T> targetType) {
5656

5757
@Test
5858
public void nullArg() {
59-
assertArrayEquals(new Pointer[] { null }, testArg("int", CLKernel.NULL_POINTER_KERNEL_ARGUMENT, Pointer.class).getPointers());
59+
assertArrayEquals(new Pointer[] { null }, testArg("int*", CLKernel.NULL_POINTER_KERNEL_ARGUMENT, Pointer.class).getPointers());
6060
}
6161

6262
byte[] byteTup(int n) {

0 commit comments

Comments
 (0)