Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion vortex-array/src/validity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,14 @@ impl FromIterator<bool> for Validity {
impl From<Nullability> for Validity {
#[inline]
fn from(value: Nullability) -> Self {
match value {
Validity::from(&value)
}
}

impl From<&Nullability> for Validity {
#[inline]
fn from(value: &Nullability) -> Self {
match *value {
Nullability::NonNullable => Validity::NonNullable,
Nullability::Nullable => Validity::AllValid,
}
Expand Down
37 changes: 37 additions & 0 deletions vortex-cuda/kernels/src/constant_numeric.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#include "config.cuh"
#include "types.cuh"
#include <cuda_fp16.h>

// Fill an output buffer with a constant value.
template<typename T>
__device__ void constant_fill(
T *__restrict output,
T value,
uint64_t array_len
) {
const uint64_t worker = blockIdx.x * blockDim.x + threadIdx.x;
const uint64_t startElem = START_ELEM(worker, array_len);
const uint64_t stopElem = STOP_ELEM(worker, array_len);

if (startElem >= array_len) {
return;
}

for (uint64_t idx = startElem; idx < stopElem; idx++) {
output[idx] = value;
}
}

#define GENERATE_CONSTANT_NUMERIC_KERNEL(suffix, Type) \
extern "C" __global__ void constant_numeric_##suffix( \
Type *__restrict output, \
Type value, \
uint64_t array_len \
) { \
constant_fill(output, value, array_len); \
}

FOR_EACH_NUMERIC(GENERATE_CONSTANT_NUMERIC_KERNEL)
26 changes: 4 additions & 22 deletions vortex-cuda/kernels/src/dict.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#include <cuda.h>
#include <cuda_fp16.h>
#include <cuda_runtime.h>
#include <stdint.h>

Expand Down Expand Up @@ -39,30 +38,13 @@ extern "C" __global__ void dict_##value_suffix##_##index_suffix( \
dict_kernel<ValueType, IndexType>(codes, codes_len, values, output); \
}

// Generate for all combinations of value types and index types
// Value types: u8, i8, u16, i16, u32, i32, u64, i64
// Index types: u8, u16, u32, u64 (codes are typically unsigned)

#define GENERATE_DICT_KERNELS_FOR_VALUE(value_suffix, ValueType) \
// Generate dict kernel for all index types (unsigned integers) for a given value type
#define GENERATE_DICT_FOR_ALL_INDICES(value_suffix, ValueType) \
GENERATE_DICT_KERNEL(value_suffix, ValueType, u8, uint8_t) \
GENERATE_DICT_KERNEL(value_suffix, ValueType, u16, uint16_t) \
GENERATE_DICT_KERNEL(value_suffix, ValueType, u32, uint32_t) \
GENERATE_DICT_KERNEL(value_suffix, ValueType, u64, uint64_t)

GENERATE_DICT_KERNELS_FOR_VALUE(u8, uint8_t)
GENERATE_DICT_KERNELS_FOR_VALUE(i8, int8_t)
GENERATE_DICT_KERNELS_FOR_VALUE(u16, uint16_t)
GENERATE_DICT_KERNELS_FOR_VALUE(i16, int16_t)
GENERATE_DICT_KERNELS_FOR_VALUE(u32, uint32_t)
GENERATE_DICT_KERNELS_FOR_VALUE(i32, int32_t)
GENERATE_DICT_KERNELS_FOR_VALUE(u64, uint64_t)
GENERATE_DICT_KERNELS_FOR_VALUE(i64, int64_t)

// Float types
GENERATE_DICT_KERNELS_FOR_VALUE(f16, __half)
GENERATE_DICT_KERNELS_FOR_VALUE(f32, float)
GENERATE_DICT_KERNELS_FOR_VALUE(f64, double)
// Generate for all native ptypes & decimal values
FOR_EACH_NUMERIC(GENERATE_DICT_FOR_ALL_INDICES)

// Decimal types (128-bit and 256-bit)
GENERATE_DICT_KERNELS_FOR_VALUE(i128, int128_t)
GENERATE_DICT_KERNELS_FOR_VALUE(i256, int256_t)
21 changes: 4 additions & 17 deletions vortex-cuda/kernels/src/for.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#include "scalar_kernel.cuh"
#include "types.cuh"

// Frame-of-Reference operation: adds a reference value to each element.
template<typename T>
Expand Down Expand Up @@ -34,22 +35,8 @@ extern "C" __global__ void for_in_out_##suffix( \
scalar_kernel(input, output, array_len, ForOp<Type>{reference}); \
}

// In-place variants (modifies input buffer)
GENERATE_FOR_KERNEL(u8, uint8_t)
GENERATE_FOR_KERNEL(i8, int8_t)
GENERATE_FOR_KERNEL(u16, uint16_t)
GENERATE_FOR_KERNEL(i16, int16_t)
GENERATE_FOR_KERNEL(u32, uint32_t)
GENERATE_FOR_KERNEL(i32, int32_t)
GENERATE_FOR_KERNEL(u64, uint64_t)
GENERATE_FOR_KERNEL(i64, int64_t)
// In-place variants (modifies input buffer) - FoR is only used for integers
FOR_EACH_INTEGER(GENERATE_FOR_KERNEL)

// Separate input/output variants (preserves input buffer)
GENERATE_FOR_IN_OUT_KERNEL(u8, uint8_t)
GENERATE_FOR_IN_OUT_KERNEL(i8, int8_t)
GENERATE_FOR_IN_OUT_KERNEL(u16, uint16_t)
GENERATE_FOR_IN_OUT_KERNEL(i16, int16_t)
GENERATE_FOR_IN_OUT_KERNEL(u32, uint32_t)
GENERATE_FOR_IN_OUT_KERNEL(i32, int32_t)
GENERATE_FOR_IN_OUT_KERNEL(u64, uint64_t)
GENERATE_FOR_IN_OUT_KERNEL(i64, int64_t)
FOR_EACH_INTEGER(GENERATE_FOR_IN_OUT_KERNEL)
27 changes: 9 additions & 18 deletions vortex-cuda/kernels/src/patches.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#include "config.cuh"
#include "types.cuh"

// TODO(aduffy): this is very naive. In the future we need to
// transpose the patches, see G-ALP paper.
Expand Down Expand Up @@ -40,22 +41,12 @@ extern "C" __global__ void patches_##value_suffix##_##index_suffix( \
patches(values, patchIndices, patchValues, patchesLen); \
}

#define GENERATE_PATCHES_KERNEL_FOR_VALUE(ValueT, value_suffix) \
GENERATE_PATCHES_KERNEL(ValueT, value_suffix, uint8_t, u8) \
GENERATE_PATCHES_KERNEL(ValueT, value_suffix, uint16_t, u16) \
GENERATE_PATCHES_KERNEL(ValueT, value_suffix, uint32_t, u32) \
GENERATE_PATCHES_KERNEL(ValueT, value_suffix, uint64_t, u64)
// Generate patches kernel for all index types (unsigned integers) for a given value type
#define GENERATE_PATCHES_FOR_ALL_INDICES(value_suffix, ValueT) \
GENERATE_PATCHES_KERNEL(ValueT, value_suffix, uint8_t, u8) \
GENERATE_PATCHES_KERNEL(ValueT, value_suffix, uint16_t, u16) \
GENERATE_PATCHES_KERNEL(ValueT, value_suffix, uint32_t, u32) \
GENERATE_PATCHES_KERNEL(ValueT, value_suffix, uint64_t, u64)


GENERATE_PATCHES_KERNEL_FOR_VALUE(uint8_t, u8)
GENERATE_PATCHES_KERNEL_FOR_VALUE(uint16_t, u16)
GENERATE_PATCHES_KERNEL_FOR_VALUE(uint32_t, u32)
GENERATE_PATCHES_KERNEL_FOR_VALUE(uint64_t, u64)

GENERATE_PATCHES_KERNEL_FOR_VALUE(int8_t, i8)
GENERATE_PATCHES_KERNEL_FOR_VALUE(int16_t, i16)
GENERATE_PATCHES_KERNEL_FOR_VALUE(int32_t, i32)
GENERATE_PATCHES_KERNEL_FOR_VALUE(int64_t, i64)

GENERATE_PATCHES_KERNEL_FOR_VALUE(float, f32)
GENERATE_PATCHES_KERNEL_FOR_VALUE(double, f64)
// Generate for all native SIMD ptypes
FOR_EACH_NATIVE_SIMD_PTYPE(GENERATE_PATCHES_FOR_ALL_INDICES)
54 changes: 51 additions & 3 deletions vortex-cuda/kernels/src/types.cuh
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

#ifndef VORTEX_CUDA_TYPES_CUH
#define VORTEX_CUDA_TYPES_CUH
#pragma once

#include <cuda_fp16.h>
#include <stdint.h>

// 128-bit signed integer type for decimal values
Expand All @@ -17,4 +17,52 @@ struct __align__(32) int256_t {
int64_t parts[4];
};

#endif // VORTEX_CUDA_TYPES_CUH
// Type iteration macros - call MACRO(suffix, Type) for each type in category.
// These mirror the Rust match_each_*_ptype macros.

// Unsigned integers
#define FOR_EACH_UNSIGNED_INT(MACRO) \
MACRO(u8, uint8_t) \
MACRO(u16, uint16_t) \
MACRO(u32, uint32_t) \
MACRO(u64, uint64_t)

// Signed integers
#define FOR_EACH_SIGNED_INT(MACRO) \
MACRO(i8, int8_t) \
MACRO(i16, int16_t) \
MACRO(i32, int32_t) \
MACRO(i64, int64_t)

// All integers (signed + unsigned)
#define FOR_EACH_INTEGER(MACRO) \
FOR_EACH_UNSIGNED_INT(MACRO) \
FOR_EACH_SIGNED_INT(MACRO)

// All floating point types (requires #include <cuda_fp16.h>)
#define FOR_EACH_FLOAT(MACRO) \
MACRO(f16, __half) \
MACRO(f32, float) \
MACRO(f64, double)

// Native SIMD types (integers + f32/f64, matches match_each_native_simd_ptype)
#define FOR_EACH_NATIVE_SIMD_PTYPE(MACRO) \
FOR_EACH_INTEGER(MACRO) \
MACRO(f32, float) \
MACRO(f64, double)

// All native ptypes (requires #include <cuda_fp16.h>, matches match_each_native_ptype)
#define FOR_EACH_NATIVE_PTYPE(MACRO) \
FOR_EACH_INTEGER(MACRO) \
FOR_EACH_FLOAT(MACRO)

// Large decimal types (128-bit and 256-bit integers for decimal representation).
// Use alongside FOR_EACH_NATIVE_PTYPE for full type coverage.
#define FOR_EACH_LARGE_DECIMAL(MACRO) \
MACRO(i128, int128_t) \
MACRO(i256, int256_t)

// All numeric types: native ptypes + large decimals (requires #include <cuda_fp16.h>)
#define FOR_EACH_NUMERIC(MACRO) \
FOR_EACH_NATIVE_PTYPE(MACRO) \
FOR_EACH_LARGE_DECIMAL(MACRO)
Loading
Loading