intel · steffenlarsen · Jun 2, 2023 · May 3, 2023 · May 11, 2023 · May 11, 2023
@@ -952,3 +952,8 @@ foreach name = ["GroupUMin", "GroupUMax"] in {
 foreach name = ["GroupSMin", "GroupSMax"] in {
   def : SPVBuiltin<name, [ASIGenTypeN, UInt, UInt, ASIGenTypeN], Attr.Convergent>;
 }
+
+// TODO: Scalar bool only for now; these builtins must also take bool vectors.
+foreach name = ["GroupLogicalAndKHR", "GroupLogicalOrKHR"] in {
+  def : SPVBuiltin<name, [Bool, UInt, UInt, Bool], Attr.Convergent>;
+}
@@ -225,6 +225,8 @@ __clc__SubgroupBitwiseAny(uint op, bool predicate, bool *carry) {
 #define __CLC_XOR(x, y) (x ^ y)
 #define __CLC_AND(x, y) (x & y)
 #define __CLC_MUL(x, y) (x * y)
+#define __CLC_LOGICAL_OR(x, y) ((x) || (y)) // parenthesized: safe expansion
+#define __CLC_LOGICAL_AND(x, y) ((x) && (y)) // parenthesized: safe expansion
 
 #define __DEFINE_CLC_COMPLEX_MUL(TYPE)                                         \
   _CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT complex_##TYPE __clc_complex_mul(     \
@@ -424,6 +426,9 @@ __CLC_SUBGROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, long, ~0l)
 __CLC_SUBGROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, long, 0l)
 __CLC_SUBGROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, long, 0l)
 
+__CLC_SUBGROUP_COLLECTIVE(LogicalOrKHR, __CLC_LOGICAL_OR, bool, false)
+__CLC_SUBGROUP_COLLECTIVE(LogicalAndKHR, __CLC_LOGICAL_AND, bool, true)
+
 #undef __CLC_SUBGROUP_COLLECTIVE_BODY
 #undef __CLC_SUBGROUP_COLLECTIVE
 #undef __CLC_SUBGROUP_COLLECTIVE_REDUX
@@ -592,6 +597,9 @@ __CLC_GROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, long, ~0l)
 __CLC_GROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, long, 0l)
 __CLC_GROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, long, 0l)
 
+__CLC_GROUP_COLLECTIVE(LogicalOrKHR, __CLC_LOGICAL_OR, bool, false)
+__CLC_GROUP_COLLECTIVE(LogicalAndKHR, __CLC_LOGICAL_AND, bool, true)
+
 // half requires additional mangled entry points
 #define __CLC_GROUP_COLLECTIVE__DF16(MANGLED_NAME, SPIRV_DISPATCH)             \
   _CLC_DEF _CLC_CONVERGENT half MANGLED_NAME(uint scope, uint op, half x) {    \

@@ -1170,6 +1170,9 @@ __SYCL_GROUP_COLLECTIVE_OVERLOAD(BitwiseOrKHR)
 __SYCL_GROUP_COLLECTIVE_OVERLOAD(BitwiseXorKHR)
 __SYCL_GROUP_COLLECTIVE_OVERLOAD(BitwiseAndKHR)
 
+__SYCL_GROUP_COLLECTIVE_OVERLOAD(LogicalAndKHR)
+__SYCL_GROUP_COLLECTIVE_OVERLOAD(LogicalOrKHR)
+
 } // namespace spirv
 } // namespace detail
 } // __SYCL_INLINE_VER_NAMESPACE(_V1)

@@ -34,6 +34,7 @@ struct GroupOpISigned {};
 struct GroupOpIUnsigned {};
 struct GroupOpFP {};
 struct GroupOpC {};
+struct GroupOpBool {};
 
 template <typename T, typename = void> struct GroupOpTag;
 
@@ -60,6 +61,11 @@ struct GroupOpTag<
   using type = GroupOpC;
 };
 
+template <typename T>
+struct GroupOpTag<T, std::enable_if_t<detail::is_genbool<T>::value>> {
+  using type = GroupOpBool;
+};
+
 #define __SYCL_CALC_OVERLOAD(GroupTag, SPIRVOperation, BinaryOperation)        \
   template <__spv::GroupOperation O, typename Group, typename T>               \
   static T calc(Group g, GroupTag, T x, BinaryOperation) {                     \
@@ -91,6 +97,16 @@ __SYCL_CALC_OVERLOAD(GroupOpIUnsigned, BitwiseXorKHR, sycl::bit_xor<T>)
 __SYCL_CALC_OVERLOAD(GroupOpISigned, BitwiseAndKHR, sycl::bit_and<T>)
 __SYCL_CALC_OVERLOAD(GroupOpIUnsigned, BitwiseAndKHR, sycl::bit_and<T>)
 
+__SYCL_CALC_OVERLOAD(GroupOpBool, LogicalAndKHR, sycl::logical_and<T>)
+__SYCL_CALC_OVERLOAD(GroupOpISigned, LogicalAndKHR, sycl::logical_and<T>)
+__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, LogicalAndKHR, sycl::logical_and<T>)
+__SYCL_CALC_OVERLOAD(GroupOpFP, LogicalAndKHR, sycl::logical_and<T>)
+
+__SYCL_CALC_OVERLOAD(GroupOpBool, LogicalOrKHR, sycl::logical_or<T>)
+__SYCL_CALC_OVERLOAD(GroupOpISigned, LogicalOrKHR, sycl::logical_or<T>)
+__SYCL_CALC_OVERLOAD(GroupOpIUnsigned, LogicalOrKHR, sycl::logical_or<T>)
+__SYCL_CALC_OVERLOAD(GroupOpFP, LogicalOrKHR, sycl::logical_or<T>)
+
 #undef __SYCL_CALC_OVERLOAD
 
 template <__spv::GroupOperation O, typename Group, typename T,

@@ -20,8 +20,20 @@ template <typename T = void> using multiplies = std::multiplies<T>;
 template <typename T = void> using bit_and = std::bit_and<T>;
 template <typename T = void> using bit_or = std::bit_or<T>;
 template <typename T = void> using bit_xor = std::bit_xor<T>;
-template <typename T = void> using logical_and = std::logical_and<T>;
-template <typename T = void> using logical_or = std::logical_or<T>;
+
+// std::logical_and/std::logical_or with a non-void type return bool, whereas
+// SYCL requires returning T. The call operator is const-qualified so that
+// logical_and can be invoked through const objects and references.
+template <typename T = void> struct logical_and {
+  T operator()(const T &lhs, const T &rhs) const { return lhs && rhs; }
+};
+template <> struct logical_and<void> : std::logical_and<void> {};
+
+// Returns T rather than bool (unlike std::logical_or<T>); const-callable.
+template <typename T = void> struct logical_or {
+  T operator()(const T &lhs, const T &rhs) const { return lhs || rhs; }
+};
+template <> struct logical_or<void> : std::logical_or<void> {};
 
 template <typename T = void> struct minimum {
   T operator()(const T &lhs, const T &rhs) const {

@@ -92,7 +92,7 @@ template <typename T>
 using native_op_list =
     type_list<sycl::plus<T>, sycl::bit_or<T>, sycl::bit_xor<T>,
               sycl::bit_and<T>, sycl::maximum<T>, sycl::minimum<T>,
-              sycl::multiplies<T>>;
+              sycl::multiplies<T>, sycl::logical_or<T>, sycl::logical_and<T>>;
 
 template <typename T, typename BinaryOperation> struct is_native_op {
   static constexpr bool value =

@@ -55,12 +55,16 @@ using IsBitXOR =
 
 template <typename T, class BinaryOperation>
 using IsLogicalAND = std::bool_constant<
+    std::is_same_v<BinaryOperation, std::logical_and<T>> ||
+    std::is_same_v<BinaryOperation, std::logical_and<void>> ||
     std::is_same_v<BinaryOperation, sycl::logical_and<T>> ||
     std::is_same_v<BinaryOperation, sycl::logical_and<void>>>;
 
 template <typename T, class BinaryOperation>
 using IsLogicalOR =
-    std::bool_constant<std::is_same_v<BinaryOperation, sycl::logical_or<T>> ||
+    std::bool_constant<std::is_same_v<BinaryOperation, std::logical_or<T>> ||
+                       std::is_same_v<BinaryOperation, std::logical_or<void>> ||
+                       std::is_same_v<BinaryOperation, sycl::logical_or<T>> ||
                        std::is_same_v<BinaryOperation, sycl::logical_or<void>>>;
 
 template <typename T>

@@ -188,6 +188,20 @@ int main() {
   test<class KernelNameBitAndI>(q, input_small, output_small,
                                 sycl::bit_and<int>(), ~0);
 
+  test<class LogicalOrInt>(q, input, output, sycl::logical_or<int>(), 0);
+  test<class LogicalAndInt>(q, input, output, sycl::logical_and<int>(), 1);
+
+  std::array<bool, N> bool_input = {};
+  std::array<bool, N> bool_output = {};
+  test<class LogicalOrBool>(q, bool_input, bool_output,
+                            sycl::logical_or<bool>(), false);
+  test<class LogicalOrVoid>(q, bool_input, bool_output, sycl::logical_or<>(),
+                            false);
+  test<class LogicalAndBool>(q, bool_input, bool_output,
+                             sycl::logical_and<bool>(), true);
+  test<class LogicalAndVoid>(q, bool_input, bool_output, sycl::logical_and<>(),
+                             true);
+
   // as part of SYCL_EXT_ONEAPI_COMPLEX_ALGORITHMS (
   // https://github.com/intel/llvm/pull/5108/ ) joint_exclusive_scan and
   // exclusive_scan_over_group now operate on std::complex but limited to the

@@ -186,6 +186,20 @@ int main() {
   test<class KernelNameBitAndI>(q, input_small, output_small,
                                 sycl::bit_and<int>(), ~0);
 
+  test<class LogicalOrInt>(q, input, output, sycl::logical_or<int>(), 0);
+  test<class LogicalAndInt>(q, input, output, sycl::logical_and<int>(), 1);
+
+  std::array<bool, N> bool_input = {};
+  std::array<bool, N> bool_output = {};
+  test<class LogicalOrBool>(q, bool_input, bool_output,
+                            sycl::logical_or<bool>(), false);
+  test<class LogicalOrVoid>(q, bool_input, bool_output, sycl::logical_or<>(),
+                            false);
+  test<class LogicalAndBool>(q, bool_input, bool_output,
+                             sycl::logical_and<bool>(), true);
+  test<class LogicalAndVoid>(q, bool_input, bool_output, sycl::logical_and<>(),
+                             true);
+
   // as part of SYCL_EXT_ONEAPI_COMPLEX_ALGORITHMS (
   // https://github.com/intel/llvm/pull/5108/ ) joint_inclusive_scan and
   // inclusive_scan_over_group now operate on std::complex limited to using the

@@ -96,6 +96,20 @@ int main() {
   test<class KernelNameBitXorI>(q, input, output, sycl::bit_xor<int>(), 0);
   test<class KernelNameBitAndI>(q, input, output, sycl::bit_and<int>(), ~0);
 
+  test<class LogicalOrInt>(q, input, output, sycl::logical_or<int>(), 0);
+  test<class LogicalAndInt>(q, input, output, sycl::logical_and<int>(), 1);
+
+  std::array<bool, N> bool_input = {};
+  std::array<bool, 6> bool_output = {};
+  test<class LogicalOrBool>(q, bool_input, bool_output,
+                            sycl::logical_or<bool>(), false);
+  test<class LogicalOrVoid>(q, bool_input, bool_output, sycl::logical_or<>(),
+                            false);
+  test<class LogicalAndBool>(q, bool_input, bool_output,
+                             sycl::logical_and<bool>(), true);
+  test<class LogicalAndVoid>(q, bool_input, bool_output, sycl::logical_and<>(),
+                             true);
+
   // as part of SYCL_EXT_ONEAPI_COMPLEX_ALGORITHMS (
   // https://github.com/intel/llvm/pull/5108/ ) joint_reduce and
   // reduce_over_group now operate on std::complex limited to using the