thecppzoo · jamierpond · Feb 27, 2024 · Feb 27, 2024 · Feb 27, 2024 · Feb 27, 2024
diff --git a/.gitignore b/.gitignore
@@ -1,7 +1,10 @@
-# Vscode does not like to build outside of the source tree
-# (multiple glitches)
-
-.vscode
-test/.vscode
-build
-.cache
+
+.vscode
+test/.vscode
+build
+.cache
+.idea
+**cmake-build**
+
+# Vscode does not like to build outside of the source tree
+# (multiple glitches)
diff --git a/CMakeLists.txt b/CMakeLists.txt
diff --git a/inc/zoo/meta/BitmaskMaker.h b/inc/zoo/meta/BitmaskMaker.h
@@ -42,6 +42,7 @@ struct BitmaskMaker {
 
 static_assert(0xF0F0 == BitmaskMaker<uint16_t, 0xF0, 8>::value);
 static_assert(0xEDFEDFED == BitmaskMaker<uint32_t, 0xFED, 12>::value);
+static_assert(0b0001'0001 == BitmaskMaker<unsigned char, 1, 4>::value); // 8-bit result: the value 1 repeated in each 4-bit group
 
 }} // zoo::meta
 

diff --git a/inc/zoo/swar/SWAR.h b/inc/zoo/swar/SWAR.h
@@ -5,6 +5,7 @@
 #include "zoo/meta/log.h"
 
 #include <type_traits>
+#include <initializer_list>
 
 #ifdef _MSC_VER
 #include <iso646.h>
@@ -90,6 +91,21 @@ struct SWAR {
 
     constexpr T value() const noexcept { return m_v; }
 
+    /// \brief Packs one literal per lane into a single value of the base type.
+    ///
+    /// The first element of \c args lands in the most significant lane and the
+    /// last element in the least significant lane, mirroring how a hexadecimal
+    /// literal reads.
+    /// \tparam N number of literals supplied; must equal \c Lanes
+    /// \param args per-lane values; each is expected to fit in \c NBits bits
+    /// (values are not masked, so a wider value spills into the lanes above)
+    /// \return the lanes packed into a \c T
+    template<std::size_t N>
+    constexpr static T baseFromLaneLiterals(const T(&args)[N]) noexcept {
+        static_assert(N == Lanes, "Wrong number of lanes");
+        T result = 0;
+        for (const auto arg: args) {
+            // Shift in two steps so the shift amount stays below the width of
+            // T even when a single lane spans the whole base type, and cast
+            // back to T because integral promotion widens narrow base types
+            // to int.
+            result = static_cast<T>(((result << (NBits - 1)) << 1) | arg);
+        }
+        return result;
+    }
+
+    /// \brief Builds a SWAR whose lanes hold the given literals.
+    ///
+    /// Forwards \c args to \c baseFromLaneLiterals and wraps the packed value.
+    /// \tparam N number of literals in \c args
+    /// \param args per-lane literals
+    template<std::size_t N>
+    constexpr static SWAR fromLaneLiterals(const T(&args)[N]) {
+        const T packed = baseFromLaneLiterals<N>(args);
+        return SWAR{packed};
+    }
+
     #define SWAR_UNARY_OPERATORS_X_LIST \
         X(SWAR, ~)
     //constexpr SWAR operator~() const noexcept { return SWAR{~m_v}; }

diff --git a/inc/zoo/swar/associative_iteration.h b/inc/zoo/swar/associative_iteration.h
@@ -260,7 +260,7 @@ template<int NB, typename B>
 constexpr auto makeLaneMaskFromMSB(SWAR<NB, B> input) {
     using S = SWAR<NB, B>;
     auto msb = input & S{S::MostSignificantBit};
-    auto msbCopiedToLSB = S{msb.value() >> (NB - 1)};
+    auto msbCopiedToLSB = S{static_cast<B>(msb.value() >> (NB - 1))}; // the shift yields int for base types narrower than int, so cast back to B before brace-initializing
     return impl::makeLaneMaskFromMSB_and_LSB(msb, msbCopiedToLSB);
 }
 
@@ -392,8 +392,13 @@ template<
     typename CountHalver
 >
 constexpr auto associativeOperatorIterated_regressive(
-    Base base, Base neutral, IterationCount count, IterationCount forSquaring,
-    Operator op, unsigned log2Count, CountHalver ch
+    const Base base,
+    const Base neutral,
+    IterationCount count,
+    const IterationCount forSquaring,
+    const Operator op,
+    unsigned log2Count,
+    const CountHalver ch
 ) {
     auto result = neutral;
     if(!log2Count) { return result; }
@@ -419,17 +424,54 @@ constexpr auto multiplication_OverflowUnsafe_SpecificBitCount(
 
     auto halver = [](auto counts) {
         auto msbCleared = counts & ~S{S::MostSignificantBit};
-        return S{msbCleared.value() << 1};
+        return S{static_cast<T>(msbCleared.value() << 1)};
     };
 
-    multiplier = S{multiplier.value() << (NB - ActualBits)};
+    multiplier = S{static_cast<T>(multiplier.value() << (NB - ActualBits))};
     return associativeOperatorIterated_regressive(
-        multiplicand, S{0}, multiplier, S{S::MostSignificantBit}, operation,
-        ActualBits, halver
+        multiplicand,
+        S{0},
+        multiplier,
+        S{S::MostSignificantBit},
+        operation,
+        ActualBits,
+        halver
     );
 }
 
-/// \note Not removed yet because it is an example of "progressive" associative exponentiation
+/// \brief Lane-wise exponentiation: raises each lane of \c x to the power held
+/// in the same lane of \c exponent. As the name says, nothing guards against
+/// a lane overflowing.
+///
+/// The exponent bits are inspected from the most significant examined bit
+/// downwards; the iteration itself is delegated to
+/// associativeOperatorIterated_regressive.
+/// \tparam ActualBits number of low bits of each exponent lane that are
+/// examined; it is also forwarded to the lane-wise multiplication
+/// \param x the bases, one per lane
+/// \param exponent the exponents, one per lane
+template<int ActualBits, int NB, typename T>
+constexpr auto exponentiation_OverflowUnsafe_SpecificBitCount(
+    SWAR<NB, T> x,
+    SWAR<NB, T> exponent
+) {
+    using S = SWAR<NB, T>;
+
+    // Multiplies only in the lanes whose MSB in `counts` is set; every other
+    // lane keeps `left` untouched.
+    auto multiplyWhereBitSet = [](auto left, auto right, auto counts) {
+        const auto laneSelector = makeLaneMaskFromMSB(counts);
+        const auto multiplied =
+            multiplication_OverflowUnsafe_SpecificBitCount<ActualBits>(
+                left, right
+            );
+        return (multiplied & laneSelector) | (left & ~laneSelector);
+    };
+
+    // Clears the MSB of every lane first so it cannot spill into the lane
+    // above, then moves the next lower bit of each lane into the MSB position.
+    auto advanceToNextBit = [](auto counts) {
+        const auto withoutMSB = counts & ~S{S::MostSignificantBit};
+        return S{static_cast<T>(withoutMSB.value() << 1)};
+    };
+
+    // Left-align the exponent so its highest examined bit sits in the lane MSB.
+    const auto alignedExponent =
+        S{static_cast<T>(exponent.value() << (NB - ActualBits))};
+    // Neutral element of the multiplication: the value 1 in every lane.
+    const auto onePerLane = S{meta::BitmaskMaker<T, 1, NB>::value};
+
+    return associativeOperatorIterated_regressive(
+        x,
+        onePerLane,
+        alignedExponent,
+        S{S::MostSignificantBit},
+        multiplyWhereBitSet,
+        ActualBits,
+        advanceToNextBit
+    );
+}
+
+/// \note Not removed yet because it is an example of "progressive" associative exponentiation
 template<int ActualBits, int NB, typename T>
 constexpr auto multiplication_OverflowUnsafe_SpecificBitCount_deprecated(
     SWAR<NB, T> multiplicand,
@@ -462,6 +504,17 @@ constexpr auto multiplication_OverflowUnsafe(
         );
 }
 
+/// \brief Lane-wise exponentiation that examines all \c NB bits of each
+/// exponent lane.
+///
+/// Thin wrapper over exponentiation_OverflowUnsafe_SpecificBitCount with the
+/// bit count set to the full lane width.
+/// \param base the bases, one per lane
+/// \param exponent the exponents, one per lane
+template<int NB, typename T>
+constexpr auto exponentiation_OverflowUnsafe(
+    SWAR<NB, T> base,
+    SWAR<NB, T> exponent
+) {
+    constexpr auto FullLaneWidth = NB;
+    return exponentiation_OverflowUnsafe_SpecificBitCount<FullLaneWidth>(
+        base, exponent
+    );
+}
+
 template<int NB, typename T>
 struct SWAR_Pair{
     SWAR<NB, T> even, odd;

diff --git a/test/swar/BasicOperations.cpp b/test/swar/BasicOperations.cpp
@@ -7,7 +7,6 @@
 #include <iostream>
 #include <type_traits>
 
-
 using namespace zoo;
 using namespace zoo::swar;
 
@@ -64,8 +63,31 @@ static_assert(
     multiplication_OverflowUnsafe_SpecificBitCount<3>(Micand, Mplier).value()
 );
 
+static_assert(0b00000010000000110000010100000110 == 0x02'03'05'06); // the 8-bit lanes 2, 3, 5, 6 spelled in binary and in hexadecimal
+
+// Lane-wise powers with four 8-bit lanes; every expected result fits in 8 bits.
+TEST_CASE("Exponentiation with 8-bit lane width (overflow unsafe)", "[swar]") {
+    using S = SWAR<8, u32>;
+    // Lane literals are listed from the most significant lane down.
+    constexpr auto base     = S::fromLaneLiterals({2,   3,  5,  6});
+    constexpr auto exponent = S::fromLaneLiterals({7,   4,  2,  3});
+    constexpr auto expected = S::fromLaneLiterals({128, 81, 25, 216});
+    constexpr auto actual = exponentiation_OverflowUnsafe(base, exponent);
+    // Checked at compile time and again at run time so the test is reported.
+    static_assert(expected.value() == actual.value());
+    CHECK(expected.value() == actual.value());
+}
+
+// Lane-wise powers with four 16-bit lanes; every expected result fits in 16 bits.
+TEST_CASE("Exponentiation with 16-bit lane width (overflow unsafe)", "[swar]") {
+    using S = SWAR<16, u64>;
+    // Lane literals are listed from the most significant lane down.
+    constexpr auto base     = S::fromLaneLiterals({10,   2,  7, 3});
+    constexpr auto exponent = S::fromLaneLiterals({3,    5,  1, 4});
+    constexpr auto expected = S::fromLaneLiterals({1000, 32, 7, 81});
+    constexpr auto actual = exponentiation_OverflowUnsafe(base, exponent);
+    // Checked at compile time and again at run time so the test is reported.
+    static_assert(expected.value() == actual.value());
+    CHECK(expected.value() == actual.value());
 }
 
+}
+
+
 #define HE(nbits, t, v0, v1) \
     static_assert(horizontalEquality<nbits, t>(\
         SWAR<nbits, t>(v0),\
@@ -425,7 +447,7 @@ TEST_CASE(
     "BooleanSWAR MSBtoLaneMask",
     "[swar]"
 ) {
-    // BooleanSWAR as a mask: 
+    // BooleanSWAR as a mask:
     auto bswar =BooleanSWAR<4, u32>(0x0808'0000);
     auto mask = S4_32(0x0F0F'0000);
     CHECK(bswar.MSBtoLaneMask().value() == mask.value());
@@ -452,6 +474,6 @@ TEST_CASE(
     CHECK(SWAR<4, u16>(0x0400).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0100), SWAR<4, u16>(0x0300)).value());
     CHECK(SWAR<4, u16>(0x0B00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0300)).value());
     CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0700)).value());
-    CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value()); 
-    CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value()); 
+    CHECK(SWAR<4, u16>(0x0F00).value() == saturatingUnsignedAddition(SWAR<4, u16>(0x0800), SWAR<4, u16>(0x0800)).value());
+    CHECK(S4_32(0x0F0C'F000).value() == saturatingUnsignedAddition(S4_32(0x0804'F000), S4_32(0x0808'F000)).value());
 }