Z3Prover · dsyme · Sep 17, 2025
diff --git a/bitvector_simd_benchmark.cpp b/bitvector_simd_benchmark.cpp
@@ -0,0 +1,161 @@
+/*
+ * SIMD BitVector Performance Benchmark
+ * Tests the performance improvements from SIMD-optimized bitwise operations
+ */
+
+#include <iostream>
+#include <chrono>
+#include <vector>
+#include <random>
+#include <iomanip>
+#include "src/util/bit_vector.h"
+
+/** Times bit_vector's |=, &=, == and neg() over a pool of random vectors. */
+class PerformanceBenchmark {
+private:
+    static constexpr size_t NUM_ITERATIONS = 10000;
+    static constexpr size_t BIT_VECTOR_SIZE = 1024; // 32 words = good for SIMD
+    static constexpr size_t NUM_TEST_VECTORS = 100;
+
+    std::vector<bit_vector> test_vectors;
+    std::mt19937 rng;
+
+    /**
+     * Call body() NUM_ITERATIONS times and return the elapsed time in ms.
+     * Timed with steady_clock because it is monotonic; high_resolution_clock
+     * may alias system_clock, which can jump when the wall clock is adjusted.
+     */
+    template <typename Body>
+    static double time_iterations(Body&& body) {
+        const auto start = std::chrono::steady_clock::now();
+        for (size_t iter = 0; iter < NUM_ITERATIONS; ++iter) {
+            body();
+        }
+        const auto end = std::chrono::steady_clock::now();
+        return std::chrono::duration<double, std::milli>(end - start).count();
+    }
+
+public:
+    /**
+     * Fill the pool with NUM_TEST_VECTORS vectors of BIT_VECTOR_SIZE random bits.
+     * @param seed  Generator seed. Defaults to a std::random_device value;
+     *              pass a constant to get the same data on every run.
+     */
+    explicit PerformanceBenchmark(std::mt19937::result_type seed = std::random_device{}())
+        : rng(seed) {
+        test_vectors.reserve(NUM_TEST_VECTORS);
+        for (size_t i = 0; i < NUM_TEST_VECTORS; ++i) {
+            // NOTE(review): assumes bit_vector(n) only reserves capacity, so the
+            // vector holds exactly the bits pushed below - confirm in bit_vector.h.
+            bit_vector bv(BIT_VECTOR_SIZE);
+            for (size_t j = 0; j < BIT_VECTOR_SIZE; ++j) {
+                bv.push_back(rng() % 2 == 0);
+            }
+            test_vectors.push_back(std::move(bv));
+        }
+    }
+
+    /** Time OR-ing vectors 1..N-1 into a copy of vector 0; returns ms. */
+    double benchmark_or_operation() {
+        return time_iterations([this] {
+            bit_vector result = test_vectors[0];
+            for (size_t i = 1; i < test_vectors.size(); ++i) {
+                result |= test_vectors[i];
+            }
+            volatile unsigned hash = result.get_hash(); // Prevent optimization
+            (void)hash;
+        });
+    }
+
+    /** Time AND-ing vectors 1..N-1 into a copy of vector 0; returns ms. */
+    double benchmark_and_operation() {
+        return time_iterations([this] {
+            bit_vector result = test_vectors[0];
+            for (size_t i = 1; i < test_vectors.size(); ++i) {
+                result &= test_vectors[i];
+            }
+            volatile unsigned hash = result.get_hash(); // Prevent optimization
+            (void)hash;
+        });
+    }
+
+    /**
+     * Time operator== over every unordered pair of pool vectors; returns ms.
+     * NOTE(review): independently random vectors presumably differ early, so
+     * this may mostly measure the mismatch path - confirm that is intended.
+     */
+    double benchmark_equality_operation() {
+        size_t equal_count = 0;
+        const double elapsed_ms = time_iterations([this, &equal_count] {
+            for (size_t i = 0; i < test_vectors.size(); ++i) {
+                for (size_t j = i + 1; j < test_vectors.size(); ++j) {
+                    if (test_vectors[i] == test_vectors[j]) {
+                        ++equal_count;
+                    }
+                }
+            }
+        });
+        volatile size_t count = equal_count; // Prevent optimization
+        (void)count;
+        return elapsed_ms;
+    }
+
+    /** Time copying each pool vector and calling neg() on the copy; returns ms. */
+    double benchmark_negation_operation() {
+        return time_iterations([this] {
+            for (size_t i = 0; i < test_vectors.size(); ++i) {
+                bit_vector copy = test_vectors[i];
+                copy.neg();
+                volatile unsigned hash = copy.get_hash(); // Prevent optimization
+                (void)hash;
+            }
+        });
+    }
+
+    /** Print the test configuration and whether __SSE2__ was defined at compile time. */
+    void print_system_info() {
+        std::cout << "=== BitVector SIMD Optimization Benchmark ===" << std::endl;
+        std::cout << "Test Configuration:" << std::endl;
+        std::cout << "  - BitVector size: " << BIT_VECTOR_SIZE << " bits ("
+                  << (BIT_VECTOR_SIZE/32) << " words)" << std::endl;
+        std::cout << "  - Number of test vectors: " << test_vectors.size() << std::endl;
+        std::cout << "  - Iterations per test: " << NUM_ITERATIONS << std::endl;
+#ifdef __SSE2__
+        std::cout << "  - SIMD optimization: ENABLED (SSE2)" << std::endl;
+#else
+        std::cout << "  - SIMD optimization: DISABLED (scalar only)" << std::endl;
+#endif
+        std::cout << std::endl;
+    }
+
+    /** Run all four benchmarks in order, printing each time and then the total. */
+    void run_benchmark() {
+        print_system_info();
+        std::cout << "Running performance benchmarks..." << std::endl;
+        std::cout << std::fixed << std::setprecision(2);
+
+        const double or_time = benchmark_or_operation();
+        std::cout << "OR operation:       " << or_time << " ms" << std::endl;
+        const double and_time = benchmark_and_operation();
+        std::cout << "AND operation:      " << and_time << " ms" << std::endl;
+        const double eq_time = benchmark_equality_operation();
+        std::cout << "Equality operation: " << eq_time << " ms" << std::endl;
+        const double neg_time = benchmark_negation_operation();
+        std::cout << "Negation operation: " << neg_time << " ms" << std::endl;
+
+        const double total_time = or_time + and_time + eq_time + neg_time;
+        std::cout << std::endl;
+        std::cout << "Total benchmark time: " << total_time << " ms" << std::endl;
+    }
+};
+
+// Entry point: run the benchmark; report any std::exception and exit with 1.
+int main() {
+    try {
+        PerformanceBenchmark{}.run_benchmark();
+    } catch (const std::exception& error) {
+        std::cerr << "Error: " << error.what() << std::endl;
+        return 1;
+    }
+    return 0;
+}
diff --git a/simd_test.cpp b/simd_test.cpp
@@ -0,0 +1,193 @@
+/*
+ * Simple SIMD optimization test for bitwise operations
+ * Tests the performance of SIMD vs scalar bitwise operations
+ */
+
+#include <iostream>
+#include <chrono>
+#include <vector>
+#include <random>
+#include <iomanip>
+#include <cstring>
+
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
+// Compares scalar and SSE2 element-wise OR / AND over two arrays of random
+// unsigned words: times each variant and cross-checks the SSE2 results.
+class SimdBitwiseTest {
+private:
+    static constexpr size_t ARRAY_SIZE = 1024; // 32-bit words
+    static constexpr size_t NUM_ITERATIONS = 100000;
+
+    std::vector<unsigned> array1;
+    std::vector<unsigned> array2;
+    std::vector<unsigned> result;
+
+    /**
+     * Call body() NUM_ITERATIONS times and return the elapsed time in ms.
+     * Timed with steady_clock because it is monotonic; high_resolution_clock
+     * may alias system_clock, which can jump when the wall clock is adjusted.
+     */
+    template <typename Body>
+    static double time_iterations(Body&& body) {
+        const auto start = std::chrono::steady_clock::now();
+        for (size_t iter = 0; iter < NUM_ITERATIONS; ++iter) {
+            body();
+        }
+        const auto end = std::chrono::steady_clock::now();
+        return std::chrono::duration<double, std::milli>(end - start).count();
+    }
+
+#ifdef __SSE2__
+    /**
+     * Store op(a, b) into result for every 128-bit group of array1 / array2.
+     * Unaligned loads and stores are used on purpose: _mm_load_si128 and
+     * _mm_store_si128 require 16-byte aligned addresses, and nothing here
+     * guarantees that alignment for std::vector<unsigned> storage.
+     * @param op  Callable taking two __m128i values and returning a __m128i.
+     */
+    template <typename Op>
+    void simd_apply(Op op) {
+        constexpr size_t words_per_vec = sizeof(__m128i) / sizeof(unsigned);
+        static_assert(ARRAY_SIZE % words_per_vec == 0, "ARRAY_SIZE must fill whole vectors");
+        const __m128i* a_simd = reinterpret_cast<const __m128i*>(array1.data());
+        const __m128i* b_simd = reinterpret_cast<const __m128i*>(array2.data());
+        __m128i* result_simd = reinterpret_cast<__m128i*>(result.data());
+        for (size_t i = 0; i < ARRAY_SIZE / words_per_vec; ++i) {
+            const __m128i a = _mm_loadu_si128(a_simd + i);
+            const __m128i b = _mm_loadu_si128(b_simd + i);
+            _mm_storeu_si128(result_simd + i, op(a, b));
+        }
+    }
+
+    /** True if result[i] == expected(array1[i], array2[i]) for every index i. */
+    template <typename Expected>
+    bool result_matches(Expected expected) const {
+        for (size_t i = 0; i < ARRAY_SIZE; ++i) {
+            if (result[i] != expected(array1[i], array2[i])) {
+                return false;
+            }
+        }
+        return true;
+    }
+#endif
+
+public:
+    /** Size all three arrays to ARRAY_SIZE and fill both inputs from mt19937(42). */
+    SimdBitwiseTest() {
+        std::mt19937 rng(42); // Fixed seed for reproducible results
+        array1.resize(ARRAY_SIZE);
+        array2.resize(ARRAY_SIZE);
+        result.resize(ARRAY_SIZE);
+
+        // Fill with random data
+        for (size_t i = 0; i < ARRAY_SIZE; ++i) {
+            array1[i] = rng();
+            array2[i] = rng();
+        }
+    }
+
+    /** Time NUM_ITERATIONS scalar passes of result = array1 | array2; returns ms. */
+    double benchmark_scalar_or() {
+        return time_iterations([this] {
+            for (size_t i = 0; i < ARRAY_SIZE; ++i) {
+                result[i] = array1[i] | array2[i];
+            }
+        });
+    }
+
+#ifdef __SSE2__
+    /** Time NUM_ITERATIONS SSE2 passes of result = array1 | array2; returns ms. */
+    double benchmark_simd_or() {
+        return time_iterations([this] {
+            simd_apply([](__m128i a, __m128i b) { return _mm_or_si128(a, b); });
+        });
+    }
+
+    /** Time NUM_ITERATIONS SSE2 passes of result = array1 & array2; returns ms. */
+    double benchmark_simd_and() {
+        return time_iterations([this] {
+            simd_apply([](__m128i a, __m128i b) { return _mm_and_si128(a, b); });
+        });
+    }
+#endif
+
+    /** Time NUM_ITERATIONS scalar passes of result = array1 & array2; returns ms. */
+    double benchmark_scalar_and() {
+        return time_iterations([this] {
+            for (size_t i = 0; i < ARRAY_SIZE; ++i) {
+                result[i] = array1[i] & array2[i];
+            }
+        });
+    }
+
+    /**
+     * Print the configuration, time every variant (SSE2 ones only when
+     * __SSE2__ is defined, with their speedup over scalar when the SSE2 time
+     * is nonzero), then print a PASSED / FAILED verification line.
+     */
+    void run_benchmark() {
+        std::cout << "=== SIMD Bitwise Operations Benchmark ===" << std::endl;
+        std::cout << "Array size: " << ARRAY_SIZE << " words (" << (ARRAY_SIZE * sizeof(unsigned)) << " bytes)" << std::endl;
+        std::cout << "Iterations: " << NUM_ITERATIONS << std::endl;
+
+#ifdef __SSE2__
+        std::cout << "SSE2 support: ENABLED" << std::endl;
+#else
+        std::cout << "SSE2 support: DISABLED" << std::endl;
+#endif
+        std::cout << std::endl;
+
+        std::cout << std::fixed << std::setprecision(2);
+
+        const double scalar_or_time = benchmark_scalar_or();
+        std::cout << "Scalar OR:  " << scalar_or_time << " ms" << std::endl;
+
+#ifdef __SSE2__
+        const double simd_or_time = benchmark_simd_or();
+        std::cout << "SIMD OR:    " << simd_or_time << " ms";
+        if (simd_or_time > 0) {
+            std::cout << " (speedup: " << (scalar_or_time / simd_or_time) << "x)";
+        }
+        std::cout << std::endl;
+#endif
+
+        const double scalar_and_time = benchmark_scalar_and();
+        std::cout << "Scalar AND: " << scalar_and_time << " ms" << std::endl;
+
+#ifdef __SSE2__
+        const double simd_and_time = benchmark_simd_and();
+        std::cout << "SIMD AND:   " << simd_and_time << " ms";
+        if (simd_and_time > 0) {
+            std::cout << " (speedup: " << (scalar_and_time / simd_and_time) << "x)";
+        }
+        std::cout << std::endl;
+#endif
+
+        std::cout << std::endl;
+        std::cout << "Verification: ";
+        bool correct = true;
+#ifdef __SSE2__
+        // Recompute each SSE2 result once and compare it with the scalar operator.
+        simd_apply([](__m128i a, __m128i b) { return _mm_or_si128(a, b); });
+        correct = result_matches([](unsigned a, unsigned b) { return a | b; });
+        simd_apply([](__m128i a, __m128i b) { return _mm_and_si128(a, b); });
+        correct = result_matches([](unsigned a, unsigned b) { return a & b; }) && correct;
+#endif
+        // NOTE: without __SSE2__ nothing is compared, so PASSED is printed.
+        std::cout << (correct ? "PASSED" : "FAILED") << std::endl;
+    }
+};
+
+// Entry point: run the SIMD test; report any std::exception and exit with 1.
+int main() {
+    try {
+        SimdBitwiseTest{}.run_benchmark();
+    } catch (const std::exception& error) {
+        std::cerr << "Error: " << error.what() << std::endl;
+        return 1;
+    }
+    return 0;
+}