Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 161 additions & 0 deletions bitvector_simd_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
/*
* SIMD BitVector Performance Benchmark
* Tests the performance improvements from SIMD-optimized bitwise operations
*/

#include <iostream>
#include <chrono>
#include <vector>
#include <random>
#include <iomanip>
#include "src/util/bit_vector.h"

class PerformanceBenchmark {
private:
static constexpr size_t NUM_ITERATIONS = 10000;
static constexpr size_t BIT_VECTOR_SIZE = 1024; // 32 words = good for SIMD

std::vector<bit_vector> test_vectors;
std::mt19937 rng;

public:
PerformanceBenchmark() : rng(std::random_device{}()) {
// Create test vectors with random data
test_vectors.reserve(100);
for (int i = 0; i < 100; ++i) {
bit_vector bv(BIT_VECTOR_SIZE);

// Fill with random bits
for (size_t j = 0; j < BIT_VECTOR_SIZE; ++j) {
bv.push_back(rng() % 2 == 0);
}
test_vectors.push_back(std::move(bv));
}
}

double benchmark_or_operation() {
auto start = std::chrono::high_resolution_clock::now();

for (size_t iter = 0; iter < NUM_ITERATIONS; ++iter) {
bit_vector result = test_vectors[0];
for (size_t i = 1; i < test_vectors.size(); ++i) {
result |= test_vectors[i];
}

// Prevent optimization
volatile unsigned hash = result.get_hash();
(void)hash;
}

auto end = std::chrono::high_resolution_clock::now();
return std::chrono::duration<double, std::milli>(end - start).count();
}

double benchmark_and_operation() {
auto start = std::chrono::high_resolution_clock::now();

for (size_t iter = 0; iter < NUM_ITERATIONS; ++iter) {
bit_vector result = test_vectors[0];
for (size_t i = 1; i < test_vectors.size(); ++i) {
result &= test_vectors[i];
}

// Prevent optimization
volatile unsigned hash = result.get_hash();
(void)hash;
}

auto end = std::chrono::high_resolution_clock::now();
return std::chrono::duration<double, std::milli>(end - start).count();
}

double benchmark_equality_operation() {
auto start = std::chrono::high_resolution_clock::now();

size_t equal_count = 0;
for (size_t iter = 0; iter < NUM_ITERATIONS; ++iter) {
for (size_t i = 0; i < test_vectors.size(); ++i) {
for (size_t j = i + 1; j < test_vectors.size(); ++j) {
if (test_vectors[i] == test_vectors[j]) {
equal_count++;
}
}
}
}

// Prevent optimization
volatile size_t count = equal_count;
(void)count;

auto end = std::chrono::high_resolution_clock::now();
return std::chrono::duration<double, std::milli>(end - start).count();
}

double benchmark_negation_operation() {
auto start = std::chrono::high_resolution_clock::now();

for (size_t iter = 0; iter < NUM_ITERATIONS; ++iter) {
for (size_t i = 0; i < test_vectors.size(); ++i) {
bit_vector copy = test_vectors[i];
copy.neg();

// Prevent optimization
volatile unsigned hash = copy.get_hash();
(void)hash;
}
}

auto end = std::chrono::high_resolution_clock::now();
return std::chrono::duration<double, std::milli>(end - start).count();
}

void print_system_info() {
std::cout << "=== BitVector SIMD Optimization Benchmark ===" << std::endl;
std::cout << "Test Configuration:" << std::endl;
std::cout << " - BitVector size: " << BIT_VECTOR_SIZE << " bits ("
<< (BIT_VECTOR_SIZE/32) << " words)" << std::endl;
std::cout << " - Number of test vectors: " << test_vectors.size() << std::endl;
std::cout << " - Iterations per test: " << NUM_ITERATIONS << std::endl;

#ifdef __SSE2__
std::cout << " - SIMD optimization: ENABLED (SSE2)" << std::endl;
#else
std::cout << " - SIMD optimization: DISABLED (scalar only)" << std::endl;
#endif
std::cout << std::endl;
}

void run_benchmark() {
print_system_info();

std::cout << "Running performance benchmarks..." << std::endl;
std::cout << std::fixed << std::setprecision(2);

double or_time = benchmark_or_operation();
std::cout << "OR operation: " << or_time << " ms" << std::endl;

double and_time = benchmark_and_operation();
std::cout << "AND operation: " << and_time << " ms" << std::endl;

double eq_time = benchmark_equality_operation();
std::cout << "Equality operation: " << eq_time << " ms" << std::endl;

double neg_time = benchmark_negation_operation();
std::cout << "Negation operation: " << neg_time << " ms" << std::endl;

double total_time = or_time + and_time + eq_time + neg_time;
std::cout << std::endl;
std::cout << "Total benchmark time: " << total_time << " ms" << std::endl;
}
};

// Entry point: builds the benchmark, runs it, and maps any std::exception
// to exit status 1 after printing its message on stderr.
int main() {
    int exit_status = 0;
    try {
        PerformanceBenchmark benchmark;
        benchmark.run_benchmark();
    } catch (const std::exception& e) {
        std::cerr << "Error: " << e.what() << std::endl;
        exit_status = 1;
    }
    return exit_status;
}
193 changes: 193 additions & 0 deletions simd_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
/*
* Simple SIMD optimization test for bitwise operations
* Tests the performance of SIMD vs scalar bitwise operations
*/

#include <iostream>
#include <chrono>
#include <vector>
#include <random>
#include <iomanip>
#include <cstring>

#ifdef __SSE2__
#include <emmintrin.h>
#endif

// Simple test for SIMD bitwise operations on raw arrays.
//
// Two input arrays are filled once from a fixed-seed generator. Each
// benchmark_* method runs NUM_ITERATIONS passes of one operation over the
// full arrays, writing into the shared `result` buffer, and returns the
// elapsed wall-clock time in milliseconds.
class SimdBitwiseTest {
private:
    static constexpr size_t ARRAY_SIZE = 1024; // 32-bit words
    static constexpr size_t NUM_ITERATIONS = 100000;

    std::vector<unsigned> array1;
    std::vector<unsigned> array2;
    std::vector<unsigned> result;

    // Runs both kernels once over the member arrays and compares every word
    // with a plain scalar computation. Leaves the AND result in `result`.
    bool kernels_match_scalar() {
        simd_or_words(array1.data(), array2.data(), result.data(), ARRAY_SIZE);
        for (size_t i = 0; i < ARRAY_SIZE; ++i) {
            if (result[i] != (array1[i] | array2[i])) {
                return false;
            }
        }

        simd_and_words(array1.data(), array2.data(), result.data(), ARRAY_SIZE);
        for (size_t i = 0; i < ARRAY_SIZE; ++i) {
            if (result[i] != (array1[i] & array2[i])) {
                return false;
            }
        }
        return true;
    }

public:
    SimdBitwiseTest() {
        std::mt19937 rng(42); // Fixed seed for reproducible results

        array1.resize(ARRAY_SIZE);
        array2.resize(ARRAY_SIZE);
        result.resize(ARRAY_SIZE);

        // Fill with random data
        for (size_t i = 0; i < ARRAY_SIZE; ++i) {
            array1[i] = rng();
            array2[i] = rng();
        }
    }

    // out[i] = a[i] | b[i] for i in [0, count).
    //
    // With __SSE2__ defined, whole 128-bit groups are processed with
    // unaligned loads/stores, so the pointers need no particular alignment;
    // leftover words are handled by the scalar tail. Without __SSE2__ the
    // vector loop is compiled out and the tail loop processes every word.
    static void simd_or_words(const unsigned* a, const unsigned* b,
                              unsigned* out, size_t count) {
        size_t i = 0;
#ifdef __SSE2__
        const size_t words_per_vec = sizeof(__m128i) / sizeof(unsigned);
        for (; i + words_per_vec <= count; i += words_per_vec) {
            const __m128i va = _mm_loadu_si128(reinterpret_cast<const __m128i*>(a + i));
            const __m128i vb = _mm_loadu_si128(reinterpret_cast<const __m128i*>(b + i));
            _mm_storeu_si128(reinterpret_cast<__m128i*>(out + i), _mm_or_si128(va, vb));
        }
#endif
        for (; i < count; ++i) {
            out[i] = a[i] | b[i];
        }
    }

    // out[i] = a[i] & b[i] for i in [0, count).
    // Same alignment and tail handling as simd_or_words.
    static void simd_and_words(const unsigned* a, const unsigned* b,
                               unsigned* out, size_t count) {
        size_t i = 0;
#ifdef __SSE2__
        const size_t words_per_vec = sizeof(__m128i) / sizeof(unsigned);
        for (; i + words_per_vec <= count; i += words_per_vec) {
            const __m128i va = _mm_loadu_si128(reinterpret_cast<const __m128i*>(a + i));
            const __m128i vb = _mm_loadu_si128(reinterpret_cast<const __m128i*>(b + i));
            _mm_storeu_si128(reinterpret_cast<__m128i*>(out + i), _mm_and_si128(va, vb));
        }
#endif
        for (; i < count; ++i) {
            out[i] = a[i] & b[i];
        }
    }

    // Times NUM_ITERATIONS passes of a word-by-word OR; returns milliseconds.
    double benchmark_scalar_or() {
        auto start = std::chrono::high_resolution_clock::now();

        for (size_t iter = 0; iter < NUM_ITERATIONS; ++iter) {
            for (size_t i = 0; i < ARRAY_SIZE; ++i) {
                result[i] = array1[i] | array2[i];
            }
        }

        auto end = std::chrono::high_resolution_clock::now();
        return std::chrono::duration<double, std::milli>(end - start).count();
    }

#ifdef __SSE2__
    // Times NUM_ITERATIONS passes of the SSE2 OR kernel; returns milliseconds.
    double benchmark_simd_or() {
        auto start = std::chrono::high_resolution_clock::now();

        const unsigned* a = array1.data();
        const unsigned* b = array2.data();
        unsigned* out = result.data();

        for (size_t iter = 0; iter < NUM_ITERATIONS; ++iter) {
            simd_or_words(a, b, out, ARRAY_SIZE);
        }

        auto end = std::chrono::high_resolution_clock::now();
        return std::chrono::duration<double, std::milli>(end - start).count();
    }

    // Times NUM_ITERATIONS passes of the SSE2 AND kernel; returns milliseconds.
    double benchmark_simd_and() {
        auto start = std::chrono::high_resolution_clock::now();

        const unsigned* a = array1.data();
        const unsigned* b = array2.data();
        unsigned* out = result.data();

        for (size_t iter = 0; iter < NUM_ITERATIONS; ++iter) {
            simd_and_words(a, b, out, ARRAY_SIZE);
        }

        auto end = std::chrono::high_resolution_clock::now();
        return std::chrono::duration<double, std::milli>(end - start).count();
    }
#endif

    // Times NUM_ITERATIONS passes of a word-by-word AND; returns milliseconds.
    double benchmark_scalar_and() {
        auto start = std::chrono::high_resolution_clock::now();

        for (size_t iter = 0; iter < NUM_ITERATIONS; ++iter) {
            for (size_t i = 0; i < ARRAY_SIZE; ++i) {
                result[i] = array1[i] & array2[i];
            }
        }

        auto end = std::chrono::high_resolution_clock::now();
        return std::chrono::duration<double, std::milli>(end - start).count();
    }

    // Prints the configuration, runs the scalar benchmarks (and the SIMD
    // ones when __SSE2__ is defined) with speedup ratios, then verifies the
    // OR and AND kernels against scalar results and prints PASSED/FAILED.
    void run_benchmark() {
        std::cout << "=== SIMD Bitwise Operations Benchmark ===" << std::endl;
        std::cout << "Array size: " << ARRAY_SIZE << " words (" << (ARRAY_SIZE * sizeof(unsigned)) << " bytes)" << std::endl;
        std::cout << "Iterations: " << NUM_ITERATIONS << std::endl;

#ifdef __SSE2__
        std::cout << "SSE2 support: ENABLED" << std::endl;
#else
        std::cout << "SSE2 support: DISABLED" << std::endl;
#endif
        std::cout << std::endl;

        std::cout << std::fixed << std::setprecision(2);

        double scalar_or_time = benchmark_scalar_or();
        std::cout << "Scalar OR: " << scalar_or_time << " ms" << std::endl;

#ifdef __SSE2__
        double simd_or_time = benchmark_simd_or();
        std::cout << "SIMD OR: " << simd_or_time << " ms";
        // Skip the ratio when the timer reported zero to avoid dividing by it.
        if (simd_or_time > 0) {
            double or_speedup = scalar_or_time / simd_or_time;
            std::cout << " (speedup: " << or_speedup << "x)";
        }
        std::cout << std::endl;
#endif

        double scalar_and_time = benchmark_scalar_and();
        std::cout << "Scalar AND: " << scalar_and_time << " ms" << std::endl;

#ifdef __SSE2__
        double simd_and_time = benchmark_simd_and();
        std::cout << "SIMD AND: " << simd_and_time << " ms";
        // Skip the ratio when the timer reported zero to avoid dividing by it.
        if (simd_and_time > 0) {
            double and_speedup = scalar_and_time / simd_and_time;
            std::cout << " (speedup: " << and_speedup << "x)";
        }
        std::cout << std::endl;
#endif

        std::cout << std::endl;
        std::cout << "Verification: ";
        // Check both kernels (OR and AND) against scalar results.
        const bool correct = kernels_match_scalar();

        std::cout << (correct ? "PASSED" : "FAILED") << std::endl;
    }
};

// Entry point: constructs the test, runs the benchmark, and reports any
// std::exception on stderr with a non-zero exit status.
int main() {
    int status = 0;
    try {
        SimdBitwiseTest test;
        test.run_benchmark();
    } catch (const std::exception& e) {
        std::cerr << "Error: " << e.what() << std::endl;
        status = 1;
    }
    return status;
}
Loading
Loading