Skip to content

Examples & Tutorials

whisprer edited this page Aug 4, 2025 · 2 revisions

Examples & Tutorials - Universal RNG Library

🎯 Quick Start Examples

Hello World - Your First Random Numbers

#include <universal_rng/core.hpp>
#include <iostream>

int main() {
    // Create a 64-bit generator with default algorithm (Xoroshiro128++)
    auto rng = UniversalRNG::create_generator<64>(UniversalRNG::Algorithm::Xoroshiro128Plus, 12345);
    
    // Generate some random numbers
    for (int i = 0; i < 10; ++i) {
        std::cout << "Random value " << i << ": " << rng.next() << std::endl;
    }
    
    return 0;
}

High-Performance Batch Generation

#include <universal_rng/batch.hpp>
#include <vector>
#include <iostream>
#include <chrono>

int main() {
    // Create high-performance batch generator
    auto batch_rng = UniversalRNG::create_batch_generator<64>();
    
    // Generate a million random numbers efficiently
    constexpr size_t count = 1'000'000;
    std::vector<uint64_t> random_numbers(count);
    
    auto start = std::chrono::high_resolution_clock::now();
    batch_rng.generate_batch(random_numbers.data(), count);
    auto end = std::chrono::high_resolution_clock::now();
    
    auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
    double rate = count / (duration.count() / 1e6) / 1e6;  // M ops/sec
    
    std::cout << "Generated " << count << " numbers in " << duration.count() 
              << " μs (" << rate << " M ops/sec)" << std::endl;
    std::cout << "SIMD implementation: " << batch_rng.simd_implementation() << std::endl;
    
    return 0;
}

🎮 Real-World Applications

Game Development - Procedural World Generation

Terrain Generation

#include <universal_rng/batch.hpp>
#include <vector>
#include <cmath>

class TerrainGenerator {
private:
    UniversalRNG::BatchGenerator<uint64_t, UniversalRNG::Algorithm::WyRand> rng_;
    
public:
    explicit TerrainGenerator(uint64_t world_seed) : rng_{world_seed} {}
    
    // Generate height map for terrain
    std::vector<float> generate_heightmap(int width, int height, float scale = 1.0f) {
        const size_t total_points = width * height;
        std::vector<uint64_t> raw_values(total_points);
        std::vector<float> heightmap(total_points);
        
        // Generate raw random values efficiently
        rng_.generate_batch(raw_values.data(), total_points);
        
        // Convert to normalized heights and apply noise
        for (size_t i = 0; i < total_points; ++i) {
            float normalized = static_cast<float>(raw_values[i]) / UINT64_MAX;
            heightmap[i] = normalized * scale;
        }
        
        return heightmap;
    }
    
    // Generate biome data
    enum class Biome { Desert, Forest, Mountain, Ocean, Plains };
    
    std::vector<Biome> generate_biomes(int width, int height) {
        const size_t total_points = width * height;
        std::vector<uint64_t> raw_values(total_points);
        std::vector<Biome> biomes(total_points);
        
        rng_.generate_batch(raw_values.data(), total_points);
        
        for (size_t i = 0; i < total_points; ++i) {
            uint32_t biome_index = raw_values[i] % 5;
            biomes[i] = static_cast<Biome>(biome_index);
        }
        
        return biomes;
    }
};

// Usage example
int main() {
    TerrainGenerator terrain{42};  // Deterministic world seed
    
    auto heightmap = terrain.generate_heightmap(512, 512, 100.0f);
    auto biomes = terrain.generate_biomes(512, 512);
    
    std::cout << "Generated 512x512 world with " << heightmap.size() 
              << " height points and biome data" << std::endl;
    
    return 0;
}

Entity Spawning System

#include <universal_rng/core.hpp>
#include <vector>
#include <unordered_map>

struct SpawnPoint {
    float x, y, z;
    float spawn_chance;
};

struct Entity {
    enum Type { Goblin, Orc, Dragon, Treasure };
    Type type;
    float x, y, z;
    int level;
};

class EntitySpawner {
private:
    UniversalRNG::Generator<uint64_t, UniversalRNG::Algorithm::Xoroshiro128Plus> rng_;
    std::vector<SpawnPoint> spawn_points_;
    
public:
    explicit EntitySpawner(uint64_t seed) : rng_{seed} {}
    
    void add_spawn_point(float x, float y, float z, float chance) {
        spawn_points_.push_back({x, y, z, chance});
    }
    
    std::vector<Entity> spawn_entities() {
        std::vector<Entity> entities;
        
        for (const auto& point : spawn_points_) {
            // Check if something spawns at this point
            float spawn_roll = static_cast<float>(rng_.next()) / UINT64_MAX;
            if (spawn_roll > point.spawn_chance) continue;
            
            // Determine entity type
            uint32_t type_roll = rng_.next() % 100;
            Entity::Type type;
            if (type_roll < 60) type = Entity::Goblin;
            else if (type_roll < 85) type = Entity::Orc;
            else if (type_roll < 98) type = Entity::Treasure;
            else type = Entity::Dragon;
            
            // Determine level (1-20)
            int level = 1 + (rng_.next() % 20);
            
            // Add some position variance
            float offset_x = (static_cast<float>(rng_.next()) / UINT64_MAX - 0.5f) * 10.0f;
            float offset_z = (static_cast<float>(rng_.next()) / UINT64_MAX - 0.5f) * 10.0f;
            
            entities.push_back({
                type,
                point.x + offset_x,
                point.y,
                point.z + offset_z,
                level
            });
        }
        
        return entities;
    }
};

Scientific Computing - Monte Carlo Simulation

π Estimation (Parallel)

#include <universal_rng/batch.hpp>
#include <thread>
#include <atomic>
#include <vector>
#include <iostream>
#include <cmath>

class PiEstimator {
private:
    struct ThreadData {
        UniversalRNG::BatchGenerator<uint64_t, UniversalRNG::Algorithm::Xoroshiro128Plus> rng;
        size_t samples_per_thread;
        std::atomic<size_t>* points_in_circle;
        
        ThreadData(uint64_t seed, size_t samples, std::atomic<size_t>* counter)
            : rng{seed}, samples_per_thread{samples}, points_in_circle{counter} {}
    };
    
public:
    static double estimate_pi(size_t total_samples, size_t num_threads = std::thread::hardware_concurrency()) {
        std::atomic<size_t> points_in_circle{0};
        std::vector<std::thread> threads;
        std::vector<std::unique_ptr<ThreadData>> thread_data;
        
        size_t samples_per_thread = total_samples / num_threads;
        
        // Create worker threads
        for (size_t i = 0; i < num_threads; ++i) {
            uint64_t thread_seed = std::random_device{}() + i;
            thread_data.emplace_back(std::make_unique<ThreadData>(
                thread_seed, samples_per_thread, &points_in_circle));
            
            threads.emplace_back([data = thread_data.back().get()]() {
                worker_function(*data);
            });
        }
        
        // Wait for completion
        for (auto& t : threads) {
            t.join();
        }
        
        // Calculate π estimate
        return 4.0 * points_in_circle.load() / total_samples;
    }
    
private:
    static void worker_function(ThreadData& data) {
        constexpr size_t batch_size = 1000;
        std::vector<uint64_t> x_values(batch_size);
        std::vector<uint64_t> y_values(batch_size);
        
        size_t remaining = data.samples_per_thread;
        size_t local_count = 0;
        
        while (remaining > 0) {
            size_t current_batch = std::min(batch_size, remaining);
            
            // Generate batch of x and y coordinates
            data.rng.generate_batch(x_values.data(), current_batch);
            data.rng.generate_batch(y_values.data(), current_batch);
            
            // Check which points fall inside unit circle
            for (size_t i = 0; i < current_batch; ++i) {
                double x = static_cast<double>(x_values[i]) / UINT64_MAX;
                double y = static_cast<double>(y_values[i]) / UINT64_MAX;
                
                if (x*x + y*y <= 1.0) {
                    local_count++;
                }
            }
            
            remaining -= current_batch;
        }
        
        data.points_in_circle->fetch_add(local_count);
    }
};

// Usage
int main() {
    constexpr size_t samples = 100'000'000;  // 100 million samples
    
    auto start = std::chrono::high_resolution_clock::now();
    double pi_estimate = PiEstimator::estimate_pi(samples);
    auto end = std::chrono::high_resolution_clock::now();
    
    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
    double error = std::abs(pi_estimate - M_PI);
    
    std::cout << "π estimate: " << pi_estimate << std::endl;
    std::cout << "Actual π:   " << M_PI << std::endl;
    std::cout << "Error:      " << error << std::endl;
    std::cout << "Time:       " << duration.count() << " ms" << std::endl;
    std::cout << "Rate:       " << samples / (duration.count() / 1000.0) / 1e6 << " M samples/sec" << std::endl;
    
    return 0;
}

Financial Modeling - Risk Analysis

Portfolio Risk Simulation

#include <universal_rng/batch.hpp>
#include <vector>
#include <cmath>
#include <algorithm>
#include <numeric>

class PortfolioRiskAnalyzer {
private:
    struct Asset {
        std::string name;
        double weight;           // Portfolio weight
        double expected_return;  // Annual expected return
        double volatility;       // Annual volatility
    };
    
    std::vector<Asset> assets_;
    UniversalRNG::BatchGenerator<uint64_t, UniversalRNG::Algorithm::Xoroshiro128Plus> rng_;
    
public:
    explicit PortfolioRiskAnalyzer(uint64_t seed) : rng_{seed} {}
    
    void add_asset(const std::string& name, double weight, double expected_return, double volatility) {
        assets_.push_back({name, weight, expected_return, volatility});
    }
    
    struct RiskMetrics {
        double value_at_risk_95;    // 95% VaR
        double value_at_risk_99;    // 99% VaR
        double expected_shortfall;  // Average loss beyond VaR
        double max_drawdown;        // Maximum portfolio loss
        std::vector<double> return_distribution;
    };
    
    RiskMetrics analyze_risk(double initial_value, int time_horizon_days, size_t num_simulations) {
        std::vector<double> final_values;
        final_values.reserve(num_simulations);
        
        constexpr size_t batch_size = 1000;
        std::vector<uint64_t> random_batch(batch_size);
        
        for (size_t sim = 0; sim < num_simulations; ++sim) {
            double portfolio_value = initial_value;
            
            for (int day = 0; day < time_horizon_days; ++day) {
                // Generate batch of random numbers when needed
                if (day % batch_size == 0) {
                    size_t needed = std::min(batch_size, 
                        static_cast<size_t>((time_horizon_days - day) * assets_.size()));
                    rng_.generate_batch(random_batch.data(), needed);
                }
                
                double daily_return = 0.0;
                
                for (size_t asset_idx = 0; asset_idx < assets_.size(); ++asset_idx) {
                    const auto& asset = assets_[asset_idx];
                    
                    // Convert uniform random to normal distribution (Box-Muller approximation)
                    size_t rand_idx = (day * assets_.size() + asset_idx) % batch_size;
                    double u = static_cast<double>(random_batch[rand_idx]) / UINT64_MAX;
                    double z = inverse_normal_cdf(u);  // Simplified normal conversion
                    
                    // Calculate asset return using geometric Brownian motion
                    double dt = 1.0 / 252.0;  // Daily time step (252 trading days/year)
                    double asset_return = (asset.expected_return - 0.5 * asset.volatility * asset.volatility) * dt
                                        + asset.volatility * sqrt(dt) * z;
                    
                    daily_return += asset.weight * asset_return;
                }
                
                portfolio_value *= (1.0 + daily_return);
            }
            
            final_values.push_back(portfolio_value);
        }
        
        // Calculate risk metrics
        std::sort(final_values.begin(), final_values.end());
        
        RiskMetrics metrics;
        
        // Value at Risk (VaR)
        size_t var_95_idx = static_cast<size_t>(0.05 * num_simulations);
        size_t var_99_idx = static_cast<size_t>(0.01 * num_simulations);
        
        metrics.value_at_risk_95 = initial_value - final_values[var_95_idx];
        metrics.value_at_risk_99 = initial_value - final_values[var_99_idx];
        
        // Expected Shortfall (average loss beyond VaR)
        double sum_shortfall = 0.0;
        for (size_t i = 0; i < var_95_idx; ++i) {
            sum_shortfall += initial_value - final_values[i];
        }
        metrics.expected_shortfall = sum_shortfall / var_95_idx;
        
        // Maximum drawdown
        metrics.max_drawdown = initial_value - final_values[0];
        
        // Return distribution
        metrics.return_distribution.reserve(final_values.size());
        for (double final_val : final_values) {
            metrics.return_distribution.push_back((final_val - initial_value) / initial_value);
        }
        
        return metrics;
    }
    
private:
    // Simplified inverse normal CDF approximation
    double inverse_normal_cdf(double u) const {
        // Box-Muller method would be more accurate
        return sqrt(-2.0 * log(u)) * cos(2.0 * M_PI * u);
    }
};

// Usage example
int main() {
    PortfolioRiskAnalyzer analyzer{12345};
    
    // Build a sample portfolio
    analyzer.add_asset("Stocks", 0.6, 0.08, 0.15);    // 60% stocks, 8% return, 15% volatility
    analyzer.add_asset("Bonds", 0.3, 0.04, 0.05);     // 30% bonds, 4% return, 5% volatility
    analyzer.add_asset("Cash", 0.1, 0.02, 0.01);      // 10% cash, 2% return, 1% volatility
    
    double initial_portfolio = 1000000.0;  // $1M portfolio
    int time_horizon = 252;                // 1 year
    size_t simulations = 100000;           // 100K Monte Carlo runs
    
    auto start = std::chrono::high_resolution_clock::now();
    auto risk_metrics = analyzer.analyze_risk(initial_portfolio, time_horizon, simulations);
    auto end = std::chrono::high_resolution_clock::now();
    
    auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
    
    std::cout << "Portfolio Risk Analysis Results:" << std::endl;
    std::cout << "Initial Value: $" << initial_portfolio << std::endl;
    std::cout << "Time Horizon: " << time_horizon << " days" << std::endl;
    std::cout << "Simulations: " << simulations << std::endl;
    std::cout << std::endl;
    std::cout << "Risk Metrics:" << std::endl;
    std::cout << "95% VaR: $" << risk_metrics.value_at_risk_95 << std::endl;
    std::cout << "99% VaR: $" << risk_metrics.value_at_risk_99 << std::endl;
    std::cout << "Expected Shortfall: $" << risk_metrics.expected_shortfall << std::endl;
    std::cout << "Maximum Drawdown: $" << risk_metrics.max_drawdown << std::endl;
    std::cout << std::endl;
    std::cout << "Simulation completed in " << duration.count() << " ms" << std::endl;
    
    return 0;
}

🧪 Testing and Benchmarking

Custom Algorithm Benchmarking

#include <universal_rng/core.hpp>
#include <universal_rng/batch.hpp>
#include <chrono>
#include <vector>
#include <iostream>

class AlgorithmBenchmark {
public:
    struct BenchmarkResult {
        std::string algorithm_name;
        double single_ops_per_sec;
        double batch_ops_per_sec;
        double speedup_factor;
        size_t state_size_bytes;
    };
    
    template<UniversalRNG::Algorithm Algo>
    static BenchmarkResult benchmark_algorithm(const std::string& name, uint64_t seed = 12345) {
        BenchmarkResult result;
        result.algorithm_name = name;
        
        // Benchmark single-value generation
        auto single_gen = UniversalRNG::Generator<uint64_t, Algo>{seed};
        result.single_ops_per_sec = benchmark_single_generation(single_gen);
        
        // Benchmark batch generation
        auto batch_gen = UniversalRNG::BatchGenerator<uint64_t, Algo>{seed};
        result.batch_ops_per_sec = benchmark_batch_generation(batch_gen);
        
        // Calculate speedup
        result.speedup_factor = result.batch_ops_per_sec / result.single_ops_per_sec;
        
        // Get state size (approximate)
        result.state_size_bytes = sizeof(single_gen);
        
        return result;
    }
    
    static void run_comprehensive_benchmark() {
        std::vector<BenchmarkResult> results;
        
        // Benchmark all available algorithms
        results.push_back(benchmark_algorithm<UniversalRNG::Algorithm::Xoroshiro128Plus>("Xoroshiro128++"));
        results.push_back(benchmark_algorithm<UniversalRNG::Algorithm::WyRand>("WyRand"));
        results.push_back(benchmark_algorithm<UniversalRNG::Algorithm::Mt19937_64>("MT19937-64"));
        
        // Print results table
        std::cout << "Algorithm Benchmark Results:" << std::endl;
        std::cout << std::string(80, '=') << std::endl;
        printf("%-15s %12s %12s %8s %10s\n", 
               "Algorithm", "Single M/s", "Batch M/s", "Speedup", "State(B)");
        std::cout << std::string(80, '-') << std::endl;
        
        for (const auto& result : results) {
            printf("%-15s %12.1f %12.1f %8.1fx %10zu\n",
                   result.algorithm_name.c_str(),
                   result.single_ops_per_sec / 1e6,
                   result.batch_ops_per_sec / 1e6,
                   result.speedup_factor,
                   result.state_size_bytes);
        }
        std::cout << std::string(80, '=') << std::endl;
        
        // Find best performers
        auto best_single = std::max_element(results.begin(), results.end(),
            [](const auto& a, const auto& b) { return a.single_ops_per_sec < b.single_ops_per_sec; });
        auto best_batch = std::max_element(results.begin(), results.end(),
            [](const auto& a, const auto& b) { return a.batch_ops_per_sec < b.batch_ops_per_sec; });
        auto best_speedup = std::max_element(results.begin(), results.end(),
            [](const auto& a, const auto& b) { return a.speedup_factor < b.speedup_factor; });
        
        std::cout << std::endl << "Performance Leaders:" << std::endl;
        std::cout << "Best Single: " << best_single->algorithm_name << std::endl;
        std::cout << "Best Batch:  " << best_batch->algorithm_name << std::endl;
        std::cout << "Best Speedup: " << best_speedup->algorithm_name 
                  << " (" << best_speedup->speedup_factor << "x)" << std::endl;
    }
    
private:
    template<typename Generator>
    static double benchmark_single_generation(Generator& gen) {
        constexpr size_t iterations = 10'000'000;
        
        auto start = std::chrono::high_resolution_clock::now();
        
        for (size_t i = 0; i < iterations; ++i) {
            volatile auto value = gen.next();  // Prevent optimization
        }
        
        auto end = std::chrono::high_resolution_clock::now();
        auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start);
        
        return iterations / (duration.count() / 1e9);  // ops per second
    }
    
    template<typename BatchGenerator>
    static double benchmark_batch_generation(BatchGenerator& gen) {
        constexpr size_t batch_size = 10000;
        constexpr size_t num_batches = 1000;
        constexpr size_t total_values = batch_size * num_batches;
        
        std::vector<uint64_t> buffer(batch_size);
        
        auto start = std::chrono::high_resolution_clock::now();
        
        for (size_t i = 0; i < num_batches; ++i) {
            gen.generate_batch(buffer.data(), batch_size);
        }
        
        auto end = std::chrono::high_resolution_clock::now();
        auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start);
        
        return total_values / (duration.count() / 1e9);  // ops per second
    }
};

// Usage
int main() {
    AlgorithmBenchmark::run_comprehensive_benchmark();
    return 0;
}

Statistical Quality Testing

#include <universal_rng/core.hpp>
#include <vector>
#include <unordered_map>
#include <cmath>
#include <iostream>

class StatisticalTester {
public:
    struct TestResults {
        bool uniform_distribution_pass;
        bool independence_pass;
        bool periodicity_pass;
        double chi_square_statistic;
        double correlation_coefficient;
        std::string quality_assessment;
    };
    
    template<typename Generator>
    static TestResults run_basic_tests(Generator& gen, size_t sample_size = 1'000'000) {
        std::vector<uint64_t> samples(sample_size);
        
        // Generate samples
        for (auto& sample : samples) {
            sample = gen.next();
        }
        
        TestResults results;
        
        // Test 1: Uniform distribution (Chi-square test)
        results.chi_square_statistic = chi_square_uniformity_test(samples);
        results.uniform_distribution_pass = (results.chi_square_statistic < critical_chi_square_value());
        
        // Test 2: Serial correlation test
        results.correlation_coefficient = serial_correlation_test(samples);
        results.independence_pass = (std::abs(results.correlation_coefficient) < 0.01);
        
        // Test 3: Simple periodicity check
        results.periodicity_pass = periodicity_test(samples);
        
        // Overall assessment
        int passed_tests = results.uniform_distribution_pass + 
                          results.independence_pass + 
                          results.periodicity_pass;
        
        if (passed_tests == 3) results.quality_assessment = "Excellent";
        else if (passed_tests == 2) results.quality_assessment = "Good";
        else if (passed_tests == 1) results.quality_assessment = "Fair";
        else results.quality_assessment = "Poor";
        
        return results;
    }
    
    static void print_test_results(const TestResults& results, const std::string& algorithm_name) {
        std::cout << "Statistical Quality Test Results - " << algorithm_name << std::endl;
        std::cout << std::string(50, '=') << std::endl;
        
        std::cout << "Uniform Distribution: " << (results.uniform_distribution_pass ? "PASS" : "FAIL")
                  << " (χ² = " << results.chi_square_statistic << ")" << std::endl;
        
        std::cout << "Serial Independence: " << (results.independence_pass ? "PASS" : "FAIL")
                  << " (r = " << results.correlation_coefficient << ")" << std::endl;
        
        std::cout << "Periodicity Check: " << (results.periodicity_pass ? "PASS" : "FAIL") << std::endl;
        
        std::cout << "Overall Quality: " << results.quality_assessment << std::endl;
        std::cout << std::string(50, '=') << std::endl << std::endl;
    }
    
private:
    static double chi_square_uniformity_test(const std::vector<uint64_t>& samples) {
        constexpr size_t num_bins = 256;  // Use upper 8 bits
        std::vector<size_t> bin_counts(num_bins, 0);
        
        // Count samples in each bin
        for (uint64_t sample : samples) {
            size_t bin = (sample >> 56);  // Use top 8 bits
            bin_counts[bin]++;
        }
        
        // Calculate chi-square statistic
        double expected = static_cast<double>(samples.size()) / num_bins;
        double chi_square = 0.0;
        
        for (size_t count : bin_counts) {
            double deviation = count - expected;
            chi_square += (deviation * deviation) / expected;
        }
        
        return chi_square;
    }
    
    static double serial_correlation_test(const std::vector<uint64_t>& samples) {
        if (samples.size() < 2) return 0.0;
        
        // Calculate correlation between consecutive samples
        double sum_x = 0.0, sum_y = 0.0, sum_xy = 0.0;
        double sum_x2 = 0.0, sum_y2 = 0.0;
        size_t n = samples.size() - 1;
        
        for (size_t i = 0; i < n; ++i) {
            double x = static_cast<double>(samples[i]);
            double y = static_cast<double>(samples[i + 1]);
            
            sum_x += x;
            sum_y += y;
            sum_xy += x * y;
            sum_x2 += x * x;
            sum_y2 += y * y;
        }
        
        double mean_x = sum_x / n;
        double mean_y = sum_y / n;
        
        double numerator = sum_xy - n * mean_x * mean_y;
        double denominator = sqrt((sum_x2 - n * mean_x * mean_x) * (sum_y2 - n * mean_y * mean_y));
        
        return (denominator != 0.0) ? numerator / denominator : 0.0;
    }
    
    static bool periodicity_test(const std::vector<uint64_t>& samples) {
        // Simple test: check if any value repeats too soon
        constexpr size_t min_distance = 1000;  // Minimum distance between repetitions
        std::unordered_map<uint64_t, size_t> last_seen;
        
        for (size_t i = 0; i < samples.size(); ++i) {
            uint64_t sample = samples[i];
            
            if (last_seen.find(sample) != last_seen.end()) {
                if (i - last_seen[sample] < min_distance) {
                    return false;  // Found repetition too soon
                }
            }
            
            last_seen[sample] = i;
        }
        
        return true;  // No problematic repetitions found
    }
    
    static double critical_chi_square_value() {
        // Critical value for χ² test with 255 degrees of freedom at 0.05 significance
        return 293.25;  // Approximate value
    }
};

// Usage example
int main() {
    // Test different algorithms
    auto xoroshiro_gen = UniversalRNG::create_generator<64>(UniversalRNG::Algorithm::Xoroshiro128Plus);
    auto wyrand_gen = UniversalRNG::create_generator<64>(UniversalRNG::Algorithm::WyRand);

    auto xoroshiro_results = StatisticalTester::run_basic_tests(xoroshiro_gen);
    auto wyrand_results = StatisticalTester::run_basic_tests(wyrand_gen);
    
    StatisticalTester::print_test_results(xoroshiro_results, "Xoroshiro128++");
    StatisticalTester::print_test_results(wyrand_results, "WyRand");
    
    return 0;
}

🔧 Integration Examples

CMake Integration

# CMakeLists.txt example for integrating Universal RNG
cmake_minimum_required(VERSION 3.14)
project(MyRandomProject)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Option 1: Add as subdirectory
add_subdirectory(external/universal-rng)

# Option 2: Use find_package (if installed)
# find_package(UniversalRNG REQUIRED)

# Create your executable
add_executable(my_app src/main.cpp)

# Link against Universal RNG
target_link_libraries(my_app 
    PRIVATE 
        UniversalRNG::universal_rng
)

# Enable optimizations for performance
if(CMAKE_BUILD_TYPE STREQUAL "Release")
    target_compile_options(my_app PRIVATE
        $<$<CXX_COMPILER_ID:GNU,Clang>:-O3 -march=native -mavx2>
        $<$<CXX_COMPILER_ID:MSVC>:/O2 /arch:AVX2>
    )
endif()

C API Usage Example

#include "universal_rng_c.h"
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

int main() {
    URng_Generator_t rng;
    URng_Result result;
    
    // Create generator with system time seed
    uint64_t seed = (uint64_t)time(NULL);
    result = urng_create_generator(&rng, URNG_XOROSHIRO128_PLUS, seed);
    
    if (result != URNG_SUCCESS) {
        fprintf(stderr, "Failed to create generator: %s\n", 
                urng_error_string(result));
        return 1;
    }
    
    printf("Universal RNG C API Example\n");
    printf("Algorithm: %s\n", urng_algorithm_name(URNG_XOROSHIRO128_PLUS));
    printf("Generating 10 random numbers:\n\n");
    
    // Generate and display random numbers
    for (int i = 0; i < 10; ++i) {
        uint64_t value;
        result = urng_next_uint64(rng, &value);
        
        if (result == URNG_SUCCESS) {
            printf("Random[%2d]: %20lu (0x%016lX)\n", i, value, value);
        } else {
            fprintf(stderr, "Generation failed: %s\n", urng_error_string(result));
            break;
        }
    }
    
    // Batch generation example
    printf("\nBatch generation example:\n");
    
    URng_BatchGenerator_t batch_rng;
    result = urng_create_batch_generator(&batch_rng, URNG_WYRAND, seed + 1);
    
    if (result == URNG_SUCCESS) {
        const size_t batch_size = 1000;
        uint64_t* batch_buffer = malloc(batch_size * sizeof(uint64_t));
        
        if (batch_buffer) {
            clock_t start = clock();
            result = urng_generate_batch(batch_rng, batch_buffer, batch_size);
            clock_t end = clock();
            
            if (result == URNG_SUCCESS) {
                double duration = ((double)(end - start)) / CLOCKS_PER_SEC;
                double rate = batch_size / duration / 1e6;  // M ops/sec
                
                printf("Generated %zu numbers in %.3f seconds (%.1f M ops/sec)\n",
                       batch_size, duration, rate);
                
                // Show first few values
                printf("First 5 values: ");
                for (int i = 0; i < 5; ++i) {
                    printf("%lu ", batch_buffer[i]);
                }
                printf("\n");
            }
            
            free(batch_buffer);
        }
        
        urng_destroy_batch_generator(batch_rng);
    }
    
    // Cleanup
    urng_destroy_generator(rng);
    
    return 0;
}

Multi-threaded Random Number Generation

#include <universal_rng/batch.hpp>
#include <thread>
#include <vector>
#include <mutex>
#include <queue>
#include <condition_variable>
#include <atomic>

// Thread-safe random number pool
class RandomNumberPool {
private:
    std::queue<uint64_t> pool_;
    std::mutex pool_mutex_;
    std::condition_variable pool_cv_;
    std::atomic<bool> shutdown_{false};
    
    // Producer thread
    std::thread producer_thread_;
    UniversalRNG::BatchGenerator<uint64_t, UniversalRNG::Algorithm::Xoroshiro128Plus> generator_;
    
    static constexpr size_t BATCH_SIZE = 10000;
    static constexpr size_t MIN_POOL_SIZE = 1000;
    static constexpr size_t MAX_POOL_SIZE = 100000;
    
public:
    explicit RandomNumberPool(uint64_t seed) 
        : generator_{seed}
        , producer_thread_{&RandomNumberPool::producer_worker, this} {
    }
    
    ~RandomNumberPool() {
        shutdown_.store(true);
        pool_cv_.notify_all();
        if (producer_thread_.joinable()) {
            producer_thread_.join();
        }
    }
    
    // Get random number (thread-safe)
    uint64_t get_random() {
        std::unique_lock<std::mutex> lock(pool_mutex_);
        
        // Wait for numbers to be available
        pool_cv_.wait(lock, [this] { 
            return !pool_.empty() || shutdown_.load(); 
        });
        
        if (shutdown_.load() && pool_.empty()) {
            return 0;  // Pool is shutting down
        }
        
        uint64_t value = pool_.front();
        pool_.pop();
        
        // Notify producer if pool is getting low
        if (pool_.size() < MIN_POOL_SIZE) {
            pool_cv_.notify_one();
        }
        
        return value;
    }
    
    // Get multiple random numbers at once
    std::vector<uint64_t> get_random_batch(size_t count) {
        std::vector<uint64_t> result;
        result.reserve(count);
        
        std::unique_lock<std::mutex> lock(pool_mutex_);
        
        // Wait for enough numbers
        pool_cv_.wait(lock, [this, count] { 
            return pool_.size() >= count || shutdown_.load(); 
        });
        
        // Extract requested numbers
        for (size_t i = 0; i < count && !pool_.empty(); ++i) {
            result.push_back(pool_.front());
            pool_.pop();
        }
        
        // Notify producer
        if (pool_.size() < MIN_POOL_SIZE) {
            pool_cv_.notify_one();
        }
        
        return result;
    }
    
    size_t pool_size() const {
        std::lock_guard<std::mutex> lock(pool_mutex_);
        return pool_.size();
    }
    
private:
    void producer_worker() {
        std::vector<uint64_t> batch(BATCH_SIZE);
        
        while (!shutdown_.load()) {
            std::unique_lock<std::mutex> lock(pool_mutex_);
            
            // Wait until pool needs refilling
            pool_cv_.wait(lock, [this] {
                return pool_.size() < MIN_POOL_SIZE || shutdown_.load();
            });
            
            if (shutdown_.load()) break;
            
            // Don't overfill the pool
            if (pool_.size() >= MAX_POOL_SIZE) {
                continue;
            }
            
            lock.unlock();
            
            // Generate new batch
            generator_.generate_batch(batch.data(), BATCH_SIZE);
            
            // Add to pool
            lock.lock();
            for (uint64_t value : batch) {
                if (pool_.size() < MAX_POOL_SIZE) {
                    pool_.push(value);
                } else {
                    break;  // Pool is full
                }
            }
            
            // Notify consumers
            pool_cv_.notify_all();
        }
    }
};

// Usage example
int main() {
    RandomNumberPool pool{std::random_device{}()};
    
    std::vector<std::thread> consumer_threads;
    std::atomic<size_t> total_consumed{0};
    
    // Create consumer threads
    for (int i = 0; i < 4; ++i) {
        consumer_threads.emplace_back([&pool, &total_consumed, i]() {
            constexpr size_t numbers_per_thread = 1000000;
            
            auto start = std::chrono::high_resolution_clock::now();
            
            for (size_t j = 0; j < numbers_per_thread; ++j) {
                volatile uint64_t value = pool.get_random();
                total_consumed.fetch_add(1);
            }
            
            auto end = std::chrono::high_resolution_clock::now();
            auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
            
            std::cout << "Thread " << i << " consumed " << numbers_per_thread 
                      << " numbers in " << duration.count() << " ms" << std::endl;
        });
    }
    
    // Monitor pool status
    std::thread monitor([&pool, &total_consumed]() {
        while (total_consumed.load() < 4000000) {
            std::this_thread::sleep_for(std::chrono::milliseconds(500));
            std::cout << "Pool size: " << pool.pool_size() 
                      << ", Total consumed: " << total_consumed.load() << std::endl;
        }
    });
    
    // Wait for completion
    for (auto& t : consumer_threads) {
        t.join();
    }
    monitor.join();
    
    std::cout << "All threads completed. Final pool size: " << pool.pool_size() << std::endl;
    
    return 0;
}

🎯 Best Practices Guide

Performance Optimization Tips

1. Choose the Right Algorithm for Your Use Case

// For maximum speed with good quality
auto speed_rng = UniversalRNG::create_batch_generator<64>(UniversalRNG::Algorithm::WyRand);

// For best statistical quality
auto quality_rng = UniversalRNG::create_batch_generator<64>(UniversalRNG::Algorithm::Xoroshiro128Plus);

// For compatibility with existing code
auto compat_rng = UniversalRNG::create_generator<64>(UniversalRNG::Algorithm::Mt19937_64);

2. Use Batch Generation for High Throughput

// GOOD: Batch generation for high performance
constexpr size_t BATCH_SIZE = 10000;
std::vector<uint64_t> buffer(BATCH_SIZE);
auto rng = UniversalRNG::create_batch_generator<64>();

while (need_more_numbers) {
    rng.generate_batch(buffer.data(), BATCH_SIZE);
    process_batch(buffer);
}

// AVOID: Single generation in tight loops
auto single_rng = UniversalRNG::create_generator<64>();
for (size_t i = 0; i < 10000; ++i) {
    auto value = single_rng.next();  // Much slower!
    process_single(value);
}

3. Memory Alignment for SIMD Performance

// GOOD: Properly aligned buffer
alignas(32) uint64_t aligned_buffer[1000];
rng.generate_batch(aligned_buffer, 1000);

// GOOD: Use aligned allocator
std::vector<uint64_t, boost::alignment::aligned_allocator<uint64_t, 32>> simd_buffer(1000);

// AVOID: Unaligned dynamic allocation
auto* unaligned_buffer = new uint64_t[1000];  // May not be 32-byte aligned

4. Seeding Best Practices

// GOOD: High-quality seed from system entropy
std::random_device entropy_source;
uint64_t high_quality_seed = entropy_source();

// GOOD: Use SplitMix64 for generating multiple seeds
auto seed_gen = UniversalRNG::Generator<uint64_t, UniversalRNG::Algorithm::SplitMix64>{high_quality_seed};
std::vector<uint64_t> seeds;
for (int i = 0; i < num_generators; ++i) {
    seeds.push_back(seed_gen.next());
}

// AVOID: Poor quality seeds
auto bad_rng = UniversalRNG::create_generator<64>(UniversalRNG::Algorithm::Xoroshiro128Plus, 0);  // Bad!
auto time_seed = UniversalRNG::create_generator<64>(UniversalRNG::Algorithm::Xoroshiro128Plus, time(nullptr));  // Predictable!

Distribution Generation Examples

Uniform Floating Point Distribution

class UniformFloatGenerator {
private:
    UniversalRNG::BatchGenerator<uint64_t, UniversalRNG::Algorithm::Xoroshiro128Plus> rng_;
    
public:
    explicit UniformFloatGenerator(uint64_t seed) : rng_{seed} {}
    
    // Generate uniform float in [0, 1)
    float uniform_float() {
        uint64_t raw = rng_.next();
        // Use upper 24 bits for single precision
        uint32_t mantissa = static_cast<uint32_t>(raw >> 40);
        return mantissa * (1.0f / (1ULL << 24));
    }
    
    // Generate uniform double in [0, 1)
    double uniform_double() {
        uint64_t raw = rng_.next();
        // Use upper 53 bits for double precision
        uint64_t mantissa = raw >> 11;
        return mantissa * (1.0 / (1ULL << 53));
    }
    
    // Generate uniform float in [min, max)
    float uniform_range(float min, float max) {
        return min + uniform_float() * (max - min);
    }
};

Normal (Gaussian) Distribution

class NormalGenerator {
private:
    UniversalRNG::BatchGenerator<uint64_t, UniversalRNG::Algorithm::Xoroshiro128Plus> rng_;
    bool has_spare_;
    double spare_;
    
public:
    explicit NormalGenerator(uint64_t seed) : rng_{seed}, has_spare_{false} {}
    
    // Box-Muller transform for normal distribution
    double normal(double mean = 0.0, double stddev = 1.0) {
        if (has_spare_) {
            has_spare_ = false;
            return spare_ * stddev + mean;
        }
        
        has_spare_ = true;
        
        // Generate two uniform random numbers
        double u1 = uniform_double();
        double u2 = uniform_double();
        
        // Box-Muller transformation
        double mag = stddev * sqrt(-2.0 * log(u1));
        spare_ = mag * cos(2.0 * M_PI * u2);
        
        return mag * sin(2.0 * M_PI * u2) + mean;
    }
    
private:
    double uniform_double() {
        uint64_t raw = rng_.next();
        uint64_t mantissa = raw >> 11;
        return mantissa * (1.0 / (1ULL << 53));
    }
};

Exponential Distribution

class ExponentialGenerator {
private:
    UniversalRNG::Generator<uint64_t, UniversalRNG::Algorithm::WyRand> rng_;
    
public:
    explicit ExponentialGenerator(uint64_t seed) : rng_{seed} {}
    
    // Generate exponentially distributed values
    double exponential(double lambda = 1.0) {
        uint64_t raw = rng_.next();
        double u = static_cast<double>(raw >> 11) * (1.0 / (1ULL << 53));
        
        // Ensure u is not zero to avoid infinite result
        u = std::max(u, std::numeric_limits<double>::min());
        
        return -log(u) / lambda;
    }
};

Error Handling and Debugging

SIMD Detection and Fallback

#include <universal_rng/cpu.hpp>

void setup_optimal_generator() {
    if (UniversalRNG::CPU::has_avx2()) {
        std::cout << "Using AVX2 acceleration" << std::endl;
        auto rng = UniversalRNG::create_batch_generator<64>();
        std::cout << "Implementation: " << rng.simd_implementation() << std::endl;
    } else if (UniversalRNG::CPU::has_sse42()) {
        std::cout << "Using SSE4.2 acceleration" << std::endl;
        // Library automatically falls back to best available
    } else {
        std::cout << "Using scalar implementation" << std::endl;
    }
    
    // Print CPU capabilities
    auto cpu_info = UniversalRNG::CPU::detect_capabilities();
    std::cout << "Recommended algorithm: " 
              << static_cast<int>(cpu_info.recommended_algorithm) << std::endl;
    std::cout << "Optimal batch size: " << cpu_info.recommended_batch_size << std::endl;
}

Performance Validation

void validate_performance() {
    auto rng = UniversalRNG::create_batch_generator<64>();
    
    // Check if we're getting expected SIMD speedup
    double theoretical_speedup = rng.theoretical_speedup();
    std::cout << "Theoretical speedup: " << theoretical_speedup << "x" << std::endl;
    
    if (theoretical_speedup < 3.0) {
        std::cout << "Warning: SIMD acceleration may not be working optimally" << std::endl;
        std::cout << "Current implementation: " << rng.simd_implementation() << std::endl;
        
        // Check CPU capabilities
        if (!UniversalRNG::CPU::has_avx2()) {
            std::cout << "AVX2 not available on this CPU" << std::endl;
        }
    }
}

📚 Learning Resources

Understanding PRNG Theory

  1. Start with the basics: Learn about linear congruential generators and their limitations
  2. Study modern algorithms: Understand why Xoroshiro and WyRand are superior
  3. Statistical testing: Learn about TestU01 and PractRand test suites
  4. Performance analysis: Understand SIMD parallelization principles

Performance Optimization

  1. SIMD fundamentals: Learn AVX2 intrinsics and vectorization concepts
  2. Memory hierarchy: Understand cache behavior and alignment requirements
  3. Compiler optimization: Learn about -march=native and profile-guided optimization
  4. Benchmarking methodology: Proper timing and statistical significance

Recommended Reading

  • "Numerical Recipes": Classic reference for scientific computing
  • "Handbook of Monte Carlo Methods": Comprehensive guide to MC techniques
  • Intel Optimization Manuals: SIMD programming best practices
  • Agner Fog's Optimization Guides: Assembly-level performance tuning

🎉 Conclusion

The Universal RNG Library provides the tools you need for high-performance random number generation across a wide range of applications. Whether you're building games, running scientific simulations, or developing financial models, these examples demonstrate how to harness the library's full potential.

Key takeaways:

  • Use batch generation for maximum performance
  • Choose algorithms based on your quality vs speed requirements
  • Leverage SIMD acceleration with proper memory alignment
  • Test statistical quality for critical applications
  • Monitor performance to ensure optimal configuration

Start with the simple examples and gradually work up to the more complex use cases as you become familiar with the library's capabilities. The performance gains from proper usage can be substantial - often 4x or more compared to naive implementations!

Happy random number generating, fren! 🎲

PLEASE DO BEAR IN CONSTANT MIND ABOVE ALL ELSE: CURRENT STATE OF DEVELOPMENT THE C++ STD LIBRARY EMPLOYING MERSENNE TWISTER STILL OUTPERFORMS SINGLE CALCULATION OPERATIONS FOR NON-SIMD BOOSTED COMPUTERS. THESE LIBRARIES FULLY REQUIRE AT LEAST AVX2 MINIMUM TO BENEFIT OVER THE STD GENERATION METHODS WHEN CONSIDERING SINGLE NUMBER GENERATION TASKS.

Clone this wiki locally