diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md index b572c1d50b..35c945f62b 100644 --- a/.github/CHANGELOG.md +++ b/.github/CHANGELOG.md @@ -15,7 +15,10 @@ ### Improvements -* Implement probs(wires) using a bit-shift implementation akin to the gate kernels in Lightning-Qubit. +* Parallelize Lightning-Qubit `probs` with OpenMP when using the `-DLQ_ENABLE_KERNEL_OMP=1` CMake argument. + [(#800)](https://github.com/PennyLaneAI/pennylane-lightning/pull/800) + +* Implement `probs(wires)` using a bit-shift implementation akin to the gate kernels in Lightning-Qubit. [(#795)](https://github.com/PennyLaneAI/pennylane-lightning/pull/795) * Enable setting the PennyLane version when invoking, for example, `make docker-build version=master pl_version=master`. @@ -449,7 +452,7 @@ Vincent Michaud-Rioux * The `BlockEncode` operation from PennyLane is now supported on all Lightning devices. [(#599)](https://github.com/PennyLaneAI/pennylane-lightning/pull/599) -* OpenMP acceleration can now be enabled at compile time for all `lightning.qubit` gate kernels using the "-DLQ_ENABLE_KERNEL_OMP=1" CMake argument. +* OpenMP acceleration can now be enabled at compile time for all `lightning.qubit` gate kernels using the `-DLQ_ENABLE_KERNEL_OMP=1` CMake argument. [(#510)](https://github.com/PennyLaneAI/pennylane-lightning/pull/510) * Enable building Docker images for any branch or tag. Set the Docker build cron job to build images for the latest release and `master`. diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py index f695fbccc5..02e7041e1a 100644 --- a/pennylane_lightning/core/_version.py +++ b/pennylane_lightning/core/_version.py @@ -16,4 +16,4 @@ Version number (major.minor.patch[-label]) """ -__version__ = "0.38.0-dev7" +__version__ = "0.38.0-dev8" diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/CMakeLists.txt b/pennylane_lightning/core/src/simulators/lightning_qubit/CMakeLists.txt index f07b94923c..da457fd80c 100644 --- a/pennylane_lightning/core/src/simulators/lightning_qubit/CMakeLists.txt +++ b/pennylane_lightning/core/src/simulators/lightning_qubit/CMakeLists.txt @@ -49,14 +49,21 @@ else() endif() if(LQ_ENABLE_KERNEL_OMP) + message(STATUS "OpenMP-parallelized kernels: ON.") add_definitions("-DPL_LQ_KERNEL_OMP") + target_compile_definitions(lightning_qubit PUBLIC -DPL_LQ_KERNEL_OMP) +else() + message(STATUS "OpenMP-parallelized kernels: OFF.") endif() if(LQ_ENABLE_KERNEL_AVX_STREAMING) if(NOT LQ_ENABLE_KERNEL_OMP) message(WARNING "AVX streaming operations require `LQ_ENABLE_KERNEL_OMP` to be enabled.") endif() + message(STATUS "AVX streaming operations: ON.") add_definitions("-DPL_LQ_KERNEL_AVX_STREAMING") +else() + message(STATUS "AVX streaming operations: OFF.") endif() target_link_libraries(lightning_qubit PUBLIC lightning_compile_options diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/MeasurementKernels.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/MeasurementKernels.hpp index d7fb833a0f..066db5ad31 100644 --- a/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/MeasurementKernels.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/MeasurementKernels.hpp @@ -382,7 +382,12 @@ auto probs_bitshift(const std::complex *arr, PROBS_CORE_DECLARE_P(6) PROBS_CORE_DECLARE_P(7) PROBS_CORE_DECLARE_P(8) - std::vector probs(PUtil::exp2(n_wires), 0); + constexpr std::size_t n_probs = one << n_wires; + std::vector probabilities(n_probs, 0); + auto *probs = probabilities.data(); +#if defined PL_LQ_KERNEL_OMP && defined _OPENMP +#pragma omp parallel for reduction(+ : probs[ : n_probs]) +#endif for (std::size_t k = 0; k < exp2(num_qubits - n_wires); k++) { std::size_t i0; PROBS_CORE_SUM_1 @@ -394,7 +399,7 @@ auto probs_bitshift(const std::complex *arr, PROBS_CORE_SUM_7 PROBS_CORE_SUM_8 } - return probs; + return probabilities; } // NOLINTEND(hicpp-function-size,readability-function-size) } // namespace Pennylane::LightningQubit::Measures \ No newline at end of file diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/MeasurementsLQubit.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/MeasurementsLQubit.hpp index cff5f1917a..19c0ba7035 100644 --- a/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/MeasurementsLQubit.hpp +++ b/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/MeasurementsLQubit.hpp @@ -80,13 +80,16 @@ class Measurements final */ auto probs() -> std::vector { const ComplexT *arr_data = this->_statevector.getData(); - std::vector basis_probs(this->_statevector.getLength(), 0); - - std::transform( - arr_data, arr_data + this->_statevector.getLength(), - basis_probs.begin(), - [](const ComplexT &z) -> PrecisionT { return std::norm(z); }); - return basis_probs; + const std::size_t n_probs = this->_statevector.getLength(); + std::vector probabilities(n_probs, 0); + auto *probs = probabilities.data(); +#if defined PL_LQ_KERNEL_OMP && defined _OPENMP +#pragma omp parallel for +#endif + for (std::size_t k = 0; k < n_probs; k++) { + probs[k] = std::norm(arr_data[k]); + } + return probabilities; }; /** @@ -128,10 +131,14 @@ class Measurements final Gates::getIndicesAfterExclusion(wires, num_qubits), num_qubits); const std::size_t n_probs = PUtil::exp2(n_wires); std::vector probabilities(n_probs, 0); + auto *probs = probabilities.data(); std::size_t ind_probs = 0; for (auto index : all_indices) { +#if defined PL_LQ_KERNEL_OMP && defined _OPENMP +#pragma omp parallel for reduction(+ : probs[ : n_probs]) +#endif for (auto offset : all_offsets) { - probabilities[ind_probs] += std::norm(arr_data[index + offset]); + probs[ind_probs] += std::norm(arr_data[index + offset]); } ind_probs++; }