diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md
index b572c1d50b..35c945f62b 100644
--- a/.github/CHANGELOG.md
+++ b/.github/CHANGELOG.md
@@ -15,7 +15,10 @@
 
 ### Improvements
 
-* Implement probs(wires) using a bit-shift implementation akin to the gate kernels in Lightning-Qubit.
+* Parallelize Lightning-Qubit `probs` with OpenMP when using the `-DLQ_ENABLE_KERNEL_OMP=1` CMake argument.
+  [(#800)](https://github.com/PennyLaneAI/pennylane-lightning/pull/800)
+
+* Implement `probs(wires)` using a bit-shift implementation akin to the gate kernels in Lightning-Qubit.
   [(#795)](https://github.com/PennyLaneAI/pennylane-lightning/pull/795)
 
 * Enable setting the PennyLane version when invoking, for example, `make docker-build version=master pl_version=master`.
@@ -449,7 +452,7 @@ Vincent Michaud-Rioux
 * The `BlockEncode` operation from PennyLane is now supported on all Lightning devices.
   [(#599)](https://github.com/PennyLaneAI/pennylane-lightning/pull/599)
 
-* OpenMP acceleration can now be enabled at compile time for all `lightning.qubit` gate kernels using the "-DLQ_ENABLE_KERNEL_OMP=1" CMake argument.
+* OpenMP acceleration can now be enabled at compile time for all `lightning.qubit` gate kernels using the `-DLQ_ENABLE_KERNEL_OMP=1` CMake argument.
   [(#510)](https://github.com/PennyLaneAI/pennylane-lightning/pull/510)
 
 * Enable building Docker images for any branch or tag. Set the Docker build cron job to build images for the latest release and `master`.
diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py
index f695fbccc5..02e7041e1a 100644
--- a/pennylane_lightning/core/_version.py
+++ b/pennylane_lightning/core/_version.py
@@ -16,4 +16,4 @@
    Version number (major.minor.patch[-label])
 """
 
-__version__ = "0.38.0-dev7"
+__version__ = "0.38.0-dev8"
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/CMakeLists.txt b/pennylane_lightning/core/src/simulators/lightning_qubit/CMakeLists.txt
index f07b94923c..da457fd80c 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/CMakeLists.txt
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/CMakeLists.txt
@@ -49,14 +49,21 @@ else()
 endif()
 
 if(LQ_ENABLE_KERNEL_OMP)
+    message(STATUS "OpenMP-parallelized kernels: ON.")
     add_definitions("-DPL_LQ_KERNEL_OMP")
+    target_compile_definitions(lightning_qubit PUBLIC -DPL_LQ_KERNEL_OMP)
+else()
+    message(STATUS "OpenMP-parallelized kernels: OFF.")
 endif()
 
 if(LQ_ENABLE_KERNEL_AVX_STREAMING)
     if(NOT LQ_ENABLE_KERNEL_OMP)
         message(WARNING "AVX streaming operations require `LQ_ENABLE_KERNEL_OMP` to be enabled.")
     endif()
+    message(STATUS "AVX streaming operations: ON.")
     add_definitions("-DPL_LQ_KERNEL_AVX_STREAMING")
+else()
+    message(STATUS "AVX streaming operations: OFF.")
 endif()
 
 target_link_libraries(lightning_qubit PUBLIC    lightning_compile_options
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/MeasurementKernels.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/MeasurementKernels.hpp
index d7fb833a0f..066db5ad31 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/MeasurementKernels.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/MeasurementKernels.hpp
@@ -382,7 +382,12 @@ auto probs_bitshift(const std::complex<PrecisionT> *arr,
     PROBS_CORE_DECLARE_P(6)
     PROBS_CORE_DECLARE_P(7)
     PROBS_CORE_DECLARE_P(8)
-    std::vector<PrecisionT> probs(PUtil::exp2(n_wires), 0);
+    constexpr std::size_t n_probs = one << n_wires;
+    std::vector<PrecisionT> probabilities(n_probs, 0);
+    auto *probs = probabilities.data();
+#if defined PL_LQ_KERNEL_OMP && defined _OPENMP
+#pragma omp parallel for reduction(+ : probs[ : n_probs])
+#endif
     for (std::size_t k = 0; k < exp2(num_qubits - n_wires); k++) {
         std::size_t i0;
         PROBS_CORE_SUM_1
@@ -394,7 +399,7 @@ auto probs_bitshift(const std::complex<PrecisionT> *arr,
         PROBS_CORE_SUM_7
         PROBS_CORE_SUM_8
     }
-    return probs;
+    return probabilities;
 }
 // NOLINTEND(hicpp-function-size,readability-function-size)
 } // namespace Pennylane::LightningQubit::Measures
\ No newline at end of file
diff --git a/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/MeasurementsLQubit.hpp b/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/MeasurementsLQubit.hpp
index cff5f1917a..19c0ba7035 100644
--- a/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/MeasurementsLQubit.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_qubit/measurements/MeasurementsLQubit.hpp
@@ -80,13 +80,16 @@ class Measurements final
      */
     auto probs() -> std::vector<PrecisionT> {
         const ComplexT *arr_data = this->_statevector.getData();
-        std::vector<PrecisionT> basis_probs(this->_statevector.getLength(), 0);
-
-        std::transform(
-            arr_data, arr_data + this->_statevector.getLength(),
-            basis_probs.begin(),
-            [](const ComplexT &z) -> PrecisionT { return std::norm(z); });
-        return basis_probs;
+        const std::size_t n_probs = this->_statevector.getLength();
+        std::vector<PrecisionT> probabilities(n_probs, 0);
+        auto *probs = probabilities.data();
+#if defined PL_LQ_KERNEL_OMP && defined _OPENMP
+#pragma omp parallel for
+#endif
+        for (std::size_t k = 0; k < n_probs; k++) {
+            probs[k] = std::norm(arr_data[k]);
+        }
+        return probabilities;
     };
 
     /**
@@ -128,10 +131,19 @@ class Measurements final
             Gates::getIndicesAfterExclusion(wires, num_qubits), num_qubits);
         const std::size_t n_probs = PUtil::exp2(n_wires);
         std::vector<PrecisionT> probabilities(n_probs, 0);
         std::size_t ind_probs = 0;
         for (auto index : all_indices) {
+            // Only probabilities[ind_probs] is updated in this inner loop, so
+            // reduce into a scalar. An array-section reduction over all
+            // n_probs entries would make every thread create, zero and merge
+            // a private n_probs-sized copy on each outer iteration.
+            PrecisionT prob_sum{0};
+#if defined PL_LQ_KERNEL_OMP && defined _OPENMP
+#pragma omp parallel for reduction(+ : prob_sum)
+#endif
             for (auto offset : all_offsets) {
-                probabilities[ind_probs] += std::norm(arr_data[index + offset]);
+                prob_sum += std::norm(arr_data[index + offset]);
             }
+            probabilities[ind_probs] = prob_sum;
             ind_probs++;
         }