Skip to content

Commit

Permalink
feat(crypto): use gpu msm when building with --config cuda
Browse files Browse the repository at this point in the history
  • Loading branch information
chokobole committed May 30, 2024
1 parent 621aaff commit c21fb4c
Show file tree
Hide file tree
Showing 4 changed files with 167 additions and 26 deletions.
3 changes: 3 additions & 0 deletions tachyon/crypto/commitments/kzg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ tachyon_cc_library(
"//tachyon/base/buffer:copyable",
"//tachyon/base/containers:container_util",
"//tachyon/crypto/commitments:batch_commitment_state",
"//tachyon/device/gpu:scoped_mem_pool",
"//tachyon/device/gpu:scoped_stream",
"//tachyon/math/elliptic_curves/msm:variable_base_msm",
"//tachyon/math/elliptic_curves/msm:variable_base_msm_gpu",
"//tachyon/math/polynomials/univariate:univariate_evaluation_domain",
],
)
Expand Down
117 changes: 103 additions & 14 deletions tachyon/crypto/commitments/kzg/kzg.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <stddef.h>

#include <algorithm>
#include <limits>
#include <memory>
#include <utility>
#include <vector>
Expand All @@ -21,6 +22,12 @@
#include "tachyon/math/elliptic_curves/point_conversions.h"
#include "tachyon/math/polynomials/univariate/univariate_evaluation_domain.h"

#if TACHYON_CUDA
#include "tachyon/device/gpu/scoped_mem_pool.h"
#include "tachyon/device/gpu/scoped_stream.h"
#include "tachyon/math/elliptic_curves/msm/variable_base_msm_gpu.h"
#endif

namespace tachyon {
namespace crypto {

Expand All @@ -42,6 +49,9 @@ class KZG {
g1_powers_of_tau_lagrange_(std::move(g1_powers_of_tau_lagrange)) {
CHECK_EQ(g1_powers_of_tau_.size(), g1_powers_of_tau_lagrange_.size());
CHECK_LE(g1_powers_of_tau_.size(), kMaxDegree + 1);
#if TACHYON_CUDA
SetupForGpu();
#endif
}

const std::vector<G1Point>& g1_powers_of_tau() const {
Expand All @@ -52,21 +62,73 @@ class KZG {
return g1_powers_of_tau_lagrange_;
}

void ResizeBatchCommitments(size_t size) { batch_commitments_.resize(size); }
#if TACHYON_CUDA
void SetupForGpu() {
CHECK(!msm_gpu_);

gpuMemPoolProps props = {gpuMemAllocationTypePinned,
gpuMemHandleTypeNone,
{gpuMemLocationTypeDevice, 0}};
mem_pool_ = device::gpu::CreateMemPool(&props);

uint64_t mem_pool_threshold = std::numeric_limits<uint64_t>::max();
gpuError_t error = gpuMemPoolSetAttribute(
mem_pool_.get(), gpuMemPoolAttrReleaseThreshold, &mem_pool_threshold);
CHECK_EQ(error, gpuSuccess);
stream_ = device::gpu::CreateStream();

msm_gpu_.reset(
new math::VariableBaseMSMGpu<G1Point>(mem_pool_.get(), stream_.get()));
}
#endif

// Resizes the buffer that the batched DoMSM overload writes into so that it
// holds |size| entries. When built with CUDA and a GPU MSM object exists, the
// GPU buffer is resized; in every other case the CPU buffer is resized.
void ResizeBatchCommitments(size_t size) {
#if TACHYON_CUDA
  if (msm_gpu_ != nullptr) {
    gpu_batch_commitments_.resize(size);
  } else {
    cpu_batch_commitments_.resize(size);
  }
#else
  cpu_batch_commitments_.resize(size);
#endif
}

// Returns the results accumulated in the batch buffer, converted to
// |Commitment|, and resets |state| before returning.
//
// Depending on |Commitment|, the buffered points are either moved out as-is
// (when the buffer's element type already equals |Commitment|),
// batch-normalized (when |Commitment| is math::AffinePoint<Curve>), or
// converted with math::ConvertPoints(). Every conversion is wrapped in CHECK.
//
// NOTE(review): this span comes from a diff rendered without +/- markers. The
// statements that use |batch_commitments_| (without a cpu_/gpu_ prefix) look
// like pre-change lines shown next to their replacements — confirm against
// the repository before treating this text as compilable.
std::vector<Commitment> GetBatchCommitments(BatchCommitmentState& state) {
std::vector<Commitment> batch_commitments;
if constexpr (std::is_same_v<Commitment, Bucket>) {
batch_commitments = std::move(batch_commitments_);
} else if constexpr (std::is_same_v<Commitment, math::AffinePoint<Curve>>) {
batch_commitments.resize(batch_commitments_.size());
CHECK(Bucket::BatchNormalize(batch_commitments_, &batch_commitments));
batch_commitments_.clear();
#if TACHYON_CUDA
// GPU path: results were written to |gpu_batch_commitments_|, whose elements
// are math::ProjectivePoint<Curve>.
if (msm_gpu_) {
if constexpr (std::is_same_v<Commitment, math::ProjectivePoint<Curve>>) {
batch_commitments = std::move(gpu_batch_commitments_);
// NOLINTNEXTLINE(readability/braces)
} else if constexpr (std::is_same_v<Commitment,
math::AffinePoint<Curve>>) {
batch_commitments.resize(gpu_batch_commitments_.size());
CHECK(math::ProjectivePoint<Curve>::BatchNormalize(
gpu_batch_commitments_, &batch_commitments));
// The source buffer is emptied once its contents have been converted.
gpu_batch_commitments_.clear();
} else {
batch_commitments.resize(gpu_batch_commitments_.size());
CHECK(math::ConvertPoints(gpu_batch_commitments_, &batch_commitments));
gpu_batch_commitments_.clear();
}
} else {
batch_commitments.resize(batch_commitments_.size());
CHECK(math::ConvertPoints(batch_commitments_, &batch_commitments));
batch_commitments_.clear();
#endif
// CPU path: results were written to |cpu_batch_commitments_|, whose elements
// are Bucket.
if constexpr (std::is_same_v<Commitment, Bucket>) {
batch_commitments = std::move(cpu_batch_commitments_);
// NOLINTNEXTLINE(readability/braces)
} else if constexpr (std::is_same_v<Commitment,
math::AffinePoint<Curve>>) {
batch_commitments.resize(cpu_batch_commitments_.size());
CHECK(
Bucket::BatchNormalize(cpu_batch_commitments_, &batch_commitments));
cpu_batch_commitments_.clear();
} else {
batch_commitments.resize(cpu_batch_commitments_.size());
CHECK(math::ConvertPoints(cpu_batch_commitments_, &batch_commitments));
cpu_batch_commitments_.clear();
}
#if TACHYON_CUDA
// Closes the else-branch of the |msm_gpu_| check opened in the CUDA-only
// section above.
}
#endif
// Reset the caller's batch state before handing the results back.
state.Reset();
return batch_commitments;
}
Expand Down Expand Up @@ -133,8 +195,22 @@ class KZG {

private:
template <typename BaseContainer, typename ScalarContainer>
static bool DoMSM(const BaseContainer& bases, const ScalarContainer& scalars,
Commitment* out) {
bool DoMSM(const BaseContainer& bases, const ScalarContainer& scalars,
Commitment* out) const {
#if TACHYON_CUDA
if (msm_gpu_) {
absl::Span<const G1Point> bases_span = absl::Span<const G1Point>(
bases.data(), std::min(bases.size(), scalars.size()));
if constexpr (std::is_same_v<Commitment, math::ProjectivePoint<Curve>>) {
return msm_gpu_->Run(bases_span, scalars, out);
} else {
math::ProjectivePoint<Curve> result;
if (!msm_gpu_->Run(bases_span, scalars, &result)) return false;
*out = math::ConvertPoint<Commitment>(result);
return true;
}
}
#endif
math::VariableBaseMSM<G1Point> msm;
absl::Span<const G1Point> bases_span = absl::Span<const G1Point>(
bases.data(), std::min(bases.size(), scalars.size()));
Expand All @@ -151,15 +227,28 @@ class KZG {
// Batched MSM: runs a variable-base MSM over |bases| and |scalars| and writes
// the result into slot |index| of the batch buffer. Returns whatever the
// underlying Run() call returns.
//
// |bases| is truncated to min(bases.size(), scalars.size()) before the run.
// |index| is used with operator[] and is not bounds-checked here, so the
// buffer must already hold more than |index| entries. |state| is not read in
// the visible body.
//
// NOTE(review): the two consecutive CPU return statements below look like the
// pre-change (|batch_commitments_|) and post-change (|cpu_batch_commitments_|)
// lines of a diff rendered without +/- markers — confirm against the
// repository.
template <typename BaseContainer, typename ScalarContainer>
bool DoMSM(const BaseContainer& bases, const ScalarContainer& scalars,
BatchCommitmentState& state, size_t index) {
#if TACHYON_CUDA
// Use the GPU implementation whenever a GPU MSM object exists; the result
// goes to the GPU-side batch buffer.
if (msm_gpu_) {
absl::Span<const G1Point> bases_span = absl::Span<const G1Point>(
bases.data(), std::min(bases.size(), scalars.size()));
return msm_gpu_->Run(bases_span, scalars, &gpu_batch_commitments_[index]);
}
#endif
// CPU fallback: a fresh VariableBaseMSM object is created for each call.
math::VariableBaseMSM<G1Point> msm;
absl::Span<const G1Point> bases_span = absl::Span<const G1Point>(
bases.data(), std::min(bases.size(), scalars.size()));
return msm.Run(bases_span, scalars, &batch_commitments_[index]);
return msm.Run(bases_span, scalars, &cpu_batch_commitments_[index]);
}

// G1 bases; the constructor CHECKs that both vectors have the same size and
// at most kMaxDegree + 1 elements.
std::vector<G1Point> g1_powers_of_tau_;
std::vector<G1Point> g1_powers_of_tau_lagrange_;
// NOTE(review): |batch_commitments_| appears to be the pre-change name of
// |cpu_batch_commitments_|, shown here because the diff is rendered without
// +/- markers — confirm against the repository.
std::vector<Bucket> batch_commitments_;
// Output slots written by the CPU branch of the batched DoMSM overload.
std::vector<Bucket> cpu_batch_commitments_;
#if TACHYON_CUDA
// GPU resources created by SetupForGpu(); |msm_gpu_| is constructed from the
// raw handles of |mem_pool_| and |stream_|.
device::gpu::ScopedMemPool mem_pool_;
device::gpu::ScopedStream stream_;
// Non-null once SetupForGpu() has run; selects the GPU branch of the MSM
// helpers.
std::unique_ptr<math::VariableBaseMSMGpu<G1Point>> msm_gpu_;
// Output slots written by the GPU branch of the batched DoMSM overload.
std::vector<math::ProjectivePoint<Curve>> gpu_batch_commitments_;
#endif
};

} // namespace crypto
Expand Down
68 changes: 57 additions & 11 deletions tachyon/crypto/commitments/kzg/kzg_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,32 @@ TEST_F(KZGTest, CommitLagrange) {

Poly poly = Poly::Random(N - 1);

math::bn254::G1AffinePoint commit;
ASSERT_TRUE(pcs.Commit(poly.coefficients().coefficients(), &commit));
math::bn254::G1AffinePoint cpu_commit;
ASSERT_TRUE(pcs.Commit(poly.coefficients().coefficients(), &cpu_commit));

std::unique_ptr<Domain> domain = Domain::Create(N);
Evals poly_evals = domain->FFT(std::move(poly));
Evals poly_evals = domain->FFT(poly);

math::bn254::G1AffinePoint commit_lagrange;
ASSERT_TRUE(pcs.CommitLagrange(poly_evals.evaluations(), &commit_lagrange));
math::bn254::G1AffinePoint cpu_commit_lagrange;
ASSERT_TRUE(
pcs.CommitLagrange(poly_evals.evaluations(), &cpu_commit_lagrange));

EXPECT_EQ(commit, commit_lagrange);
EXPECT_EQ(cpu_commit, cpu_commit_lagrange);

#if TACHYON_CUDA
pcs.SetupForGpu();

math::bn254::G1AffinePoint gpu_commit;
ASSERT_TRUE(pcs.Commit(poly.coefficients().coefficients(), &gpu_commit));

EXPECT_EQ(gpu_commit, cpu_commit);

math::bn254::G1AffinePoint gpu_commit_lagrange;
ASSERT_TRUE(
pcs.CommitLagrange(poly_evals.evaluations(), &gpu_commit_lagrange));

EXPECT_EQ(gpu_commit_lagrange, cpu_commit_lagrange);
#endif
}

TEST_F(KZGTest, BatchCommitLagrange) {
Expand All @@ -67,27 +83,57 @@ TEST_F(KZGTest, BatchCommitLagrange) {
for (size_t i = 0; i < num_polys; ++i) {
ASSERT_TRUE(pcs.Commit(polys[i].coefficients().coefficients(), state, i));
}
std::vector<math::bn254::G1AffinePoint> batch_commitments =
std::vector<math::bn254::G1AffinePoint> cpu_batch_commitments =
pcs.GetBatchCommitments(state);
EXPECT_EQ(state.batch_mode, false);
EXPECT_EQ(state.batch_count, size_t{0});

std::unique_ptr<Domain> domain = Domain::Create(N);
std::vector<Evals> poly_evals = base::Map(
polys, [&domain](Poly& poly) { return domain->FFT(std::move(poly)); });
std::vector<Evals> poly_evals =
base::Map(polys, [&domain](Poly& poly) { return domain->FFT(poly); });

state.batch_mode = true;
state.batch_count = num_polys;
pcs.ResizeBatchCommitments(num_polys);
for (size_t i = 0; i < num_polys; ++i) {
ASSERT_TRUE(pcs.CommitLagrange(poly_evals[i].evaluations(), state, i));
}
std::vector<math::bn254::G1AffinePoint> cpu_batch_commitments_lagrange =
pcs.GetBatchCommitments(state);
EXPECT_EQ(state.batch_mode, false);
EXPECT_EQ(state.batch_count, size_t{0});

EXPECT_EQ(cpu_batch_commitments, cpu_batch_commitments_lagrange);

#if TACHYON_CUDA
pcs.SetupForGpu();

state.batch_mode = true;
state.batch_count = num_polys;
pcs.ResizeBatchCommitments(num_polys);
for (size_t i = 0; i < num_polys; ++i) {
ASSERT_TRUE(pcs.Commit(polys[i].coefficients().coefficients(), state, i));
}
std::vector<math::bn254::G1AffinePoint> gpu_batch_commitments =
pcs.GetBatchCommitments(state);
EXPECT_EQ(state.batch_mode, false);
EXPECT_EQ(state.batch_count, size_t{0});

EXPECT_EQ(gpu_batch_commitments, cpu_batch_commitments);

state.batch_mode = true;
state.batch_count = num_polys;
pcs.ResizeBatchCommitments(num_polys);
for (size_t i = 0; i < num_polys; ++i) {
ASSERT_TRUE(pcs.CommitLagrange(poly_evals[i].evaluations(), state, i));
}
std::vector<math::bn254::G1AffinePoint> batch_commitments_lagrange =
std::vector<math::bn254::G1AffinePoint> gpu_batch_commitments_lagrange =
pcs.GetBatchCommitments(state);
EXPECT_EQ(state.batch_mode, false);
EXPECT_EQ(state.batch_count, size_t{0});

EXPECT_EQ(batch_commitments, batch_commitments_lagrange);
EXPECT_EQ(gpu_batch_commitments_lagrange, cpu_batch_commitments_lagrange);
#endif
}

TEST_F(KZGTest, Downsize) {
Expand Down
5 changes: 4 additions & 1 deletion tachyon/math/elliptic_curves/msm/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda")
load("//bazel:tachyon.bzl", "if_gpu_is_configured")
load(
"//bazel:tachyon_cc.bzl",
Expand Down Expand Up @@ -54,7 +55,9 @@ tachyon_cc_library(
tachyon_cc_library(
name = "variable_base_msm_gpu",
hdrs = ["variable_base_msm_gpu.h"],
deps = ["//tachyon/math/elliptic_curves/msm/algorithms/icicle:icicle_msm"],
deps = ["//tachyon/math/elliptic_curves/msm/algorithms/icicle:icicle_msm"] + if_cuda([
"@local_config_cuda//cuda:cudart_static",
]),
)

tachyon_cc_unittest(
Expand Down

0 comments on commit c21fb4c

Please sign in to comment.