Skip to content

Commit

Permalink
Small changes to prevent mem transfers
Browse files Browse the repository at this point in the history
  • Loading branch information
jeremyfelder committed Feb 2, 2025
1 parent 8d1c4ac commit a49edf9
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 54 deletions.
8 changes: 4 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,6 @@ tracing = "0.1.40"
[profile.bench]
codegen-units = 1
lto = true

[profile.release]
opt-level = 3
11 changes: 4 additions & 7 deletions crates/prover/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ tracing.workspace = true
rayon = { version = "1.10.0", optional = true }
serde = { version = "1.0", features = ["derive"] }

icicle-cuda-runtime = { git = "https://github.com/ingonyama-zk/icicle.git", optional = true, rev="5f0eaf226c1432277421a6095fd758b03fc20510"}
icicle-core = { git = "https://github.com/ingonyama-zk/icicle.git", optional = true, rev="5f0eaf226c1432277421a6095fd758b03fc20510"}
icicle-m31 = { git = "https://github.com/ingonyama-zk/icicle.git", optional = true, rev="5f0eaf226c1432277421a6095fd758b03fc20510"}
icicle-hash = { git = "https://github.com/ingonyama-zk/icicle.git", optional = true, rev="5f0eaf226c1432277421a6095fd758b03fc20510"}
icicle-cuda-runtime = { git = "https://github.com/ingonyama-zk/icicle.git", optional = true, rev="591de67f46c9e378f92fddd2f7da8685d7ea5f54"}
icicle-core = { git = "https://github.com/ingonyama-zk/icicle.git", optional = true, rev="591de67f46c9e378f92fddd2f7da8685d7ea5f54"}
icicle-m31 = { git = "https://github.com/ingonyama-zk/icicle.git", optional = true, rev="591de67f46c9e378f92fddd2f7da8685d7ea5f54"}
icicle-hash = { git = "https://github.com/ingonyama-zk/icicle.git", optional = true, rev="591de67f46c9e378f92fddd2f7da8685d7ea5f54"}

nvtx = { version = "*", optional = true }

Expand Down Expand Up @@ -118,6 +118,3 @@ name = "pcs"
[[bench]]
harness = false
name = "accumulate"

[profile.release]
opt-level = 3
69 changes: 31 additions & 38 deletions crates/prover/src/core/backend/icicle/circle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@ use std::mem::transmute;

use icicle_core::ntt::{FieldImpl, NTTConfig, Ordering};
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::memory::{DeviceSlice, DeviceVec, HostSlice};
use icicle_cuda_runtime::memory::{DeviceSlice, DeviceVec, HostSlice, HostOrDeviceSlice};
use icicle_m31::dcct::{self, get_dcct_root_of_unity, initialize_dcct_domain};
use icicle_m31::field::ScalarField;
use itertools::Itertools;

use super::IcicleBackend;
use crate::core::backend::icicle::column::DeviceColumn;
use crate::core::backend::{Col, Column, CpuBackend};
use crate::core::backend::cpu::{CpuCircleEvaluation, CpuCirclePoly};
use crate::core::circle::{CirclePoint, Coset};
use crate::core::fields::m31::{BaseField, M31};
use crate::core::fields::qm31::SecureField;
Expand Down Expand Up @@ -40,46 +41,43 @@ impl PolyOps for IcicleBackend {
eval: CircleEvaluation<Self, BaseField, BitReversedOrder>,
itwiddles: &TwiddleTree<Self>,
) -> CirclePoly<Self> {
// todo!()
if eval.domain.log_size() <= 3 || eval.domain.log_size() == 7 {
// TODO: as property .is_dcct_available etc...
// return unsafe {
// transmute(CpuBackend::interpolate(
// transmute(eval),
// transmute(itwiddles),
// ))
// };
todo!(
"log2={} size not implemented in Icicle backend",
eval.domain.log_size()
)
let cpu_eval = CpuCircleEvaluation::new(eval.domain, eval.values.to_cpu());

let cpu_circle_poly = CpuBackend::interpolate(
cpu_eval,
unsafe { transmute(itwiddles) },
);

let icicle_coeffs = DeviceColumn::from_cpu(cpu_circle_poly.coeffs.as_slice());

return IcicleCirclePoly::new(icicle_coeffs);
}

let values = eval.values;
nvtx::range_push!("[ICICLE] get_dcct_root_of_unity");
let rou = get_dcct_root_of_unity(eval.domain.size() as _);
nvtx::range_pop!();

nvtx::range_push!("[ICICLE] initialize_dcct_domain");
initialize_dcct_domain(eval.domain.log_size(), rou, &DeviceContext::default()).unwrap();
nvtx::range_pop!();
let eval_values = unsafe { transmute::<&DeviceSlice<BaseField>, &DeviceSlice<ScalarField>>(&eval.values.data[..]) };

let mut evaluations = vec![ScalarField::zero(); values.len()];
let mut coeffs = unsafe { DeviceColumn::uninitialized(eval_values.len()) };
let mut coeffs_data = unsafe { transmute::<&mut DeviceSlice<BaseField>, &mut DeviceSlice<ScalarField>>(&mut coeffs.data[..]) };

let mut cfg = NTTConfig::default();
cfg.ordering = Ordering::kMN;
nvtx::range_push!("[ICICLE] interpolate");
dcct::interpolate(
unsafe { transmute::<_, &DeviceSlice<_>>(&values.data[..]) },
eval_values,
&cfg,
HostSlice::from_mut_slice(&mut evaluations),
coeffs_data,
)
.unwrap();
nvtx::range_pop!();

let values: Vec<BaseField> = unsafe { transmute(evaluations) };

CirclePoly::new(DeviceColumn::from_cpu(&values))
CirclePoly::new(coeffs)
}

fn eval_at_point(poly: &CirclePoly<Self>, point: CirclePoint<SecureField>) -> SecureField {
Expand Down Expand Up @@ -117,19 +115,18 @@ impl PolyOps for IcicleBackend {
domain: CircleDomain,
twiddles: &TwiddleTree<Self>,
) -> CircleEvaluation<Self, BaseField, BitReversedOrder> {
// todo!()
if domain.log_size() <= 3 || domain.log_size() == 7 {
// return unsafe {
// transmute(CpuBackend::evaluate(
// transmute(poly),
// domain,
// transmute(twiddles),
// ))
// };
todo!(
"log2={} size not implemented in Icicle backend",
domain.log_size()
)
let cpu_poly = CpuCirclePoly::new(poly.coeffs.to_cpu());

let cpu_circle_eval = CpuBackend::evaluate(
&cpu_poly,
domain,
unsafe { transmute(twiddles) },
);

let icicle_eval_values = DeviceColumn::from_cpu(cpu_circle_eval.values.as_slice());

return IcicleCircleEvaluation::new(cpu_circle_eval.domain, icicle_eval_values);
}

let values = poly.extend(domain.log_size()).coeffs;
Expand All @@ -153,12 +150,8 @@ impl PolyOps for IcicleBackend {
)
.unwrap();
nvtx::range_pop!();
unsafe {
transmute(IcicleCircleEvaluation::<BaseField, BitReversedOrder>::new(
domain,
evaluations,
))
}

IcicleCircleEvaluation::<BaseField, BitReversedOrder>::new(domain, evaluations)
}

fn interpolate_columns(
Expand Down
5 changes: 1 addition & 4 deletions crates/prover/src/core/backend/icicle/column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,7 @@ impl ColumnOps<BaseField> for IcicleBackend {

impl Column<BaseField> for DeviceColumn {
/// Builds a `DeviceColumn` holding `length` elements that are meant to be all zero.
///
/// NOTE(review): this listing is a diff rendering that has lost its +/- markers. The
/// per-file summary for column.rs reports "1 addition & 4 deletions", which lines up with
/// the first allocation, the blank line after it, the `host_data` line and the
/// `copy_from_host` line being the removed version, and the `cuda_malloc_zeros` line being
/// the added one. Confirm against the real commit before treating both as live code.
///
/// # Panics
///
/// Each allocation call is followed by `.unwrap()`, so a failed allocation panics here.
fn zeros(length: usize) -> Self {
// First variant: allocate `length` elements without initializing them here.
let mut data = DeviceVec::cuda_malloc(length).unwrap();

// Build a `length`-element vector of `BaseField::zero()` on the host and copy it into the
// allocation above. The value returned by `copy_from_host` is not checked.
let host_data = vec![BaseField::zero(); length];
data.copy_from_host(HostSlice::from_slice(&host_data));
// Second variant: a single call that, judging by its name, allocates already-zeroed
// memory (presumably without the host-side vector and copy; verify against the icicle
// API). As written, this `let` shadows the `data` binding created above.
let data = DeviceVec::cuda_malloc_zeros(length).unwrap();

Self { data }
}
Expand Down
2 changes: 1 addition & 1 deletion crates/prover/src/examples/wide_fibonacci/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ mod tests {
type TheBackend = IcicleBackend;
// type TheBackend = CpuBackend;

let min_log = get_env_var("MIN_FIB_LOG", 5u32);
let min_log = get_env_var("MIN_FIB_LOG", 6u32);
let max_log = get_env_var("MAX_FIB_LOG", 23u32);

nvtx::name_thread!("stark_prover");
Expand Down

0 comments on commit a49edf9

Please sign in to comment.