Skip to content

Commit

Permalink
Merge branch 'main' into gpu_compile_24
Browse files Browse the repository at this point in the history
  • Loading branch information
dannys4 committed Feb 8, 2024
2 parents 60dbc3b + f576511 commit d4f02da
Show file tree
Hide file tree
Showing 8 changed files with 61 additions and 106 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ if(NOT Kokkos_FOUND)
FetchContent_Declare(
kokkos
GIT_REPOSITORY https://github.com/kokkos/kokkos
GIT_TAG 3.7.00
GIT_TAG 4.2.00
GIT_SHALLOW TRUE
)
FetchContent_MakeAvailable(kokkos)
Expand Down
5 changes: 3 additions & 2 deletions MParT/TrainMap.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@

#include <functional>
#include <nlopt.hpp>
#include "ConditionalMapBase.h"
#include "MapObjective.h"
#include <iostream>
#include "MParT/ConditionalMapBase.h"
#include "MParT/MapObjective.h"

namespace mpart {

Expand Down
49 changes: 0 additions & 49 deletions MParT/Utilities/LinearAlgebra.h
Original file line number Diff line number Diff line change
Expand Up @@ -195,55 +195,6 @@ StridedMatrix<double, MemorySpace> operator*(TransposeObject<MemorySpace> A,
return C;
}

enum class ReduceDimMap { sum, norm };

template<ReduceDimMap Map>
KOKKOS_INLINE_FUNCTION double ReduceDimMapKernel(double) {return 0.;};

template<>
KOKKOS_INLINE_FUNCTION double ReduceDimMapKernel<ReduceDimMap::sum>(double x) {return x;};

template<>
KOKKOS_INLINE_FUNCTION double ReduceDimMapKernel<ReduceDimMap::norm>(double x) {return x*x;};


// This struct allows you to reduce the columns of a mxn matrix
// Performs alpha*A*[1,...,1]
template<ReduceDimMap Map, typename MemorySpace, unsigned int Dim = 1, typename = std::enable_if_t<Dim <= 1,int> >
struct ReduceDim {
using value_type = double[];
using size_type = typename StridedMatrix<double, MemorySpace>::size_type;

// Keep track of the dimension we are not reducing
size_type value_count;

StridedMatrix<const double, MemorySpace> A_;
double alpha_;

ReduceDim(StridedMatrix<double, MemorySpace> A, double alpha): value_count(A.extent(1-Dim)), A_(A), alpha_(alpha) {}

KOKKOS_INLINE_FUNCTION void operator()(const size_type reduce_idx, value_type sum) const {
for(size_type full_idx=0; full_idx<value_count; ++full_idx) {
double aij = 0.;
if(Dim == 1) aij = A_(full_idx,reduce_idx);
else aij = A_(reduce_idx,full_idx);
sum[full_idx] += ReduceDimMapKernel<Map>(aij)*alpha_;
}
}

KOKKOS_INLINE_FUNCTION void join (volatile value_type dst, const volatile value_type src) const {
for (size_type i = 0; i < value_count; ++i) {
dst[i] += src[i];
}
}

KOKKOS_INLINE_FUNCTION void init (value_type sum) const {
for (size_type i = 0; i < value_count; ++i) {
sum[i] = 0.0;
}
}
};

/** Mimics the interface of the Eigen::PartialPivLU class, but using Kokkos::Views and CUBLAS/CUSOLVER linear algebra.
Note that the layout of the matrices used in this class is important. Cublas expects column major (layout left).
Expand Down
1 change: 0 additions & 1 deletion bindings/julia/src/CommonJuliaUtilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,5 @@ void mpart::binding::CommonUtilitiesWrapper(jlcxx::Module &mod)
mod.method("Initialize", [](){mpart::binding::Initialize(std::vector<std::string> {});});
mod.method("Initialize", [](std::vector<std::string> v){mpart::binding::Initialize(makeInitArguments(v));});
mod.method("Concurrency", &Kokkos::DefaultExecutionSpace::concurrency);
mod.add_type<Kokkos::HostSpace>("HostSpace");
mod.add_type<Kokkos::LayoutStride>("LayoutStride");
}
9 changes: 5 additions & 4 deletions src/Distributions/GaussianSamplerDensity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,11 @@ void GaussianSamplerDensity<MemorySpace>::LogDensityImpl(StridedMatrix<const dou
covChol_.solveInPlaceL(diff);
}

ReduceDim<ReduceDimMap::norm,MemorySpace,0> rr(diff, -0.5);
Kokkos::parallel_reduce(M, rr, &output(0));
Kokkos::parallel_for( "log terms", N, KOKKOS_LAMBDA (const int& j) {
output(j) -= 0.5*( M*logtau_ + logDetCov_ );
Kokkos::parallel_for(N, KOKKOS_LAMBDA(const int& j){
output(j) = -0.5*( M*logtau_ + logDetCov_ );
for(int d=0; d<M; ++d){
output(j) += -0.5*diff(d,j)*diff(d,j);
}
});
}

Expand Down
40 changes: 36 additions & 4 deletions src/MapObjective.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,36 @@ std::shared_ptr<MapObjective<MemorySpace>> ObjectiveFactory::CreateGaussianKLObj
template<typename MemorySpace>
double KLObjective<MemorySpace>::ObjectivePlusCoeffGradImpl(StridedMatrix<const double, MemorySpace> data, StridedVector<double, MemorySpace> grad, std::shared_ptr<ConditionalMapBase<MemorySpace>> map) const {
unsigned int N_samps = data.extent(1);
unsigned int grad_dim = grad.extent(0);
PullbackDensity<MemorySpace> pullback {map, density_};
StridedVector<double, MemorySpace> densityX = pullback.LogDensity(data);
StridedMatrix<double, MemorySpace> densityGradX = pullback.LogDensityCoeffGrad(data);
double sumDensity = 0.;

Kokkos::parallel_reduce ("Sum Negative Log Likelihood", N_samps, KOKKOS_LAMBDA (const int i, double &sum) {
sum -= densityX(i);
}, sumDensity);
ReduceDim<ReduceDimMap::sum,MemorySpace> rc(densityGradX, -1.0/((double) N_samps));
Kokkos::parallel_reduce(N_samps, rc, &grad(0));


if (grad.data()!=nullptr){
double scale = -1.0/((double) N_samps);

Kokkos::TeamPolicy<MemoryToExecution<MemorySpace>> policy(grad_dim, Kokkos::AUTO());

Kokkos::parallel_for(policy,
KOKKOS_LAMBDA(auto& tag, auto&& teamMember){
int row = teamMember.league_rank();
double thisRowSum = 0.0;
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, N_samps),
KOKKOS_LAMBDA(int col, double& innerUpdate){
innerUpdate += scale*densityGradX(row,col);
},
thisRowSum);

grad(row) = thisRowSum;
}
);
}
return sumDensity/N_samps;
}

Expand All @@ -79,10 +100,21 @@ double KLObjective<MemorySpace>::ObjectiveImpl(StridedMatrix<const double, Memor
template<typename MemorySpace>
void KLObjective<MemorySpace>::CoeffGradImpl(StridedMatrix<const double, MemorySpace> data, StridedVector<double, MemorySpace> grad, std::shared_ptr<ConditionalMapBase<MemorySpace>> map) const {
unsigned int N_samps = data.extent(1);
unsigned int grad_dim = grad.extent(0);
PullbackDensity<MemorySpace> pullback {map, density_};
StridedMatrix<double, MemorySpace> densityGradX = pullback.LogDensityCoeffGrad(data);
ReduceDim<ReduceDimMap::sum,MemorySpace> rc(densityGradX, -1.0/((double) N_samps));
Kokkos::parallel_reduce(N_samps, rc, &grad(0));

double scale = -1.0/((double) N_samps);
Kokkos::parallel_for("grad", Kokkos::TeamPolicy<MemoryToExecution<MemorySpace>>(grad_dim, Kokkos::AUTO()),
KOKKOS_LAMBDA(auto t, typename Kokkos::TeamPolicy<MemoryToExecution<MemorySpace>>::member_type teamMember){
int row = teamMember.league_rank();
double thisRowSum = 0.0;
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember, N_samps), KOKKOS_LAMBDA(int col, double& innerUpdate){
innerUpdate += scale*densityGradX(row,col);
}, thisRowSum);
grad(row) = thisRowSum;
}
);
}

// Explicit template instantiation
Expand Down
41 changes: 0 additions & 41 deletions tests/Test_LinearAlgebra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,47 +139,6 @@ TEST_CASE( "Testing Linear Mat-Mat product", "[LinearAlgebra_MatMat]" ) {
}
}

TEST_CASE( "Testing ReduceDim", "ReduceDim" ) {
int N = 23;
StridedMatrix<double, Kokkos::HostSpace> A = Kokkos::View<double**, Kokkos::HostSpace>("A", 3, N);
StridedMatrix<double, Kokkos::HostSpace> AT = Kokkos::View<double**, Kokkos::HostSpace>("AT", N, 3);
double ref_avg = ((double) (N+1)) / 2.0;
for(int i = 0; i < N; i++) {
A(0,i) = i+1;
A(1,i) = N-i;
A(2,i) = ref_avg;

AT(i,0) = i+1;
AT(i,1) = N-i;
AT(i,2) = ref_avg;
}
ReduceDim<ReduceDimMap::sum,Kokkos::HostSpace,1> rc (A, 1.0);
Kokkos::View<double*, Kokkos::HostSpace> avg_col("avg_col", 3);
Kokkos::parallel_reduce(N, rc, avg_col.data());
CHECK(avg_col(0) == Approx(N*ref_avg).epsilon(1e-14).margin(1e-14));
CHECK(avg_col(1) == Approx(N*ref_avg).epsilon(1e-14).margin(1e-14));
CHECK(avg_col(2) == Approx(N*ref_avg).epsilon(1e-14).margin(1e-14));

ReduceDim<ReduceDimMap::sum,Kokkos::HostSpace,0> rr (AT,1.0);
Kokkos::View<double*, Kokkos::HostSpace> avg_row("avg_row", 3);
Kokkos::parallel_reduce(N, rr, avg_row.data());
CHECK(avg_row(0) == Approx(N*ref_avg).epsilon(1e-14).margin(1e-14));
CHECK(avg_row(1) == Approx(N*ref_avg).epsilon(1e-14).margin(1e-14));
CHECK(avg_row(2) == Approx(N*ref_avg).epsilon(1e-14).margin(1e-14));

rc = ReduceDim<ReduceDimMap::sum,Kokkos::HostSpace>(A, -1.0/((double) N));
Kokkos::parallel_reduce(N, rc, avg_col.data());
CHECK(avg_col(0) == Approx(-ref_avg).epsilon(1e-14).margin(1e-14));
CHECK(avg_col(1) == Approx(-ref_avg).epsilon(1e-14).margin(1e-14));
CHECK(avg_col(2) == Approx(-ref_avg).epsilon(1e-14).margin(1e-14));

rr = ReduceDim<ReduceDimMap::sum,Kokkos::HostSpace,0>(AT, -1.0/((double) N));
Kokkos::parallel_reduce(N, rr, avg_row.data());
CHECK(avg_row(0) == Approx(-ref_avg).epsilon(1e-14).margin(1e-14));
CHECK(avg_row(1) == Approx(-ref_avg).epsilon(1e-14).margin(1e-14));
CHECK(avg_row(2) == Approx(-ref_avg).epsilon(1e-14).margin(1e-14));
}

TEST_CASE( "Testing LU Factorization", "LinearAlgebra_LU" ) {
Kokkos::View<double**, Kokkos::LayoutLeft, Kokkos::HostSpace> A("A", 3, 3);
Kokkos::View<double**, Kokkos::LayoutLeft, Kokkos::HostSpace> B("B", 3, 2);
Expand Down
20 changes: 16 additions & 4 deletions tests/Test_TrainMapAdaptive.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,30 @@ void NormalizeSamples(StridedMatrix<double, Kokkos::HostSpace> mat) {
unsigned int dim = mat.extent(0);
unsigned int N_samples = mat.extent(1);
// Take sum of each row, divide by 1/N_samples
ReduceDim<ReduceDimMap::sum,MemorySpace,1> rd_mean(mat, 1./(static_cast<double>(N_samples)));
Kokkos::View<double*, MemorySpace> meanVar("MeanStd", dim);
Kokkos::parallel_reduce(N_samples, rd_mean, &meanVar(0));
for(int i=0; i<dim; ++i){
for(int j=0; j<N_samples; ++j){
meanVar(i) += mat(i,j);
}
meanVar(i) /= static_cast<double>(N_samples);
}

// Subtract mean from each point
using ExecSpace = typename MemoryToExecution<MemorySpace>::Space;
auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<2>,ExecSpace>({0,0},{dim,N_samples});
Kokkos::parallel_for("Center data", policy, KOKKOS_LAMBDA(const unsigned int i, const unsigned int j){
mat(i,j) -= meanVar(i);
});

// Take || . ||_2^2 of each row, divide by 1/(N_samples-1)
ReduceDim<ReduceDimMap::norm,MemorySpace,1> rd_var(mat, 1./(static_cast<double>(N_samples-1)));
Kokkos::parallel_reduce(N_samples, rd_var, &meanVar(0));
for(int i=0; i<dim; ++i){
meanVar(i) = 0.0;
for(int j=0; j<N_samples; ++j){
meanVar(i) += mat(i,j)*mat(i,j);
}
meanVar(i) /= static_cast<double>(N_samples-1);
}

// Divide each point by appropriate standard deviation
Kokkos::parallel_for("Scale data", policy, KOKKOS_LAMBDA(const unsigned int i, const unsigned int j){
mat(i,j) /= std::sqrt(meanVar(i));
Expand Down

0 comments on commit d4f02da

Please sign in to comment.