Skip to content

Commit 227fd04

Browse files
committed
Merge remote-tracking branch 'origin/master' into gaudel/feature/tot_inner_tensor_ops
# Conflicts: # src/TiledArray/tensor/tensor.h
2 parents c9d16a9 + e26379a commit 227fd04

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+1038
-551
lines changed

CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,9 @@ add_feature_info(TA_RANGEV3 TA_RANGEV3 "Range-V3 ranges library")
175175
option(TA_TTG "Enable search/build of TTG library" OFF)
176176
add_feature_info(TA_TTG TA_TTG "TTG library")
177177

178+
option(IntelMKL_FAIR_DISPATCH "Enable fair dispatch in Intel MKL" OFF)
179+
add_feature_info(IntelMKL_FAIR_DISPATCH IntelMKL_FAIR_DISPATCH "Use of fair dispatch in Intel MKL")
180+
178181
# Enable shared library support options
179182
redefaultable_option(TA_ASSUMES_ASLR_DISABLED "TiledArray assumes the Address Space Layout Randomization (ASLR) to be disabled" OFF)
180183
add_feature_info(ASSUMES_ASLR_DISABLED TA_ASSUMES_ASLR_DISABLED

INSTALL.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,7 @@ support may be added.
423423
* `TA_TENSOR_MEM_PROFILE` -- Set to `ON` to profile host memory allocations used by TA::Tensor. This causes the use of Umpire for host memory allocation. This also enables additional tracing facilities provided by Umpire; these can be controlled via [environment variable `UMPIRE_LOG_LEVEL`](https://umpire.readthedocs.io/en/develop/sphinx/features/logging_and_replay.html), but note that the default is to log Umpire info into a file rather than stdout.
424424
* `TA_TENSOR_MEM_TRACE` -- Set to `ON` to *trace* host memory allocations used by TA::Tensor. This turns on support for tracking memory used by `Tensor` objects; such tracking must be enabled programmatically. This can greatly increase memory consumption by the application and is only intended for expert developers troubleshooting memory use by TiledArray.
425425
* `TA_UT_CTEST_TIMEOUT` -- The value (in seconds) of the timeout to use for running the TA unit tests via CTest when building the `check`/`check-tiledarray` targets. The default timeout is 1500s.
426+
* `IntelMKL_FAIR_DISPATCH` -- If you want to use the Intel MKL library on non-Intel (e.g., AMD) CPUs, set to `ON` to use fair kernel dispatch. [Default=OFF].
426427

427428
# Build TiledArray
428429

examples/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ endif()
3030
# Add Subdirectories
3131
add_subdirectory (cc)
3232
add_subdirectory (device)
33-
add_subdirectory (dgemm)
33+
add_subdirectory (gemm)
3434
add_subdirectory (demo)
3535
add_subdirectory (scalapack)
3636
add_subdirectory (fock)

examples/dgemm/ta_dense_new_tile.cpp

Lines changed: 0 additions & 168 deletions
This file was deleted.

examples/dgemm/CMakeLists.txt renamed to examples/gemm/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
# Create example executable
2727

2828
foreach(_exec ta_blas ta_eigen ta_band ta_dense ta_sparse ta_dense_nonuniform
29-
ta_dense_asymm ta_sparse_grow ta_dense_new_tile
29+
ta_dense_asymm ta_sparse_grow
3030
ta_cc_abcd)
3131

3232
# Add executable

examples/dgemm/README renamed to examples/gemm/README

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@ Applications usage:
1212

1313
ta_band matrix_size block_size band_width [repetitions]
1414

15-
blas matrix_size [repetitions]
15+
ta_blas matrix_size [repetitions]
1616

17-
eigen matrix_size [repetitions]
17+
ta_eigen matrix_size [repetitions]
1818

1919
Argument definitions:
2020

File renamed without changes.

examples/dgemm/ta_band.cpp renamed to examples/gemm/ta_band.cpp

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
*
1818
*/
1919

20+
#include <TiledArray/util/time.h>
2021
#include <tiledarray.h>
2122
#include <iostream>
2223

@@ -104,38 +105,33 @@ int main(int argc, char** argv) {
104105
for (; j < j_end; ++j, ++ij) shape_tensor[ij] = 1.0;
105106
}
106107

107-
TiledArray::SparseShape<float> shape(shape_tensor, trange);
108+
TiledArray::SparseShape<float> shape(
109+
shape_tensor, trange, /* per_element_norms_already = */ true);
108110

109111
// Construct and initialize arrays
110112
TiledArray::TSpArrayD a(world, trange, shape);
111113
TiledArray::TSpArrayD b(world, trange, shape);
112-
TiledArray::TSpArrayD c(world, trange);
114+
TiledArray::TSpArrayD c;
113115
a.fill(1.0);
114116
b.fill(1.0);
115117

116-
// Start clock
117-
world.gop.fence();
118-
const double wall_time_start = madness::wall_time();
119-
120118
// Do matrix multiplication
119+
world.gop.fence();
121120
for (int i = 0; i < repeat; ++i) {
122-
c("m,n") = a("m,k") * b("k,n");
123-
world.gop.fence();
121+
TA_RECORD_DURATION(c("m,n") = a("m,k") * b("k,n"); world.gop.fence();)
124122
if (world.rank() == 0) std::cout << "Iteration " << i + 1 << "\n";
125123
}
126124

127-
// Stop clock
128-
const double wall_time_stop = madness::wall_time();
129-
130125
// Print results
131-
const long flop = 2.0 * c("m,n").sum().get();
126+
const auto gflops_per_call = 2.0 * c("m,n").sum().get() / 1.e9;
132127
if (world.rank() == 0) {
133-
std::cout << "Average wall time = "
134-
<< (wall_time_stop - wall_time_start) / double(repeat)
135-
<< "\nAverage GFLOPS = "
136-
<< double(repeat) * double(flop) /
137-
(wall_time_stop - wall_time_start) / 1.0e9
138-
<< "\n";
128+
auto durations = TiledArray::duration_statistics();
129+
std::cout << "Average wall time = " << durations.mean
130+
<< " s\nAverage GFLOPS = "
131+
<< gflops_per_call * durations.mean_reciprocal
132+
<< "\nMedian wall time = " << durations.median
133+
<< " s\nMedian GFLOPS = "
134+
<< gflops_per_call / durations.median << "\n";
139135
}
140136

141137
} catch (TiledArray::Exception& e) {

examples/dgemm/ta_blas.cpp renamed to examples/gemm/ta_blas.cpp

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,14 @@
1717
*
1818
*/
1919

20+
#include <TiledArray/util/time.h>
2021
#include <tiledarray.h>
2122
#include <iostream>
2223

2324
int main(int argc, char** argv) {
2425
// Get command line arguments
2526
if (argc < 2) {
26-
std::cout << "Usage: " << argv[0] << " matrix_size [repetitions]\n";
27+
std::cout << "Usage: " << argv[0] << " matrix_size [repetitions = 5]\n";
2728
return 0;
2829
}
2930
const long matrix_size = atol(argv[1]);
@@ -66,31 +67,25 @@ int main(int argc, char** argv) {
6667
const integer m = matrix_size, n = matrix_size, k = matrix_size;
6768
const integer lda = matrix_size, ldb = matrix_size, ldc = matrix_size;
6869

69-
// Start clock
70-
const double wall_time_start = madness::wall_time();
71-
72-
// Do matrix multiplcation
73-
// Note: If TiledArray has not been configured with blas, this will be an
74-
// eigen call.
70+
// Do matrix multiplication
7571
for (int i = 0; i < repeat; ++i) {
76-
gemm(opa, opb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
72+
TA_RECORD_DURATION(
73+
gemm(opa, opb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc));
7774
}
78-
79-
// Stop clock
80-
const double wall_time_stop = madness::wall_time();
75+
auto durations = TiledArray::duration_statistics();
8176

8277
// Cleanup memory
8378
free(a);
8479
free(b);
8580
free(c);
8681

87-
std::cout << "Average wall time = "
88-
<< (wall_time_stop - wall_time_start) / double(repeat)
89-
<< "\nAverage GFLOPS = "
90-
<< double(repeat) * 2.0 *
91-
double(matrix_size * matrix_size * matrix_size) /
92-
(wall_time_stop - wall_time_start) / 1.0e9
93-
<< "\n";
82+
const auto gflops_per_call =
83+
2.0 * double(matrix_size * matrix_size * matrix_size) / 1.0e9;
84+
std::cout << "Average wall time = " << durations.mean << "\nAverage GFLOPS = "
85+
<< gflops_per_call * durations.mean_reciprocal
86+
<< "\nMedian wall time = " << durations.median
87+
<< "\nMedian GFLOPS = " << gflops_per_call / durations.median
88+
<< std::endl;
9489

9590
return 0;
9691
}

0 commit comments

Comments
 (0)