Skip to content

Commit ce8f756

Browse files
committed
Merge branch 'develop'
2 parents 1b2c0a4 + 78a2699 commit ce8f756

File tree

238 files changed

+9434
-1615
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

238 files changed

+9434
-1615
lines changed

app/bpfile_replay.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
#include <mpi.h>
21
#include <adios2.h>
32
#include <string>
43
#include <cassert>
@@ -22,7 +21,6 @@ int main(int argc, char** argv){
2221
enableVerboseLogging() = true;
2322
}
2423

25-
MPI_Init(&argc, &argv);
2624
if(argc < 3){
2725
std::cout << "Usage: bpfile_replay <input BPfile filename> <output step freq (ms)> <options>\n"
2826
<< "Output will be on SST under the same filename ${input BPfile filename}. The temporary .sst file created will thus be ${input BPfile filename}.sst\n"
@@ -77,7 +75,7 @@ int main(int argc, char** argv){
7775
}
7876

7977

80-
adios2::ADIOS ad = adios2::ADIOS(MPI_COMM_SELF, adios2::DebugON);
78+
adios2::ADIOS ad = adios2::ADIOS(adios2::DebugON);
8179

8280
adios2::IO io_out = ad.DeclareIO("writer");
8381

@@ -217,6 +215,5 @@ int main(int argc, char** argv){
217215
std::cout << "Shutting down" << std::endl;
218216
wr.Close();
219217

220-
MPI_Finalize();
221218
return 0;
222219
};

app/driver.cpp

Lines changed: 59 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
#include<chimbuko_config.h>
2+
#ifdef USE_MPI
3+
#include<mpi.h>
4+
#endif
15
#include "chimbuko/chimbuko.hpp"
26
#include "chimbuko/verbose.hpp"
37
#include "chimbuko/util/string.hpp"
@@ -78,8 +82,9 @@ optionalArgsParser & getOptionalArgsParser(){
7882
addOptionalCommandLineArg(p, anom_win_size, "When anomaly data are recorded a window of this size (in units of function execution events) around the anomalous event are also recorded (default 10)");
7983
addOptionalCommandLineArg(p, prov_outputpath, "Output provenance data to this directory. Can be used in place of or in conjunction with the provenance database. An empty string \"\" (default) disables this output");
8084
#ifdef ENABLE_PROVDB
81-
addOptionalCommandLineArg(p, provdb_addr, "Address of the provenance database. If empty (default) the provenance DB will not be used.\nHas format \"ofi+tcp;ofi_rxm://${IP_ADDR}:${PORT}\". Should also accept \"tcp://${IP_ADDR}:${PORT}\"");
85+
addOptionalCommandLineArg(p, provdb_addr_dir, "Directory in which the provenance database outputs its address files. If empty (default) the provenance DB will not be used.");
8286
addOptionalCommandLineArg(p, nprovdb_shards, "Number of provenance database shards. Clients connect to shards round-robin by rank (default 1)");
87+
addOptionalCommandLineArg(p, nprovdb_instances, "Number of provenance database instances. Shards are divided uniformly over instances. (default 1)");
8388
#endif
8489
#ifdef _PERF_METRIC
8590
addOptionalCommandLineArg(p, perf_outputpath, "Output path for AD performance monitoring data. If an empty string (default) no output is written.");
@@ -89,12 +94,22 @@ optionalArgsParser & getOptionalArgsParser(){
8994
addOptionalCommandLineArg(p, trace_connect_timeout, "(For SST mode) Set the timeout in seconds on the connection to the TAU-instrumented binary (default 60s)");
9095
addOptionalCommandLineArg(p, parser_beginstep_timeout, "Set the timeout in seconds on waiting for the next ADIOS2 timestep (default 30s)");
9196

92-
addOptionalCommandLineArg(p, rank, "Set the rank index of the trace data. Used for verification unless override_rank is set. A value < 0 signals the value to be equal to the MPI rank of Chimbuko driver (default)");
97+
addOptionalCommandLineArg(p, rank,
98+
#ifdef USE_MPI
99+
"Set the rank index of the trace data. Used for verification unless override_rank is set. A value < 0 signals the value to be equal to the MPI rank of Chimbuko driver (default)"
100+
#else
101+
"Set the rank index of the trace data (default 0)"
102+
#endif
103+
);
104+
105+
93106
p.addOptionalArg(new overrideRankArg); //-override_rank <idx>
94107
p.addOptionalArg(new setLoggingHeadRankArg); //-logging_head_rank <rank>
95108

96109
addOptionalCommandLineArg(p, outlier_statistic, "Set the statistic used for outlier detection. Options: exclusive_runtime (default), inclusive_runtime");
97110
addOptionalCommandLineArg(p, step_report_freq, "Set the steps between Chimbuko reporting IO step progress. Use 0 to deactivate this logging entirely (default 1)");
111+
addOptionalCommandLineArg(p, prov_record_startstep, "If != -1, the IO step on which to start recording provenance information for anomalies (for testing, default -1)");
112+
addOptionalCommandLineArg(p, prov_record_stopstep, "If != -1, the IO step on which to stop recording provenance information for anomalies (for testing, default -1)");
98113

99114
initialized = true;
100115
}
@@ -110,7 +125,11 @@ void printHelp(){
110125
getOptionalArgsParser().help(std::cout);
111126
}
112127

113-
ChimbukoParams getParamsFromCommandLine(int argc, char** argv, const int mpi_world_rank){
128+
ChimbukoParams getParamsFromCommandLine(int argc, char** argv
129+
#ifdef USE_MPI
130+
, const int mpi_world_rank
131+
#endif
132+
){
114133
if(argc < 4){
115134
std::cerr << "Expected at least 4 arguments: <exe> <BPFile/SST> <.bp location> <bp file prefix>" << std::endl;
116135
exit(-1);
@@ -140,7 +159,8 @@ ChimbukoParams getParamsFromCommandLine(int argc, char** argv, const int mpi_wor
140159
params.prov_outputpath = "";
141160
#ifdef ENABLE_PROVDB
142161
params.nprovdb_shards = 1;
143-
params.provdb_addr = ""; //don't use provDB by default
162+
params.nprovdb_instances = 1;
163+
params.provdb_addr_dir = ""; //don't use provDB by default
144164
#endif
145165
params.err_outputpath = ""; //use std::cerr for errors by default
146166
params.trace_connect_timeout = 60;
@@ -153,8 +173,13 @@ ChimbukoParams getParamsFromCommandLine(int argc, char** argv, const int mpi_wor
153173

154174
//By default assign the rank index of the trace data as the MPI rank of the AD process (or 0 when built without MPI)
155175
//Allow override by user
156-
if(params.rank < 0)
176+
if(params.rank < 0){
177+
#ifdef USE_MPI
157178
params.rank = mpi_world_rank;
179+
#else
180+
params.rank = 0; //default to 0 for non-MPI applications
181+
#endif
182+
}
158183

159184
params.verbose = params.rank == 0; //head node produces verbose output
160185

@@ -169,7 +194,7 @@ ChimbukoParams getParamsFromCommandLine(int argc, char** argv, const int mpi_wor
169194
//If neither the provenance database nor the provenance output path is set, default to outputting to the current working directory
170195
if(params.prov_outputpath.size() == 0
171196
#ifdef ENABLE_PROVDB
172-
&& params.provdb_addr.size() == 0
197+
&& params.provdb_addr_dir.size() == 0
173198
#endif
174199
){
175200
params.prov_outputpath = ".";
@@ -187,26 +212,35 @@ int main(int argc, char ** argv){
187212
return 0;
188213
}
189214

215+
#ifdef USE_MPI
190216
assert( MPI_Init(&argc, &argv) == MPI_SUCCESS );
191217

192218
int mpi_world_rank, mpi_world_size;
193219
MPI_Comm_rank(MPI_COMM_WORLD, &mpi_world_rank);
194220
MPI_Comm_size(MPI_COMM_WORLD, &mpi_world_size);
221+
#endif
195222

196223
//Parse environment variables
197224
if(const char* env_p = std::getenv("CHIMBUKO_VERBOSE"))
198225
enableVerboseLogging() = true;
199226

200227
//Parse Chimbuko parameters
201-
ChimbukoParams params = getParamsFromCommandLine(argc, argv, mpi_world_rank);
228+
ChimbukoParams params = getParamsFromCommandLine(argc, argv
229+
#ifdef USE_MPI
230+
, mpi_world_rank
231+
#endif
232+
);
233+
202234
if(params.rank == progressHeadRank()) params.print();
203235

204-
if(enableVerboseLogging())
236+
if(enableVerboseLogging()){
205237
headProgressStream(params.rank) << "Driver rank " << params.rank << ": Enabling verbose debug output" << std::endl;
206-
238+
}
207239

208240
verboseStream << "Driver rank " << params.rank << ": waiting at pre-run barrier" << std::endl;
241+
#ifdef USE_MPI
209242
MPI_Barrier(MPI_COMM_WORLD);
243+
#endif
210244

211245
bool error = false;
212246

@@ -248,6 +282,7 @@ int main(int argc, char ** argv){
248282
// -----------------------------------------------------------------------
249283
// Average analysis time and total number of outliers
250284
// -----------------------------------------------------------------------
285+
#ifdef USE_MPI
251286
verboseStream << "Driver rank " << params.rank << ": waiting at post-run barrier" << std::endl;
252287
MPI_Barrier(MPI_COMM_WORLD);
253288
processing_time = duration_cast<milliseconds>(t2 - t1).count();
@@ -274,6 +309,19 @@ int main(int argc, char ** argv){
274309
total_n_comm_events = global_measures[1];
275310
total_n_counter_events = global_measures[2];
276311
}
312+
#else
313+
//Without MPI only report local parameters. In principle we could aggregate using the Pserver if we really want the global information
314+
total_processing_time = processing_time;
315+
total_n_outliers = n_outliers;
316+
total_frames = frames;
317+
318+
total_n_func_events = n_func_events;
319+
total_n_comm_events = n_comm_events;
320+
total_n_counter_events = n_counter_events;
321+
322+
int mpi_world_size = 1;
323+
#endif
324+
277325

278326
headProgressStream(params.rank) << "Driver rank " << params.rank << ": Final report\n"
279327
<< "Avg. num. frames over MPI ranks : " << (double)total_frames/(double)mpi_world_size << "\n"
@@ -301,7 +349,9 @@ int main(int argc, char ** argv){
301349
error = true;
302350
}
303351

352+
#ifdef USE_MPI
304353
MPI_Finalize();
354+
#endif
305355
headProgressStream(params.rank) << "Driver is exiting" << std::endl;
306356
return error ? 1 : 0;
307357
}

app/hpserver.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
//The hierarchical parameter server main program. This program collects statistics from the node-instances of the anomaly detector
2+
#include <chimbuko_config.h>
23
#ifndef _USE_ZMQNET
34
#include<iostream>
45
#warning "Hierarchical parameter server requires ZMQNet"
@@ -12,7 +13,9 @@ int main(void){
1213

1314
#include <chimbuko/pserver.hpp>
1415
#include <chimbuko/net/zmqme_net.hpp>
16+
#ifdef USE_MPI
1517
#include <mpi.h>
18+
#endif
1619
#include <chimbuko/param/sstd_param.hpp>
1720
#include <chimbuko/util/commandLineParser.hpp>
1821
#include <chimbuko/verbose.hpp>
@@ -103,7 +106,10 @@ int main (int argc, char ** argv){
103106
}
104107

105108
ZMQMENet net;
109+
#ifdef USE_MPI
106110
MPI_Init(&argc, &argv);
111+
#endif
112+
107113
PSstatSender stat_sender(args.stat_send_freq);
108114

109115
try {
@@ -169,7 +175,7 @@ int main (int argc, char ** argv){
169175

170176
std::cout << "HPserver finalizing the network" << std::endl;
171177
net.finalize();
172-
#ifdef _USE_ZMQNET
178+
#if defined(_USE_ZMQNET) && defined(USE_MPI)
173179
MPI_Finalize();
174180
#endif
175181

app/pclient.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
//A test application that mocks part of the anomaly detection modules, acting as a client for the parameter server and sending it anomaly information
2+
#include <chimbuko_config.h>
23

34
#include "chimbuko/param/sstd_param.hpp"
45
#include "chimbuko/message.hpp"
@@ -7,6 +8,9 @@
78
#include "chimbuko/net/mpi_net.hpp"
89
#else
910
#include "chimbuko/net/zmq_net.hpp"
11+
#endif
12+
13+
#ifdef USE_MPI
1014
#include <mpi.h>
1115
#endif
1216

@@ -18,11 +22,13 @@ using namespace chimbuko;
1822

1923
int main (int argc, char** argv)
2024
{
21-
int size, rank;
25+
int size=1, rank=0;
2226

27+
#ifdef USE_MPI
2328
MPI_Init(&argc, &argv);
2429
MPI_Comm_size(MPI_COMM_WORLD, &size);
2530
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
31+
#endif
2632

2733
#ifdef _USE_MPINET
2834
int count;
@@ -114,7 +120,9 @@ s */
114120
//g_param.assign(msg.data_buffer());
115121
}
116122

123+
#ifdef USE_MPI
117124
MPI_Barrier(MPI_COMM_WORLD);
125+
#endif
118126

119127
#ifdef _USE_MPINET
120128
if (rank == 0) {
@@ -159,6 +167,8 @@ s */
159167
zmq_ctx_term(context);
160168
#endif
161169

170+
#ifdef USE_MPI
162171
MPI_Finalize();
172+
#endif
163173
return EXIT_SUCCESS;
164174
}

app/pclient_stats.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
//A test application that mocks part of the anomaly detection modules, acting as a client for the parameter server and sending it function statistics information
2+
#include <chimbuko_config.h>
23

34
#include "chimbuko/param/sstd_param.hpp"
45
#include "chimbuko/message.hpp"
@@ -9,6 +10,9 @@
910
#include "chimbuko/net/mpi_net.hpp"
1011
#else
1112
#include "chimbuko/net/zmq_net.hpp"
13+
#endif
14+
15+
#ifdef USE_MPI
1216
#include <mpi.h>
1317
#endif
1418

@@ -21,8 +25,9 @@ using namespace chimbuko;
2125
int main (int argc, char** argv)
2226
{
2327
const int N_MPI_PROCESSORS = 10;
24-
int size, rank;
28+
int size=1, rank=0;
2529

30+
#ifdef USE_MPI
2631
MPI_Init(&argc, &argv);
2732
MPI_Comm_size(MPI_COMM_WORLD, &size);
2833
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
@@ -33,6 +38,7 @@ int main (int argc, char** argv)
3338
MPI_Finalize();
3439
return EXIT_SUCCESS;
3540
}
41+
#endif
3642

3743
#ifdef _USE_MPINET
3844
throw std::runtime_error("Not implemented yet.");
@@ -103,8 +109,10 @@ int main (int argc, char** argv)
103109
std::this_thread::sleep_for(std::chrono::milliseconds(10));
104110
}
105111

112+
#ifdef USE_MPI
106113
MPI_Barrier(MPI_COMM_WORLD);
107-
114+
#endif
115+
108116
// terminate parameter server
109117
#ifdef _USE_MPINET
110118
throw std::runtime_error("Not implemented yet.");
@@ -127,6 +135,8 @@ int main (int argc, char** argv)
127135
zmq_ctx_term(context);
128136
#endif
129137

138+
#ifdef USE_MPI
130139
MPI_Finalize();
140+
#endif
131141
return EXIT_SUCCESS;
132142
}

0 commit comments

Comments
 (0)