Skip to content

Commit b2f24f5

Browse files
authored
[QC-953] non-critical tasks (#1707)
* proof of concept * format * making the check expendable * also make aggregators non-critical * make mergers critical if task is critical * format * indicate the default value for "critical", remove the unused "blocking" parameter * doc * Post-processing * rebase with master * proxies should be expendable if the task is expendable * proxies should be expendable if the task is expendable * Add the resilient label to the mergers * make checkers resilient * make aggregators resilient * format * bad merge * single line labels * update doc * update doc * Update Advanced.md
1 parent 8548e53 commit b2f24f5

17 files changed

+100
-18
lines changed

Framework/basic.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
"tasks": {
4040
"QcTask": {
4141
"active": "true",
42+
"critical": "false", "": "if false the task is allowed to die without stopping the workflow, default: true",
4243
"className": "o2::quality_control_modules::skeleton::SkeletonTask",
4344
"moduleName": "QcSkeleton",
4445
"detectorName": "TST",
@@ -105,8 +106,7 @@
105106
"fraction": "0.1",
106107
"seed": "1234"
107108
}
108-
],
109-
"blocking": "false"
109+
]
110110
}
111111
]
112112
}

Framework/include/QualityControl/InfrastructureGenerator.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -219,9 +219,13 @@ class InfrastructureGenerator
219219
static void generateMergers(framework::WorkflowSpec& workflow, const std::string& taskName,
220220
size_t numberOfLocalMachines,
221221
std::vector<std::pair<size_t, size_t>> cycleDurationSeconds,
222-
const std::string& mergingMode, size_t resetAfterCycles,
223-
std::string monitoringUrl, const std::string& detectorName,
224-
std::vector<size_t> mergersPerLayer, bool enableMovingWindows);
222+
const std::string& mergingMode,
223+
size_t resetAfterCycles,
224+
std::string monitoringUrl,
225+
const std::string& detectorName,
226+
std::vector<size_t> mergersPerLayer,
227+
bool enableMovingWindows,
228+
bool critical);
225229
static void generateCheckRunners(framework::WorkflowSpec& workflow, const InfrastructureSpec& infrastructureSpec);
226230
static void generateAggregator(framework::WorkflowSpec& workflow, const InfrastructureSpec& infrastructureSpec);
227231
static void generatePostProcessing(framework::WorkflowSpec& workflow, const InfrastructureSpec& infrastructureSpec);

Framework/include/QualityControl/PostProcessingConfig.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ struct PostProcessingConfig {
4747
std::string consulUrl;
4848
core::Activity activity;
4949
bool matchAnyRunNumber = false;
50+
bool critical;
5051
core::CustomParameters customParameters;
5152
};
5253

Framework/include/QualityControl/PostProcessingTaskSpec.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ struct PostProcessingTaskSpec {
4141
std::string id = "Invalid";
4242
std::string taskName = "Invalid";
4343
bool active = true;
44+
bool critical = true;
4445
std::string detectorName = "Invalid";
4546
boost::property_tree::ptree tree = {};
4647
core::CustomParameters customParameters;

Framework/include/QualityControl/TaskRunnerConfig.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ struct TaskRunnerConfig {
4747
std::string className;
4848
std::vector<std::pair<size_t, size_t>> cycleDurations = {};
4949
int maxNumberCycles;
50+
bool critical;
5051
std::string consulUrl{};
5152
std::string conditionUrl{};
5253
std::string monitoringUrl{};

Framework/include/QualityControl/TaskSpec.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ struct TaskSpec {
5959
DataSourceSpec dataSource;
6060
// advanced
6161
bool active = true;
62+
bool critical = true;
6263
int maxNumberCycles = -1;
6364
size_t resetAfterCycles = 0;
6465
std::string saveObjectsToFile;

Framework/postprocessing.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
"postprocessing": {
4343
"ExamplePostprocessing": {
4444
"active": "true",
45+
"critical": "false", "": "if false the task is allowed to die without stopping the workflow, default: true",
4546
"className": "o2::quality_control_modules::skeleton::SkeletonPostProcessing",
4647
"moduleName": "QcSkeleton",
4748
"detectorName": "TST",

Framework/src/AggregatorRunnerFactory.cxx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@ DataProcessorSpec AggregatorRunnerFactory::create(const core::CommonSpec& common
4949
};
5050
newAggregatorRunner.labels.emplace_back(o2::framework::ecs::qcReconfigurable);
5151
newAggregatorRunner.labels.emplace_back(AggregatorRunner::getLabel());
52+
framework::DataProcessorLabel resilientLabel = { "resilient" };
53+
newAggregatorRunner.labels.emplace_back(resilientLabel);
5254
newAggregatorRunner.algorithm = adaptFromTask<AggregatorRunner>(std::move(aggregatorRunner));
5355
return newAggregatorRunner;
5456
}

Framework/src/CheckRunner.cxx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,7 @@ QualityObjectsType CheckRunner::check()
357357
QualityObjectsType allQOs;
358358
for (auto& [checkName, check] : mChecks) {
359359
if (updatePolicyManager.isReady(check.getName())) {
360+
ILOG(Debug, Support) << "Monitor Objects for the check '" << checkName << "' are ready --> check()" << ENDM;
360361
auto newQOs = check.check(mMonitorObjects);
361362
mTotalNumberCheckExecuted += newQOs.size();
362363

@@ -366,7 +367,7 @@ QualityObjectsType CheckRunner::check()
366367
// Was checked, update latest revision
367368
updatePolicyManager.updateActorRevision(checkName);
368369
} else {
369-
ILOG(Info, Support) << "Monitor Objects for the check '" << checkName << "' are not ready, ignoring" << ENDM;
370+
ILOG(Debug, Support) << "Monitor Objects for the check '" << checkName << "' are not ready, ignoring" << ENDM;
370371
}
371372
}
372373
return allQOs;

Framework/src/CheckRunnerFactory.cxx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ DataProcessorSpec CheckRunnerFactory::create(CheckRunnerConfig checkRunnerConfig
4545
options };
4646
newCheckRunner.labels.emplace_back(o2::framework::ecs::qcReconfigurable);
4747
newCheckRunner.labels.emplace_back(CheckRunner::getCheckRunnerLabel());
48+
newCheckRunner.labels.emplace_back(framework::DataProcessorLabel{ "resilient" });
4849
newCheckRunner.algorithm = adaptFromTask<CheckRunner>(std::move(qcCheckRunner));
4950
return newCheckRunner;
5051
}
@@ -61,7 +62,7 @@ DataProcessorSpec CheckRunnerFactory::createSinkDevice(const CheckRunnerConfig&
6162
checkRunnerConfig.options,
6263
{},
6364
{ o2::framework::ecs::qcReconfigurable } };
64-
65+
newCheckRunner.labels.emplace_back(framework::DataProcessorLabel{ "resilient" });
6566
return newCheckRunner;
6667
}
6768

0 commit comments

Comments
 (0)