Skip to content

Commit 6201d75

Browse files
authored
fix: process metrics later in executeTemplate. Fixes argoproj#13162 (argoproj#13163)
Signed-off-by: Dillen Padhiar <dillen_padhiar@intuit.com>
1 parent 9c57c37 commit 6201d75

File tree

3 files changed

+71
-23
lines changed

3 files changed

+71
-23
lines changed

test/e2e/metrics_test.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,23 @@ func (s *MetricsSuite) TestDAGMetrics() {
9494
})
9595
}
9696

97+
func (s *MetricsSuite) TestFailedMetric() {
98+
s.Given().
99+
Workflow(`@testdata/template-status-failed-conditional-metric.yaml`).
100+
When().
101+
SubmitWorkflow().
102+
WaitForWorkflow(fixtures.ToBeFailed).
103+
Then().
104+
ExpectWorkflow(func(t *testing.T, metadata *metav1.ObjectMeta, status *wfv1.WorkflowStatus) {
105+
assert.Equal(t, wfv1.WorkflowFailed, status.Phase)
106+
s.e(s.T()).GET("").
107+
Expect().
108+
Status(200).
109+
Body().
110+
Contains(`argo_workflows_task_failure 1`)
111+
})
112+
}
113+
97114
func TestMetricsSuite(t *testing.T) {
98115
suite.Run(t, new(MetricsSuite))
99116
}
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
apiVersion: argoproj.io/v1alpha1
2+
kind: Workflow
3+
metadata:
4+
generateName: template-status-failed-conditional-metric-
5+
spec:
6+
entrypoint: dag
7+
templates:
8+
- name: dag
9+
metrics:
10+
prometheus:
11+
- counter:
12+
value: "1"
13+
help: Task failed
14+
name: task_failure
15+
when: '{{status}} == Failed'
16+
dag:
17+
tasks:
18+
- name: test
19+
template: echo
20+
arguments:
21+
parameters:
22+
- name: message
23+
value: "test"
24+
25+
- name: echo
26+
inputs:
27+
parameters:
28+
- name: message
29+
container:
30+
image: alpine:3.7
31+
command: ["{{inputs.parameters.message}}"]

workflow/controller/operator.go

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2218,29 +2218,6 @@ func (woc *wfOperationCtx) executeTemplate(ctx context.Context, nodeName string,
22182218
woc.controller.syncManager.Release(woc.wf, node.ID, processedTmpl.Synchronization)
22192219
}
22202220

2221-
if processedTmpl.Metrics != nil {
2222-
// Check if the node was just created, if it was emit realtime metrics.
2223-
// If the node did not previously exist, we can infer that it was created during the current operation, emit real time metrics.
2224-
if _, ok := woc.preExecutionNodePhases[node.ID]; !ok {
2225-
localScope, realTimeScope := woc.prepareMetricScope(node)
2226-
woc.computeMetrics(processedTmpl.Metrics.Prometheus, localScope, realTimeScope, true)
2227-
}
2228-
// Check if the node completed during this execution, if it did emit metrics
2229-
//
2230-
// This check is necessary because sometimes a node will be marked completed during the current execution and will
2231-
// not be considered again. The best example of this is the entrypoint steps/dag template (once completed, the
2232-
// workflow ends and it's not reconsidered). This checks makes sure that its metrics also get emitted.
2233-
//
2234-
// In this check, a completed node may or may not have existed prior to this execution. If it did exist, ensure that it wasn't
2235-
// completed before this execution. If it did not exist prior, then we can infer that it was completed during this execution.
2236-
// The statement "(!ok || !prevNodeStatus.Fulfilled())" checks for this behavior and represents the material conditional
2237-
// "ok -> !prevNodeStatus.Fulfilled()" (https://en.wikipedia.org/wiki/Material_conditional)
2238-
if prevNodeStatus, ok := woc.preExecutionNodePhases[node.ID]; (!ok || !prevNodeStatus.Fulfilled()) && node.Fulfilled() {
2239-
localScope, realTimeScope := woc.prepareMetricScope(node)
2240-
woc.computeMetrics(processedTmpl.Metrics.Prometheus, localScope, realTimeScope, false)
2241-
}
2242-
}
2243-
22442221
retrieveNode, err := woc.wf.GetNodeByName(node.Name)
22452222
if err != nil {
22462223
err := fmt.Errorf("no Node found by the name of %s; wf.Status.Nodes=%+v", node.Name, woc.wf.Status.Nodes)
@@ -2270,6 +2247,29 @@ func (woc *wfOperationCtx) executeTemplate(ctx context.Context, nodeName string,
22702247
node = retryNode
22712248
}
22722249

2250+
if processedTmpl.Metrics != nil {
2251+
// Check if the node was just created, if it was emit realtime metrics.
2252+
// If the node did not previously exist, we can infer that it was created during the current operation, emit real time metrics.
2253+
if _, ok := woc.preExecutionNodePhases[node.ID]; !ok {
2254+
localScope, realTimeScope := woc.prepareMetricScope(node)
2255+
woc.computeMetrics(processedTmpl.Metrics.Prometheus, localScope, realTimeScope, true)
2256+
}
2257+
// Check if the node completed during this execution, if it did emit metrics
2258+
//
2259+
// This check is necessary because sometimes a node will be marked completed during the current execution and will
2260+
// not be considered again. The best example of this is the entrypoint steps/dag template (once completed, the
2261+
// workflow ends and it's not reconsidered). This checks makes sure that its metrics also get emitted.
2262+
//
2263+
// In this check, a completed node may or may not have existed prior to this execution. If it did exist, ensure that it wasn't
2264+
// completed before this execution. If it did not exist prior, then we can infer that it was completed during this execution.
2265+
// The statement "(!ok || !prevNodeStatus.Fulfilled())" checks for this behavior and represents the material conditional
2266+
// "ok -> !prevNodeStatus.Fulfilled()" (https://en.wikipedia.org/wiki/Material_conditional)
2267+
if prevNodeStatus, ok := woc.preExecutionNodePhases[node.ID]; (!ok || !prevNodeStatus.Fulfilled()) && node.Fulfilled() {
2268+
localScope, realTimeScope := woc.prepareMetricScope(node)
2269+
woc.computeMetrics(processedTmpl.Metrics.Prometheus, localScope, realTimeScope, false)
2270+
}
2271+
}
2272+
22732273
return node, nil
22742274
}
22752275

0 commit comments

Comments
 (0)