Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion plugins/nf-k8s/src/main/nextflow/k8s/K8sTaskHandler.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,15 @@ class K8sTaskHandler extends TaskHandler implements FusionAwareTask {
}
else {
// finalize the task
task.exitStatus = readExitFile()
// Read the exit code from the K8s container terminated state. If it is 0 (successful)
// or missing, fall back to the exit code in the `.exitcode` file created by Nextflow.
// Rationale: in case of error (e.g. OOMKilled, pod eviction) the exit code reported by
// the K8s API is more reliable, because the container may terminate before the exit file is written.
// See https://github.com/nextflow-io/nextflow/issues/6436
// https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.30/#containerstateterminated-v1-core
log.trace("[k8s] Container Terminated state ${state.terminated}")
final k8sExitCode = (state.terminated as Map)?.exitCode as Integer
task.exitStatus = k8sExitCode ?: readExitFile()
task.stdout = outputFile
task.stderr = errorFile
}
Expand Down
26 changes: 26 additions & 0 deletions plugins/nf-k8s/src/test/nextflow/k8s/K8sTaskHandlerTest.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,32 @@ class K8sTaskHandlerTest extends Specification {

}

/*
 * Checks that `checkIfCompleted()` takes the task exit status from the
 * `exitCode` entry of the terminated container state when that value is
 * non-zero, without calling `readExitFile()`.
 */
def 'should use K8s exit code when available' () {
    given:
    // fixture values handed to the handler under test
    def podName = 'pod-xyz'
    def stdoutPath = Paths.get('out.file')
    def stderrPath = Paths.get('err.file')
    // terminated container state carrying a non-zero exit code
    def terminated = [
        reason: "Error",
        startedAt: "2018-01-13T10:09:36Z",
        finishedAt: "2018-01-13T10:19:36Z",
        exitCode: 137
    ]
    def client = Mock(K8sClient)
    def task = new TaskRun()
    def handler = Spy(new K8sTaskHandler(
        task: task,
        client: client,
        podName: podName,
        outputFile: stdoutPath,
        errorFile: stderrPath))

    when:
    def result = handler.checkIfCompleted()

    then:
    // the handler observes the terminated state exactly once and records its timestamps
    1 * handler.getState() >> [terminated: terminated]
    1 * handler.updateTimestamps(terminated)
    // the exit file must not be consulted
    0 * handler.readExitFile()
    // the post-completion hooks are each invoked once
    1 * handler.deletePodIfSuccessful(task) >> null
    1 * handler.savePodLogOnError(task) >> null

    and:
    // the exit status is the one carried by the terminated state
    handler.task.exitStatus == 137
    handler.status == TaskStatus.COMPLETED
    result == true
}

def 'should kill a pod' () {
given:
def POD_NAME = 'pod-xyz'
Expand Down