@@ -18,15 +18,15 @@ package org.apache.spark.deploy.kubernetes
18
18
19
19
import java .io .{File , FileInputStream }
20
20
import java .security .{KeyStore , SecureRandom }
21
- import java .util .concurrent .{TimeoutException , TimeUnit }
21
+ import java .util .concurrent .{CountDownLatch , TimeoutException , TimeUnit }
22
22
import java .util .concurrent .atomic .AtomicBoolean
23
23
import javax .net .ssl .{SSLContext , TrustManagerFactory , X509TrustManager }
24
24
25
25
import com .google .common .base .Charsets
26
26
import com .google .common .io .Files
27
27
import com .google .common .util .concurrent .SettableFuture
28
28
import io .fabric8 .kubernetes .api .model ._
29
- import io .fabric8 .kubernetes .client .{ConfigBuilder , DefaultKubernetesClient , KubernetesClient , KubernetesClientException , Watcher }
29
+ import io .fabric8 .kubernetes .client .{ConfigBuilder => K8SConfigBuilder , DefaultKubernetesClient , KubernetesClient , KubernetesClientException , Watcher }
30
30
import io .fabric8 .kubernetes .client .Watcher .Action
31
31
import org .apache .commons .codec .binary .Base64
32
32
import scala .collection .JavaConverters ._
@@ -67,6 +67,8 @@ private[spark] class Client(
67
67
private val uiPort = sparkConf.getInt(" spark.ui.port" , DEFAULT_UI_PORT )
68
68
private val driverSubmitTimeoutSecs = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_TIMEOUT )
69
69
70
+ private val waitForAppCompletion : Boolean = sparkConf.get(WAIT_FOR_APP_COMPLETION )
71
+
70
72
private val secretBase64String = {
71
73
val secretBytes = new Array [Byte ](128 )
72
74
SECURE_RANDOM .nextBytes(secretBytes)
@@ -81,9 +83,11 @@ private[spark] class Client(
81
83
ThreadUtils .newDaemonSingleThreadExecutor(" kubernetes-client-retryable-futures" ))
82
84
83
85
def run (): Unit = {
86
+ logInfo(s " Starting application $kubernetesAppId in Kubernetes... " )
84
87
val (driverSubmitSslOptions, isKeyStoreLocalFile) = parseDriverSubmitSslOptions()
88
+
85
89
val parsedCustomLabels = parseCustomLabels(customLabels)
86
- var k8ConfBuilder = new ConfigBuilder ()
90
+ var k8ConfBuilder = new K8SConfigBuilder ()
87
91
.withApiVersion(" v1" )
88
92
.withMasterUrl(master)
89
93
.withNamespace(namespace)
@@ -116,73 +120,97 @@ private[spark] class Client(
116
120
SPARK_APP_NAME_LABEL -> appName)
117
121
++ parsedCustomLabels).asJava
118
122
val containerPorts = buildContainerPorts()
119
- val submitCompletedFuture = SettableFuture .create[Boolean ]
120
- val submitPending = new AtomicBoolean (false )
121
- val podWatcher = new DriverPodWatcher (
122
- submitCompletedFuture,
123
- submitPending,
124
- kubernetesClient,
125
- driverSubmitSslOptions,
126
- Array (submitServerSecret) ++ sslSecrets,
127
- driverKubernetesSelectors)
123
+
124
+ // start outer watch for status logging of driver pod
125
+ val driverPodCompletedLatch = new CountDownLatch (1 )
126
+ // only enable interval logging in waitForAppCompletion mode; otherwise pass an interval of 0
127
+ val loggingInterval = if (waitForAppCompletion) sparkConf.get(REPORT_INTERVAL ) else 0
128
+ val loggingWatch = new LoggingPodStatusWatcher (driverPodCompletedLatch, kubernetesAppId,
129
+ loggingInterval)
128
130
Utils .tryWithResource(kubernetesClient
129
131
.pods()
130
132
.withLabels(driverKubernetesSelectors)
131
- .watch(podWatcher)) { _ =>
132
- kubernetesClient.pods().createNew()
133
- .withNewMetadata()
134
- .withName(kubernetesAppId)
133
+ .watch(loggingWatch)) { _ =>
134
+
135
+ // launch driver pod with inner watch to upload jars when it's ready
136
+ val submitCompletedFuture = SettableFuture .create[Boolean ]
137
+ val submitPending = new AtomicBoolean (false )
138
+ val podWatcher = new DriverPodWatcher (
139
+ submitCompletedFuture,
140
+ submitPending,
141
+ kubernetesClient,
142
+ driverSubmitSslOptions,
143
+ Array (submitServerSecret) ++ sslSecrets,
144
+ driverKubernetesSelectors)
145
+ Utils .tryWithResource(kubernetesClient
146
+ .pods()
135
147
.withLabels(driverKubernetesSelectors)
136
- .endMetadata()
137
- .withNewSpec()
138
- .withRestartPolicy(" OnFailure" )
139
- .addNewVolume()
140
- .withName(SUBMISSION_APP_SECRET_VOLUME_NAME )
141
- .withNewSecret()
142
- .withSecretName(submitServerSecret.getMetadata.getName)
143
- .endSecret()
144
- .endVolume
145
- .addToVolumes(sslVolumes : _* )
146
- .withServiceAccount(serviceAccount)
147
- .addNewContainer()
148
- .withName(DRIVER_CONTAINER_NAME )
149
- .withImage(driverDockerImage)
150
- .withImagePullPolicy(" IfNotPresent" )
151
- .addNewVolumeMount()
148
+ .watch(podWatcher)) { _ =>
149
+ kubernetesClient.pods().createNew()
150
+ .withNewMetadata()
151
+ .withName(kubernetesAppId)
152
+ .withLabels(driverKubernetesSelectors)
153
+ .endMetadata()
154
+ .withNewSpec()
155
+ .withRestartPolicy(" OnFailure" )
156
+ .addNewVolume()
152
157
.withName(SUBMISSION_APP_SECRET_VOLUME_NAME )
153
- .withMountPath(secretDirectory)
154
- .withReadOnly(true )
155
- .endVolumeMount()
156
- .addToVolumeMounts(sslVolumeMounts : _* )
157
- .addNewEnv()
158
- .withName(ENV_SUBMISSION_SECRET_LOCATION )
159
- .withValue(s " $secretDirectory/ $SUBMISSION_APP_SECRET_NAME" )
160
- .endEnv()
161
- .addNewEnv()
162
- .withName(ENV_SUBMISSION_SERVER_PORT )
163
- .withValue(SUBMISSION_SERVER_PORT .toString)
164
- .endEnv()
165
- .addToEnv(sslEnvs : _* )
166
- .withPorts(containerPorts.asJava)
167
- .endContainer()
168
- .endSpec()
169
- .done()
170
- var submitSucceeded = false
171
- try {
172
- submitCompletedFuture.get(driverSubmitTimeoutSecs, TimeUnit .SECONDS )
173
- submitSucceeded = true
174
- } catch {
175
- case e : TimeoutException =>
176
- val finalErrorMessage : String = buildSubmitFailedErrorMessage(kubernetesClient, e)
177
- logError(finalErrorMessage, e)
178
- throw new SparkException (finalErrorMessage, e)
179
- } finally {
180
- if (! submitSucceeded) {
181
- Utils .tryLogNonFatalError {
182
- kubernetesClient.pods.withName(kubernetesAppId).delete()
158
+ .withNewSecret()
159
+ .withSecretName(submitServerSecret.getMetadata.getName)
160
+ .endSecret()
161
+ .endVolume
162
+ .addToVolumes(sslVolumes : _* )
163
+ .withServiceAccount(serviceAccount)
164
+ .addNewContainer()
165
+ .withName(DRIVER_CONTAINER_NAME )
166
+ .withImage(driverDockerImage)
167
+ .withImagePullPolicy(" IfNotPresent" )
168
+ .addNewVolumeMount()
169
+ .withName(SUBMISSION_APP_SECRET_VOLUME_NAME )
170
+ .withMountPath(secretDirectory)
171
+ .withReadOnly(true )
172
+ .endVolumeMount()
173
+ .addToVolumeMounts(sslVolumeMounts : _* )
174
+ .addNewEnv()
175
+ .withName(ENV_SUBMISSION_SECRET_LOCATION )
176
+ .withValue(s " $secretDirectory/ $SUBMISSION_APP_SECRET_NAME" )
177
+ .endEnv()
178
+ .addNewEnv()
179
+ .withName(ENV_SUBMISSION_SERVER_PORT )
180
+ .withValue(SUBMISSION_SERVER_PORT .toString)
181
+ .endEnv()
182
+ .addToEnv(sslEnvs : _* )
183
+ .withPorts(containerPorts.asJava)
184
+ .endContainer()
185
+ .endSpec()
186
+ .done()
187
+ var submitSucceeded = false
188
+ try {
189
+ submitCompletedFuture.get(driverSubmitTimeoutSecs, TimeUnit .SECONDS )
190
+ submitSucceeded = true
191
+ logInfo(s " Finished launching local resources to application $kubernetesAppId" )
192
+ } catch {
193
+ case e : TimeoutException =>
194
+ val finalErrorMessage : String = buildSubmitFailedErrorMessage(kubernetesClient, e)
195
+ logError(finalErrorMessage, e)
196
+ throw new SparkException (finalErrorMessage, e)
197
+ } finally {
198
+ if (! submitSucceeded) {
199
+ Utils .tryLogNonFatalError {
200
+ kubernetesClient.pods.withName(kubernetesAppId).delete()
201
+ }
183
202
}
184
203
}
185
204
}
205
+
206
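+ // if waitForAppCompletion is set, block until the driver pod completion latch is released; otherwise just log the successful launch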
207
+ if (waitForAppCompletion) {
208
+ logInfo(s " Waiting for application $kubernetesAppId to finish... " )
209
+ driverPodCompletedLatch.await()
210
+ logInfo(s " Application $kubernetesAppId finished. " )
211
+ } else {
212
+ logInfo(s " Application $kubernetesAppId successfully launched. " )
213
+ }
186
214
}
187
215
} finally {
188
216
Utils .tryLogNonFatalError {
@@ -377,6 +405,8 @@ private[spark] class Client(
377
405
Future {
378
406
sparkConf.set(" spark.driver.host" , pod.getStatus.getPodIP)
379
407
val submitRequest = buildSubmissionRequest()
408
+ logInfo(s " Submitting local resources to driver pod for application " +
409
+ s " $kubernetesAppId ... " )
380
410
driverSubmitter.submitApplication(submitRequest)
381
411
}
382
412
}
0 commit comments