-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
17 changed files
with
277 additions
and
661 deletions.
There are no files selected for viewing
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
approvers: | ||
- hougangliu | ||
reviewers: | ||
- hougangliu |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# Component definition for the TFJob launcher: runs /ml/launch_tfjob.py inside
# the launcher image and forwards each input below as a command-line flag.
name: Kubeflow - Launch TFJob
description: Kubeflow TFJob launcher
inputs:
# Only Name has no default, so it is the one input callers must always supply.
- {name: Name,                       type: String,                          description: 'TFJob name.'}
- {name: Namespace,                  type: String,  default: kubeflow,      description: 'TFJob namespace.'}
- {name: Version,                    type: String,  default: v1,            description: 'TFJob version.'}
# NOTE(review): the three integer inputs below default to -1; presumably the
# launcher treats -1 as "not set" -- confirm against launch_tfjob.py.
- {name: ActiveDeadlineSeconds,      type: Integer, default: -1,            description: 'Specifies the duration (in seconds) since startTime during which the job can remain active before it is terminated. Must be a positive integer. This setting applies only to pods where restartPolicy is OnFailure or Always.'}
- {name: BackoffLimit,               type: Integer, default: -1,            description: 'Number of retries before marking this job as failed.'}
- {name: ttl Seconds After Finished, type: Integer, default: -1,            description: 'Defines the TTL for cleaning up finished TFJobs.'}
- {name: CleanPodPolicy,             type: String,  default: Running,       description: 'Defines the policy for cleaning up pods after the TFJob completes.'}
# Replica specs are passed as JSON; each defaults to an empty object.
- {name: PS Spec,                    type: JSON,    default: '{}',          description: 'TFJob ps replicaSpecs.'}
- {name: Worker Spec,                type: JSON,    default: '{}',          description: 'TFJob worker replicaSpecs.'}
- {name: Chief Spec,                 type: JSON,    default: '{}',          description: 'TFJob chief replicaSpecs.'}
- {name: Evaluator Spec,             type: JSON,    default: '{}',          description: 'TFJob evaluator replicaSpecs.'}
- {name: Tfjob Timeout Minutes,      type: Integer, default: 1440,          description: 'Time in minutes to wait for the TFJob to complete.'}
- {name: Delete Finished Tfjob,      type: Bool,    default: 'True',        description: 'Whether to delete the tfjob after it is finished.'}
implementation:
  container:
    image: liuhougangxa/kubeflow-tfjob-launcher:latest
    command: [python, /ml/launch_tfjob.py]
    # Each flag is followed by the value of the input it forwards.
    args: [
      --name,                     {inputValue: Name},
      --namespace,                {inputValue: Namespace},
      --version,                  {inputValue: Version},
      --activeDeadlineSeconds,    {inputValue: ActiveDeadlineSeconds},
      --backoffLimit,             {inputValue: BackoffLimit},
      --cleanPodPolicy,           {inputValue: CleanPodPolicy},
      --ttlSecondsAfterFinished,  {inputValue: ttl Seconds After Finished},
      --psSpec,                   {inputValue: PS Spec},
      --workerSpec,               {inputValue: Worker Spec},
      --chiefSpec,                {inputValue: Chief Spec},
      --evaluatorSpec,            {inputValue: Evaluator Spec},
      --tfjobTimeoutMinutes,      {inputValue: Tfjob Timeout Minutes},
      --deleteAfterDone,          {inputValue: Delete Finished Tfjob},
    ]
34 changes: 0 additions & 34 deletions
34
components/kubeflow/launcher/kubeflow_tfjob_launcher_op.py
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
import json | ||
from kfp import components | ||
import kfp.dsl as dsl | ||
|
||
def _mnist_replica_spec(replicas):
    """Return a TFJob replica spec that runs the MNIST estimator container.

    Args:
        replicas: Value stored under the spec's ``"replicas"`` key.

    Returns:
        A new dict on every call, so callers may mutate it freely.
    """
    return {
        "replicas": replicas,
        "restartPolicy": "OnFailure",
        "template": {
            "spec": {
                "containers": [
                    {
                        "command": [
                            "python",
                            "/opt/model.py",
                        ],
                        "args": [
                            "--tf-train-steps=6000",
                        ],
                        "image": "liuhougangxa/tf-estimator-mnist",
                        "name": "tensorflow",
                    },
                ],
            },
        },
    }


@dsl.pipeline(
    name="Launch kubeflow tfjob",
    description="An example to launch tfjob."
)
def mnist_train(
        name="mnist",
        namespace="kubeflow",
        workerNum=3,
        ttlSecondsAfterFinished=-1,
        tfjobTimeoutMinutes=60,
        deleteAfterDone=False):
    """Launch an MNIST training TFJob through the TFJob launcher component.

    Builds one chief replica spec and, when ``workerNum > 0`` evaluates
    truthy, a worker replica spec, then invokes the launcher component loaded
    from ``./component.yaml``.

    Args:
        name: Passed to the launcher as ``name``.
        namespace: Passed to the launcher as ``namespace``.
        workerNum: Stored as the worker spec's ``"replicas"`` value.
        ttlSecondsAfterFinished: Passed as ``ttl_seconds_after_finished``.
        tfjobTimeoutMinutes: Passed as ``tfjob_timeout_minutes``.
        deleteAfterDone: Passed as ``delete_finished_tfjob``.
    """
    # The path is relative, so it depends on where the script is run from.
    tfjob_launcher_op = components.load_component_from_file("./component.yaml")
    # tfjob_launcher_op = components.load_component_from_url('https://raw.githubusercontent.com/kubeflow/pipelines/master/components/kubeflow/launcher/component.yaml')

    chief = _mnist_replica_spec(1)

    # NOTE(review): when the pipeline is compiled, workerNum is presumably a
    # pipeline parameter placeholder rather than an int, so this comparison
    # may always be truthy and the empty-worker branch never taken. Confirm
    # against the KFP SDK version in use.
    worker = _mnist_replica_spec(workerNum) if workerNum > 0 else {}

    tfjob_launcher_op(
        name=name,
        namespace=namespace,
        ttl_seconds_after_finished=ttlSecondsAfterFinished,
        worker_spec=worker,
        chief_spec=chief,
        tfjob_timeout_minutes=tfjobTimeoutMinutes,
        delete_finished_tfjob=deleteAfterDone
    )
|
||
if __name__ == "__main__":
    # Compile the pipeline into a package written next to this script
    # (this file's path with ".tar.gz" appended).
    from kfp import compiler

    package_path = __file__ + ".tar.gz"
    compiler.Compiler().compile(mnist_train, package_path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.