Skip to content

Commit

Permalink
optimize(sdk): Rewrite data passing script into more optimized code (k…
Browse files (browse the repository at this point in the history)
  • Loading branch information
Tomcli authored Aug 16, 2022
1 parent 71f1cc1 commit 9df5873
Show file tree
Hide file tree
Showing 8 changed files with 190 additions and 192 deletions.
56 changes: 32 additions & 24 deletions sdk/python/kfp_tekton/compiler/_data_passing_rewriter.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -658,6 +658,7 @@ def append_taskrun_params(task_name_append: str, task_path_name: str):
copy_results_artifact_step = _get_base_step('copy-results-artifacts')
copy_results_artifact_step['onError'] = 'continue' # supported by v0.27+ of tekton.
script = "set -exo pipefail\nTOTAL_SIZE=0\n"
injected_script = False
for result in task_spec['results']:
if task['name'] in artifact_items:
artifact_i = artifact_items[task['name']]
Expand All @@ -666,30 +667,37 @@ def append_taskrun_params(task_name_append: str, task_path_name: str):
src = artifact
dst = '$(results.%s.path)' % sanitize_k8s_name(result['name'], allow_capital=True)
if artifact_name == result['name'] and src != dst:
add_copy_results_artifacts_step = True
total_size_command = 'ARTIFACT_SIZE=`wc -c %s${SUFFIX} | awk \'{print $1}\'`\n' % src + \
'TOTAL_SIZE=$( expr $TOTAL_SIZE + $ARTIFACT_SIZE)\n'
copy_command = ' cp ' + src + ' ' + dst + '\n'
if env.get('OUTPUT_PREVIEW', 'false').lower() == 'true':
preview_size = env.get('OUTPUT_PREVIEW_SIZE', '100')
total_size_command = 'TOTAL_SIZE=$( expr $TOTAL_SIZE + %s)\n' % preview_size
copy_command = ' dd if=' + src + ' of=' + \
dst + ' bs=' + preview_size + ' count=1\n'
script += (
'if [ -d ' + src + ' ]; then\n' +
' tar -czvf ' + src + '.tar.gz ' + src + '\n' +
' SUFFIX=".tar.gz"\n' +
'fi\n' +
total_size_command +
'touch ' + dst + '\n' + # create an empty file by default.
'if [[ $TOTAL_SIZE -lt 3072 ]]; then\n' +
' if [ -d ' + src + ' ]; then\n' +
' tar -tzf ' + src + '.tar.gz > ' + dst + '\n' +
' elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" %s; then\n' % src +
copy_command +
' fi\n' +
'fi\n'
)
if not injected_script:
add_copy_results_artifacts_step = True
src_arg = '"$1"'
dst_arg = '"$2"'
total_size_command = 'ARTIFACT_SIZE=`wc -c %s${SUFFIX} | awk \'{print $1}\'`\n' % src_arg + \
'TOTAL_SIZE=$( expr $TOTAL_SIZE + $ARTIFACT_SIZE)\n'
copy_command = ' cp ' + src_arg + ' ' + dst_arg + '\n'
if env.get('OUTPUT_PREVIEW', 'false').lower() == 'true':
preview_size = env.get('OUTPUT_PREVIEW_SIZE', '100')
total_size_command = 'TOTAL_SIZE=$( expr $TOTAL_SIZE + %s)\n' % preview_size
copy_command = ' dd if=' + src_arg + ' of=' + \
dst_arg + ' bs=' + preview_size + ' count=1\n'
script += (
'copy_artifact() {\n'
'if [ -d ' + src_arg + ' ]; then\n' +
' tar -czvf ' + src_arg + '.tar.gz ' + src_arg + '\n' +
' SUFFIX=".tar.gz"\n' +
'fi\n' +
total_size_command +
'touch ' + dst_arg + '\n' + # create an empty file by default.
'if [[ $TOTAL_SIZE -lt 3072 ]]; then\n' +
' if [ -d ' + src_arg + ' ]; then\n' +
' tar -tzf ' + src_arg + '.tar.gz > ' + dst_arg + '\n' +
' elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" %s; then\n' % src_arg +
copy_command +
' fi\n' +
'fi\n' +
'}\n'
)
injected_script = True
script += 'copy_artifact %s %s\n' % (src, dst)
copy_results_artifact_step['command'].append(script)
_append_original_pr_name_env_to_step(copy_results_artifact_step)
if add_copy_results_artifacts_step:
Expand Down
19 changes: 11 additions & 8 deletions sdk/python/tests/compiler/testdata/artifact_outputs.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -79,20 +79,23 @@ spec:
- |
set -exo pipefail
TOTAL_SIZE=0
if [ -d $(workspaces.gcs-download.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/data ]; then
tar -czvf $(workspaces.gcs-download.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/data.tar.gz $(workspaces.gcs-download.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/data
copy_artifact() {
if [ -d "$1" ]; then
tar -czvf "$1".tar.gz "$1"
SUFFIX=".tar.gz"
fi
ARTIFACT_SIZE=`wc -c $(workspaces.gcs-download.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/data${SUFFIX} | awk '{print $1}'`
ARTIFACT_SIZE=`wc -c "$1"${SUFFIX} | awk '{print $1}'`
TOTAL_SIZE=$( expr $TOTAL_SIZE + $ARTIFACT_SIZE)
touch $(results.data.path)
touch "$2"
if [[ $TOTAL_SIZE -lt 3072 ]]; then
if [ -d $(workspaces.gcs-download.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/data ]; then
tar -tzf $(workspaces.gcs-download.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/data.tar.gz > $(results.data.path)
elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" $(workspaces.gcs-download.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/data; then
cp $(workspaces.gcs-download.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/data $(results.data.path)
if [ -d "$1" ]; then
tar -tzf "$1".tar.gz > "$2"
elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" "$1"; then
cp "$1" "$2"
fi
fi
}
copy_artifact $(workspaces.gcs-download.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/data $(results.data.path)
onError: continue
env:
- name: ORIG_PR_NAME
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -82,20 +82,23 @@ spec:
- |
set -exo pipefail
TOTAL_SIZE=0
if [ -d $(workspaces.producer.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2 ]; then
tar -czvf $(workspaces.producer.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2.tar.gz $(workspaces.producer.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2
copy_artifact() {
if [ -d "$1" ]; then
tar -czvf "$1".tar.gz "$1"
SUFFIX=".tar.gz"
fi
ARTIFACT_SIZE=`wc -c $(workspaces.producer.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2${SUFFIX} | awk '{print $1}'`
ARTIFACT_SIZE=`wc -c "$1"${SUFFIX} | awk '{print $1}'`
TOTAL_SIZE=$( expr $TOTAL_SIZE + $ARTIFACT_SIZE)
touch $(results.Output-2.path)
touch "$2"
if [[ $TOTAL_SIZE -lt 3072 ]]; then
if [ -d $(workspaces.producer.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2 ]; then
tar -tzf $(workspaces.producer.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2.tar.gz > $(results.Output-2.path)
elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" $(workspaces.producer.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2; then
cp $(workspaces.producer.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2 $(results.Output-2.path)
if [ -d "$1" ]; then
tar -tzf "$1".tar.gz > "$2"
elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" "$1"; then
cp "$1" "$2"
fi
fi
}
copy_artifact $(workspaces.producer.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2 $(results.Output-2.path)
onError: continue
env:
- name: ORIG_PR_NAME
Expand Down Expand Up @@ -166,20 +169,23 @@ spec:
- |
set -exo pipefail
TOTAL_SIZE=0
if [ -d $(workspaces.processor.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2 ]; then
tar -czvf $(workspaces.processor.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2.tar.gz $(workspaces.processor.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2
copy_artifact() {
if [ -d "$1" ]; then
tar -czvf "$1".tar.gz "$1"
SUFFIX=".tar.gz"
fi
ARTIFACT_SIZE=`wc -c $(workspaces.processor.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2${SUFFIX} | awk '{print $1}'`
ARTIFACT_SIZE=`wc -c "$1"${SUFFIX} | awk '{print $1}'`
TOTAL_SIZE=$( expr $TOTAL_SIZE + $ARTIFACT_SIZE)
touch $(results.Output-2.path)
touch "$2"
if [[ $TOTAL_SIZE -lt 3072 ]]; then
if [ -d $(workspaces.processor.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2 ]; then
tar -tzf $(workspaces.processor.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2.tar.gz > $(results.Output-2.path)
elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" $(workspaces.processor.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2; then
cp $(workspaces.processor.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2 $(results.Output-2.path)
if [ -d "$1" ]; then
tar -tzf "$1".tar.gz > "$2"
elif ! awk "/[^[:print:]]/{f=1} END{exit !f}" "$1"; then
cp "$1" "$2"
fi
fi
}
copy_artifact $(workspaces.processor.path)/artifacts/$ORIG_PR_NAME/$(context.taskRun.name)/Output-2 $(results.Output-2.path)
onError: continue
env:
- name: ORIG_PR_NAME
Expand Down
Loading

0 comments on commit 9df5873

Please sign in to comment.