Skip to content

Commit

Permalink
KFP 1.0.4 Rebase (kubeflow#337)
Browse files Browse the repository at this point in the history
* KFP 1.0.3 Rebase

* comment out lines 130-142 in metrics_reporter.go (not supported in Tekton)

* fix merge conflicts in metrics_reporter.go#readNodeMetricsJSONOrEmpty()

* remove OWNERS file

* include changes from KFP 1.0.4
  • Loading branch information
kfp-tekton-bot authored Oct 26, 2020
1 parent 058c6c9 commit ae65557
Show file tree
Hide file tree
Showing 323 changed files with 19,817 additions and 19,413 deletions.
8 changes: 4 additions & 4 deletions .cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,11 @@ steps:
- id: 'buildCpuTrainer'
name: 'gcr.io/cloud-builders/docker'
entrypoint: '/bin/bash'
args: ['-c', 'cd /workspace/components/kubeflow/dnntrainer && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA -l ml-pipeline-kubeflow-tf-trainer -b 1.6.0']
args: ['-c', 'cd /workspace/components/kubeflow/dnntrainer && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA -l ml-pipeline-kubeflow-tf-trainer -b 2.3.0']
waitFor: ["-"]
- name: 'gcr.io/cloud-builders/docker'
entrypoint: '/bin/bash'
args: ['-c', 'cd /workspace/components/kubeflow/dnntrainer && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA -l ml-pipeline-kubeflow-tf-trainer-gpu -b 1.6.0-gpu']
args: ['-c', 'cd /workspace/components/kubeflow/dnntrainer && ./build_image.sh -p $PROJECT_ID -t $COMMIT_SHA -l ml-pipeline-kubeflow-tf-trainer-gpu -b 2.3.0-gpu']
id: 'buildGpuTrainer'
waitFor: ["-"]

Expand Down Expand Up @@ -206,7 +206,7 @@ steps:

# Pull third_party images
- name: 'gcr.io/cloud-builders/docker'
args: ['pull', 'gcr.io/tfx-oss-public/ml_metadata_store_server:0.21.1']
args: ['pull', 'gcr.io/tfx-oss-public/ml_metadata_store_server:0.22.1']
id: 'pullMetadataServer'
- name: 'gcr.io/cloud-builders/docker'
args: ['pull', 'gcr.io/ml-pipeline/minio:RELEASE.2019-08-14T20-37-41Z-license-compliance']
Expand Down Expand Up @@ -238,7 +238,7 @@ steps:
# Tag for Hosted - Tag to hosted folder with MKP friendly name
- id: 'tagForHosted'
waitFor: ['parseMajorMinorVersion', 'buildFrontend', 'buildApiServer', 'buildScheduledWorkflow',
waitFor: ['parseMajorMinorVersion', 'buildFrontend', 'buildApiServer', 'buildScheduledWorkflow',
'buildViewerCrdController', 'buildPersistenceAgent', 'buildInverseProxyAgent', 'buildVisualizationServer',
'buildMetadataWriter', 'buildCacheServer', 'buildCacheDeployer', 'buildMetadataEnvoy',
'buildMarketplaceDeployer', 'pullMetadataServer', 'pullMinio', 'pullMysql', 'pullCloudsqlProxy',
Expand Down
10 changes: 5 additions & 5 deletions .release.cloudbuild.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -260,14 +260,14 @@ steps:
docker push gcr.io/ml-pipeline/google/pipelines-test/visualizationserver:$(cat /workspace/mm.ver)
- name: 'gcr.io/cloud-builders/docker'
args: ['pull', 'gcr.io/tfx-oss-public/ml_metadata_store_server:0.21.1']
args: ['pull', 'gcr.io/tfx-oss-public/ml_metadata_store_server:0.22.1']
id: 'pullMetadataServer'
- name: 'gcr.io/cloud-builders/docker'
args: ['tag', 'gcr.io/tfx-oss-public/ml_metadata_store_server:0.21.1', 'gcr.io/ml-pipeline/google/pipelines/metadataserver:$TAG_NAME']
args: ['tag', 'gcr.io/tfx-oss-public/ml_metadata_store_server:0.22.1', 'gcr.io/ml-pipeline/google/pipelines/metadataserver:$TAG_NAME']
id: 'tagMetadataServerForMarketplace'
waitFor: ['pullMetadataServer']
- name: 'gcr.io/cloud-builders/docker'
args: ['tag', 'gcr.io/tfx-oss-public/ml_metadata_store_server:0.21.1', 'gcr.io/ml-pipeline/google/pipelines-test/metadataserver:$TAG_NAME']
args: ['tag', 'gcr.io/tfx-oss-public/ml_metadata_store_server:0.22.1', 'gcr.io/ml-pipeline/google/pipelines-test/metadataserver:$TAG_NAME']
id: 'tagMetadataServerForMarketplaceTest'
waitFor: ['pullMetadataServer']
- id: 'tagMetadataServerForMarketplaceMajorMinor'
Expand All @@ -277,8 +277,8 @@ steps:
args:
- -ceux
- |
docker tag gcr.io/tfx-oss-public/ml_metadata_store_server:0.21.1 gcr.io/ml-pipeline/google/pipelines/metadataserver:$(cat /workspace/mm.ver)
docker tag gcr.io/tfx-oss-public/ml_metadata_store_server:0.21.1 gcr.io/ml-pipeline/google/pipelines-test/metadataserver:$(cat /workspace/mm.ver)
docker tag gcr.io/tfx-oss-public/ml_metadata_store_server:0.22.1 gcr.io/ml-pipeline/google/pipelines/metadataserver:$(cat /workspace/mm.ver)
docker tag gcr.io/tfx-oss-public/ml_metadata_store_server:0.22.1 gcr.io/ml-pipeline/google/pipelines-test/metadataserver:$(cat /workspace/mm.ver)
docker push gcr.io/ml-pipeline/google/pipelines/metadataserver:$(cat /workspace/mm.ver)
docker push gcr.io/ml-pipeline/google/pipelines-test/metadataserver:$(cat /workspace/mm.ver)
Expand Down
45 changes: 14 additions & 31 deletions backend/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,22 @@
### Bazel build cannot work with the Tekton library because the current
### KFP Bazel does not support go.mod "replace" on key dependencies.

ARG BAZEL_IMAGE=golang:1.13.0
FROM $BAZEL_IMAGE as builder
# 1. Build api server application
# Use golang:1.13.1-stretch to keep GLIBC at 2.24 https://github.com/gotify/server/issues/225
FROM golang:1.13.1-stretch as builder
RUN apt-get update && apt-get install -y cmake clang musl-dev openssl

RUN apt-get update && \
apt-get install -y cmake clang musl-dev openssl
WORKDIR /go/src/github.com/kubeflow/pipelines

COPY . .
RUN go mod vendor
RUN GO111MODULE=on go build -o /bin/apiserver backend/src/apiserver/*.go

ARG google_application_credentials
ARG use_remote_build=false

# RUN bazel build -c opt --action_env=PATH --define=grpc_no_ares=true backend/src/apiserver:apiserver
# RUN if [ "$use_remote_build" = "true" ]; then \
# echo "Using remote build execution ..." && \
# printf "%s" "$google_application_credentials" > /credentials.json && \
# bazel --bazelrc=tools/bazel_builder/bazelrc \
# build -c opt backend/src/apiserver:apiserver --config=remote \
# --google_credentials=/credentials.json; \
# else \
# echo "Using local build execution..." && \
# bazel --bazelrc=tools/bazel_builder/bazelrc \
# build -c opt backend/src/apiserver:apiserver; \
# fi
RUN mkdir -p /go/src/github.com/kubeflow/pipelines/bazel-bin/backend/src/apiserver/
RUN go build -o /go/src/github.com/kubeflow/pipelines/bazel-bin/backend/src/apiserver/apiserver ./backend/src/apiserver

# Compile
# 2. Compile preloaded pipeline samples
FROM python:3.5 as compiler
RUN apt-get update -y && \
apt-get install --no-install-recommends -y -q default-jdk python3-setuptools python3-dev jq
RUN apt-get update -y && apt-get install --no-install-recommends -y -q default-jdk python3-setuptools python3-dev jq
RUN wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py
COPY backend/requirements.txt .
RUN python3 -m pip install -r requirements.txt
Expand Down Expand Up @@ -61,6 +46,7 @@ RUN set -e; \
python3 "$pipeline_py"; \
done

# 3. Start api web server
FROM golang:1.13.0

ARG COMMIT_SHA=unknown
Expand All @@ -71,17 +57,14 @@ ENV TAG_NAME=${TAG_NAME}
WORKDIR /bin

COPY third_party/license.txt /bin/license.txt
COPY --from=builder /go/src/github.com/kubeflow/pipelines/bazel-bin/backend/src/apiserver/ /usr/local/apiserver
RUN cp /usr/local/apiserver/apiserver /bin/apiserver && \
rm -rf /usr/local/apiserver
COPY backend/src/apiserver/config/ /config

COPY --from=builder /bin/apiserver /bin/apiserver
COPY --from=compiler /samples/ /samples/
RUN chmod +x /bin/apiserver

# Adding CA certificate so API server can download pipeline through URL
RUN apt-get update && apt-get install -y ca-certificates \
# wget is used for liveness/readiness probe command
wget
# Adding CA certificate so API server can download pipeline through URL and wget is used for liveness/readiness probe command
RUN apt-get update && apt-get install -y ca-certificates wget

# Pin sample doc links to the commit that built the backend image
# Commented out due to no commit sha for non-release build
Expand All @@ -92,4 +75,4 @@ RUN apt-get update && apt-get install -y ca-certificates \
EXPOSE 8888

# Start the apiserver
CMD apiserver --config=/config --sampleconfig=/config/sample_config.json -logtostderr=true
CMD /bin/apiserver --config=/config --sampleconfig=/config/sample_config.json -logtostderr=true
4 changes: 2 additions & 2 deletions backend/Dockerfile.persistenceagent
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ COPY --from=builder /go/src/github.com/kubeflow/pipelines/third_party/license.tx

ENV NAMESPACE ""

# Set Workflow TTL to 7 days
ENV TTL_SECONDS_AFTER_WORKFLOW_FINISH 604800
# Set Workflow TTL to 1 day. The way to use a different value for a particular Kubeflow Pipelines deployment is demonstrated in manifests/kustomize/base/pipeline/ml-pipeline-persistenceagent-deployment.yaml
ENV TTL_SECONDS_AFTER_WORKFLOW_FINISH 86400

CMD persistence_agent --logtostderr=true --namespace=${NAMESPACE} --ttlSecondsAfterWorkflowFinish=${TTL_SECONDS_AFTER_WORKFLOW_FINISH}
9 changes: 1 addition & 8 deletions backend/Dockerfile.visualization
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
FROM tensorflow/tensorflow:2.1.0-py3

RUN apt-get update \
&& apt-get install -y wget curl tar \
pkg-config libcairo2-dev libgirepository1.0-dev # For the pygobject and pycairo package setup due to licensing
&& apt-get install -y wget curl tar

RUN curl https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.tar.gz > /tmp/google-cloud-sdk.tar.gz
RUN mkdir -p /usr/local/gcloud
Expand All @@ -35,12 +34,6 @@ COPY backend/src/apiserver/visualization/requirements.txt /src

RUN pip3 install -r requirements.txt

COPY backend/src/apiserver/visualization/license.sh /src
COPY backend/src/apiserver/visualization/third_party_licenses.csv /src
COPY backend/src/apiserver/visualization/third_party_licenses /usr/licenses

RUN ./license.sh third_party_licenses.csv /usr/licenses

COPY backend/src/apiserver/visualization /src

ENTRYPOINT [ "python3", "server.py" ]
4 changes: 2 additions & 2 deletions backend/api/python_http_client/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ This file contains REST API specification for Kubeflow Pipelines. The file is au

This Python package is automatically generated by the [OpenAPI Generator](https://openapi-generator.tech) project:

- API version: 1.0.0
- Package version: 1.0.0
- API version: 1.0.4
- Package version: 1.0.4
- Build package: org.openapitools.codegen.languages.PythonClientCodegen
For more information, please visit [https://www.google.com](https://www.google.com)

Expand Down
2 changes: 1 addition & 1 deletion backend/api/python_http_client/kfp_server_api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

from __future__ import absolute_import

__version__ = "1.0.0"
__version__ = "1.0.4"

# import apis into sdk package
from kfp_server_api.api.experiment_service_api import ExperimentServiceApi
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def __init__(self, configuration=None, header_name=None, header_value=None,
self.default_headers[header_name] = header_value
self.cookie = cookie
# Set default User-Agent.
self.user_agent = 'OpenAPI-Generator/1.0.0/python'
self.user_agent = 'OpenAPI-Generator/1.0.4/python'
self.client_side_validation = configuration.client_side_validation

def __enter__(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -365,8 +365,8 @@ def to_debug_report(self):
return "Python SDK Debug Report:\n"\
"OS: {env}\n"\
"Python Version: {pyversion}\n"\
"Version of the API: 1.0.0\n"\
"SDK Package Version: 1.0.0".\
"Version of the API: 1.0.4\n"\
"SDK Package Version: 1.0.4".\
format(env=sys.platform, pyversion=sys.version)

def get_host_settings(self):
Expand Down
2 changes: 1 addition & 1 deletion backend/api/python_http_client/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from setuptools import setup, find_packages # noqa: H301

NAME = "kfp-server-api"
VERSION = "1.0.0"
VERSION = "1.0.4"
# To install the library, run the following
#
# python setup.py install
Expand Down
2 changes: 1 addition & 1 deletion backend/api/swagger/kfp_api_single_file.swagger.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions backend/metadata_writer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
FROM python:3.7
COPY backend/metadata_writer/requirements.txt /kfp/metadata_writer/
RUN python3 -m pip install -r /kfp/metadata_writer/requirements.txt
COPY components/license.sh components/third_party_licenses.csv /kfp/metadata_writer/
RUN mkdir /usr/licenses && /kfp/metadata_writer/license.sh /kfp/metadata_writer/third_party_licenses.csv /usr/licenses

COPY backend/metadata_writer/src/* /kfp/metadata_writer/
CMD python3 -u /kfp/metadata_writer/metadata_writer.py
6 changes: 4 additions & 2 deletions backend/src/agent/persistence/client/pipeline_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,10 @@ func (p *PipelineClient) ReportWorkflow(workflow *util.Workflow) error {

if err != nil {
statusCode, _ := status.FromError(err)
if statusCode.Code() == codes.InvalidArgument {
// Do not retry if there is something wrong with the workflow
if statusCode.Code() == codes.InvalidArgument || statusCode.Code() == codes.NotFound {
// Do not retry if either:
// * there is something wrong with the workflow
// * the workflow has been deleted by someone else
return util.NewCustomError(err, util.CUSTOM_CODE_PERMANENT,
"Error while reporting workflow resource (code: %v, message: %v): %v, %+v",
statusCode.Code(),
Expand Down
6 changes: 6 additions & 0 deletions backend/src/agent/persistence/client/pipeline_client_fake.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ type PipelineClientFake struct {
scheduledWorkflows map[string]*util.ScheduledWorkflow
err error
artifacts map[string]*api.ReadArtifactResponse
readArtifactRequest *api.ReadArtifactRequest
reportedMetricsRequest *api.ReportRunMetricsRequest
reportMetricsResponseStub *api.ReportRunMetricsResponse
reportMetricsErrorStub error
Expand Down Expand Up @@ -57,6 +58,7 @@ func (p *PipelineClientFake) ReportScheduledWorkflow(swf *util.ScheduledWorkflow
}

func (p *PipelineClientFake) ReadArtifact(request *api.ReadArtifactRequest) (*api.ReadArtifactResponse, error) {
p.readArtifactRequest = request
return p.artifacts[request.String()], nil
}

Expand All @@ -81,6 +83,10 @@ func (p *PipelineClientFake) StubArtifact(request *api.ReadArtifactRequest, resp
p.artifacts[request.String()] = response
}

func (p *PipelineClientFake) GetReadArtifactRequest() *api.ReadArtifactRequest {
return p.readArtifactRequest
}

func (p *PipelineClientFake) StubReportRunMetrics(response *api.ReportRunMetricsResponse, err error) {
p.reportMetricsResponseStub = response
p.err = err
Expand Down
20 changes: 17 additions & 3 deletions backend/src/agent/persistence/worker/metrics_reporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ func (r MetricsReporter) collectNodeMetricsOrNil(
nodeStatus.Status.TaskRunStatusFields.CompletionTime == nil {
return nil, nil
}
metricsJSON, err := r.readNodeMetricsJSONOrEmpty(runID, nodeStatus.PipelineTaskName)
metricsJSON, err := r.readNodeMetricsJSONOrEmpty(runID, nodeStatus)
if err != nil || metricsJSON == "" {
return nil, err
}
Expand Down Expand Up @@ -132,10 +132,24 @@ func (r MetricsReporter) collectNodeMetricsOrNil(
return reportMetricsRequest.GetMetrics(), nil
}

func (r MetricsReporter) readNodeMetricsJSONOrEmpty(runID string, nodeID string) (string, error) {
func (r MetricsReporter) readNodeMetricsJSONOrEmpty(runID string, nodeStatus workflowapi.PipelineRunTaskRunStatus) (string, error) {
// Tekton doesn't support any artifact spec, artifact records are done by our custom metadata writers:
// if nodeStatus.Outputs == nil || nodeStatus.Outputs.Artifacts == nil {
// return "", nil // No output artifacts, skip the reporting
// }
// var foundMetricsArtifact bool = false
// for _, artifact := range nodeStatus.Outputs.Artifacts {
// if artifact.Name == metricsArtifactName {
// foundMetricsArtifact = true
// }
// }
// if !foundMetricsArtifact {
// return "", nil // No metrics artifact, skip the reporting
// }

artifactRequest := &api.ReadArtifactRequest{
RunId: runID,
NodeId: nodeID,
NodeId: nodeStatus.PipelineTaskName,
ArtifactName: metricsArtifactName,
}
artifactResponse, err := r.pipelineClient.ReadArtifact(artifactRequest)
Expand Down
Loading

0 comments on commit ae65557

Please sign in to comment.