Merge pull request #362 from canonical/1.8-updates-dev-branch
`kfp-operators` upgrades from 2.0.0-alpha.7 -> 2.0.3 for the CKF 1.8 release
DnPlas authored Nov 20, 2023
2 parents 772af24 + 8319ea0 commit 9386488
Showing 72 changed files with 2,070 additions and 448 deletions.
11 changes: 9 additions & 2 deletions .github/workflows/integrate.yaml
@@ -14,6 +14,7 @@ jobs:
matrix:
charm:
- kfp-api
- kfp-metadata-writer
- kfp-persistence
- kfp-profile-controller
- kfp-schedwf
@@ -33,6 +34,7 @@ jobs:
matrix:
charm:
- kfp-api
- kfp-metadata-writer
- kfp-persistence
- kfp-profile-controller
- kfp-schedwf
@@ -52,6 +54,7 @@ jobs:
matrix:
charm:
- kfp-api
- kfp-metadata-writer
- kfp-persistence
- kfp-profile-controller
- kfp-schedwf
@@ -106,7 +109,11 @@ jobs:
test-bundle:
name: Test the bundle
runs-on: ubuntu-20.04

strategy:
matrix:
sdk:
- v1
- v2
steps:
# This is a workaround for https://github.com/canonical/kfp-operators/issues/250
# Ideally we'd use self-hosted runners, but this effort is still not stable
@@ -142,7 +149,7 @@ jobs:
# Run integration tests against the 1.7 generic install bundle definition
# Using destructive mode because of https://github.com/canonical/charmcraft/issues/1132
# and https://github.com/canonical/charmcraft/issues/1138
sg snap_microk8s -c "tox -e bundle-integration -- --model kubeflow --bundle=./tests/integration/bundles/kfp_1.7_stable_install.yaml.j2 --destructive-mode"
sg snap_microk8s -c "tox -e bundle-integration-${{ matrix.sdk }} -- --model kubeflow --bundle=./tests/integration/bundles/kfp_latest_edge.yaml.j2 --destructive-mode"
- name: Get all
run: kubectl get all -A
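The new `sdk` matrix axis fans the bundle test out into one job per SDK version, and `${{ matrix.sdk }}` in the run step above selects the matching tox environment. A minimal sketch (illustrative only, not part of the workflow) of the commands the two matrix entries produce:

```python
# Illustrative only: mimics the GitHub Actions matrix expansion of the
# `sdk` axis into per-SDK tox environment names.
for sdk in ("v1", "v2"):
    print(
        f'sg snap_microk8s -c "tox -e bundle-integration-{sdk} -- '
        "--model kubeflow "
        "--bundle=./tests/integration/bundles/kfp_latest_edge.yaml.j2 "
        '--destructive-mode"'
    )
```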
1 change: 1 addition & 0 deletions .gitignore
@@ -5,3 +5,4 @@ __pycache__/
.coverage
.idea
.vscode
venv
2 changes: 1 addition & 1 deletion charms/kfp-api/config.yaml
@@ -41,7 +41,7 @@ options:
Used if service account is left unspecified when creating a run
init-connection-timeout:
type: string
default: '5s'
default: '6m'
description: |
Connection timeout used when initializing clients for related services.
The format used can be anything accepted by `time.ParseDuration`.
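Raising the default from `5s` to `6m` gives related services far longer to become reachable before client initialization fails. Because the api server parses this value with Go's `time.ParseDuration`, strings such as `6m`, `300ms`, or `1h10m` are valid while a bare number is not. A rough Python sketch of that syntax (simplified: it ignores signs and the `µs` alias):

```python
import re

# Simplified model of Go's time.ParseDuration unit grammar.
_UNITS = {"ns": 1e-9, "us": 1e-6, "ms": 1e-3, "s": 1.0, "m": 60.0, "h": 3600.0}
_TOKEN = re.compile(r"(\d+(?:\.\d+)?)(ns|us|ms|s|m|h)")

def parse_duration(text: str) -> float:
    """Return the duration in seconds, mimicking time.ParseDuration."""
    matched = "".join(m.group(0) for m in _TOKEN.finditer(text))
    if not text or matched != text:
        raise ValueError(f"invalid duration: {text!r}")
    return sum(float(m.group(1)) * _UNITS[m.group(2)] for m in _TOKEN.finditer(text))

assert parse_duration("6m") == 360.0  # new default
assert parse_duration("5s") == 5.0    # old default
```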
4 changes: 2 additions & 2 deletions charms/kfp-api/metadata.yaml
@@ -9,13 +9,13 @@ website: https://charmhub.io/kfp-api
source: https://github.com/canonical/kfp-operators
issues: https://github.com/canonical/kfp-operators/issues
containers:
ml-pipeline-api-server:
apiserver:
resource: oci-image
resources:
oci-image:
type: oci-image
description: Backing OCI image
upstream-source: charmedkubeflow/api-server:2.0.0-alpha.7_20.04_1
upstream-source: gcr.io/ml-pipeline/api-server:2.0.3
requires:
mysql:
interface: mysql
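Renaming the container from `ml-pipeline-api-server` to `apiserver` also renames the charm's pebble-ready event, which is why `charm.py` below swaps `ml_pipeline_api_server_pebble_ready` for `apiserver_pebble_ready`. A small sketch of the name derivation ops applies:

```python
# ops derives the pebble-ready event name from the container name
# declared in metadata.yaml by replacing dashes with underscores.
def pebble_ready_event(container_name: str) -> str:
    return container_name.replace("-", "_") + "_pebble_ready"

assert pebble_ready_event("ml-pipeline-api-server") == "ml_pipeline_api_server_pebble_ready"
assert pebble_ready_event("apiserver") == "apiserver_pebble_ready"
```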
144 changes: 52 additions & 92 deletions charms/kfp-api/src/charm.py
@@ -7,7 +7,6 @@
https://github.com/canonical/kfp-operators/
"""

import json
import logging
from pathlib import Path

@@ -24,14 +23,7 @@
from lightkube.models.core_v1 import ServicePort
from ops.charm import CharmBase
from ops.main import main
from ops.model import (
ActiveStatus,
BlockedStatus,
Container,
MaintenanceStatus,
ModelError,
WaitingStatus,
)
from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus, ModelError, WaitingStatus
from ops.pebble import CheckStatus, Layer
from serialized_data_interface import (
NoCompatibleVersions,
@@ -77,7 +69,7 @@ def __init__(self, *args):
f"--sampleconfig={SAMPLE_CONFIG} "
"-logtostderr=true "
)
self._container_name = "ml-pipeline-api-server"
self._container_name = "apiserver"
self._database_name = "mlpipeline"
self._container = self.unit.get_container(self._container_name)

@@ -101,7 +93,7 @@ def __init__(self, *args):
# setup events to be handled by main event handler
self.framework.observe(self.on.leader_elected, self._on_event)
self.framework.observe(self.on.config_changed, self._on_event)
self.framework.observe(self.on.ml_pipeline_api_server_pebble_ready, self._on_event)
self.framework.observe(self.on.apiserver_pebble_ready, self._on_event)
change_events = [
self.on["object-storage"].relation_changed,
self.on["kfp-viz"].relation_changed,
@@ -175,9 +167,7 @@ def k8s_resource_handler(self, handler: KubernetesResourceHandler):
@property
def service_environment(self):
"""Return environment variables based on model configuration."""
ret_env_vars = {"POD_NAMESPACE": self.model.name}

return ret_env_vars
return self._generate_environment()

@property
def _kfp_api_layer(self) -> Layer:
@@ -210,76 +200,71 @@ def _kfp_api_layer(self) -> Layer:

return Layer(layer_config)

def _generate_config(self, interfaces):
"""Generate configuration based on supplied data.
def _generate_environment(self) -> dict:
"""Generate environment based on supplied data.
Configuration is generated based on:
- Supplied interfaces.
- Database data: from MySQL relation data or from data platform library.
- Model configuration.
Return:
env_vars(dict): a dictionary of environment variables for the api server.
"""

config = self.model.config
try:
interfaces = self._get_interfaces()
db_data = self._get_db_data()
os = self._get_object_storage(interfaces)
viz = self._get_viz(interfaces)
object_storage = self._get_object_storage(interfaces)
viz_data = self._get_viz(interfaces)
except ErrorWithStatus as error:
self.logger.error("Failed to generate container configuration.")
raise error

# at this point all data is correctly populated and proper config can be generated
config_json = {
"DBConfig": {
"ConMaxLifeTime": "120s",
"DBName": db_data["db_name"],
"DriverName": "mysql",
"GroupConcatMaxLen": "4194304",
"Host": db_data["db_host"],
"Password": db_data["db_password"],
"Port": db_data["db_port"],
"User": db_data["db_username"],
},
"ObjectStoreConfig": {
"AccessKey": os["access-key"],
"BucketName": config["object-store-bucket-name"],
"Host": f"{os['service']}.{os['namespace']}",
"Multipart": {"Disable": "true"},
"PipelinePath": "pipelines",
"Port": str(os["port"]),
"Region": "",
"SecretAccessKey": os["secret-key"],
"Secure": str(os["secure"]).lower(),
},
"ARCHIVE_CONFIG_LOG_FILE_NAME": config["log-archive-filename"],
"ARCHIVE_CONFIG_LOG_PATH_PREFIX": config["log-archive-prefix"],
"AUTO_UPDATE_PIPELINE_DEFAULT_VERSION": str(
config["auto-update-default-version"]
).lower(),
"CACHE_IMAGE": config["cache-image"],
"CACHE_NODE_RESTRICTIONS": "false",
"CacheEnabled": str(config["cache-enabled"]).lower(),
"DefaultPipelineRunnerServiceAccount": config["runner-sa"],
"InitConnectionTimeout": config["init-connection-timeout"],
env_vars = {
# Configurations that are also defined in the upstream manifests
"AUTO_UPDATE_PIPELINE_DEFAULT_VERSION": self.model.config[
"auto-update-default-version"
],
"KFP_API_SERVICE_NAME": KFP_API_SERVICE_NAME,
"KUBEFLOW_USERID_HEADER": "kubeflow-userid",
"KUBEFLOW_USERID_PREFIX": "",
"POD_NAMESPACE": self.model.name,
"OBJECTSTORECONFIG_SECURE": "false",
"OBJECTSTORECONFIG_BUCKETNAME": self.model.config["object-store-bucket-name"],
"DBCONFIG_CONMAXLIFETIME": "120s",
"DB_DRIVER_NAME": "mysql",
"DBCONFIG_MYSQLCONFIG_USER": db_data["db_username"],
"DBCONFIG_MYSQLCONFIG_PASSWORD": db_data["db_password"],
"DBCONFIG_MYSQLCONFIG_DBNAME": db_data["db_name"],
"DBCONFIG_MYSQLCONFIG_HOST": db_data["db_host"],
"DBCONFIG_MYSQLCONFIG_PORT": db_data["db_port"],
"OBJECTSTORECONFIG_ACCESSKEY": object_storage["access-key"],
"OBJECTSTORECONFIG_SECRETACCESSKEY": object_storage["secret-key"],
"DEFAULTPIPELINERUNNERSERVICEACCOUNT": "default-editor",
"MULTIUSER": "true",
"ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_HOST": viz["service-name"],
"ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_PORT": viz["service-port"],
"VISUALIZATIONSERVICE_NAME": viz_data["service-name"],
"VISUALIZATIONSERVICE_PORT": viz_data["service-port"],
"ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_HOST": viz_data["service-name"],
"ML_PIPELINE_VISUALIZATIONSERVER_SERVICE_PORT": viz_data["service-port"],
"CACHE_IMAGE": self.model.config["cache-image"],
# Configurations charmed-kubeflow adds to those of upstream
"ARCHIVE_CONFIG_LOG_FILE_NAME": self.model.config["log-archive-filename"],
"ARCHIVE_CONFIG_LOG_PATH_PREFIX": self.model.config["log-archive-prefix"],
# OBJECTSTORECONFIG_HOST and _PORT set the object storage configurations,
# taking precedence over configuration in the config.json or
# MINIO_SERVICE_SERVICE_* environment variables.
# NOTE: While OBJECTSTORECONFIG_HOST and _PORT control the object store
# that the apiserver connects to, other parts of kfp currently cannot use
# object stores with arbitrary names. See
# https://github.com/kubeflow/pipelines/issues/9689 and
# https://github.com/canonical/minio-operator/pull/151 for more details.
"OBJECTSTORECONFIG_HOST": f"{object_storage['service']}.{object_storage['namespace']}",
"OBJECTSTORECONFIG_PORT": str(object_storage["port"]),
"OBJECTSTORECONFIG_REGION": "",
}
return config_json

def _check_container_connection(self, container: Container) -> None:
"""Check if connection can be made with container.
Args:
container: the named container in a unit to check.

Raises:
ErrorWithStatus if the connection cannot be made.
"""
if not container.can_connect():
raise ErrorWithStatus("Pod startup is not complete", MaintenanceStatus)
return env_vars

def _check_status(self):
"""Check status of workload and set status accordingly."""
@@ -301,28 +286,6 @@ def _check_status(self):
raise ErrorWithStatus("Workload failed health check", MaintenanceStatus)
self.model.unit.status = ActiveStatus()

def _upload_files_to_container(self, config_json):
"""Upload required files to container."""
try:
self._check_container_connection(self.container)
except ErrorWithStatus as error:
self.model.unit.status = error.status
raise error
try:
with open("src/sample_config.json", "r") as sample_config:
file_content = sample_config.read()
self.container.push(SAMPLE_CONFIG, file_content, make_dirs=True)
except ErrorWithStatus as error:
self.logger.error("Failed to upload sample config to container.")
raise error
try:
file_content = json.dumps(config_json)
config = CONFIG_DIR / "config.json"
self.container.push(config, file_content, make_dirs=True)
except ErrorWithStatus as error:
self.logger.error("Failed to upload config to container.")
raise error

def _send_info(self, interfaces):
if interfaces["kfp-api"]:
interfaces["kfp-api"].send_data(
@@ -680,12 +643,9 @@ def _on_event(self, event, force_conflicts: bool = False) -> None:
# Set up all relations/fetch required data
try:
self._check_leader()
interfaces = self._get_interfaces()
config_json = self._generate_config(interfaces)
self._upload_files_to_container(config_json)
self._apply_k8s_resources(force_conflicts=force_conflicts)
update_layer(self._container_name, self._container, self._kfp_api_layer, self.logger)
self._send_info(interfaces)
self._send_info(self._get_interfaces())
except ErrorWithStatus as err:
self.model.unit.status = err.status
self.logger.error(f"Failed to handle {event} with error: {err}")
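With `sample_config.json` and the `config.json` upload removed, the dictionary returned by `_generate_environment()` now reaches the workload through the Pebble service definition. A minimal sketch of that wiring, using illustrative values rather than the charm's exact `_kfp_api_layer` contents:

```python
from ops.pebble import Layer

def make_layer(command: str, environment: dict) -> Layer:
    # The environment variables replace the config.json file the charm
    # previously pushed into the container; Pebble injects them into
    # the service process at startup.
    return Layer(
        {
            "summary": "kfp-api layer",
            "services": {
                "apiserver": {
                    "override": "replace",
                    "summary": "ml-pipeline api server",
                    "command": command,
                    "startup": "enabled",
                    "environment": environment,
                },
            },
        }
    )
```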
12 changes: 0 additions & 12 deletions charms/kfp-api/src/sample_config.json

This file was deleted.

28 changes: 24 additions & 4 deletions charms/kfp-api/src/templates/auth_manifests.yaml.j2
@@ -1,7 +1,12 @@
# Source manifests/apps/pipeline/upstream/base/installs/multi-user/api-service/cluster-role**.yaml
# These manifest files have been modified to suit the needs of the charm; the app label, metadata name,
# and namespace fields will be rendered with information from the application and the model.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: {{ app_name }}
labels:
app: {{ app_name }}
name: {{ app_name }}-role
rules:
- apiGroups:
- ''
@@ -51,21 +56,32 @@ rules:
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: {{ app_name }}
labels:
app: {{ app_name }}
name: {{ app_name }}-binding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: {{ app_name }}
name: {{ app_name }}-role
subjects:
- kind: ServiceAccount
name: {{ app_name }}
name: {{ app_name }}-sa
namespace: {{ namespace }}
---
apiVersion: v1
kind: ServiceAccount
metadata:
labels:
app: {{ app_name }}
name: {{ app_name }}-sa
namespace: {{ namespace }}
---
# manifests/apps/pipeline/upstream/base/installs/multi-user/view-edit-cluster-roles.yaml
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app: {{ app_name }}
rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true"
name: kubeflow-pipelines-edit
aggregationRule:
@@ -81,6 +97,7 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app: {{ app_name }}
rbac.authorization.kubeflow.org/aggregate-to-kubeflow-pipelines-edit: "true"
rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true"
name: kubeflow-pipelines-view
@@ -97,6 +114,7 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app: {{ app_name }}
app.kubernetes.io/component: ml-pipeline
app.kubernetes.io/name: kubeflow-pipelines
application-crd-id: kubeflow-pipelines
@@ -165,6 +183,7 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app: {{ app_name }}
app.kubernetes.io/component: ml-pipeline
app.kubernetes.io/name: kubeflow-pipelines
application-crd-id: kubeflow-pipelines
@@ -208,6 +227,7 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
labels:
app: {{ app_name }}
application-crd-id: kubeflow-pipelines
rbac.authorization.k8s.io/aggregate-to-admin: "true"
name: argo-aggregate-to-admin
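The `-role`, `-binding`, and `-sa` suffixes keep the rendered ClusterRole, ClusterRoleBinding, and ServiceAccount names distinct, and the new `app: {{ app_name }}` labels let the charm find everything it created. A toy render of the substitution the charm's `KubernetesResourceHandler` performs on these templates (values are illustrative):

```python
from jinja2 import Template

# Renders the ServiceAccount that the ClusterRoleBinding's subject
# refers to; the charm fills app_name and namespace from its model.
manifest = Template(
    "apiVersion: v1\n"
    "kind: ServiceAccount\n"
    "metadata:\n"
    "  labels:\n"
    "    app: {{ app_name }}\n"
    "  name: {{ app_name }}-sa\n"
    "  namespace: {{ namespace }}\n"
)
print(manifest.render(app_name="kfp-api", namespace="kubeflow"))
```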
5 changes: 4 additions & 1 deletion charms/kfp-api/src/templates/ml-pipeline-service.yaml.j2
@@ -14,4 +14,7 @@ spec:
protocol: TCP
targetPort: 8888
selector:
app.kubernetes.io/name: {{ service }}
# This selector ensures this Service identifies
# the kfp-api Pod correctly, as it will have
# the same label
app.kubernetes.io/name: {{ app_name }}
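A Service selects every Pod whose labels include all of the selector's key/value pairs, so switching the selector to `{{ app_name }}` works because the kfp-api Pod carries the same rendered label. A toy illustration (assuming `app_name` renders to `kfp-api`):

```python
def selects(selector: dict, pod_labels: dict) -> bool:
    # Kubernetes label selection: every selector pair must be present
    # in the Pod's labels for the Service to route traffic to it.
    return all(pod_labels.get(k) == v for k, v in selector.items())

selector = {"app.kubernetes.io/name": "kfp-api"}
pod_labels = {"app.kubernetes.io/name": "kfp-api", "app": "kfp-api"}
assert selects(selector, pod_labels)
```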