From 10e978cb6d683e319f257adfa3386622af20b8f8 Mon Sep 17 00:00:00 2001 From: Jacob Nesbitt Date: Fri, 11 Aug 2023 14:55:18 -0400 Subject: [PATCH] Add build timing processor --- .github/workflows/custom_docker_builds.yml | 4 + analytics/Dockerfile | 12 ++ .../commands/upload_build_timings.py | 116 ++++++++++++++++++ analytics/requirements.txt | 7 +- images/build-timing-processor/Dockerfile | 13 ++ images/build-timing-processor/app.py | 66 ++++++++++ .../build-timing-processor/job-template.yaml | 44 +++++++ .../build-timing-processor/requirements.txt | 4 + .../build-timing-processor/deployments.yaml | 38 ++++++ .../sealed-secrets.yaml | 18 +++ .../service-accounts.yaml | 32 +++++ .../build-timing-processor/services.yaml | 18 +++ .../build-timing-processor/kustomization.yaml | 23 ++++ 13 files changed, 393 insertions(+), 2 deletions(-) create mode 100644 analytics/Dockerfile create mode 100644 analytics/analytics/management/commands/upload_build_timings.py create mode 100644 images/build-timing-processor/Dockerfile create mode 100644 images/build-timing-processor/app.py create mode 100644 images/build-timing-processor/job-template.yaml create mode 100644 images/build-timing-processor/requirements.txt create mode 100644 k8s/production/custom/build-timing-processor/deployments.yaml create mode 100644 k8s/production/custom/build-timing-processor/sealed-secrets.yaml create mode 100644 k8s/production/custom/build-timing-processor/service-accounts.yaml create mode 100644 k8s/production/custom/build-timing-processor/services.yaml create mode 100644 k8s/staging/custom/build-timing-processor/kustomization.yaml diff --git a/.github/workflows/custom_docker_builds.yml b/.github/workflows/custom_docker_builds.yml index b387eb3de..05b10744a 100644 --- a/.github/workflows/custom_docker_builds.yml +++ b/.github/workflows/custom_docker_builds.yml @@ -42,6 +42,10 @@ jobs: image-tags: ghcr.io/spack/snapshot-release-tags:0.0.2 - docker-image: ./images/cache-indexer image-tags: 
ghcr.io/spack/cache-indexer:0.0.1 + - docker-image: ./images/build-timing-processor + image-tags: ghcr.io/spack/build-timing-processor:0.0.1 + - docker-image: ./analytics + image-tags: ghcr.io/spack/upload-build-timings:0.0.1 steps: - name: Checkout uses: actions/checkout@v3 diff --git a/analytics/Dockerfile b/analytics/Dockerfile new file mode 100644 index 000000000..89fbd7255 --- /dev/null +++ b/analytics/Dockerfile @@ -0,0 +1,12 @@ +FROM python:3.11-slim + +WORKDIR /app + +COPY requirements.txt /app/requirements.txt + +RUN pip install --upgrade pip +RUN pip install -r requirements.txt + +COPY . . + +CMD [ "./manage.py", "upload_build_timings" ] diff --git a/analytics/analytics/management/commands/upload_build_timings.py b/analytics/analytics/management/commands/upload_build_timings.py new file mode 100644 index 000000000..35a25fe58 --- /dev/null +++ b/analytics/analytics/management/commands/upload_build_timings.py @@ -0,0 +1,116 @@ +import json +import os +import re +import tempfile +import zipfile + +import gitlab +from gitlab.v4.objects import Project, ProjectJob + +from analytics.models import Job, Phase, Timer + +# Grab env vars +GITLAB_TOKEN = os.environ["GITLAB_TOKEN"] +JOB_INPUT_DATA = os.environ["JOB_INPUT_DATA"] + +# Other constants +PACKAGE_NAME_REGEX = r"(.+)/[a-zA-Z0-9]+ .+" + + +# Instantiate gitlab api wrapper +gl = gitlab.Gitlab("https://gitlab.spack.io", GITLAB_TOKEN) + + +def create_job(project: Project, job: ProjectJob) -> Job: + # Grab package name and runner tags + package_name = re.match(PACKAGE_NAME_REGEX, job.name).group(1) + runner_tags = gl.runners.get(job.runner["id"]).tag_list + + # Return created job + return Job.objects.create( + job_id=job.get_id(), + project_id=project.get_id(), + name=job.name, + started_at=job.started_at, + duration=job.duration, + ref=job.ref, + tags=job.tag_list, + package_name=package_name, + aws=("aws" in runner_tags), + ) + + +def get_timings_json(job: ProjectJob) -> dict | None: + # Download job artifacts 
and parse timings json + with tempfile.NamedTemporaryFile(suffix=".zip") as temp: + artifacts_file = temp.name + with open(artifacts_file, "wb") as f: + job.artifacts(streamed=True, action=f.write) + + # Read in timing json + try: + timing_filename = "jobs_scratch_dir/user_data/install_times.json" + with zipfile.ZipFile(artifacts_file) as zfile: + with zfile.open(timing_filename) as timing_file: + return json.load(timing_file) + except KeyError: + pass + + return None + + +def main(): + # Read input data and extract params + job_input_data = json.loads(JOB_INPUT_DATA) + job_id = job_input_data["build_id"] + + # Retrieve project and job from gitlab API + gl_project = gl.projects.get(job_input_data["project_id"]) + gl_job = gl_project.jobs.get(job_input_data["build_id"]) + + # Get or create job record + job = Job.objects.filter(job_id=job_id).first() + if job is None: + job = create_job(gl_project, gl_job) + + # Get timings + timings = get_timings_json(gl_job) + if not timings: + return + + # Iterate through each timer and create timers and phase results + phases = [] + for entry in timings: + # Sometimes name can be missing, skip if so + if "name" not in entry: + continue + + # Create timer + timer = Timer.objects.create( + name=entry["name"], + hash=entry["hash"], + cache=entry["cache"], + time_total=entry["total"], + job=job, + ) + + # Add all phases to bulk phase list + phases.extend( + [ + Phase( + timer=timer, + name=phase["name"], + path=phase["path"], + seconds=phase["seconds"], + count=phase["count"], + ) + for phase in entry["phases"] + ] + ) + + # Bulk create phases + Phase.objects.bulk_create(phases) + + +if __name__ == "__main__": + main() diff --git a/analytics/requirements.txt b/analytics/requirements.txt index 8a964ede5..6beff67a8 100644 --- a/analytics/requirements.txt +++ b/analytics/requirements.txt @@ -1,2 +1,5 @@ -django -django-extensions +django==4.2.4 +django-extensions==3.2.3 +kubernetes==26.1.0 +python-gitlab==3.11.0 +psycopg2-binary==2.9.5 
diff --git a/images/build-timing-processor/Dockerfile b/images/build-timing-processor/Dockerfile new file mode 100644 index 000000000..4b4b7c06c --- /dev/null +++ b/images/build-timing-processor/Dockerfile @@ -0,0 +1,13 @@ +FROM python:3.11-slim + +WORKDIR /app + +COPY requirements.txt /app/requirements.txt + +RUN pip install --upgrade pip +RUN pip install -r requirements.txt + +COPY . . + +EXPOSE 8080 +CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"] diff --git a/images/build-timing-processor/app.py b/images/build-timing-processor/app.py new file mode 100644 index 000000000..dd31f5da5 --- /dev/null +++ b/images/build-timing-processor/app.py @@ -0,0 +1,66 @@ +import json +import re +from pathlib import Path + +import yaml +from fastapi import FastAPI, HTTPException, Request, Response +from kubernetes import client, config + +config.load_incluster_config() + +batch = client.BatchV1Api() + +app = FastAPI() + + +BUILD_STAGE_REGEX = r"^stage-\d+$" + + +@app.post("/") +async def gitlab_webhook_consumer(request: Request): + """ + This endpoint receives the gitlab webhook for successful build jobs and + creates a k8s job to upload that job's build timings to the analytics database.
+ """ + job_input_data = await request.json() + if job_input_data.get("object_kind", "") != "build": + raise HTTPException(status_code=400, detail="Invalid request") + + # Exit if not a build stage + if not re.match(BUILD_STAGE_REGEX, job_input_data["build_stage"]): + return Response("Skipping non-build stage...") + + # Only process finished and successful jobs + status = job_input_data["build_status"] + if status == "failed": + return Response("Skipping failed job...") + if status != "success": + return Response("Skipping in-progress or cancelled job..") + + # Read in job template and set env vars + with open(Path(__file__).parent / "job-template.yaml") as f: + job_template = yaml.safe_load(f) + for container in job_template["spec"]["template"]["spec"]["containers"]: + container.setdefault("env", []).extend( + [dict(name="JOB_INPUT_DATA", value=json.dumps(job_input_data))] + ) + + # Set k8s job name + job_build_id = str(job_input_data["build_id"]) + job_pipeline_id = str(job_input_data["pipeline_id"]) + job_template["metadata"][ + "name" + ] = f"build-timing-processing-job-{job_build_id}-{job_pipeline_id}" + + # Add labels to make finding the job that proccessed the error log easier. 
+ job_template["metadata"]["labels"] = { + "spack.io/gitlab-build-id": job_build_id, + "spack.io/gitlab-pipeline-id": job_pipeline_id, + } + + batch.create_namespaced_job( + "custom", + job_template, + ) + + return Response("Upload job dispatched.", status_code=202) diff --git a/images/build-timing-processor/job-template.yaml b/images/build-timing-processor/job-template.yaml new file mode 100644 index 000000000..d8a54a698 --- /dev/null +++ b/images/build-timing-processor/job-template.yaml @@ -0,0 +1,44 @@ +--- +apiVersion: batch/v1 +kind: Job +metadata: + name: build-timing-processing-job + labels: + app: build-timing-processing-job +spec: + ttlSecondsAfterFinished: 7200 + template: + metadata: + labels: + app: build-timing-processing-job + spec: + restartPolicy: OnFailure + containers: + - name: build-timing-processing-job + image: ghcr.io/spack/upload-build-timings:0.0.1 + imagePullPolicy: Always + env: + - name: GITLAB_TOKEN + valueFrom: + secretKeyRef: + name: build-timing-processor + key: gitlab-token + + # DB credentials, will be read by Django when running the management command + - name: DB_HOST + valueFrom: + secretKeyRef: + name: build-timing-processor + key: analytics-postgresql-host + - name: DB_NAME + value: analytics + - name: DB_USER + value: postgres + - name: DB_PASS + valueFrom: + secretKeyRef: + name: build-timing-processor + key: analytics-postgresql-password + + nodeSelector: + spack.io/node-pool: base diff --git a/images/build-timing-processor/requirements.txt b/images/build-timing-processor/requirements.txt new file mode 100644 index 000000000..29ad927de --- /dev/null +++ b/images/build-timing-processor/requirements.txt @@ -0,0 +1,4 @@ +fastapi==0.85.1 +kubernetes==25.3.0 +PyYAML==6.0 +uvicorn==0.19.0 diff --git a/k8s/production/custom/build-timing-processor/deployments.yaml b/k8s/production/custom/build-timing-processor/deployments.yaml new file mode 100644 index 000000000..18f5626ad --- /dev/null +++ 
b/k8s/production/custom/build-timing-processor/deployments.yaml @@ -0,0 +1,38 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: build-timing-processor + namespace: custom + labels: + app: build-timing-processor + svc: web +spec: + selector: + matchLabels: + app: build-timing-processor + svc: web + replicas: 2 + template: + metadata: + labels: + app: build-timing-processor + svc: web + spec: + restartPolicy: Always + serviceAccountName: build-timing-processor + containers: + - name: build-timing-processor + image: ghcr.io/spack/build-timing-processor:0.0.1 + imagePullPolicy: Always + resources: + requests: + cpu: 350m + memory: 1G + limits: + cpu: 1600m + memory: 1G + ports: + - containerPort: 8080 + nodeSelector: + spack.io/node-pool: base diff --git a/k8s/production/custom/build-timing-processor/sealed-secrets.yaml b/k8s/production/custom/build-timing-processor/sealed-secrets.yaml new file mode 100644 index 000000000..26a5f27c0 --- /dev/null +++ b/k8s/production/custom/build-timing-processor/sealed-secrets.yaml @@ -0,0 +1,18 @@ +apiVersion: bitnami.com/v1alpha1 +kind: SealedSecret +metadata: + name: build-timing-processor + namespace: custom +spec: + encryptedData: + # GitLab personal access token with read access to spack/spack repo + gitlab-token: TODO + # The host for the analytics RDS instance + analytics-postgresql-host: TODO + # The password for the analytics RDS instance + analytics-postgresql-password: TODO + template: + metadata: + annotations: + kustomize.toolkit.fluxcd.io/reconcile: disabled + sealedsecrets.bitnami.com/managed: "true" diff --git a/k8s/production/custom/build-timing-processor/service-accounts.yaml b/k8s/production/custom/build-timing-processor/service-accounts.yaml new file mode 100644 index 000000000..24ecb93ef --- /dev/null +++ b/k8s/production/custom/build-timing-processor/service-accounts.yaml @@ -0,0 +1,32 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: build-timing-processor + namespace: custom + 
+--- +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: build-timing-processor + namespace: custom +rules: + - apiGroups: ["batch"] + resources: ["jobs"] + verbs: ["create"] + +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: build-timing-processor + namespace: custom +subjects: + - kind: ServiceAccount + name: build-timing-processor + namespace: custom +roleRef: + kind: Role + name: build-timing-processor + apiGroup: rbac.authorization.k8s.io diff --git a/k8s/production/custom/build-timing-processor/services.yaml b/k8s/production/custom/build-timing-processor/services.yaml new file mode 100644 index 000000000..61428ba82 --- /dev/null +++ b/k8s/production/custom/build-timing-processor/services.yaml @@ -0,0 +1,18 @@ +--- +apiVersion: v1 +kind: Service +metadata: + name: build-timing-processor + namespace: custom + labels: + app: build-timing-processor + svc: web +spec: + type: ClusterIP + ports: + - name: web + port: 80 + targetPort: 8080 + selector: + app: build-timing-processor + svc: web diff --git a/k8s/staging/custom/build-timing-processor/kustomization.yaml b/k8s/staging/custom/build-timing-processor/kustomization.yaml new file mode 100644 index 000000000..6289576c0 --- /dev/null +++ b/k8s/staging/custom/build-timing-processor/kustomization.yaml @@ -0,0 +1,23 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ../../../production/custom/build-timing-processor/deployments.yaml + - ../../../production/custom/build-timing-processor/sealed-secrets.yaml + - ../../../production/custom/build-timing-processor/service-accounts.yaml + - ../../../production/custom/build-timing-processor/services.yaml + +patches: + - target: + kind: SealedSecret + name: build-timing-processor + namespace: custom + patch: |- + - op: replace + path: /spec/encryptedData/gitlab-token + value:
AgCvt34ogfJdkij4Mlp6m8rC0+syDHTnXPGcmpzfFtVE0rPKasRE8VcInfj45RBxmmkG7FzbIaETqLFsQHMyNE+uUuu5OCkaU4U7/ez8zP2O1Agg5pLBt+jSmASwHSGvUYPXaBcgdvvjJ1RkmWgdl1FvqZ86MMqNVt1vQyfewPONFExpJVpbOnA7ag1iI+dc4nCcKbbKIVx7qPRrwPeXK+ZjB9o4kG8uisYvOiRzRaOZMLNEFryStkZDpSZABENQBjzTkiTaIqJzJ37sRy/w2N6eEX9ZO76Xybcon6zmrtVib5xp1Rc1alRkM1JtG8ap3U6+wiXMASAMqzb6dgT8muBEcNO2nrlwLAJ51XHudL4VJeeEOY8veosiK6H7KOYfL4CI6cZcM2sdq7Nc73qEoNwA/sM42IE10FIfGSrz7gF1voIr/tXX2GLHOKJPGpgDr0Eucf1n3I3Fhjc/mMlfv91sBp9W7Owu1SWk8MmfuwssdRUZ7pFqqqA0hnimpjtBrHZ0LbwFYRwOZEkAX2N+shLm3y6xGIONZ3ERhjWrBuj0nsvmIAoQyLK52Z8mjKczGWOMrQMnpWWAbeyyfOR4VRC5VnoB6YUsgAlQgjnnpU8QlQZYIHYxVwfo3LZXJDIj0f34fFSAP1w6S6nSsOPGAGCrwhPsqWNPoqHSGSffpkt0vofFf/98jQchxfgaSCOavbitl4prJG0wOcE76/gnmv0vEI394g== + - op: replace + path: /spec/encryptedData/analytics-postgresql-host + value: AgBhS8lh371LEFllnnRuO+rkk4AiGvpg8IxqEjnwG09o5mKyMBKz8IMXxb8NF6Y1k56BME0CDUy6+TgA/uZvyuS1W3Ny0yVO2IKZFLZ2soIfUhsxhdsXMtf2VYTKS1jhGEyOryrwG0MWEDHQ6TjuT2FZVc4ITnKHheKoV3AXhJHwfouseryEKzCOnlONblWZVwvyzexEbkCFswuz1ZNDnHC4s7B1CL8VQSHb0ST0LzxxTa3BvNm/O0sB+JIVZZdITTbxRiCw4y80NZEkTzLjNGWrmdO6PLS4LDDPViCJf14c3tHefZm3i4+h3tIZezAQIjCbG1gWV7emdGsSyiNkCW0mj+LzGNr0IYZj6jnVTKbMD6Skz7Vx9BLP1grC/vpCuXydoX9CFAb4/BXDQdS3avipPfNmJIyZqSJ4JoPKG3w5mDtqHHzwi2Eqm0y1gpcih+3vvNeFuBP6qSwU1TMLAB+CCT4NvF4SNQ1YmHup7H89YfJAsgCHwni39QEWspzQEpYGKe5LbyOVDsR+nVUQzVJVc8luG3x6oIJmRTD21plbdEKj8InkYFI3DSSKgxs2hTOAcE8s2ZQMM1wupw2+8Q7i8H0+nv7D1n7WJ6nXnbO/nIdBzbn5oi0Hk/h3fgR9Pcb96X4cjRaehAzc/MmiW7ZPBo0DzO7guWJ/Pwz/mnNzP/9rfCOudk5iSjAKhAO85q3pT4l1vgIxOlSvCCE0RCtGSF67Xn0ZapbmqEnbTJ+3eYw/40xDN7ypML6v+rNMyKD0Zk/xiVR7D6sV + - op: replace + path: /spec/encryptedData/analytics-postgresql-password + value: 
AgAVBlXS31yQGm6+WTwmhvhAYUSeYz4guM0Y6cIdhSQSaDshNM1MpcrQtnL+y4nXhf3vuj/EWn+IL3fyIPhiI3wtfudecpeWVfo31FHrZYxddVM1y4AX4gH0aXllxwyXW/WGolg7LhEGyQ7O6dWl1NG9Ga2PZpWnA/pyqVXGLfGyXwNUyYpuBikKIAOvkEDJFS2kPNm8e5/Uo9P2WkdjsiiyTkpxj/5XFrN64fAoeWCvsri/0jYqbiqOJLBG+d/yYHpOhMB2yYnFP+LnSqOWbDo1eiO2SkNWmHFiiIpteViqhaQi6DrO17Xs/vSWhEJn1QaPNfCMjrYHV8HCsxmKYghqasLsYsT76HYcUt6TnhX8WvxGX08UH6mjf0BQ3fkvkKby790czmMaObWbEMCS/yFa+2RT0697AmyUzmrC5T981tXhHMmC4wjU9Kpx6RRh538nZ08iJDDyDJNWsQaPNg2+CGADpym9f8vMxmgwGsvM6cIYOK9Ktm4/twP7i1STsLS7AcrxY8UYm9yft5uffzIRZR2lF2HDptuyP+S+YQ5zcD2RBmrnn+P4EAwK+JwD7MGt0ofl4D8DRE+1FeItbcnNQzGIgSQhY+SBt3YJmRJVDkwYT9vDzaOd7Vuw/tSl56n0ABLgQ6wR/HSGGHTyU1Lm90cXLKR7IgCny7ao3AkwetJWBJggJcfMmsZK+R21SNMixXBjBPFbfO+jeTxcvPv+