Skip to content

Commit

Permalink
Add build timing processor
Browse files Browse the repository at this point in the history
  • Loading branch information
jjnesbitt committed Aug 17, 2023
1 parent 406940d commit 10e978c
Show file tree
Hide file tree
Showing 13 changed files with 393 additions and 2 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/custom_docker_builds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ jobs:
image-tags: ghcr.io/spack/snapshot-release-tags:0.0.2
- docker-image: ./images/cache-indexer
image-tags: ghcr.io/spack/cache-indexer:0.0.1
- docker-image: ./images/build-timing-processor
image-tags: ghcr.io/spack/build-timing-processor:0.0.1
- docker-image: ./analytics
image-tags: ghcr.io/spack/upload-build-timings:0.0.1
steps:
- name: Checkout
uses: actions/checkout@v3
Expand Down
12 changes: 12 additions & 0 deletions analytics/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Image for the analytics Django app; runs the upload_build_timings
# management command as a one-shot k8s Job (see job-template.yaml).
FROM python:3.11-slim

# All subsequent paths are relative to /app.
WORKDIR /app

# Copy requirements first so the dependency-install layer is cached
# across source-code changes.
COPY requirements.txt /app/requirements.txt

RUN pip install --upgrade pip
RUN pip install -r requirements.txt

COPY . .

# Run the Django management command that ingests build timing data.
CMD [ "./manage.py", "upload_build_timings" ]
116 changes: 116 additions & 0 deletions analytics/analytics/management/commands/upload_build_timings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import json
import os
import re
import tempfile
import zipfile

import gitlab
from gitlab.v4.objects import Project, ProjectJob

from analytics.models import Job, Phase, Timer

# Grab env vars (both are required; a missing one fails fast with KeyError)
GITLAB_TOKEN = os.environ["GITLAB_TOKEN"]  # token used to auth against gitlab.spack.io
JOB_INPUT_DATA = os.environ["JOB_INPUT_DATA"]  # JSON-encoded gitlab job webhook payload

# Other constants
# Group 1 captures everything before the "/<hash> ..." suffix — presumably
# the package name encoded in the gitlab job name (TODO confirm format).
PACKAGE_NAME_REGEX = r"(.+)/[a-zA-Z0-9]+ .+"


# Instantiate gitlab api wrapper
gl = gitlab.Gitlab("https://gitlab.spack.io", GITLAB_TOKEN)


def create_job(project: Project, job: ProjectJob) -> Job:
    """Create and return a ``Job`` record for the given gitlab project/job pair."""
    # The gitlab job name encodes the package name; extract it via the
    # module-level regex (group 1).
    name_match = re.match(PACKAGE_NAME_REGEX, job.name)
    package_name = name_match.group(1)

    # Look up the runner to inspect its tags (used to detect AWS runners).
    runner = gl.runners.get(job.runner["id"])

    return Job.objects.create(
        job_id=job.get_id(),
        project_id=project.get_id(),
        name=job.name,
        started_at=job.started_at,
        duration=job.duration,
        ref=job.ref,
        tags=job.tag_list,
        package_name=package_name,
        aws=("aws" in runner.tag_list),
    )


def get_timings_json(job: ProjectJob) -> dict | None:
    """Download the job's artifacts zip and return the parsed install-times JSON.

    Returns ``None`` when the artifacts do not contain the timing file.
    (NOTE(review): the parsed JSON appears to be a *list* of timer entries —
    see how ``main`` iterates it — so the ``dict`` annotation may be loose.)
    """
    timing_filename = "jobs_scratch_dir/user_data/install_times.json"

    # Stream the artifacts zip into a temp file. NamedTemporaryFile is
    # already open for writing, so write through its handle directly —
    # reopening the file by name was redundant (and fails on Windows,
    # where an open temp file cannot be opened a second time).
    with tempfile.NamedTemporaryFile(suffix=".zip") as temp:
        job.artifacts(streamed=True, action=temp.write)
        temp.flush()  # ensure all streamed bytes hit disk before reading

        # ZipFile.open raises KeyError when the member is missing; that
        # simply means this job produced no timing data.
        try:
            with zipfile.ZipFile(temp.name) as zfile:
                with zfile.open(timing_filename) as timing_file:
                    return json.load(timing_file)
        except KeyError:
            return None


def main():
    """Record build timing data for the gitlab job described by JOB_INPUT_DATA."""
    # Decode the webhook payload handed to us via the environment.
    job_input_data = json.loads(JOB_INPUT_DATA)
    job_id = job_input_data["build_id"]

    # Look up the project and job on the gitlab side.
    gl_project = gl.projects.get(job_input_data["project_id"])
    gl_job = gl_project.jobs.get(job_input_data["build_id"])

    # Reuse an existing job record when present; otherwise create one.
    job = Job.objects.filter(job_id=job_id).first()
    if job is None:
        job = create_job(gl_project, gl_job)

    # Without timing data there is nothing to record.
    timings = get_timings_json(gl_job)
    if not timings:
        return

    # Create one Timer per entry, accumulating its Phase rows so they can
    # all be inserted with a single bulk_create at the end.
    phases = []
    for entry in timings:
        # Entries occasionally arrive without a name; skip those.
        if "name" not in entry:
            continue

        timer = Timer.objects.create(
            name=entry["name"],
            hash=entry["hash"],
            cache=entry["cache"],
            time_total=entry["total"],
            job=job,
        )

        for phase in entry["phases"]:
            phases.append(
                Phase(
                    timer=timer,
                    name=phase["name"],
                    path=phase["path"],
                    seconds=phase["seconds"],
                    count=phase["count"],
                )
            )

    # Bulk create phases
    Phase.objects.bulk_create(phases)


if __name__ == "__main__":
main()
7 changes: 5 additions & 2 deletions analytics/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
django
django-extensions
django==4.2.4
django-extensions==3.2.3
kubernetes==26.1.0
python-gitlab==3.11.0
psycopg2-binary==2.9.5
13 changes: 13 additions & 0 deletions images/build-timing-processor/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Image for the build-timing-processor webhook service (FastAPI + uvicorn).
FROM python:3.11-slim

# All subsequent paths are relative to /app.
WORKDIR /app

# Copy requirements first so the dependency-install layer is cached
# across source-code changes.
COPY requirements.txt /app/requirements.txt

RUN pip install --upgrade pip
RUN pip install -r requirements.txt

COPY . .

# Serve the FastAPI app (app.py) on port 8080; the k8s Service targets this port.
EXPOSE 8080
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"]
66 changes: 66 additions & 0 deletions images/build-timing-processor/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import json
import re
from pathlib import Path

import yaml
from fastapi import FastAPI, HTTPException, Request, Response
from kubernetes import client, config

# Load k8s credentials from the pod's service account — this app only
# works when running inside the cluster.
config.load_incluster_config()

# Batch API client, used below to create Jobs.
batch = client.BatchV1Api()

app = FastAPI()


# Matches gitlab stages named "stage-<N>" — presumably the package build
# stages of a pipeline (TODO confirm against the gitlab pipeline layout).
BUILD_STAGE_REGEX = r"^stage-\d+$"


@app.post("/")
async def gitlab_webhook_consumer(request: Request):
"""
This endpoint receives the gitlab webhook for failed jobs and creates
a k8s job to parse the logs and upload them to opensearch.
"""
job_input_data = await request.json()
if job_input_data.get("object_kind", "") != "build":
raise HTTPException(status_code=400, detail="Invalid request")

# Exit if not a build stage
if not re.match(BUILD_STAGE_REGEX, job_input_data["build_stage"]):
return Response("Skipping non-build stage...")

# Only process finished and successful jobs
status = job_input_data["build_status"]
if status == "failed":
return Response("Skipping failed job...")
if status != "success":
return Response("Skipping in-progress or cancelled job..")

# Read in job template and set env vars
with open(Path(__file__).parent / "job-template.yaml") as f:
job_template = yaml.safe_load(f)
for container in job_template["spec"]["template"]["spec"]["containers"]:
container.setdefault("env", []).extend(
[dict(name="JOB_INPUT_DATA", value=json.dumps(job_input_data))]
)

# Set k8s job name
job_build_id = str(job_input_data["build_id"])
job_pipeline_id = str(job_input_data["pipeline_id"])
job_template["metadata"][
"name"
] = f"build-timing-processing-job-{job_build_id}-{job_pipeline_id}"

# Add labels to make finding the job that proccessed the error log easier.
job_template["metadata"]["labels"] = {
"spack.io/gitlab-build-id": job_build_id,
"spack.io/gitlab-pipeline-id": job_pipeline_id,
}

batch.create_namespaced_job(
"custom",
job_template,
)

return Response("Upload job dispatched.", status_code=202)
44 changes: 44 additions & 0 deletions images/build-timing-processor/job-template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
---
apiVersion: batch/v1
kind: Job
metadata:
name: build-timing-processing-job
labels:
app: build-timing-processing-job
spec:
ttlSecondsAfterFinished: 7200
template:
metadata:
labels:
app: build-timing-processing-job
spec:
restartPolicy: OnFailure
containers:
- name: build-timing-processing-job
image: ghcr.io/spack/upload-build-timings:0.0.1
imagePullPolicy: Always
env:
- name: GITLAB_TOKEN
valueFrom:
secretKeyRef:
name: build-timing-processor
key: gitlab-token

# DB credentials, will be read by Django when running the management command
- name: DB_HOST
valueFrom:
secretKeyRef:
name: build-timing-processor
key: analytics-postgresql-host
- name: DB_NAME
value: analytics
- name: DB_USER
value: postgres
- name: DB_PASS
valueFrom:
secretKeyRef:
name: build-timing-processor
key: analytics-postgresql-password

nodeSelector:
spack.io/node-pool: base
4 changes: 4 additions & 0 deletions images/build-timing-processor/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
fastapi==0.85.1
kubernetes==25.3.0
PyYAML==6.0
uvicorn==0.19.0
38 changes: 38 additions & 0 deletions k8s/production/custom/build-timing-processor/deployments.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: build-timing-processor
namespace: custom
labels:
app: build-timing-processor
svc: web
spec:
selector:
matchLabels:
app: build-timing-processor
svc: web
replicas: 2
template:
metadata:
labels:
app: build-timing-processor
svc: web
spec:
restartPolicy: Always
serviceAccountName: build-timing-processor
containers:
- name: build-timing-processor
image: ghcr.io/spack/build-timing-processor:0.0.1
imagePullPolicy: Always
resources:
requests:
cpu: 350m
memory: 1G
limits:
cpu: 1600m
memory: 1G
ports:
- containerPort: 8080
nodeSelector:
spack.io/node-pool: base
18 changes: 18 additions & 0 deletions k8s/production/custom/build-timing-processor/sealed-secrets.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
apiVersion: bitnami.com/v1alpha1
kind: SealedSecret
metadata:
name: build-timing-processor
namespace: custom
spec:
encryptedData:
# GitLab personal access token with read access to spack/spack repo
gitlab-token: TODO
# The host for the analytics RDS instance
analytics-postgresql-host: TODO
# The password for the analytics RDS instance
analytics-postgresql-password: TODO
template:
metadata:
annotations:
kustomize.toolkit.fluxcd.io/reconcile: disabled
sealedsecrets.bitnami.com/managed: "true"
32 changes: 32 additions & 0 deletions k8s/production/custom/build-timing-processor/service-accounts.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: build-timing-processor
namespace: custom

---
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: build-timing-processor
namespace: custom
rules:
- apiGroups: ["batch"]
resources: ["jobs"]
verbs: ["create"]

---
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: build-timing-processor
namespace: custom
subjects:
- kind: ServiceAccount
name: build-timing-processor
namespace: custom
roleRef:
kind: Role
name: build-timing-processor
apiGroup: rbac.authorization.k8s.io
18 changes: 18 additions & 0 deletions k8s/production/custom/build-timing-processor/services.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
---
apiVersion: v1
kind: Service
metadata:
name: build-timing-processor
namespace: custom
labels:
app: build-timing-processor
svc: web
spec:
type: ClusterIP
ports:
- name: web
port: 80
targetPort: 8080
selector:
app: build-timing-processor
svc: web
Loading

0 comments on commit 10e978c

Please sign in to comment.