Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add django webhook handler to save job traces to opensearch #704

Merged
merged 10 commits into from
Jan 3, 2024
Merged
23 changes: 22 additions & 1 deletion .github/workflows/analytics_migrations.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,28 @@ on:

jobs:
check-migrations:
services:
postgres:
image: postgres:latest
env:
POSTGRES_DB: django
POSTGRES_PASSWORD: postgres
ports:
- 5432:5432
elasticsearch:
image: elasticsearch:7.14.0
env:
ES_JAVA_OPTS: "-Xms250m -Xmx750m"
discovery.type: single-node
xpack.security.enabled: "true"
ELASTIC_PASSWORD: elastic
ports:
- 9200:9200
redis:
image: redis:latest
ports:
- 6379:6379

runs-on: ubuntu-latest
steps:
- name: Checkout
Expand All @@ -19,6 +41,5 @@ jobs:
run: cat ./analytics/dev/.env.docker-compose >> $GITHUB_ENV

# This runs on pull requests and blocks until any necessary migrations have been created
# *Note*: This will display a warning about not being able to connect to a database. This can be ignored.
- name: Check for Migrations
run: ./analytics/manage.py makemigrations --check --noinput
2 changes: 1 addition & 1 deletion .github/workflows/custom_docker_builds.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ jobs:
- docker-image: ./images/build-timing-processor
image-tags: ghcr.io/spack/build-timing-processor:0.0.4
- docker-image: ./analytics
image-tags: ghcr.io/spack/upload-build-timings:0.0.4
image-tags: ghcr.io/spack/django:0.0.1
steps:
- name: Checkout
uses: actions/checkout@c85c95e3d7251135ab7dc9ce3241c5835cc595a9 # v3.5.3
Expand Down
2 changes: 1 addition & 1 deletion analytics/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ RUN pip install -r requirements.txt

COPY . .

CMD [ "./manage.py", "upload_build_timings" ]
CMD [ "gunicorn", "--bind", "0.0.0.0:8080", "--access-logfile", "-", "analytics.wsgi" ]
3 changes: 3 additions & 0 deletions analytics/analytics/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Import the Celery app when Django starts so that @shared_task
# decorators elsewhere in the project bind to this app instance.
from .celery import app as celery_app

__all__ = ("celery_app",)
9 changes: 9 additions & 0 deletions analytics/analytics/celery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from celery import Celery
import os


# Set the default settings module before the app is created, so that
# config_from_object below can resolve django.conf:settings.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "analytics.settings")

# Project-wide Celery application.
app = Celery("analytics")
# Pull all CELERY_*-prefixed values from Django settings.
app.config_from_object("django.conf:settings", namespace="CELERY")
# Discover tasks (e.g. upload_job_log) in installed Django apps.
app.autodiscover_tasks()
58 changes: 58 additions & 0 deletions analytics/analytics/job_log_uploader/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import json
import re
from datetime import datetime
from typing import Any
from celery import shared_task

import gitlab
from opensearch_dsl import Date, Document, connections

from django.conf import settings


class JobLog(Document):
    """OpenSearch document holding one GitLab job's trace, stored in a daily index."""

    # Time the log entry was recorded; also selects the daily index in save().
    timestamp = Date()

    class Index:
        # Wildcard covering the per-day indices written by save().
        name = "gitlab-job-logs-*"

    def save(self, **kwargs):
        # assign now if no timestamp given
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12 and
        # returns a naive datetime -- consider datetime.now(timezone.utc).
        if not self.timestamp:
            self.timestamp = datetime.utcnow()

        # override the index to go to the proper timeslot
        kwargs["index"] = self.timestamp.strftime("gitlab-job-logs-%Y%m%d")
        return super().save(**kwargs)


@shared_task(name="upload_job_log")
def upload_job_log(job_input_data_json: str) -> None:
    """Fetch a finished job's trace from GitLab and index it into OpenSearch.

    Args:
        job_input_data_json: JSON-encoded GitLab webhook (build event)
            payload. Must contain "project_id", "build_id", and
            "project" -> "web_url".
    """
    job_input_data: dict[str, Any] = json.loads(job_input_data_json)
    gl = gitlab.Gitlab(settings.GITLAB_ENDPOINT, settings.GITLAB_TOKEN)

    # Retrieve project and job from gitlab API
    project = gl.projects.get(job_input_data["project_id"])
    job = project.jobs.get(job_input_data["build_id"])
    job_trace: str = job.trace().decode()

    # Strip ANSI CSI escape sequences (colors, cursor movement) from the
    # colorized output: ESC '[', any run of parameter bytes (digits and
    # semicolons), then the single final letter. Unlike the previous
    # narrower pattern this also consumes multi-parameter sequences such
    # as "\x1b[0;32m", so no trailing ";m" fragments are left behind.
    job_trace = re.sub(r"\x1b\[[0-9;]*[a-zA-Z]", "", job_trace)

    # Upload to OpenSearch
    connections.create_connection(
        hosts=[settings.OPENSEARCH_ENDPOINT],
        http_auth=(
            settings.OPENSEARCH_USERNAME,
            settings.OPENSEARCH_PASSWORD,
        ),
    )
    doc = JobLog(
        **job_input_data,
        job_url=f'{job_input_data["project"]["web_url"]}/-/jobs/{job_input_data["build_id"]}',
        job_trace=job_trace,
    )
    doc.save()
25 changes: 24 additions & 1 deletion analytics/settings.py → analytics/analytics/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,10 @@
# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent

ROOT_URLCONF = "analytics.urls"

# SECURITY WARNING: don't run with debug turned on in production!
# DEBUG = True
DEBUG = False

# Application definition
INSTALLED_APPS = [
Expand Down Expand Up @@ -44,3 +45,25 @@
# Default primary key field type
# https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"

SECRET_KEY = os.environ["SECRET_KEY"]
danlamanna marked this conversation as resolved.
Show resolved Hide resolved

CELERY_BROKER_URL = os.environ["CELERY_BROKER_URL"]

# These settings are mostly copied from https://github.com/girder/django-composed-configuration/blob/master/composed_configuration/_celery.py
CELERY_RESULT_BACKEND = None
CELERY_TASK_ACKS_LATE = not DEBUG
CELERY_TASK_REJECT_ON_WORKER_LOST = False
CELERY_TASK_ACKS_ON_FAILURE_OR_TIMEOUT = True
CELERY_WORKER_CANCEL_LONG_RUNNING_TASKS_ON_CONNECTION_LOSS = True
CELERY_WORKER_PREFETCH_MULTIPLIER = 1
CELERY_WORKER_CONCURRENCY = 1 if DEBUG else None

# Custom settings

OPENSEARCH_ENDPOINT = os.environ["OPENSEARCH_ENDPOINT"]
OPENSEARCH_USERNAME = os.environ["OPENSEARCH_USERNAME"]
OPENSEARCH_PASSWORD = os.environ["OPENSEARCH_PASSWORD"]

GITLAB_ENDPOINT: str = os.environ["GITLAB_ENDPOINT"]
GITLAB_TOKEN: str = os.environ["GITLAB_TOKEN"]
22 changes: 22 additions & 0 deletions analytics/analytics/urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""
URL configuration for analytics project.

The `urlpatterns` list routes URLs to views. For more information please see:
https://docs.djangoproject.com/en/4.2/topics/http/urls/
Examples:
Function views
1. Add an import: from my_app import views
2. Add a URL to urlpatterns: path('', views.home, name='home')
Class-based views
1. Add an import: from other_app.views import Home
2. Add a URL to urlpatterns: path('', Home.as_view(), name='home')
Including another URLconf
1. Import the include() function: from django.urls import include, path
2. Add a URL to urlpatterns: path('blog/', include('blog.urls'))
"""
from django.urls import path
from analytics.views import webhook_handler

# Route the site root to the GitLab webhook receiver; this service
# exposes no other endpoints.
urlpatterns = [
    path("", webhook_handler),
]
19 changes: 19 additions & 0 deletions analytics/analytics/views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from typing import Any
from django.http import HttpRequest, HttpResponse
import json

import sentry_sdk

from analytics.job_log_uploader import upload_job_log


def webhook_handler(request: HttpRequest) -> HttpResponse:
    """Receive a GitLab webhook and enqueue trace upload for build events.

    Returns 400 for malformed JSON, non-object payloads, or non-build
    events; 200 after the Celery task has been enqueued.

    NOTE(review): no webhook authentication (e.g. X-Gitlab-Token header)
    is checked here -- confirm this is enforced upstream.
    """
    # A malformed or empty body previously raised and produced a 500;
    # report it and answer 400 instead.
    try:
        job_input_data: Any = json.loads(request.body)
    except json.JSONDecodeError:
        sentry_sdk.capture_message("Invalid JSON in webhook payload")
        return HttpResponse("Invalid JSON payload", status=400)

    # Guard against valid-but-non-object JSON (e.g. a list), which would
    # otherwise crash on .get() below.
    if not isinstance(job_input_data, dict) or job_input_data.get("object_kind") != "build":
        sentry_sdk.capture_message("Not a build event")
        return HttpResponse("Not a build event", status=400)

    # Hand the raw payload to Celery; the task re-parses it itself.
    upload_job_log.delay(request.body)

    return HttpResponse("OK", status=200)
16 changes: 16 additions & 0 deletions analytics/analytics/wsgi.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""
WSGI config for analytics project.

It exposes the WSGI callable as a module-level variable named ``application``.

For more information on this file, see
https://docs.djangoproject.com/en/4.2/howto/deployment/wsgi/
"""

import os

from django.core.wsgi import get_wsgi_application

# Point Django at the project settings before building the WSGI app
# (gunicorn imports this module directly, bypassing manage.py).
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "analytics.settings")

application = get_wsgi_application()
7 changes: 7 additions & 0 deletions analytics/dev/.env.docker-compose
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,10 @@ DB_USER=postgres
DB_PASS=postgres
DB_HOST=localhost
DB_PORT=5432
OPENSEARCH_ENDPOINT=http://localhost:9200
OPENSEARCH_USERNAME=elastic
OPENSEARCH_PASSWORD=elastic
CELERY_BROKER_URL=redis://localhost:6379/0
SECRET_KEY=deadbeef
GITLAB_ENDPOINT="http://fakeurl"
GITLAB_TOKEN="bar"
16 changes: 16 additions & 0 deletions analytics/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,19 @@ services:
POSTGRES_PASSWORD: postgres
ports:
- ${DOCKER_POSTGRES_PORT-5432}:5432

elasticsearch:
image: elasticsearch:7.14.0
environment:
ES_JAVA_OPTS: "-Xms250m -Xmx750m"
discovery.type: single-node
xpack.security.enabled: "true"
ELASTIC_PASSWORD: elastic
ports:
- ${DOCKER_ELASTICSEARCH_PORT-9200}:9200
- ${DOCKER_ELASTICSEARCH_TRANSPORT_PORT-9300}:9300

redis:
image: redis:latest
ports:
- ${DOCKER_REDIS_PORT-6379}:6379
2 changes: 1 addition & 1 deletion analytics/manage.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

def main():
"""Run administrative tasks."""
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings")
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "analytics.settings")
try:
from django.core.management import execute_from_command_line
except ImportError as exc:
Expand Down
4 changes: 4 additions & 0 deletions analytics/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,8 @@ django-extensions==3.2.3
kubernetes==26.1.0
python-gitlab==3.11.0
psycopg2-binary==2.9.5
opensearch-dsl==2.0.1
sentry-sdk[django]
gunicorn
celery[redis]

1 change: 1 addition & 0 deletions images/build-timing-processor/job-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ spec:
- name: build-timing-processing-job
image: ghcr.io/spack/upload-build-timings:0.0.4
imagePullPolicy: Always
command: ["./manage.py", "upload_build_timings"]
jjnesbitt marked this conversation as resolved.
Show resolved Hide resolved
env:
- name: GITLAB_TOKEN
valueFrom:
Expand Down
Loading
Loading