Skip to content
This repository has been archived by the owner on Aug 4, 2022. It is now read-only.

Commit

Permalink
Bug 1341214 - Add a small API to handle taskcluster queue and index r…
Browse files Browse the repository at this point in the history
…equests. r=dustin

Various modules under taskcluster are doing ad-hoc url formatting or
requests to taskcluster services. While we could use the taskcluster
client python module, it's kind of overkill for the simple requests done
here. So instead of vendoring that module, create a smaller one with
a limited set of functions we need.

This changes the behavior of the get_artifact function to return a
file-like object when the file is neither a json nor a yaml, but that
branch was never used (and was actually returning an unassigned
variable, so it was broken anyways).

At the same time, make the function that does HTTP requests more
error-resistant, using urllib3's Retry with a backoff factor.

Also add a function that retrieves the list of artifacts, that while
currently unused, will be used by `mach artifact` shortly.
  • Loading branch information
glandium committed Feb 17, 2017
1 parent 59b0428 commit ea2cee9
Show file tree
Hide file tree
Showing 12 changed files with 116 additions and 79 deletions.
2 changes: 0 additions & 2 deletions taskcluster/mach_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@

from mozbuild.base import MachCommandBase

ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'


class ShowTaskGraphSubCommand(SubCommand):
"""A SubCommand with TaskGraph-specific arguments"""
Expand Down
14 changes: 2 additions & 12 deletions taskcluster/taskgraph/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,17 @@

from __future__ import absolute_import, print_function, unicode_literals

import json
import logging
import requests
import yaml

from .create import create_tasks
from .decision import write_artifact
from .optimize import optimize_task_graph
from .taskgraph import TaskGraph
from .util.taskcluster import get_artifact


logger = logging.getLogger(__name__)
TASKCLUSTER_QUEUE_URL = "https://queue.taskcluster.net/v1/task"
TREEHERDER_URL = "https://treeherder.mozilla.org/api"

# We set this to 5 for now because this is what SETA sets the
Expand Down Expand Up @@ -63,15 +62,6 @@ def add_tasks(decision_task_id, task_labels, prefix=''):
create_tasks(optimized_graph, label_to_taskid, decision_params)


def get_artifact(task_id, path):
resp = requests.get(url="{}/{}/artifacts/{}".format(TASKCLUSTER_QUEUE_URL, task_id, path))
if path.endswith('.json'):
artifact = json.loads(resp.text)
elif path.endswith('.yml'):
artifact = yaml.load(resp.text)
return artifact


def backfill(project, job_id):
"""
Run the backfill task. This function implements `mach taskgraph backfill-task`,
Expand Down
10 changes: 1 addition & 9 deletions taskcluster/taskgraph/cron/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import logging
import os
import traceback
import requests
import yaml

from . import decision, schema
Expand All @@ -24,6 +23,7 @@
from .. import GECKO
from taskgraph.util.attributes import match_run_on_projects
from taskgraph.util.schema import resolve_keyed_by
from taskgraph.util.taskcluster import get_session

# Functions to handle each `job.type` in `.cron.yml`. These are called with
# the contents of the `job` property from `.cron.yml` and should return a
Expand All @@ -34,14 +34,6 @@
}

logger = logging.getLogger(__name__)
_session = None


def get_session():
global _session
if not _session:
_session = requests.Session()
return _session


def load_jobs(params):
Expand Down
17 changes: 9 additions & 8 deletions taskcluster/taskgraph/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,32 @@
import subprocess
import tarfile
import tempfile
import urllib2
import which
from subprocess import Popen, PIPE
from io import BytesIO

from taskgraph.util import docker
from taskgraph.util.taskcluster import (
find_task_id,
get_artifact_url,
)
from . import GECKO

INDEX_URL = 'https://index.taskcluster.net/v1/task/' + docker.INDEX_PREFIX + '.{}.{}.hash.{}'
ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
DOCKER_INDEX = docker.INDEX_PREFIX + '.{}.{}.hash.{}'


def load_image_by_name(image_name, tag=None):
context_path = os.path.join(GECKO, 'taskcluster', 'docker', image_name)
context_hash = docker.generate_context_hash(GECKO, context_path, image_name)

image_index_url = INDEX_URL.format('level-3', image_name, context_hash)
print("Fetching", image_index_url)
task = json.load(urllib2.urlopen(image_index_url))
index_path = DOCKER_INDEX.format('level-3', image_name, context_hash)
task_id = find_task_id(index_path)

return load_image_by_task_id(task['taskId'], tag)
return load_image_by_task_id(task_id, tag)


def load_image_by_task_id(task_id, tag=None):
artifact_url = ARTIFACT_URL.format(task_id, 'public/image.tar.zst')
artifact_url = get_artifact_url(task_id, 'public/image.tar.zst')
result = load_image(artifact_url, tag)
print("Found docker image: {}:{}".format(result['image'], result['tag']))
if tag:
Expand Down
6 changes: 2 additions & 4 deletions taskcluster/taskgraph/parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,7 @@ def load_parameters_file(options):
Load parameters from the --parameters option
"""
import urllib

url_prefix = "https://queue.taskcluster.net/v1/task/"
url_postfix = "/artifacts/public/parameters.yml"
from taskgraph.util.taskcluster import get_artifact_url

filename = options['parameters']

Expand All @@ -78,7 +76,7 @@ def load_parameters_file(options):
# fetching parameters.yml using task task-id or supplied url
if filename.startswith("task-id="):
task_id = filename.split("=")[1]
filename = url_prefix + task_id + url_postfix
filename = get_artifact_url(task_id, 'public/parameters.yml')
f = urllib.urlopen(filename)

if filename.endswith('.yml'):
Expand Down
21 changes: 7 additions & 14 deletions taskcluster/taskgraph/task/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,9 @@
from __future__ import absolute_import, print_function, unicode_literals

import abc
import json
import os
import urllib2


# if running in a task, prefer to use the taskcluster proxy (http://taskcluster/),
# otherwise hit the services directly
if os.environ.get('TASK_ID'):
INDEX_URL = 'http://taskcluster/index/v1/task/{}'
else:
INDEX_URL = 'https://index.taskcluster.net/v1/task/{}'
import requests
from taskgraph.util.taskcluster import find_task_id


class Task(object):
Expand Down Expand Up @@ -109,11 +101,12 @@ def optimize(self, params):
"""
for index_path in self.index_paths:
try:
url = INDEX_URL.format(index_path)
existing_task = json.load(urllib2.urlopen(url))
task_id = find_task_id(
index_path,
use_proxy=bool(os.environ.get('TASK_ID')))

return True, existing_task['taskId']
except urllib2.HTTPError:
return True, task_id
except requests.exceptions.HTTPError:
pass

return False, None
Expand Down
13 changes: 4 additions & 9 deletions taskcluster/taskgraph/task/docker_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,11 @@
generate_context_hash,
INDEX_PREFIX,
)
from taskgraph.util.taskcluster import get_artifact_url
from taskgraph.util.templates import Templates

logger = logging.getLogger(__name__)

# if running in a task, prefer to use the taskcluster proxy (http://taskcluster/),
# otherwise hit the services directly
if os.environ.get('TASK_ID'):
ARTIFACT_URL = 'http://taskcluster/queue/v1/task/{}/artifacts/{}'
else:
ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'


class DockerImageTask(base.Task):

Expand Down Expand Up @@ -93,8 +87,9 @@ def optimize(self, params):
try:
# Only return the task ID if the artifact exists for the indexed
# task.
request = urllib2.Request(
ARTIFACT_URL.format(taskId, 'public/image.tar.zst'))
request = urllib2.Request(get_artifact_url(
taskId, 'public/image.tar.zst',
use_proxy=bool(os.environ.get('TASK_ID'))))
request.get_method = lambda: 'HEAD'
urllib2.urlopen(request)

Expand Down
2 changes: 0 additions & 2 deletions taskcluster/taskgraph/taskgraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
from .graph import Graph
from .util.python_path import find_object

TASKCLUSTER_QUEUE_URL = "https://queue.taskcluster.net/v1/task/"


class TaskGraph(object):
"""
Expand Down
29 changes: 14 additions & 15 deletions taskcluster/taskgraph/transforms/job/mozharness_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from voluptuous import Schema, Required
from taskgraph.util.taskcluster import get_artifact_url
from taskgraph.transforms.job import run_job_using
from taskgraph.transforms.tests import (
test_description_schema,
Expand All @@ -15,8 +16,6 @@
import os
import re

ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'

ARTIFACTS = [
# (artifact name prefix, in-image path)
("public/logs/", "build/upload/logs/"),
Expand Down Expand Up @@ -59,11 +58,11 @@ def mozharness_test_on_docker(config, job, taskdesc):
("public/test_info/", "/home/worker/workspace/build/blobber_upload_dir/"),
]

installer_url = ARTIFACT_URL.format('<build>', mozharness['build-artifact-name'])
test_packages_url = ARTIFACT_URL.format('<build>',
'public/build/target.test_packages.json')
mozharness_url = ARTIFACT_URL.format('<build>',
'public/build/mozharness.zip')
installer_url = get_artifact_url('<build>', mozharness['build-artifact-name'])
test_packages_url = get_artifact_url('<build>',
'public/build/target.test_packages.json')
mozharness_url = get_artifact_url('<build>',
'public/build/mozharness.zip')

worker['artifacts'] = [{
'name': prefix,
Expand Down Expand Up @@ -206,11 +205,11 @@ def mozharness_test_on_windows(config, job, taskdesc):

target = 'firefox-{}.en-US.{}'.format(get_firefox_version(), build_platform)

installer_url = ARTIFACT_URL.format(
installer_url = get_artifact_url(
'<build>', 'public/build/{}.zip'.format(target))
test_packages_url = ARTIFACT_URL.format(
test_packages_url = get_artifact_url(
'<build>', 'public/build/{}.test_packages.json'.format(target))
mozharness_url = ARTIFACT_URL.format(
mozharness_url = get_artifact_url(
'<build>', 'public/build/mozharness.zip')

taskdesc['scopes'].extend(
Expand Down Expand Up @@ -270,11 +269,11 @@ def mozharness_test_on_native_engine(config, job, taskdesc):
mozharness = test['mozharness']
worker = taskdesc['worker']

installer_url = ARTIFACT_URL.format('<build>', mozharness['build-artifact-name'])
test_packages_url = ARTIFACT_URL.format('<build>',
'public/build/target.test_packages.json')
mozharness_url = ARTIFACT_URL.format('<build>',
'public/build/mozharness.zip')
installer_url = get_artifact_url('<build>', mozharness['build-artifact-name'])
test_packages_url = get_artifact_url('<build>',
'public/build/target.test_packages.json')
mozharness_url = get_artifact_url('<build>',
'public/build/mozharness.zip')

worker['artifacts'] = [{
'name': prefix.rstrip('/'),
Expand Down
3 changes: 0 additions & 3 deletions taskcluster/taskgraph/transforms/signing.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@
from voluptuous import Schema, Any, Required, Optional


ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/<{}>/artifacts/{}'


# Voluptuous uses marker objects as dictionary *keys*, but they are not
# comparable, so we cast all of the keys back to regular strings
task_description_schema = {str(k): v for k, v in task_description_schema.schema.iteritems()}
Expand Down
1 change: 0 additions & 1 deletion taskcluster/taskgraph/util/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@

IMAGE_DIR = os.path.join(GECKO, 'taskcluster', 'docker')
INDEX_PREFIX = 'docker.images.v2'
ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'


def docker_image(name, by_tag=False):
Expand Down
77 changes: 77 additions & 0 deletions taskcluster/taskgraph/util/taskcluster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

from __future__ import absolute_import, print_function, unicode_literals

import functools
import yaml
import requests
from mozbuild.util import memoize
from requests.packages.urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter


@memoize
def get_session():
session = requests.Session()
retry = Retry(total=5, backoff_factor=0.1,
status_forcelist=[500, 502, 503, 504])
session.mount('http://', HTTPAdapter(max_retries=retry))
session.mount('https://', HTTPAdapter(max_retries=retry))
return session


def _do_request(url):
session = get_session()
return session.get(url, stream=True)


def get_artifact_url(task_id, path, use_proxy=False):
if use_proxy:
ARTIFACT_URL = 'http://taskcluster/queue/v1/task/{}/artifacts/{}'
else:
ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
return ARTIFACT_URL.format(task_id, path)


def get_artifact(task_id, path, use_proxy=False):
"""
Returns the artifact with the given path for the given task id.
If the path ends with ".json" or ".yml", the content is deserialized as,
respectively, json or yaml, and the corresponding python data (usually
dict) is returned.
For other types of content, a file-like object is returned.
"""
response = _do_request(get_artifact_url(task_id, path, use_proxy))
response.raise_for_status()
if path.endswith('.json'):
return response.json()
if path.endswith('.yml'):
return yaml.load(response.text)
response.raw.read = functools.partial(response.raw.read,
decode_content=True)
return response.raw


def list_artifacts(task_id, use_proxy=False):
response = _do_request(get_artifact_url(task_id, '', use_proxy).rstrip('/'))
response.raise_for_status()
return response.json()['artifacts']


def get_index_url(index_path, use_proxy=False):
if use_proxy:
INDEX_URL = 'http://taskcluster/index/v1/task/{}'
else:
INDEX_URL = 'https://index.taskcluster.net/v1/task/{}'
return INDEX_URL.format(index_path)


def find_task_id(index_path, use_proxy=False):
response = _do_request(get_index_url(index_path, use_proxy))
response.raise_for_status()
return response.json()['taskId']

0 comments on commit ea2cee9

Please sign in to comment.