Commit

Merge branch 'develop'
ruairif committed Apr 19, 2018
2 parents 8e7bcd3 + 4ac4a76 commit e24342c
Showing 29 changed files with 733 additions and 173 deletions.
13 changes: 11 additions & 2 deletions portia_server/portia_api/resources/projects.py
@@ -1,11 +1,13 @@
 from collections import OrderedDict
 
+from django.conf import settings
 from django.utils.functional import cached_property
 from dulwich.objects import Commit
 from rest_framework.decorators import detail_route
 from rest_framework.response import Response
 from rest_framework.status import HTTP_200_OK, HTTP_201_CREATED
 from six import iteritems
+from scrapy.utils.misc import load_object
 
 from portia_orm.models import Project
 from storage import get_storage_class
@@ -19,6 +21,7 @@
                                  JsonApiConflictError)
 from ..utils.download import ProjectArchiver, CodeProjectArchiver
 from ..utils.copy import ModelCopier, MissingModelException
+Deployer = load_object(settings.PROJECT_DEPLOYER)
 
 
 class ProjectDownloadMixin(object):
@@ -164,6 +167,11 @@ def publish(self, *args, **kwargs):
         response = self.retrieve()
         return Response(response.data, status=HTTP_200_OK)
 
+    @detail_route(methods=['POST'])
+    def deploy(self, *args, **kwargs):
+        data = self._deploy()
+        return Response(data, HTTP_200_OK)
+
     @detail_route(methods=['put', 'patch', 'post'])
     def reset(self, *args, **kwargs):
         if not self.storage.version_control and hasattr(self.storage, 'repo'):
@@ -265,5 +273,6 @@ def get_project_changes(self):
                 for type_, path, old_path
                 in storage.changed_files()]
 
-    def deploy(self):
-        pass
+    def _deploy(self):
+        if settings.CAPABILITIES.get('deploy_projects'):
+            return Deployer(self.project).deploy()
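The new endpoint is backend-agnostic: the `Deployer` class is resolved once, at import time, from the `PROJECT_DEPLOYER` setting via Scrapy's `load_object`. A minimal sketch of that wiring; the dotted path and `CAPABILITIES` value shown are assumptions for illustration, not values taken from this diff:

# Sketch of the settings that drive the new deploy route; both values
# below are assumed defaults, not confirmed by this commit.
from scrapy.utils.misc import load_object

PROJECT_DEPLOYER = 'portia_api.utils.deploy.scrapinghub.ScrapinghubDeploy'
CAPABILITIES = {'deploy_projects': True}  # _deploy() returns None when absent

Deployer = load_object(PROJECT_DEPLOYER)  # -> the configured deployer class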
22 changes: 11 additions & 11 deletions portia_server/portia_api/resources/route.py
@@ -1,4 +1,4 @@
-from collections import Sequence, OrderedDict
+from collections import Sequence
 from operator import attrgetter
 
 from django.db import transaction
@@ -11,8 +11,6 @@
 from rest_framework.status import (HTTP_200_OK, HTTP_201_CREATED,
                                    HTTP_204_NO_CONTENT)
 from rest_framework.viewsets import ViewSet
-from six import iterkeys, text_type
-from six.moves import map
 
 from portia_orm.collection import ModelCollection
 from portia_orm.exceptions import ProtectedError
@@ -26,7 +24,7 @@
 from ..jsonapi.registry import get_schema
 from ..jsonapi.renderers import JSONApiRenderer, JSONRenderer
 from ..jsonapi.serializers import JsonApiPolymorphicSerializer
-from ..jsonapi.utils import get_status_title, type_from_model_name
+from ..jsonapi.utils import type_from_model_name
 
 
 class JsonApiRoute(ViewSet):
@@ -80,7 +78,8 @@ def handle_exception(self, exc):
         status_code = response.status_code
         if (isinstance(response.data, dict) and len(response.data) == 1 and
                 'detail' in response.data):
-            response.data = render_exception(status_code, response.data['detail'])
+            response.data = render_exception(status_code,
+                                             response.data['detail'])
         return response
 
     def get_instance(self):
@@ -100,8 +99,9 @@ def filter_collection(self, collection):
 
         collection = collection.__class__((collection[id_] for id_ in ids))
 
-        for key in iterkeys(self.query):
-            if key != 'filter[id]' and key.startswith('filter[') and key[-1] == ']':
+        for key in self.query.keys():
+            if (key != 'filter[id]' and key.startswith('filter[') and
+                    key[-1] == ']'):
                 field_name = key[7:-1]
                 field_values = set()
                 for field_list in self.query.getlist(key):
@@ -115,8 +115,8 @@ def filter_collection(self, collection):
                     related = getattr(obj, field_name)
                     filter_values = {related.pk if related else 'null'}
                 elif isinstance(field, HasMany):
-                    filter_values = set(map(attrgetter('pk'),
-                                            getattr(obj, field_name)))
+                    filter_values = {attrgetter('pk')(f)
+                                     for f in getattr(obj, field_name)}
                 else:
                     value = getattr(obj, field_name)
                     if isinstance(value, Sequence):
@@ -171,7 +171,7 @@ def get_request_kwargs(self):
             kwargs['include_data'] = include
 
         fields = {}
-        for key in iterkeys(self.query):
+        for key in self.query.keys():
             if key.startswith('fields[') and key[-1] == ']':
                 field = key[7:-1]
                 for field_list in self.query.getlist(key):
@@ -247,7 +247,7 @@ def update(self, *args, **kwargs):
             raise Http404
 
         if kwargs.pop('partial', False):
-            partial = set(instance.__class__._ordered_fields).difference({'id'})
+            partial = set(instance.__class__._ordered_fields) - {'id'}
         else:
             partial = False
 
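The edits above replace the `six` helpers (`iterkeys`, `map`) and `set.difference` with plain Python 3 idioms. A self-contained sketch of the equivalences, using made-up example data:

# Each pair below is behaviour-equivalent; values are illustrative only.
from operator import attrgetter

query = {'filter[id]': '1,2', 'fields[spiders]': 'name'}
assert list(query.keys()) == list(query)  # .keys() replaces six.iterkeys

class Obj(object):
    def __init__(self, pk):
        self.pk = pk

items = [Obj(1), Obj(2)]
assert {attrgetter('pk')(f) for f in items} == set(map(attrgetter('pk'), items))
assert {'id', 'name'} - {'id'} == {'id', 'name'}.difference({'id'})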
30 changes: 3 additions & 27 deletions portia_server/portia_api/resources/spiders.py
@@ -1,19 +1,18 @@
-from collections import OrderedDict
 from django.http.response import Http404
 
-import requests
-
 from rest_framework.decorators import detail_route
 from rest_framework.response import Response
 from rest_framework.status import HTTP_200_OK, HTTP_400_BAD_REQUEST
 
 from django.conf import settings
+from scrapy.utils.misc import load_object
 
 from .projects import BaseProjectModelRoute, ProjectDownloadMixin
 from ..jsonapi.exceptions import JsonApiGeneralException
 from ..utils.extract import Pages, FetchError
 from ..utils.spiders import load_spider
 from portia_orm.models import Spider
+Deployer = load_object(settings.PROJECT_DEPLOYER)
 
 
 class SpiderRoute(ProjectDownloadMixin, BaseProjectModelRoute):
@@ -78,28 +77,5 @@ def rename(self, *args, **kwargs):
     @detail_route(methods=['post'])
     def schedule(self, *args, **kwargs):
         spider_id = self.data['data']['id']
-        schedule_data = self._schedule_data(spider_id, self.data)
-        request = requests.post(settings.SCHEDULE_URL, data=schedule_data)
-        if request.status_code != 200:
-            raise JsonApiGeneralException(
-                request.status_code, request.content)
-        response = self.retrieve()
-        data = OrderedDict()
-        data.update(response.data)
-        data.setdefault('meta', {})['scheduled'] = True
+        data = Deployer(self.project).schedule(spider_id)
         return Response(data, status=HTTP_200_OK)
-
-    def _schedule_data(self, spider_id, args):
-        data = {
-            'project': self.project.id,
-            'spider': spider_id
-        }
-        if self.storage.version_control:
-            branch = self.query.get('branch', None)
-            commit = self.query.get('commit_id', None)
-            if not branch and self.storage.repo.has_branch(self.user.username):
-                branch = self.user.username
-            self.storage.checkout(commit, branch)
-            commit_id = self.storage._commit.id
-            data['version'] = commit_id
-        return data
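The route no longer builds Scrapyd payloads itself; it only assumes the configured deployer exposes `schedule(spider_id)` and returns a JSON-serialisable dict. A toy stand-in (purely hypothetical) illustrating that contract:

# EchoDeployer is hypothetical; the real implementation is ScrapinghubDeploy
# (added below), looked up through settings.PROJECT_DEPLOYER.
class EchoDeployer(object):
    def __init__(self, project):
        self.project = project

    def schedule(self, spider, args=None, settings=None, target='default'):
        return {'project': self.project, 'spider': spider, 'scheduled': True}

print(EchoDeployer('my-project').schedule('example-spider'))
# {'project': 'my-project', 'spider': 'example-spider', 'scheduled': True}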
22 changes: 22 additions & 0 deletions portia_server/portia_api/utils/deploy/base.py
@@ -0,0 +1,22 @@
+from portia_api.utils.download import ProjectArchiver
+
+
+class BaseDeploy(object):
+    def __init__(self, project):
+        self.project = project
+        self.storage = project.storage
+        self.config = self._get_config()
+        self.config.version = self.project.version
+
+    def build_archive(self):
+        return ProjectArchiver(self.storage, project=self.project).archive(
+            egg_info=True)
+
+    def _get_config(self):
+        raise NotImplementedError
+
+    def deploy(self, target=None):
+        raise NotImplementedError
+
+    def schedule(self, spider, args=None, settings=None, target=None):
+        raise NotImplementedError
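Concrete backends implement `_get_config`, `deploy`, and `schedule`, while `build_archive` already produces the project egg. A rough sketch of a subclass, assuming the archive is a readable file-like object and using a made-up config stand-in and output path:

# Illustrative subclass, not part of this commit.
from portia_api.utils.deploy.base import BaseDeploy


class LocalEggDeploy(BaseDeploy):
    def _get_config(self):
        class Config(object):  # stand-in; BaseDeploy assigns .version to it
            version = None
        return Config()

    def deploy(self, target=None):
        egg = self.build_archive()  # assumed file-like (see ProjectArchiver)
        with open('/tmp/project.egg', 'wb') as out:
            out.write(egg.read())
        return {'message': 'egg written to /tmp/project.egg'}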
60 changes: 60 additions & 0 deletions portia_server/portia_api/utils/deploy/package.py
@@ -0,0 +1,60 @@
+import os
+import textwrap
+import zipfile
+
+from datetime import datetime
+from distutils.dist import DistributionMetadata
+from io import StringIO
+
+
+class EggInfo(object):
+    def __init__(self, project, archive):
+        self.project = project
+        self.archive = archive
+        self.tstamp = datetime.now().timetuple()[:6]
+
+    def write(self):
+        self._write_file('PKG-INFO', self.build_pkg_info())
+        self._write_file('SOURCES.txt', self.build_sources())
+        self._write_file('dependency_links.txt', self.build_dependency())
+        self._write_file('entry_points.txt', self.build_entry_points())
+        self._write_file('top_level.txt', self.build_top_level())
+        self._write_file('zip-safe', self.build_zip_safe())
+
+    def _write_file(self, filename, contents):
+        filepath = os.path.join('EGG-INFO', filename)
+        fileinfo = zipfile.ZipInfo(filepath, self.tstamp)
+        fileinfo.external_attr = 0o666 << 16
+        self.archive.writestr(fileinfo, contents, zipfile.ZIP_DEFLATED)
+
+    def build_pkg_info(self):
+        meta = DistributionMetadata()
+        meta.name = self.project.name
+        meta.version = self.project.version
+        file = StringIO()
+        meta.write_pkg_file(file)
+        file.seek(0)
+        return file.read()
+
+    def build_sources(self):
+        return '\n'.join(sorted(f.filename for f in self.archive.filelist))
+
+    def build_top_level(self):
+        return '\n'.join(sorted({
+            fn.split('/', 1)[0] for fn in (
+                fn for fn in (
+                    f.filename for f in self.archive.filelist))
+            if fn.endswith('.py')
+        }))
+
+    def build_dependency(self):
+        return '\n'
+
+    def build_entry_points(self):
+        return textwrap.dedent("""\
+            [scrapy]
+            settings = spiders.settings
+        """)
+
+    def build_zip_safe(self):
+        return ''
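`EggInfo` decorates an already-open `zipfile.ZipFile` and is normally driven through `ProjectArchiver(..., egg_info=True)`. A self-contained sketch of the same metadata being written into an in-memory archive; the `Meta` stand-in mimics the two attributes (`name`, `version`) that `build_pkg_info` reads from a project:

# Sketch: exercise EggInfo against an in-memory zip.
import io
import zipfile

from portia_api.utils.deploy.package import EggInfo


class Meta(object):  # hypothetical stand-in for a Project model
    name = 'example'
    version = '1.0'


buf = io.BytesIO()
with zipfile.ZipFile(buf, 'w', zipfile.ZIP_DEFLATED) as archive:
    archive.writestr('spiders/settings.py', '# generated settings\n')
    EggInfo(Meta(), archive).write()  # adds the EGG-INFO/* entries

with zipfile.ZipFile(buf) as archive:
    print(archive.read('EGG-INFO/top_level.txt').decode())  # -> spiders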
92 changes: 92 additions & 0 deletions portia_server/portia_api/utils/deploy/scrapinghub.py
@@ -0,0 +1,92 @@
+import json
+import os
+
+from six import StringIO
+from urllib.parse import urljoin
+
+from django.conf import settings as app_settings
+from rest_framework import status
+from shub import exceptions
+from shub.config import ShubConfig
+from shub.schedule import schedule_spider
+from shub.utils import make_deploy_request
+from portia_api.jsonapi.exceptions import JsonApiGeneralException
+from storage.projecttemplates import templates
+
+from .base import BaseDeploy
+
+
+class ScrapinghubDeploy(BaseDeploy):
+    SHUB_DOCS_URL = 'https://shub.readthedocs.io/en/stable/configuration.html'
+    EXCEPTIONS = (
+        exceptions.InvalidAuthException,  # EX_NOPERM
+        exceptions.RemoteErrorException,  # EX_PROTOCOL
+    )
+    STATUS_CODES = {
+        os.EX_UNAVAILABLE: status.HTTP_404_NOT_FOUND,
+        os.EX_PROTOCOL: status.HTTP_503_SERVICE_UNAVAILABLE,
+    }
+
+    def _get_config(self):
+        conf = ShubConfig()
+        conf.load(StringIO(json.dumps(self._default_config())))
+        if 'SHUB_APIKEY' in os.environ:
+            conf.apikeys['default'] = os.environ['SHUB_APIKEY']
+        try:
+            conf.load(self.storage.open('scrapinghub.yml'))
+        except OSError:
+            raise JsonApiGeneralException(
+                'Need a `scrapinghub.yml` file to identify which project '
+                'to deploy to. Find more information at: {}'.format(
+                    self.SHUB_DOCS_URL), status.HTTP_400_BAD_REQUEST)
+        return conf
+
+    def _default_config(self):
+        config = {
+            'stack': 'scrapy:1.5-py-latest',
+        }
+        if getattr(app_settings, 'SCRAPINGHUB_APIKEY', None):
+            config['apikeys'] = {
+                'default': app_settings.SCRAPINGHUB_APIKEY,
+            }
+        return config
+
+    def deploy(self, target='default'):
+        try:
+            conf = self.config.get_target_conf(target)
+            archive = self.build_archive()
+            data = {
+                'project': conf.project_id,
+                'version': self.project.version,
+                'stack': conf.stack
+            }
+            files = [('egg', archive)]
+            if conf.requirements_file:
+                try:
+                    file = self.storage.open(conf.requirements_file)
+                except OSError:
+                    file = StringIO(templates['REQUIREMENTS'])
+                files.append(('requirements', file))
+            make_deploy_request(
+                urljoin(conf.endpoint, 'scrapyd/addversion.json'),
+                data, files, (conf.apikey, ''), False, False)
+        except self.EXCEPTIONS as e:
+            raise JsonApiGeneralException(
+                e.format_message(),
+                self.STATUS_CODES.get(getattr(e, 'exit_code', None), 500),
+            )
+        return {
+            'message': 'Your deploy completed successfully',
+        }
+
+    def schedule(self, spider, args=None, settings=None, target='default'):
+        try:
+            conf = self.config.get_target_conf(target)
+            schedule_spider(
+                conf.project_id, conf.endpoint, conf.apikey, spider,
+                arguments=args or (), settings=settings or ())
+        except self.EXCEPTIONS as e:
+            raise JsonApiGeneralException(
+                e.format_message(),
+                self.STATUS_CODES.get(getattr(e, 'exit_code', None), 500),
+            )
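Deploying requires a `scrapinghub.yml` that names the target project; `_default_config` only supplies the stack and, optionally, an API key. A sketch of a minimal configuration, loaded the same way `_get_config` does (the project id and key are placeholders):

# Sketch: what a minimal scrapinghub.yml resolves to via ShubConfig.
from io import StringIO

from shub.config import ShubConfig

conf = ShubConfig()
conf.load(StringIO(
    'projects:\n'
    '  default: 12345\n'        # placeholder Scrapy Cloud project id
    'apikeys:\n'
    '  default: YOUR_APIKEY\n'  # placeholder key
    'stack: scrapy:1.5-py-latest\n'
))
target = conf.get_target_conf('default')
print(target.project_id, target.endpoint, target.stack)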