Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 21 additions & 13 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,27 +1,35 @@
language: python
sudo: false
sudo: required

services:
- docker

branches:
only:
- master
- /^\d\.\d+$/

matrix:
include:
- python: 2.7
env: TOXENV=py27
- python: 3.4
env: TOXENV=py34
- python: 3.5
env: TOXENV=py35
- python: 3.6
env: TOXENV=py36
- python: 2.7
env: TOXENV=py27-scrapy10
include:
- python: 2.7
env: TOXENV=py27
- python: 3.4
env: TOXENV=py34
- python: 3.5
env: TOXENV=py35
- python: 3.6
env: TOXENV=py36
- python: 2.7
env: TOXENV=py27-scrapy11

before_install:
- docker pull scrapinghub/splash
- docker run --rm -d -p 8050:8050 --network host scrapinghub/splash

install:
- pip install -U tox codecov

script: tox
script: SPLASH_URL=http://127.0.0.1:8050 tox

after_success:
- codecov
Expand Down
6 changes: 6 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -679,3 +679,9 @@ https://github.com/scrapy-plugins/scrapy-splash

To run tests, install "tox" Python package and then run ``tox`` command
from the source checkout.

To run integration tests, start Splash and set the SPLASH_URL environment
variable to the Splash address before running the ``tox`` command::

docker run -d --rm -p8050:8050 scrapinghub/splash:3.0
SPLASH_URL=http://127.0.0.1:8050 tox -e py36
5 changes: 5 additions & 0 deletions requirements-test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pytest >= 3.3.2
pytest-cov >= 2.5.1
pytest-twisted >= 1.6
hypothesis >= 3.44.14
hypothesis-pytest
9 changes: 6 additions & 3 deletions scrapy_splash/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,11 @@ class SlotPolicy(object):

class SplashCookiesMiddleware(object):
"""
This middleware maintains cookiejars for Splash requests.
This downloader middleware maintains cookiejars for Splash requests.

It gets cookies from 'cookies' field in Splash JSON responses
and sends current cookies in 'cookies' JSON POST argument.
and sends current cookies in 'cookies' JSON POST argument instead of
sending them in http headers.

It should process requests before SplashMiddleware, and process responses
after SplashMiddleware.
Expand All @@ -57,12 +58,14 @@ def from_crawler(cls, crawler):
def process_request(self, request, spider):
"""
For Splash requests add 'cookies' key with current
cookies to request.meta['splash']['args']
cookies to ``request.meta['splash']['args']`` and remove cookie
headers sent to Splash itself.
"""
if 'splash' not in request.meta:
return

if request.meta.get('_splash_processed'):
request.headers.pop('Cookie', None)
return

splash_options = request.meta['splash']
Expand Down
33 changes: 33 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import os

import pytest
from scrapy.settings import Settings


@pytest.fixture()
def settings(request):
    """Scrapy settings pre-configured for scrapy-splash integration tests."""
    cfg = {
        # collect scraped items to .collected_items attribute
        'ITEM_PIPELINES': {
            'tests.utils.CollectorPipeline': 100,
        },
        # scrapy-splash settings; SPLASH_URL comes from the environment so
        # the suite can target a locally started Splash container
        'SPLASH_URL': os.environ.get('SPLASH_URL'),
        'DOWNLOADER_MIDDLEWARES': {
            # Engine side
            'scrapy_splash.SplashCookiesMiddleware': 723,
            'scrapy_splash.SplashMiddleware': 725,
            'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 810,
            # Downloader side
        },
        'SPIDER_MIDDLEWARES': {
            'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,
        },
        'DUPEFILTER_CLASS': 'scrapy_splash.SplashAwareDupeFilter',
        'HTTPCACHE_STORAGE': 'scrapy_splash.SplashAwareFSCacheStorage',
    }
    return Settings(cfg)


56 changes: 56 additions & 0 deletions tests/mockserver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env python
import argparse, socket, sys, time
from subprocess import Popen, PIPE
from importlib import import_module

from twisted.internet import reactor
from twisted.web.server import Site


def get_ephemeral_port():
    """Return a TCP port number that was free at the time of the call.

    The OS chooses the port (bind to port 0).  The probe socket is closed
    before returning, so there is a small race window in which another
    process could claim the port — acceptable for test fixtures.
    """
    s = socket.socket()
    try:
        s.bind(("", 0))
        return s.getsockname()[1]
    finally:
        s.close()  # fix: the original leaked the probe socket


class MockServer():
    """Run a twisted resource over HTTP in a child process.

    Used as a context manager: ``__enter__`` spawns the server and waits
    for its "listening" banner; ``__exit__`` kills it and waits briefly
    so the port is released.
    """

    def __init__(self, resource, port=None):
        # Store the dotted path so the child process can re-import
        # the resource class by name.
        self.resource = '{}.{}'.format(resource.__module__, resource.__name__)
        self.proc = None
        self.port = port or get_ephemeral_port()
        local_host = socket.gethostbyname(socket.gethostname())
        self.root_url = 'http://%s:%d' % (local_host, self.port)

    def __enter__(self):
        cmd = [sys.executable, '-u', '-m', 'tests.mockserver',
               self.resource, '--port', str(self.port)]
        self.proc = Popen(cmd, stdout=PIPE)
        # Block until the child prints its banner line, i.e. is listening.
        self.proc.stdout.readline()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.proc.kill()
        self.proc.wait()
        # Give the OS a moment to tear the socket down.
        time.sleep(0.2)


def main():
    """CLI entry point: serve the given twisted resource over HTTP.

    Expects a dotted path to a resource class and an optional ``--port``.
    Prints a banner line once listening (the parent MockServer process
    waits for it), then runs the reactor until the process is killed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('resource')
    # fix: default to 0 (OS-assigned port) instead of None, which would
    # make reactor.listenTCP raise TypeError when --port is omitted
    parser.add_argument('--port', type=int, default=0)
    args = parser.parse_args()
    module_name, name = args.resource.rsplit('.', 1)
    sys.path.append('.')  # allow importing the tests package from the CWD
    resource = getattr(import_module(module_name), name)()
    http_port = reactor.listenTCP(args.port, Site(resource))

    def print_listening():
        host = http_port.getHost()
        print('Mock server {} running at http://{}:{}'.format(
            resource, host.host, host.port))

    reactor.callWhenRunning(print_listening)
    reactor.run()


if __name__ == "__main__":
    main()
Loading