Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 21 additions & 13 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,27 +1,35 @@
language: python
sudo: false
sudo: required

services:
- docker

branches:
only:
- master
- /^\d\.\d+$/

matrix:
include:
- python: 2.7
env: TOXENV=py27
- python: 3.4
env: TOXENV=py34
- python: 3.5
env: TOXENV=py35
- python: 3.6
env: TOXENV=py36
- python: 2.7
env: TOXENV=py27-scrapy10
include:
- python: 2.7
env: TOXENV=py27
- python: 3.4
env: TOXENV=py34
- python: 3.5
env: TOXENV=py35
- python: 3.6
env: TOXENV=py36
- python: 2.7
env: TOXENV=py27-scrapy11

before_install:
- docker pull scrapinghub/splash
- docker run --rm -d -p 8050:8050 --network host scrapinghub/splash

install:
- pip install -U tox codecov

script: tox
script: SPLASH_URL=http://127.0.0.1:8050 tox

after_success:
- codecov
Expand Down
6 changes: 6 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -679,3 +679,9 @@ https://github.com/scrapy-plugins/scrapy-splash

To run tests, install "tox" Python package and then run ``tox`` command
from the source checkout.

To run integration tests, start Splash and set the SPLASH_URL environment
variable to the Splash address before running the ``tox`` command::

docker run -d --rm -p8050:8050 scrapinghub/splash:3.0
SPLASH_URL=http://127.0.0.1:8050 tox -e py36
5 changes: 5 additions & 0 deletions requirements-test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pytest >= 3.3.2
pytest-cov >= 2.5.1
pytest-twisted >= 1.6
hypothesis >= 3.44.14
hypothesis-pytest
9 changes: 6 additions & 3 deletions scrapy_splash/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,11 @@ class SlotPolicy(object):

class SplashCookiesMiddleware(object):
"""
This middleware maintains cookiejars for Splash requests.
This downloader middleware maintains cookiejars for Splash requests.

It gets cookies from 'cookies' field in Splash JSON responses
and sends current cookies in 'cookies' JSON POST argument.
and sends current cookies in 'cookies' JSON POST argument instead of
sending them in http headers.

It should process requests before SplashMiddleware, and process responses
after SplashMiddleware.
Expand All @@ -57,12 +58,14 @@ def from_crawler(cls, crawler):
def process_request(self, request, spider):
"""
For Splash requests add 'cookies' key with current
cookies to request.meta['splash']['args']
cookies to ``request.meta['splash']['args']`` and remove cookie
headers sent to Splash itself.
"""
if 'splash' not in request.meta:
return

if request.meta.get('_splash_processed'):
request.headers.pop('Cookie', None)
return

splash_options = request.meta['splash']
Expand Down
33 changes: 33 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import os

import pytest
from scrapy.settings import Settings


@pytest.fixture()
def settings(request):
    """Scrapy settings pre-configured for scrapy-splash integration tests."""
    cfg = {
        # collect scraped items to .collected_items attribute
        'ITEM_PIPELINES': {
            'tests.utils.CollectorPipeline': 100,
        },
        # scrapy-splash settings; SPLASH_URL comes from the environment so
        # the suite can target a locally started Splash container
        'SPLASH_URL': os.environ.get('SPLASH_URL'),
        'DOWNLOADER_MIDDLEWARES': {
            # Engine side
            'scrapy_splash.SplashCookiesMiddleware': 723,
            'scrapy_splash.SplashMiddleware': 725,
            'scrapy.downloadermiddlewares.httpcompression.HttpCompressionMiddleware': 810,
            # Downloader side
        },
        'SPIDER_MIDDLEWARES': {
            'scrapy_splash.SplashDeduplicateArgsMiddleware': 100,
        },
        'DUPEFILTER_CLASS': 'scrapy_splash.SplashAwareDupeFilter',
        'HTTPCACHE_STORAGE': 'scrapy_splash.SplashAwareFSCacheStorage',
    }
    return Settings(cfg)


56 changes: 56 additions & 0 deletions tests/mockserver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/usr/bin/env python
import argparse, socket, sys, time
from subprocess import Popen, PIPE
from importlib import import_module

from twisted.internet import reactor
from twisted.web.server import Site


def get_ephemeral_port():
    """Return a TCP port number that was free at the time of the call.

    The OS chooses the port (bind to port 0).  The probe socket is closed
    before returning, so there is a small race window in which another
    process could claim the port — acceptable for test fixtures.
    """
    s = socket.socket()
    try:
        s.bind(("", 0))
        return s.getsockname()[1]
    finally:
        s.close()  # fix: the original leaked the probe socket


class MockServer():
    """Run a twisted resource over HTTP in a child process.

    Used as a context manager: ``__enter__`` spawns the server and waits
    for its "listening" banner; ``__exit__`` kills it and waits briefly
    so the port is released.
    """

    def __init__(self, resource, port=None):
        # Store the dotted path so the child process can re-import
        # the resource class by name.
        self.resource = '{}.{}'.format(resource.__module__, resource.__name__)
        self.proc = None
        self.port = port or get_ephemeral_port()
        local_host = socket.gethostbyname(socket.gethostname())
        self.root_url = 'http://%s:%d' % (local_host, self.port)

    def __enter__(self):
        cmd = [sys.executable, '-u', '-m', 'tests.mockserver',
               self.resource, '--port', str(self.port)]
        self.proc = Popen(cmd, stdout=PIPE)
        # Block until the child prints its banner line, i.e. is listening.
        self.proc.stdout.readline()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.proc.kill()
        self.proc.wait()
        # Give the OS a moment to tear the socket down.
        time.sleep(0.2)


def main():
    """CLI entry point: serve the given twisted resource over HTTP.

    Expects a dotted path to a resource class and an optional ``--port``.
    Prints a banner line once listening (the parent MockServer process
    waits for it), then runs the reactor until the process is killed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('resource')
    # fix: default to 0 (OS-assigned port) instead of None, which would
    # make reactor.listenTCP raise TypeError when --port is omitted
    parser.add_argument('--port', type=int, default=0)
    args = parser.parse_args()
    module_name, name = args.resource.rsplit('.', 1)
    sys.path.append('.')  # allow importing the tests package from the CWD
    resource = getattr(import_module(module_name), name)()
    http_port = reactor.listenTCP(args.port, Site(resource))

    def print_listening():
        host = http_port.getHost()
        print('Mock server {} running at http://{}:{}'.format(
            resource, host.host, host.port))

    reactor.callWhenRunning(print_listening)
    reactor.run()


if __name__ == "__main__":
    main()
Loading