diff --git a/docs/intro/overview.rst b/docs/intro/overview.rst index 3f9f24efdf9..289e975b8b1 100644 --- a/docs/intro/overview.rst +++ b/docs/intro/overview.rst @@ -236,9 +236,6 @@ scraping easy and efficient, such as: * A :ref:`System service ` designed to ease the deployment and run of your spiders in production. -* A built-in :ref:`Web service ` for monitoring and - controlling your bot - * A :ref:`Telnet console ` for hooking into a Python console running inside your Scrapy process, to introspect and debug your crawler diff --git a/docs/topics/extensions.rst b/docs/topics/extensions.rst index eb944fa34a1..593a08ddc0b 100644 --- a/docs/topics/extensions.rst +++ b/docs/topics/extensions.rst @@ -36,7 +36,6 @@ by a string: the full Python path to the extension's class name. For example:: EXTENSIONS = { 'scrapy.contrib.corestats.CoreStats': 500, - 'scrapy.webservice.WebService': 500, 'scrapy.telnet.TelnetConsole': 500, } @@ -178,18 +177,6 @@ Core Stats extension Enable the collection of core statistics, provided the stats collection is enabled (see :ref:`topics-stats`). -.. _topics-extensions-ref-webservice: - -Web service extension -~~~~~~~~~~~~~~~~~~~~~ - -.. module:: scrapy.webservice - :synopsis: Web service - -.. class:: scrapy.webservice.WebService - -See `topics-webservice`. - .. _topics-extensions-ref-telnetconsole: Telnet console extension diff --git a/docs/topics/settings.rst b/docs/topics/settings.rst index 629fac2dd78..0838cfc46f1 100644 --- a/docs/topics/settings.rst +++ b/docs/topics/settings.rst @@ -463,7 +463,6 @@ Default:: { 'scrapy.contrib.corestats.CoreStats': 0, - 'scrapy.webservice.WebService': 0, 'scrapy.telnet.TelnetConsole': 0, 'scrapy.contrib.memusage.MemoryUsage': 0, 'scrapy.contrib.memdebug.MemoryDebugger': 0, diff --git a/docs/topics/webservice.rst b/docs/topics/webservice.rst index ce85b5e4872..9a211896d2a 100644 --- a/docs/topics/webservice.rst +++ b/docs/topics/webservice.rst @@ -4,231 +4,8 @@ Web Service =========== -Scrapy comes with a built-in web service for monitoring and controlling a -running crawler. The service exposes most resources using the `JSON-RPC 2.0`_ -protocol, but there are also other (read-only) resources which just output JSON -data. +The web service has been moved into a separate project. -Provides an extensible web service for managing a Scrapy process. It's enabled -by the :setting:`WEBSERVICE_ENABLED` setting. The web server will listen in the -port specified in :setting:`WEBSERVICE_PORT`, and will log to the file -specified in :setting:`WEBSERVICE_LOGFILE`. - -The web service is a :ref:`built-in Scrapy extension ` -which comes enabled by default, but you can also disable it if you're running -tight on memory. - -.. _topics-webservice-resources: - -Web service resources -===================== - -The web service contains several resources, defined in the -:setting:`WEBSERVICE_RESOURCES` setting. Each resource provides a different -functionality. See :ref:`topics-webservice-resources-ref` for a list of -resources available by default. - -Although you can implement your own resources using any protocol, there are -two kinds of resources bundled with Scrapy: - -* Simple JSON resources - which are read-only and just output JSON data -* JSON-RPC resources - which provide direct access to certain Scrapy objects - using the `JSON-RPC 2.0`_ protocol - -.. module:: scrapy.contrib.webservice - :synopsis: Built-in web service resources - -.. 
_topics-webservice-resources-ref: - -Available JSON-RPC resources ----------------------------- - -These are the JSON-RPC resources available by default in Scrapy: - -.. _topics-webservice-crawler: - -Crawler JSON-RPC resource -~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. module:: scrapy.contrib.webservice.crawler - :synopsis: Crawler JSON-RPC resource - -.. class:: CrawlerResource - - Provides access to the main Crawler object that controls the Scrapy - process. - - Available by default at: http://localhost:6080/crawler - -Stats Collector JSON-RPC resource -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. module:: scrapy.contrib.webservice.stats - :synopsis: Stats JSON-RPC resource - -.. class:: StatsResource - - Provides access to the Stats Collector used by the crawler. - - Available by default at: http://localhost:6080/stats - -Spider Manager JSON-RPC resource -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You can access the spider manager JSON-RPC resource through the -:ref:`topics-webservice-crawler` at: http://localhost:6080/crawler/spiders - -Extension Manager JSON-RPC resource -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -You can access the extension manager JSON-RPC resource through the -:ref:`topics-webservice-crawler` at: http://localhost:6080/crawler/spiders - -Available JSON resources ------------------------- - -These are the JSON resources available by default: - -Engine status JSON resource -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. module:: scrapy.contrib.webservice.enginestatus - :synopsis: Engine Status JSON resource - -.. class:: EngineStatusResource - - Provides access to engine status metrics. - - Available by default at: http://localhost:6080/enginestatus - -Web service settings -==================== - -These are the settings that control the web service behaviour: - -.. setting:: WEBSERVICE_ENABLED - -WEBSERVICE_ENABLED ------------------- - -Default: ``True`` - -A boolean which specifies if the web service will be enabled (provided its -extension is also enabled). - -.. setting:: WEBSERVICE_LOGFILE - -WEBSERVICE_LOGFILE ------------------- - -Default: ``None`` - -A file to use for logging HTTP requests made to the web service. If unset web -the log is sent to standard scrapy log. - -.. setting:: WEBSERVICE_PORT - -WEBSERVICE_PORT ---------------- - -Default: ``[6080, 7030]`` - -The port range to use for the web service. If set to ``None`` or ``0``, a -dynamically assigned port is used. - -.. setting:: WEBSERVICE_HOST - -WEBSERVICE_HOST ---------------- - -Default: ``'127.0.0.1'`` - -The interface the web service should listen on - -WEBSERVICE_RESOURCES --------------------- - -Default: ``{}`` - -The list of web service resources enabled for your project. See -:ref:`topics-webservice-resources`. These are added to the ones available by -default in Scrapy, defined in the :setting:`WEBSERVICE_RESOURCES_BASE` setting. - -WEBSERVICE_RESOURCES_BASE -------------------------- - -Default:: - - { - 'scrapy.contrib.webservice.crawler.CrawlerResource': 1, - 'scrapy.contrib.webservice.enginestatus.EngineStatusResource': 1, - 'scrapy.contrib.webservice.stats.StatsResource': 1, - } - -The list of web service resources available by default in Scrapy. You shouldn't -change this setting in your project, change :setting:`WEBSERVICE_RESOURCES` -instead. If you want to disable some resource set its value to ``None`` in -:setting:`WEBSERVICE_RESOURCES`. - -Writing a web service resource -============================== - -Web service resources are implemented using the Twisted Web API. 
See this -`Twisted Web guide`_ for more information on Twisted web and Twisted web -resources. - -To write a web service resource you should subclass the :class:`JsonResource` or -:class:`JsonRpcResource` classes and implement the :class:`renderGET` method. - -.. class:: scrapy.webservice.JsonResource - - A subclass of `twisted.web.resource.Resource`_ that implements a JSON web - service resource. See - - .. attribute:: ws_name - - The name by which the Scrapy web service will known this resource, and - also the path where this resource will listen. For example, assuming - Scrapy web service is listening on http://localhost:6080/ and the - ``ws_name`` is ``'resource1'`` the URL for that resource will be: - - http://localhost:6080/resource1/ - -.. class:: scrapy.webservice.JsonRpcResource(crawler, target=None) - - This is a subclass of :class:`JsonResource` for implementing JSON-RPC - resources. JSON-RPC resources wrap Python (Scrapy) objects around a - JSON-RPC API. The resource wrapped must be returned by the - :meth:`get_target` method, which returns the target passed in the - constructor by default - - .. method:: get_target() - - Return the object wrapped by this JSON-RPC resource. By default, it - returns the object passed on the constructor. - -Examples of web service resources -================================= - -StatsResource (JSON-RPC resource) ---------------------------------- - -.. literalinclude:: ../../scrapy/contrib/webservice/stats.py - -EngineStatusResource (JSON resource) -------------------------------------- - -.. literalinclude:: ../../scrapy/contrib/webservice/enginestatus.py - -Example of web service client -============================= - -scrapy-ws.py script -------------------- - -.. literalinclude:: ../../extras/scrapy-ws.py - -.. _Twisted Web guide: http://jcalderone.livejournal.com/50562.html -.. _JSON-RPC 2.0: http://www.jsonrpc.org/ -.. 
_twisted.web.resource.Resource: http://twistedmatrix.com/documents/10.0.0/api/twisted.web.resource.Resource.html +It is now hosted at: + https://github.com/scrapy/scrapy-jsonrpc diff --git a/scrapy/contrib/webservice/__init__.py b/scrapy/contrib/webservice/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/scrapy/contrib/webservice/crawler.py b/scrapy/contrib/webservice/crawler.py deleted file mode 100644 index f25d9047424..00000000000 --- a/scrapy/contrib/webservice/crawler.py +++ /dev/null @@ -1,8 +0,0 @@ -from scrapy.webservice import JsonRpcResource - -class CrawlerResource(JsonRpcResource): - - ws_name = 'crawler' - - def __init__(self, crawler): - JsonRpcResource.__init__(self, crawler, crawler) diff --git a/scrapy/contrib/webservice/enginestatus.py b/scrapy/contrib/webservice/enginestatus.py deleted file mode 100644 index 20e806423db..00000000000 --- a/scrapy/contrib/webservice/enginestatus.py +++ /dev/null @@ -1,22 +0,0 @@ -from scrapy.webservice import JsonResource -from scrapy.utils.engine import get_engine_status -class EngineStatusResource(JsonResource): - - ws_name = 'enginestatus' - - def __init__(self, crawler, spider_name=None): - JsonResource.__init__(self, crawler) - self._spider_name = spider_name - self.isLeaf = spider_name is not None - - def render_GET(self, txrequest): - status = get_engine_status(self.crawler.engine) - if self._spider_name is None: - return status - for sp, st in status['spiders'].items(): - if sp.name == self._spider_name: - return st - - def getChild(self, name, txrequest): - return EngineStatusResource(name, self.crawler) diff --git a/scrapy/contrib/webservice/stats.py b/scrapy/contrib/webservice/stats.py deleted file mode 100644 index df17a8a7df6..00000000000 --- a/scrapy/contrib/webservice/stats.py +++ /dev/null @@ -1,8 +0,0 @@ -from scrapy.webservice import JsonRpcResource - -class StatsResource(JsonRpcResource): - - ws_name = 'stats' - - def __init__(self, crawler): - JsonRpcResource.__init__(self, crawler, crawler.stats) diff --git a/scrapy/settings/default_settings.py b/scrapy/settings/default_settings.py index c7e408049d1..f01203c420f 100644 --- a/scrapy/settings/default_settings.py +++ b/scrapy/settings/default_settings.py @@ -109,7 +109,6 @@ EXTENSIONS_BASE = { 'scrapy.contrib.corestats.CoreStats': 0, - 'scrapy.webservice.WebService': 0, 'scrapy.telnet.TelnetConsole': 0, 'scrapy.contrib.memusage.MemoryUsage': 0, 'scrapy.contrib.memdebug.MemoryDebugger': 0, @@ -239,17 +238,6 @@ TELNETCONSOLE_PORT = [6023, 6073] TELNETCONSOLE_HOST = '127.0.0.1' -WEBSERVICE_ENABLED = True -WEBSERVICE_LOGFILE = None -WEBSERVICE_PORT = [6080, 7030] -WEBSERVICE_HOST = '127.0.0.1' -WEBSERVICE_RESOURCES = {} -WEBSERVICE_RESOURCES_BASE = { - 'scrapy.contrib.webservice.crawler.CrawlerResource': 1, - 'scrapy.contrib.webservice.enginestatus.EngineStatusResource': 1, - 'scrapy.contrib.webservice.stats.StatsResource': 1, -} - SPIDER_CONTRACTS = {} SPIDER_CONTRACTS_BASE = { 'scrapy.contracts.default.UrlContract': 1, diff --git a/scrapy/utils/jsonrpc.py b/scrapy/utils/jsonrpc.py deleted file mode 100644 index 19c28a7b586..00000000000 --- a/scrapy/utils/jsonrpc.py +++ /dev/null @@ -1,97 +0,0 @@ -""" -This module implements the JSON-RPC 2.0 protocol, as defined in: -http://groups.google.com/group/json-rpc/web/json-rpc-2-0 -""" - -import urllib -import json -import traceback - -from scrapy.utils.serialize import ScrapyJSONDecoder - -# JSON-RPC 2.0 errors, as defined in: -class jsonrpc_errors: - PARSE_ERROR = -32700 - INVALID_REQUEST = 
-32600 - METHOD_NOT_FOUND = -32601 - INVALID_PARAMS = -32602 - INTERNAL_ERROR = -32603 - -class JsonRpcError(Exception): - - def __init__(self, code, message, data=None): - super(JsonRpcError, self).__init__() - self.code = code - self.message = message - self.data = data - - def __str__(self): - return "JSON-RPC error (code %d): %s" % (self.code, self.message) - -def jsonrpc_client_call(url, method, *args, **kwargs): - """Execute a JSON-RPC call on the given url""" - _urllib = kwargs.pop('_urllib', urllib) - if args and kwargs: - raise ValueError("Pass *args or **kwargs but not both to jsonrpc_client_call") - req = {'jsonrpc': '2.0', 'method': method, 'params': args or kwargs, 'id': 1} - res = json.loads(_urllib.urlopen(url, json.dumps(req)).read()) - if 'result' in res: - return res['result'] - elif 'error' in res: - er = res['error'] - raise JsonRpcError(er['code'], er['message'], er['data']) - else: - msg = "JSON-RPC response must contain 'result' or 'error': %s" % res - raise ValueError(msg) - -def jsonrpc_server_call(target, jsonrpc_request, json_decoder=None): - """Execute the given JSON-RPC request (as JSON-encoded string) on the given - target object and return the JSON-RPC response, as a dict - """ - if json_decoder is None: - json_decoder = ScrapyJSONDecoder() - - try: - req = json_decoder.decode(jsonrpc_request) - except Exception as e: - return jsonrpc_error(None, jsonrpc_errors.PARSE_ERROR, 'Parse error', \ - traceback.format_exc()) - - try: - id, methname = req['id'], req['method'] - except KeyError: - return jsonrpc_error(None, jsonrpc_errors.INVALID_REQUEST, 'Invalid Request') - - try: - method = getattr(target, methname) - except AttributeError: - return jsonrpc_error(id, jsonrpc_errors.METHOD_NOT_FOUND, 'Method not found') - - params = req.get('params', []) - a, kw = ([], params) if isinstance(params, dict) else (params, {}) - kw = dict([(str(k), v) for k, v in kw.items()]) # convert kw keys to str - try: - return jsonrpc_result(id, method(*a, **kw)) - except Exception as e: - return jsonrpc_error(id, jsonrpc_errors.INTERNAL_ERROR, str(e), \ - traceback.format_exc()) - -def jsonrpc_error(id, code, message, data=None): - """Create JSON-RPC error response""" - return { - 'jsonrpc': '2.0', - 'error': { - 'code': code, - 'message': message, - 'data': data, - }, - 'id': id, - } - -def jsonrpc_result(id, result): - """Create JSON-RPC result response""" - return { - 'jsonrpc': '2.0', - 'result': result, - 'id': id, - } diff --git a/scrapy/utils/serialize.py b/scrapy/utils/serialize.py index 95f98587292..8320be09570 100644 --- a/scrapy/utils/serialize.py +++ b/scrapy/utils/serialize.py @@ -1,93 +1,18 @@ -import re +import json import datetime import decimal -import json from twisted.internet import defer -from scrapy.spider import Spider from scrapy.http import Request, Response from scrapy.item import BaseItem -class SpiderReferencer(object): - """Class to serialize (and deserialize) objects (typically dicts) - containing references to running spiders (ie. Spider objects). This is - required because json library fails to serialize dicts containing - non-primitive types as keys, even when you override - ScrapyJSONEncoder.default() with a custom encoding mechanism. 
- """ - - spider_ref_re = re.compile('^spider:([0-9a-f]+)?:?(.+)?$') - - def __init__(self, crawler): - self.crawler = crawler - - def get_reference_from_spider(self, spider): - return 'spider:%x:%s' % (id(spider), spider.name) - - def get_spider_from_reference(self, ref): - """Returns the Spider referenced by text, if text is a spider - reference. Otherwise it returns the text itself. If the text references - a non-running spider it raises a RuntimeError. - """ - m = self.spider_ref_re.search(ref) - if m: - spid, spname = m.groups() - for spider in self.crawler.engine.open_spiders: - if "%x" % id(spider) == spid or spider.name == spname: - return spider - raise RuntimeError("Spider not running: %s" % ref) - return ref - - def encode_references(self, obj): - """Look for Spider objects and replace them with spider references""" - if isinstance(obj, Spider): - return self.get_reference_from_spider(obj) - elif isinstance(obj, dict): - d = {} - for k, v in obj.items(): - k = self.encode_references(k) - v = self.encode_references(v) - d[k] = v - return d - elif isinstance(obj, (list, tuple)): - return [self.encode_references(x) for x in obj] - else: - return obj - - def decode_references(self, obj): - """Look for spider references and replace them with Spider objects""" - if isinstance(obj, basestring): - return self.get_spider_from_reference(obj) - elif isinstance(obj, dict): - d = {} - for k, v in obj.items(): - k = self.decode_references(k) - v = self.decode_references(v) - d[k] = v - return d - elif isinstance(obj, (list, tuple)): - return [self.decode_references(x) for x in obj] - else: - return obj - - class ScrapyJSONEncoder(json.JSONEncoder): DATE_FORMAT = "%Y-%m-%d" TIME_FORMAT = "%H:%M:%S" - def __init__(self, *a, **kw): - crawler = kw.pop('crawler', None) - self.spref = kw.pop('spref', None) or SpiderReferencer(crawler) - super(ScrapyJSONEncoder, self).__init__(*a, **kw) - - def encode(self, o): - if self.spref: - o = self.spref.encode_references(o) - return super(ScrapyJSONEncoder, self).encode(o) - def default(self, o): if isinstance(o, datetime.datetime): return o.strftime("%s %s" % (self.DATE_FORMAT, self.TIME_FORMAT)) @@ -110,14 +35,4 @@ def default(self, o): class ScrapyJSONDecoder(json.JSONDecoder): - - def __init__(self, *a, **kw): - crawler = kw.pop('crawler', None) - self.spref = kw.pop('spref', None) or SpiderReferencer(crawler) - super(ScrapyJSONDecoder, self).__init__(*a, **kw) - - def decode(self, s): - o = super(ScrapyJSONDecoder, self).decode(s) - if self.spref: - o = self.spref.decode_references(o) - return o + pass diff --git a/scrapy/utils/txweb.py b/scrapy/utils/txweb.py deleted file mode 100644 index 91eaa3bbcbd..00000000000 --- a/scrapy/utils/txweb.py +++ /dev/null @@ -1,20 +0,0 @@ -import json - -from twisted.web import resource - -class JsonResource(resource.Resource): - - json_encoder = json.JSONEncoder() - - def render(self, txrequest): - r = resource.Resource.render(self, txrequest) - return self.render_object(r, txrequest) - - def render_object(self, obj, txrequest): - r = self.json_encoder.encode(obj) + "\n" - txrequest.setHeader('Content-Type', 'application/json') - txrequest.setHeader('Access-Control-Allow-Origin', '*') - txrequest.setHeader('Access-Control-Allow-Methods', 'GET, POST, PATCH, PUT, DELETE') - txrequest.setHeader('Access-Control-Allow-Headers',' X-Requested-With') - txrequest.setHeader('Content-Length', len(r)) - return r \ No newline at end of file diff --git a/scrapy/webservice.py b/scrapy/webservice.py deleted file mode 100644 index 
d1ee2973e3c..00000000000 --- a/scrapy/webservice.py +++ /dev/null @@ -1,97 +0,0 @@ -""" -Scrapy web services extension - -See docs/topics/webservice.rst -""" - -from twisted.web import server, resource - -from scrapy.exceptions import NotConfigured -from scrapy import log, signals -from scrapy.utils.jsonrpc import jsonrpc_server_call -from scrapy.utils.serialize import ScrapyJSONEncoder, ScrapyJSONDecoder -from scrapy.utils.misc import load_object -from scrapy.utils.txweb import JsonResource as JsonResource_ -from scrapy.utils.reactor import listen_tcp -from scrapy.utils.conf import build_component_list - - -class JsonResource(JsonResource_): - - def __init__(self, crawler, target=None): - JsonResource_.__init__(self) - self.crawler = crawler - self.json_encoder = ScrapyJSONEncoder(crawler=crawler) - -class JsonRpcResource(JsonResource): - - def __init__(self, crawler, target=None): - JsonResource.__init__(self, crawler, target) - self.json_decoder = ScrapyJSONDecoder(crawler=crawler) - self.crawler = crawler - self._target = target - - def render_GET(self, txrequest): - return self.get_target() - - def render_POST(self, txrequest): - reqstr = txrequest.content.getvalue() - target = self.get_target() - return jsonrpc_server_call(target, reqstr, self.json_decoder) - - def getChild(self, name, txrequest): - target = self.get_target() - try: - newtarget = getattr(target, name) - return JsonRpcResource(self.crawler, newtarget) - except AttributeError: - return resource.ErrorPage(404, "No Such Resource", "No such child resource.") - - def get_target(self): - return self._target - - -class RootResource(JsonResource): - - def render_GET(self, txrequest): - return {'resources': self.children.keys()} - - def getChild(self, name, txrequest): - if name == '': - return self - return JsonResource.getChild(self, name, txrequest) - - -class WebService(server.Site): - - def __init__(self, crawler): - if not crawler.settings.getbool('WEBSERVICE_ENABLED'): - raise NotConfigured - self.crawler = crawler - logfile = crawler.settings['WEBSERVICE_LOGFILE'] - self.portrange = [int(x) for x in crawler.settings.getlist('WEBSERVICE_PORT')] - self.host = crawler.settings['WEBSERVICE_HOST'] - root = RootResource(crawler) - reslist = build_component_list(crawler.settings['WEBSERVICE_RESOURCES_BASE'], \ - crawler.settings['WEBSERVICE_RESOURCES']) - for res_cls in map(load_object, reslist): - res = res_cls(crawler) - root.putChild(res.ws_name, res) - server.Site.__init__(self, root, logPath=logfile) - self.noisy = False - crawler.signals.connect(self.start_listening, signals.engine_started) - crawler.signals.connect(self.stop_listening, signals.engine_stopped) - - @classmethod - def from_crawler(cls, crawler): - return cls(crawler) - - def start_listening(self): - self.port = listen_tcp(self.portrange, self.host, self) - h = self.port.getHost() - log.msg(format='Web service listening on %(host)s:%(port)d', - level=log.DEBUG, host=h.host, port=h.port) - - def stop_listening(self): - self.port.stopListening() - diff --git a/tests/test_utils_jsonrpc.py b/tests/test_utils_jsonrpc.py deleted file mode 100644 index e0aaef95244..00000000000 --- a/tests/test_utils_jsonrpc.py +++ /dev/null @@ -1,122 +0,0 @@ -import unittest, json -from io import BytesIO - -from scrapy.utils.jsonrpc import jsonrpc_client_call, jsonrpc_server_call, \ - JsonRpcError, jsonrpc_errors -from scrapy.utils.serialize import ScrapyJSONDecoder -from tests.test_utils_serialize import CrawlerMock - -class urllib_mock(object): - def __init__(self, result=None, 
error=None): - response = {} - if result: - response.update(result=result) - if error: - response.update(error=error) - self.response = json.dumps(response) - self.request = None - - def urlopen(self, url, request): - self.url = url - self.request = request - return BytesIO(self.response) - -class TestTarget(object): - - def call(self, *args, **kwargs): - return list(args), kwargs - - def exception(self): - raise Exception("testing-errors") - -class JsonRpcUtilsTestCase(unittest.TestCase): - - def setUp(self): - crawler = CrawlerMock([]) - self.json_decoder = ScrapyJSONDecoder(crawler=crawler) - - def test_jsonrpc_client_call_args_kwargs_raises(self): - self.assertRaises(ValueError, jsonrpc_client_call, 'url', 'test', 'one', kw=123) - - def test_jsonrpc_client_call_request(self): - ul = urllib_mock(1) - jsonrpc_client_call('url', 'test', 'one', 2, _urllib=ul) - req = json.loads(ul.request) - assert 'id' in req - self.assertEqual(ul.url, 'url') - self.assertEqual(req['jsonrpc'], '2.0') - self.assertEqual(req['method'], 'test') - self.assertEqual(req['params'], ['one', 2]) - - def test_jsonrpc_client_call_response(self): - ul = urllib_mock() - # must return result or error - self.assertRaises(ValueError, jsonrpc_client_call, 'url', 'test', _urllib=ul) - ul = urllib_mock(result={'one': 1}) - self.assertEquals(jsonrpc_client_call('url', 'test', _urllib=ul), {'one': 1}) - ul = urllib_mock(error={'code': 123, 'message': 'hello', 'data': 'some data'}) - - raised = False - try: - jsonrpc_client_call('url', 'test', _urllib=ul) - except JsonRpcError as e: - raised = True - self.assertEqual(e.code, 123) - self.assertEqual(e.message, 'hello') - self.assertEqual(e.data, 'some data') - assert '123' in str(e) - assert 'hello' in str(e) - assert raised, "JsonRpcError not raised" - - def test_jsonrpc_server_call(self): - t = TestTarget() - r = jsonrpc_server_call(t, 'invalid json data', self.json_decoder) - assert 'error' in r - assert r['jsonrpc'] == '2.0' - assert r['id'] is None - self.assertEqual(r['error']['code'], jsonrpc_errors.PARSE_ERROR) - assert 'Traceback' in r['error']['data'] - - r = jsonrpc_server_call(t, '{"test": "test"}', self.json_decoder) - assert 'error' in r - assert r['jsonrpc'] == '2.0' - assert r['id'] is None - self.assertEqual(r['error']['code'], jsonrpc_errors.INVALID_REQUEST) - - r = jsonrpc_server_call(t, '{"method": "notfound", "id": 1}', self.json_decoder) - assert 'error' in r - assert r['jsonrpc'] == '2.0' - assert r['id'] == 1 - self.assertEqual(r['error']['code'], jsonrpc_errors.METHOD_NOT_FOUND) - - r = jsonrpc_server_call(t, '{"method": "exception", "id": 1}', self.json_decoder) - assert 'error' in r - assert r['jsonrpc'] == '2.0' - assert r['id'] == 1 - self.assertEqual(r['error']['code'], jsonrpc_errors.INTERNAL_ERROR) - assert 'testing-errors' in r['error']['message'] - assert 'Traceback' in r['error']['data'] - - r = jsonrpc_server_call(t, '{"method": "call", "id": 2}', self.json_decoder) - assert 'result' in r - assert r['jsonrpc'] == '2.0' - assert r['id'] == 2 - self.assertEqual(r['result'], ([], {})) - - r = jsonrpc_server_call(t, '{"method": "call", "params": [456, 123], "id": 3}', \ - self.json_decoder) - assert 'result' in r - assert r['jsonrpc'] == '2.0' - assert r['id'] == 3 - self.assertEqual(r['result'], ([456, 123], {})) - - r = jsonrpc_server_call(t, '{"method": "call", "params": {"data": 789}, "id": 3}', \ - self.json_decoder) - assert 'result' in r - assert r['jsonrpc'] == '2.0' - assert r['id'] == 3 - self.assertEqual(r['result'], ([], {'data': 
789})) - -if __name__ == "__main__": - unittest.main() - diff --git a/tests/test_utils_serialize.py b/tests/test_utils_serialize.py index 1335dc12261..278cf91e3e5 100644 --- a/tests/test_utils_serialize.py +++ b/tests/test_utils_serialize.py @@ -1,88 +1,20 @@ +import json import unittest import datetime -import json from decimal import Decimal from twisted.internet import defer -from scrapy.utils.serialize import SpiderReferencer, ScrapyJSONEncoder, ScrapyJSONDecoder -from scrapy.spider import Spider +from scrapy.utils.serialize import ScrapyJSONEncoder from scrapy.http import Request, Response -class _EngineMock(object): - def __init__(self, open_spiders): - self.open_spiders = open_spiders - -class CrawlerMock(object): - def __init__(self, open_spiders): - self.engine = _EngineMock(open_spiders) - -class BaseTestCase(unittest.TestCase): +class JsonEncoderTestCase(unittest.TestCase): def setUp(self): - self.spider1 = Spider('name1') - self.spider2 = Spider('name2') - open_spiders = set([self.spider1, self.spider2]) - crawler = CrawlerMock(open_spiders) - self.spref = SpiderReferencer(crawler) - self.encoder = ScrapyJSONEncoder(spref=self.spref) - self.decoder = ScrapyJSONDecoder(spref=self.spref) - -class SpiderReferencerTestCase(BaseTestCase): - - def test_spiders_and_references(self): - ref1 = self.spref.get_reference_from_spider(self.spider1) - assert isinstance(ref1, str) - assert self.spider1.name in ref1 - ref2 = self.spref.get_reference_from_spider(self.spider2) - ref1_ = self.spref.get_reference_from_spider(self.spider1) - assert ref1 == ref1_ - assert ref1 != ref2 - - sp1 = self.spref.get_spider_from_reference(ref1) - sp2 = self.spref.get_spider_from_reference(ref2) - sp1_ = self.spref.get_spider_from_reference(ref1) - assert isinstance(sp1, Spider) - assert sp1 is not sp2 - assert sp1 is sp1_ - - # referring to spiders by name - assert sp1 is self.spref.get_spider_from_reference('spider::name1') - assert sp2 is self.spref.get_spider_from_reference('spider::name2') + self.encoder = ScrapyJSONEncoder() - # must return string as-is if spider id not found - assert 'lala' == self.spref.get_spider_from_reference('lala') - # must raise RuntimeError if spider id is not found and spider is not running - self.assertRaises(RuntimeError, self.spref.get_spider_from_reference, 'spider:fffffff') - - def test_encode_decode(self): - sr = self.spref - sp1 = self.spider1 - sp2 = self.spider2 - ref1 = sr.get_reference_from_spider(sp1) - ref2 = sr.get_reference_from_spider(sp2) - - examples = [ - ('lala', 'lala'), - (sp1, ref1), - (['lala', sp1], ['lala', ref1]), - ({'lala': sp1}, {'lala': ref1}), - ({sp1: sp2}, {ref1: ref2}), - ({sp1: {sp2: ['lala', sp1]}}, {ref1: {ref2: ['lala', ref1]}}) - ] - for spiders, refs in examples: - self.assertEqual(sr.encode_references(spiders), refs) - self.assertEqual(sr.decode_references(refs), spiders) - -class JsonEncoderTestCase(BaseTestCase): - def test_encode_decode(self): - sr = self.spref - sp1 = self.spider1 - sp2 = self.spider2 - ref1 = sr.get_reference_from_spider(sp1) - ref2 = sr.get_reference_from_spider(sp2) dt = datetime.datetime(2010, 1, 2, 10, 11, 12) dts = "2010-01-02 10:11:12" d = datetime.date(2010, 1, 2) @@ -92,42 +24,21 @@ def test_encode_decode(self): dec = Decimal("1000.12") decs = "1000.12" - examples_encode_decode = [ - ('lala', 'lala'), - (sp1, ref1), - (['lala', sp1], ['lala', ref1]), - ({'lala': sp1}, {'lala': ref1}), - ({sp1: sp2}, {ref1: ref2}), - ({sp1: {sp2: ['lala', sp1]}}, {ref1: {ref2: ['lala', ref1]}}) - ] - for spiders, refs in 
examples_encode_decode: - self.assertEqual(self.encoder.encode(spiders), json.dumps(refs)) - self.assertEqual(self.decoder.decode(json.dumps(refs)), spiders) + for input, output in [('foo', 'foo'), (d, ds), (t, ts), (dt, dts), + (dec, decs), (['foo', d], ['foo', ds])]: + self.assertEqual(self.encoder.encode(input), json.dumps(output)) - examples_encode_only = [ - ({sp1: dt}, {ref1: dts}), - ({sp1: d}, {ref1: ds}), - ({sp1: t}, {ref1: ts}), - ({sp1: dec}, {ref1: decs}), - ] - for spiders, refs in examples_encode_only: - self.assertEqual(self.encoder.encode(spiders), json.dumps(refs)) - - assert 'Deferred' in self.encoder.encode(defer.Deferred()) + def test_encode_deferred(self): + self.assertIn('Deferred', self.encoder.encode(defer.Deferred())) def test_encode_request(self): r = Request("http://www.example.com/lala") rs = self.encoder.encode(r) - assert r.method in rs - assert r.url in rs + self.assertIn(r.method, rs) + self.assertIn(r.url, rs) def test_encode_response(self): r = Response("http://www.example.com/lala") rs = self.encoder.encode(r) - assert r.url in rs - assert str(r.status) in rs - - -if __name__ == "__main__": - unittest.main() - + self.assertIn(r.url, rs) + self.assertIn(str(r.status), rs)
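For projects that relied on the removed built-in web service, the equivalent functionality now lives in the external scrapy-jsonrpc package that the new docs/topics/webservice.rst stub points to. A minimal migration sketch follows; the extension path and the JSONRPC_* setting names are assumptions about how that external package is configured (they do not appear anywhere in this patch) and should be verified against the scrapy-jsonrpc README::

    # settings.py -- hypothetical sketch, assuming `pip install scrapy-jsonrpc`
    # and that the package exposes scrapy_jsonrpc.webservice.WebService plus
    # JSONRPC_* settings; verify the names upstream before relying on them.
    EXTENSIONS = {
        'scrapy_jsonrpc.webservice.WebService': 500,
    }

    JSONRPC_ENABLED = True        # replaces the removed WEBSERVICE_ENABLED
    JSONRPC_PORT = [6080, 7030]   # port range, as WEBSERVICE_PORT was
    JSONRPC_HOST = '127.0.0.1'    # interface to listen on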