Summary of this patch (free text before the first "diff --git" header):
  * tests/downloads_tests.py: add test_response_object() and call it from the
    __main__ runner before test_fetch().
  * trafilatura/downloads.py: add __bool__, __repr__, __str__ and as_dict() to
    the class whose __init__(self, data, status, url) appears in the hunk header
    (presumably Response, which is what the new test instantiates).
NOTE(review): the HTML tags inside the my_html bytes literal were lost in the
copy this was restored from; the hunk line count shows it is a single line, but
the exact markup is a reconstruction -- confirm against upstream.

diff --git a/tests/downloads_tests.py b/tests/downloads_tests.py
index 0e7eb5e6..ac724ab2 100644
--- a/tests/downloads_tests.py
+++ b/tests/downloads_tests.py
@@ -56,6 +56,20 @@ def _reset_downloads_global_objects():
     trafilatura.downloads.NO_CERT_POOL = None
     trafilatura.downloads.RETRY_STRATEGY = None
 
+
+def test_response_object():
+    "Test if the Response class is functioning as expected."
+    my_html = b"<html><body><p>ABC</p></body></html>"
+    resp = Response(my_html, 200, "https://example.org")
+    assert bool(resp) is True
+    resp.store_headers({"X-Header": "xyz"})
+    assert "X-Header" in resp.headers
+    resp.decode_data(True)
+    assert my_html.decode("utf-8") == resp.html == str(resp)
+    my_dict = resp.as_dict()
+    assert sorted(my_dict) == ["data", "headers", "html", "status", "url"]
+
+
 def test_fetch():
     '''Test URL fetching.'''
     # logic: empty request?
@@ -190,6 +204,7 @@ def test_queue():
 
 
 if __name__ == '__main__':
+    test_response_object()
     test_fetch()
     test_config()
     test_decode()
diff --git a/trafilatura/downloads.py b/trafilatura/downloads.py
index 94c547de..2cea64c2 100644
--- a/trafilatura/downloads.py
+++ b/trafilatura/downloads.py
@@ -66,6 +66,15 @@ def __init__(self, data, status, url):
         self.status = status
         self.url = url
 
+    def __bool__(self):
+        return self.data is not None
+
+    def __repr__(self):
+        return self.html if self.html else decode_file(self.data)
+
+    def __str__(self):
+        return self.__repr__()
+
     def store_headers(self, headerdict):
         "Store response headers if required."
         # control or normalization here?
@@ -76,6 +85,14 @@ def decode_data(self, decode):
         if decode and self.data:
             self.html = decode_file(self.data)
 
+    def as_dict(self):
+        "Convert the response object to a dictionary."
+        return {
+            attr: getattr(self, attr)
+            for attr in self.__slots__
+            if hasattr(self, attr)
+        }
+
 
 # caching throws an error
 # @lru_cache(maxsize=2)