Skip to content

Commit

Permalink
Response class: add convenience functions (#497)
Browse files: browse the repository at this point in the history
* Response class: add convenience functions

* add as_dict test
  • Loading branch information
adbar authored Feb 5, 2024
1 parent f53d194 commit 67ff3a7
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 0 deletions.
15 changes: 15 additions & 0 deletions tests/downloads_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,20 @@ def _reset_downloads_global_objects():
trafilatura.downloads.NO_CERT_POOL = None
trafilatura.downloads.RETRY_STRATEGY = None


def test_response_object():
    "Check the Response helpers: truthiness, header storage, decoding and dict export."
    raw_page = b"<html><body><p>ABC</p></body></html>"
    response = Response(raw_page, 200, "https://example.org")
    # a response built with data is expected to evaluate as true
    assert bool(response) is True

    # headers become available once they have been stored
    response.store_headers({"X-Header": "xyz"})
    assert "X-Header" in response.headers

    # after decoding, the html attribute and str() both yield the decoded text
    response.decode_data(True)
    expected_text = raw_page.decode("utf-8")
    assert expected_text == response.html == str(response)

    # the exported dictionary exposes exactly these keys
    exported = response.as_dict()
    assert sorted(exported) == ["data", "headers", "html", "status", "url"]


def test_fetch():
'''Test URL fetching.'''
# logic: empty request?
Expand Down Expand Up @@ -190,6 +204,7 @@ def test_queue():


if __name__ == '__main__':
test_response_object()
test_fetch()
test_config()
test_decode()
Expand Down
17 changes: 17 additions & 0 deletions trafilatura/downloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,15 @@ def __init__(self, data, status, url):
self.status = status
self.url = url

def __bool__(self):
    "Tell whether the response holds data, i.e. its data attribute is not None."
    has_payload = self.data is not None
    return has_payload

def __repr__(self):
    "Return the html attribute when it is truthy, else decode_file() applied to the data."
    if self.html:
        return self.html
    # no usable html stored: fall back to decoding the data attribute
    return decode_file(self.data)

def __str__(self):
    "Return the same value as __repr__()."
    text = self.__repr__()
    return text

def store_headers(self, headerdict):
"Store response headers if required."
# control or normalization here?
Expand All @@ -76,6 +85,14 @@ def decode_data(self, decode):
if decode and self.data:
self.html = decode_file(self.data)

def as_dict(self):
    "Export the slot attributes that are currently set as a name-to-value dictionary."
    exported = {}
    for name in self.__slots__:
        # slots that were never assigned raise AttributeError, so they are left out
        if hasattr(self, name):
            exported[name] = getattr(self, name)
    return exported


# caching throws an error
# @lru_cache(maxsize=2)
Expand Down

0 comments on commit 67ff3a7

Please sign in to comment.