Skip to content

Commit b8d0af9

Browse files
committed
Do not make a real call for the BeforeRequestHook dummy request
1 parent a83432a commit b8d0af9

File tree

3 files changed

+41
-16
lines changed

3 files changed

+41
-16
lines changed

_test_unstructured_client/integration/test_decorators.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -291,27 +291,26 @@ async def test_split_pdf_requests_do_retry(monkeypatch):
291291
"""
292292
Test that when we split a pdf, the split requests will honor retryable errors.
293293
"""
294+
mock_endpoint_called = False
294295
number_of_split_502s = 2
295296
number_of_last_page_502s = 2
296297

297-
async def mock_send(_, request):
298+
async def mock_send(_, request: httpx.Request):
298299
"""
299300
Return a predefined number of 502s for requests with certain starting_page_number values.
300301
301-
This is because N-1 splits are sent off in the hook logic. These need explicit retry handling.
302-
The final split is returned to the SDK and gets the built in retry code.
302+
This is to make sure specific portions of the doc are retried properly.
303303
304304
We want to make sure both code paths are retried.
305305
"""
306-
request_body = request.read()
306+
# Record that the SDK issues our no-op request
307+
# returned by the BeforeRequestHook
308+
nonlocal mock_endpoint_called
309+
if request.url.host == "no-op":
310+
mock_endpoint_called = True
311+
return Response(200, request=request)
307312

308-
# Always return a 200 if the request is not for us
309-
# (This mocks the httpbin.org call that gets us into the after_success hook)
310-
if "/general/v0/general" not in str(request.url):
311-
return Response(
312-
200,
313-
request=request,
314-
)
313+
request_body = request.read()
315314

316315
decoded_body = MultipartDecoder(request_body, request.headers.get("Content-Type"))
317316
form_data = form_utils.parse_form_data(decoded_body)
@@ -369,4 +368,6 @@ async def mock_send(_, request):
369368

370369
assert number_of_split_502s == 0
371370
assert number_of_last_page_502s == 0
371+
assert mock_endpoint_called
372+
372373
assert res.status_code == 200

_test_unstructured_client/unit/test_split_pdf_hook.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from collections import Counter
66
from typing import Coroutine
77

8+
import httpx
89
import pytest
910
import requests
1011
from requests_toolbelt import MultipartDecoder, MultipartEncoder
@@ -32,7 +33,7 @@ def test_unit_sdk_init():
3233
hook = SplitPdfHook()
3334
# This is a fake URL; the test doesn't make an API call
3435
test_url = "http://localhost:5000"
35-
test_client = requests.Session()
36+
test_client = httpx.Client()
3637

3738
hook.sdk_init(test_url, test_client)
3839

src/unstructured_client/_hooks/custom/split_pdf_hook.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,15 +116,38 @@ def sdk_init(
116116
) -> Tuple[str, HttpClient]:
117117
"""Initializes Split PDF Hook.
118118
119+
Adds a mock transport layer to the httpx client. This will return an
120+
empty 200 response whenever the specified "dummy host" is used. The before_request
121+
hook returns a request to that dummy host so the SDK always succeeds and jumps straight to
122+
after_success, where we can await the split results.
123+
119124
Args:
120125
base_url (str): URL of the API.
121126
client (HttpClient): HTTP Client.
122127
123128
Returns:
124-
Tuple[str, httpx.Session]: The initialized SDK options.
129+
Tuple[str, HttpClient]: The initialized SDK options.
125130
"""
126-
self.client = client
127-
return base_url, client
131+
class DummyTransport(httpx.BaseTransport):
132+
def __init__(self, base_transport: httpx.BaseTransport):
133+
self.base_transport = base_transport
134+
135+
def handle_request(self, request: httpx.Request) -> httpx.Response:
136+
# Return an empty 200 response if we send a request to this dummy host
137+
if request.method == "GET" and request.url.host == "no-op":
138+
return httpx.Response(status_code=200, content=b'')
139+
140+
# Otherwise, pass the request to the default transport
141+
return self.base_transport.handle_request(request)
142+
143+
# Explicit cast to httpx.Client to avoid a typing error
144+
httpx_client = cast(httpx.Client, client)
145+
146+
# pylint: disable=protected-access
147+
httpx_client._transport = DummyTransport(httpx_client._transport)
148+
149+
self.client = httpx_client
150+
return base_url, self.client
128151

129152
# pylint: disable=too-many-return-statements
130153
def before_request(
@@ -289,7 +312,7 @@ async def call_api_partial(page):
289312

290313
# Return a dummy request for the SDK to use
291314
# This allows us to skip right to the AfterRequestHook and await all the calls
292-
dummy_request = httpx.Request("GET", "https://httpbin.org/status/200")
315+
dummy_request = httpx.Request("GET", "http://no-op")
293316

294317
return dummy_request
295318

0 commit comments

Comments
 (0)