Skip to content

Commit ceb624d

Browse files
committed
fix/ensure array params are sent to the server correctly
1 parent 6c0ae3d commit ceb624d

File tree

4 files changed

+128
-8
lines changed

4 files changed

+128
-8
lines changed
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
2+
import {
3+
type BeforeRequestContext,
4+
BeforeRequestHook,
5+
} from "../types.js";
6+
import { prepareRequestHeaders } from "./utils/request.js";
7+
8+
/**
 * Rewrites a comma-separated form field into repeated array-style entries.
 *
 * When every entry stored under `key` is a string and at least one of them
 * contains a comma, all entries for `key` are removed and each trimmed,
 * non-empty item is appended under `${key}[]`. In every other case (key
 * absent, no comma anywhere, or a non-string entry present) the FormData is
 * left untouched.
 *
 * @param formData - The FormData object to modify in place.
 * @param key - The key to extract and split.
 */
function flattenArrayParameter(formData: FormData, key: string): void {
  const entries = formData.getAll(key);
  const rawValues = entries.filter(
    (entry): entry is string => typeof entry === "string"
  );

  // Nothing to do when a non-string (file) entry is present or when no value
  // actually holds a comma-separated list.
  if (
    rawValues.length !== entries.length ||
    !rawValues.some((raw) => raw.includes(","))
  ) {
    return;
  }

  // delete() drops every entry stored under the key, so each of them is
  // re-added below; reading only the first one would silently lose the rest.
  formData.delete(key);
  for (const raw of rawValues) {
    for (const item of raw.split(",")) {
      const trimmed = item.trim();
      if (trimmed) {
        formData.append(`${key}[]`, trimmed);
      }
    }
  }
}
25+
/**
 * Before-request hook that rewrites array parameters in a form body into the
 * shape the server expects.
 */
export class FixArrayParamsHook implements BeforeRequestHook {
  /**
   * Fixes specific array parameters in the request.
   * The SDK creates FormData with {extract_image_block_types: "a,b,c"},
   * and the server expects it to be {extract_image_block_types[]: ["a", "b", "c"]}.
   * Speakeasy will fix this upstream soon.
   *
   * Requests whose Content-Type is not a form encoding are returned as-is,
   * because `Request.formData()` rejects for any other body type.
   *
   * @param _hookCtx - The context object for the hook, containing metadata about the request.
   * @param request - The original Request object.
   * @returns The original request when it carries no form body; otherwise a
   * new Request object with modified form data and headers.
   */
  async beforeRequest(
    _hookCtx: BeforeRequestContext,
    request: Request
  ): Promise<Request> {
    const contentType =
      request.headers.get("content-type")?.toLowerCase() ?? "";
    const hasFormBody =
      contentType.includes("multipart/form-data") ||
      contentType.includes("application/x-www-form-urlencoded");
    if (!hasFormBody) {
      return request;
    }

    // Parse a clone so the caller's request body is left unread.
    const requestClone = request.clone();
    const formData = await requestClone.formData();

    flattenArrayParameter(formData, "extract_image_block_types");

    // NOTE(review): prepareRequestHeaders is defined in ./utils/request.js;
    // presumably it prepares headers suitable for a re-encoded body — confirm.
    const headers = prepareRequestHeaders(requestClone);

    return new Request(requestClone, {
      body: formData,
      headers,
    });
  }
}

src/hooks/custom/utils/request.ts

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -90,13 +90,5 @@ export async function prepareRequestBody(
9090
startingPageNumber.toString()
9191
);
9292

93-
if (formData.has(EXTRACT_IMAGE_BLOCK_TYPES)) {
94-
newFormData.delete(EXTRACT_IMAGE_BLOCK_TYPES);
95-
const extractImageBlockTypes = (formData.get(EXTRACT_IMAGE_BLOCK_TYPES)?.toString() || "").split(",");
96-
for(const blockType of extractImageBlockTypes) {
97-
newFormData.append(EXTRACT_IMAGE_BLOCK_TYPES, blockType);
98-
}
99-
}
100-
10193
return newFormData;
10294
}

src/hooks/registration.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { Hooks } from "./types.js";
33
import { LoggerHook } from "./custom/LoggerHook.js";
44
import { SplitPdfHook } from "./custom/SplitPdfHook.js";
55
import { HttpsCheckHook } from "./custom/HttpsCheckHook.js";
6+
import { FixArrayParamsHook } from "./custom/FixArrayParamsHook.js";
67

78
/*
89
* This file is only ever generated once on the first generation and then is free to be modified.
@@ -19,6 +20,7 @@ export function initHooks(hooks: Hooks) {
1920
const loggerHook = new LoggerHook();
2021
const splitPdfHook = new SplitPdfHook();
2122
const httpsCheckHook = new HttpsCheckHook();
23+
const fixArrayParamsHook = new FixArrayParamsHook();
2224

2325
// NOTE: logger_hook should stay registered last as it logs the status of
2426
// request and whether it will be retried which can be changed by e.g. split_pdf_hook
@@ -28,6 +30,7 @@ export function initHooks(hooks: Hooks) {
2830
hooks.registerSDKInitHook(splitPdfHook);
2931

3032
// Register before request hooks
33+
hooks.registerBeforeRequestHook(fixArrayParamsHook)
3134
hooks.registerBeforeRequestHook(splitPdfHook);
3235

3336
// Register after success hooks

test/unit/FixArrayParamsHook.test.ts

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import { readFileSync } from "fs";
2+
3+
import { UnstructuredClient } from "../../src";
4+
import { PartitionResponse } from "../../src/sdk/models/operations";
5+
import { PartitionParameters, Strategy } from "../../src/sdk/models/shared";
6+
import { describe, it, expect, vi, beforeEach} from 'vitest';
7+
8+
describe("FixArrayParamsHook unit tests", () => {
  beforeEach(() => {
    // Reset mock state before each test. resetAllMocks() does not undo
    // vi.stubGlobal(), so the stubbed global fetch is restored explicitly to
    // keep one parameterised case from leaking its fake fetch into the next.
    vi.resetAllMocks();
    vi.unstubAllGlobals();
  });

  const FAKE_API_KEY = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";

  // Assert that array parameters are sent in the correct format.
  // This should work with and without pdf splitting.
  it.each([
    { splitPdfPage: false },
    { splitPdfPage: true },
  ])(
    "should send extract_image_block_types in the correct format",
    async ({ splitPdfPage }) => {
      const client = new UnstructuredClient({});

      const file = {
        content: readFileSync("test/data/layout-parser-paper-fast.pdf"),
        fileName: "test/data/layout-parser-paper-fast.pdf",
      };

      const requestParams: PartitionParameters = {
        files: file,
        strategy: Strategy.Fast,
        extractImageBlockTypes: ["a", "b", "c"],
        splitPdfPage: splitPdfPage,
      };

      // Fake server reply: a single element returned as JSON with status 200.
      const fetchMock = vi.fn().mockResolvedValue(
        new Response(
          JSON.stringify([
            {
              type: "Image",
              element_id: "2fe9cbfbf0ff1bd64cc4705347dbd1d6",
              text: "This is a test",
              metadata: {},
            },
          ]),
          {
            status: 200,
            headers: { "Content-Type": "application/json" },
          }
        )
      );

      vi.stubGlobal("fetch", fetchMock);

      const res: PartitionResponse = await client.general.partition({
        partitionParameters: requestParams,
      });

      expect(fetchMock).toHaveBeenCalledTimes(1);

      // Inspect the outgoing request body: the values must arrive as repeated
      // "extract_image_block_types[]" entries rather than one joined string.
      const request = fetchMock.mock.calls[0][0];
      const formData = await request.formData();
      const extract_image_block_types = formData.getAll(
        "extract_image_block_types[]"
      );

      expect(extract_image_block_types).toEqual(["a", "b", "c"]);
    }
  );
});

0 commit comments

Comments
 (0)