diff --git a/python/langsmith/client.py b/python/langsmith/client.py index 545cfaa4..854239e4 100644 --- a/python/langsmith/client.py +++ b/python/langsmith/client.py @@ -96,7 +96,7 @@ class ZoneInfo: # type: ignore[no-redef] WARNED_ATTACHMENTS = False EMPTY_SEQ: tuple[Dict, ...] = () BOUNDARY = uuid.uuid4().hex -MultipartParts = List[Tuple[str, Tuple[None, bytes, str]]] +MultipartParts = List[Tuple[str, Tuple[None, bytes, str, Dict[str, str]]]] URLLIB3_SUPPORTS_BLOCKSIZE = "key_blocksize" in signature(PoolKey).parameters @@ -1638,17 +1638,11 @@ def multipart_ingest_runs( # insert runtime environment self._insert_runtime_env(create_dicts) self._insert_runtime_env(update_dicts) - # check size limit - size_limit_bytes = (self.info.batch_ingest_config or {}).get( - "size_limit_bytes" - ) or _SIZE_LIMIT_BYTES # send the runs in multipart requests - acc_size = 0 acc_context: List[str] = [] acc_parts: MultipartParts = [] for event, payloads in (("post", create_dicts), ("patch", update_dicts)): for payload in payloads: - parts: MultipartParts = [] # collect fields to be sent as separate parts fields = [ ("inputs", payload.pop("inputs", None)), @@ -1656,45 +1650,49 @@ def multipart_ingest_runs( ("events", payload.pop("events", None)), ] # encode the main run payload - parts.append( + payloadb = _dumps_json(payload) + acc_parts.append( ( f"{event}.{payload['id']}", - (None, _dumps_json(payload), "application/json"), + ( + None, + payloadb, + "application/json", + {"Content-Length": str(len(payloadb))}, + ), ) ) # encode the fields we collected for key, value in fields: if value is None: continue - parts.append( + valb = _dumps_json(value) + acc_parts.append( ( f"{event}.{payload['id']}.{key}", - (None, _dumps_json(value), "application/json"), + ( + None, + valb, + "application/json", + {"Content-Length": str(len(valb))}, + ), ), ) # encode the attachments if attachments := all_attachments.pop(payload["id"], None): for n, (ct, ba) in attachments.items(): - parts.append( - (f"attachment.{payload['id']}.{n}", (None, ba, ct)) + acc_parts.append( + ( + f"attachment.{payload['id']}.{n}", + (None, ba, ct, {"Content-Length": str(len(ba))}), + ) ) - # calculate the size of the parts - size = sum(len(p[1][1]) for p in parts) # compute context - context = f"trace={payload.get('trace_id')},id={payload.get('id')}" - # if next size would exceed limit, send the current parts - if acc_size + size > size_limit_bytes: - self._send_multipart_req(acc_parts, _context="; ".join(acc_context)) - acc_parts.clear() - acc_context.clear() - acc_size = 0 - # accumulate the parts - acc_size += size - acc_parts.extend(parts) - acc_context.append(context) - # send the remaining parts - if acc_parts: - self._send_multipart_req(acc_parts, _context="; ".join(acc_context)) + acc_context.append( + f"trace={payload.get('trace_id')},id={payload.get('id')}" + ) + # send the request + self._send_multipart_req(acc_parts, _context="; ".join(acc_context)) def _send_multipart_req(self, parts: MultipartParts, *, _context: str): for api_url, api_key in self._write_api_urls.items(): diff --git a/python/poetry.lock b/python/poetry.lock index 9af73254..7c5567ad 100644 --- a/python/poetry.lock +++ b/python/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. [[package]] name = "annotated-types" @@ -1404,28 +1404,29 @@ requests = ">=2.0.1,<3.0.0" [[package]] name = "ruff" -version = "0.3.7" +version = "0.6.9" description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.3.7-py3-none-macosx_10_12_x86_64.macosx_11_0_arm64.macosx_10_12_universal2.whl", hash = "sha256:0e8377cccb2f07abd25e84fc5b2cbe48eeb0fea9f1719cad7caedb061d70e5ce"}, - {file = "ruff-0.3.7-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:15a4d1cc1e64e556fa0d67bfd388fed416b7f3b26d5d1c3e7d192c897e39ba4b"}, - {file = "ruff-0.3.7-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d28bdf3d7dc71dd46929fafeec98ba89b7c3550c3f0978e36389b5631b793663"}, - {file = "ruff-0.3.7-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:379b67d4f49774ba679593b232dcd90d9e10f04d96e3c8ce4a28037ae473f7bb"}, - {file = "ruff-0.3.7-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c060aea8ad5ef21cdfbbe05475ab5104ce7827b639a78dd55383a6e9895b7c51"}, - {file = "ruff-0.3.7-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:ebf8f615dde968272d70502c083ebf963b6781aacd3079081e03b32adfe4d58a"}, - {file = "ruff-0.3.7-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d48098bd8f5c38897b03604f5428901b65e3c97d40b3952e38637b5404b739a2"}, - {file = "ruff-0.3.7-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:da8a4fda219bf9024692b1bc68c9cff4b80507879ada8769dc7e985755d662ea"}, - {file = "ruff-0.3.7-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c44e0149f1d8b48c4d5c33d88c677a4aa22fd09b1683d6a7ff55b816b5d074f"}, - {file = "ruff-0.3.7-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:3050ec0af72b709a62ecc2aca941b9cd479a7bf2b36cc4562f0033d688e44fa1"}, - {file = "ruff-0.3.7-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:a29cc38e4c1ab00da18a3f6777f8b50099d73326981bb7d182e54a9a21bb4ff7"}, - {file = "ruff-0.3.7-py3-none-musllinux_1_2_i686.whl", hash = "sha256:5b15cc59c19edca917f51b1956637db47e200b0fc5e6e1878233d3a938384b0b"}, - {file = "ruff-0.3.7-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:e491045781b1e38b72c91247cf4634f040f8d0cb3e6d3d64d38dcf43616650b4"}, - {file = "ruff-0.3.7-py3-none-win32.whl", hash = "sha256:bc931de87593d64fad3a22e201e55ad76271f1d5bfc44e1a1887edd0903c7d9f"}, - {file = "ruff-0.3.7-py3-none-win_amd64.whl", hash = "sha256:5ef0e501e1e39f35e03c2acb1d1238c595b8bb36cf7a170e7c1df1b73da00e74"}, - {file = "ruff-0.3.7-py3-none-win_arm64.whl", hash = "sha256:789e144f6dc7019d1f92a812891c645274ed08af6037d11fc65fcbc183b7d59f"}, - {file = "ruff-0.3.7.tar.gz", hash = "sha256:d5c1aebee5162c2226784800ae031f660c350e7a3402c4d1f8ea4e97e232e3ba"}, + {file = "ruff-0.6.9-py3-none-linux_armv6l.whl", hash = "sha256:064df58d84ccc0ac0fcd63bc3090b251d90e2a372558c0f057c3f75ed73e1ccd"}, + {file = "ruff-0.6.9-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:140d4b5c9f5fc7a7b074908a78ab8d384dd7f6510402267bc76c37195c02a7ec"}, + {file = "ruff-0.6.9-py3-none-macosx_11_0_arm64.whl", hash = "sha256:53fd8ca5e82bdee8da7f506d7b03a261f24cd43d090ea9db9a1dc59d9313914c"}, + {file = "ruff-0.6.9-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:645d7d8761f915e48a00d4ecc3686969761df69fb561dd914a773c1a8266e14e"}, + {file = "ruff-0.6.9-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eae02b700763e3847595b9d2891488989cac00214da7f845f4bcf2989007d577"}, + {file = "ruff-0.6.9-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d5ccc9e58112441de8ad4b29dcb7a86dc25c5f770e3c06a9d57e0e5eba48829"}, + {file = "ruff-0.6.9-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:417b81aa1c9b60b2f8edc463c58363075412866ae4e2b9ab0f690dc1e87ac1b5"}, + {file = "ruff-0.6.9-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3c866b631f5fbce896a74a6e4383407ba7507b815ccc52bcedabb6810fdb3ef7"}, + {file = "ruff-0.6.9-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7b118afbb3202f5911486ad52da86d1d52305b59e7ef2031cea3425142b97d6f"}, + {file = "ruff-0.6.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a67267654edc23c97335586774790cde402fb6bbdb3c2314f1fc087dee320bfa"}, + {file = "ruff-0.6.9-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:3ef0cc774b00fec123f635ce5c547dac263f6ee9fb9cc83437c5904183b55ceb"}, + {file = "ruff-0.6.9-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:12edd2af0c60fa61ff31cefb90aef4288ac4d372b4962c2864aeea3a1a2460c0"}, + {file = "ruff-0.6.9-py3-none-musllinux_1_2_i686.whl", hash = "sha256:55bb01caeaf3a60b2b2bba07308a02fca6ab56233302406ed5245180a05c5625"}, + {file = "ruff-0.6.9-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:925d26471fa24b0ce5a6cdfab1bb526fb4159952385f386bdcc643813d472039"}, + {file = "ruff-0.6.9-py3-none-win32.whl", hash = "sha256:eb61ec9bdb2506cffd492e05ac40e5bc6284873aceb605503d8494180d6fc84d"}, + {file = "ruff-0.6.9-py3-none-win_amd64.whl", hash = "sha256:785d31851c1ae91f45b3d8fe23b8ae4b5170089021fbb42402d811135f0b7117"}, + {file = "ruff-0.6.9-py3-none-win_arm64.whl", hash = "sha256:a9641e31476d601f83cd602608739a0840e348bda93fec9f1ee816f8b6798b93"}, + {file = "ruff-0.6.9.tar.gz", hash = "sha256:b076ef717a8e5bc819514ee1d602bbdca5b4420ae13a9cf61a0c0a4f53a2baa2"}, ] [[package]] @@ -1917,4 +1918,4 @@ vcr = [] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "a20ea8a3bba074fc87b54139dfe7c4c4ffea37d5d5f4b874fb759baad4d443d0" +content-hash = "c6a6b584b516728bbb8aced435bbd898fb574e6dfec450148988ac96f4a717a4" diff --git a/python/pyproject.toml b/python/pyproject.toml index 641c9be6..064d77a4 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -39,7 +39,7 @@ requests-toolbelt = "^1.0.0" pytest = "^7.3.1" black = ">=23.3,<25.0" mypy = "^1.9.0" -ruff = "^0.3.4" +ruff = "^0.6.9" types-requests = "^2.31.0.1" pandas-stubs = "^2.0.1.230501" types-pyyaml = "^6.0.12.10" diff --git a/python/tests/integration_tests/wrappers/test_openai.py b/python/tests/integration_tests/wrappers/test_openai.py index 32dcd85c..3f9fda04 100644 --- a/python/tests/integration_tests/wrappers/test_openai.py +++ b/python/tests/integration_tests/wrappers/test_openai.py @@ -39,7 +39,7 @@ def test_chat_sync_api(mock_session: mock.MagicMock, stream: bool): assert len(original_chunks) == len(patched_chunks) assert [o.choices == p.choices for o, p in zip(original_chunks, patched_chunks)] else: - assert type(original) == type(patched) + assert type(original) is type(patched) assert original.choices == patched.choices # Give the thread a chance. time.sleep(0.01) @@ -74,7 +74,7 @@ async def test_chat_async_api(mock_session: mock.MagicMock, stream: bool): assert len(original_chunks) == len(patched_chunks) assert [o.choices == p.choices for o, p in zip(original_chunks, patched_chunks)] else: - assert type(original) == type(patched) + assert type(original) is type(patched) assert original.choices == patched.choices # Give the thread a chance. time.sleep(0.1) @@ -117,7 +117,7 @@ def test_completions_sync_api(mock_session: mock.MagicMock, stream: bool): assert original.response assert patched.response else: - assert type(original) == type(patched) + assert type(original) is type(patched) assert original.choices == patched.choices # Give the thread a chance. time.sleep(0.1) @@ -170,7 +170,7 @@ async def test_completions_async_api(mock_session: mock.MagicMock, stream: bool) assert original.response assert patched.response else: - assert type(original) == type(patched) + assert type(original) is type(patched) assert original.choices == patched.choices # Give the thread a chance. for _ in range(10): diff --git a/python/tests/unit_tests/test_client.py b/python/tests/unit_tests/test_client.py index b9c19d79..399d5712 100644 --- a/python/tests/unit_tests/test_client.py +++ b/python/tests/unit_tests/test_client.py @@ -1061,11 +1061,8 @@ def test_batch_ingest_run_splits_large_batches( ] if use_multipart_endpoint: client.multipart_ingest_runs(create=posts, update=patches) - # we can support up to 20MB per batch, so we need to find the number of batches - # we should be sending - max_in_batch = max(1, (20 * MB) // (payload_size + 20)) - - expected_num_requests = min(6, math.ceil((len(run_ids) * 2) / max_in_batch)) + # multipart endpoint should only send one request + expected_num_requests = 1 # count the number of POST requests assert sum( [1 for call in mock_session.request.call_args_list if call[0][0] == "POST"]