cleanlab · axl1313 · May 15, 2025 · May 15, 2025 · May 8, 2025 · May 9, 2025
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -29,3 +29,27 @@ jobs:
 
       - name: Run lints
         run: ./scripts/lint
+
+  upload:  # uploads a tarball of the checkout via scripts/utils/upload-artifact.sh
+    if: github.repository == 'stainless-sdks/codex-python'  # skipped in every other repository
+    timeout-minutes: 10
+    name: upload
+    permissions:
+      contents: read
+      id-token: write  # needed so the job can request a GitHub OIDC token below
+    runs-on: depot-ubuntu-24.04
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Get GitHub OIDC Token
+        id: github-oidc
+        uses: actions/github-script@v6
+        with:
+          script: core.setOutput('github_token', await core.getIDToken());
+
+      - name: Upload tarball
+        env:  # the three variables read by upload-artifact.sh
+          URL: https://pkg.stainless.com/s
+          AUTH: ${{ steps.github-oidc.outputs.github_token }}  # OIDC token from the previous step
+          SHA: ${{ github.sha }}
+        run: ./scripts/utils/upload-artifact.sh
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "0.1.0-alpha.19"
+  ".": "0.1.0-alpha.20"
 }
diff --git a/.stats.yml b/.stats.yml
@@ -1,3 +1,3 @@
 configured_endpoints: 44
-openapi_spec_hash: 97719fe7ae4c641a5a020dd21f2978dd
+openapi_spec_hash: 9d81a4b0eca6d3629ba9d5432a65655c
 config_hash: 659f65b6ccf5612986f920f7f9abbcb5
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,26 @@
 # Changelog
 
+## 0.1.0-alpha.20 (2025-05-15)
+
+Full Changelog: [v0.1.0-alpha.19...v0.1.0-alpha.20](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.19...v0.1.0-alpha.20)
+
+### Features
+
+* **api:** api update ([2e74162](https://github.com/cleanlab/codex-python/commit/2e741628a380d0fefe117f80bb3796b111575df3))
+* **api:** api update ([9e85827](https://github.com/cleanlab/codex-python/commit/9e85827e0b1a58011a8ead15c695cb175744325a))
+
+
+### Bug Fixes
+
+* **package:** support direct resource imports ([09066c8](https://github.com/cleanlab/codex-python/commit/09066c8bf38b23fd3d902b42c4f4f769161b0e2e))
+
+
+### Chores
+
+* **ci:** upload sdks to package manager ([6594b48](https://github.com/cleanlab/codex-python/commit/6594b48736ea79e7f9457cb3b47abfa17618565b))
+* **internal:** avoid errors for isinstance checks on proxies ([a1d7faf](https://github.com/cleanlab/codex-python/commit/a1d7fafa46e9100a4d29c46b48919025b26a0cfa))
+* **internal:** version bump ([971e28d](https://github.com/cleanlab/codex-python/commit/971e28dd483b3f2d38094f368baebd5eb0906e2c))
+
 ## 0.1.0-alpha.19 (2025-05-07)
 
 Full Changelog: [v0.1.0-alpha.18...v0.1.0-alpha.19](https://github.com/cleanlab/codex-python/compare/v0.1.0-alpha.18...v0.1.0-alpha.19)

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "codex-sdk"
-version = "0.1.0-alpha.19"
+version = "0.1.0-alpha.20"
 description = "Internal SDK used within cleanlab-codex package. Refer to https://pypi.org/project/cleanlab-codex/ instead."
 dynamic = ["readme"]
 license = "MIT"

diff --git a/scripts/utils/upload-artifact.sh b/scripts/utils/upload-artifact.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+#
+# Upload a gzipped tarball of the current directory to Stainless package storage.
+#
+# Required environment variables:
+#   URL   endpoint that is POSTed to in order to obtain a signed upload URL
+#   AUTH  bearer token sent in the Authorization header of that request
+#   SHA   commit SHA, used only to print the install URL on success
+#
+# Exits 1 if no signed URL is returned or the upload does not report HTTP 200;
+# because of `set -e` it also aborts on any other failing command.
+#
+# NOTE(review): `set -x` echoes the expanded curl command, Authorization header
+# included -- confirm the CI runner masks the token in its logs.
+set -exuo pipefail
+
+RESPONSE=$(curl -X POST "$URL" \
+  -H "Authorization: Bearer $AUTH" \
+  -H "Content-Type: application/json")
+
+SIGNED_URL=$(echo "$RESPONSE" | jq -r '.url')
+
+# jq prints the literal string "null" when the key is absent and nothing at all
+# when the response body is empty; treat both as failure.
+if [[ -z "$SIGNED_URL" || "$SIGNED_URL" == "null" ]]; then
+  echo -e "\033[31mFailed to get signed URL.\033[0m"
+  exit 1
+fi
+
+# Stream the tarball straight into the PUT request. curl's verbose output is
+# redirected into the capture so the HTTP status line can be inspected below.
+UPLOAD_RESPONSE=$(tar -cz . | curl -v -X PUT \
+  -H "Content-Type: application/gzip" \
+  --data-binary @- "$SIGNED_URL" 2>&1)
+
+if echo "$UPLOAD_RESPONSE" | grep -q "HTTP/[0-9.]* 200"; then
+  echo -e "\033[32mUploaded build to Stainless storage.\033[0m"
+  # This is a Python SDK, so the artifact is installed with pip (not npm).
+  echo -e "\033[32mInstallation: pip install 'https://pkg.stainless.com/s/codex-python/$SHA'\033[0m"
+else
+  echo -e "\033[31mFailed to upload artifact.\033[0m"
+  exit 1
+fi
diff --git a/src/codex/__init__.py b/src/codex/__init__.py
@@ -1,5 +1,7 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
+import typing as _t
+
 from . import types
 from ._types import NOT_GIVEN, Omit, NoneType, NotGiven, Transport, ProxiesTypes
 from ._utils import file_from_path
@@ -80,6 +82,9 @@
     "DefaultAsyncHttpxClient",
 ]
 
+if not _t.TYPE_CHECKING:
+    from ._utils._resources_proxy import resources as resources
+
 _setup_logging()
 
 # Update the __module__ attribute for exported symbols so that

diff --git a/src/codex/_utils/_proxy.py b/src/codex/_utils/_proxy.py
@@ -46,7 +46,10 @@ def __dir__(self) -> Iterable[str]:
     @property  # type: ignore
     @override
     def __class__(self) -> type:  # pyright: ignore
-        proxied = self.__get_proxied__()
+        try:
+            proxied = self.__get_proxied__()
+        except Exception:
+            return type(self)
         if issubclass(type(proxied), LazyProxy):
             return type(proxied)
         return proxied.__class__

diff --git a/src/codex/_utils/_resources_proxy.py b/src/codex/_utils/_resources_proxy.py
@@ -0,0 +1,24 @@
+from __future__ import annotations
+
+from typing import Any
+from typing_extensions import override
+
+from ._proxy import LazyProxy
+
+
+class ResourcesProxy(LazyProxy[Any]):
+    """Lazy stand-in for the `codex.resources` module.
+
+    Defers importing `codex.resources` until it is actually needed, while still
+    letting users write `import codex` and then reference `codex.resources`.
+    """
+
+    @override
+    def __load__(self) -> Any:
+        from importlib import import_module
+
+        return import_module("codex.resources")
+
+
+# Shared proxy instance; `codex/__init__.py` imports it under the name `resources`.
+resources = ResourcesProxy().__as_proxied__()
diff --git a/src/codex/_version.py b/src/codex/_version.py
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "codex"
-__version__ = "0.1.0-alpha.19"  # x-release-please-version
+__version__ = "0.1.0-alpha.20"  # x-release-please-version
diff --git a/src/codex/resources/projects/clusters.py b/src/codex/resources/projects/clusters.py
@@ -50,7 +50,9 @@ def list(
         self,
         project_id: str,
         *,
-        eval_issue_types: List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query"]]
+        eval_issue_types: List[
+            Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]
+        ]
         | NotGiven = NOT_GIVEN,
         instruction_adherence_failure: Optional[Literal["html_format", "content_structure"]] | NotGiven = NOT_GIVEN,
         limit: int | NotGiven = NOT_GIVEN,
@@ -177,7 +179,9 @@ def list(
         self,
         project_id: str,
         *,
-        eval_issue_types: List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query"]]
+        eval_issue_types: List[
+            Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]
+        ]
         | NotGiven = NOT_GIVEN,
         instruction_adherence_failure: Optional[Literal["html_format", "content_structure"]] | NotGiven = NOT_GIVEN,
         limit: int | NotGiven = NOT_GIVEN,

diff --git a/src/codex/resources/projects/projects.py b/src/codex/resources/projects/projects.py
@@ -426,8 +426,8 @@ def validate(
         query: str,
         response: str,
         use_llm_matching: bool | NotGiven = NOT_GIVEN,
-        bad_response_thresholds: project_validate_params.BadResponseThresholds | NotGiven = NOT_GIVEN,
         constrain_outputs: Optional[List[str]] | NotGiven = NOT_GIVEN,
+        custom_eval_thresholds: Optional[Dict[str, float]] | NotGiven = NOT_GIVEN,
         custom_metadata: Optional[object] | NotGiven = NOT_GIVEN,
         eval_scores: Optional[Dict[str, float]] | NotGiven = NOT_GIVEN,
         options: Optional[project_validate_params.Options] | NotGiven = NOT_GIVEN,
@@ -451,10 +451,13 @@ def validate(
         query will be recorded in the project for SMEs to answer.
 
         Args:
+          custom_eval_thresholds: Optional custom thresholds for specific evals. Keys should match with the keys
+              in the `eval_scores` dictionary.
+
           custom_metadata: Arbitrary metadata supplied by the user/system
 
-          eval_scores: Evaluation scores to use for flagging a response as bad. If not provided, TLM
-              will be used to generate scores.
+          eval_scores: Scores assessing different aspects of the RAG system. If not provided, TLM will
+              be used to generate scores.
 
           options: Typed dict of advanced configuration options for the Trustworthy Language Model.
               Many of these configurations are determined by the quality preset selected
@@ -575,8 +578,8 @@ def validate(
                     "prompt": prompt,
                     "query": query,
                     "response": response,
-                    "bad_response_thresholds": bad_response_thresholds,
                     "constrain_outputs": constrain_outputs,
+                    "custom_eval_thresholds": custom_eval_thresholds,
                     "custom_metadata": custom_metadata,
                     "eval_scores": eval_scores,
                     "options": options,
@@ -967,8 +970,8 @@ async def validate(
         query: str,
         response: str,
         use_llm_matching: bool | NotGiven = NOT_GIVEN,
-        bad_response_thresholds: project_validate_params.BadResponseThresholds | NotGiven = NOT_GIVEN,
         constrain_outputs: Optional[List[str]] | NotGiven = NOT_GIVEN,
+        custom_eval_thresholds: Optional[Dict[str, float]] | NotGiven = NOT_GIVEN,
         custom_metadata: Optional[object] | NotGiven = NOT_GIVEN,
         eval_scores: Optional[Dict[str, float]] | NotGiven = NOT_GIVEN,
         options: Optional[project_validate_params.Options] | NotGiven = NOT_GIVEN,
@@ -992,10 +995,13 @@ async def validate(
         query will be recorded in the project for SMEs to answer.
 
         Args:
+          custom_eval_thresholds: Optional custom thresholds for specific evals. Keys should match with the keys
+              in the `eval_scores` dictionary.
+
           custom_metadata: Arbitrary metadata supplied by the user/system
 
-          eval_scores: Evaluation scores to use for flagging a response as bad. If not provided, TLM
-              will be used to generate scores.
+          eval_scores: Scores assessing different aspects of the RAG system. If not provided, TLM will
+              be used to generate scores.
 
           options: Typed dict of advanced configuration options for the Trustworthy Language Model.
               Many of these configurations are determined by the quality preset selected
@@ -1116,8 +1122,8 @@ async def validate(
                     "prompt": prompt,
                     "query": query,
                     "response": response,
-                    "bad_response_thresholds": bad_response_thresholds,
                     "constrain_outputs": constrain_outputs,
+                    "custom_eval_thresholds": custom_eval_thresholds,
                     "custom_metadata": custom_metadata,
                     "eval_scores": eval_scores,
                     "options": options,

diff --git a/src/codex/types/project_validate_params.py b/src/codex/types/project_validate_params.py
@@ -7,7 +7,7 @@
 
 from .._utils import PropertyInfo
 
-__all__ = ["ProjectValidateParams", "BadResponseThresholds", "Options"]
+__all__ = ["ProjectValidateParams", "Options"]
 
 
 class ProjectValidateParams(TypedDict, total=False):
@@ -21,15 +21,19 @@ class ProjectValidateParams(TypedDict, total=False):
 
     use_llm_matching: bool
 
-    bad_response_thresholds: BadResponseThresholds
-
     constrain_outputs: Optional[List[str]]
 
+    custom_eval_thresholds: Optional[Dict[str, float]]
+    """Optional custom thresholds for specific evals.
+
+    Keys should match with the keys in the `eval_scores` dictionary.
+    """
+
     custom_metadata: Optional[object]
     """Arbitrary metadata supplied by the user/system"""
 
     eval_scores: Optional[Dict[str, float]]
-    """Evaluation scores to use for flagging a response as bad.
+    """Scores assessing different aspects of the RAG system.
 
     If not provided, TLM will be used to generate scores.
     """
@@ -139,16 +143,6 @@ class ProjectValidateParams(TypedDict, total=False):
     x_stainless_package_version: Annotated[str, PropertyInfo(alias="x-stainless-package-version")]
 
 
-class BadResponseThresholds(TypedDict, total=False):
-    context_sufficiency: Optional[float]
-
-    query_ease: Optional[float]
-
-    response_helpfulness: Optional[float]
-
-    trustworthiness: Optional[float]
-
-
 class Options(TypedDict, total=False):
     custom_eval_criteria: Iterable[object]
 

diff --git a/src/codex/types/project_validate_response.py b/src/codex/types/project_validate_response.py
@@ -8,7 +8,7 @@
 
 
 class EvalScores(BaseModel):
-    is_bad: bool
+    failed: bool
 
     score: Optional[float] = None
 
@@ -18,7 +18,7 @@ class EvalScores(BaseModel):
 class ProjectValidateResponse(BaseModel):
     eval_scores: Dict[str, EvalScores]
     """
-    Evaluation scores for the original response along with a boolean flag, `is_bad`,
+    Evaluation scores for the original response along with a boolean flag, `failed`,
     indicating whether the score is below the threshold.
     """
 

diff --git a/src/codex/types/projects/cluster_list_params.py b/src/codex/types/projects/cluster_list_params.py
@@ -9,7 +9,7 @@
 
 
 class ClusterListParams(TypedDict, total=False):
-    eval_issue_types: List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query"]]
+    eval_issue_types: List[Literal["hallucination", "search_failure", "unhelpful", "difficult_query", "unsupported"]]
 
     instruction_adherence_failure: Optional[Literal["html_format", "content_structure"]]
 

diff --git a/src/codex/types/projects/entry_notify_sme_params.py b/src/codex/types/projects/entry_notify_sme_params.py
@@ -18,4 +18,13 @@ class EntryNotifySmeParams(TypedDict, total=False):
 class ViewContext(TypedDict, total=False):
     page: Required[int]
 
-    filter: Literal["unanswered", "answered", "all", "hallucination", "search_failure", "unhelpful", "difficult_query"]
+    filter: Literal[
+        "unanswered",
+        "answered",
+        "all",
+        "hallucination",
+        "search_failure",
+        "unhelpful",
+        "difficult_query",
+        "unsupported",
+    ]
diff --git a/tests/api_resources/test_projects.py b/tests/api_resources/test_projects.py
@@ -444,13 +444,8 @@ def test_method_validate_with_all_params(self, client: Codex) -> None:
             query="query",
             response="response",
             use_llm_matching=True,
-            bad_response_thresholds={
-                "context_sufficiency": 0,
-                "query_ease": 0,
-                "response_helpfulness": 0,
-                "trustworthiness": 0,
-            },
             constrain_outputs=["string"],
+            custom_eval_thresholds={"foo": 0},
             custom_metadata={},
             eval_scores={"foo": 0},
             options={
@@ -944,13 +939,8 @@ async def test_method_validate_with_all_params(self, async_client: AsyncCodex) -
             query="query",
             response="response",
             use_llm_matching=True,
-            bad_response_thresholds={
-                "context_sufficiency": 0,
-                "query_ease": 0,
-                "response_helpfulness": 0,
-                "trustworthiness": 0,
-            },
             constrain_outputs=["string"],
+            custom_eval_thresholds={"foo": 0},
             custom_metadata={},
             eval_scores={"foo": 0},
             options={

diff --git a/tests/test_utils/test_proxy.py b/tests/test_utils/test_proxy.py
@@ -21,3 +21,14 @@ def test_recursive_proxy() -> None:
     assert dir(proxy) == []
     assert type(proxy).__name__ == "RecursiveLazyProxy"
     assert type(operator.attrgetter("name.foo.bar.baz")(proxy)).__name__ == "RecursiveLazyProxy"
+
+
+def test_isinstance_does_not_error() -> None:
+    class AlwaysErrorProxy(LazyProxy[Any]):
+        @override
+        def __load__(self) -> Any:  # loading never succeeds
+            raise RuntimeError("Mocking missing dependency")
+
+    failing_proxy = AlwaysErrorProxy()
+    assert isinstance(failing_proxy, LazyProxy)  # still recognised as a proxy
+    assert not isinstance(failing_proxy, dict)  # and the failed load does not escape