Commit 7652365 (1 parent: 157909e)

feat(api): api update

File tree: 5 files changed (+27, −37 lines)

  .stats.yml
  src/codex/resources/projects/projects.py
  src/codex/types/project_validate_params.py
  src/codex/types/project_validate_response.py
  tests/api_resources/test_projects.py

.stats.yml

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
 configured_endpoints: 44
-openapi_spec_hash: 8ffde9b129ffc5edd4c4f8c9d866d869
+openapi_spec_hash: 056bc3805c2373563a6585103edd5cb8
 config_hash: 659f65b6ccf5612986f920f7f9abbcb5

src/codex/resources/projects/projects.py

Lines changed: 14 additions & 8 deletions
@@ -426,8 +426,8 @@ def validate(
         query: str,
         response: str,
         use_llm_matching: bool | NotGiven = NOT_GIVEN,
-        bad_response_thresholds: project_validate_params.BadResponseThresholds | NotGiven = NOT_GIVEN,
         constrain_outputs: Optional[List[str]] | NotGiven = NOT_GIVEN,
+        custom_eval_thresholds: Optional[Dict[str, float]] | NotGiven = NOT_GIVEN,
         custom_metadata: Optional[object] | NotGiven = NOT_GIVEN,
         eval_scores: Optional[Dict[str, float]] | NotGiven = NOT_GIVEN,
         options: Optional[project_validate_params.Options] | NotGiven = NOT_GIVEN,
@@ -451,10 +451,13 @@ def validate(
         query will be recorded in the project for SMEs to answer.

         Args:
+          custom_eval_thresholds: Optional custom thresholds for specific evals. Keys should match with the keys
+              in the `eval_scores` dictionary.
+
           custom_metadata: Arbitrary metadata supplied by the user/system

-          eval_scores: Evaluation scores to use for flagging a response as bad. If not provided, TLM
-              will be used to generate scores.
+          eval_scores: Scores assessing different aspects of the RAG system. If not provided, TLM will
+              be used to generate scores.

           options: Typed dict of advanced configuration options for the Trustworthy Language Model.
               Many of these configurations are determined by the quality preset selected
@@ -575,8 +578,8 @@ def validate(
                     "prompt": prompt,
                     "query": query,
                     "response": response,
-                    "bad_response_thresholds": bad_response_thresholds,
                     "constrain_outputs": constrain_outputs,
+                    "custom_eval_thresholds": custom_eval_thresholds,
                     "custom_metadata": custom_metadata,
                     "eval_scores": eval_scores,
                     "options": options,
@@ -967,8 +970,8 @@ async def validate(
         query: str,
         response: str,
         use_llm_matching: bool | NotGiven = NOT_GIVEN,
-        bad_response_thresholds: project_validate_params.BadResponseThresholds | NotGiven = NOT_GIVEN,
         constrain_outputs: Optional[List[str]] | NotGiven = NOT_GIVEN,
+        custom_eval_thresholds: Optional[Dict[str, float]] | NotGiven = NOT_GIVEN,
         custom_metadata: Optional[object] | NotGiven = NOT_GIVEN,
         eval_scores: Optional[Dict[str, float]] | NotGiven = NOT_GIVEN,
         options: Optional[project_validate_params.Options] | NotGiven = NOT_GIVEN,
@@ -992,10 +995,13 @@ async def validate(
         query will be recorded in the project for SMEs to answer.

         Args:
+          custom_eval_thresholds: Optional custom thresholds for specific evals. Keys should match with the keys
+              in the `eval_scores` dictionary.
+
           custom_metadata: Arbitrary metadata supplied by the user/system

-          eval_scores: Evaluation scores to use for flagging a response as bad. If not provided, TLM
-              will be used to generate scores.
+          eval_scores: Scores assessing different aspects of the RAG system. If not provided, TLM will
+              be used to generate scores.

           options: Typed dict of advanced configuration options for the Trustworthy Language Model.
               Many of these configurations are determined by the quality preset selected
@@ -1116,8 +1122,8 @@ async def validate(
                     "prompt": prompt,
                     "query": query,
                     "response": response,
-                    "bad_response_thresholds": bad_response_thresholds,
                     "constrain_outputs": constrain_outputs,
+                    "custom_eval_thresholds": custom_eval_thresholds,
                     "custom_metadata": custom_metadata,
                     "eval_scores": eval_scores,
                     "options": options,

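For context, a minimal usage sketch of the renamed keyword against the synchronous client. None of this appears in the commit itself: the client construction, project ID, prompt/query/response strings, and the `trustworthiness` eval name are placeholder assumptions; only the `custom_eval_thresholds` argument (replacing the removed `bad_response_thresholds`) comes from this change.

from codex import Codex

client = Codex()  # assumes credentials are configured via environment variables

result = client.projects.validate(
    project_id="00000000-0000-0000-0000-000000000000",  # placeholder project ID
    prompt="Answer the question using the provided context.",
    query="What is the refund policy?",
    response="Refunds are issued within 30 days.",
    eval_scores={"trustworthiness": 0.42},            # optional pre-computed scores
    custom_eval_thresholds={"trustworthiness": 0.7},  # keys should match eval_scores keys
)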
src/codex/types/project_validate_params.py

Lines changed: 8 additions & 14 deletions
@@ -7,7 +7,7 @@
 
 from .._utils import PropertyInfo
 
-__all__ = ["ProjectValidateParams", "BadResponseThresholds", "Options"]
+__all__ = ["ProjectValidateParams", "Options"]
 
 
 class ProjectValidateParams(TypedDict, total=False):
@@ -21,15 +21,19 @@ class ProjectValidateParams(TypedDict, total=False):
 
     use_llm_matching: bool
 
-    bad_response_thresholds: BadResponseThresholds
-
     constrain_outputs: Optional[List[str]]
 
+    custom_eval_thresholds: Optional[Dict[str, float]]
+    """Optional custom thresholds for specific evals.
+
+    Keys should match with the keys in the `eval_scores` dictionary.
+    """
+
     custom_metadata: Optional[object]
     """Arbitrary metadata supplied by the user/system"""
 
     eval_scores: Optional[Dict[str, float]]
-    """Evaluation scores to use for flagging a response as bad.
+    """Scores assessing different aspects of the RAG system.
 
     If not provided, TLM will be used to generate scores.
     """
@@ -139,16 +143,6 @@ class ProjectValidateParams(TypedDict, total=False):
     x_stainless_package_version: Annotated[str, PropertyInfo(alias="x-stainless-package-version")]
 
 
-class BadResponseThresholds(TypedDict, total=False):
-    context_sufficiency: Optional[float]
-
-    query_ease: Optional[float]
-
-    response_helpfulness: Optional[float]
-
-    trustworthiness: Optional[float]
-
-
 class Options(TypedDict, total=False):
     custom_eval_criteria: Iterable[object]
 
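To make the key-matching requirement concrete, a tiny hypothetical pairing; the eval names here are illustrative and not defined by this commit:

# Keys in custom_eval_thresholds are expected to mirror keys in eval_scores.
eval_scores = {"trustworthiness": 0.42, "response_helpfulness": 0.93}
custom_eval_thresholds = {"trustworthiness": 0.7}  # only this eval gets a custom threshold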

src/codex/types/project_validate_response.py

Lines changed: 2 additions & 2 deletions
@@ -8,7 +8,7 @@
 
 
 class EvalScores(BaseModel):
-    is_bad: bool
+    failed: bool
 
     score: Optional[float] = None
 
@@ -18,7 +18,7 @@ class EvalScores(BaseModel):
 class ProjectValidateResponse(BaseModel):
     eval_scores: Dict[str, EvalScores]
     """
-    Evaluation scores for the original response along with a boolean flag, `is_bad`,
+    Evaluation scores for the original response along with a boolean flag, `failed`,
     indicating whether the score is below the threshold.
     """
 
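A short sketch of consuming the renamed flag on the response model, assuming `result` is the ProjectValidateResponse returned by a validate call like the one sketched earlier:

# Each eval_scores entry carries a numeric score plus the boolean `failed`
# (previously `is_bad`), set when the score falls below its threshold.
for eval_name, entry in result.eval_scores.items():
    status = "FAILED" if entry.failed else "ok"
    print(f"{eval_name}: score={entry.score} ({status})")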

tests/api_resources/test_projects.py

Lines changed: 2 additions & 12 deletions
@@ -444,13 +444,8 @@ def test_method_validate_with_all_params(self, client: Codex) -> None:
             query="query",
             response="response",
             use_llm_matching=True,
-            bad_response_thresholds={
-                "context_sufficiency": 0,
-                "query_ease": 0,
-                "response_helpfulness": 0,
-                "trustworthiness": 0,
-            },
             constrain_outputs=["string"],
+            custom_eval_thresholds={"foo": 0},
             custom_metadata={},
             eval_scores={"foo": 0},
             options={
@@ -944,13 +939,8 @@ async def test_method_validate_with_all_params(self, async_client: AsyncCodex) -> None:
             query="query",
             response="response",
             use_llm_matching=True,
-            bad_response_thresholds={
-                "context_sufficiency": 0,
-                "query_ease": 0,
-                "response_helpfulness": 0,
-                "trustworthiness": 0,
-            },
             constrain_outputs=["string"],
+            custom_eval_thresholds={"foo": 0},
             custom_metadata={},
             eval_scores={"foo": 0},
             options={
