Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "hatchling.build"
[project]
name = "draive"
description = "Framework designed to simplify and accelerate the development of LLM-based applications."
version = "0.75.0"
version = "0.75.1"
readme = "README.md"
maintainers = [
{ name = "Kacper Kaliński", email = "kacper.kalinski@miquido.com" },
Expand Down
4 changes: 2 additions & 2 deletions src/draive/evaluation/generator.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from collections.abc import Iterable
from collections.abc import Iterable, Sequence
from typing import Any

from draive.generation import ModelGeneration
Expand Down Expand Up @@ -35,7 +35,7 @@ async def generate_case_parameters[Parameters: DataModel](
count: int,
examples: Iterable[Parameters],
guidelines: str | None = None,
) -> list[Parameters]:
) -> Sequence[Parameters]:
results: list[Parameters] = []
example_pairs: list[tuple[str, Any]] = [(INPUT, example) for example in examples]

Expand Down
27 changes: 25 additions & 2 deletions src/draive/evaluation/scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from collections.abc import Callable, Sequence
from typing import Any, Protocol, Self, cast, overload, runtime_checkable

from haiway import AttributePath, ScopeContext, ctx
from haiway import AttributePath, ScopeContext, as_list, ctx

from draive.commons import META_EMPTY, Meta, MetaValues
from draive.evaluation.evaluator import EvaluatorResult, PreparedEvaluator
Expand Down Expand Up @@ -96,6 +96,24 @@ async def evaluating[Value](
meta=Meta.of(meta),
)

@classmethod
def merging(
    cls,
    result: Self,
    *results: Self,
    meta: Meta | MetaValues | None = None,
) -> Self:
    """
    Combine one or more scenario results into a single new result.

    Parameters
    ----------
    result : Self
        First result; its evaluations come first and its meta is the
        starting point for merging.
    *results : Self
        Further results; their evaluations are appended and their meta
        merged in the order given.
    meta : Meta | MetaValues | None
        Extra metadata, merged last on top of the combined meta.

    Returns
    -------
    Self
        A new instance holding all evaluations and the combined meta.
    """
    # Take an explicit copy: extending a list obtained from the input would
    # otherwise risk growing `result.evaluations` in place when it is a list.
    merged_evaluations: list[EvaluatorResult] = list(result.evaluations)
    merged_meta: Meta = result.meta
    for other in results:
        merged_evaluations.extend(other.evaluations)
        merged_meta = merged_meta.merged_with(other.meta)

    return cls(
        evaluations=merged_evaluations,
        meta=merged_meta.merged_with(Meta.of(meta)),
    )

evaluations: Sequence[EvaluatorResult] = Field(
description="Scenario evaluation results",
)
Expand Down Expand Up @@ -129,7 +147,12 @@ async def __call__(


class ScenarioEvaluator[Value, **Args]:
__slots__ = ("_definition", "_execution_context", "meta", "name")
__slots__ = (
"_definition",
"_execution_context",
"meta",
"name",
)

def __init__(
self,
Expand Down
57 changes: 30 additions & 27 deletions src/draive/evaluation/suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import Protocol, Self, runtime_checkable
from uuid import UUID, uuid4

from haiway import ScopeContext, asynchronous, ctx
from haiway import ScopeContext, as_list, asynchronous, ctx

from draive.commons import META_EMPTY, Meta, MetaValues
from draive.evaluation.evaluator import EvaluatorResult, PreparedEvaluator
Expand All @@ -29,13 +29,10 @@ class EvaluationSuiteCase[CaseParameters: DataModel](DataModel):
comment: str | None = None


class SuiteEvaluatorCaseResult[CaseParameters: DataModel, Value: DataModel | str](DataModel):
class SuiteEvaluatorCaseResult[CaseParameters: DataModel](DataModel):
case: EvaluationSuiteCase[CaseParameters] = Field(
description="Evaluated case",
)
value: Value = Field(
description="Evaluated value",
)
results: Sequence[ScenarioEvaluatorResult] = Field(
description="Evaluation results",
)
Expand Down Expand Up @@ -71,7 +68,6 @@ def report(
return (
f"<evaluation_case identifier='{self.case.identifier}'>"
f"\n<relative_score>{self.relative_score * 100:.2f}%</relative_score>"
f"\n<evaluated_value>{self.value}</evaluated_value>"
# TODO: convert DataModel to xml representation when available
f"\n<parameters>{self.case.parameters}</parameters>"
f"\n{report}"
Expand Down Expand Up @@ -99,13 +95,9 @@ def relative_score(self) -> float:
return score / len(self.results)


class SuiteEvaluatorResult[
SuiteParameters: DataModel,
CaseParameters: DataModel,
Value: DataModel | str,
](DataModel):
class SuiteEvaluatorResult[SuiteParameters: DataModel, CaseParameters: DataModel](DataModel):
parameters: SuiteParameters
cases: Sequence[SuiteEvaluatorCaseResult[CaseParameters, Value]]
cases: Sequence[SuiteEvaluatorCaseResult[CaseParameters]]

@property
def passed(self) -> bool:
Expand Down Expand Up @@ -149,13 +141,12 @@ def relative_score(self) -> float:
return score / len(self.cases)


class EvaluationCaseResult[Value: DataModel | str](DataModel):
class EvaluationCaseResult[Value](DataModel):
@classmethod
def of(
cls,
results: ScenarioEvaluatorResult | EvaluatorResult,
*_results: ScenarioEvaluatorResult | EvaluatorResult,
value: Value,
meta: Meta | MetaValues | None = None,
) -> Self:
free_results: list[EvaluatorResult] = []
Expand All @@ -177,7 +168,6 @@ def of(
)

return cls(
value=value,
results=tuple(scenario_results),
meta=Meta.of(meta),
)
Expand All @@ -196,13 +186,27 @@ async def evaluating(
*[evaluator(value) for evaluator in [evaluators, *_evaluators]],
return_exceptions=False,
),
value=value,
meta=Meta.of(meta),
)

value: Value = Field(
description="Evaluated value",
)
@classmethod
def merging(
    cls,
    result: Self,
    *results: Self,
    meta: Meta | MetaValues | None = None,
) -> Self:
    """
    Combine one or more case results into a single new result.

    Parameters
    ----------
    result : Self
        First result; its scenario results come first and its meta is the
        starting point for merging.
    *results : Self
        Further results; their scenario results are appended and their meta
        merged in the order given.
    meta : Meta | MetaValues | None
        Extra metadata, merged last on top of the combined meta.

    Returns
    -------
    Self
        A new instance holding all scenario results and the combined meta.
    """
    # Take an explicit copy: extending a list obtained from the input would
    # otherwise risk growing `result.results` in place when it is a list.
    merged_evaluations: list[ScenarioEvaluatorResult] = list(result.results)
    merged_meta: Meta = result.meta
    for other in results:
        merged_evaluations.extend(other.results)
        merged_meta = merged_meta.merged_with(other.meta)

    return cls(
        results=merged_evaluations,
        meta=merged_meta.merged_with(Meta.of(meta)),
    )

results: Sequence[ScenarioEvaluatorResult] = Field(
description="Evaluation results",
)
Expand All @@ -216,7 +220,7 @@ async def evaluating(
class EvaluationSuiteDefinition[
SuiteParameters: DataModel,
CaseParameters: DataModel,
Value: DataModel | str,
Value,
](Protocol):
async def __call__(
self,
Expand Down Expand Up @@ -245,7 +249,7 @@ async def save(
class EvaluationSuite[
SuiteParameters: DataModel,
CaseParameters: DataModel,
Value: DataModel | str,
Value,
]:
__slots__ = (
"_case_parameters",
Expand Down Expand Up @@ -280,7 +284,7 @@ async def __call__(
*case_parameters: EvaluationSuiteCase[CaseParameters] | CaseParameters | UUID,
parameters: SuiteParameters | None = None,
reload: bool = False,
) -> SuiteEvaluatorResult[SuiteParameters, CaseParameters, Value]:
) -> SuiteEvaluatorResult[SuiteParameters, CaseParameters]:
if context := self._execution_context:
async with context:
return await self._evaluate(
Expand All @@ -304,7 +308,7 @@ async def _evaluate(
*,
suite_parameters: SuiteParameters | None = None,
reload: bool = False,
) -> SuiteEvaluatorResult[SuiteParameters, CaseParameters, Value]:
) -> SuiteEvaluatorResult[SuiteParameters, CaseParameters]:
suite_data: EvaluationSuiteData[SuiteParameters, CaseParameters]
async with self._lock:
suite_data = await self._data(reload=reload)
Expand Down Expand Up @@ -353,15 +357,14 @@ async def _evaluate_case(
case_parameters: EvaluationSuiteCase[CaseParameters],
*,
suite_parameters: SuiteParameters,
) -> SuiteEvaluatorCaseResult[CaseParameters, Value]:
) -> SuiteEvaluatorCaseResult[CaseParameters]:
result: EvaluationCaseResult[Value] = await self._definition(
parameters=suite_parameters,
case_parameters=case_parameters.parameters,
)

return SuiteEvaluatorCaseResult[CaseParameters, Value](
return SuiteEvaluatorCaseResult[CaseParameters](
case=case_parameters,
value=result.value,
results=result.results,
)

Expand Down Expand Up @@ -502,7 +505,7 @@ async def remove_case(
await self._storage.save(self._data_cache)


def evaluation_suite[SuiteParameters: DataModel, CaseParameters: DataModel, Value: DataModel | str](
def evaluation_suite[SuiteParameters: DataModel, CaseParameters: DataModel, Value](
case_parameters: type[CaseParameters],
/,
suite_parameters: type[SuiteParameters] | SuiteParameters,
Expand Down
Loading