Skip to content

release: 0.2.0-alpha.10 #266

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .release-please-manifest.json
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
".": "0.2.0-alpha.9"
".": "0.2.0-alpha.10"
}
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,15 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
### Removed
* Deprecated and removed `publish_ground_truths` method. Use `update_data` instead.

## 0.2.0-alpha.10 (2024-07-19)

Full Changelog: [v0.2.0-alpha.9...v0.2.0-alpha.10](https://github.com/openlayer-ai/openlayer-python/compare/v0.2.0-alpha.9...v0.2.0-alpha.10)

### Features

* **api:** OpenAPI spec update via Stainless API ([#265](https://github.com/openlayer-ai/openlayer-python/issues/265)) ([58a602f](https://github.com/openlayer-ai/openlayer-python/commit/58a602f3fa3ab61466b90bcfe1a1ce8db4a83fb9))
* feat: add new columns to dataset when running custom metrics ([9c0d94c](https://github.com/openlayer-ai/openlayer-python/commit/9c0d94c1ab79ab8d3f94aa21f8c460e4d7e029f7))

## 0.2.0-alpha.9 (2024-07-17)

Full Changelog: [v0.2.0-alpha.8...v0.2.0-alpha.9](https://github.com/openlayer-ai/openlayer-python/compare/v0.2.0-alpha.8...v0.2.0-alpha.9)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "openlayer"
version = "0.2.0-alpha.9"
version = "0.2.0-alpha.10"
description = "The official Python library for the openlayer API"
dynamic = ["readme"]
license = "Apache-2.0"
Expand Down
2 changes: 1 addition & 1 deletion src/openlayer/_version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

__title__ = "openlayer"
__version__ = "0.2.0-alpha.9" # x-release-please-version
__version__ = "0.2.0-alpha.10" # x-release-please-version
62 changes: 57 additions & 5 deletions src/openlayer/lib/core/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import json
import os
from dataclasses import asdict, dataclass, field
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Optional, Union, Set

import pandas as pd

Expand All @@ -25,6 +25,9 @@ class MetricReturn:
meta: Dict[str, Any] = field(default_factory=dict)
"""Any useful metadata in a JSON serializable dict."""

added_cols: Set[str] = field(default_factory=set)
"""Columns added to the dataset."""


@dataclass
class Dataset:
Expand All @@ -42,6 +45,12 @@ class Dataset:
output_path: str
"""The path to the dataset outputs."""

data_format: str
"""The format of the written dataset. E.g. 'csv' or 'json'."""

added_cols: Set[str] = field(default_factory=set)
"""Columns added to the dataset."""


class MetricRunner:
"""A class to run a list of metrics."""
Expand All @@ -68,6 +77,9 @@ def run_metrics(self, metrics: List[BaseMetric]) -> None:

self._compute_metrics(metrics)

# Write the updated datasets to the output location
self._write_updated_datasets_to_output()

def _parse_args(self) -> None:
parser = argparse.ArgumentParser(description="Compute custom metrics.")
parser.add_argument(
Expand Down Expand Up @@ -124,13 +136,21 @@ def _load_datasets(self) -> None:
# Load the dataset into a pandas DataFrame
if os.path.exists(os.path.join(dataset_path, "dataset.csv")):
dataset_df = pd.read_csv(os.path.join(dataset_path, "dataset.csv"))
data_format = "csv"
elif os.path.exists(os.path.join(dataset_path, "dataset.json")):
dataset_df = pd.read_json(os.path.join(dataset_path, "dataset.json"), orient="records")
data_format = "json"
else:
raise ValueError(f"No dataset found in {dataset_folder}.")

datasets.append(
Dataset(name=dataset_folder, config=dataset_config, df=dataset_df, output_path=dataset_path)
Dataset(
name=dataset_folder,
config=dataset_config,
df=dataset_df,
output_path=dataset_path,
data_format=data_format,
)
)
else:
raise ValueError("No model found in the openlayer.json file. Cannot compute metric.")
Expand All @@ -148,6 +168,31 @@ def _compute_metrics(self, metrics: List[BaseMetric]) -> None:
continue
metric.compute(self.datasets)

def _write_updated_datasets_to_output(self) -> None:
    """Persist every dataset that had columns added to it.

    Datasets whose ``added_cols`` set is empty are skipped; the others are
    handed, in their original order, to
    ``_write_updated_dataset_to_output``.
    """
    # Lazily select only the datasets that report added columns.
    modified = (candidate for candidate in self.datasets if candidate.added_cols)
    for candidate in modified:
        self._write_updated_dataset_to_output(candidate)

def _write_updated_dataset_to_output(self, dataset: Dataset) -> None:
    """Write a single updated dataset to its output location.

    The file is named ``dataset.<data_format>`` and is placed inside
    ``dataset.output_path``. The whole DataFrame is written, not only the
    added columns (see the TODO below).

    Args:
        dataset: The dataset to write. Its ``df``, ``data_format``,
            ``output_path`` and ``name`` attributes are used.

    Raises:
        ValueError: If ``dataset.data_format`` is neither 'csv' nor 'json'.
    """

    # The filename depends only on the format; the directory comes from
    # the dataset's output path.
    filename = f"dataset.{dataset.data_format}"
    data_path = os.path.join(dataset.output_path, filename)

    # TODO: Read the dataset again and only include the added columns

    # Write the DataFrame to the file based on the specified format
    if dataset.data_format == "csv":
        dataset.df.to_csv(data_path, index=False)
    elif dataset.data_format == "json":
        # Do not pass `index=False` here: pandas < 2.1 rejects it for
        # orient="records" with a ValueError, and the 'records' orient never
        # includes the index anyway, so the output is identical without it.
        dataset.df.to_json(data_path, orient="records", indent=4)
    else:
        raise ValueError("Unsupported format. Please choose 'csv' or 'json'.")

    print(f"Updated dataset {dataset.name} written to {data_path}")


class BaseMetric(abc.ABC):
"""Interface for the Base metric.
Expand All @@ -163,7 +208,7 @@ def key(self) -> str:
def compute(self, datasets: List[Dataset]) -> None:
"""Compute the metric on the model outputs."""
for dataset in datasets:
metric_return = self.compute_on_dataset(dataset.config, dataset.df)
metric_return = self.compute_on_dataset(dataset)
metric_value = metric_return.value
if metric_return.unit:
metric_value = f"{metric_value} {metric_return.unit}"
Expand All @@ -172,8 +217,12 @@ def compute(self, datasets: List[Dataset]) -> None:
output_dir = os.path.join(dataset.output_path, "metrics")
self._write_metric_return_to_file(metric_return, output_dir)

# Add the added columns to the dataset
if metric_return.added_cols:
dataset.added_cols.update(metric_return.added_cols)

@abc.abstractmethod
def compute_on_dataset(self, config: dict, df: pd.DataFrame) -> MetricReturn:
def compute_on_dataset(self, dataset: Dataset) -> MetricReturn:
"""Compute the metric on a specific dataset."""
pass

Expand All @@ -183,6 +232,9 @@ def _write_metric_return_to_file(self, metric_return: MetricReturn, output_dir:
# Create the directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Turn the metric return to a dict
metric_return_dict = asdict(metric_return)

with open(os.path.join(output_dir, f"{self.key}.json"), "w", encoding="utf-8") as f:
json.dump(asdict(metric_return), f, indent=4)
json.dump(metric_return_dict, f, indent=4)
print(f"Metric ({self.key}) value written to {output_dir}/{self.key}.json")
4 changes: 2 additions & 2 deletions src/openlayer/resources/inference_pipelines/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def stream(
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> DataStreamResponse:
"""
Create an inference data point in an inference pipeline.
Publish an inference data point to an inference pipeline.

Args:
config: Configuration for the data stream. Depends on your **Openlayer project task
Expand Down Expand Up @@ -108,7 +108,7 @@ async def stream(
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> DataStreamResponse:
"""
Create an inference data point in an inference pipeline.
Publish an inference data point to an inference pipeline.

Args:
config: Configuration for the data stream. Depends on your **Openlayer project task
Expand Down