Skip to content

Commit 9cb9cc1

Browse files
gustavocidornelas authored and whoseoyster committed
feat: completes OPEN-6020 Refactor manual part of the Python SDK
1 parent 6361dbe commit 9cb9cc1

File tree

14 files changed

+1441
-4
lines changed

14 files changed

+1441
-4
lines changed

src/openlayer/lib/.keep

Lines changed: 0 additions & 4 deletions
This file was deleted.

src/openlayer/lib/__init__.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
"""Openlayer lib.
2+
"""
3+
4+
# Names exported by `from <this package> import *`.
__all__ = [
    "trace",
    "trace_openai",
    "trace_openai_assistant_thread_run",
    "Openlayer",
    "ConfigLlmData",
]
11+
12+
# ---------------------------------- Tracing --------------------------------- #
13+
from .tracing import tracer
14+
from .._client import Openlayer
15+
from ..types.inference_pipelines.data_stream_params import ConfigLlmData
16+
17+
# Bind `tracer.trace` to the module-level name `trace` (listed in `__all__`).
trace = tracer.trace
18+
19+
20+
def trace_openai(client):
    """Trace OpenAI chat completions.

    Args:
        client: The client to trace. Must be an instance of ``openai.Client``
            or ``openai.AzureOpenAI``.

    Returns:
        The value returned by ``openai_tracer.trace_openai(client)``.

    Raises:
        ValueError: If ``client`` is not an instance of one of the accepted
            client classes.
    """
    # Function-scope imports: `openai` and the integration module are only
    # imported when this function is actually called.
    # pylint: disable=import-outside-toplevel
    import openai

    from .integrations import openai_tracer

    if not isinstance(client, (openai.Client, openai.AzureOpenAI)):
        raise ValueError("Invalid client. Please provide an OpenAI client.")
    return openai_tracer.trace_openai(client)
30+
31+
32+
def trace_openai_assistant_thread_run(client, run):
    """Trace OpenAI Assistant thread run.

    Args:
        client: Forwarded unchanged to the integration function.
        run: Forwarded unchanged to the integration function.

    Returns:
        The value returned by
        ``openai_tracer.trace_openai_assistant_thread_run(client, run)``.
    """
    # Function-scope import: the integration module is only imported when this
    # function is actually called.
    # pylint: disable=import-outside-toplevel
    from .integrations import openai_tracer

    return openai_tracer.trace_openai_assistant_thread_run(client, run)

src/openlayer/lib/constants.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
"""Module for storing constants used throughout the OpenLayer SDK.
2+
"""
3+
4+
# --------------------------- LLM usage costs table -------------------------- #
5+
# Last update: 2024-02-05
6+
# Maps a model name to its "input" and "output" cost per token.
# NOTE(review): values are written as `<x>e-3` -- presumably a per-1K-token
# price scaled down to a single token; confirm against the provider pricing.
OPENAI_COST_PER_TOKEN = {
    "babbage-002": {
        "input": 0.0004e-3,
        "output": 0.0004e-3,
    },
    "davinci-002": {
        "input": 0.002e-3,
        "output": 0.002e-3,
    },
    "gpt-3.5-turbo": {
        "input": 0.0005e-3,
        "output": 0.0015e-3,
    },
    "gpt-3.5-turbo-0125": {
        "input": 0.0005e-3,
        "output": 0.0015e-3,
    },
    "gpt-3.5-turbo-0301": {
        "input": 0.0015e-3,
        "output": 0.002e-3,
    },
    "gpt-3.5-turbo-0613": {
        "input": 0.0015e-3,
        "output": 0.002e-3,
    },
    "gpt-3.5-turbo-1106": {
        "input": 0.001e-3,
        "output": 0.002e-3,
    },
    "gpt-3.5-turbo-16k-0613": {
        "input": 0.003e-3,
        "output": 0.004e-3,
    },
    "gpt-3.5-turbo-instruct": {
        "input": 0.0015e-3,
        "output": 0.002e-3,
    },
    "gpt-4": {
        "input": 0.03e-3,
        "output": 0.06e-3,
    },
    "gpt-4-turbo-preview": {
        "input": 0.01e-3,
        "output": 0.03e-3,
    },
    "gpt-4-0125-preview": {
        "input": 0.01e-3,
        "output": 0.03e-3,
    },
    "gpt-4-1106-preview": {
        "input": 0.01e-3,
        "output": 0.03e-3,
    },
    "gpt-4-0314": {
        "input": 0.03e-3,
        "output": 0.06e-3,
    },
    "gpt-4-1106-vision-preview": {
        "input": 0.01e-3,
        "output": 0.03e-3,
    },
    "gpt-4-32k": {
        "input": 0.06e-3,
        "output": 0.12e-3,
    },
    "gpt-4-32k-0314": {
        "input": 0.06e-3,
        "output": 0.12e-3,
    },
}
76+
# Last update: 2024-03-26
77+
# Maps an Azure OpenAI model name to its "input" and "output" cost per token.
# NOTE(review): values are written as `<x>e-3` -- presumably a per-1K-token
# price scaled down to a single token; confirm against the provider pricing.
AZURE_OPENAI_COST_PER_TOKEN = {
    "babbage-002": {
        "input": 0.0004e-3,
        "output": 0.0004e-3,
    },
    "davinci-002": {
        "input": 0.002e-3,
        "output": 0.002e-3,
    },
    "gpt-35-turbo": {"input": 0.0005e-3, "output": 0.0015e-3},
    "gpt-35-turbo-0125": {"input": 0.0005e-3, "output": 0.0015e-3},
    "gpt-35-turbo-instruct": {"input": 0.0015e-3, "output": 0.002e-3},
    "gpt-4-turbo": {"input": 0.01e-3, "output": 0.03e-3},
    "gpt-4-turbo-vision": {"input": 0.01e-3, "output": 0.03e-3},
    "gpt-4-8k": {"input": 0.03e-3, "output": 0.06e-3},
    "gpt-4-32k": {"input": 0.06e-3, "output": 0.12e-3},
}

src/openlayer/lib/core/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+

src/openlayer/lib/core/base_model.py

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
"""Base class for an Openlayer model."""
2+
3+
import os
4+
import abc
5+
import json
6+
import time
7+
import inspect
8+
import argparse
9+
from typing import Any, Dict, Tuple
10+
from dataclasses import field, dataclass
11+
12+
import pandas as pd
13+
14+
from ..tracing import tracer
15+
16+
17+
@dataclass
class RunReturn:
    """The return type of the `run` method in the Openlayer model."""

    output: Any
    """The output of the model."""

    # `default_factory` gives every instance its own dict rather than a shared one.
    other_fields: Dict[str, Any] = field(default_factory=dict)
    """Any other fields that you want to log."""
26+
27+
28+
class OpenlayerModel(abc.ABC):
29+
"""Interface for the Openlayer model.
30+
31+
Your model's class should inherit from this class and implement either:
32+
- the `run` method (which takes a single row of data as input and returns
33+
a `RunReturn` object)
34+
- `run_batch_from_df` method (which takes a pandas DataFrame as input and returns
35+
a tuple of a DataFrame and a config dict).
36+
37+
It is more conventional to implement the `run` method.
38+
39+
Refer to Openlayer's templates for examples of how to implement this class.
40+
"""
41+
42+
def run_from_cli(self) -> None:
43+
"""Run the model from the command line."""
44+
parser = argparse.ArgumentParser(description="Run data through a model.")
45+
parser.add_argument(
46+
"--dataset-path", type=str, required=True, help="Path to the dataset"
47+
)
48+
parser.add_argument(
49+
"--output-dir",
50+
type=str,
51+
required=False,
52+
help="Directory to dump the results in",
53+
)
54+
55+
# Parse the arguments
56+
args = parser.parse_args()
57+
58+
return self.batch(
59+
dataset_path=args.dataset_path,
60+
output_dir=args.output_dir,
61+
)
62+
63+
def batch(self, dataset_path: str, output_dir: str) -> None:
64+
"""Reads the dataset from a file and runs the model on it."""
65+
# Load the dataset into a pandas DataFrame
66+
if dataset_path.endswith(".csv"):
67+
df = pd.read_csv(dataset_path)
68+
elif dataset_path.endswith(".json"):
69+
df = pd.read_json(dataset_path, orient="records")
70+
71+
# Call the model's run_batch method, passing in the DataFrame
72+
output_df, config = self.run_batch_from_df(df)
73+
self.write_output_to_directory(output_df, config, output_dir)
74+
75+
def run_batch_from_df(self, df: pd.DataFrame) -> Tuple[pd.DataFrame, dict]:
76+
"""Function that runs the model and returns the result."""
77+
# Ensure the 'output' column exists
78+
if "output" not in df.columns:
79+
df["output"] = None
80+
81+
# Get the signature of the 'run' method
82+
run_signature = inspect.signature(self.run)
83+
84+
for index, row in df.iterrows():
85+
# Filter row_dict to only include keys that are valid parameters
86+
# for the 'run' method
87+
row_dict = row.to_dict()
88+
filtered_kwargs = {
89+
k: v for k, v in row_dict.items() if k in run_signature.parameters
90+
}
91+
92+
# Call the run method with filtered kwargs
93+
output = self.run(**filtered_kwargs)
94+
95+
df.at[index, "output"] = output.output
96+
97+
for k, v in output.other_fields.items():
98+
if k not in df.columns:
99+
df[k] = None
100+
df.at[index, k] = v
101+
102+
trace = tracer.get_current_trace()
103+
if trace:
104+
processed_trace, _ = tracer.post_process_trace(trace_obj=trace)
105+
df.at[index, "steps"] = trace.to_dict()
106+
if "latency" in processed_trace:
107+
df.at[index, "latency"] = processed_trace["latency"]
108+
if "cost" in processed_trace:
109+
df.at[index, "cost"] = processed_trace["cost"]
110+
if "tokens" in processed_trace:
111+
df.at[index, "tokens"] = processed_trace["tokens"]
112+
113+
config = {
114+
"outputColumnName": "output",
115+
"inputVariableNames": list(run_signature.parameters.keys()),
116+
"metadata": {
117+
"output_timestamp": time.time(),
118+
},
119+
}
120+
121+
if "latency" in df.columns:
122+
config["latencyColumnName"] = "latency"
123+
if "cost" in df.columns:
124+
config["costColumnName"] = "cost"
125+
if "tokens" in df.columns:
126+
config["numOfTokenColumnName"] = "tokens"
127+
128+
return df, config
129+
130+
def write_output_to_directory(
131+
self,
132+
output_df: pd.DataFrame,
133+
config: Dict[str, Any],
134+
output_dir: str,
135+
fmt: str = "json",
136+
):
137+
"""Writes the output DataFrame to a file in the specified directory based on the
138+
given format.
139+
"""
140+
os.makedirs(
141+
output_dir, exist_ok=True
142+
) # Create the directory if it doesn't exist
143+
144+
# Determine the filename based on the dataset name and format
145+
filename = f"dataset.{fmt}"
146+
output_path = os.path.join(output_dir, filename)
147+
148+
# Write the config to a json file
149+
config_path = os.path.join(output_dir, "config.json")
150+
with open(config_path, "w", encoding="utf-8") as f:
151+
json.dump(config, f, indent=4)
152+
153+
# Write the DataFrame to the file based on the specified format
154+
if fmt == "csv":
155+
output_df.to_csv(output_path, index=False)
156+
elif fmt == "json":
157+
output_df.to_json(output_path, orient="records", indent=4)
158+
else:
159+
raise ValueError("Unsupported format. Please choose 'csv' or 'json'.")
160+
161+
print(f"Output written to {output_path}")
162+
163+
@abc.abstractmethod
164+
def run(self, **kwargs) -> RunReturn:
165+
"""Function that runs the model and returns the result."""
166+
pass

src/openlayer/lib/integrations/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)