Commit b9e1134

gustavocidornelas authored and whoseoyster committed
chore: move cost estimation logic to the backend
1 parent 4859322 · commit b9e1134

5 files changed: +2 -186 lines changed


src/openlayer/lib/constants.py

Lines changed: 0 additions & 93 deletions
This file was deleted.
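Note: the deleted module held the client-side price tables that the tracers below index as constants.OPENAI_COST_PER_TOKEN[model]["input"] and ["output"]. The real rates leave with the file; what follows is a hypothetical sketch of the mapping's shape, with made-up model names and prices, for reference while reading the diffs below.

# Hypothetical reconstruction of the deleted module's shape; the models
# and per-token rates here are placeholders, not the real 93 lines.
OPENAI_COST_PER_TOKEN = {
    "gpt-3.5-turbo": {"input": 0.5e-6, "output": 1.5e-6},
    "gpt-4": {"input": 30e-6, "output": 60e-6},
}

AZURE_OPENAI_COST_PER_TOKEN = {
    "gpt-35-turbo": {"input": 0.5e-6, "output": 1.5e-6},
}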

src/openlayer/lib/integrations/anthropic_tracer.py

Lines changed: 0 additions & 8 deletions
@@ -23,7 +23,6 @@ def trace_anthropic(
         - end_time: The time when the completion was received.
         - latency: The time it took to generate the completion.
         - tokens: The total number of tokens used to generate the completion.
-        - cost: The estimated cost of the completion.
         - prompt_tokens: The number of tokens in the prompt.
         - completion_tokens: The number of tokens in the completion.
         - model: The model used to generate the completion.
@@ -152,15 +151,12 @@ def stream_chunks(
                 collected_function_call["inputs"] = json.loads(collected_function_call["inputs"])
             output_data = collected_function_call
 
-            cost = 0
-
             trace_args = create_trace_args(
                 end_time=end_time,
                 inputs={"prompt": kwargs["messages"]},
                 output=output_data,
                 latency=latency,
                 tokens=num_of_completion_tokens,
-                cost=cost,
                 prompt_tokens=num_of_prompt_tokens,
                 completion_tokens=num_of_completion_tokens,
                 model=kwargs.get("model"),
@@ -206,14 +202,12 @@ def handle_non_streaming_create(
     # Try to add step to the trace
     try:
         output_data = parse_non_streaming_output_data(response)
-        cost = 0
         trace_args = create_trace_args(
             end_time=end_time,
             inputs={"prompt": kwargs["messages"]},
             output=output_data,
             latency=(end_time - start_time) * 1000,
             tokens=response.usage.input_tokens + response.usage.output_tokens,
-            cost=cost,
             prompt_tokens=response.usage.input_tokens,
             completion_tokens=response.usage.output_tokens,
             model=response.model,
@@ -275,7 +269,6 @@ def create_trace_args(
     output: str,
     latency: float,
     tokens: int,
-    cost: float,
     prompt_tokens: int,
     completion_tokens: int,
     model: str,
@@ -291,7 +284,6 @@ def create_trace_args(
         "output": output,
         "latency": latency,
         "tokens": tokens,
-        "cost": cost,
         "prompt_tokens": prompt_tokens,
         "completion_tokens": completion_tokens,
         "model": model,

src/openlayer/lib/integrations/langchain_callback.py

Lines changed: 0 additions & 14 deletions
@@ -7,7 +7,6 @@
 from langchain import schema as langchain_schema
 from langchain.callbacks.base import BaseCallbackHandler
 
-from .. import constants
 from ..tracing import tracer
 
 LANGCHAIN_TO_OPENLAYER_PROVIDER_MAP = {"openai-chat": "OpenAI"}
@@ -27,7 +26,6 @@ def __init__(self, **kwargs: Any) -> None:
         self.provider: str = None
         self.model: Optional[str] = None
         self.model_parameters: Dict[str, Any] = None
-        self.cost: Optional[float] = None
         self.prompt_tokens: int = None
         self.completion_tokens: int = None
         self.total_tokens: int = None
@@ -87,10 +85,6 @@ def on_llm_end(self, response: langchain_schema.LLMResult, **kwargs: Any) -> Any
         if response.llm_output and "token_usage" in response.llm_output:
             self.prompt_tokens = response.llm_output["token_usage"].get("prompt_tokens", 0)
             self.completion_tokens = response.llm_output["token_usage"].get("completion_tokens", 0)
-            self.cost = self._get_cost_estimate(
-                num_input_tokens=self.prompt_tokens,
-                num_output_tokens=self.completion_tokens,
-            )
             self.total_tokens = response.llm_output["token_usage"].get("total_tokens", 0)
 
         for generations in response.generations:
@@ -99,13 +93,6 @@ def on_llm_end(self, response: langchain_schema.LLMResult, **kwargs: Any) -> Any
 
         self._add_to_trace()
 
-    def _get_cost_estimate(self, num_input_tokens: int, num_output_tokens: int) -> float:
-        """Returns the cost estimate for a given model and number of tokens."""
-        if self.model not in constants.OPENAI_COST_PER_TOKEN:
-            return None
-        cost_per_token = constants.OPENAI_COST_PER_TOKEN[self.model]
-        return cost_per_token["input"] * num_input_tokens + cost_per_token["output"] * num_output_tokens
-
     def _add_to_trace(self) -> None:
         """Adds to the trace."""
         name = PROVIDER_TO_STEP_NAME.get(self.provider, "Chat Completion Model")
@@ -114,7 +101,6 @@ def _add_to_trace(self) -> None:
             provider=self.provider,
             inputs={"prompt": self.prompt},
             output=self.output,
-            cost=self.cost,
             tokens=self.total_tokens,
             latency=self.latency,
             start_time=self.start_time,
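The removed _get_cost_estimate is a straight linear combination of the two token counts: cost = input_rate * num_input_tokens + output_rate * num_output_tokens, with None for models missing from the table. A self-contained sketch of the same logic; COST_PER_TOKEN and its rates are placeholders standing in for the deleted constants module.

from typing import Dict, Optional

# Placeholder rates, not the deleted module's real values.
COST_PER_TOKEN: Dict[str, Dict[str, float]] = {
    "gpt-3.5-turbo": {"input": 0.5e-6, "output": 1.5e-6},
}

def get_cost_estimate(model: str, num_input_tokens: int, num_output_tokens: int) -> Optional[float]:
    """Same logic as the deleted helper: linear in both token counts."""
    if model not in COST_PER_TOKEN:
        return None  # unknown model: no estimate, matching the original
    rates = COST_PER_TOKEN[model]
    return rates["input"] * num_input_tokens + rates["output"] * num_output_tokens

# 1,000 prompt tokens + 500 completion tokens:
# 0.5e-6 * 1000 + 1.5e-6 * 500 = 0.00125
print(get_cost_estimate("gpt-3.5-turbo", 1000, 500))  # ≈ 0.00125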

src/openlayer/lib/integrations/openai_tracer.py

Lines changed: 1 addition & 39 deletions
@@ -8,7 +8,6 @@
 
 import openai
 
-from .. import constants
 from ..tracing import tracer
 
 logger = logging.getLogger(__name__)
@@ -24,7 +23,6 @@ def trace_openai(
         - end_time: The time when the completion was received.
         - latency: The time it took to generate the completion.
         - tokens: The total number of tokens used to generate the completion.
-        - cost: The estimated cost of the completion.
         - prompt_tokens: The number of tokens in the prompt.
         - completion_tokens: The number of tokens in the completion.
         - model: The model used to generate the completion.
@@ -161,20 +159,13 @@ def stream_chunks(
             else:
                 collected_function_call["arguments"] = json.loads(collected_function_call["arguments"])
             output_data = collected_function_call
-            completion_cost = estimate_cost(
-                model=kwargs.get("model"),
-                prompt_tokens=0,
-                completion_tokens=(num_of_completion_tokens if num_of_completion_tokens else 0),
-                is_azure_openai=is_azure_openai,
-            )
 
             trace_args = create_trace_args(
                 end_time=end_time,
                 inputs={"prompt": kwargs["messages"]},
                 output=output_data,
                 latency=latency,
                 tokens=num_of_completion_tokens,
-                cost=completion_cost,
                 prompt_tokens=0,
                 completion_tokens=num_of_completion_tokens,
                 model=kwargs.get("model"),
@@ -196,21 +187,6 @@ def stream_chunks(
     )
 
 
-def estimate_cost(
-    prompt_tokens: int,
-    completion_tokens: int,
-    model: str,
-    is_azure_openai: bool = False,
-) -> float:
-    """Returns the cost estimate for a given OpenAI model and number of tokens."""
-    if is_azure_openai and model in constants.AZURE_OPENAI_COST_PER_TOKEN:
-        cost_per_token = constants.AZURE_OPENAI_COST_PER_TOKEN[model]
-    elif model in constants.OPENAI_COST_PER_TOKEN:
-        cost_per_token = constants.OPENAI_COST_PER_TOKEN[model]
-        return cost_per_token["input"] * prompt_tokens + cost_per_token["output"] * completion_tokens
-    return None
-
-
 def get_model_parameters(kwargs: Dict[str, Any]) -> Dict[str, Any]:
     """Gets the model parameters from the kwargs."""
     return {
@@ -234,7 +210,6 @@ def create_trace_args(
     output: str,
     latency: float,
     tokens: int,
-    cost: float,
     prompt_tokens: int,
     completion_tokens: int,
     model: str,
@@ -250,7 +225,6 @@ def create_trace_args(
         "output": output,
         "latency": latency,
         "tokens": tokens,
-        "cost": cost,
         "prompt_tokens": prompt_tokens,
         "completion_tokens": completion_tokens,
         "model": model,
@@ -300,19 +274,12 @@ def handle_non_streaming_create(
     # Try to add step to the trace
     try:
         output_data = parse_non_streaming_output_data(response)
-        cost = estimate_cost(
-            model=response.model,
-            prompt_tokens=response.usage.prompt_tokens,
-            completion_tokens=response.usage.completion_tokens,
-            is_azure_openai=is_azure_openai,
-        )
         trace_args = create_trace_args(
             end_time=end_time,
             inputs={"prompt": kwargs["messages"]},
             output=output_data,
             latency=(end_time - start_time) * 1000,
             tokens=response.usage.total_tokens,
-            cost=cost,
             prompt_tokens=response.usage.prompt_tokens,
             completion_tokens=response.usage.completion_tokens,
             model=response.model,
@@ -373,7 +340,7 @@ def trace_openai_assistant_thread_run(client: openai.OpenAI, run: "openai.types.
     """Trace a run from an OpenAI assistant.
 
     Once the run is completed, the thread data is published to Openlayer,
-    along with the latency, cost, and number of tokens used."""
+    along with the latency, and number of tokens used."""
     _type_check_run(run)
 
     # Do nothing if the run is not completed
@@ -420,11 +387,6 @@ def _extract_run_vars(run: "openai.types.beta.threads.run.Run") -> Dict[str, any]:
         "completion_tokens": run.usage.completion_tokens,
         "tokens": run.usage.total_tokens,
         "model": run.model,
-        "cost": estimate_cost(
-            model=run.model,
-            prompt_tokens=run.usage.prompt_tokens,
-            completion_tokens=run.usage.completion_tokens,
-        ),
     }
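With estimate_cost gone, the trace payload still carries model, prompt_tokens, and completion_tokens, which is everything a price table needs; per the commit message, the lookup now happens server-side. A hypothetical backend-side equivalent over one step dict, not part of this repo, shown only to illustrate where the computation moved:

from typing import Any, Dict, Optional

def backend_cost(step: Dict[str, Any], rates: Dict[str, Dict[str, float]]) -> Optional[float]:
    """Hypothetical server-side pricing from fields the tracer still sends."""
    model_rates = rates.get(step.get("model"))
    if model_rates is None:
        return None  # unpriced model, mirroring the old client behavior
    return (model_rates["input"] * step.get("prompt_tokens", 0)
            + model_rates["output"] * step.get("completion_tokens", 0))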

src/openlayer/lib/tracing/tracer.py

Lines changed: 1 addition & 32 deletions
@@ -306,7 +306,7 @@ def post_process_trace(
     else:
         input_variable_names = []
 
-    processed_steps = bubble_up_costs_and_tokens(trace_obj.to_dict())
+    processed_steps = trace_obj.to_dict()
 
     trace_data = {
         "inferenceTimestamp": root_step.start_time,
@@ -322,34 +322,3 @@ def post_process_trace(
     trace_data.update(input_variables)
 
     return trace_data, input_variable_names
-
-
-def bubble_up_costs_and_tokens(trace_dict: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-    """Adds the cost and number of tokens of nested steps to their parent steps."""
-
-    def add_step_costs_and_tokens(step: Dict[str, Any]) -> Tuple[float, int]:
-        step_cost = step_tokens = 0
-
-        if "cost" in step and step["cost"] is not None:
-            step_cost += step["cost"]
-        if "tokens" in step and step["tokens"] is not None:
-            step_tokens += step["tokens"]
-
-        # Recursively add costs and tokens from nested steps
-        for nested_step in step.get("steps", []):
-            nested_cost, nested_tokens = add_step_costs_and_tokens(nested_step)
-            step_cost += nested_cost
-            step_tokens += nested_tokens
-
-        if "steps" in step:
-            if step_cost > 0 and "cost" not in step:
-                step["cost"] = step_cost
-            if step_tokens > 0 and "tokens" not in step:
-                step["tokens"] = step_tokens
-
-        return step_cost, step_tokens
-
-    for root_step_dict in trace_dict:
-        add_step_costs_and_tokens(root_step_dict)
-
-    return trace_dict
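The deleted bubble_up_costs_and_tokens is a post-order walk: each step reports its own cost/tokens plus its descendants', and a parent with child steps gets the aggregate written back unless it already carries its own value. A runnable condensation of that recursion on a toy trace; bubble_up is a renamed stand-in and the step-dict shape is assumed from the deleted code above.

from typing import Any, Dict, List, Tuple

def bubble_up(trace_dict: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    def add(step: Dict[str, Any]) -> Tuple[float, int]:
        cost = step.get("cost") or 0  # treat missing/None as 0
        tokens = step.get("tokens") or 0
        for nested in step.get("steps", []):  # recurse into children first
            nested_cost, nested_tokens = add(nested)
            cost += nested_cost
            tokens += nested_tokens
        if "steps" in step:  # only parents get aggregates written back
            if cost > 0 and "cost" not in step:
                step["cost"] = cost
            if tokens > 0 and "tokens" not in step:
                step["tokens"] = tokens
        return cost, tokens

    for root in trace_dict:
        add(root)
    return trace_dict

# A chain step wrapping two LLM calls: the parent ends up with the sums.
trace = [{"name": "chain", "steps": [
    {"name": "llm-1", "cost": 0.002, "tokens": 150},
    {"name": "llm-2", "cost": 0.001, "tokens": 90},
]}]
bubble_up(trace)
print(round(trace[0]["cost"], 6), trace[0]["tokens"])  # 0.003 240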

0 commit comments