23 changes: 15 additions & 8 deletions examples/core.py
@@ -8,7 +8,7 @@
from dotenv import load_dotenv
load_dotenv()

def run_provider(provider, model, api_key, **kwargs):
def run_provider(provider, model, api_key=None, **kwargs):
print(f"\n\n###RUNNING for <{provider}>, <{model}> ###")
llm = LLMCore(provider=provider, api_key=api_key, **kwargs)

@@ -107,6 +107,16 @@ def build_chat_request(model: str, chat_input: str, is_stream: bool, max_tokens:
                "max_completion_tokens": max_tokens
            }
        }
    elif 'amazon.nova' in model or 'anthropic.claude' in model:
        chat_request = {
            "chat_input": chat_input,
            "model": model,
            "is_stream": is_stream,
            "retries": 0,
            "parameters": {
                "maxTokens": max_tokens
            }
        }
    else:
        chat_request = {
            "chat_input": chat_input,
@@ -150,10 +160,7 @@ def multiple_provider_runs(provider:str, model:str, num_runs:int, api_key:str, *


multiple_provider_runs(provider="vertexai", model="gemini-1.5-flash", num_runs=1, api_key=os.environ["GOOGLE_API_KEY"])
# provider = "vertexai"
# model = "gemini-1.5-pro-latest"
# for _ in range(1):
# latencies = run_provider(provider=provider, model=model,
# api_key=os.environ["GOOGLE_API_KEY"],
# )
# pprint(latencies)

# Bedrock
multiple_provider_runs(provider="bedrock", model="us.amazon.nova-lite-v1:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"])
#multiple_provider_runs(provider="bedrock", model="anthropic.claude-3-5-sonnet-20241022-v2:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"])
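For readers skimming the example, a minimal standalone sketch of the parameter routing that `build_chat_request` now performs (the helper name is made up for illustration; the key names come from the diff above): Bedrock Converse models (`amazon.nova`, `anthropic.claude`) get the camelCase `maxTokens` key, while other models keep `max_completion_tokens`.

```python
def sketch_build_chat_request(model: str, chat_input: str, is_stream: bool, max_tokens: int = 1000) -> dict:
    # Bedrock Converse models take camelCase inference parameters.
    if "amazon.nova" in model or "anthropic.claude" in model:
        parameters = {"maxTokens": max_tokens}
    else:
        parameters = {"max_completion_tokens": max_tokens}
    return {
        "chat_input": chat_input,
        "model": model,
        "is_stream": is_stream,
        "retries": 0,
        "parameters": parameters,
    }

print(sketch_build_chat_request("us.amazon.nova-lite-v1:0", "Hello!", is_stream=False)["parameters"])
# {'maxTokens': 1000}
```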
18 changes: 17 additions & 1 deletion libs/core/llmstudio_core/config.yaml
@@ -73,7 +73,7 @@ providers:
        step: 1
  bedrock:
    id: bedrock
    name: Bedrock
    name: Bedrock ConverseAPI
    chat: true
    embed: true
    keys:
@@ -126,6 +126,22 @@ providers:
        max_tokens: 100000
        input_token_cost: 0.000008
        output_token_cost: 0.000024
      us.amazon.nova-pro-v1:0:
        mode: chat
        max_tokens: 300000
        input_token_cost: 0.0000008
        output_token_cost: 0.0000016
      us.amazon.nova-lite-v1:0:
        mode: chat
        max_tokens: 300000
        input_token_cost: 0.00000006
        output_token_cost: 0.00000012
      us.amazon.nova-micro-v1:0:
        mode: chat
        max_tokens: 128000
        input_token_cost: 0.000000035
        output_token_cost: 0.00000007

    parameters:
      temperature:
        name: "Temperature"
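As a sanity check on the new pricing entries, cost presumably scales linearly with token counts (cost = tokens × per-token rate); a quick sketch using the `us.amazon.nova-lite-v1:0` figures above:

```python
# Estimated cost of one us.amazon.nova-lite-v1:0 call, using the rates from config.yaml.
input_token_cost = 0.00000006   # USD per input token  (~$0.06 per 1M tokens)
output_token_cost = 0.00000012  # USD per output token (~$0.12 per 1M tokens)

input_tokens, output_tokens = 1_500, 400
cost_usd = input_tokens * input_token_cost + output_tokens * output_token_cost
print(f"${cost_usd:.6f}")  # $0.000138
```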
2 changes: 1 addition & 1 deletion libs/core/llmstudio_core/providers/__init__.py
@@ -2,7 +2,7 @@
from typing import Optional

from llmstudio_core.providers.azure import AzureProvider
from llmstudio_core.providers.bedrock.provider import BedrockProvider
from llmstudio_core.providers.bedrock_converse import BedrockConverseProvider

# from llmstudio_core.providers.ollama import OllamaProvider #TODO: adapt it
from llmstudio_core.providers.openai import OpenAIProvider
Empty file.
43 changes: 0 additions & 43 deletions libs/core/llmstudio_core/providers/bedrock/provider.py

This file was deleted.

Expand Up @@ -23,14 +23,15 @@
ChoiceDelta,
ChoiceDeltaToolCall,
ChoiceDeltaToolCallFunction,
CompletionUsage,
)
from pydantic import ValidationError

SERVICE = "bedrock-runtime"


@provider
class BedrockAnthropicProvider(ProviderCore):
class BedrockConverseProvider(ProviderCore):
    def __init__(self, config, **kwargs):
        super().__init__(config, **kwargs)
        self._client = boto3.client(
@@ -46,17 +47,17 @@ def __init__(self, config, **kwargs):

    @staticmethod
    def _provider_config_name():
        return "bedrock-antropic"
        return "bedrock"

    def validate_request(self, request: ChatRequest):
        return ChatRequest(**request)

    async def agenerate_client(self, request: ChatRequest) -> Coroutine[Any, Any, Any]:
        """Generate an AWS Bedrock client"""
        """Generate an AWS Bedrock Converse client"""
        return self.generate_client(request=request)

    def generate_client(self, request: ChatRequest) -> Coroutine[Any, Any, Generator]:
        """Generate an AWS Bedrock client"""
        """Generate an AWS Bedrock Converse client"""
        try:
            messages, system_prompt = self._process_messages(request.chat_input)
            tools = self._process_tools(request.parameters)
@@ -83,7 +84,9 @@ def generate_client(self, request: ChatRequest) -> Coroutine[Any, Any, Generator
    async def aparse_response(
        self, response: Any, **kwargs
    ) -> AsyncGenerator[Any, None]:
        return self.parse_response(response=response, **kwargs)
        result = self.parse_response(response=response, **kwargs)
        for chunk in result:
            yield chunk
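The `aparse_response` change above turns the method into a proper async generator: with the old `return self.parse_response(...)`, awaiting the coroutine handed back a plain sync generator, whereas re-yielding each chunk lets callers use `async for` directly. A self-contained illustration of the pattern (not library code):

```python
import asyncio
from typing import AsyncGenerator, Generator

def sync_chunks() -> Generator[str, None, None]:
    """Stand-in for a synchronous parse_response-style generator."""
    yield from ("a", "b", "c")

async def async_chunks() -> AsyncGenerator[str, None]:
    # Re-yield each item so consumers can `async for` over the stream.
    for chunk in sync_chunks():
        yield chunk

async def main() -> None:
    async for chunk in async_chunks():
        print(chunk)

asyncio.run(main())  # prints a, b, c
```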

    def parse_response(self, response: AsyncGenerator[Any, None], **kwargs) -> Any:
        tool_name = None
@@ -222,6 +225,22 @@ def parse_response(self, response: AsyncGenerator[Any, None], **kwargs) -> Any:
                )
                yield final_chunk.model_dump()

            elif chunk.get("metadata"):
                usage = chunk["metadata"].get("usage")
                final_stream_chunk = ChatCompletionChunk(
                    id=str(uuid.uuid4()),
                    choices=[],
                    created=int(time.time()),
                    model=kwargs.get("request").model,
                    object="chat.completion.chunk",
                    usage=CompletionUsage(
                        completion_tokens=usage["outputTokens"],
                        prompt_tokens=usage["inputTokens"],
                        total_tokens=usage["totalTokens"],
                    ),
                )
                yield final_stream_chunk.model_dump()
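With the new `metadata` branch, the final streamed chunk carries token counts in OpenAI's `CompletionUsage` shape. A hedged sketch of how a consumer of these chunk dicts might pick the usage out of the stream (the helper name and call site are illustrative, not part of the library):

```python
from typing import Iterable, Optional

def drain_stream(chunks: Iterable[dict]) -> Optional[dict]:
    """Print streamed content and return the usage dict from the final metadata chunk, if any."""
    usage = None
    for chunk in chunks:
        for choice in chunk.get("choices", []):
            content = (choice.get("delta") or {}).get("content")
            if content:
                print(content, end="")
        if chunk.get("usage"):
            usage = chunk["usage"]
    return usage

# e.g. usage = drain_stream(provider.parse_response(response, request=request))
# -> {"completion_tokens": ..., "prompt_tokens": ..., "total_tokens": ...}
```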

@staticmethod
def _process_messages(
chat_input: Union[str, List[Dict[str, str]]]
14 changes: 11 additions & 3 deletions libs/core/llmstudio_core/providers/provider.py
@@ -808,6 +808,11 @@ def _calculate_metrics(
        output_tokens = len(self.tokenizer.encode(self._output_to_string(output)))
        total_tokens = input_tokens + output_tokens

        if usage:
            input_tokens = usage.get("prompt_tokens", input_tokens)
            output_tokens = usage.get("completion_tokens", output_tokens)
            total_tokens = usage.get("total_tokens", total_tokens)

        # Cost calculations
        input_cost = self._calculate_cost(input_tokens, model_config.input_token_cost)
        output_cost = self._calculate_cost(
@@ -823,9 +828,12 @@
            )
            total_cost_usd -= cached_savings

        reasoning_tokens = usage.get("completion_tokens_details", {}).get(
            "reasoning_tokens", None
        )
        completion_tokens_details = usage.get("completion_tokens_details")
        if completion_tokens_details:
            reasoning_tokens = completion_tokens_details.get(
                "reasoning_tokens", None
            )

        if reasoning_tokens:
            total_tokens += reasoning_tokens
            reasoning_cost = self._calculate_cost(
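The `_calculate_metrics` edits boil down to a simple precedence rule: tokenizer estimates are computed first and then overridden by provider-reported usage (such as the Bedrock Converse metadata chunk) when it is present. A standalone sketch of just that rule (the real method also derives costs and reasoning tokens):

```python
from typing import Optional, Tuple

def resolve_token_counts(estimated_input: int, estimated_output: int, usage: Optional[dict]) -> Tuple[int, int, int]:
    input_tokens, output_tokens = estimated_input, estimated_output
    total_tokens = input_tokens + output_tokens
    if usage:
        # Provider-reported counts win over local tokenizer estimates.
        input_tokens = usage.get("prompt_tokens", input_tokens)
        output_tokens = usage.get("completion_tokens", output_tokens)
        total_tokens = usage.get("total_tokens", total_tokens)
    return input_tokens, output_tokens, total_tokens

print(resolve_token_counts(12, 40, {"prompt_tokens": 10, "completion_tokens": 42, "total_tokens": 52}))  # (10, 42, 52)
print(resolve_token_counts(12, 40, None))  # (12, 40, 52)
```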