Merged
26 changes: 26 additions & 0 deletions README.md
@@ -156,6 +156,32 @@ visualization:
- "status_breakdown" # Pie chart for pass/fail/error breakdown
```

#### Non-OpenAI configuration example
```yaml
# Judge-LLM Google Gemini
llm:
  provider: "gemini"
  model: "gemini-1.5-pro"
  temperature: 0.0
  max_tokens: 512
  timeout: 120
  num_retries: 3

# Judge-LLM HuggingFace embeddings
# provider: "huggingface" or "openai"
# model: model name
# provider_kwargs: additional arguments; for examples, see
#   https://docs.ragas.io/en/stable/references/embeddings/#ragas.embeddings.HuggingfaceEmbeddings
embedding:
  provider: "huggingface"
  model: "sentence-transformers/all-mpnet-base-v2"
  provider_kwargs:
    # cache_folder: <path_for_downloaded_model>
    model_kwargs:
      device: "cpu"
...
```
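With this configuration the judge LLM runs against Gemini, so the matching credentials must be exported first; per `validate_gemini_env` in this PR, either variable satisfies the check:

```bash
export GEMINI_API_KEY="<your-key>"    # or: export GOOGLE_API_KEY="<your-key>"
```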

### Evaluation Data Structure (`config/evaluation_data.yaml`)

```yaml
# …
```
7 changes: 7 additions & 0 deletions config/system.yaml
@@ -9,6 +9,13 @@ llm:
  timeout: 300 # Request timeout in seconds
  num_retries: 3 # Retry attempts

# Default embedding configuration:
# embedding:
#   provider: "openai"
#   model: "text-embedding-3-small"
#   provider_kwargs: {}


# API Configuration
# Used to fetch real-time data. Currently supports the lightspeed-stack API,
# but it can be integrated with other APIs with minimal changes.
2 changes: 2 additions & 0 deletions pyproject.toml
@@ -27,6 +27,8 @@ dependencies = [
"click>=8.0.0",
"diskcache>=5.6.3",
"tenacity>=9.1.2",
"langchain[huggingface]>=0.3.27",
"sentence-transformers>=5.1.0",
]

[dependency-groups]
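For quick experiments outside the project environment, the two new dependencies can be installed directly; a minimal sketch, assuming pip as the installer (version pins copied from `pyproject.toml`):

```bash
pip install "langchain[huggingface]>=0.3.27" "sentence-transformers>=5.1.0"
```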
3 changes: 3 additions & 0 deletions src/lightspeed_evaluation/core/constants.py
@@ -11,6 +11,9 @@
DEFAULT_LLM_MAX_TOKENS = 512
DEFAULT_LLM_RETRIES = 3

DEFAULT_EMBEDDING_PROVIDER = "openai"
DEFAULT_EMBEDDING_MODEL = "text-embedding-3-small"
Comment on lines +14 to +15

**Collaborator:** This is just for reference; still, I would suggest keeping an open-source model as the default.

**Contributor (author):** Note that choosing a HuggingFace model requires downloading and running it locally, IMHO a big change from the current setup.

**Collaborator:** For the default, I would personally prefer a free model; with HuggingFace, most models are downloaded easily if not already present on the system. For OpenAI, the team needs to have access and a key. I know we have OpenAI as the default LLM, but many people use Gemini, so they won't have an OpenAI embedding model.


DEFAULT_OUTPUT_DIR = "./eval_output"
DEFAULT_BASE_FILENAME = "evaluation"

45 changes: 45 additions & 0 deletions src/lightspeed_evaluation/core/embedding/manager.py
@@ -0,0 +1,45 @@
"""Embedding Manager - Generic embedding configuration, validation, and parameter provider."""

from ..llm import validate_openai_env
from ..models import EmbeddingConfig, SystemConfig


class EmbeddingError(Exception):
"""Embedding config errors."""


class EmbeddingManager: # pylint: disable=too-few-public-methods
"""Generic Embedding Manager."""

def __init__(self, config: EmbeddingConfig):
"""Initialize with validated environment and constructed model name."""
self.config = config
self._validate_config()
print(
f"""
✅ Embedding Manager: {self.config.provider} -- {self.config.model} {self.config.provider_kwargs}"""
)

def _validate_config(self) -> None:
"""Validate config and env variables."""

def empty_check() -> None:
pass

env_validator = {
"openai": validate_openai_env,
# "google": _validate_gemini_env, # Google embeddings are not supported at the moment
"huggingface": empty_check,
}.get(self.config.provider)

if env_validator is None:
raise EmbeddingError(
f"Unsupported embedding provider {self.config.provider}"
)

env_validator()

@classmethod
def from_system_config(cls, system_config: SystemConfig) -> "EmbeddingManager":
"""Create Embedding Manager from system configuration."""
return cls(system_config.embedding)
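For reference, a minimal usage sketch of the new manager; the `EmbeddingConfig` fields mirror the README example, and the import paths are inferred from this file's package layout:

```python
from lightspeed_evaluation.core.embedding.manager import EmbeddingError, EmbeddingManager
from lightspeed_evaluation.core.models import EmbeddingConfig

# No API key is needed for the "huggingface" provider: its env check is a no-op.
config = EmbeddingConfig(
    provider="huggingface",
    model="sentence-transformers/all-mpnet-base-v2",
    provider_kwargs={"model_kwargs": {"device": "cpu"}},
)

try:
    manager = EmbeddingManager(config)  # validates provider and env vars
except EmbeddingError as err:
    print(f"Embedding setup failed: {err}")
```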
31 changes: 31 additions & 0 deletions src/lightspeed_evaluation/core/embedding/ragas.py
@@ -0,0 +1,31 @@
"""Ragas Embedding Manager - Ragas specific embedding wrapper."""

from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import OpenAIEmbeddings
from ragas.embeddings import LangchainEmbeddingsWrapper

from ..embedding.manager import EmbeddingManager


class RagasEmbeddingManager: # pylint: disable=too-few-public-methods
"""Ragas Embedding Manager, modifies global ragas settings."""

def __init__(self, embedding_manager: EmbeddingManager):
"""Init RagasEmbeddingManager."""
config = embedding_manager.config
self.config = config

embedding_class = {
"openai": OpenAIEmbeddings,
"huggingface": HuggingFaceEmbeddings,
}.get(config.provider)
if not embedding_class:
raise RuntimeError(f"Unknown embedding provider {config.provider}")

kwargs = config.provider_kwargs
if kwargs is None:
kwargs = {}

self.embeddings = LangchainEmbeddingsWrapper(
embedding_class(model=config.model, **kwargs)
)
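A short sketch of how the wrapper is consumed; `SemanticSimilarity` is an illustrative choice, and any Ragas metric that accepts an `embeddings` argument should work the same way:

```python
from ragas.metrics import SemanticSimilarity  # illustrative metric choice

from lightspeed_evaluation.core.embedding.manager import EmbeddingManager
from lightspeed_evaluation.core.embedding.ragas import RagasEmbeddingManager
from lightspeed_evaluation.core.models import EmbeddingConfig

config = EmbeddingConfig(
    provider="huggingface",
    model="sentence-transformers/all-mpnet-base-v2",
)
ragas_manager = RagasEmbeddingManager(EmbeddingManager(config))

# The wrapped LangChain embeddings plug into Ragas metrics directly.
metric = SemanticSimilarity(embeddings=ragas_manager.embeddings)
```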
4 changes: 3 additions & 1 deletion src/lightspeed_evaluation/core/llm/__init__.py
@@ -2,7 +2,7 @@

from ..models import LLMConfig
from .deepeval import DeepEvalLLMManager
from .manager import LLMError, LLMManager
from .manager import LLMError, LLMManager, validate_gemini_env, validate_openai_env
from .ragas import RagasLLMManager

__all__ = [
@@ -11,4 +11,6 @@
    "LLMManager",
    "DeepEvalLLMManager",
    "RagasLLMManager",
    "validate_openai_env",
    "validate_gemini_env",
]
112 changes: 57 additions & 55 deletions src/lightspeed_evaluation/core/llm/manager.py
@@ -3,13 +3,61 @@
import os
from typing import Any

from ..models import LLMConfig
from ..models import LLMConfig, SystemConfig


class LLMError(Exception):
    """LLM configuration error."""


def validate_openai_env() -> None:
    """Validate OpenAI environment variables."""
    if not os.environ.get("OPENAI_API_KEY"):
        raise LLMError(
            "OPENAI_API_KEY environment variable is required for OpenAI provider"
        )


def validate_azure_env() -> None:
    """Validate Azure OpenAI environment variables."""
    required = ["AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT"]
    if not all(os.environ.get(var) for var in required):
        raise LLMError(f"Azure provider requires environment variables: {required}")


def validate_watsonx_env() -> None:
    """Validate Watsonx environment variables."""
    required = ["WATSONX_API_KEY", "WATSONX_API_BASE", "WATSONX_PROJECT_ID"]
    if not all(os.environ.get(var) for var in required):
        raise LLMError(f"Watsonx provider requires environment variables: {required}")


def validate_anthropic_env() -> None:
    """Validate Anthropic environment variables."""
    if not os.environ.get("ANTHROPIC_API_KEY"):
        raise LLMError(
            "ANTHROPIC_API_KEY environment variable is required for Anthropic provider"
        )


def validate_gemini_env() -> None:
    """Validate Google Gemini environment variables."""
    # Gemini can use either GOOGLE_API_KEY or GEMINI_API_KEY
    if not (os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")):
        raise LLMError(
            "GOOGLE_API_KEY or GEMINI_API_KEY environment variable "
            "is required for Gemini provider"
        )


def validate_ollama_env() -> None:
    """Validate Ollama environment variables."""
    # Ollama typically runs locally, but may need OLLAMA_HOST for remote instances
    # No required env vars for basic local setup, but warn if OLLAMA_HOST is not set
    if not os.environ.get("OLLAMA_HOST"):
        print("ℹ️ OLLAMA_HOST not set, using default localhost:11434")


class LLMManager:
    """Generic LLM Manager for all use cases (Ragas, DeepEval, Custom metrics).
@@ -48,79 +48,35 @@ def _construct_model_name_and_validate(self) -> str:
print(f"⚠️ Using generic provider format for {provider}")
return f"{provider}/{self.config.model}"

    def _validate_openai_env(self) -> None:
        """Validate OpenAI environment variables."""
        if not os.environ.get("OPENAI_API_KEY"):
            raise LLMError(
                "OPENAI_API_KEY environment variable is required for OpenAI provider"
            )

    def _validate_azure_env(self) -> None:
        """Validate Azure OpenAI environment variables."""
        required = ["AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT"]
        if not all(os.environ.get(var) for var in required):
            raise LLMError(f"Azure provider requires environment variables: {required}")

    def _validate_watsonx_env(self) -> None:
        """Validate Watsonx environment variables."""
        required = ["WATSONX_API_KEY", "WATSONX_API_BASE", "WATSONX_PROJECT_ID"]
        if not all(os.environ.get(var) for var in required):
            raise LLMError(
                f"Watsonx provider requires environment variables: {required}"
            )

    def _validate_anthropic_env(self) -> None:
        """Validate Anthropic environment variables."""
        if not os.environ.get("ANTHROPIC_API_KEY"):
            raise LLMError(
                "ANTHROPIC_API_KEY environment variable is required for Anthropic provider"
            )

    def _validate_gemini_env(self) -> None:
        """Validate Google Gemini environment variables."""
        # Gemini can use either GOOGLE_API_KEY or GEMINI_API_KEY
        if not (os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")):
            raise LLMError(
                "GOOGLE_API_KEY or GEMINI_API_KEY environment variable "
                "is required for Gemini provider"
            )

    def _validate_ollama_env(self) -> None:
        """Validate Ollama environment variables."""
        # Ollama typically runs locally, but may need OLLAMA_HOST for remote instances
        # No required env vars for basic local setup, but warn if OLLAMA_HOST is not set
        if not os.environ.get("OLLAMA_HOST"):
            print("ℹ️ OLLAMA_HOST not set, using default localhost:11434")

    def _handle_openai_provider(self) -> str:
        """Handle OpenAI provider setup."""
        self._validate_openai_env()
        validate_openai_env()
        return self.config.model

    def _handle_azure_provider(self) -> str:
        """Handle Azure provider setup."""
        self._validate_azure_env()
        validate_azure_env()
        deployment = os.environ.get("AZURE_OPENAI_DEPLOYMENT_NAME") or self.config.model
        return f"azure/{deployment}"

    def _handle_watsonx_provider(self) -> str:
        """Handle WatsonX provider setup."""
        self._validate_watsonx_env()
        validate_watsonx_env()
        return f"watsonx/{self.config.model}"

    def _handle_anthropic_provider(self) -> str:
        """Handle Anthropic provider setup."""
        self._validate_anthropic_env()
        validate_anthropic_env()
        return f"anthropic/{self.config.model}"

    def _handle_gemini_provider(self) -> str:
        """Handle Gemini provider setup."""
        self._validate_gemini_env()
        validate_gemini_env()
        return f"gemini/{self.config.model}"

    def _handle_ollama_provider(self) -> str:
        """Handle Ollama provider setup."""
        self._validate_ollama_env()
        validate_ollama_env()
        return f"ollama/{self.config.model}"

    def get_model_name(self) -> str:
@@ -142,11 +146,9 @@ def get_config(self) -> LLMConfig:
        return self.config

    @classmethod
    def from_system_config(cls, system_config: dict[str, Any]) -> "LLMManager":
    def from_system_config(cls, system_config: SystemConfig) -> "LLMManager":
        """Create LLM Manager from system configuration."""
        llm_config_dict = system_config.get("llm", {})
        config = LLMConfig(**llm_config_dict)
        return cls(config)
        return cls(system_config.llm)

    @classmethod
    def from_llm_config(cls, llm_config: LLMConfig) -> "LLMManager":
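Since the validators are now module-level and re-exported from `core.llm` (see the `__init__.py` change above), callers can run an environment check without constructing a manager; a minimal sketch:

```python
from lightspeed_evaluation.core.llm import LLMError, validate_gemini_env

try:
    validate_gemini_env()  # passes if GOOGLE_API_KEY or GEMINI_API_KEY is set
except LLMError as err:
    print(f"Gemini credentials missing: {err}")
```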
8 changes: 1 addition & 7 deletions src/lightspeed_evaluation/core/metrics/custom.py
@@ -26,7 +26,7 @@ class EvaluationPromptParams(BaseModel):
    scale: str = Field("0.0 to 1.0", description="Scale for scoring")


class CustomMetrics:
class CustomMetrics:  # pylint: disable=too-few-public-methods
    """Handles custom metrics using LLMManager for direct LiteLLM calls."""

    def __init__(self, llm_manager: LLMManager):
@@ -272,9 +272,3 @@ def _evaluate_tool_calls(
        score = 1.0 if success else 0.0

        return score, details

    @classmethod
    def from_system_config(cls, system_config: dict[str, Any]) -> "CustomMetrics":
        """Create CustomMetrics from system configuration."""
        llm_manager = LLMManager.from_system_config(system_config)
        return cls(llm_manager)
8 changes: 1 addition & 7 deletions src/lightspeed_evaluation/core/metrics/deepeval.py
@@ -15,7 +15,7 @@
from ..models import EvaluationScope, TurnData


class DeepEvalMetrics:
class DeepEvalMetrics:  # pylint: disable=too-few-public-methods
    """Handles DeepEval metrics evaluation using LLM Manager."""

    def __init__(self, llm_manager: LLMManager):
@@ -128,9 +128,3 @@ def _evaluate_knowledge_retention(
        metric = KnowledgeRetentionMetric(model=self.llm_manager.get_llm())

        return self._evaluate_metric(metric, test_case)

    @classmethod
    def from_system_config(cls, system_config: dict[str, Any]) -> "DeepEvalMetrics":
        """Create DeepEvalMetrics from system configuration."""
        llm_manager = LLMManager.from_system_config(system_config)
        return cls(llm_manager)
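With the per-class `from_system_config` helpers removed, metric classes are now constructed by injecting an `LLMManager` explicitly; a sketch of the new call pattern (`system_config` is assumed to be a loaded `SystemConfig` instance):

```python
from lightspeed_evaluation.core.llm import LLMManager
from lightspeed_evaluation.core.metrics.custom import CustomMetrics
from lightspeed_evaluation.core.metrics.deepeval import DeepEvalMetrics

# system_config: a SystemConfig loaded elsewhere (assumed)
llm_manager = LLMManager.from_system_config(system_config)
custom_metrics = CustomMetrics(llm_manager)
deepeval_metrics = DeepEvalMetrics(llm_manager)
```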