22 changes: 17 additions & 5 deletions src/lightspeed_evaluation/core/metrics/ragas.py
@@ -1,5 +1,6 @@
"""Ragas metrics evaluation using LLM Manager."""

import math
from typing import Any, Optional

from datasets import Dataset
@@ -90,7 +91,7 @@ def evaluate(
             return None, f"Unsupported Ragas metric: {metric_name}"
 
         try:
-            return self.supported_metrics[metric_name](
+            result = self.supported_metrics[metric_name](
                 conv_data, scope.turn_idx, scope.turn_data, scope.is_conversation
             )
         except BrokenPipeError as e:
@@ -100,16 +101,27 @@
                 f"connection issue: {str(e)}",
             )
         except OSError as e:
+            err_msg = f"Ragas {metric_name} evaluation failed: {str(e)}"
             if e.errno == 32:  # Broken pipe
-                return (
-                    None,
+                err_msg = (
                     f"Ragas {metric_name} evaluation failed due to broken pipe "
-                    f"(network/LLM timeout): {str(e)}",
+                    f"(network/LLM timeout): {str(e)}"
                 )
-            return None, f"Ragas {metric_name} evaluation failed: {str(e)}"
+            return None, err_msg
         except (RuntimeError, ValueError, TypeError, ImportError) as e:
             return None, f"Ragas {metric_name} evaluation failed: {str(e)}"
 
+        # Ragas returns float('NaN') when it cannot parse the output from the
+        # LLM (OutputParserException)
+        if result[0] is not None and math.isnan(result[0]):
+            return (
+                None,
+                f"Ragas {metric_name} evaluation failed due to malformed "
+                "output from the LLM",
+            )
+
+        return result
+
     def _evaluate_response_relevancy(
         self,
         _conv_data: Any,
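The key detail behind the new guard is that Ragas signals an unparseable LLM response (OutputParserException) by returning float('nan') rather than raising, and NaN cannot be detected with an ordinary equality check. Below is a minimal, self-contained sketch of the same idea; the helper name _nan_to_error is hypothetical (not part of the PR), and it assumes the metric callables return a (score, error_reason) tuple as the diff suggests.

import math

def _nan_to_error(result, metric_name):
    # Minimal sketch (hypothetical helper, not the PR's code): mirrors the
    # NaN guard added to evaluate(). `result` is assumed to be the
    # (score, error_reason) tuple returned by the metric callables.
    score, _reason = result
    # NaN never compares equal to itself, so `score == float("nan")` is always
    # False; math.isnan() is the reliable way to detect an unparseable score.
    if score is not None and math.isnan(score):
        return None, (
            f"Ragas {metric_name} evaluation failed due to malformed "
            "output from the LLM"
        )
    return result

# A NaN score becomes an explicit error tuple; a real score passes through.
print(_nan_to_error((float("nan"), None), "response_relevancy"))
print(_nan_to_error((0.87, None), "response_relevancy"))

Run standalone, the first call prints (None, 'Ragas response_relevancy evaluation failed due to malformed output from the LLM'), while the second returns (0.87, None) unchanged, which is the behavior the diff adds before `return result`.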