# SPDX-License-Identifier: Apache-2.0
# Standard
from pathlib import Path
from unittest.mock import MagicMock, patch
import unittest

# Third Party
from pandas import DataFrame
from ragas.callbacks import ChainRun
from ragas.dataset_schema import EvaluationDataset, EvaluationResult
import pandas as pd

# First Party
from instructlab.eval.ragas import ModelConfig, RagasEvaluator, RunConfig, Sample


class TestRagasEvaluator(unittest.TestCase):
    @patch("instructlab.eval.ragas.get_openai_client")
    def test_generate_answers_from_model(self, mock_get_openai_client):
        # Mock the OpenAI client so chat completions always return "London"
        mock_client = MagicMock()
        mock_response = MagicMock()
        mock_response.choices[0].message.content = "London"
        mock_client.chat.completions.create.return_value = mock_response
        mock_get_openai_client.return_value = mock_client

        # get answers
        questions = pd.DataFrame({"user_input": ["What is the capital of France?"]})
        student_model = ModelConfig(
            base_url="https://api.openai.com",
            model_name="gpt-3.5-turbo",
            api_key="test-api-key",
        )
        evaluator = RagasEvaluator()
        result_df = evaluator._generate_answers_from_model(questions, student_model)

        # what we expect to see
        expected_df = questions.copy()
        expected_df["response"] = ["London"]

        # perform the assertions
        pd.testing.assert_frame_equal(result_df, expected_df)
        mock_get_openai_client.assert_called_once_with(
            model_api_base=student_model.base_url, api_key=student_model.api_key
        )
        mock_client.chat.completions.create.assert_called_once_with(
            messages=[student_model.system_prompt, "What is the capital of France?"],
            model=student_model.model_name,
            seed=42,
            max_tokens=student_model.max_tokens,
            temperature=student_model.temperature,
        )

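    # test_run patches every external dependency of RagasEvaluator.run():
    # metric selection, student-model answer generation, the ChatOpenAI judge,
    # Ragas' evaluate(), and the read_json used for JSONL input, so the test
    # never touches the network or the filesystem.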
    @patch("instructlab.eval.ragas.read_json")
    @patch("instructlab.eval.ragas.evaluate")
    @patch("instructlab.eval.ragas.ChatOpenAI")
    @patch.object(RagasEvaluator, "_generate_answers_from_model")
    @patch.object(RagasEvaluator, "_get_metrics")
    def test_run(
        self,
        mock_get_metrics: MagicMock,
        mock_generate_answers_from_model: MagicMock,
        mock_ChatOpenAI: MagicMock,
        mock_evaluate: MagicMock,
        mock_read_json: MagicMock,
    ):
        ########################################################################
        # SETUP EVERYTHING WE NEED FOR THE TESTS
        ########################################################################

        # These are the variables that control the flow of the test. Since we have
        # to re-construct some Ragas components under the hood, we define the
        # expected values up front so the mocks and the assertions stay consistent.
        student_model_response = "Paris"
        user_question = "What is the capital of France?"
        golden_answer = "The capital of France is Paris."
        base_ds = [{"user_input": user_question, "reference": golden_answer}]
        mocked_metric = "mocked-metric"
        mocked_metric_score = 4.0

        # The following section mocks the relevant return values. Ragas is tricky
        # because it uses some complex data structures under the hood, so we have to
        # configure the intermediate outputs that we expect to receive from it.
        mock_get_metrics.return_value = [mocked_metric]
        interim_df = DataFrame(
            {
                "user_input": [user_question],
                "response": [student_model_response],
                "reference": [golden_answer],
            }
        )
        mock_generate_answers_from_model.return_value = interim_df.copy()
        mocked_evaluation_ds = EvaluationDataset.from_pandas(interim_df)
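        # ChatOpenAI is patched so no OpenAI-backed model (the LLM Ragas judges
        # with) is ever contacted; run() is expected to construct it with
        # model="gpt-4o", which is asserted further down.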
        mock_ChatOpenAI.return_value = MagicMock()

        # Ragas requires this value to instantiate an EvaluationResult object, so we must provide it.
        # It isn't functionally used for our purposes though.
        _unimportant_ragas_traces = {
            "default": ChainRun(
                run_id="42",
                parent_run_id=None,
                name="root",
                inputs={"system": "null", "user": "null"},
                outputs={"assistant": "null"},
                metadata={"user_id": 1337},
            )
        }
        mock_evaluate.return_value = EvaluationResult(
            scores=[{mocked_metric: mocked_metric_score}],
            dataset=mocked_evaluation_ds,
            ragas_traces=_unimportant_ragas_traces,
        )

        ########################################################################
        # Run the tests
        ########################################################################

        # Configure the remaining inputs, which Ragas itself does not depend on
        # and which therefore need no mocking.
        student_model = ModelConfig(
            base_url="https://api.openai.com",
            model_name="gpt-3.5-turbo",
            api_key="test-api-key",
        )
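        # RunConfig caps retries, back-off wait time, and per-request timeout for
        # Ragas, and pins the seed so the evaluation is reproducible.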
        run_config = RunConfig(max_retries=3, max_wait=60, seed=42, timeout=30)
        evaluator = RagasEvaluator()

        ########################################################################
        # Test case: directly passing a dataset
        ########################################################################
        result = evaluator.run(
            dataset=base_ds, student_model=student_model, run_config=run_config
        )

        self.assertIsInstance(result, EvaluationResult)
        mock_generate_answers_from_model.assert_called_once()
        mock_evaluate.assert_called_once()
        mock_ChatOpenAI.assert_called_once_with(model="gpt-4o")

        ########################################################################
        # Test case: passing a dataset in via Path to JSONL file
        ########################################################################
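        # No JSONL file needs to exist on disk; read_json is patched to hand back
        # the same base dataset as a DataFrame.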
        mock_read_json.return_value = DataFrame(base_ds)
        result = evaluator.run(
            dataset=Path("dummy_path.jsonl"),
            student_model=student_model,
            run_config=run_config,
        )

        self.assertIsInstance(result, EvaluationResult)
        mock_read_json.assert_called_once_with(
            Path("dummy_path.jsonl"), orient="records", lines=True
        )
        mock_generate_answers_from_model.assert_called()
        mock_evaluate.assert_called()


if __name__ == "__main__":
    unittest.main()