@@ -154,64 +154,3 @@ evaluator = DeepevalEvaluator(
    prometheus_config=prometheus_config,
)
```
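- As a purely illustrative aside, the sketch below shows what a Pushgateway upload like the one configured above looks like when done by hand with the `prometheus_client` package. It is not veadk's internal implementation: the endpoint, job name, and metric name here are assumptions, and `PrometheusPushgatewayConfig` handles all of this for the evaluator.
-
- ```python
- from prometheus_client import CollectorRegistry, Gauge, push_to_gateway
-
- # Hypothetical metric and endpoint, for illustration only
- registry = CollectorRegistry()
- score_gauge = Gauge(
-     "eval_metric_score",
-     "Score reported by an evaluation metric",
-     ["metric_name"],
-     registry=registry,
- )
- score_gauge.labels(metric_name="GEval").set(0.87)
-
- # Push the collected sample to a (locally running) Pushgateway
- push_to_gateway("localhost:9091", job="deepeval_evaluation", registry=registry)
- ```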
-
- ## Complete Example
-
- The following is a complete example of using the DeepEval evaluator. It defines a [GEval](https://deepeval.com/docs/metrics-llm-evals) metric and a [ToolCorrectnessMetric](https://deepeval.com/docs/metrics-tool-correctness) metric, which evaluate overall output quality and tool-call correctness respectively, and reports the evaluation results to Volcengine's VMP platform:
-
- ```python
- import asyncio
- import os
- from builtin_tools.agent import agent
-
- from deepeval.metrics import GEval, ToolCorrectnessMetric
- from deepeval.test_case import LLMTestCaseParams
- from veadk.config import getenv
- from veadk.evaluation.deepeval_evaluator import DeepevalEvaluator
- from veadk.evaluation.utils.prometheus import PrometheusPushgatewayConfig
- from veadk.prompts.prompt_evaluator import eval_principle_prompt
-
- prometheus_config = PrometheusPushgatewayConfig()
-
- # 1. Rollout and generate the eval set file
- # await agent.run(
- #     prompt,
- #     collect_runtime_data=True,
- #     eval_set_id=f"eval_demo_set_{get_current_time()}",
- # )
- # # get the expected output
- # dump_path = agent._dump_path
- # assert dump_path != "", "Dump eval set file failed! Please check runtime logs."
-
- # 2. Evaluate against the eval set file
- evaluator = DeepevalEvaluator(
-     agent=agent,
-     judge_model_name=getenv("MODEL_JUDGE_NAME"),
-     judge_model_api_base=getenv("MODEL_JUDGE_API_BASE"),
-     judge_model_api_key=getenv("MODEL_JUDGE_API_KEY"),
-     prometheus_config=prometheus_config,
- )
-
- # 3. Define evaluation metrics
- metrics = [
-     GEval(
-         threshold=0.8,
-         name="Base Evaluation",
-         criteria=eval_principle_prompt,
-         evaluation_params=[
-             LLMTestCaseParams.INPUT,
-             LLMTestCaseParams.ACTUAL_OUTPUT,
-             LLMTestCaseParams.EXPECTED_OUTPUT,
-         ],
-     ),
-     ToolCorrectnessMetric(threshold=0.5),
- ]
-
- # 4. Run evaluation
- eval_set_file_path = os.path.join(
-     os.path.dirname(__file__), "builtin_tools", "evalsetf0aef1.evalset.json"
- )
- # evaluator.eval is a coroutine; run it from a synchronous entry point
- asyncio.run(evaluator.eval(eval_set_file_path=eval_set_file_path, metrics=metrics))
- ```
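-
- To make concrete what the two metrics above actually score, here is a minimal, self-contained sketch that runs them against a single hand-written test case through DeepEval's `LLMTestCase` API. The input, outputs, and tool names are made up for illustration; `ToolCall` requires a reasonably recent DeepEval release, and `GEval` calls a judge model (OpenAI via `OPENAI_API_KEY` by default, or whatever is passed through its `model` parameter):
-
- ```python
- from deepeval.metrics import GEval, ToolCorrectnessMetric
- from deepeval.test_case import LLMTestCase, LLMTestCaseParams, ToolCall
-
- quality = GEval(
-     name="Base Evaluation",
-     criteria="Judge whether the actual output answers the input as well as the expected output does.",
-     evaluation_params=[
-         LLMTestCaseParams.INPUT,
-         LLMTestCaseParams.ACTUAL_OUTPUT,
-         LLMTestCaseParams.EXPECTED_OUTPUT,
-     ],
-     threshold=0.8,
- )
- tool_correctness = ToolCorrectnessMetric(threshold=0.5)
-
- # One test case, written by hand instead of loaded from an eval set file
- test_case = LLMTestCase(
-     input="What is the weather in Beijing today?",
-     actual_output="It is sunny in Beijing today.",
-     expected_output="Sunny.",
-     tools_called=[ToolCall(name="get_weather")],
-     expected_tools=[ToolCall(name="get_weather")],
- )
-
- quality.measure(test_case)           # LLM-as-a-judge: scores output quality against the criteria
- tool_correctness.measure(test_case)  # deterministic: compares tools_called with expected_tools
- print(quality.score, quality.reason)
- print(tool_correctness.score)
- ```
-
- This is roughly what the evaluator does for each record in the eval set file, before pushing the resulting scores to the configured Pushgateway.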