@@ -1027,7 +1027,7 @@ def compute(self, responses: list[ModelResponse], docs: list[Doc], **kwargs) ->
         questions = [formatted_doc.query for formatted_doc in docs]
         options = [formatted_doc.choices for formatted_doc in docs]
         golds = [formatted_doc.get_golds()[0] for formatted_doc in docs]
-        predictions = [response.text[0] for response in responses]
+        predictions = [response.final_text[0] for response in responses]
 
         scores, messages, judgements = self.judge.evaluate_answer_batch(questions, predictions, options, golds)
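Note on the change above: `response.text` holds the raw generations, while `final_text` is expected to return them with any reasoning/thinking block stripped, so the judge scores only the final answer. A minimal sketch of the assumed semantics (not the library's actual implementation; the `<think>` markers are an assumption):

import re
from dataclasses import dataclass, field

@dataclass
class ModelResponse:
    text: list[str] = field(default_factory=list)

    @property
    def final_text(self) -> list[str]:
        # Strip <think>...</think> spans (assumed reasoning markers) before judging.
        return [re.sub(r"<think>.*?</think>", "", t, flags=re.DOTALL).strip() for t in self.text]

response = ModelResponse(text=["<think>reasoning here</think>Paris"])
assert response.final_text == ["Paris"]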
@@ -1059,7 +1059,7 @@ def compute(self, model_response: list[ModelResponse], doc: list[Doc], **kwargs)
         # If we are evaluating a multiturn task, we need to have specific field in the formatted doc
         questions = [doc.specific["multi_turn_queries"] for doc in docs]
         golds = [doc.specific.get("reference", None) for doc in docs]
-        predictions = [response.text[0] for response in model_responses]
+        predictions = [response.final_text[0] for response in model_responses]
 
         query_context_1 = {"query": questions[0], "context": ""}
         query_context_2 = {"query": questions[1], "context": predictions[0]}
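In the multiturn hunk, note how the judge contexts chain: the second turn is judged with the model's first-turn prediction as its context. Illustrative values (hypothetical, for clarity only):

questions = ["What is the capital of France?", "And how many people live there?"]
predictions = ["Paris.", "About 2.1 million."]
query_context_1 = {"query": questions[0], "context": ""}              # turn 1 judged with empty context
query_context_2 = {"query": questions[1], "context": predictions[0]}  # turn 2 sees the turn-1 answer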
@@ -1089,7 +1089,7 @@ def compute(self, responses: list[ModelResponse], docs: list[Doc], **kwargs):
         questions = [doc.specific["question"] for doc in docs]
         options = [doc.choices for doc in docs]
         golds = [doc.get_golds()[0] for doc in docs]
-        predictions = [response.text[0] for response in responses]
+        predictions = [response.final_text[0] for response in responses]
 
         scores, messages, judgements = self.judge.evaluate_answer_batch(questions, predictions, options, golds)
@@ -1098,8 +1098,8 @@ def compute(self, responses: list[ModelResponse], docs: list[Doc], **kwargs):
         metrics.append(
             {
                 f"judge_score_{self.short_judge_name}": scores[i],
-                f"user_prompt_{self.short_judge_name}": messages[i],
-                f"judgement_{self.short_judge_name}": judgements[i],
+                # f"user_prompt_{self.short_judge_name}": messages[i],
+                # f"judgement_{self.short_judge_name}": judgements[i],
             }
         )
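With the last hunk applied, each per-sample entry keeps only the judge score; the raw judge prompt (messages[i]) and verbatim judgement (judgements[i]) are commented out rather than logged. For example, assuming short_judge_name == "gpt-4o" (a hypothetical value), each appended entry reduces to:

{"judge_score_gpt-4o": 1.0}  # score only; prompt and judgement are no longer stored per sample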