Skip to content

Commit 5daaa3c

Browse files
committed
feat: implement polling logic for async evaluation judging
Update submitPluginEvaluation endpoint to /submit-with-questions with 202 handling.
Add convertPersonaToRequest helper to transform PersonaInfo to API format.
Replace synchronous submit with polling workflow:
- Submit batch, get 202 Accepted
- Poll /api/evaluation/results every 2 seconds
- Wait until evaluated_count increases by batch size
- Update UI with real-time progress from polled data
Add llm_model extraction and persona conversion in runEvaluation.
Update persistence to include llmModel.
Apply polling logic to both runEvaluation and resumeEvaluation.
1 parent e85fa6a commit 5daaa3c

File tree

2 files changed

+152
-57
lines changed

2 files changed

+152
-57
lines changed

src/evaluation-view/EvaluationService.ts

Lines changed: 148 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import { AIService } from '../services/aiService';
22
import {
3-
startPluginEvaluation,
43
submitPluginEvaluation,
54
startPluginEvaluationWithQuestions,
5+
getEvaluationResults,
66
} from '../services';
77
import type { ModelInfo, PersonaInfo } from '../components/chat-header/types';
88
import type {
@@ -11,6 +11,8 @@ import type {
1111
TestCase,
1212
SubmissionItem,
1313
EvaluationRun,
14+
PersonaConfigRequest,
15+
PersonaModelSettings,
1416
} from './evaluationViewTypes';
1517
import { EvaluationPersistence, type PersistedEvaluationState } from './EvaluationPersistence';
1618

@@ -25,6 +27,7 @@ export class EvaluationService {
2527
private deps: ServiceDependencies;
2628
private abortController: AbortController | null = null;
2729
private currentModel: ModelInfo | null = null;
30+
private currentLlmModel: string | null = null;
2831
private currentPersona: PersonaInfo | null = null;
2932
private currentCollectionId: string | null = null;
3033
private processedQuestionIds: Set<string> = new Set();
@@ -39,6 +42,31 @@ export class EvaluationService {
3942
this.updateShellState = updateShellState;
4043
}
4144

45+
/**
46+
* Convert PersonaInfo to PersonaConfigRequest format for API
47+
*/
48+
private convertPersonaToRequest(persona: PersonaInfo | null): PersonaConfigRequest | null {
49+
if (!persona) return null;
50+
51+
return {
52+
id: persona.id || null,
53+
name: persona.name || null,
54+
description: (persona as any).description || null,
55+
system_prompt: persona.system_prompt || null,
56+
model_settings: {
57+
temperature: 0.7,
58+
top_p: 0.9,
59+
frequency_penalty: 0.0,
60+
presence_penalty: 0.0,
61+
context_window: 4000,
62+
stop_sequences: [],
63+
...(persona.model_settings || {}),
64+
} as PersonaModelSettings,
65+
created_at: (persona as any).created_at || null,
66+
updated_at: (persona as any).updated_at || null,
67+
};
68+
}
69+
4270
private updateState(newState: Partial<EvaluationFeatureState>): void {
4371
this.state = { ...this.state, ...newState };
4472
this.updateShellState(newState);
@@ -50,25 +78,29 @@ export class EvaluationService {
5078
public runEvaluation = async (
5179
selectedModel: ModelInfo,
5280
selectedPersona: PersonaInfo | null = null,
53-
collectionId?: string | null,
54-
questions?: string[] | null
81+
collectionId: string,
82+
questions: string[]
5583
): Promise<void> => {
5684
this.updateState({ isRunning: true, error: null, progress: 0 });
5785
this.abortController = new AbortController();
5886

5987
// Save current run config
6088
this.currentModel = selectedModel;
89+
this.currentLlmModel = selectedModel.name;
6190
this.currentPersona = selectedPersona;
62-
this.currentCollectionId = collectionId || null;
91+
this.currentCollectionId = collectionId;
6392
this.processedQuestionIds.clear();
6493

6594
try {
6695
// Step 1: Start evaluation - get test questions with context
6796
console.log('Starting evaluation...');
6897

69-
const { evaluation_run_id, test_data } = collectionId && questions
70-
? await startPluginEvaluationWithQuestions({ collection_id: collectionId, questions })
71-
: await startPluginEvaluation();
98+
const { evaluation_run_id, test_data } = await startPluginEvaluationWithQuestions({
99+
collection_id: collectionId,
100+
questions,
101+
llm_model: this.currentLlmModel,
102+
persona: this.convertPersonaToRequest(selectedPersona),
103+
});
72104
console.log(`Evaluation started: ${evaluation_run_id}`);
73105
console.log(`Total questions: ${test_data.length}`);
74106

@@ -80,10 +112,14 @@ export class EvaluationService {
80112
total_questions: test_data.length,
81113
correct_count: 0,
82114
incorrect_count: 0,
115+
evaluated_count: 0,
83116
accuracy: 0,
84117
started_at: new Date().toISOString(),
85118
is_completed: false,
86119
progress: 0,
120+
status: 'running',
121+
duration_seconds: null,
122+
run_date: new Date().toISOString(),
87123
},
88124
});
89125

@@ -132,41 +168,67 @@ export class EvaluationService {
132168

133169
this.updateState({ isGenerating: false });
134170

135-
// Step 4: Submit batch for judging
171+
// Step 4: Submit batch for judging (returns 202 immediately)
136172
console.log(`Submitting ${submissions.length} answers...`);
137-
const result = await submitPluginEvaluation({
173+
const submitResponse = await submitPluginEvaluation({
138174
evaluation_run_id,
139175
submissions,
140176
});
141177

142-
console.log(`Batch ${batchIndex + 1} results:`, result);
143-
144-
// Update progress and results
145-
this.updateState({
146-
progress: result.progress,
147-
currentResults: result,
148-
activeRun: {
149-
...this.state.activeRun!,
150-
progress: result.progress,
151-
correct_count: result.correct_count,
152-
incorrect_count: result.incorrect_count,
153-
accuracy: (result.correct_count / result.total_questions) * 100,
154-
},
155-
});
178+
console.log(`Batch ${batchIndex + 1} submitted:`, submitResponse.message);
179+
180+
// Step 5: Poll for results until batch is judged
181+
const previousEvaluatedCount = this.state.activeRun?.evaluated_count || 0;
182+
const targetCount = previousEvaluatedCount + submissions.length;
183+
184+
console.log(`Waiting for judging... (target: ${targetCount})`);
185+
186+
while (true) {
187+
// Check if aborted
188+
if (this.abortController?.signal.aborted) {
189+
console.log('Evaluation aborted during polling');
190+
this.updateState({ isRunning: false });
191+
return;
192+
}
193+
194+
// Wait 2 seconds before polling
195+
await new Promise(resolve => setTimeout(resolve, 2000));
196+
197+
// Poll for results
198+
const resultsData = await getEvaluationResults(evaluation_run_id);
199+
const evaluationRun = resultsData.evaluation_run;
200+
201+
console.log(`Poll result: evaluated ${evaluationRun.evaluated_count}/${evaluationRun.total_questions}`);
202+
203+
// Update UI with latest progress
204+
this.updateState({
205+
progress: evaluationRun.progress,
206+
activeRun: {
207+
...this.state.activeRun!,
208+
evaluated_count: evaluationRun.evaluated_count,
209+
progress: evaluationRun.progress,
210+
correct_count: evaluationRun.correct_count,
211+
incorrect_count: evaluationRun.incorrect_count,
212+
accuracy: evaluationRun.accuracy,
213+
},
214+
});
215+
216+
// Check if batch is processed
217+
if (evaluationRun.evaluated_count >= targetCount) {
218+
console.log(`Batch ${batchIndex + 1} judged successfully`);
219+
break;
220+
}
221+
}
156222

157223
// Save progress to persistence
158224
this.savePersistenceState(evaluation_run_id, test_data);
159225

160226
// Check if completed
161-
if (result.is_completed) {
227+
const currentRun = this.state.activeRun!;
228+
if (currentRun.evaluated_count === currentRun.total_questions) {
162229
console.log('Evaluation completed!');
163230
const completedRun: EvaluationRun = {
164-
id: evaluation_run_id,
165-
total_questions: result.total_questions,
166-
correct_count: result.correct_count,
167-
incorrect_count: result.incorrect_count,
168-
accuracy: (result.correct_count / result.total_questions) * 100,
169-
started_at: this.state.activeRun!.started_at,
231+
...currentRun,
170232
completed_at: new Date().toISOString(),
171233
is_completed: true,
172234
progress: 1.0,
@@ -274,11 +336,12 @@ export class EvaluationService {
274336
* Save current evaluation state to persistence
275337
*/
276338
private savePersistenceState(runId: string, testCases: TestCase[]): void {
277-
if (!this.currentModel) return;
339+
if (!this.currentModel || !this.currentLlmModel) return;
278340

279341
const persistedState: PersistedEvaluationState = {
280342
runId,
281343
model: this.currentModel,
344+
llmModel: this.currentLlmModel,
282345
persona: this.currentPersona,
283346
collectionId: this.currentCollectionId,
284347
testCases,
@@ -308,6 +371,7 @@ export class EvaluationService {
308371

309372
// Restore tracking state
310373
this.currentModel = persistedState.model;
374+
this.currentLlmModel = persistedState.llmModel;
311375
this.currentPersona = persistedState.persona;
312376
this.currentCollectionId = persistedState.collectionId || null;
313377
this.processedQuestionIds = new Set(persistedState.processedQuestionIds);
@@ -341,10 +405,14 @@ export class EvaluationService {
341405
total_questions: test_data.length,
342406
correct_count: 0,
343407
incorrect_count: 0,
408+
evaluated_count: this.processedQuestionIds.size,
344409
accuracy: 0,
345410
started_at: new Date(persistedState.timestamp).toISOString(),
346411
is_completed: false,
347412
progress: this.processedQuestionIds.size / test_data.length,
413+
status: 'running',
414+
duration_seconds: null,
415+
run_date: new Date(persistedState.timestamp).toISOString(),
348416
},
349417
});
350418

@@ -391,41 +459,67 @@ export class EvaluationService {
391459

392460
this.updateState({ isGenerating: false });
393461

394-
// Submit batch for judging
462+
// Submit batch for judging (returns 202 immediately)
395463
console.log(`Submitting ${submissions.length} answers...`);
396-
const result = await submitPluginEvaluation({
464+
const submitResponse = await submitPluginEvaluation({
397465
evaluation_run_id,
398466
submissions,
399467
});
400468

401-
console.log(`Batch ${batchIndex + 1} results:`, result);
402-
403-
// Update progress and results
404-
this.updateState({
405-
progress: result.progress,
406-
currentResults: result,
407-
activeRun: {
408-
...this.state.activeRun!,
409-
progress: result.progress,
410-
correct_count: result.correct_count,
411-
incorrect_count: result.incorrect_count,
412-
accuracy: (result.correct_count / result.total_questions) * 100,
413-
},
414-
});
469+
console.log(`Batch ${batchIndex + 1} submitted:`, submitResponse.message);
470+
471+
// Poll for results until batch is judged
472+
const previousEvaluatedCount = this.state.activeRun?.evaluated_count || 0;
473+
const targetCount = previousEvaluatedCount + submissions.length;
474+
475+
console.log(`Waiting for judging... (target: ${targetCount})`);
476+
477+
while (true) {
478+
// Check if aborted
479+
if (this.abortController?.signal.aborted) {
480+
console.log('Evaluation aborted during polling');
481+
this.updateState({ isRunning: false });
482+
return;
483+
}
484+
485+
// Wait 2 seconds before polling
486+
await new Promise(resolve => setTimeout(resolve, 2000));
487+
488+
// Poll for results
489+
const resultsData = await getEvaluationResults(evaluation_run_id);
490+
const evaluationRun = resultsData.evaluation_run;
491+
492+
console.log(`Poll result: evaluated ${evaluationRun.evaluated_count}/${evaluationRun.total_questions}`);
493+
494+
// Update UI with latest progress
495+
this.updateState({
496+
progress: evaluationRun.progress,
497+
activeRun: {
498+
...this.state.activeRun!,
499+
evaluated_count: evaluationRun.evaluated_count,
500+
progress: evaluationRun.progress,
501+
correct_count: evaluationRun.correct_count,
502+
incorrect_count: evaluationRun.incorrect_count,
503+
accuracy: evaluationRun.accuracy,
504+
},
505+
});
506+
507+
// Check if batch is processed
508+
if (evaluationRun.evaluated_count >= targetCount) {
509+
console.log(`Batch ${batchIndex + 1} judged successfully`);
510+
break;
511+
}
512+
}
415513

416514
// Save progress to persistence
417515
this.savePersistenceState(evaluation_run_id, test_data);
418516

419517
// Check if completed
420-
if (result.is_completed) {
518+
const currentRun = this.state.activeRun!;
519+
if (currentRun.evaluated_count === currentRun.total_questions) {
421520
console.log('Evaluation completed!');
422521
const completedRun: EvaluationRun = {
423-
id: evaluation_run_id,
424-
total_questions: result.total_questions,
425-
correct_count: result.correct_count,
426-
incorrect_count: result.incorrect_count,
427-
accuracy: (result.correct_count / result.total_questions) * 100,
428-
started_at: this.state.activeRun!.started_at,
522+
...currentRun,
429523
completed_at: new Date().toISOString(),
430524
is_completed: true,
431525
progress: 1.0,

src/services/evaluationApiService.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,20 +32,21 @@ export async function startPluginEvaluation(): Promise<StartEvaluationResponse>
3232

3333
/**
3434
* Submit plugin evaluation answers
35-
* Backend judges each answer and returns progress
35+
* Backend queues judging as background task and returns 202 Accepted
3636
*/
3737
export async function submitPluginEvaluation(
3838
request: SubmitEvaluationRequest
3939
): Promise<SubmitEvaluationResponse> {
40-
const response = await fetch(`${EVALUATION_API_BASE}/plugin/submit`, {
40+
const response = await fetch(`${EVALUATION_API_BASE}/plugin/submit-with-questions`, {
4141
method: 'POST',
4242
headers: {
4343
'Content-Type': 'application/json',
4444
},
4545
body: JSON.stringify(request),
4646
});
4747

48-
if (!response.ok) {
48+
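// Any 2xx status is a success: response.ok already covers 200-299, including 202 Accepted, so the explicit 202 check below is redundant but harmless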
49+
if (!response.ok && response.status !== 202) {
4950
const error = await response.json().catch(() => ({ detail: 'Unknown error' }));
5051
throw new Error(error.detail || 'Failed to submit evaluation');
5152
}

0 commit comments

Comments
 (0)