@@ -253,11 +253,17 @@ def get_commit_statuses(sha: str) -> pd.DataFrame:
253
253
for item in all_data
254
254
]
255
255
256
- return (
257
- pd .DataFrame (parsed )
258
- .sort_values (by = ["job_status" , "job_name" ], ascending = [True , True ])
259
- .reset_index (drop = True )
260
- )
256
+ # Create DataFrame
257
+ df = pd .DataFrame (parsed )
258
+
259
+ # Drop duplicates on job_name, keeping the first occurrence (the newest status for each context)
260
+ # GitHub returns statuses in reverse chronological order
261
+ df = df .drop_duplicates (subset = ["job_name" ], keep = "first" )
262
+
263
+ # Sort by status and job name
264
+ return df .sort_values (
265
+ by = ["job_status" , "job_name" ], ascending = [True , True ]
266
+ ).reset_index (drop = True )
261
267
262
268
263
269
def get_pr_info_from_number (pr_number : str ) -> dict :
@@ -291,28 +297,50 @@ def get_checks_fails(client: Client, job_url: str):
291
297
Get tests that did not succeed for the given job URL.
292
298
Exclude checks that have status 'error' as they are counted in get_checks_errors.
293
299
"""
294
- columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link"
295
- query = f"""SELECT { columns } FROM `gh-data`.checks
296
- WHERE task_url LIKE '{ job_url } %'
297
- AND test_status IN ('FAIL', 'ERROR')
298
- AND check_status!='error'
299
- ORDER BY check_name, test_name
300
- """
300
+ query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link
301
+ FROM (
302
+ SELECT
303
+ argMax(check_status, check_start_time) as job_status,
304
+ check_name as job_name,
305
+ argMax(test_status, check_start_time) as status,
306
+ test_name,
307
+ report_url as results_link,
308
+ task_url
309
+ FROM `gh-data`.checks
310
+ GROUP BY check_name, test_name, report_url, task_url
311
+ )
312
+ WHERE task_url LIKE '{ job_url } %'
313
+ AND test_status IN ('FAIL', 'ERROR')
314
+ AND job_status!='error'
315
+ ORDER BY job_name, test_name
316
+ """
301
317
return client .query_dataframe (query )
302
318
303
319
304
320
def get_checks_known_fails (client : Client , job_url : str , known_fails : dict ):
305
321
"""
306
322
Get tests that are known to fail for the given job URL.
307
323
"""
308
- assert len (known_fails ) > 0 , "cannot query the database with empty known fails"
309
- columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link"
310
- query = f"""SELECT { columns } FROM `gh-data`.checks
311
- WHERE task_url LIKE '{ job_url } %'
312
- AND test_status='BROKEN'
313
- AND test_name IN ({ ',' .join (f"'{ test } '" for test in known_fails .keys ())} )
314
- ORDER BY test_name, check_name
315
- """
324
+ if len (known_fails ) == 0 :
325
+ return pd .DataFrame ()
326
+
327
+ query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link
328
+ FROM (
329
+ SELECT
330
+ argMax(check_status, check_start_time) as job_status,
331
+ check_name as job_name,
332
+ argMax(test_status, check_start_time) as status,
333
+ test_name,
334
+ report_url as results_link,
335
+ task_url
336
+ FROM `gh-data`.checks
337
+ GROUP BY check_name, test_name, report_url, task_url
338
+ )
339
+ WHERE task_url LIKE '{ job_url } %'
340
+ AND test_status='BROKEN'
341
+ AND test_name IN ({ ',' .join (f"'{ test } '" for test in known_fails .keys ())} )
342
+ ORDER BY job_name, test_name
343
+ """
316
344
317
345
df = client .query_dataframe (query )
318
346
@@ -333,12 +361,22 @@ def get_checks_errors(client: Client, job_url: str):
333
361
"""
334
362
Get checks that have status 'error' for the given job URL.
335
363
"""
336
- columns = "check_status as job_status, check_name as job_name, test_status, test_name, report_url as results_link"
337
- query = f"""SELECT { columns } FROM `gh-data`.checks
338
- WHERE task_url LIKE '{ job_url } %'
339
- AND check_status=='error'
340
- ORDER BY check_name, test_name
341
- """
364
+ query = f"""SELECT job_status, job_name, status as test_status, test_name, results_link
365
+ FROM (
366
+ SELECT
367
+ argMax(check_status, check_start_time) as job_status,
368
+ check_name as job_name,
369
+ argMax(test_status, check_start_time) as status,
370
+ test_name,
371
+ report_url as results_link,
372
+ task_url
373
+ FROM `gh-data`.checks
374
+ GROUP BY check_name, test_name, report_url, task_url
375
+ )
376
+ WHERE task_url LIKE '{ job_url } %'
377
+ AND job_status=='error'
378
+ ORDER BY job_name, test_name
379
+ """
342
380
return client .query_dataframe (query )
343
381
344
382
0 commit comments