Skip to content

Commit 0af88ce

Browse files
macdice authored and Commitfest Bot committed
Maintain statistics about build times.
An hourly cronjob refreshes a table of basic stats about each status. Also fix another place where pg8000-origin strings don't seem to compare as expected (gotta figure out what's behind that...). Builds are polled when they're overdue; next up we should do the same for individual tasks, per status and per task name (i.e. Windows vs macOS etc.), so we can figure out exactly when a task is definitely overdue based on master. There is not enough data from master and REL_XXX yet for this to actually be useful; waiting for more data...
1 parent 59c9638 commit 0af88ce

File tree

3 files changed

+85
-14
lines changed

3 files changed

+85
-14
lines changed

cfbot_cirrus.py

Lines changed: 74 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -238,9 +238,9 @@ def update_branch(cursor, build_id, build_status, commit_id, build_branch):
238238
LIMIT 1""",
239239
(build_id, build_branch, commit_id),
240240
)
241-
(is_most_recent_build_id,) = cursor.fetchone()
242-
if is_most_recent_build_id:
243-
is_current_build_for_branch = True
241+
if row := cursor.fetchone():
242+
if row[0]:
243+
is_current_build_for_branch = True
244244

245245
if is_current_build_for_branch:
246246
# Find the latest branch (push) record corresponding to the
@@ -512,7 +512,7 @@ def ingest_webhook(conn, event_type, event):
512512
"""select 1
513513
from build
514514
where build_id = %s
515-
for key share""",
515+
for update""",
516516
(build_id,),
517517
)
518518
if not cursor.fetchone():
@@ -674,15 +674,16 @@ def poll_stale_build(conn, build_id):
674674

675675
# check if we already have this task, and what its status is
676676
cursor.execute(
677-
"""SELECT status
677+
"""SELECT status, status != %s
678678
FROM task
679-
WHERE task_id = %s""",
680-
(task_id,),
679+
WHERE task_id = %s
680+
FOR UPDATE""",
681+
(task_status, task_id),
681682
)
682683
if row := cursor.fetchone():
683684
# process change, if it is different
684-
(old_task_status,) = row
685-
if old_task_status != task_status:
685+
(old_task_status, change) = row
686+
if change:
686687
process_new_task_status(
687688
cursor, task_id, old_task_status, task_status, "poll", task_sent
688689
)
@@ -813,12 +814,12 @@ def poll_stale_builds(conn):
813814
status,
814815
avg_elapsed + stddev_elapsed * 2 as elapsed_p95
815816
from build_status_statistics
816-
where branch_name = 'master' or branch_name like 'REL_%'),
817+
where branch_name = 'master' or branch_name like 'REL_%%'),
817818
run as (select build_id,
818819
status,
819820
branch_name,
820821
case
821-
when branch_name = 'master' or branch_name like 'REL_%'
822+
when branch_name = 'master' or branch_name like 'REL_%%'
822823
then branch_name
823824
else 'master'
824825
end as reference_branch,
@@ -831,7 +832,8 @@ def poll_stale_builds(conn):
831832
run.status,
832833
extract(epoch from ref.elapsed_p95),
833834
extract(epoch from run.elapsed)
834-
from run left join ref on (run.reference_branch = ref.branch_name)
835+
from run
836+
left join ref on ((run.reference_branch, run.status) = (ref.branch_name, ref.status))
835837
where run.elapsed > COALESCE(elapsed_p95, interval '30 minutes')""")
836838
for (
837839
build_id,
@@ -861,8 +863,66 @@ def poll_stale_builds(conn):
861863
cfbot_work_queue.insert_work_queue(cursor, "poll-stale-build", build_id)
862864

863865

866+
def refresh_task_status_statistics(conn):
    """Rebuild the task_status_statistics table from task status history.

    Three statements are run, in order, on a cursor obtained from ``conn``:

    1. Delete task_status_history rows whose ``sent`` timestamp is more
       than 30 days old.
    2. Delete every row from task_status_statistics.
    3. Repopulate task_status_statistics with the average and standard
       deviation of the time spent in each status, grouped by branch name,
       task name and status.

    The time spent in a status is the gap between a history row's ``sent``
    and the next row's ``sent`` for the same task; rows with no successor
    yield NULL and are excluded.  Only tasks whose current status is
    'COMPLETED' contribute.

    This function does not commit; the caller is expected to do so.
    """
    prune_history_sql = (
        """delete from task_status_history where sent < now() - interval '30 days'"""
    )
    clear_statistics_sql = """delete from task_status_statistics"""

    # XXX waiting for more data before removing hard coded 'master' from here...
    # Note: coalesce('master', ...) always evaluates to 'master' because its
    # first argument is a non-null literal, so every branch is pooled under
    # that name for now.  The commented-out branch filter is kept for later.
    # A NULL stddev is replaced by a zero interval via coalesce().
    rebuild_statistics_sql = """insert into task_status_statistics
                      (branch_name, task_name, status, avg_elapsed, stddev_elapsed, n)
                      with elapsed as (select coalesce('master', build.branch_name) as branch_name,
                                              task.task_name,
                                              h.status,
                                              lead(h.sent) over(partition by h.task_id order by h.sent) - h.sent as elapsed
                                         from build
                                         join task using (build_id)
                                         join task_status_history h using (task_id)
                                        where task.status = 'COMPLETED'
                                          --- and (build.branch_name = 'master' or build.branch_name like 'REL_%%')
                                          )
                      select branch_name,
                             task_name,
                             status,
                             avg(elapsed),
                             coalesce(interval '1 second' * stddev(extract(epoch from elapsed)), interval '0 seconds') as stddev,
                             count(elapsed) as n
                        from elapsed
                       where elapsed is not null
                       group by 1, 2, 3
                       --- having count(*) > 1"""

    cur = conn.cursor()
    for statement in (prune_history_sql, clear_statistics_sql, rebuild_statistics_sql):
        cur.execute(statement)
895+
896+
897+
def refresh_build_status_statistics(conn):
    """Rebuild the build_status_statistics table from build status history.

    Three statements are run, in order, on a cursor obtained from ``conn``:

    1. Delete build_status_history rows whose ``received`` timestamp is
       more than 30 days old.
    2. Delete every row from build_status_statistics.
    3. Repopulate build_status_statistics with the average and standard
       deviation of the time spent in each status, grouped by branch name
       and status.

    The time spent in a status is the gap between a history row's
    ``received`` and the next row's ``received`` for the same build; rows
    with no successor yield NULL and are excluded.  Only builds whose
    current status is 'COMPLETED' contribute.

    This function does not commit; the caller is expected to do so.
    """
    prune_history_sql = (
        """delete from build_status_history where received < now() - interval '30 days'"""
    )
    clear_statistics_sql = """delete from build_status_statistics"""

    # Note: coalesce('master', ...) always evaluates to 'master' because its
    # first argument is a non-null literal, so every branch is pooled under
    # that name.  The branch filter inside the query is commented out.
    # A NULL stddev is replaced by a zero interval via coalesce().
    rebuild_statistics_sql = """insert into build_status_statistics
                      (branch_name, status, avg_elapsed, stddev_elapsed, n)
                      with elapsed as (select coalesce('master', build.branch_name) as branch_name,
                                              h.status,
                                              lead(received) over (partition by h.build_id order by received) - received as elapsed
                                         from build_status_history h
                                         join build using (build_id)
                                        where build.status = 'COMPLETED'
                                          --- and (build.branch_name = 'master' or build.branch_name like 'REL_%%')
                                          )
                      select branch_name,
                             status,
                             avg(elapsed),
                             coalesce(interval '1 second' * stddev(extract(epoch from elapsed)), interval '0 seconds') as stddev,
                             count(elapsed) as n
                        from elapsed
                       where elapsed is not null
                       group by 1, 2
                       --- having count(*) > 1"""

    cur = conn.cursor()
    for statement in (prune_history_sql, clear_statistics_sql, rebuild_statistics_sql):
        cur.execute(statement)
922+
923+
864924
if __name__ == "__main__":
865925
with cfbot_util.db() as conn:
866-
cursor = conn.cursor()
867-
poll_stale_build(conn, "4879753326362624")
926+
refresh_task_status_statistics(conn)
927+
refresh_build_status_statistics(conn)
868928
conn.commit()

cfbot_periodic_hourly.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
#!/usr/bin/env python3
2+
3+
import cfbot_cirrus
4+
import cfbot_util
5+
6+
if __name__ == "__main__":
    # Entry point for the hourly job: refresh the build statistics first,
    # then the task statistics, and commit both on the same connection.
    with cfbot_util.db() as conn:
        for refresh in (
            cfbot_cirrus.refresh_build_status_statistics,
            cfbot_cirrus.refresh_task_status_statistics,
        ):
            refresh(conn)
        conn.commit()

create.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ ALTER TABLE public.task_status_history OWNER TO cfbot;
222222

223223
CREATE TABLE public.task_status_statistics (
224224
branch_name text NOT NULL,
225+
task_name text NOT NULL,
225226
status text NOT NULL,
226227
avg_elapsed interval NOT NULL,
227228
stddev_elapsed interval NOT NULL,

0 commit comments

Comments
 (0)