Mark benchmark query success on output json

ding-young · ding-young · commit e484c5265bf2 · 2025-05-29T14:39:22.000Z
diff --git a/benchmarks/compare.py b/benchmarks/compare.py
@@ -47,13 +47,15 @@ class QueryRun:
     query: int
     iterations: List[QueryResult]
     start_time: int
+    success: bool = True
 
     @classmethod
     def load_from(cls, data: Dict[str, Any]) -> QueryRun:
         return cls(
             query=data["query"],
             iterations=[QueryResult(**iteration) for iteration in data["iterations"]],
             start_time=data["start_time"],
+            success=data["success"],
         )
 
     @property
@@ -125,11 +127,26 @@ def compare(
     faster_count = 0
     slower_count = 0
     no_change_count = 0
+    failure_count = 0
     total_baseline_time = 0
     total_comparison_time = 0
 
     for baseline_result, comparison_result in zip(baseline.queries, comparison.queries):
         assert baseline_result.query == comparison_result.query
+        
+        base_failed = not baseline_result.success
+        comp_failed = not comparison_result.success 
+        # If a query fails, its execution time is excluded from the performance comparison
+        if base_failed or comp_failed:
+            change_text = "incomparable" 
+            failure_count += 1
+            table.add_row(
+                f"Q{baseline_result.query}",
+                "FAIL" if base_failed else f"{baseline_result.execution_time:.2f}ms",
+                "FAIL" if comp_failed else f"{comparison_result.execution_time:.2f}ms",
+                change_text,
+            )
+            continue
 
         total_baseline_time += baseline_result.execution_time
         total_comparison_time += comparison_result.execution_time
@@ -156,8 +173,8 @@ def compare(
     console.print(table)
 
     # Calculate averages
-    avg_baseline_time = total_baseline_time / len(baseline.queries)
-    avg_comparison_time = total_comparison_time / len(comparison.queries)
+    avg_baseline_time = total_baseline_time / (len(baseline.queries) - failure_count)
+    avg_comparison_time = total_comparison_time / (len(comparison.queries) - failure_count)
 
     # Summary table
     summary_table = Table(show_header=True, header_style="bold magenta")
@@ -171,6 +188,7 @@ def compare(
     summary_table.add_row("Queries Faster", str(faster_count))
     summary_table.add_row("Queries Slower", str(slower_count))
     summary_table.add_row("Queries with No Change", str(no_change_count))
+    summary_table.add_row("Queries with Failure", str(failure_count))
 
     console.print(summary_table)
 
diff --git a/benchmarks/src/clickbench.rs b/benchmarks/src/clickbench.rs
@@ -141,9 +141,9 @@ impl RunOpt {
                     }
                 }
                 Err(e) => {
-                    eprintln!("Query {query_id} failed: {e}");
-                    // TODO mark failure
+                    benchmark_run.mark_failed();
                     failed_queries.push(query_id);
+                    eprintln!("Query {query_id} failed: {e}");
                 }
             }
         }
diff --git a/benchmarks/src/sort_tpch.rs b/benchmarks/src/sort_tpch.rs
@@ -174,7 +174,7 @@ impl RunOpt {
     /// If query is specified from command line, run only that query.
     /// Otherwise, run all queries.
     pub async fn run(&self) -> Result<()> {
-        let mut benchmark_run = BenchmarkRun::new();
+        let mut benchmark_run: BenchmarkRun = BenchmarkRun::new();
 
         let query_range = match self.query {
             Some(query_id) => query_id..=query_id,
@@ -194,6 +194,7 @@ impl RunOpt {
                     }
                 }
                 Err(e) => {
+                    benchmark_run.mark_failed();
                     failed_queries.push(query_id);
                     eprintln!("Query {query_id} failed: {e}");
                 }
diff --git a/benchmarks/src/tpch/run.rs b/benchmarks/src/tpch/run.rs
@@ -131,7 +131,7 @@ impl RunOpt {
                     }
                 }
                 Err(e) => {
-                    // TODO mark
+                    benchmark_run.mark_failed();
                     failed_queries.push(query_id);
                     eprintln!("Query {query_id} failed: {e}");
                 }
diff --git a/benchmarks/src/util/run.rs b/benchmarks/src/util/run.rs
@@ -90,6 +90,7 @@ pub struct BenchQuery {
     iterations: Vec<QueryIter>,
     #[serde(serialize_with = "serialize_start_time")]
     start_time: SystemTime,
+    success: bool,
 }
 /// Internal representation of a single benchmark query iteration result.
 pub struct QueryResult {
@@ -124,6 +125,7 @@ impl BenchmarkRun {
             query: id.to_owned(),
             iterations: vec![],
             start_time: SystemTime::now(),
+            success: true,
         });
         if let Some(c) = self.current_case.as_mut() {
             *c += 1;
@@ -142,6 +144,13 @@ impl BenchmarkRun {
         }
     }
 
+    /// Mark the current query as failed (does nothing if there is no current case)
+    pub fn mark_failed(&mut self) {
+        if let Some(idx) = self.current_case {
+            self.queries[idx].success = false;
+        }
+    }
+
     /// Stringify data into formatted json
     pub fn to_json(&self) -> String {
         let mut output = HashMap::<&str, Value>::new();

Original file line number	Diff line number	Diff line change
`@@ -141,9 +141,9 @@ impl RunOpt {`
`141`	`141`	`}`
`142`	`142`	`}`
`143`	`143`	`Err(e) => {`
`144`		`- eprintln!("Query {query_id} failed: {e}");`
`145`		`- // TODO mark failure`
	`144`	`+ benchmark_run.mark_failed();`
`146`	`145`	`failed_queries.push(query_id);`
	`146`	`+ eprintln!("Query {query_id} failed: {e}");`
`147`	`147`	`}`
`148`	`148`	`}`
`149`	`149`	`}`
Original file line number	Diff line number	Diff line change
`@@ -174,7 +174,7 @@ impl RunOpt {`
`174`	`174`	`/// If query is specified from command line, run only that query.`
`175`	`175`	`/// Otherwise, run all queries.`
`176`	`176`	`pub async fn run(&self) -> Result<()> {`
`177`		`- let mut benchmark_run = BenchmarkRun::new();`
	`177`	`+ let mut benchmark_run: BenchmarkRun = BenchmarkRun::new();`
`178`	`178`
`179`	`179`	`let query_range = match self.query {`
`180`	`180`	`Some(query_id) => query_id..=query_id,`
`@@ -194,6 +194,7 @@ impl RunOpt {`
`194`	`194`	`}`
`195`	`195`	`}`
`196`	`196`	`Err(e) => {`
	`197`	`+ benchmark_run.mark_failed();`
`197`	`198`	`failed_queries.push(query_id);`
`198`	`199`	`eprintln!("Query {query_id} failed: {e}");`
`199`	`200`	`}`
Original file line number	Diff line number	Diff line change
`@@ -131,7 +131,7 @@ impl RunOpt {`
`131`	`131`	`}`
`132`	`132`	`}`
`133`	`133`	`Err(e) => {`
`134`		`- // TODO mark`
	`134`	`+ benchmark_run.mark_failed();`
`135`	`135`	`failed_queries.push(query_id);`
`136`	`136`	`eprintln!("Query {query_id} failed: {e}");`
`137`	`137`	`}`