Skip to content

Commit e484c52

Browse files
committed
Mark benchmark query success on output json
1 parent 6c768ec commit e484c52

File tree

5 files changed

+34
-6
lines changed

5 files changed

+34
-6
lines changed

benchmarks/compare.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,15 @@ class QueryRun:
4747
query: int
4848
iterations: List[QueryResult]
4949
start_time: int
50+
success: bool = True
5051

5152
@classmethod
5253
def load_from(cls, data: Dict[str, Any]) -> QueryRun:
5354
return cls(
5455
query=data["query"],
5556
iterations=[QueryResult(**iteration) for iteration in data["iterations"]],
5657
start_time=data["start_time"],
58+
success=data["success"],
5759
)
5860

5961
@property
@@ -125,11 +127,26 @@ def compare(
125127
faster_count = 0
126128
slower_count = 0
127129
no_change_count = 0
130+
failure_count = 0
128131
total_baseline_time = 0
129132
total_comparison_time = 0
130133

131134
for baseline_result, comparison_result in zip(baseline.queries, comparison.queries):
132135
assert baseline_result.query == comparison_result.query
136+
137+
base_failed = not baseline_result.success
138+
comp_failed = not comparison_result.success
139+
# If a query fails, its execution time is excluded from the performance comparison
140+
if base_failed or comp_failed:
141+
change_text = "incomparable"
142+
failure_count += 1
143+
table.add_row(
144+
f"Q{baseline_result.query}",
145+
"FAIL" if base_failed else f"{baseline_result.execution_time:.2f}ms",
146+
"FAIL" if comp_failed else f"{comparison_result.execution_time:.2f}ms",
147+
change_text,
148+
)
149+
continue
133150

134151
total_baseline_time += baseline_result.execution_time
135152
total_comparison_time += comparison_result.execution_time
@@ -156,8 +173,8 @@ def compare(
156173
console.print(table)
157174

158175
# Calculate averages
159-
avg_baseline_time = total_baseline_time / len(baseline.queries)
160-
avg_comparison_time = total_comparison_time / len(comparison.queries)
176+
avg_baseline_time = total_baseline_time / (len(baseline.queries) - failure_count)
177+
avg_comparison_time = total_comparison_time / (len(comparison.queries) - failure_count)
161178

162179
# Summary table
163180
summary_table = Table(show_header=True, header_style="bold magenta")
@@ -171,6 +188,7 @@ def compare(
171188
summary_table.add_row("Queries Faster", str(faster_count))
172189
summary_table.add_row("Queries Slower", str(slower_count))
173190
summary_table.add_row("Queries with No Change", str(no_change_count))
191+
summary_table.add_row("Queries with Failure", str(failure_count))
174192

175193
console.print(summary_table)
176194

benchmarks/src/clickbench.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -141,9 +141,9 @@ impl RunOpt {
141141
}
142142
}
143143
Err(e) => {
144-
eprintln!("Query {query_id} failed: {e}");
145-
// TODO mark failure
144+
benchmark_run.mark_failed();
146145
failed_queries.push(query_id);
146+
eprintln!("Query {query_id} failed: {e}");
147147
}
148148
}
149149
}

benchmarks/src/sort_tpch.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ impl RunOpt {
174174
/// If query is specified from command line, run only that query.
175175
/// Otherwise, run all queries.
176176
pub async fn run(&self) -> Result<()> {
177-
let mut benchmark_run = BenchmarkRun::new();
177+
let mut benchmark_run: BenchmarkRun = BenchmarkRun::new();
178178

179179
let query_range = match self.query {
180180
Some(query_id) => query_id..=query_id,
@@ -194,6 +194,7 @@ impl RunOpt {
194194
}
195195
}
196196
Err(e) => {
197+
benchmark_run.mark_failed();
197198
failed_queries.push(query_id);
198199
eprintln!("Query {query_id} failed: {e}");
199200
}

benchmarks/src/tpch/run.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ impl RunOpt {
131131
}
132132
}
133133
Err(e) => {
134-
// TODO mark
134+
benchmark_run.mark_failed();
135135
failed_queries.push(query_id);
136136
eprintln!("Query {query_id} failed: {e}");
137137
}

benchmarks/src/util/run.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ pub struct BenchQuery {
9090
iterations: Vec<QueryIter>,
9191
#[serde(serialize_with = "serialize_start_time")]
9292
start_time: SystemTime,
93+
success: bool,
9394
}
9495
/// Internal representation of a single benchmark query iteration result.
9596
pub struct QueryResult {
@@ -124,6 +125,7 @@ impl BenchmarkRun {
124125
query: id.to_owned(),
125126
iterations: vec![],
126127
start_time: SystemTime::now(),
128+
success: true,
127129
});
128130
if let Some(c) = self.current_case.as_mut() {
129131
*c += 1;
@@ -142,6 +144,13 @@ impl BenchmarkRun {
142144
}
143145
}
144146

147+
/// Mark the current query as failed
148+
pub fn mark_failed(&mut self) {
149+
if let Some(idx) = self.current_case {
150+
self.queries[idx].success = false;
151+
}
152+
}
153+
145154
/// Stringify data into formatted json
146155
pub fn to_json(&self) -> String {
147156
let mut output = HashMap::<&str, Value>::new();

0 commit comments

Comments
 (0)