Skip to content

Commit 057f625

Browse files
committed
Lint and format
1 parent e71d617 commit 057f625

File tree

3 files changed

+108
-67
lines changed

3 files changed

+108
-67
lines changed

benchmark.py

Lines changed: 54 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -26,17 +26,20 @@
2626

2727
# Define benchmark methods
2828

29+
2930
def bench_sort(values, K):
    """Sort a copy of the list and return the first K smallest items.

    Args:
        values: List of mutually comparable items. It is left unmodified
            because the sort is performed on a copy.
        K: Number of smallest items to return.

    Returns:
        A new list holding the K smallest items in ascending order. If K
        is larger than the list, the whole sorted copy is returned.
    """
    # Work on a copy so the caller's list keeps its original order.
    lst = values.copy()
    lst.sort()
    return lst[:K]
3435

36+
3537
def bench_heapq(values, K):
    """Use heapq.nsmallest on a copy of the list to obtain the first K smallest items.

    Args:
        values: List of mutually comparable items. It is left unmodified
            because the selection runs on a copy.
        K: Number of smallest items to return.

    Returns:
        A new list holding the K smallest items in ascending order, as
        produced by heapq.nsmallest.
    """
    # Copy first, mirroring bench_sort, so the caller's list is never touched.
    lst = values.copy()
    return heapq.nsmallest(K, lst)
3941

42+
4043
def bench_quickselect(values, K):
4144
"""
4245
Use selectlib.quickselect on a copy of the list to partition it so that the element at index K-1
@@ -48,6 +51,7 @@ def bench_quickselect(values, K):
4851
result.sort()
4952
return result
5053

54+
5155
def bench_heapselect(values, K):
5256
"""
5357
Use selectlib.heapselect on a copy of the list to partition it so that the element at index K-1
@@ -59,6 +63,7 @@ def bench_heapselect(values, K):
5963
result.sort()
6064
return result
6165

66+
6267
def bench_nth_element(values, K):
6368
"""
6469
Use selectlib.nth_element on a copy of the list to partition it so that the element at index K-1
@@ -70,15 +75,17 @@ def bench_nth_element(values, K):
7075
result.sort()
7176
return result
7277

78+
7379
# Dictionary of methods to benchmark.
7480
methods = {
75-
"sort": bench_sort,
76-
"heapq.nsmallest": bench_heapq,
77-
"quickselect": bench_quickselect,
78-
"heapselect": bench_heapselect,
79-
"nth_element": bench_nth_element,
81+
'sort': bench_sort,
82+
'heapq.nsmallest': bench_heapq,
83+
'quickselect': bench_quickselect,
84+
'heapselect': bench_heapselect,
85+
'nth_element': bench_nth_element,
8086
}
8187

88+
8289
def run_benchmarks():
8390
"""
8491
Runs the benchmarks for different list sizes.
@@ -96,7 +103,9 @@ def run_benchmarks():
96103
for N in N_values:
97104
# Compute K values (ensure at least 1)
98105
K_VALUES = [max(1, int(N * p)) for p in percentages]
99-
print(f"\nBenchmarking for N = {N:,} (K values: {[f'{k:,}' for k in K_VALUES]})")
106+
print(
107+
f'\nBenchmarking for N = {N:,} (K values: {[f"{k:,}" for k in K_VALUES]})'
108+
)
100109
# Generate a random list of integers
101110
original = [random.randint(0, 1_000_000) for _ in range(N)]
102111

@@ -105,17 +114,23 @@ def run_benchmarks():
105114

106115
# For each K value, run each method 5 times and take the median time
107116
for K in K_VALUES:
108-
print(f" K = {K:,}")
117+
print(f' K = {K:,}')
109118
for name, func in methods.items():
110-
test_callable = lambda: func(original, K)
119+
120+
def test_callable():
121+
return func(original, K)
122+
111123
times = timeit.repeat(stmt=test_callable, repeat=5, number=1)
112124
med = statistics.median(times)
113125
results[name][K] = med
114-
times_ms = [f"{t*1000:,.3f}" for t in times]
115-
print(f" {name:15}: median = {med*1000:,.3f} ms (runs: {times_ms} ms)")
116-
overall_results[N] = {"K_values": K_VALUES, "results": results}
126+
times_ms = [f'{t * 1000:,.3f}' for t in times]
127+
print(
128+
f' {name:15}: median = {med * 1000:,.3f} ms (runs: {times_ms} ms)'
129+
)
130+
overall_results[N] = {'K_values': K_VALUES, 'results': results}
117131
return overall_results
118132

133+
119134
def plot_results(overall_results):
120135
"""
121136
Creates a vertical stack of grouped bar charts.
@@ -124,54 +139,61 @@ def plot_results(overall_results):
124139
and the y-axis shows the median time in ms.
125140
"""
126141
num_charts = len(overall_results)
127-
fig, axes = plt.subplots(nrows=num_charts, ncols=1, figsize=(10, 4*num_charts))
142+
fig, axes = plt.subplots(nrows=num_charts, ncols=1, figsize=(10, 4 * num_charts))
128143

129144
if num_charts == 1:
130145
axes = [axes]
131146

132147
# Bar appearance settings
133148
bar_width = 0.15
134149
method_offsets = {
135-
"sort": -2*bar_width,
136-
"heapq.nsmallest": -bar_width,
137-
"quickselect": 0,
138-
"heapselect": bar_width,
139-
"nth_element": 2*bar_width,
150+
'sort': -2 * bar_width,
151+
'heapq.nsmallest': -bar_width,
152+
'quickselect': 0,
153+
'heapselect': bar_width,
154+
'nth_element': 2 * bar_width,
140155
}
141156
method_colors = {
142-
"sort": '#1f77b4',
143-
"heapq.nsmallest": '#ff7f0e',
144-
"quickselect": '#2ca02c',
145-
"heapselect": '#d62728',
146-
"nth_element": '#9467bd',
157+
'sort': '#1f77b4',
158+
'heapq.nsmallest': '#ff7f0e',
159+
'quickselect': '#2ca02c',
160+
'heapselect': '#d62728',
161+
'nth_element': '#9467bd',
147162
}
148163

149164
# Process each chart (one per N value)
150165
for ax, (N, data) in zip(axes, sorted(overall_results.items(), key=lambda x: x[0])):
151-
K_VALUES = data["K_values"]
152-
results = data["results"]
166+
K_VALUES = data['K_values']
167+
results = data['results']
153168
x_positions = list(range(len(K_VALUES)))
154-
x_labels = [f"{K:,} ({(K/N)*100:.1f}%)" for K in K_VALUES]
169+
x_labels = [f'{K:,} ({(K / N) * 100:.1f}%)' for K in K_VALUES]
155170

156171
for method, timing_dict in results.items():
157-
times_ms = [timing_dict[K]*1000 for K in K_VALUES]
172+
times_ms = [timing_dict[K] * 1000 for K in K_VALUES]
158173
positions = [x + method_offsets[method] for x in x_positions]
159-
bars = ax.bar(positions, times_ms, width=bar_width, label=method, color=method_colors.get(method))
174+
bars = ax.bar(
175+
positions,
176+
times_ms,
177+
width=bar_width,
178+
label=method,
179+
color=method_colors.get(method),
180+
)
160181
ax.bar_label(bars, fmt='%.2f', padding=1, fontsize=8)
161182

162-
ax.set_title(f"N = {N:,}")
163-
ax.set_xlabel("K (percentage of N)")
164-
ax.set_ylabel("Median time (ms)")
183+
ax.set_title(f'N = {N:,}')
184+
ax.set_xlabel('K (percentage of N)')
185+
ax.set_ylabel('Median time (ms)')
165186
ax.set_xticks(x_positions)
166187
ax.set_xticklabels(x_labels)
167-
ax.legend(title="Method")
188+
ax.legend(title='Method')
168189
ax.grid(True, linestyle='--', alpha=0.5)
169190

170-
plt.suptitle("Performance Benchmark for N-Smallest Methods", fontsize=18)
191+
plt.suptitle('Performance Benchmark for N-Smallest Methods', fontsize=18)
171192
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
172193
plt.savefig('plot.png')
173194
plt.show()
174195

196+
175197
if __name__ == '__main__':
176198
bench_results = run_benchmarks()
177199
plot_results(bench_results)

benchmark_median.py

Lines changed: 45 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
# The median index is computed as (n-1)//2.
3232
# ---------------------------------------------------------------------------
3333

34+
3435
def bench_median_low(values):
3536
"""
3637
Uses the built‐in statistics.median_low function.
@@ -39,6 +40,7 @@ def bench_median_low(values):
3940
# statistics.median_low returns the median (for even-length lists, the lower of the two)
4041
return stats.median_low(lst)
4142

43+
4244
def bench_nth_element(values):
4345
"""
4446
Uses selectlib.nth_element to repartition the list so that the median is at index (n-1)//2.
@@ -50,6 +52,7 @@ def bench_nth_element(values):
5052
selectlib.nth_element(lst, median_index)
5153
return lst[median_index]
5254

55+
5356
def bench_quickselect(values):
5457
"""
5558
Uses selectlib.quickselect to reposition the median element in the list.
@@ -60,6 +63,7 @@ def bench_quickselect(values):
6063
selectlib.quickselect(lst, median_index)
6164
return lst[median_index]
6265

66+
6367
def bench_heapselect(values):
6468
"""
6569
Uses selectlib.heapselect to reposition the median element in the list.
@@ -70,14 +74,16 @@ def bench_heapselect(values):
7074
selectlib.heapselect(lst, median_index)
7175
return lst[median_index]
7276

77+
7378
# Dictionary of methods to benchmark.
7479
methods = {
75-
"median_low": bench_median_low,
76-
"nth_element": bench_nth_element,
77-
"quickselect": bench_quickselect,
78-
"heapselect" : bench_heapselect,
80+
'median_low': bench_median_low,
81+
'nth_element': bench_nth_element,
82+
'quickselect': bench_quickselect,
83+
'heapselect': bench_heapselect,
7984
}
8085

86+
8187
# ---------------------------------------------------------------------------
8288
# Benchmark runner
8389
# ---------------------------------------------------------------------------
@@ -95,23 +101,28 @@ def run_benchmarks():
95101
overall_results = {} # {N: { method: time_in_seconds, ... } }
96102

97103
for N in N_values:
98-
print(f"\nBenchmarking for N = {N:,} (median index = {(N-1)//2:,})")
104+
print(f'\nBenchmarking for N = {N:,} (median index = {(N - 1) // 2:,})')
99105
# Generate a random list of integers
100106
original = [random.randint(0, 1_000_000) for _ in range(N)]
101107

102108
results = {}
103109
for name, func in methods.items():
104110
# Prepare a callable that calls the method for the given list
105-
test_callable = lambda: func(original)
111+
def test_callable():
112+
return func(original)
113+
106114
# Run 5 times
107115
times = timeit.repeat(stmt=test_callable, repeat=5, number=1)
108116
med_time = statistics.median(times)
109117
results[name] = med_time
110-
times_ms = [f"{t*1000:,.3f}" for t in times] # format as milliseconds
111-
print(f" {name:12}: median = {med_time*1000:,.3f} ms (runs: {times_ms})")
118+
times_ms = [f'{t * 1000:,.3f}' for t in times] # format as milliseconds
119+
print(
120+
f' {name:12}: median = {med_time * 1000:,.3f} ms (runs: {times_ms})'
121+
)
112122
overall_results[N] = results
113123
return overall_results
114124

125+
115126
# ---------------------------------------------------------------------------
116127
# Plotting results
117128
# ---------------------------------------------------------------------------
@@ -126,12 +137,12 @@ def plot_results(results):
126137
num_groups = len(N_values)
127138

128139
# Method ordering and colors (similar to benchmark.py)
129-
methods_order = ["median_low", "nth_element", "quickselect", "heapselect"]
140+
methods_order = ['median_low', 'nth_element', 'quickselect', 'heapselect']
130141
method_colors = {
131-
"median_low": '#1f77b4',
132-
"nth_element": '#ff7f0e',
133-
"quickselect": '#2ca02c',
134-
"heapselect": '#d62728',
142+
'median_low': '#1f77b4',
143+
'nth_element': '#ff7f0e',
144+
'quickselect': '#2ca02c',
145+
'heapselect': '#d62728',
135146
}
136147

137148
# X positions for the groups
@@ -140,32 +151,41 @@ def plot_results(results):
140151
# Bar appearance settings
141152
bar_width = 0.18
142153
offsets = {
143-
"median_low": -1.5*bar_width,
144-
"nth_element": -0.5*bar_width,
145-
"quickselect": 0.5*bar_width,
146-
"heapselect": 1.5*bar_width,
154+
'median_low': -1.5 * bar_width,
155+
'nth_element': -0.5 * bar_width,
156+
'quickselect': 0.5 * bar_width,
157+
'heapselect': 1.5 * bar_width,
147158
}
148159

149160
plt.figure(figsize=(10, 6))
150161

151162
# For each method, plot a bar for each list size
152163
for method in methods_order:
153-
times_ms = [results[N][method]*1000 for N in N_values]
164+
times_ms = [results[N][method] * 1000 for N in N_values]
154165
positions = [pos + offsets[method] for pos in group_positions]
155-
bars = plt.bar(positions, times_ms, width=bar_width, label=method, color=method_colors.get(method))
166+
bars = plt.bar(
167+
positions,
168+
times_ms,
169+
width=bar_width,
170+
label=method,
171+
color=method_colors.get(method),
172+
)
156173
plt.bar_label(bars, fmt='%.2f', padding=3, fontsize=8)
157174

158175
# Configure x-axis with list sizes (formatted with commas)
159-
plt.xticks(group_positions, [f"{N:,}" for N in N_values])
160-
plt.xlabel("List size (N)")
161-
plt.ylabel("Time (ms)")
162-
plt.title("Benchmark: statistics.median_low vs. selectlib selection methods (median)")
163-
plt.legend(title="Method")
176+
plt.xticks(group_positions, [f'{N:,}' for N in N_values])
177+
plt.xlabel('List size (N)')
178+
plt.ylabel('Time (ms)')
179+
plt.title(
180+
'Benchmark: statistics.median_low vs. selectlib selection methods (median)'
181+
)
182+
plt.legend(title='Method')
164183
plt.grid(True, linestyle='--', alpha=0.5)
165184
plt.tight_layout()
166-
plt.savefig("plot_median.png")
185+
plt.savefig('plot_median.png')
167186
plt.show()
168187

188+
169189
# ---------------------------------------------------------------------------
170190
# Main
171191
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)