2626
2727# Define benchmark methods
2828
29+
def bench_sort(values, K):
    """Return the K smallest items of ``values`` by fully sorting a copy.

    Args:
        values: List of mutually comparable items. It is never mutated;
            the sort is performed on a shallow copy.
        K: Number of leading (smallest) items to return.

    Returns:
        A new list holding the first ``K`` items of the sorted copy, in
        ascending order. If ``K`` exceeds ``len(values)`` the whole sorted
        copy is returned.
    """
    # Work on a copy so repeated benchmark runs all see the same unsorted input.
    lst = values.copy()
    lst.sort()
    return lst[:K]
3435
36+
def bench_heapq(values, K):
    """Return the K smallest items of ``values`` using ``heapq.nsmallest``.

    Args:
        values: List of mutually comparable items. It is never mutated.
        K: Number of smallest items to return.

    Returns:
        A new list with the ``K`` smallest items in ascending order (fewer
        if ``values`` has fewer than ``K`` items).
    """
    # The copy is kept on purpose: the other benchmark methods also copy
    # their input, so every method pays the same copying cost.
    lst = values.copy()
    return heapq.nsmallest(K, lst)
3941
42+
4043def bench_quickselect (values , K ):
4144 """
4245 Use selectlib.quickselect on a copy of the list to partition it so that the element at index K-1
@@ -48,6 +51,7 @@ def bench_quickselect(values, K):
4851 result .sort ()
4952 return result
5053
54+
5155def bench_heapselect (values , K ):
5256 """
5357 Use selectlib.heapselect on a copy of the list to partition it so that the element at index K-1
@@ -59,6 +63,7 @@ def bench_heapselect(values, K):
5963 result .sort ()
6064 return result
6165
66+
6267def bench_nth_element (values , K ):
6368 """
6469 Use selectlib.nth_element on a copy of the list to partition it so that the element at index K-1
@@ -70,15 +75,17 @@ def bench_nth_element(values, K):
7075 result .sort ()
7176 return result
7277
78+
7379# Dictionary of methods to benchmark.
7480methods = {
75- " sort" : bench_sort ,
76- " heapq.nsmallest" : bench_heapq ,
77- " quickselect" : bench_quickselect ,
78- " heapselect" : bench_heapselect ,
79- " nth_element" : bench_nth_element ,
81+ ' sort' : bench_sort ,
82+ ' heapq.nsmallest' : bench_heapq ,
83+ ' quickselect' : bench_quickselect ,
84+ ' heapselect' : bench_heapselect ,
85+ ' nth_element' : bench_nth_element ,
8086}
8187
88+
8289def run_benchmarks ():
8390 """
8491 Runs the benchmarks for different list sizes.
@@ -96,7 +103,9 @@ def run_benchmarks():
96103 for N in N_values :
97104 # Compute K values (ensure at least 1)
98105 K_VALUES = [max (1 , int (N * p )) for p in percentages ]
99- print (f"\n Benchmarking for N = { N :,} (K values: { [f'{ k :,} ' for k in K_VALUES ]} )" )
106+ print (
107+ f'\n Benchmarking for N = { N :,} (K values: { [f"{ k :,} " for k in K_VALUES ]} )'
108+ )
100109 # Generate a random list of integers
101110 original = [random .randint (0 , 1_000_000 ) for _ in range (N )]
102111
@@ -105,17 +114,23 @@ def run_benchmarks():
105114
106115 # For each K value, run each method 5 times and take the median time
107116 for K in K_VALUES :
108- print (f" K = { K :,} " )
117+ print (f' K = { K :,} ' )
109118 for name , func in methods .items ():
110- test_callable = lambda : func (original , K )
119+
120+ def test_callable ():
121+ return func (original , K )
122+
111123 times = timeit .repeat (stmt = test_callable , repeat = 5 , number = 1 )
112124 med = statistics .median (times )
113125 results [name ][K ] = med
114- times_ms = [f"{ t * 1000 :,.3f} " for t in times ]
115- print (f" { name :15} : median = { med * 1000 :,.3f} ms (runs: { times_ms } ms)" )
116- overall_results [N ] = {"K_values" : K_VALUES , "results" : results }
126+ times_ms = [f'{ t * 1000 :,.3f} ' for t in times ]
127+ print (
128+ f' { name :15} : median = { med * 1000 :,.3f} ms (runs: { times_ms } ms)'
129+ )
130+ overall_results [N ] = {'K_values' : K_VALUES , 'results' : results }
117131 return overall_results
118132
133+
119134def plot_results (overall_results ):
120135 """
121136 Creates a vertical stack of grouped bar charts.
@@ -124,54 +139,61 @@ def plot_results(overall_results):
124139 and the y-axis shows the median time in ms.
125140 """
126141 num_charts = len (overall_results )
127- fig , axes = plt .subplots (nrows = num_charts , ncols = 1 , figsize = (10 , 4 * num_charts ))
142+ fig , axes = plt .subplots (nrows = num_charts , ncols = 1 , figsize = (10 , 4 * num_charts ))
128143
129144 if num_charts == 1 :
130145 axes = [axes ]
131146
132147 # Bar appearance settings
133148 bar_width = 0.15
134149 method_offsets = {
135- " sort" : - 2 * bar_width ,
136- " heapq.nsmallest" : - bar_width ,
137- " quickselect" : 0 ,
138- " heapselect" : bar_width ,
139- " nth_element" : 2 * bar_width ,
150+ ' sort' : - 2 * bar_width ,
151+ ' heapq.nsmallest' : - bar_width ,
152+ ' quickselect' : 0 ,
153+ ' heapselect' : bar_width ,
154+ ' nth_element' : 2 * bar_width ,
140155 }
141156 method_colors = {
142- " sort" : '#1f77b4' ,
143- " heapq.nsmallest" : '#ff7f0e' ,
144- " quickselect" : '#2ca02c' ,
145- " heapselect" : '#d62728' ,
146- " nth_element" : '#9467bd' ,
157+ ' sort' : '#1f77b4' ,
158+ ' heapq.nsmallest' : '#ff7f0e' ,
159+ ' quickselect' : '#2ca02c' ,
160+ ' heapselect' : '#d62728' ,
161+ ' nth_element' : '#9467bd' ,
147162 }
148163
149164 # Process each chart (one per N value)
150165 for ax , (N , data ) in zip (axes , sorted (overall_results .items (), key = lambda x : x [0 ])):
151- K_VALUES = data [" K_values" ]
152- results = data [" results" ]
166+ K_VALUES = data [' K_values' ]
167+ results = data [' results' ]
153168 x_positions = list (range (len (K_VALUES )))
154- x_labels = [f" { K :,} ({ (K / N ) * 100 :.1f} %)" for K in K_VALUES ]
169+ x_labels = [f' { K :,} ({ (K / N ) * 100 :.1f} %)' for K in K_VALUES ]
155170
156171 for method , timing_dict in results .items ():
157- times_ms = [timing_dict [K ]* 1000 for K in K_VALUES ]
172+ times_ms = [timing_dict [K ] * 1000 for K in K_VALUES ]
158173 positions = [x + method_offsets [method ] for x in x_positions ]
159- bars = ax .bar (positions , times_ms , width = bar_width , label = method , color = method_colors .get (method ))
174+ bars = ax .bar (
175+ positions ,
176+ times_ms ,
177+ width = bar_width ,
178+ label = method ,
179+ color = method_colors .get (method ),
180+ )
160181 ax .bar_label (bars , fmt = '%.2f' , padding = 1 , fontsize = 8 )
161182
162- ax .set_title (f" N = { N :,} " )
163- ax .set_xlabel (" K (percentage of N)" )
164- ax .set_ylabel (" Median time (ms)" )
183+ ax .set_title (f' N = { N :,} ' )
184+ ax .set_xlabel (' K (percentage of N)' )
185+ ax .set_ylabel (' Median time (ms)' )
165186 ax .set_xticks (x_positions )
166187 ax .set_xticklabels (x_labels )
167- ax .legend (title = " Method" )
188+ ax .legend (title = ' Method' )
168189 ax .grid (True , linestyle = '--' , alpha = 0.5 )
169190
170- plt .suptitle (" Performance Benchmark for N-Smallest Methods" , fontsize = 18 )
191+ plt .suptitle (' Performance Benchmark for N-Smallest Methods' , fontsize = 18 )
171192 plt .tight_layout (rect = [0 , 0.03 , 1 , 0.95 ])
172193 plt .savefig ('plot.png' )
173194 plt .show ()
174195
196+
175197if __name__ == '__main__' :
176198 bench_results = run_benchmarks ()
177199 plot_results (bench_results )
0 commit comments