11#!/usr/bin/env python3
22"""
3- Benchmark comparisons for four methods to obtain the K smallest items from a list,
3+ Benchmark comparisons for five methods to obtain the K smallest items from a list,
44for various values of K with different list sizes N (varying from 1,000 to 1,000,000).
55
66For each method and each chosen K (as a percentage of N), the test is run 5 times
99Methods benchmarked:
1010 1. Using built‐in sort: sort the list and slice the first K elements.
1111 2. Using heapq.nsmallest: use the heap‐based algorithm.
12- 3. Using quickselect: partition the list with selectlib.quickselect and slice the first K elements.
13- 4. Using heapselect: partition the list with selectlib.heapselect and slice the first K elements.
12+ 3. Using quickselect: partition the list with selectlib.quickselect and then sort the first K elements.
13+ 4. Using heapselect: partition the list with selectlib.heapselect and then sort the first K elements.
14+ 5. Using nth_element: partition the list with selectlib.nth_element and then sort the first K elements.
1415
1516The benchmark results are then plotted as grouped bar charts (one per N value) in a vertical stack.
1617Note: The percentages for K are now 0.2%, 1%, 10%, and 25% of N.
2425import selectlib
2526
2627# Define benchmark methods
28+
2729def bench_sort (values , K ):
2830 """Sort a copy of the list and return the first K smallest items."""
2931 lst = values .copy ()
@@ -41,7 +43,6 @@ def bench_quickselect(values, K):
4143 is in the correct sorted position; then sort and return the first K elements.
4244 """
4345 lst = values .copy ()
44- # Partition in-place so that the element at index (K-1) is in the correct position
4546 selectlib .quickselect (lst , K - 1 )
4647 result = lst [:K ]
4748 result .sort ()
@@ -53,24 +54,35 @@ def bench_heapselect(values, K):
5354 is in the correct sorted position; then sort and return the first K elements.
5455 """
5556 lst = values .copy ()
56- # Partition in-place so that the element at index (K-1) is in the correct position.
5757 selectlib .heapselect (lst , K - 1 )
5858 result = lst [:K ]
5959 result .sort ()
6060 return result
6161
62- # List of methods to benchmark
def bench_nth_element(values, K):
    """
    Return the K smallest items of ``values`` in ascending order via selectlib.nth_element.

    A copy of the list is partitioned in place with ``selectlib.nth_element`` so that the
    element at index K-1 is in its correct sorted position; the first K elements of the
    copy are then sliced off and sorted. ``values`` itself is never modified.

    Degenerate sizes are answered without partitioning, because K-1 would fall outside
    the index range 0..len(values)-1 (or, for K == len(values), because every item is
    returned anyway):
      * K <= 0 returns an empty list.
      * K >= len(values) returns all items, sorted.
    """
    if K <= 0:
        # Nothing requested; K - 1 would be a negative index.
        return []
    if K >= len(values):
        # Every item is part of the answer, so a plain sort of a new list suffices.
        return sorted(values)

    lst = values.copy()
    selectlib.nth_element(lst, K - 1)
    result = lst[:K]
    result.sort()
    return result
72+
# Registry of the benchmarked methods: method name -> function called as f(values, K).
# The names are the same strings used as keys of method_offsets and method_colors
# in plot_results.
methods = {
    "sort": bench_sort,
    "heapq.nsmallest": bench_heapq,
    "quickselect": bench_quickselect,
    "heapselect": bench_heapselect,
    "nth_element": bench_nth_element,
}
6981
7082def run_benchmarks ():
7183 """
7284 Runs the benchmarks for different list sizes.
73- For each N in N_VALUES , constructs a random list of integers and then, for each K (as a percentage of N),
85+ For each N in N_values , constructs a random list of integers and then, for each K (as a percentage of N),
7486 runs each method 5 times and records the median runtime.
7587 Returns a dictionary mapping each N to its benchmark results.
7688 """
@@ -111,47 +123,42 @@ def plot_results(overall_results):
111123 For each subplot, the x-axis shows K along with its percentage of N,
112124 and the y-axis shows the median time in ms.
113125 """
114- # Determine the number of charts (one for each N)
115126 num_charts = len (overall_results )
116127 fig , axes = plt .subplots (nrows = num_charts , ncols = 1 , figsize = (10 , 4 * num_charts ))
117128
118- # If only one subplot, put it into a list for uniform processing.
119129 if num_charts == 1 :
120130 axes = [axes ]
121131
122- # Define bar appearance
123- bar_width = 0.2
132+ # Bar appearance settings
133+ bar_width = 0.15
124134 method_offsets = {
125- "sort" : - bar_width ,
126- "heapq.nsmallest" : 0 ,
127- "quickselect" : bar_width ,
128- "heapselect" : bar_width * 2 ,
135+ "sort" : - 2 * bar_width ,
136+ "heapq.nsmallest" : - bar_width ,
137+ "quickselect" : 0 ,
138+ "heapselect" : bar_width ,
139+ "nth_element" : 2 * bar_width ,
129140 }
130141 method_colors = {
131142 "sort" : '#1f77b4' ,
132143 "heapq.nsmallest" : '#ff7f0e' ,
133144 "quickselect" : '#2ca02c' ,
134- "heapselect" : '#d62728'
145+ "heapselect" : '#d62728' ,
146+ "nth_element" : '#9467bd' ,
135147 }
136148
137- # Sort the overall_results by N for proper ordering (smallest to largest )
149+ # Process each chart (one per N value )
138150 for ax , (N , data ) in zip (axes , sorted (overall_results .items (), key = lambda x : x [0 ])):
139151 K_VALUES = data ["K_values" ]
140152 results = data ["results" ]
141- # Create x positions (one per K value)
142153 x_positions = list (range (len (K_VALUES )))
143- # Create x-axis labels as "K (percentage)" with comma formatting for K
144154 x_labels = [f"{ K :,} ({ (K / N )* 100 :.1f} %)" for K in K_VALUES ]
145155
146156 for method , timing_dict in results .items ():
147- # Extract times (convert seconds to milliseconds)
148157 times_ms = [timing_dict [K ]* 1000 for K in K_VALUES ]
149- # Compute adjusted positions for grouped bars
150158 positions = [x + method_offsets [method ] for x in x_positions ]
151159 bars = ax .bar (positions , times_ms , width = bar_width , label = method , color = method_colors .get (method ))
152160 ax .bar_label (bars , fmt = '%.2f' , padding = 1 , fontsize = 8 )
153161
154- # Use comma formatting for N in the title
155162 ax .set_title (f"N = { N :,} " )
156163 ax .set_xlabel ("K (percentage of N)" )
157164 ax .set_ylabel ("Median time (ms)" )
@@ -160,7 +167,6 @@ def plot_results(overall_results):
160167 ax .legend (title = "Method" )
161168 ax .grid (True , linestyle = '--' , alpha = 0.5 )
162169
163- # Improved overall title for the charts
164170 plt .suptitle ("Performance Benchmark for N-Smallest Methods" , fontsize = 18 )
165171 plt .tight_layout (rect = [0 , 0.03 , 1 , 0.95 ])
166172 plt .savefig ('plot.png' )
0 commit comments