Lint and format

grantjenks · grantjenks · commit 057f62507457 · 2025-02-02T22:44:40.000-08:00
diff --git a/benchmark.py b/benchmark.py
@@ -26,17 +26,20 @@
 
 # Define benchmark methods
 
+
 def bench_sort(values, K):
     """Sort a copy of the list and return the first K smallest items."""
     lst = values.copy()
     lst.sort()
     return lst[:K]
 
+
 def bench_heapq(values, K):
     """Use heapq.nsmallest on a copy of the list to obtain the first K smallest items."""
     lst = values.copy()
     return heapq.nsmallest(K, lst)
 
+
 def bench_quickselect(values, K):
     """
     Use selectlib.quickselect on a copy of the list to partition it so that the element at index K-1
@@ -48,6 +51,7 @@ def bench_quickselect(values, K):
     result.sort()
     return result
 
+
 def bench_heapselect(values, K):
     """
     Use selectlib.heapselect on a copy of the list to partition it so that the element at index K-1
@@ -59,6 +63,7 @@ def bench_heapselect(values, K):
     result.sort()
     return result
 
+
 def bench_nth_element(values, K):
     """
     Use selectlib.nth_element on a copy of the list to partition it so that the element at index K-1
@@ -70,15 +75,17 @@ def bench_nth_element(values, K):
     result.sort()
     return result
 
+
 # Dictionary of methods to benchmark.
 methods = {
-    "sort": bench_sort,
-    "heapq.nsmallest": bench_heapq,
-    "quickselect": bench_quickselect,
-    "heapselect": bench_heapselect,
-    "nth_element": bench_nth_element,
+    'sort': bench_sort,
+    'heapq.nsmallest': bench_heapq,
+    'quickselect': bench_quickselect,
+    'heapselect': bench_heapselect,
+    'nth_element': bench_nth_element,
 }
 
+
 def run_benchmarks():
     """
     Runs the benchmarks for different list sizes.
@@ -96,7 +103,9 @@ def run_benchmarks():
     for N in N_values:
         # Compute K values (ensure at least 1)
         K_VALUES = [max(1, int(N * p)) for p in percentages]
-        print(f"\nBenchmarking for N = {N:,} (K values: {[f'{k:,}' for k in K_VALUES]})")
+        print(
+            f'\nBenchmarking for N = {N:,} (K values: {[f"{k:,}" for k in K_VALUES]})'
+        )
         # Generate a random list of integers
         original = [random.randint(0, 1_000_000) for _ in range(N)]
 
@@ -105,17 +114,23 @@ def run_benchmarks():
 
         # For each K value, run each method 5 times and take the median time
         for K in K_VALUES:
-            print(f"  K = {K:,}")
+            print(f'  K = {K:,}')
             for name, func in methods.items():
-                test_callable = lambda: func(original, K)
+
+                def test_callable():
+                    return func(original, K)
+
                 times = timeit.repeat(stmt=test_callable, repeat=5, number=1)
                 med = statistics.median(times)
                 results[name][K] = med
-                times_ms = [f"{t*1000:,.3f}" for t in times]
-                print(f"    {name:15}: median = {med*1000:,.3f} ms  (runs: {times_ms} ms)")
-        overall_results[N] = {"K_values": K_VALUES, "results": results}
+                times_ms = [f'{t * 1000:,.3f}' for t in times]
+                print(
+                    f'    {name:15}: median = {med * 1000:,.3f} ms  (runs: {times_ms} ms)'
+                )
+        overall_results[N] = {'K_values': K_VALUES, 'results': results}
     return overall_results
 
+
 def plot_results(overall_results):
     """
     Creates a vertical stack of grouped bar charts.
@@ -124,54 +139,61 @@ def plot_results(overall_results):
     and the y-axis shows the median time in ms.
     """
     num_charts = len(overall_results)
-    fig, axes = plt.subplots(nrows=num_charts, ncols=1, figsize=(10, 4*num_charts))
+    fig, axes = plt.subplots(nrows=num_charts, ncols=1, figsize=(10, 4 * num_charts))
 
     if num_charts == 1:
         axes = [axes]
 
     # Bar appearance settings
     bar_width = 0.15
     method_offsets = {
-        "sort": -2*bar_width,
-        "heapq.nsmallest": -bar_width,
-        "quickselect": 0,
-        "heapselect": bar_width,
-        "nth_element": 2*bar_width,
+        'sort': -2 * bar_width,
+        'heapq.nsmallest': -bar_width,
+        'quickselect': 0,
+        'heapselect': bar_width,
+        'nth_element': 2 * bar_width,
     }
     method_colors = {
-        "sort": '#1f77b4',
-        "heapq.nsmallest": '#ff7f0e',
-        "quickselect": '#2ca02c',
-        "heapselect": '#d62728',
-        "nth_element": '#9467bd',
+        'sort': '#1f77b4',
+        'heapq.nsmallest': '#ff7f0e',
+        'quickselect': '#2ca02c',
+        'heapselect': '#d62728',
+        'nth_element': '#9467bd',
     }
 
     # Process each chart (one per N value)
     for ax, (N, data) in zip(axes, sorted(overall_results.items(), key=lambda x: x[0])):
-        K_VALUES = data["K_values"]
-        results = data["results"]
+        K_VALUES = data['K_values']
+        results = data['results']
         x_positions = list(range(len(K_VALUES)))
-        x_labels = [f"{K:,} ({(K/N)*100:.1f}%)" for K in K_VALUES]
+        x_labels = [f'{K:,} ({(K / N) * 100:.1f}%)' for K in K_VALUES]
 
         for method, timing_dict in results.items():
-            times_ms = [timing_dict[K]*1000 for K in K_VALUES]
+            times_ms = [timing_dict[K] * 1000 for K in K_VALUES]
             positions = [x + method_offsets[method] for x in x_positions]
-            bars = ax.bar(positions, times_ms, width=bar_width, label=method, color=method_colors.get(method))
+            bars = ax.bar(
+                positions,
+                times_ms,
+                width=bar_width,
+                label=method,
+                color=method_colors.get(method),
+            )
             ax.bar_label(bars, fmt='%.2f', padding=1, fontsize=8)
 
-        ax.set_title(f"N = {N:,}")
-        ax.set_xlabel("K (percentage of N)")
-        ax.set_ylabel("Median time (ms)")
+        ax.set_title(f'N = {N:,}')
+        ax.set_xlabel('K (percentage of N)')
+        ax.set_ylabel('Median time (ms)')
         ax.set_xticks(x_positions)
         ax.set_xticklabels(x_labels)
-        ax.legend(title="Method")
+        ax.legend(title='Method')
         ax.grid(True, linestyle='--', alpha=0.5)
 
-    plt.suptitle("Performance Benchmark for N-Smallest Methods", fontsize=18)
+    plt.suptitle('Performance Benchmark for N-Smallest Methods', fontsize=18)
     plt.tight_layout(rect=[0, 0.03, 1, 0.95])
     plt.savefig('plot.png')
     plt.show()
 
+
 if __name__ == '__main__':
     bench_results = run_benchmarks()
     plot_results(bench_results)
diff --git a/benchmark_median.py b/benchmark_median.py
@@ -31,6 +31,7 @@
 # The median index is computed as (n-1)//2.
 # ---------------------------------------------------------------------------
 
+
 def bench_median_low(values):
     """
     Uses the built‐in statistics.median_low function.
@@ -39,6 +40,7 @@ def bench_median_low(values):
     # statistics.median_low returns the median (for even-length lists, the lower of the two)
     return stats.median_low(lst)
 
+
 def bench_nth_element(values):
     """
     Uses selectlib.nth_element to repartition the list so that the median is at index (n-1)//2.
@@ -50,6 +52,7 @@ def bench_nth_element(values):
     selectlib.nth_element(lst, median_index)
     return lst[median_index]
 
+
 def bench_quickselect(values):
     """
     Uses selectlib.quickselect to reposition the median element in the list.
@@ -60,6 +63,7 @@ def bench_quickselect(values):
     selectlib.quickselect(lst, median_index)
     return lst[median_index]
 
+
 def bench_heapselect(values):
     """
     Uses selectlib.heapselect to reposition the median element in the list.
@@ -70,14 +74,16 @@ def bench_heapselect(values):
     selectlib.heapselect(lst, median_index)
     return lst[median_index]
 
+
 # Dictionary of methods to benchmark.
 methods = {
-    "median_low": bench_median_low,
-    "nth_element": bench_nth_element,
-    "quickselect": bench_quickselect,
-    "heapselect" : bench_heapselect,
+    'median_low': bench_median_low,
+    'nth_element': bench_nth_element,
+    'quickselect': bench_quickselect,
+    'heapselect': bench_heapselect,
 }
 
+
 # ---------------------------------------------------------------------------
 # Benchmark runner
 # ---------------------------------------------------------------------------
@@ -95,23 +101,28 @@ def run_benchmarks():
     overall_results = {}  # {N: { method: time_in_seconds, ... } }
 
     for N in N_values:
-        print(f"\nBenchmarking for N = {N:,} (median index = {(N-1)//2:,})")
+        print(f'\nBenchmarking for N = {N:,} (median index = {(N - 1) // 2:,})')
         # Generate a random list of integers
         original = [random.randint(0, 1_000_000) for _ in range(N)]
 
         results = {}
         for name, func in methods.items():
             # Prepare a callable that calls the method for the given list
-            test_callable = lambda: func(original)
+            def test_callable():
+                return func(original)
+
             # Run 5 times
             times = timeit.repeat(stmt=test_callable, repeat=5, number=1)
             med_time = statistics.median(times)
             results[name] = med_time
-            times_ms = [f"{t*1000:,.3f}" for t in times]  # format as milliseconds
-            print(f"  {name:12}: median = {med_time*1000:,.3f} ms  (runs: {times_ms})")
+            times_ms = [f'{t * 1000:,.3f}' for t in times]  # format as milliseconds
+            print(
+                f'  {name:12}: median = {med_time * 1000:,.3f} ms  (runs: {times_ms})'
+            )
         overall_results[N] = results
     return overall_results
 
+
 # ---------------------------------------------------------------------------
 # Plotting results
 # ---------------------------------------------------------------------------
@@ -126,12 +137,12 @@ def plot_results(results):
     num_groups = len(N_values)
 
     # Method ordering and colors (similar to benchmark.py)
-    methods_order = ["median_low", "nth_element", "quickselect", "heapselect"]
+    methods_order = ['median_low', 'nth_element', 'quickselect', 'heapselect']
     method_colors = {
-        "median_low": '#1f77b4',
-        "nth_element": '#ff7f0e',
-        "quickselect": '#2ca02c',
-        "heapselect":  '#d62728',
+        'median_low': '#1f77b4',
+        'nth_element': '#ff7f0e',
+        'quickselect': '#2ca02c',
+        'heapselect': '#d62728',
     }
 
     # X positions for the groups
@@ -140,32 +151,41 @@ def plot_results(results):
     # Bar appearance settings
     bar_width = 0.18
     offsets = {
-        "median_low": -1.5*bar_width,
-        "nth_element": -0.5*bar_width,
-        "quickselect": 0.5*bar_width,
-        "heapselect":  1.5*bar_width,
+        'median_low': -1.5 * bar_width,
+        'nth_element': -0.5 * bar_width,
+        'quickselect': 0.5 * bar_width,
+        'heapselect': 1.5 * bar_width,
     }
 
     plt.figure(figsize=(10, 6))
 
     # For each method, plot a bar for each list size
     for method in methods_order:
-        times_ms = [results[N][method]*1000 for N in N_values]
+        times_ms = [results[N][method] * 1000 for N in N_values]
         positions = [pos + offsets[method] for pos in group_positions]
-        bars = plt.bar(positions, times_ms, width=bar_width, label=method, color=method_colors.get(method))
+        bars = plt.bar(
+            positions,
+            times_ms,
+            width=bar_width,
+            label=method,
+            color=method_colors.get(method),
+        )
         plt.bar_label(bars, fmt='%.2f', padding=3, fontsize=8)
 
     # Configure x-axis with list sizes (formatted with commas)
-    plt.xticks(group_positions, [f"{N:,}" for N in N_values])
-    plt.xlabel("List size (N)")
-    plt.ylabel("Time (ms)")
-    plt.title("Benchmark: statistics.median_low vs. selectlib selection methods (median)")
-    plt.legend(title="Method")
+    plt.xticks(group_positions, [f'{N:,}' for N in N_values])
+    plt.xlabel('List size (N)')
+    plt.ylabel('Time (ms)')
+    plt.title(
+        'Benchmark: statistics.median_low vs. selectlib selection methods (median)'
+    )
+    plt.legend(title='Method')
     plt.grid(True, linestyle='--', alpha=0.5)
     plt.tight_layout()
-    plt.savefig("plot_median.png")
+    plt.savefig('plot_median.png')
     plt.show()
 
+
 # ---------------------------------------------------------------------------
 # Main
 # ---------------------------------------------------------------------------
diff --git a/test_selectlib.py b/test_selectlib.py