 import functools
 import time
-from multiprocessing import get_context
+from multiprocessing import Process, Queue
 from typing import Iterable, List, Optional, Tuple
 from itertools import islice
 
@@ -106,40 +106,54 @@ def search_all(
         used_queries = queries_list
 
         if parallel == 1:
+            # Single-threaded execution
             start = time.perf_counter()
-            precisions, latencies = list(
-                zip(*[search_one(query) for query in tqdm.tqdm(used_queries)])
-            )
+            results = [search_one(query) for query in tqdm.tqdm(used_queries)]
+            total_time = time.perf_counter() - start
         else:
-            ctx = get_context(self.get_mp_start_method())
+            # Dynamically calculate chunk size
+            chunk_size = max(1, len(used_queries) // parallel)
+            query_chunks = list(chunked_iterable(used_queries, chunk_size))
 
-            def process_initializer():
-                """Initialize each process before starting the search."""
+            # Function to be executed by each worker process
+            def worker_function(chunk, result_queue):
                 self.__class__.init_client(
                     self.host,
                     distance,
                     self.connection_params,
                     self.search_params,
                 )
                 self.setup_search()
+                results = process_chunk(chunk, search_one)
+                result_queue.put(results)
 
-            # Dynamically chunk the generator
-            query_chunks = list(chunked_iterable(used_queries, max(1, len(used_queries) // parallel)))
-
-            with ctx.Pool(
-                processes=parallel,
-                initializer=process_initializer,
-            ) as pool:
-                if parallel > 10:
-                    time.sleep(15)  # Wait for all processes to start
-                start = time.perf_counter()
-                results = pool.starmap(
-                    process_chunk,
-                    [(chunk, search_one) for chunk in query_chunks],
-                )
-                precisions, latencies = zip(*[result for chunk in results for result in chunk])
+            # Create a queue to collect results
+            result_queue = Queue()
+
+            # Create and start worker processes
+            processes = []
+            for chunk in query_chunks:
+                process = Process(target=worker_function, args=(chunk, result_queue))
+                processes.append(process)
+                process.start()
+
+            # Start measuring time for the critical work
+            start = time.perf_counter()
 
-        total_time = time.perf_counter() - start
+            # Collect results from all worker processes
+            results = []
+            for _ in processes:
+                results.extend(result_queue.get())
+
+            # Wait for all worker processes to finish
+            for process in processes:
+                process.join()
+
+            # Stop measuring time for the critical work
+            total_time = time.perf_counter() - start
+
+        # Extract precisions and latencies (outside the timed section)
+        precisions, latencies = zip(*results)
 
         self.__class__.delete_client()
 
@@ -179,3 +193,8 @@ def chunked_iterable(iterable, size):
 def process_chunk(chunk, search_one):
     """Process a chunk of queries using the search_one function."""
     return [search_one(query) for query in chunk]
+
+
+def process_chunk_wrapper(chunk, search_one):
+    """Wrapper to process a chunk of queries."""
+    return process_chunk(chunk, search_one)
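For context, a minimal, self-contained sketch of the Process/Queue fan-out pattern this change switches to; the fake_search helper and the query data below are illustrative stand-ins, not part of the benchmark code.

import time
from multiprocessing import Process, Queue


def fake_search(query):
    """Illustrative stand-in for search_one: returns a (precision, latency) tuple."""
    start = time.perf_counter()
    time.sleep(0.01)  # simulate a query against the engine
    return 1.0, time.perf_counter() - start


def worker(chunk, result_queue):
    """Each worker processes its chunk and pushes the whole result list at once."""
    result_queue.put([fake_search(query) for query in chunk])


if __name__ == "__main__":
    queries = list(range(100))
    parallel = 4
    chunk_size = max(1, len(queries) // parallel)
    chunks = [queries[i:i + chunk_size] for i in range(0, len(queries), chunk_size)]

    result_queue = Queue()
    processes = [Process(target=worker, args=(chunk, result_queue)) for chunk in chunks]
    for process in processes:
        process.start()

    # Drain the queue before joining: a child blocked on writing a large
    # payload to the queue's pipe would otherwise never exit, and join()
    # could deadlock.
    results = []
    for _ in processes:
        results.extend(result_queue.get())
    for process in processes:
        process.join()

    precisions, latencies = zip(*results)
    print(f"{len(results)} queries, mean latency {sum(latencies) / len(latencies):.4f}s")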