|
14 | 14 |
|
15 | 15 | #include "benchmark/benchmark.h" |
16 | 16 | #include "benchmark_api_internal.h" |
| 17 | +#include "benchmark_runner.h" |
17 | 18 | #include "internal_macros.h" |
18 | 19 |
|
19 | 20 | #ifndef BENCHMARK_OS_WINDOWS |
@@ -113,228 +114,6 @@ DEFINE_int32(v, 0, "The level of verbose logging to output"); |
113 | 114 |
|
114 | 115 | namespace benchmark { |
115 | 116 |
|
116 | | -namespace { |
117 | | -static const size_t kMaxIterations = 1000000000; |
118 | | - |
119 | | -static MemoryManager* memory_manager = nullptr; |
120 | | -} // end namespace |
121 | | - |
122 | | -namespace internal { |
123 | | - |
124 | | -void UseCharPointer(char const volatile*) {} |
125 | | - |
126 | | -namespace { |
127 | | - |
128 | | -BenchmarkReporter::Run CreateRunReport( |
129 | | - const benchmark::internal::BenchmarkInstance& b, |
130 | | - const internal::ThreadManager::Result& results, size_t memory_iterations, |
131 | | - const MemoryManager::Result& memory_result, double seconds) { |
132 | | - // Create report about this benchmark run. |
133 | | - BenchmarkReporter::Run report; |
134 | | - |
135 | | - report.run_name = b.name; |
136 | | - report.error_occurred = results.has_error_; |
137 | | - report.error_message = results.error_message_; |
138 | | - report.report_label = results.report_label_; |
139 | | - // This is the total iterations across all threads. |
140 | | - report.iterations = results.iterations; |
141 | | - report.time_unit = b.time_unit; |
142 | | - |
143 | | - if (!report.error_occurred) { |
144 | | - if (b.use_manual_time) { |
145 | | - report.real_accumulated_time = results.manual_time_used; |
146 | | - } else { |
147 | | - report.real_accumulated_time = results.real_time_used; |
148 | | - } |
149 | | - report.cpu_accumulated_time = results.cpu_time_used; |
150 | | - report.complexity_n = results.complexity_n; |
151 | | - report.complexity = b.complexity; |
152 | | - report.complexity_lambda = b.complexity_lambda; |
153 | | - report.statistics = b.statistics; |
154 | | - report.counters = results.counters; |
155 | | - |
156 | | - if (memory_iterations > 0) { |
157 | | - report.has_memory_result = true; |
158 | | - report.allocs_per_iter = |
159 | | - memory_iterations ? static_cast<double>(memory_result.num_allocs) / |
160 | | - memory_iterations |
161 | | - : 0; |
162 | | - report.max_bytes_used = memory_result.max_bytes_used; |
163 | | - } |
164 | | - |
165 | | - internal::Finish(&report.counters, results.iterations, seconds, b.threads); |
166 | | - } |
167 | | - return report; |
168 | | -} |
169 | | - |
170 | | -// Execute one thread of benchmark b for the specified number of iterations. |
171 | | -// Adds the stats collected for the thread into manager->results. |
172 | | -void RunInThread(const BenchmarkInstance* b, size_t iters, int thread_id, |
173 | | - ThreadManager* manager) { |
174 | | - internal::ThreadTimer timer; |
175 | | - State st = b->Run(iters, thread_id, &timer, manager); |
176 | | - CHECK(st.iterations() >= st.max_iterations) |
177 | | - << "Benchmark returned before State::KeepRunning() returned false!"; |
178 | | - { |
179 | | - MutexLock l(manager->GetBenchmarkMutex()); |
180 | | - internal::ThreadManager::Result& results = manager->results; |
181 | | - results.iterations += st.iterations(); |
182 | | - results.cpu_time_used += timer.cpu_time_used(); |
183 | | - results.real_time_used += timer.real_time_used(); |
184 | | - results.manual_time_used += timer.manual_time_used(); |
185 | | - results.complexity_n += st.complexity_length_n(); |
186 | | - internal::Increment(&results.counters, st.counters); |
187 | | - } |
188 | | - manager->NotifyThreadComplete(); |
189 | | -} |
190 | | - |
191 | | -struct RunResults { |
192 | | - std::vector<BenchmarkReporter::Run> non_aggregates; |
193 | | - std::vector<BenchmarkReporter::Run> aggregates_only; |
194 | | - |
195 | | - bool display_report_aggregates_only = false; |
196 | | - bool file_report_aggregates_only = false; |
197 | | -}; |
198 | | - |
199 | | -RunResults RunBenchmark( |
200 | | - const benchmark::internal::BenchmarkInstance& b, |
201 | | - std::vector<BenchmarkReporter::Run>* complexity_reports) { |
202 | | - RunResults run_results; |
203 | | - |
204 | | - const bool has_explicit_iteration_count = b.iterations != 0; |
205 | | - size_t iters = has_explicit_iteration_count ? b.iterations : 1; |
206 | | - std::unique_ptr<internal::ThreadManager> manager; |
207 | | - std::vector<std::thread> pool(b.threads - 1); |
208 | | - const int repeats = |
209 | | - b.repetitions != 0 ? b.repetitions : FLAGS_benchmark_repetitions; |
210 | | - if (repeats != 1) { |
211 | | - run_results.display_report_aggregates_only = |
212 | | - (FLAGS_benchmark_report_aggregates_only || |
213 | | - FLAGS_benchmark_display_aggregates_only); |
214 | | - run_results.file_report_aggregates_only = |
215 | | - FLAGS_benchmark_report_aggregates_only; |
216 | | - if (b.aggregation_report_mode != internal::ARM_Unspecified) { |
217 | | - run_results.display_report_aggregates_only = |
218 | | - (b.aggregation_report_mode & |
219 | | - internal::ARM_DisplayReportAggregatesOnly); |
220 | | - run_results.file_report_aggregates_only = |
221 | | - (b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly); |
222 | | - } |
223 | | - } |
224 | | - for (int repetition_num = 0; repetition_num < repeats; repetition_num++) { |
225 | | - for (;;) { |
226 | | - // Try benchmark |
227 | | - VLOG(2) << "Running " << b.name << " for " << iters << "\n"; |
228 | | - |
229 | | - manager.reset(new internal::ThreadManager(b.threads)); |
230 | | - for (std::size_t ti = 0; ti < pool.size(); ++ti) { |
231 | | - pool[ti] = std::thread(&RunInThread, &b, iters, |
232 | | - static_cast<int>(ti + 1), manager.get()); |
233 | | - } |
234 | | - RunInThread(&b, iters, 0, manager.get()); |
235 | | - manager->WaitForAllThreads(); |
236 | | - for (std::thread& thread : pool) thread.join(); |
237 | | - internal::ThreadManager::Result results; |
238 | | - { |
239 | | - MutexLock l(manager->GetBenchmarkMutex()); |
240 | | - results = manager->results; |
241 | | - } |
242 | | - manager.reset(); |
243 | | - // Adjust real/manual time stats since they were reported per thread. |
244 | | - results.real_time_used /= b.threads; |
245 | | - results.manual_time_used /= b.threads; |
246 | | - |
247 | | - VLOG(2) << "Ran in " << results.cpu_time_used << "/" |
248 | | - << results.real_time_used << "\n"; |
249 | | - |
250 | | - // Base decisions off of CPU time, or manual/real time if requested by this benchmark. |
251 | | - double seconds = results.cpu_time_used; |
252 | | - if (b.use_manual_time) { |
253 | | - seconds = results.manual_time_used; |
254 | | - } else if (b.use_real_time) { |
255 | | - seconds = results.real_time_used; |
256 | | - } |
257 | | - |
258 | | - const double min_time = |
259 | | - !IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time; |
260 | | - |
261 | | - // clang-format off |
262 | | - // turn off clang-format since it mangles prettiness here |
263 | | - // Determine if this run should be reported: either it has |
264 | | - // run for a sufficient amount of time or an error was reported. |
265 | | - const bool should_report = repetition_num > 0 |
266 | | - || has_explicit_iteration_count // An exact iteration count was requested |
267 | | - || results.has_error_ |
268 | | - || iters >= kMaxIterations // No chance to try again, we hit the limit. |
269 | | - || seconds >= min_time // the elapsed time is large enough |
270 | | - // CPU time is specified but the elapsed real time greatly exceeds the |
271 | | - // minimum time. Note that user-provided timers are exempt from this |
272 | | - // sanity check. |
273 | | - || ((results.real_time_used >= 5 * min_time) && !b.use_manual_time); |
274 | | - // clang-format on |
275 | | - |
276 | | - if (should_report) { |
277 | | - MemoryManager::Result memory_result; |
278 | | - size_t memory_iterations = 0; |
279 | | - if (memory_manager != nullptr) { |
280 | | - // Only run a few iterations to reduce the impact of one-time |
281 | | - // allocations in benchmarks that are not properly managed. |
282 | | - memory_iterations = std::min<size_t>(16, iters); |
283 | | - memory_manager->Start(); |
284 | | - manager.reset(new internal::ThreadManager(1)); |
285 | | - RunInThread(&b, memory_iterations, 0, manager.get()); |
286 | | - manager->WaitForAllThreads(); |
287 | | - manager.reset(); |
288 | | - |
289 | | - memory_manager->Stop(&memory_result); |
290 | | - } |
291 | | - |
292 | | - BenchmarkReporter::Run report = CreateRunReport( |
293 | | - b, results, memory_iterations, memory_result, seconds); |
294 | | - if (!report.error_occurred && b.complexity != oNone) |
295 | | - complexity_reports->push_back(report); |
296 | | - run_results.non_aggregates.push_back(report); |
297 | | - break; |
298 | | - } |
299 | | - |
300 | | - // See how much the iteration count should be increased by. |
301 | | - // Note: Avoid division by zero with max(seconds, 1ns). |
302 | | - double multiplier = min_time * 1.4 / std::max(seconds, 1e-9); |
303 | | - // If our last run was at least 10% of FLAGS_benchmark_min_time then we |
304 | | - // use the multiplier directly. Otherwise we use at most 10 times |
305 | | - // expansion. |
306 | | - // NOTE: When the last run was at least 10% of the min time the max |
307 | | - // expansion should be 14x. |
308 | | - bool is_significant = (seconds / min_time) > 0.1; |
309 | | - multiplier = is_significant ? multiplier : std::min(10.0, multiplier); |
310 | | - if (multiplier <= 1.0) multiplier = 2.0; |
311 | | - double next_iters = std::max(multiplier * iters, iters + 1.0); |
312 | | - if (next_iters > kMaxIterations) { |
313 | | - next_iters = kMaxIterations; |
314 | | - } |
315 | | - VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n"; |
316 | | - iters = static_cast<int>(next_iters + 0.5); |
317 | | - } |
318 | | - } |
319 | | - |
320 | | - // Calculate additional statistics |
321 | | - run_results.aggregates_only = ComputeStats(run_results.non_aggregates); |
322 | | - |
323 | | - // Maybe calculate complexity report |
324 | | - if ((b.complexity != oNone) && b.last_benchmark_instance) { |
325 | | - auto additional_run_stats = ComputeBigO(*complexity_reports); |
326 | | - run_results.aggregates_only.insert(run_results.aggregates_only.end(), |
327 | | - additional_run_stats.begin(), |
328 | | - additional_run_stats.end()); |
329 | | - complexity_reports->clear(); |
330 | | - } |
331 | | - |
332 | | - return run_results; |
333 | | -} |
334 | | - |
335 | | -} // namespace |
336 | | -} // namespace internal |
337 | | - |
338 | 117 | State::State(size_t max_iters, const std::vector<int64_t>& ranges, int thread_i, |
339 | 118 | int n_threads, internal::ThreadTimer* timer, |
340 | 119 | internal::ThreadManager* manager) |
@@ -610,7 +389,9 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter, |
610 | 389 | return benchmarks.size(); |
611 | 390 | } |
612 | 391 |
|
613 | | -void RegisterMemoryManager(MemoryManager* manager) { memory_manager = manager; } |
| 392 | +void RegisterMemoryManager(MemoryManager* manager) { |
| 393 | + internal::memory_manager = manager; |
| 394 | +} |
614 | 395 |
|
615 | 396 | namespace internal { |
616 | 397 |
|
|
0 commit comments