Skip to content
This repository was archived by the owner on Feb 25, 2025. It is now read-only.

Commit f811fc3

Browse files
lhkbob authored and Skia Commit-Bot committed
Avoid loop-dependent behavior in GrMemoryPoolBench
This helps stability of benchmark across repeated runs, and across code changes. Previously, a change to the tuned loop count could radically change the allocation behavior within the loop's iteration and lead to unfair comparisons. In addition, this separates the stack allocation pattern into N allocations followed by N LIFO releases, and a push-pop alternating pattern of N allocates and releases (so still LIFO, but reuses the memory at the start of a block). In later CLs experimenting on the memory pool, I found that there were surprising effects on performance linked to the specific interaction between the allocation size, per-allocation metadata, and per-block metadata. To help differentiate these coincidences, this adds two modes of allocation where one should already be aligned. It also moves away from a global pool, so that it's possible to benchmark on different block sizes and factor in the allocation/release cost of the actual blocks (vs. the cursor management of a larger sized pool). As part of this, the new/delete reference operator is added as an explicit benchmark. Change-Id: I12b8c11cb75db0df70460fe2e8cf6c029db7eb22 Reviewed-on: https://skia-review.googlesource.com/c/skia/+/262936 Commit-Queue: Michael Ludwig <michaelludwig@google.com> Reviewed-by: Brian Salomon <bsalomon@google.com>
1 parent fa424b2 commit f811fc3

File tree

1 file changed

+161
-142
lines changed

1 file changed

+161
-142
lines changed

bench/GrMemoryPoolBench.cpp

Lines changed: 161 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -5,185 +5,204 @@
55
* found in the LICENSE file.
66
*/
77

8-
#include "include/core/SkTypes.h"
9-
108
#include "bench/Benchmark.h"
11-
#include "include/private/SkTDArray.h"
12-
#include "include/private/SkTemplates.h"
9+
#include "include/private/GrTypesPriv.h"
1310
#include "include/utils/SkRandom.h"
1411
#include "src/gpu/GrMemoryPool.h"
1512

16-
#include <new>
17-
18-
// change this to 0 to compare GrMemoryPool to default new / delete
19-
#define OVERRIDE_NEW 1
13+
// sizeof is a multiple of GrMemoryPool::kAlignment
14+
struct Aligned {
15+
int fStuff[12]; // Will align on 4, 8, or 16 alignment
16+
};
17+
static_assert(sizeof(Aligned) % GrMemoryPool::kAlignment == 0);
2018

21-
struct A {
22-
int gStuff[10];
23-
#if OVERRIDE_NEW
24-
void* operator new(size_t size) { return gBenchPool->allocate(size); }
25-
void operator delete(void* mem) {
26-
if (mem) {
27-
return gBenchPool->release(mem);
28-
}
29-
}
30-
#endif
31-
static std::unique_ptr<GrMemoryPool> gBenchPool;
19+
// sizeof is not a multiple of GrMemoryPool::kAlignment
20+
struct Unaligned {
21+
int fStuff[9]; // Will not align on 8 or 16, but will on 4...
3222
};
33-
std::unique_ptr<GrMemoryPool> A::gBenchPool = GrMemoryPool::Make(10 * (1 << 10), 10 * (1 << 10));
23+
static_assert(sizeof(Unaligned) % GrMemoryPool::kAlignment != 0);
24+
25+
// All benchmarks create and delete the same number of objects. The key difference is the order
26+
// of operations, the size of the objects being allocated, and the size of the pool.
27+
typedef void (*RunBenchProc)(GrMemoryPool*, int);
28+
29+
// N objects are created, and then destroyed in reverse order (fully unwinding the cursor within
30+
// each block of the memory pool).
31+
template <typename T>
32+
static void run_stack(GrMemoryPool* pool, int loops) {
33+
static const int kMaxObjects = 4 * (1 << 10);
34+
T* objs[kMaxObjects];
35+
for (int i = 0; i < loops; ++i) {
36+
// Push N objects into the pool (or heap if pool is null)
37+
for (int j = 0; j < kMaxObjects; ++j) {
38+
objs[j] = pool ? (T*) pool->allocate(sizeof(T)) : new T;
39+
}
40+
// Pop N objects off in LIFO order
41+
for (int j = kMaxObjects - 1; j >= 0; --j) {
42+
if (pool) {
43+
pool->release(objs[j]);
44+
} else {
45+
delete objs[j];
46+
}
47+
}
3448

35-
/**
36-
* This benchmark creates and deletes objects in stack order
37-
*/
38-
class GrMemoryPoolBenchStack : public Benchmark {
39-
public:
40-
bool isSuitableFor(Backend backend) override {
41-
return backend == kNonRendering_Backend;
49+
// Everything has been cleaned up for the next loop
4250
}
51+
}
52+
53+
// N objects are created, and then destroyed in creation order (is not able to unwind the cursor
54+
// within each block, but can reclaim the block once everything is destroyed).
55+
template <typename T>
56+
static void run_queue(GrMemoryPool* pool, int loops) {
57+
static const int kMaxObjects = 4 * (1 << 10);
58+
T* objs[kMaxObjects];
59+
for (int i = 0; i < loops; ++i) {
60+
// Push N objects into the pool (or heap if pool is null)
61+
for (int j = 0; j < kMaxObjects; ++j) {
62+
objs[j] = pool ? (T*) pool->allocate(sizeof(T)) : new T;
63+
}
64+
// Pop N objects off in FIFO order
65+
for (int j = 0; j < kMaxObjects; ++j) {
66+
if (pool) {
67+
pool->release(objs[j]);
68+
} else {
69+
delete objs[j];
70+
}
71+
}
4372

44-
protected:
45-
const char* onGetName() override {
46-
return "grmemorypool_stack";
73+
// Everything has been cleaned up for the next loop
4774
}
48-
49-
void onDraw(int loops, SkCanvas*) override {
50-
SkRandom r;
51-
enum {
52-
kMaxObjects = 4 * (1 << 10),
53-
};
54-
A* objects[kMaxObjects];
55-
56-
// We delete if a random number [-1, 1] is < the thresh. Otherwise,
57-
// we allocate. We start allocate-biased and ping-pong to delete-biased
58-
SkScalar delThresh = -SK_ScalarHalf;
59-
const int kSwitchThreshPeriod = loops / (2 * kMaxObjects);
60-
int s = 0;
61-
62-
int count = 0;
63-
for (int i = 0; i < loops; i++, ++s) {
64-
if (kSwitchThreshPeriod == s) {
65-
delThresh = -delThresh;
66-
s = 0;
67-
}
68-
SkScalar del = r.nextSScalar1();
69-
if (count &&
70-
(kMaxObjects == count || del < delThresh)) {
71-
delete objects[count-1];
72-
--count;
75+
}
76+
77+
// N objects are created and immediately destroyed, so space at the start of the pool should be
78+
// immediately reclaimed.
79+
template <typename T>
80+
static void run_pushpop(GrMemoryPool* pool, int loops) {
81+
static const int kMaxObjects = 4 * (1 << 10);
82+
T* objs[kMaxObjects];
83+
for (int i = 0; i < loops; ++i) {
84+
// Push N objects into the pool (or heap if pool is null)
85+
for (int j = 0; j < kMaxObjects; ++j) {
86+
if (pool) {
87+
objs[j] = (T*) pool->allocate(sizeof(T));
88+
pool->release(objs[j]);
7389
} else {
74-
objects[count] = new A;
75-
++count;
90+
objs[j] = new T;
91+
delete objs[j];
7692
}
7793
}
78-
for (int i = 0; i < count; ++i) {
79-
delete objects[i];
80-
}
81-
}
8294

83-
private:
84-
typedef Benchmark INHERITED;
85-
};
86-
87-
struct B {
88-
int gStuff[10];
89-
#if OVERRIDE_NEW
90-
void* operator new(size_t size) { return gBenchPool->allocate(size); }
91-
void operator delete(void* mem) {
92-
if (mem) {
93-
return gBenchPool->release(mem);
94-
}
95+
// Everything has been cleaned up for the next loop
9596
}
96-
#endif
97-
static std::unique_ptr<GrMemoryPool> gBenchPool;
98-
};
99-
std::unique_ptr<GrMemoryPool> B::gBenchPool = GrMemoryPool::Make(10 * (1 << 10), 10 * (1 << 10));
100-
101-
/**
102-
* This benchmark creates objects and deletes them in random order
103-
*/
104-
class GrMemoryPoolBenchRandom : public Benchmark {
105-
public:
106-
bool isSuitableFor(Backend backend) override {
107-
return backend == kNonRendering_Backend;
97+
}
98+
99+
// N object creations and destructions are invoked in random order.
100+
template <typename T>
101+
static void run_random(GrMemoryPool* pool, int loops) {
102+
static const int kMaxObjects = 4 * (1 << 10);
103+
T* objs[kMaxObjects];
104+
for (int i = 0; i < kMaxObjects; ++i) {
105+
objs[i] = nullptr;
108106
}
109107

110-
protected:
111-
const char* onGetName() override {
112-
return "grmemorypool_random";
113-
}
108+
auto del = [&](int j) {
109+
// Delete
110+
if (pool) {
111+
pool->release(objs[j]);
112+
} else {
113+
delete objs[j];
114+
}
115+
objs[j] = nullptr;
116+
};
114117

115-
void onDraw(int loops, SkCanvas*) override {
116-
SkRandom r;
117-
enum {
118-
kMaxObjects = 4 * (1 << 10),
119-
};
120-
std::unique_ptr<B> objects[kMaxObjects];
121-
122-
for (int i = 0; i < loops; i++) {
123-
uint32_t idx = r.nextRangeU(0, kMaxObjects-1);
124-
if (nullptr == objects[idx].get()) {
125-
objects[idx].reset(new B);
118+
SkRandom r;
119+
for (int i = 0; i < loops; ++i) {
120+
// Execute 2*kMaxObjects operations, which should average to N create and N destroy,
121+
// followed by a small number of remaining deletions.
122+
for (int j = 0; j < 2 * kMaxObjects; ++j) {
123+
int k = r.nextRangeU(0, kMaxObjects-1);
124+
if (objs[k]) {
125+
del(k);
126126
} else {
127-
objects[idx].reset();
127+
// Create
128+
objs[k] = pool ? (T*) pool->allocate(sizeof(T)) : new T;
128129
}
129130
}
130-
}
131-
132-
private:
133-
typedef Benchmark INHERITED;
134-
};
135131

136-
struct C {
137-
int gStuff[10];
138-
#if OVERRIDE_NEW
139-
void* operator new(size_t size) { return gBenchPool->allocate(size); }
140-
void operator delete(void* mem) {
141-
if (mem) {
142-
return gBenchPool->release(mem);
132+
// Ensure everything is null for the next loop
133+
for (int j = 0; j < kMaxObjects; ++j) {
134+
if (objs[j]) {
135+
del(j);
136+
}
143137
}
144138
}
145-
#endif
146-
static std::unique_ptr<GrMemoryPool> gBenchPool;
147-
};
148-
std::unique_ptr<GrMemoryPool> C::gBenchPool = GrMemoryPool::Make(10 * (1 << 10), 10 * (1 << 10));
139+
}
149140

150-
/**
151-
* This benchmark creates objects and deletes them in queue order
152-
*/
153-
class GrMemoryPoolBenchQueue : public Benchmark {
154-
enum {
155-
M = 4 * (1 << 10),
156-
};
141+
///////////////////////////////////////////////////////////////////////////////////////////////////
142+
143+
class GrMemoryPoolBench : public Benchmark {
157144
public:
145+
GrMemoryPoolBench(const char* name, RunBenchProc proc, int poolSize)
146+
: fPoolSize(poolSize)
147+
, fProc(proc) {
148+
fName.printf("grmemorypool_%s", name);
149+
}
150+
158151
bool isSuitableFor(Backend backend) override {
159152
return backend == kNonRendering_Backend;
160153
}
161154

162155
protected:
163156
const char* onGetName() override {
164-
return "grmemorypool_queue";
157+
return fName.c_str();
165158
}
166159

167160
void onDraw(int loops, SkCanvas*) override {
168-
SkRandom r;
169-
C* objects[M];
170-
for (int i = 0; i < loops; i++) {
171-
uint32_t count = r.nextRangeU(0, M-1);
172-
for (uint32_t i = 0; i < count; i++) {
173-
objects[i] = new C;
174-
}
175-
for (uint32_t i = 0; i < count; i++) {
176-
delete objects[i];
177-
}
178-
}
161+
std::unique_ptr<GrMemoryPool> pool;
162+
if (fPoolSize > 0) {
163+
pool = GrMemoryPool::Make(fPoolSize, fPoolSize);
164+
} // else keep it null to test regular new/delete performance
165+
166+
fProc(pool.get(), loops);
179167
}
180168

181-
private:
169+
SkString fName;
170+
int fPoolSize;
171+
RunBenchProc fProc;
172+
182173
typedef Benchmark INHERITED;
183174
};
184175

185-
///////////////////////////////////////////////////////////////////////////////
186-
187-
DEF_BENCH( return new GrMemoryPoolBenchStack(); )
188-
DEF_BENCH( return new GrMemoryPoolBenchRandom(); )
189-
DEF_BENCH( return new GrMemoryPoolBenchQueue(); )
176+
///////////////////////////////////////////////////////////////////////////////////////////////////
177+
178+
static const int kLargePool = 10 * (1 << 10);
179+
static const int kSmallPool = GrMemoryPool::kMinAllocationSize;
180+
181+
DEF_BENCH( return new GrMemoryPoolBench("stack_aligned_lg", run_stack<Aligned>, kLargePool); )
182+
DEF_BENCH( return new GrMemoryPoolBench("stack_aligned_sm", run_stack<Aligned>, kSmallPool); )
183+
DEF_BENCH( return new GrMemoryPoolBench("stack_aligned_ref", run_stack<Aligned>, 0); )
184+
DEF_BENCH( return new GrMemoryPoolBench("stack_unaligned_lg", run_stack<Unaligned>, kLargePool); )
185+
DEF_BENCH( return new GrMemoryPoolBench("stack_unaligned_sm", run_stack<Unaligned>, kSmallPool); )
186+
DEF_BENCH( return new GrMemoryPoolBench("stack_unaligned_ref", run_stack<Unaligned>, 0); )
187+
188+
DEF_BENCH( return new GrMemoryPoolBench("queue_aligned_lg", run_queue<Aligned>, kLargePool); )
189+
DEF_BENCH( return new GrMemoryPoolBench("queue_aligned_sm", run_queue<Aligned>, kSmallPool); )
190+
DEF_BENCH( return new GrMemoryPoolBench("queue_aligned_ref", run_queue<Aligned>, 0); )
191+
DEF_BENCH( return new GrMemoryPoolBench("queue_unaligned_lg", run_queue<Unaligned>, kLargePool); )
192+
DEF_BENCH( return new GrMemoryPoolBench("queue_unaligned_sm", run_queue<Unaligned>, kSmallPool); )
193+
DEF_BENCH( return new GrMemoryPoolBench("queue_unaligned_ref", run_queue<Unaligned>, 0); )
194+
195+
DEF_BENCH( return new GrMemoryPoolBench("pushpop_aligned_lg", run_pushpop<Aligned>, kLargePool); )
196+
DEF_BENCH( return new GrMemoryPoolBench("pushpop_aligned_sm", run_pushpop<Aligned>, kSmallPool); )
197+
// DEF_BENCH( return new GrMemoryPoolBench("pushpop_aligned_ref", run_pushpop<Aligned>, 0); )
198+
DEF_BENCH( return new GrMemoryPoolBench("pushpop_unaligned_lg", run_pushpop<Unaligned>, kLargePool); )
199+
DEF_BENCH( return new GrMemoryPoolBench("pushpop_unaligned_sm", run_pushpop<Unaligned>, kSmallPool); )
200+
// DEF_BENCH( return new GrMemoryPoolBench("pushpop_unaligned_ref", run_pushpop<Unaligned>, 0); )
201+
// pushpop_x_ref are not meaningful because the compiler completely optimizes away new T; delete *.
202+
203+
DEF_BENCH( return new GrMemoryPoolBench("random_aligned_lg", run_random<Aligned>, kLargePool); )
204+
DEF_BENCH( return new GrMemoryPoolBench("random_aligned_sm", run_random<Aligned>, kSmallPool); )
205+
DEF_BENCH( return new GrMemoryPoolBench("random_aligned_ref", run_random<Aligned>, 0); )
206+
DEF_BENCH( return new GrMemoryPoolBench("random_unaligned_lg", run_random<Unaligned>, kLargePool); )
207+
DEF_BENCH( return new GrMemoryPoolBench("random_unaligned_sm", run_random<Unaligned>, kSmallPool); )
208+
DEF_BENCH( return new GrMemoryPoolBench("random_unaligned_ref", run_random<Unaligned>, 0); )

0 commit comments

Comments
 (0)