Skip to content

Commit 0f58d53

Browse files
committed
profiling
1 parent 1925e75 commit 0f58d53

File tree

14 files changed

+746
-17
lines changed

14 files changed

+746
-17
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package fib
2+
3+
import "testing"
4+
5+
// START OMIT
6+
func Fib(n int) int {
7+
a, b := 0, 1
8+
for i := 0; i < n; i++ {
9+
a, b = b, a+b
10+
}
11+
return a
12+
}
13+
14+
func BenchmarkFibWrong(b *testing.B) {
15+
Fib(b.N)
16+
}
17+
18+
func BenchmarkFibWrong2(b *testing.B) {
19+
for n := 0; n < b.N; n++ {
20+
Fib(n)
21+
}
22+
}
23+
24+
// END OMIT

2-benchmarking/examples/benchmark.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
package q
2+
3+
func BenchmarkRead(b *testing.B) {
4+
b.ReportAllocs() // HL
5+
for n := 0; n < b.N; n++ {
6+
// function under test
7+
}
8+
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
package fib
2+
3+
import "testing"
4+
5+
// STARTFIB OMIT
6+
// Fib computes the n'th number in the Fibonacci series.
7+
func Fib(n int) int {
8+
switch n {
9+
case 0:
10+
return 0
11+
case 1:
12+
return 1
13+
case 2:
14+
return 1
15+
default:
16+
return Fib(n-1) + Fib(n-2)
17+
}
18+
}
19+
20+
// ENDFIB OMIT
21+
22+
// STARTBENCH OMIT
23+
func BenchmarkFib20(b *testing.B) {
24+
for n := 0; n < b.N; n++ {
25+
Fib(20) // run the Fib function b.N times
26+
}
27+
}
28+
29+
func BenchmarkFib1(b *testing.B) {
30+
for n := 0; n < b.N; n++ {
31+
Fib(1)
32+
}
33+
}
34+
35+
// ENDBENCH OMIT
36+
37+
func TestFib(t *testing.T) {
38+
fibs := []int{0, 1, 1, 2, 3, 5, 8, 13, 21}
39+
for n, want := range fibs {
40+
got := Fib(n)
41+
if want != got {
42+
t.Errorf("Fib(%d): want %d, got %d", n, want, got)
43+
}
44+
}
45+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// +build ignore
2+
3+
package main
4+
5+
import "testing"
6+
7+
const m1 = 0x5555555555555555
8+
const m2 = 0x3333333333333333
9+
const m4 = 0x0f0f0f0f0f0f0f0f
10+
const h01 = 0x0101010101010101
11+
12+
func popcnt(x uint64) int {
13+
x -= (x >> 1) & m1
14+
x = (x & m2) + ((x >> 2) & m2)
15+
x = (x + (x >> 4)) & m4
16+
return int((x * h01) >> 56)
17+
}
18+
19+
// START OMIT
20+
func BenchmarkPopcnt(b *testing.B) {
21+
for i := 0; i < b.N; i++ {
22+
// optimised away
23+
}
24+
}
25+
26+
// END OMIT
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package main
2+
3+
import "testing"
4+
5+
// START OMIT
6+
const m1 = 0x5555555555555555
7+
const m2 = 0x3333333333333333
8+
const m4 = 0x0f0f0f0f0f0f0f0f
9+
const h01 = 0x0101010101010101
10+
11+
func popcnt(x uint64) uint64 {
12+
x -= (x >> 1) & m1
13+
x = (x & m2) + ((x >> 2) & m2)
14+
x = (x + (x >> 4)) & m4
15+
return (x * h01) >> 56
16+
}
17+
18+
func BenchmarkPopcnt(b *testing.B) {
19+
for i := 0; i < b.N; i++ {
20+
popcnt(uint64(i))
21+
}
22+
}
23+
24+
// END OMIT

2-benchmarking/examples/reset.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package q
2+
3+
// START1 OMIT
4+
func BenchmarkExpensive(b *testing.B) {
5+
boringAndExpensiveSetup()
6+
b.ResetTimer() // HL
7+
for n := 0; n < b.N; n++ {
8+
// function under test
9+
}
10+
}
11+
12+
// END1 OMIT
13+
14+
// START2 OMIT
15+
func BenchmarkComplicated(b *testing.B) {
16+
for n := 0; n < b.N; n++ {
17+
b.StopTimer() // HL
18+
complicatedSetup()
19+
b.StartTimer() // HL
20+
// function under test
21+
}
22+
}
23+
24+
// END2 OMIT

3-profiling/1-profiling.md

Lines changed: 70 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -264,27 +264,86 @@ By inserting a `bufio.Reader` between the input file and `readbyte` will
264264
265265
_Exercise_: Compare the times of this revised program to `wc`. How close is it? Take a profile and see what remains.
266266
267-
268267
## Memory profiling
269268
270-
The output of a memory profile can be similarly visualised.
269+
The new `words` profile suggests that something is allocating inside the `readbyte` function. We can use pprof to investigate.
270+
```go
271+
defer profile.Start(profile.MemProfile).Stop()
272+
```
273+
Then run the program as usual
274+
```
275+
% go run main2.go moby.txt
276+
2018/08/25 14:41:15 profile: memory profiling enabled (rate 4096), /var/folders/by/3gf34_z95zg05cyj744_vhx40000gn/T/profile312088211/mem.pprof
277+
"moby.txt": 181275 words
278+
2018/08/25 14:41:15 profile: memory profiling disabled, /var/folders/by/3gf34_z95zg05cyj744_vhx40000gn/T/profile312088211/mem.pprof
279+
```
280+
281+
![memprof](/Users/dfc/devel/gophercon2018-performance-tuning-workshop/3-profiling/images/memprof.png)
282+
283+
As we suspected the allocation was coming from `readbyte` -- this wasn't that complicated, readbyte is three lines long:
284+
```go
285+
func readbyte(r io.Reader) (rune, error) {
286+
var buf [1]byte // allocation is here
287+
_, err := r.Read(buf[:])
288+
return rune(buf[0]), err
289+
}
290+
```
291+
We'll talk about why this is happening in more detail in the next section, but for the moment what we see is every call to readbyte is allocating a new one byte long _array_ and that array is being allocated on the heap.
292+
293+
_Exercise_: What are some ways we can avoid this? Try them and use CPU and memory profiling to prove it.
271294
272-
% go build -gcflags='-memprofile=/tmp/m.p'
273-
% go tool pprof --alloc_objects -svg $(go tool -n compile) /tmp/m.p > alloc_objects.svg
274-
% go tool pprof --inuse_objects -svg $(go tool -n compile) /tmp/m.p > inuse_objects.svg
295+
### Alloc objects vs. inuse objects
275296
276-
Memory profiles come in two varieties
297+
Memory profiles come in two varieties, named after their `go tool pprof` flags
277298
278-
- Alloc objects reports the call site where each allocation was made
299+
- `-alloc_objects` reports the call site where each allocation was made
300+
- `-inuse_objects` reports the call site where an allocation was made _iff_ it was reachable at the end of the profile
279301
280-
!(allocated objects)[images/alloc_objects.svg]
302+
To demonstrate this, here is a contrived program which will allocate a bunch of memory in a controlled manner.
303+
```go
304+
// ensure y is live beyond the end of main.
305+
var y []byte
281306

282-
- Inuse objects reports the call site where an allocation was made _iff_ it was reachable at the end of the profile
307+
func main() {
308+
defer profile.Start(profile.MemProfile, profile.MemProfileRate(1)).Stop()
309+
y = allocate(100000)
310+
runtime.GC()
311+
}
283312

284-
.link images/inuse_objects.svg
313+
// allocate allocates count byte slices and returns the first slice allocated.
314+
func allocate(count int) []byte {
315+
var x [][]byte
316+
for i := 0; i < count; i++ {
317+
x = append(x, makeByteSlice())
318+
}
319+
return x[0]
320+
}
285321

286-
DEMO: `examples/inuseallocs`
322+
// makeByteSlice returns a byte slice of a random length in the range [0, 16384).
323+
func makeByteSlice() []byte {
324+
return make([]byte, rand.Intn(1<<14))
325+
}
326+
```
327+
The program is annotation with the `profile` package, and we set the memory profile rate to `1`--that is, record a stack trace for every allocation. This is slows down the program a lot, but you'll see why in a minute.
328+
```
329+
% go run main.go
330+
2018/08/25 15:22:05 profile: memory profiling enabled (rate 1), /var/folders/by/3gf34_z95zg05cyj744_vhx40000gn/T/profile730812803/mem.pprof
331+
2018/08/25 15:22:05 profile: memory profiling disabled, /var/folders/by/3gf34_z95zg05cyj744_vhx40000gn/T/profile730812803/mem.pprof
332+
```
333+
Lets look at the graph of allocated objects, this is the default, and shows the call graphs that lead to the allocation of every object during the profile.
334+
```
335+
% go tool pprof -web -alloc_objects /var/folders/by/3gf34_z95zg05cyj744_vhx40000gn/T/profile891268605/mem.pprof
336+
```
337+
338+
![alloc_objects](images/alloc_objects.png)
339+
340+
Not surprisingly more than 99% of the allocations were inside `makeByteSlice`. Now lets look at the same profile using `-inuse_objects`
341+
```
342+
% go tool pprof -web -inuse_objects /var/folders/by/3gf34_z95zg05cyj744_vhx40000gn/T/profile891268605/mem.pprof
343+
```
287344
345+
![inuse_objects](images/inuse_objects.png)
346+
What we see is not the objects that were _allocated_ during the profile, but the objects that remain _in use_, at the time the profile was taken -- this ignores the stack trace for objects which have been reclaimed by the garbage collector.
288347
## Block profiling (example)
289348
290349
Here is a visualisation of a block profile:
Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// A simple example to demonstrate the difference between alloc_count and inuse_count
1+
// A simple example to demonstrate the difference between alloc_objects and inuse_objects
22
package main
33

44
import (
@@ -8,24 +8,25 @@ import (
88
"github.com/pkg/profile"
99
)
1010

11-
const count = 100000
12-
11+
// ensure y is live beyond the end of main.
1312
var y []byte
1413

1514
func main() {
1615
defer profile.Start(profile.MemProfile, profile.MemProfileRate(1)).Stop()
17-
y = allocate()
16+
y = allocate(100000)
1817
runtime.GC()
1918
}
2019

21-
func allocate() []byte {
20+
// allocate allocates count byte slices and returns the first slice allocated.
21+
func allocate(count int) []byte {
2222
var x [][]byte
2323
for i := 0; i < count; i++ {
2424
x = append(x, makeByteSlice())
2525
}
2626
return x[0]
2727
}
2828

29+
// makeByteSlice returns a byte slice of a random length in the range [0, 16384).
2930
func makeByteSlice() []byte {
30-
return make([]byte, rand.Intn(2^14))
31+
return make([]byte, rand.Intn(1<<14))
3132
}

3-profiling/examples/words/main2.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,12 @@ import (
77
"log"
88
"os"
99
"unicode"
10+
11+
"github.com/pkg/profile"
1012
)
1113

1214
func main() {
15+
defer profile.Start(profile.MemProfile).Stop()
1316
f, err := os.Open(os.Args[1])
1417
if err != nil {
1518
log.Fatalf("could not open file %q: %v", os.Args[1], err)

3-profiling/images/alloc_objects.png

324 KB
Loading

3-profiling/images/inuse_objects.png

542 KB
Loading

3-profiling/images/memprof.png

313 KB
Loading

3-profiling/images/pprof.png

265 KB
Loading

0 commit comments

Comments
 (0)