Skip to content

Commit ec2d7cb

Browse files
authored
GH-41159: [Go][Parquet] Improve Parquet BitWriter WriteVlqInt Performance (#41160)
[GH-41159](#41159) ### Rationale for this change This change improves Parquet FileWriter performance when writing Parquet files from Arrow Records. We saw write throughput improve from 320k rows/sec -> 650k rows/sec after making this change. ### What changes are included in this PR? This PR stores the `buf` scratch array on the bitWriter and reuses it across WriteVlqInt calls, instead of declaring a new one on every call, when writing Parquet files. ### Are these changes tested? Yes ### Are there any user-facing changes? No Authored-by: @hhoughgg * GitHub Issue: #41159 Lead-authored-by: Andy Fan <duan-wei@cloudflare.com> Co-authored-by: andyfan <52736754+DuanWeiFan@users.noreply.github.com> Signed-off-by: Matt Topol <zotthewizard@gmail.com>
1 parent 48a9639 commit ec2d7cb

File tree

2 files changed

+20
-3
lines changed

2 files changed

+20
-3
lines changed

go/parquet/internal/utils/bit_reader_test.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,23 @@ func TestBitWriter(t *testing.T) {
5959

6060
assert.Equal(t, byte(0xAA), buf[0])
6161
assert.Equal(t, byte(0xCC), buf[1])
62+
63+
for i := 0; i < 3; i++ {
64+
assert.True(t, bw.WriteVlqInt(uint64(i)))
65+
}
66+
assert.Equal(t, byte(0xAA), buf[0])
67+
assert.Equal(t, byte(0xCC), buf[1])
68+
assert.Equal(t, byte(0), buf[2])
69+
assert.Equal(t, byte(1), buf[3])
70+
assert.Equal(t, byte(2), buf[4])
71+
}
72+
73+
// BenchmarkBitWriter measures BitWriter.WriteVlqInt by writing the value 1
// b.N times through the writer returned by utils.NewWriterAtBuffer over a
// b.N-byte slice.
func BenchmarkBitWriter(b *testing.B) {
74+
// One byte per iteration: the varint encoding of 1 is a single byte.
buf := make([]byte, b.N)
75+
bw := utils.NewBitWriter(utils.NewWriterAtBuffer(buf))
76+
for i := 0; i < b.N; i++ {
77+
// NOTE(review): the assert call runs inside the timed loop, so its cost is part of the measurement.
assert.True(b, bw.WriteVlqInt(uint64(1)))
78+
}
6279
}
6380

6481
func TestBitReader(t *testing.T) {

go/parquet/internal/utils/bit_writer.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ type BitWriter struct {
7575
byteoffset int
7676
bitoffset uint
7777
raw [8]byte
78+
buf [binary.MaxVarintLen64]byte
7879
}
7980

8081
// NewBitWriter initializes a new bit writer to write to the passed in interface
@@ -163,9 +164,8 @@ func (b *BitWriter) WriteAligned(val uint64, nbytes int) bool {
163164
// without buffering.
164165
func (b *BitWriter) WriteVlqInt(v uint64) bool {
165166
b.Flush(true)
166-
var buf [binary.MaxVarintLen64]byte
167-
nbytes := binary.PutUvarint(buf[:], v)
168-
if _, err := b.wr.WriteAt(buf[:nbytes], int64(b.byteoffset)); err != nil {
167+
nbytes := binary.PutUvarint(b.buf[:], v)
168+
if _, err := b.wr.WriteAt(b.buf[:nbytes], int64(b.byteoffset)); err != nil {
169169
log.Println(err)
170170
return false
171171
}

0 commit comments

Comments
 (0)