Skip to content
This repository was archived by the owner on Oct 21, 2024. It is now read-only.

Commit 4f75659

Browse files
Richard Artoul (richardartoul)
authored and committed
ARROW-7921: [Go] Add Reset method to various components and clean up comments.
The Reset method allows the data structures to be re-used so they don't have to be allocated over and over again. Closes apache#6430 from richardartoul/ra/merge-upstream and squashes the following commits: 5a08281 <Richard Artoul> Add license to test file d76be05 <Richard Artoul> Add test for data reset d102b1f <Richard Artoul> Add tests d3e6e67 <Richard Artoul> cleanup comments c8525ae <Richard Artoul> Add Reset method to int array (#5) 489ca25 <Richard Artoul> Fix array.setData() to retain before release (#4) 88cd05f <Richard Artoul> Add reset method to Data (#3) 6d1b277 <Richard Artoul> Add Reset() method to String array (#2) dca2303 <Richard Artoul> Add Reset method to buffer and cleanup comments (#1) Lead-authored-by: Richard Artoul <richard.artoul@datadoghq.com> Co-authored-by: Richard Artoul <richardartoul@gmail.com> Signed-off-by: Sebastien Binet <binet@cern.ch>
1 parent 7e136a1 commit 4f75659

File tree

9 files changed

+358
-26
lines changed

9 files changed

+358
-26
lines changed

go/arrow/array/array.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,11 +119,13 @@ func (a *array) IsValid(i int) bool {
119119
}
120120

121121
func (a *array) setData(data *Data) {
122+
// Retain before releasing in case a.data is the same as data.
123+
data.Retain()
124+
122125
if a.data != nil {
123126
a.data.Release()
124127
}
125128

126-
data.Retain()
127129
if len(data.buffers) > 0 && data.buffers[0] != nil {
128130
a.nullBitmapBytes = data.buffers[0].Bytes()
129131
}

go/arrow/array/data.go

Lines changed: 51 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ import (
2424
"github.com/apache/arrow/go/arrow/memory"
2525
)
2626

27-
// A type which represents the memory and metadata for an Arrow array.
27+
// Data represents the memory and metadata of an Arrow array.
2828
type Data struct {
2929
refCount int64
3030
dtype arrow.DataType
@@ -35,6 +35,7 @@ type Data struct {
3535
childData []*Data // TODO(sgc): managed by ListArray, StructArray and UnionArray types
3636
}
3737

38+
// NewData creates a new Data.
3839
func NewData(dtype arrow.DataType, length int, buffers []*memory.Buffer, childData []*Data, nulls, offset int) *Data {
3940
for _, b := range buffers {
4041
if b != nil {
@@ -59,6 +60,42 @@ func NewData(dtype arrow.DataType, length int, buffers []*memory.Buffer, childDa
5960
}
6061
}
6162

63+
// Reset sets the Data for re-use.
64+
func (d *Data) Reset(dtype arrow.DataType, length int, buffers []*memory.Buffer, childData []*Data, nulls, offset int) {
65+
// Retain new buffers before releasing existing buffers in-case they're the same ones to prevent accidental premature
66+
// release.
67+
for _, b := range buffers {
68+
if b != nil {
69+
b.Retain()
70+
}
71+
}
72+
for _, b := range d.buffers {
73+
if b != nil {
74+
b.Release()
75+
}
76+
}
77+
d.buffers = buffers
78+
79+
// Retain new children data before releasing existing children data in-case they're the same ones to prevent accidental
80+
// premature release.
81+
for _, d := range childData {
82+
if d != nil {
83+
d.Retain()
84+
}
85+
}
86+
for _, d := range d.childData {
87+
if d != nil {
88+
d.Release()
89+
}
90+
}
91+
d.childData = childData
92+
93+
d.dtype = dtype
94+
d.length = length
95+
d.nulls = nulls
96+
d.offset = offset
97+
}
98+
6299
// Retain increases the reference count by 1.
63100
// Retain may be called simultaneously from multiple goroutines.
64101
func (d *Data) Retain() {
@@ -85,10 +122,19 @@ func (d *Data) Release() {
85122
}
86123
}
87124

88-
func (d *Data) DataType() arrow.DataType { return d.dtype }
89-
func (d *Data) NullN() int { return d.nulls }
90-
func (d *Data) Len() int { return d.length }
91-
func (d *Data) Offset() int { return d.offset }
125+
// DataType returns the DataType of the data.
126+
func (d *Data) DataType() arrow.DataType { return d.dtype }
127+
128+
// NullN returns the number of nulls.
129+
func (d *Data) NullN() int { return d.nulls }
130+
131+
// Len returns the length.
132+
func (d *Data) Len() int { return d.length }
133+
134+
// Offset returns the offset.
135+
func (d *Data) Offset() int { return d.offset }
136+
137+
// Buffers returns the buffers.
92138
func (d *Data) Buffers() []*memory.Buffer { return d.buffers }
93139

94140
// NewSliceData returns a new slice that shares backing data with the input.

go/arrow/array/data_test.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing, software
12+
// distributed under the License is distributed on an "AS IS" BASIS,
13+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
// See the License for the specific language governing permissions and
15+
// limitations under the License.
16+
17+
package array
18+
19+
import (
20+
"testing"
21+
22+
"github.com/apache/arrow/go/arrow"
23+
"github.com/apache/arrow/go/arrow/memory"
24+
"github.com/stretchr/testify/assert"
25+
)
26+
27+
func TestDataReset(t *testing.T) {
28+
var (
29+
buffers1 = make([]*memory.Buffer, 0, 3)
30+
buffers2 = make([]*memory.Buffer, 0, 3)
31+
)
32+
for i := 0; i < cap(buffers1); i++ {
33+
buffers1 = append(buffers1, memory.NewBufferBytes([]byte("some-bytes1")))
34+
buffers2 = append(buffers2, memory.NewBufferBytes([]byte("some-bytes2")))
35+
}
36+
37+
data := NewData(&arrow.StringType{}, 10, buffers1, nil, 0, 0)
38+
data.Reset(&arrow.Int64Type{}, 5, buffers2, nil, 1, 2)
39+
40+
for i := 0; i < 2; i++ {
41+
assert.Equal(t, buffers2, data.Buffers())
42+
assert.Equal(t, &arrow.Int64Type{}, data.DataType())
43+
assert.Equal(t, 1, data.NullN())
44+
assert.Equal(t, 2, data.Offset())
45+
assert.Equal(t, 5, data.Len())
46+
47+
// Make sure it works when resetting the data with its own buffers (new buffers are retained
48+
// before old ones are released.)
49+
data.Reset(&arrow.Int64Type{}, 5, data.Buffers(), nil, 1, 2)
50+
}
51+
}

0 commit comments

Comments
 (0)