Skip to content

Commit eeaf121

Browse files
sbinetwesm
authored and committed
ARROW-3038: [Go] implement String array
needs #2870 Author: Sebastien Binet <binet@cern.ch> Closes #2871 from sbinet/issue-3038 and squashes the following commits: 12998d5 <Sebastien Binet> ARROW-3038: implement String array
1 parent 9fb776c commit eeaf121

File tree

6 files changed

+293
-8
lines changed

6 files changed

+293
-8
lines changed

go/README.md

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ them internally in the [ifql][] execution engine and storage layers of [InfluxDB
131131
- [x] 32 and 64 bit floats
132132
- [x] Packed LSB booleans
133133
- [x] Variable-length binary
134-
- [ ] String (valid UTF-8)
134+
- [x] String (valid UTF-8)
135135
- [ ] Half-float (16-bit)
136136
- [x] Null (no physical storage)
137137

@@ -156,17 +156,15 @@ them internally in the [ifql][] execution engine and storage layers of [InfluxDB
156156
### Type metadata
157157

158158
- [x] Data types (implemented arrays)
159-
- [ ] Field
160-
- [ ] Schema
159+
- [x] Field
160+
- [x] Schema
161161

162162

163163
### I/O
164164

165-
Serialization is planned for a future iteration.
166-
167165
- [ ] Flat buffers for serializing metadata
168-
- [ ] Record Batch
169-
- [ ] Table
166+
- [x] Record Batch
167+
- [x] Table
170168

171169

172170

go/arrow/array/array.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ func init() {
177177
arrow.HALF_FLOAT: unsupportedArrayType,
178178
arrow.FLOAT32: func(data *Data) Interface { return NewFloat32Data(data) },
179179
arrow.FLOAT64: func(data *Data) Interface { return NewFloat64Data(data) },
180-
arrow.STRING: unsupportedArrayType,
180+
arrow.STRING: func(data *Data) Interface { return NewStringData(data) },
181181
arrow.BINARY: func(data *Data) Interface { return NewBinaryData(data) },
182182
arrow.FIXED_SIZE_BINARY: unsupportedArrayType,
183183
arrow.DATE32: unsupportedArrayType,

go/arrow/array/binarybuilder.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,12 @@ func (b *BinaryBuilder) Resize(n int) {
151151
b.builder.resize(n, b.init)
152152
}
153153

154+
// NewArray creates a Binary array from the memory buffers used by the builder and resets the BinaryBuilder
155+
// so it can be used to build a new array.
156+
func (b *BinaryBuilder) NewArray() Interface {
157+
return b.NewBinaryArray()
158+
}
159+
154160
// NewBinaryArray creates a Binary array from the memory buffers used by the builder and resets the BinaryBuilder
155161
// so it can be used to build a new array.
156162
func (b *BinaryBuilder) NewBinaryArray() (a *Binary) {
@@ -182,3 +188,7 @@ func (b *BinaryBuilder) appendNextOffset() {
182188
// TODO(sgc): check binaryArrayMaximumCapacity?
183189
b.offsets.AppendValue(int32(numBytes))
184190
}
191+
192+
var (
193+
_ Builder = (*BinaryBuilder)(nil)
194+
)

go/arrow/array/builder.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,9 @@ func newBuilder(mem memory.Allocator, dtype arrow.DataType) Builder {
220220
case arrow.FLOAT64:
221221
return NewFloat64Builder(mem)
222222
case arrow.STRING:
223+
return NewStringBuilder(mem)
223224
case arrow.BINARY:
225+
return NewBinaryBuilder(mem, arrow.BinaryTypes.Binary)
224226
case arrow.FIXED_SIZE_BINARY:
225227
case arrow.DATE32:
226228
case arrow.DATE64:

go/arrow/array/string.go

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing, software
12+
// distributed under the License is distributed on an "AS IS" BASIS,
13+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
// See the License for the specific language governing permissions and
15+
// limitations under the License.
16+
17+
package array
18+
19+
import (
20+
"fmt"
21+
"math"
22+
"strings"
23+
"unsafe"
24+
25+
"github.com/apache/arrow/go/arrow"
26+
"github.com/apache/arrow/go/arrow/memory"
27+
)
28+
29+
const (
30+
stringArrayMaximumCapacity = math.MaxInt32
31+
)
32+
33+
// String represents an immutable sequence of variable-length UTF-8 strings.
34+
type String struct {
35+
array
36+
offsets []int32
37+
values string
38+
}
39+
40+
// NewStringData constructs a new String array from data.
41+
func NewStringData(data *Data) *String {
42+
a := &String{}
43+
a.refCount = 1
44+
a.setData(data)
45+
return a
46+
}
47+
48+
// Value returns the string at index i. The result shares memory with the array's value buffer; no copy is made.
49+
func (a *String) Value(i int) string { return a.values[a.offsets[i]:a.offsets[i+1]] }
50+
51+
func (a *String) String() string {
52+
o := new(strings.Builder)
53+
o.WriteString("[")
54+
for i := 0; i < a.Len(); i++ {
55+
if i > 0 {
56+
o.WriteString(" ")
57+
}
58+
switch {
59+
case a.IsNull(i):
60+
o.WriteString("(null)")
61+
default:
62+
fmt.Fprintf(o, "%q", a.Value(i))
63+
}
64+
}
65+
o.WriteString("]")
66+
return o.String()
67+
}
68+
69+
func (a *String) setData(data *Data) {
70+
if len(data.buffers) != 3 {
71+
panic("arrow/array: len(data.buffers) != 3")
72+
}
73+
74+
a.array.setData(data)
75+
76+
if vdata := data.buffers[2]; vdata != nil {
77+
b := vdata.Bytes()
78+
a.values = *(*string)(unsafe.Pointer(&b))
79+
}
80+
81+
if offsets := data.buffers[1]; offsets != nil {
82+
a.offsets = arrow.Int32Traits.CastFromBytes(offsets.Bytes())
83+
}
84+
}
85+
86+
// A StringBuilder is used to build a String array using the Append methods.
87+
type StringBuilder struct {
88+
builder *BinaryBuilder
89+
}
90+
91+
func NewStringBuilder(mem memory.Allocator) *StringBuilder {
92+
b := &StringBuilder{
93+
builder: NewBinaryBuilder(mem, arrow.BinaryTypes.String),
94+
}
95+
return b
96+
}
97+
98+
// Release decreases the reference count by 1.
99+
// When the reference count goes to zero, the memory is freed.
100+
// Release may be called simultaneously from multiple goroutines.
101+
func (b *StringBuilder) Release() {
102+
b.builder.Release()
103+
}
104+
105+
// Retain increases the reference count by 1.
106+
// Retain may be called simultaneously from multiple goroutines.
107+
func (b *StringBuilder) Retain() {
108+
b.builder.Retain()
109+
}
110+
111+
//
112+
// Len returns the number of elements in the array builder.
113+
func (b *StringBuilder) Len() int { return b.builder.Len() }
114+
115+
// Cap returns the total number of elements that can be stored without allocating additional memory.
116+
func (b *StringBuilder) Cap() int { return b.builder.Cap() }
117+
118+
// NullN returns the number of null values in the array builder.
119+
func (b *StringBuilder) NullN() int { return b.builder.NullN() }
120+
121+
func (b *StringBuilder) Append(v string) {
122+
b.builder.Append([]byte(v))
123+
}
124+
125+
func (b *StringBuilder) AppendNull() {
126+
b.builder.AppendNull()
127+
}
128+
129+
// AppendValues will append the values in the v slice. The valid slice determines which values
130+
// in v are valid (not null). The valid slice must either be empty or be equal in length to v. If empty,
131+
// all values in v are appended and considered valid.
132+
func (b *StringBuilder) AppendValues(v []string, valid []bool) {
133+
b.builder.AppendStringValues(v, valid)
134+
}
135+
136+
func (b *StringBuilder) Value(i int) string {
137+
return string(b.builder.Value(i))
138+
}
139+
140+
func (b *StringBuilder) init(capacity int) {
141+
b.builder.init(capacity)
142+
}
143+
144+
func (b *StringBuilder) resize(newBits int, init func(int)) {
145+
b.builder.resize(newBits, init)
146+
}
147+
148+
// Reserve ensures there is enough space for appending n elements
149+
// by checking the capacity and calling Resize if necessary.
150+
func (b *StringBuilder) Reserve(n int) {
151+
b.builder.Reserve(n)
152+
}
153+
154+
// Resize adjusts the space allocated by b to n elements. If n is greater than b.Cap(),
155+
// additional memory will be allocated. If n is smaller, the allocated memory may be reduced.
156+
func (b *StringBuilder) Resize(n int) {
157+
b.builder.Resize(n)
158+
}
159+
160+
// NewArray creates a String array from the memory buffers used by the builder and resets the StringBuilder
161+
// so it can be used to build a new array.
162+
func (b *StringBuilder) NewArray() Interface {
163+
return b.NewStringArray()
164+
}
165+
166+
// NewStringArray creates a String array from the memory buffers used by the builder and resets the StringBuilder
167+
// so it can be used to build a new array.
168+
func (b *StringBuilder) NewStringArray() (a *String) {
169+
data := b.builder.newData()
170+
a = NewStringData(data)
171+
data.Release()
172+
return
173+
}
174+
175+
var (
176+
_ Interface = (*String)(nil)
177+
_ Builder = (*StringBuilder)(nil)
178+
)

go/arrow/array/string_test.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing, software
12+
// distributed under the License is distributed on an "AS IS" BASIS,
13+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
// See the License for the specific language governing permissions and
15+
// limitations under the License.
16+
17+
package array_test
18+
19+
import (
20+
"testing"
21+
22+
"github.com/apache/arrow/go/arrow"
23+
"github.com/apache/arrow/go/arrow/array"
24+
"github.com/apache/arrow/go/arrow/memory"
25+
)
26+
27+
func TestStringArray(t *testing.T) {
28+
mem := memory.NewCheckedAllocator(memory.NewGoAllocator())
29+
defer mem.AssertSize(t, 0)
30+
31+
var (
32+
want = []string{"hello", "世界", "", "bye"}
33+
valids = []bool{true, true, false, true}
34+
)
35+
36+
sb := array.NewStringBuilder(mem)
37+
defer sb.Release()
38+
39+
sb.Retain()
40+
sb.Release()
41+
42+
sb.AppendValues(want[:2], nil)
43+
44+
sb.AppendNull()
45+
sb.Append(want[3])
46+
47+
if got, want := sb.Len(), len(want); got != want {
48+
t.Fatalf("invalid len: got=%d, want=%d", got, want)
49+
}
50+
51+
if got, want := sb.NullN(), 1; got != want {
52+
t.Fatalf("invalid nulls: got=%d, want=%d", got, want)
53+
}
54+
55+
arr := sb.NewStringArray()
56+
defer arr.Release()
57+
58+
arr.Retain()
59+
arr.Release()
60+
61+
if got, want := arr.Len(), len(want); got != want {
62+
t.Fatalf("invalid len: got=%d, want=%d", got, want)
63+
}
64+
65+
if got, want := arr.NullN(), 1; got != want {
66+
t.Fatalf("invalid nulls: got=%d, want=%d", got, want)
67+
}
68+
69+
for i := range want {
70+
if arr.IsNull(i) != !valids[i] {
71+
t.Fatalf("arr[%d]-validity: got=%v want=%v", i, !arr.IsNull(i), valids[i])
72+
}
73+
switch {
74+
case arr.IsNull(i):
75+
default:
76+
got := arr.Value(i)
77+
if got != want[i] {
78+
t.Fatalf("arr[%d]: got=%q, want=%q", i, got, want[i])
79+
}
80+
}
81+
}
82+
83+
sub := array.MakeFromData(arr.Data())
84+
defer sub.Release()
85+
86+
if sub.DataType().ID() != arrow.STRING {
87+
t.Fatalf("invalid type: got=%q, want=string", sub.DataType().Name())
88+
}
89+
90+
if _, ok := sub.(*array.String); !ok {
91+
t.Fatalf("could not type-assert to array.String")
92+
}
93+
94+
if got, want := arr.String(), `["hello" "世界" (null) "bye"]`; got != want {
95+
t.Fatalf("got=%q, want=%q", got, want)
96+
}
97+
}

0 commit comments

Comments
 (0)