Skip to content

Commit cd4c2ca

Browse files
committed
ARROW-5110: [Go] implement reading struct arrays from Arrow file
Author: Sebastien Binet <binet@cern.ch>

Closes #4158 from sbinet/issue-5110 and squashes the following commits:

f6f9b9e <Sebastien Binet> arrow/array: provide List array stringer implementation
b838ee0 <Sebastien Binet> arrow/array: fix Struct array Stringer implementation
d496c2f <Sebastien Binet> ARROW-5110: implement reading struct arrays from Arrow file
1 parent 9526138 commit cd4c2ca

File tree

6 files changed

+108
-8
lines changed

6 files changed

+108
-8
lines changed

go/arrow/array/list.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
package array
1818

1919
import (
20+
"fmt"
21+
"strings"
2022
"sync/atomic"
2123

2224
"github.com/apache/arrow/go/arrow"
@@ -42,6 +44,27 @@ func NewListData(data *Data) *List {
4244

4345
func (a *List) ListValues() Interface { return a.values }
4446

47+
// String returns a human-readable rendering of the list array. The elements
// are separated by single spaces and wrapped in square brackets, for example
// "[[0 1 2] (null) [3]]". An entry for which IsValid reports false is printed
// as "(null)"; every other entry is printed with %v from a temporary slice of
// the child values array.
//
// NOTE(review): a.offsets is indexed with the logical index i as-is. Whether
// that is correct for a List whose Data carries a non-zero offset depends on
// how setData fills a.offsets -- confirm.
func (a *List) String() string {
48+
o := new(strings.Builder)
49+
o.WriteString("[")
50+
for i := 0; i < a.Len(); i++ {
51+
if i > 0 {
52+
o.WriteString(" ") // separator between consecutive elements
53+
}
54+
if !a.IsValid(i) {
55+
o.WriteString("(null)")
56+
continue
57+
}
58+
beg := int64(a.offsets[i]) // beg and end delimit element i inside a.values
59+
end := int64(a.offsets[i+1])
60+
sub := NewSlice(a.values, beg, end)
61+
fmt.Fprintf(o, "%v", sub)
62+
sub.Release() // the slice was only needed for printing
63+
}
64+
o.WriteString("]")
65+
return o.String()
66+
}
67+
4568
func (a *List) setData(data *Data) {
4669
a.array.setData(data)
4770
vals := data.buffers[1]

go/arrow/array/struct.go

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,19 +46,14 @@ func (a *Struct) Field(i int) Interface { return a.fields[i] }
4646

4747
// String returns a textual rendering of the struct array. With the lines
// added by this change ("+" markers) it writes "{", then every child field
// array formatted with %v and separated by single spaces, then "}" -- for
// example "{[1.1 (null) 1.3 1.4] [1 2 (null) 4]}".
//
// The lines marked "-" are the previous implementation: it wrapped the output
// in square brackets and printed "(null)" when a.IsNull(i) was true, where i
// is the index of the field being ranged over.
func (a *Struct) String() string {
4848
o := new(strings.Builder)
49-
o.WriteString("[")
49+
o.WriteString("{")
5050
for i, v := range a.fields {
5151
if i > 0 {
5252
o.WriteString(" ") // separator between consecutive fields
5353
}
54-
switch {
55-
case a.IsNull(i):
56-
o.WriteString("(null)")
57-
default:
58-
fmt.Fprintf(o, "%v", v)
59-
}
54+
fmt.Fprintf(o, "%v", v) // formatting is delegated to the field array
6055
}
61-
o.WriteString("]")
56+
o.WriteString("}")
6257
return o.String()
6358
}
6459

go/arrow/array/struct_test.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,3 +247,56 @@ func TestStructArrayBulkAppend(t *testing.T) {
247247
}
248248
}
249249
}
250+
251+
// TestStructArrayStringer builds a four-element struct array with a float64
// field "f1" and an int32 field "f2", appends a null to f1 at index 1 and to
// f2 at index 2, and checks that String() returns
// "{[1.1 (null) 1.3 1.4] [1 2 (null) 4]}". All allocations go through a
// checked allocator whose deferred AssertSize(t, 0) runs when the test ends.
func TestStructArrayStringer(t *testing.T) {
252+
pool := memory.NewCheckedAllocator(memory.NewGoAllocator())
253+
defer pool.AssertSize(t, 0)
254+
255+
var (
256+
f1s = []float64{1.1, 1.2, 1.3, 1.4}
257+
f2s = []int32{1, 2, 3, 4}
258+
259+
fields = []arrow.Field{
260+
{Name: "f1", Type: arrow.PrimitiveTypes.Float64},
261+
{Name: "f2", Type: arrow.PrimitiveTypes.Int32},
262+
}
263+
dtype = arrow.StructOf(fields...)
264+
)
265+
266+
sb := array.NewStructBuilder(pool, dtype)
267+
defer sb.Release()
268+
269+
f1b := sb.FieldBuilder(0).(*array.Float64Builder)
270+
defer f1b.Release()
271+
272+
f2b := sb.FieldBuilder(1).(*array.Int32Builder)
273+
defer f2b.Release()
274+
275+
if got, want := sb.NumField(), 2; got != want {
276+
t.Fatalf("got=%d, want=%d", got, want)
277+
}
278+
279+
for i := range f1s {
280+
sb.Append(true)
281+
switch i {
282+
case 1: // null in f1 only
283+
f1b.AppendNull()
284+
f2b.Append(f2s[i])
285+
case 2: // null in f2 only
286+
f1b.Append(f1s[i])
287+
f2b.AppendNull()
288+
default: // both fields get their value
289+
f1b.Append(f1s[i])
290+
f2b.Append(f2s[i])
291+
}
292+
}
293+
294+
arr := sb.NewArray().(*array.Struct)
295+
defer arr.Release()
296+
297+
want := "{[1.1 (null) 1.3 1.4] [1 2 (null) 4]}"
298+
got := arr.String()
299+
if got != want {
300+
t.Fatalf("invalid string representation:\ngot = %q\nwant= %q", got, want)
301+
}
302+
}

go/arrow/example_test.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ func Example_listArray() {
188188
pos = int(offsets[i])
189189
fmt.Printf("]\n")
190190
}
191+
fmt.Printf("List = %v\n", arr)
191192

192193
// Output:
193194
// NullN() = 2
@@ -200,6 +201,7 @@ func Example_listArray() {
200201
// List[4] = [6, 7, 8]
201202
// List[5] = (null)
202203
// List[6] = [9]
204+
// List = [[0 1 2] (null) [3] [4 5] [6 7 8] (null) [9]]
203205
}
204206

205207
// This example shows how to create a Struct array.

go/arrow/ipc/file_reader.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,9 @@ func (ctx *arrayLoaderContext) loadArray(dt arrow.DataType) array.Interface {
367367
case *arrow.ListType:
368368
return ctx.loadList(dt)
369369

370+
case *arrow.StructType:
371+
return ctx.loadStruct(dt)
372+
370373
default:
371374
panic(errors.Errorf("array type %T not handled yet", dt))
372375
}
@@ -448,6 +451,27 @@ func (ctx *arrayLoaderContext) loadList(dt *arrow.ListType) array.Interface {
448451
return array.NewListData(data)
449452
}
450453

454+
// loadStruct builds a struct array of type dt. It obtains the struct's own
// field node and buffers from loadCommon, loads one child array per field of
// dt with loadChild, and combines their Data into a new array.Data that is
// handed to array.NewStructData. The loaded child arrays and the intermediate
// Data are released by deferred calls when the function returns.
func (ctx *arrayLoaderContext) loadStruct(dt *arrow.StructType) array.Interface {
455+
field, buffers := ctx.loadCommon(1) // presumably 1 = number of buffers to load -- confirm against loadCommon
456+
457+
arrs := make([]array.Interface, len(dt.Fields()))
458+
subs := make([]*array.Data, len(dt.Fields()))
459+
for i, f := range dt.Fields() {
460+
arrs[i] = ctx.loadChild(f.Type)
461+
subs[i] = arrs[i].Data()
462+
}
463+
defer func() {
464+
for i := range arrs {
465+
arrs[i].Release()
466+
}
467+
}()
468+
469+
data := array.NewData(dt, int(field.Length()), buffers, subs, int(field.NullCount()), 0)
470+
defer data.Release() // NOTE(review): relies on NewStructData keeping its own reference to data -- confirm
471+
472+
return array.NewStructData(data)
473+
}
474+
451475
func readDictionary(meta *memory.Buffer, types dictTypeMap, r ReadAtSeeker) (int64, array.Interface, error) {
452476
// msg := flatbuf.GetRootAsMessage(meta.Bytes(), 0)
453477
// var dictBatch flatbuf.DictionaryBatch

go/arrow/ipc/metadata.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,9 @@ func concreteTypeFromFB(typ flatbuf.Type, data flatbuffers.Table, children []arr
254254
}
255255
return arrow.ListOf(children[0].Type), nil
256256

257+
case flatbuf.TypeStruct_:
258+
return arrow.StructOf(children...), nil
259+
257260
default:
258261
// FIXME(sbinet): implement all the other types.
259262
panic(fmt.Errorf("arrow/ipc: type %v not implemented", flatbuf.EnumNamesType[typ]))

0 commit comments

Comments
 (0)