@@ -6,75 +6,108 @@ import (
6
6
"github.com/apache/arrow/go/v13/arrow/memory"
7
7
)
8
8
9
- func stripNullsFromLists (record arrow.Record ) arrow.Record {
10
- cols := record .Columns ()
11
- for c , col := range cols {
12
- list , ok := col .(array.ListLike )
13
- if ! ok {
14
- continue
15
- }
16
- if _ , ok := list .(* array.Map ); ok {
17
- // maps also correspond to array.ListLike
9
+ func stripNullsFromLists (list array.ListLike ) array.ListLike {
10
+ // TODO: handle Arrow maps separately if required
11
+
12
+ if list .NullN () == 0 {
13
+ return list
14
+ }
15
+
16
+ bldr := array .NewBuilder (memory .DefaultAllocator , list .DataType ()).(array.ListLikeBuilder )
17
+ for j := 0 ; j < list .Len (); j ++ {
18
+ if list .IsNull (j ) {
19
+ bldr .AppendNull ()
18
20
continue
19
21
}
20
-
21
- bldr := array .NewListBuilder (memory .DefaultAllocator , list .DataType ().(arrow.ListLikeType ).Elem ())
22
- for j := 0 ; j < list .Len (); j ++ {
23
- if list .IsNull (j ) {
24
- bldr .AppendNull ()
22
+ bldr .Append (true )
23
+ vBldr := bldr .ValueBuilder ()
24
+ from , to := list .ValueOffsets (j )
25
+ slc := array .NewSlice (list .ListValues (), from , to )
26
+ for k := 0 ; k < int (to - from ); k ++ {
27
+ if slc .IsNull (k ) {
25
28
continue
26
29
}
27
- bldr .Append (true )
28
- vBldr := bldr .ValueBuilder ()
29
- from , to := list .ValueOffsets (j )
30
- slc := array .NewSlice (list .ListValues (), from , to )
31
- for k := 0 ; k < int (to - from ); k ++ {
32
- if slc .IsNull (k ) {
33
- continue
34
- }
35
- err := vBldr .AppendValueFromString (slc .ValueStr (k ))
36
- if err != nil {
37
- panic (err )
38
- }
30
+ err := vBldr .AppendValueFromString (slc .ValueStr (k ))
31
+ if err != nil {
32
+ panic (err )
39
33
}
40
34
}
41
- cols [c ] = bldr .NewArray ()
42
35
}
43
- return array .NewRecord (record .Schema (), cols , record .NumRows ())
36
+
37
+ return bldr .NewArray ().(array.ListLike )
44
38
}
45
39
46
40
// AllowNullFunc is a predicate over an Arrow data type.
// NOTE(review): presumably this is the type of WriterTestSuite.allowNull, where
// a true result means nulls of that data type are left as-is — confirm against
// the struct definition.
type AllowNullFunc func (arrow.DataType ) bool
47
41
48
- func (s * WriterTestSuite ) replaceNullsByEmpty (record arrow.Record ) arrow.Record {
42
+ func (s * WriterTestSuite ) replaceNullsByEmpty (arr arrow.Array ) arrow.Array {
49
43
if s .allowNull == nil {
50
- return record
44
+ return arr
51
45
}
52
46
53
- cols := record .Columns ()
54
- for c , col := range cols {
55
- if col .NullN () == 0 || s .allowNull (col .DataType ()) {
56
- continue
57
- }
58
-
59
- builder := array .NewBuilder (memory .DefaultAllocator , col .DataType ())
60
- for j := 0 ; j < col .Len (); j ++ {
61
- if col .IsNull (j ) {
47
+ if ! s .allowNull (arr .DataType ()) && arr .NullN () > 0 {
48
+ builder := array .NewBuilder (memory .DefaultAllocator , arr .DataType ())
49
+ for j := 0 ; j < arr .Len (); j ++ {
50
+ if arr .IsNull (j ) {
62
51
builder .AppendEmptyValue ()
63
52
continue
64
53
}
65
54
66
- if err := builder .AppendValueFromString (col .ValueStr (j )); err != nil {
55
+ if err := builder .AppendValueFromString (arr .ValueStr (j )); err != nil {
67
56
panic (err )
68
57
}
69
58
}
70
- cols [c ] = builder .NewArray ()
59
+
60
+ arr = builder .NewArray ()
61
+ }
62
+
63
+ // we need to process the nested arrays, too
64
+ return s .replaceNullsByEmptyNestedArray (arr )
65
+ }
66
+
67
+ func (s * WriterTestSuite ) replaceNullsByEmptyNestedArray (arr arrow.Array ) arrow.Array {
68
+ if s .allowNull == nil {
69
+ return arr
70
+ }
71
+
72
+ switch arr := arr .(type ) {
73
+ case array.ListLike : // TODO: handle Arrow maps separately if required
74
+ values := s .handleNullsArray (arr .ListValues ())
75
+ return array .MakeFromData (
76
+ array .NewData (arr .DataType (), arr .Len (),
77
+ arr .Data ().Buffers (),
78
+ []arrow.ArrayData {values .Data ()},
79
+ arr .NullN (), arr .Data ().Offset (),
80
+ ),
81
+ )
82
+ case * array.Struct :
83
+ children := make ([]arrow.ArrayData , arr .NumField ())
84
+ for i := 0 ; i < arr .NumField (); i ++ {
85
+ children [i ] = s .handleNullsArray (arr .Field (i )).Data ()
86
+ }
87
+ return array .MakeFromData (
88
+ array .NewData (arr .DataType (), arr .Len (),
89
+ arr .Data ().Buffers (),
90
+ children ,
91
+ arr .NullN (), arr .Data ().Offset (),
92
+ ),
93
+ )
94
+ default :
95
+ return arr
71
96
}
72
- return array .NewRecord (record .Schema (), cols , record .NumRows ())
73
97
}
74
98
75
99
func (s * WriterTestSuite ) handleNulls (record arrow.Record ) arrow.Record {
76
- if s .ignoreNullsInLists {
77
- record = stripNullsFromLists (record )
100
+ cols := record .Columns ()
101
+ for c , col := range cols {
102
+ cols [c ] = s .handleNullsArray (col )
103
+ }
104
+ return array .NewRecord (record .Schema (), cols , record .NumRows ())
105
+ }
106
+
107
+ func (s * WriterTestSuite ) handleNullsArray (arr arrow.Array ) arrow.Array {
108
+ if list , ok := arr .(array.ListLike ); ok && s .ignoreNullsInLists {
109
+ arr = stripNullsFromLists (list ) // TODO: handle Arrow maps separately if required
78
110
}
79
- return s .replaceNullsByEmpty (record )
111
+
112
+ return s .replaceNullsByEmpty (arr )
80
113
}
0 commit comments