Skip to content

Commit 1d788e5

Browse files
adenechekou
authored andcommitted
ARROW-337: UnionListWriter.list() is doing more than it should, this …
…can cause data corruption The general idea is to use the "inner" writer's position to update the offset. This involves making sure various writers do indeed update their positions. UnionListWriter.startList() should explicitly set the inner writer position in case setPosition() was called to move the union list writer's position Author: adeneche <adeneche@dremio.com> Closes apache#183 from adeneche/ARROW-337 and squashes the following commits: 1ae7e00 [adeneche] updated TestComplexWriter to ensure position is set properly by the various writers 7d5aefc [adeneche] ARROW-337: UnionListWriter.list() is doing more than it should, this can cause data corruption
1 parent 585c74e commit 1d788e5

File tree

5 files changed

+154
-86
lines changed

5 files changed

+154
-86
lines changed

vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ public void start() {
5858
@Override
5959
public void end() {
6060
getWriter(MinorType.MAP).end();
61+
setPosition(idx() + 1);
6162
}
6263

6364
@Override
@@ -68,6 +69,7 @@ public void startList() {
6869
@Override
6970
public void endList() {
7071
getWriter(MinorType.LIST).endList();
72+
setPosition(idx() + 1);
7173
}
7274

7375
<#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first />

vector/src/main/codegen/templates/MapWriters.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ public void start() {
185185
186186
@Override
187187
public void end() {
188+
setPosition(idx()+1);
188189
}
189190
190191
<#list vv.types as type><#list type.minor as minor>

vector/src/main/codegen/templates/UnionListWriter.java

Lines changed: 7 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -101,11 +101,7 @@ public void setPosition(int index) {
101101
public ${name}Writer <#if uncappedName == "int">integer<#else>${uncappedName}</#if>(String name) {
102102
// assert inMap;
103103
mapName = name;
104-
final int nextOffset = offsets.getAccessor().get(idx() + 1);
105-
vector.getMutator().setNotNull(idx());
106-
writer.setPosition(nextOffset);
107-
${name}Writer ${uncappedName}Writer = writer.<#if uncappedName == "int">integer<#else>${uncappedName}</#if>(name);
108-
return ${uncappedName}Writer;
104+
return writer.<#if uncappedName == "int">integer<#else>${uncappedName}</#if>(name);
109105
}
110106
111107
</#if>
@@ -120,18 +116,11 @@ public MapWriter map() {
120116
121117
@Override
122118
public ListWriter list() {
123-
final int nextOffset = offsets.getAccessor().get(idx() + 1);
124-
vector.getMutator().setNotNull(idx());
125-
offsets.getMutator().setSafe(idx() + 1, nextOffset + 1);
126-
writer.setPosition(nextOffset);
127119
return writer;
128120
}
129121
130122
@Override
131123
public ListWriter list(String name) {
132-
final int nextOffset = offsets.getAccessor().get(idx() + 1);
133-
vector.getMutator().setNotNull(idx());
134-
writer.setPosition(nextOffset);
135124
ListWriter listWriter = writer.list(name);
136125
return listWriter;
137126
}
@@ -145,30 +134,26 @@ public MapWriter map(String name) {
145134
@Override
146135
public void startList() {
147136
vector.getMutator().startNewValue(idx());
137+
writer.setPosition(offsets.getAccessor().get(idx() + 1));
148138
}
149139
150140
@Override
151141
public void endList() {
152-
142+
offsets.getMutator().set(idx() + 1, writer.idx());
143+
setPosition(idx() + 1);
153144
}
154145
155146
@Override
156147
public void start() {
157148
// assert inMap;
158-
final int nextOffset = offsets.getAccessor().get(idx() + 1);
159-
vector.getMutator().setNotNull(idx());
160-
offsets.getMutator().setSafe(idx() + 1, nextOffset);
161-
writer.setPosition(nextOffset);
162149
writer.start();
163150
}
164151
165152
@Override
166153
public void end() {
167154
// if (inMap) {
168-
writer.end();
169-
inMap = false;
170-
final int nextOffset = offsets.getAccessor().get(idx() + 1);
171-
offsets.getMutator().setSafe(idx() + 1, nextOffset + 1);
155+
writer.end();
156+
inMap = false;
172157
// }
173158
}
174159
@@ -181,11 +166,8 @@ public void end() {
181166
@Override
182167
public void write${name}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) {
183168
// assert !inMap;
184-
final int nextOffset = offsets.getAccessor().get(idx() + 1);
185-
vector.getMutator().setNotNull(idx());
186-
writer.setPosition(nextOffset);
187169
writer.write${name}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list>);
188-
offsets.getMutator().setSafe(idx() + 1, nextOffset + 1);
170+
writer.setPosition(writer.idx()+1);
189171
}
190172

191173
</#if>

vector/src/test/java/org/apache/arrow/vector/TestListVector.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,14 @@
1919

2020
import org.apache.arrow.memory.BufferAllocator;
2121
import org.apache.arrow.vector.complex.ListVector;
22-
import org.apache.arrow.vector.complex.impl.ComplexCopier;
23-
import org.apache.arrow.vector.complex.impl.UnionListReader;
2422
import org.apache.arrow.vector.complex.impl.UnionListWriter;
2523
import org.apache.arrow.vector.complex.reader.FieldReader;
26-
import org.apache.arrow.vector.types.pojo.Field;
2724
import org.junit.After;
2825
import org.junit.Assert;
2926
import org.junit.Before;
3027
import org.junit.Test;
3128

3229
public class TestListVector {
33-
private final static String EMPTY_SCHEMA_PATH = "";
3430

3531
private BufferAllocator allocator;
3632

0 commit comments

Comments
 (0)