Skip to content

Commit dacc0d1

Browse files
Pindikura Ravindra authored and kou committed
ARROW-3459: [C++][Gandiva] support for string o/p
- If the output vectors aren't provided, allow resizable data buffers.
- If the output vectors are provided, assert that the data buffer is resizable.
- Use a C++ function to write to string-like output buffers; it checks for capacity and updates the offset vector.

Author: Pindikura Ravindra <ravindra@dremio.com>

Closes apache#4760 from pravindra/varlen and squashes the following commits:

0068b6a <Pindikura Ravindra> ARROW-3459: support for string o/p
1 parent bc82a4c commit dacc0d1

File tree

2 files changed

+85
-4
lines changed

2 files changed

+85
-4
lines changed

gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/Projector.java

Lines changed: 11 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@
2929
import org.apache.arrow.gandiva.ipc.GandivaTypes.SelectionVectorType;
3030
import org.apache.arrow.vector.FixedWidthVector;
3131
import org.apache.arrow.vector.ValueVector;
32+
import org.apache.arrow.vector.VariableWidthVector;
3233
import org.apache.arrow.vector.ipc.message.ArrowBuffer;
3334
import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
3435
import org.apache.arrow.vector.types.pojo.Schema;
@@ -235,16 +236,22 @@ private void evaluate(int numRows, List<ArrowBuf> buffers, List<ArrowBuffer> buf
235236
bufSizes[idx++] = bufLayout.getSize();
236237
}
237238

238-
long[] outAddrs = new long[2 * outColumns.size()];
239-
long[] outSizes = new long[2 * outColumns.size()];
239+
long[] outAddrs = new long[3 * outColumns.size()];
240+
long[] outSizes = new long[3 * outColumns.size()];
240241
idx = 0;
241242
for (ValueVector valueVector : outColumns) {
242-
if (!(valueVector instanceof FixedWidthVector)) {
243-
throw new UnsupportedTypeException("Unsupported value vector type");
243+
boolean isFixedWith = valueVector instanceof FixedWidthVector;
244+
boolean isVarWidth = valueVector instanceof VariableWidthVector;
245+
if (!isFixedWith && !isVarWidth) {
246+
throw new UnsupportedTypeException("Unsupported value vector type " + valueVector.getField().getFieldType());
244247
}
245248

246249
outAddrs[idx] = valueVector.getValidityBuffer().memoryAddress();
247250
outSizes[idx++] = valueVector.getValidityBuffer().capacity();
251+
if (isVarWidth) {
252+
outAddrs[idx] = valueVector.getOffsetBuffer().memoryAddress();
253+
outSizes[idx++] = valueVector.getOffsetBuffer().capacity();
254+
}
248255
outAddrs[idx] = valueVector.getDataBuffer().memoryAddress();
249256
outSizes[idx++] = valueVector.getDataBuffer().capacity();
250257

gandiva/src/test/java/org/apache/arrow/gandiva/evaluator/ProjectorTest.java

Lines changed: 74 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@
3838
import org.apache.arrow.vector.BitVector;
3939
import org.apache.arrow.vector.IntVector;
4040
import org.apache.arrow.vector.ValueVector;
41+
import org.apache.arrow.vector.VarCharVector;
4142
import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
4243
import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
4344
import org.apache.arrow.vector.types.DateUnit;
@@ -48,7 +49,9 @@
4849

4950
import org.junit.Assert;
5051
import org.junit.Ignore;
52+
import org.junit.Rule;
5153
import org.junit.Test;
54+
import org.junit.rules.ExpectedException;
5255

5356
import com.google.common.collect.Lists;
5457
import com.google.common.collect.Sets;
@@ -60,6 +63,9 @@ public class ProjectorTest extends BaseEvaluatorTest {
6063
private Charset utf8Charset = Charset.forName("UTF-8");
6164
private Charset utf16Charset = Charset.forName("UTF-16");
6265

66+
@Rule
67+
public ExpectedException thrown = ExpectedException.none();
68+
6369
List<ArrowBuf> varBufs(String[] strings, Charset charset) {
6470
ArrowBuf offsetsBuffer = allocator.buffer((strings.length + 1) * 4);
6571
ArrowBuf dataBuffer = allocator.buffer(strings.length * 8);
@@ -516,6 +522,74 @@ public void testStringFields() throws GandivaException {
516522
eval.close();
517523
}
518524

525+
@Test
526+
public void testStringOutput() throws GandivaException {
527+
/*
528+
* if (x >= 0) "hi" else "bye"
529+
*/
530+
531+
Field x = Field.nullable("x", new ArrowType.Int(32, true));
532+
533+
ArrowType retType = new ArrowType.Utf8();
534+
535+
TreeNode ifHiBye = TreeBuilder.makeIf(
536+
TreeBuilder.makeFunction(
537+
"greater_than_or_equal_to",
538+
Lists.newArrayList(
539+
TreeBuilder.makeField(x),
540+
TreeBuilder.makeLiteral(0)
541+
),
542+
boolType),
543+
TreeBuilder.makeStringLiteral("hi"),
544+
TreeBuilder.makeStringLiteral("bye"),
545+
retType);
546+
547+
ExpressionTree expr = TreeBuilder.makeExpression(ifHiBye, Field.nullable("res", retType));
548+
Schema schema = new Schema(Lists.newArrayList(x));
549+
Projector eval = Projector.make(schema, Lists.newArrayList(expr));
550+
551+
// fill up input record batch
552+
int numRows = 4;
553+
byte[] validity = new byte[]{(byte) 255, 0};
554+
int[] xValues = new int[]{10, -10, 20, -20};
555+
String[] expected = new String[]{"hi", "bye", "hi", "bye"};
556+
ArrowBuf validityX = buf(validity);
557+
ArrowBuf dataX = intBuf(xValues);
558+
ArrowRecordBatch batch =
559+
new ArrowRecordBatch(
560+
numRows,
561+
Lists.newArrayList(new ArrowFieldNode(numRows, 0)),
562+
Lists.newArrayList( validityX, dataX));
563+
564+
// allocate data for output vector.
565+
VarCharVector outVector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator);
566+
outVector.allocateNew(64, numRows);
567+
568+
569+
// evaluate expression
570+
List<ValueVector> output = new ArrayList<>();
571+
output.add(outVector);
572+
eval.evaluate(batch, output);
573+
574+
// match expected output.
575+
for (int i = 0; i < numRows; i++) {
576+
assertFalse(outVector.isNull(i));
577+
assertEquals(expected[i], new String(outVector.get(i)));
578+
}
579+
580+
// test with insufficient data buffer.
581+
try {
582+
outVector.allocateNew(4, numRows);
583+
thrown.expect(GandivaException.class);
584+
thrown.expectMessage("expand not implemented");
585+
eval.evaluate(batch, output);
586+
} finally {
587+
releaseRecordBatch(batch);
588+
releaseValueVectors(output);
589+
eval.close();
590+
}
591+
}
592+
519593
@Test
520594
public void testRegex() throws GandivaException {
521595
/*

0 commit comments

Comments
 (0)