17
17
18
18
package org .apache .spark .sql .execution .columnar
19
19
20
+ import scala .collection .mutable
21
+
20
22
import org .apache .spark .Logging
21
23
import org .apache .spark .sql .catalyst .InternalRow
22
24
import org .apache .spark .sql .catalyst .expressions ._
@@ -88,7 +90,7 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera
88
90
case array : ArrayType => classOf [ArrayColumnAccessor ].getName
89
91
case t : MapType => classOf [MapColumnAccessor ].getName
90
92
}
91
- ctx.addMutableState(accessorCls, accessorName, s " $accessorName = null; " )
93
+ ctx.addMutableState(accessorCls, accessorName, " " )
92
94
93
95
val createCode = dt match {
94
96
case t if ctx.isPrimitiveType(dt) =>
@@ -97,7 +99,7 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera
97
99
s " $accessorName = new $accessorCls(ByteBuffer.wrap(buffers[ $index]).order(nativeOrder)); "
98
100
case other =>
99
101
s """ $accessorName = new $accessorCls(ByteBuffer.wrap(buffers[ $index]).order(nativeOrder),
100
- ( ${dt.getClass.getName}) columnTypes[ $index]); """
102
+ ( ${dt.getClass.getName}) columnTypes[ $index]); """
101
103
}
102
104
103
105
val extract = s " $accessorName.extractTo(mutableRow, $index); "
@@ -114,6 +116,42 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera
114
116
(createCode, extract + patch)
115
117
}.unzip
116
118
119
/*
 * Split the accessor-initialization and value-extraction statements into helper methods
 * when there are many columns. HotSpot does not JIT-compile a method whose bytecode exceeds
 * 8000 bytes, so every generated method must stay below that limit:
 * 200 statements = 6000 bytes / 30 bytes (upper bound for one generated call).
 */
val numberOfStatementsThreshold = 200
val (initializerAccessorCalls, extractorCalls) =
  if (initializeAccessors.length <= numberOfStatementsThreshold) {
    // Few enough statements: inline them directly into the generated methods.
    (initializeAccessors.mkString("\n"), extractors.mkString("\n"))
  } else {
    // Registers one private void method per group of at most `numberOfStatementsThreshold`
    // statements (named `<prefix>0`, `<prefix>1`, ...) and returns the Java statements that
    // invoke those methods in order.
    //
    // `grouped` yields a lazy Iterator, so a `map` whose result is discarded would never run
    // and no function would be registered. Here the mapped iterator is drained by `mkString`,
    // which guarantees that every `ctx.addNewFunction` call actually happens, and the call
    // list is derived from the very groups that were registered.
    def splitIntoFunctions(prefix: String, statements: Seq[String]): String = {
      statements.grouped(numberOfStatementsThreshold).zipWithIndex.map { case (body, i) =>
        val funcName = s"$prefix$i"
        val funcCode = s"""
           |private void $funcName() {
           |  ${body.mkString("\n")}
           |}
         """.stripMargin
        ctx.addNewFunction(funcName, funcCode)
        s"$funcName();"
      }.mkString("\n")
    }

    // Tuple elements are evaluated left to right: accessor functions are registered first,
    // then extractor functions.
    (splitIntoFunctions("accessors", initializeAccessors),
      splitIntoFunctions("extractors", extractors))
  }
154
+
117
155
val code = s """
118
156
import java.nio.ByteBuffer;
119
157
import java.nio.ByteOrder;
@@ -149,8 +187,6 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera
149
187
this.nativeOrder = ByteOrder.nativeOrder();
150
188
this.buffers = new byte[ ${columnTypes.length}][];
151
189
this.mutableRow = new MutableUnsafeRow(rowWriter);
152
-
153
- ${initMutableStates(ctx)}
154
190
}
155
191
156
192
public void initialize(Iterator input, DataType[] columnTypes, int[] columnIndexes) {
@@ -159,6 +195,8 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera
159
195
this.columnIndexes = columnIndexes;
160
196
}
161
197
198
+ ${declareAddedFunctions(ctx)}
199
+
162
200
public boolean hasNext() {
163
201
if (currentRow < numRowsInBatch) {
164
202
return true;
@@ -173,7 +211,7 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera
173
211
for (int i = 0; i < columnIndexes.length; i ++) {
174
212
buffers[i] = batch.buffers()[columnIndexes[i]];
175
213
}
176
- ${initializeAccessors.mkString( " \n " ) }
214
+ ${initializerAccessorCalls }
177
215
178
216
return hasNext();
179
217
}
@@ -182,7 +220,7 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera
182
220
currentRow += 1;
183
221
bufferHolder.reset();
184
222
rowWriter.initialize(bufferHolder, $numFields);
185
- ${extractors.mkString( " \n " ) }
223
+ ${extractorCalls }
186
224
unsafeRow.pointTo(bufferHolder.buffer, $numFields, bufferHolder.totalSize());
187
225
return unsafeRow;
188
226
}
0 commit comments