Skip to content

Commit

Permalink
Fix performance regression when Hive SerDe doesn't prefer Writables
Browse files Browse the repository at this point in the history
The regression was introduced in #8206, which modified
GenericHiveRecordCursor to avoid extra overhead when the SerDe provides
a more efficient String handling implementation with Writables. However,
when the SerDe does not provide such an implementation and instead already
returns String instances directly, that earlier change introduced an extra
conversion from bytes to String just to be converted back to bytes.

This change alters the string parsing in GenericHiveRecordCursor
(parseStringColumn) to respect the PrimitiveObjectInspector's preference
for using Writables.
  • Loading branch information
pettyjamesm committed Sep 15, 2020
1 parent 710f04e commit 7042af0
Showing 1 changed file with 70 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
import io.airlift.slice.Slices;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
Expand Down Expand Up @@ -368,39 +370,80 @@ private void parseStringColumn(int column)
nulls[column] = true;
}
else {
Object fieldValue = ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveWritableObject(fieldData);
checkState(fieldValue != null, "fieldValue should not be null");
BinaryComparable hiveValue;
if (fieldValue instanceof Text) {
hiveValue = (Text) fieldValue;
}
else if (fieldValue instanceof BytesWritable) {
hiveValue = (BytesWritable) fieldValue;
}
else if (fieldValue instanceof HiveVarcharWritable) {
hiveValue = ((HiveVarcharWritable) fieldValue).getTextValue();
}
else if (fieldValue instanceof HiveCharWritable) {
hiveValue = ((HiveCharWritable) fieldValue).getTextValue();
PrimitiveObjectInspector inspector = (PrimitiveObjectInspector) fieldInspectors[column];
Slice value;
if (inspector.preferWritable()) {
value = parseStringFromPrimitiveWritableObjectValue(types[column], inspector.getPrimitiveWritableObject(fieldData));
}
else {
throw new IllegalStateException("unsupported string field type: " + fieldValue.getClass().getName());
value = parseStringFromPrimitiveJavaObjectValue(types[column], inspector.getPrimitiveJavaObject(fieldData));
}
slices[column] = value;
nulls[column] = false;
}
}

// create a slice view over the hive value and trim to character limits
Slice value = Slices.wrappedBuffer(hiveValue.getBytes(), 0, hiveValue.getLength());
Type type = types[column];
if (isVarcharType(type)) {
value = truncateToLength(value, type);
}
if (isCharType(type)) {
value = truncateToLengthAndTrimSpaces(value, type);
}
/**
 * Builds the slice for a string-like column from the writable object handed back by the object inspector.
 *
 * @param type the column type, passed on to {@code trimStringToCharacterLimits}
 * @param fieldValue the writable value; must be a {@code Text}, {@code BytesWritable},
 *         {@code HiveVarcharWritable} or {@code HiveCharWritable}
 * @return a copy of the (possibly trimmed) bytes of the value
 * @throws IllegalStateException if {@code fieldValue} is null or of an unsupported class
 */
private static Slice parseStringFromPrimitiveWritableObjectValue(Type type, Object fieldValue)
{
    checkState(fieldValue != null, "fieldValue should not be null");
    BinaryComparable writable = binaryViewOfWritable(fieldValue);

    // wrap the writable's bytes without copying, then apply the character limits of the type
    Slice view = Slices.wrappedBuffer(writable.getBytes(), 0, writable.getLength());
    Slice trimmed = trimStringToCharacterLimits(type, view);

    // the hive reader can reuse the underlying buffer, so the result must be a copy of the bytes
    return Slices.copyOf(trimmed);
}

// Narrows a supported writable to the BinaryComparable that exposes its bytes and length.
private static BinaryComparable binaryViewOfWritable(Object fieldValue)
{
    if (fieldValue instanceof Text) {
        return (Text) fieldValue;
    }
    if (fieldValue instanceof BytesWritable) {
        return (BytesWritable) fieldValue;
    }
    if (fieldValue instanceof HiveVarcharWritable) {
        return ((HiveVarcharWritable) fieldValue).getTextValue();
    }
    if (fieldValue instanceof HiveCharWritable) {
        return ((HiveCharWritable) fieldValue).getTextValue();
    }
    throw new IllegalStateException("unsupported string field type: " + fieldValue.getClass().getName());
}

// store a copy of the bytes, since the hive reader can reuse the underlying buffer
slices[column] = Slices.copyOf(value);
nulls[column] = false;
/**
 * Builds the slice for a string-like column from the plain Java object handed back by the object inspector.
 *
 * @param type the column type, passed on to {@code trimStringToCharacterLimits}
 * @param fieldValue the Java value; must be a {@code String}, {@code byte[]},
 *         {@code HiveVarchar} or {@code HiveChar}
 * @return the (possibly trimmed) value; copied only when the trimmed slice is not compact
 * @throws IllegalStateException if {@code fieldValue} is null or of an unsupported class
 */
private static Slice parseStringFromPrimitiveJavaObjectValue(Type type, Object fieldValue)
{
    checkState(fieldValue != null, "fieldValue should not be null");
    Slice trimmed = trimStringToCharacterLimits(type, sliceOfJavaObject(fieldValue));

    // A non-compact slice means trimming left it smaller than its backing buffer,
    // so copy it; otherwise the slice can be returned as is.
    return trimmed.isCompact() ? trimmed : Slices.copyOf(trimmed);
}

// Converts a supported Java string-like value into a slice: UTF-8 encoded for text, wrapped for byte[].
private static Slice sliceOfJavaObject(Object fieldValue)
{
    if (fieldValue instanceof String) {
        return Slices.utf8Slice((String) fieldValue);
    }
    if (fieldValue instanceof byte[]) {
        return Slices.wrappedBuffer((byte[]) fieldValue);
    }
    if (fieldValue instanceof HiveVarchar) {
        return Slices.utf8Slice(((HiveVarchar) fieldValue).getValue());
    }
    if (fieldValue instanceof HiveChar) {
        return Slices.utf8Slice(((HiveChar) fieldValue).getValue());
    }
    throw new IllegalStateException("unsupported string field type: " + fieldValue.getClass().getName());
}

/**
 * Applies the trimming that matches the column type to a string value.
 *
 * @param type the column type deciding which trimming is applied
 * @param value the raw string bytes
 * @return the result of {@code truncateToLength} for varchar types, of
 *         {@code truncateToLengthAndTrimSpaces} for char types, and {@code value} unchanged otherwise
 */
private static Slice trimStringToCharacterLimits(Type type, Slice value)
{
    Slice result = value;
    if (isVarcharType(type)) {
        result = truncateToLength(value, type);
    }
    else if (isCharType(type)) {
        result = truncateToLengthAndTrimSpaces(value, type);
    }
    return result;
}

private void parseDecimalColumn(int column)
Expand Down

0 comments on commit 7042af0

Please sign in to comment.