Fix performance regression when Hive SerDe doesn't prefer Writables #15163

Merged
Fix performance regression when Hive SerDe doesn't prefer Writables
In #8206, GenericHiveRecordCursor was modified to avoid extra
overhead when the SerDe provides a more efficient String handling
implementation via Writables. However, when the SerDe does not
provide such an implementation and instead returns String instances
directly, that change introduced an extra conversion from bytes to
String, only for the result to be converted back to bytes.

This change alters GenericHiveRecordCursor#parseStringColumn to
respect the PrimitiveObjectInspector's preference for using Writables.
pettyjamesm committed Sep 15, 2020
commit 7042af0d8ba4651580b42eb52b3e0b250bbc0f0b
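
The gist of the fix, sketched below as a minimal standalone example (not the PR's actual code): dispatch on PrimitiveObjectInspector.preferWritable() so that a SerDe holding plain Java Strings is never forced through the Writable path. The StringFieldReader class name is hypothetical; the three inspector methods are part of Hive's serde2 API.

import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;

final class StringFieldReader
{
    private StringFieldReader() {}

    static Object readStringField(PrimitiveObjectInspector inspector, Object fieldData)
    {
        if (inspector.preferWritable()) {
            // The SerDe keeps string data in Writable form (e.g. Text), so
            // reading the Writable directly avoids materializing a String.
            return inspector.getPrimitiveWritableObject(fieldData);
        }
        // The SerDe already holds a java.lang.String (or HiveChar/HiveVarchar);
        // asking for a Writable here would convert String -> Text -> bytes.
        return inspector.getPrimitiveJavaObject(fieldData);
    }
}

Before this fix, the cursor always called getPrimitiveWritableObject, which for a String-backed SerDe means wrapping the String into a Text (bytes), only for the cursor to read those bytes back out.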
@@ -25,7 +25,9 @@
 import io.airlift.slice.Slices;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
@@ -368,39 +370,80 @@ private void parseStringColumn(int column)
             nulls[column] = true;
         }
         else {
-            Object fieldValue = ((PrimitiveObjectInspector) fieldInspectors[column]).getPrimitiveWritableObject(fieldData);
-            checkState(fieldValue != null, "fieldValue should not be null");
-            BinaryComparable hiveValue;
-            if (fieldValue instanceof Text) {
-                hiveValue = (Text) fieldValue;
-            }
-            else if (fieldValue instanceof BytesWritable) {
-                hiveValue = (BytesWritable) fieldValue;
-            }
-            else if (fieldValue instanceof HiveVarcharWritable) {
-                hiveValue = ((HiveVarcharWritable) fieldValue).getTextValue();
-            }
-            else if (fieldValue instanceof HiveCharWritable) {
-                hiveValue = ((HiveCharWritable) fieldValue).getTextValue();
+            PrimitiveObjectInspector inspector = (PrimitiveObjectInspector) fieldInspectors[column];
+            Slice value;
+            if (inspector.preferWritable()) {
+                value = parseStringFromPrimitiveWritableObjectValue(types[column], inspector.getPrimitiveWritableObject(fieldData));
             }
             else {
-                throw new IllegalStateException("unsupported string field type: " + fieldValue.getClass().getName());
+                value = parseStringFromPrimitiveJavaObjectValue(types[column], inspector.getPrimitiveJavaObject(fieldData));
             }
+            slices[column] = value;
+            nulls[column] = false;
+        }
+    }

-            // create a slice view over the hive value and trim to character limits
-            Slice value = Slices.wrappedBuffer(hiveValue.getBytes(), 0, hiveValue.getLength());
-            Type type = types[column];
-            if (isVarcharType(type)) {
-                value = truncateToLength(value, type);
-            }
-            if (isCharType(type)) {
-                value = truncateToLengthAndTrimSpaces(value, type);
-            }
+    private static Slice parseStringFromPrimitiveWritableObjectValue(Type type, Object fieldValue)
+    {
+        checkState(fieldValue != null, "fieldValue should not be null");
+        BinaryComparable hiveValue;
+        if (fieldValue instanceof Text) {
+            hiveValue = (Text) fieldValue;
+        }
+        else if (fieldValue instanceof BytesWritable) {
+            hiveValue = (BytesWritable) fieldValue;
+        }
+        else if (fieldValue instanceof HiveVarcharWritable) {
+            hiveValue = ((HiveVarcharWritable) fieldValue).getTextValue();
+        }
+        else if (fieldValue instanceof HiveCharWritable) {
+            hiveValue = ((HiveCharWritable) fieldValue).getTextValue();
+        }
+        else {
+            throw new IllegalStateException("unsupported string field type: " + fieldValue.getClass().getName());
+        }
+        // create a slice view over the hive value and trim to character limits
+        Slice value = trimStringToCharacterLimits(type, Slices.wrappedBuffer(hiveValue.getBytes(), 0, hiveValue.getLength()));
+        // store a copy of the bytes, since the hive reader can reuse the underlying buffer
+        return Slices.copyOf(value);
+    }

-            // store a copy of the bytes, since the hive reader can reuse the underlying buffer
-            slices[column] = Slices.copyOf(value);
-            nulls[column] = false;
-        }
-    }
+    private static Slice parseStringFromPrimitiveJavaObjectValue(Type type, Object fieldValue)
+    {
+        checkState(fieldValue != null, "fieldValue should not be null");
+        Slice value;
+        if (fieldValue instanceof String) {
+            value = Slices.utf8Slice((String) fieldValue);
+        }
+        else if (fieldValue instanceof byte[]) {
+            value = Slices.wrappedBuffer((byte[]) fieldValue);
+        }
+        else if (fieldValue instanceof HiveVarchar) {
+            value = Slices.utf8Slice(((HiveVarchar) fieldValue).getValue());
+        }
+        else if (fieldValue instanceof HiveChar) {
+            value = Slices.utf8Slice(((HiveChar) fieldValue).getValue());
+        }
+        else {
+            throw new IllegalStateException("unsupported string field type: " + fieldValue.getClass().getName());
+        }
+        value = trimStringToCharacterLimits(type, value);
+        // Copy the slice if the value was trimmed and is now smaller than the backing buffer
+        if (!value.isCompact()) {
+            return Slices.copyOf(value);
+        }
+        return value;
+    }
+
+    private static Slice trimStringToCharacterLimits(Type type, Slice value)
+    {
+        if (isVarcharType(type)) {
+            return truncateToLength(value, type);
+        }
+        if (isCharType(type)) {
+            return truncateToLengthAndTrimSpaces(value, type);
+        }
+        return value;
+    }

     private void parseDecimalColumn(int column)
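
A side note on the copy-avoidance in the Java-object path above: Slices.utf8Slice allocates a fresh, compact buffer, so a defensive copy is only needed when truncation produced a smaller view over that buffer. Below is a minimal sketch of the isCompact() behavior this relies on, assuming the io.airlift.slice API (the example class name is hypothetical).

import io.airlift.slice.Slice;
import io.airlift.slice.Slices;

public final class SliceCompactnessExample
{
    private SliceCompactnessExample() {}

    public static void main(String[] args)
    {
        // utf8Slice allocates a fresh buffer sized to the string, so the
        // slice covers its entire backing array and is compact.
        Slice full = Slices.utf8Slice("abcdef");
        System.out.println(full.isCompact()); // true -> safe to store as-is

        // Truncating (e.g. to a varchar(3) limit) yields a view over the
        // larger buffer; storing it would retain the untrimmed bytes.
        Slice truncated = full.slice(0, 3);
        System.out.println(truncated.isCompact()); // false -> copy first

        Slice stored = truncated.isCompact() ? truncated : Slices.copyOf(truncated);
        System.out.println(stored.isCompact()); // true
    }
}

The Writable path, by contrast, always copies, since (per the code comment) the Hive reader can reuse the underlying buffer.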