Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

package org.apache.hudi.io;

import org.apache.hudi.avro.HoodieAvroUtils;
import org.apache.hudi.common.model.HoodieAvroIndexedRecord;
import org.apache.hudi.common.model.HoodieAvroPayload;
import org.apache.hudi.common.model.HoodieRecord;
Expand All @@ -44,7 +45,6 @@
import org.apache.hudi.storage.HoodieStorage;
import org.apache.hudi.storage.StoragePath;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.IndexedRecord;
Expand Down Expand Up @@ -275,20 +275,7 @@ private CDCTransformer getTransformer() {
}

/**
 * Strips the Hudi commit-metadata fields from the given record by projecting it onto the
 * table's data schema (which does not contain the meta fields).
 * <p>
 * Delegates to {@code HoodieAvroUtils.projectRecordToNewSchemaShallow}, which performs a
 * shallow (top-level, reference-copying) projection rather than a deep rewrite.
 *
 * @param record the source record; may be {@code null}
 * @return the projected record without commit metadata, or {@code null} if {@code record} was {@code null}
 */
private GenericRecord removeCommitMetadata(GenericRecord record) {
  return record == null ? null : HoodieAvroUtils.projectRecordToNewSchemaShallow(record, dataSchema.getAvroSchema());
}

// -------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1369,6 +1369,46 @@ public static BigDecimal convertBytesToBigDecimal(byte[] value, int precision, i
scale, new MathContext(precision, RoundingMode.HALF_UP));
}

/**
 * Projects a record to a new schema by performing a shallow copy of its top-level fields.
 * Best used for adding or removing top-level metadata fields.
 * <p>
 * This is a high-performance alternative to deep rewriting. It only iterates through
 * the top-level fields of the target schema and pulls values from the source record
 * by field name.
 * <p>
 * This is significantly faster than {@link #rewriteRecordWithNewSchema} for:
 * 1. Wide records (many top-level fields): reduces CPU overhead/recursion.
 * 2. Deeply nested records: uses reference-copying for nested structures instead of rebuilding them.
 * <p>
 * <b>Warning:</b> This method does not recursively rewrite/transform nested records, arrays,
 * or maps. It assumes that the underlying values for each field are already
 * compatible with the target schema.
 *
 * @param record       the source GenericRecord to project
 * @param targetSchema the schema to project the record into
 * @return a new GenericRecord matching {@code targetSchema}, or the original record when
 *         both schemas declare the same top-level field names in the same positions
 */
public static GenericRecord projectRecordToNewSchemaShallow(GenericRecord record, Schema targetSchema) {
  // Fast path: no copy needed when the top-level layout already matches. Comparing
  // field names positionally (not just field counts) avoids silently returning a
  // record whose fields differ from targetSchema despite having the same arity.
  if (topLevelFieldsMatch(record.getSchema(), targetSchema)) {
    return record;
  }
  GenericData.Record projected = new GenericData.Record(targetSchema);
  for (Schema.Field field : targetSchema.getFields()) {
    Schema.Field sourceField = record.getSchema().getField(field.name());
    // Fields absent from the source (e.g. newly added meta fields) are filled with null.
    projected.put(field.pos(), sourceField == null ? null : record.get(sourceField.pos()));
  }
  return projected;
}

/**
 * Returns true iff both schemas declare the same top-level field names at the same positions.
 */
private static boolean topLevelFieldsMatch(Schema source, Schema target) {
  java.util.List<Schema.Field> sourceFields = source.getFields();
  java.util.List<Schema.Field> targetFields = target.getFields();
  if (sourceFields.size() != targetFields.size()) {
    return false;
  }
  for (int i = 0; i < sourceFields.size(); i++) {
    if (!sourceFields.get(i).name().equals(targetFields.get(i).name())) {
      return false;
    }
  }
  return true;
}

/**
* Avro does not support type promotion from numbers to string. This function returns true if
* it will be necessary to rewrite the record to support this promotion.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ public HoodieRecord joinWith(HoodieRecord other, Schema targetSchema) {
public HoodieRecord prependMetaFields(Schema recordSchema, Schema targetSchema, MetadataValues metadataValues, Properties props) {
try {
Option<IndexedRecord> avroRecordOpt = getData().getInsertValue(recordSchema, props);
GenericRecord newAvroRecord = HoodieAvroUtils.rewriteRecordWithNewSchema(avroRecordOpt.get(), targetSchema);
GenericRecord newAvroRecord = HoodieAvroUtils.projectRecordToNewSchemaShallow((GenericRecord) avroRecordOpt.get(), targetSchema);
updateMetadataValuesInternal(newAvroRecord, metadataValues);
return new HoodieAvroIndexedRecord(getKey(), newAvroRecord, getOperation(), this.currentLocation, this.newLocation);
} catch (IOException e) {
Expand Down

This file was deleted.

Loading