28 | 28 | import org.apache.kafka.connect.data.Schema; |
29 | 29 | import org.apache.kafka.connect.data.Struct; |
30 | 30 | import org.apache.kafka.connect.source.SourceRecord; |
| 31 | +import org.slf4j.Logger; |
| 32 | +import org.slf4j.LoggerFactory; |
31 | 33 |
32 | 34 | import java.io.IOException; |
33 | 35 | import java.math.BigDecimal; |
43 | 45 |
44 | 46 | /** Utility class to deal with records. */
45 | 47 | public class SourceRecordUtils { |
| 48 | + private static final Logger LOG = LoggerFactory.getLogger(SourceRecordUtils.class); |
46 | 49 |
47 | 50 | private SourceRecordUtils() {} |
48 | 51 |
@@ -131,15 +134,86 @@ public static TableId getTableId(SourceRecord dataRecord) { |
131 | 134 |
132 | 135 | public static Object[] getSplitKey( |
133 | 136 | RowType splitBoundaryType, SourceRecord dataRecord, SchemaNameAdjuster nameAdjuster) { |
134 | | - // the split key field contains single field now |
135 | 137 | String splitFieldName = nameAdjuster.adjust(splitBoundaryType.getFieldNames().get(0)); |
136 | | - Struct key = (Struct) dataRecord.key(); |
137 | | - return new Object[] {key.get(splitFieldName)}; |
| 138 | + |
| 139 | + // Try primary key struct first (for backward compatibility) |
| 140 | + Struct keyStruct = (Struct) dataRecord.key(); |
| 141 | + if (keyStruct != null && keyStruct.schema().field(splitFieldName) != null) { |
| 142 | + return new Object[] {keyStruct.get(splitFieldName)}; |
| 143 | + } |
| 144 | + |
| 145 | + // For non-primary key chunk keys, use value-based approach |
| 146 | + return getSplitKeyFromValue(dataRecord, splitFieldName); |
| 147 | + } |
| 148 | + |
| 149 | + /** Extracts the chunk key from the value struct (AFTER/BEFORE) for non-primary-key chunk keys. */
| 150 | + private static Object[] getSplitKeyFromValue(SourceRecord dataRecord, String splitFieldName) { |
| 151 | + Struct value = (Struct) dataRecord.value(); |
| 152 | + if (value == null) { |
| 153 | + return null; // No value struct available |
| 154 | + } |
| 155 | + |
| 156 | + String op = value.getString(Envelope.FieldName.OPERATION); |
| 157 | + Struct targetStruct = null; |
| 158 | + |
| 159 | + if (op == null) { |
| 160 | + // No op field set: treat as a snapshot READ and use AFTER
| 161 | + targetStruct = value.getStruct(Envelope.FieldName.AFTER); |
| 162 | + } else { |
| 163 | + switch (op) { |
| 164 | + case "c": // CREATE |
| 165 | + case "r": // READ |
| 166 | + targetStruct = value.getStruct(Envelope.FieldName.AFTER); |
| 167 | + break; |
| 168 | + case "u": // UPDATE - prefer AFTER for current state |
| 169 | + targetStruct = value.getStruct(Envelope.FieldName.AFTER); |
| 170 | + if (targetStruct == null |
| 171 | + || targetStruct.schema().field(splitFieldName) == null) { |
| 172 | + // Fallback to BEFORE if AFTER doesn't have the field |
| 173 | + targetStruct = value.getStruct(Envelope.FieldName.BEFORE); |
| 174 | + } |
| 175 | + break; |
| 176 | + case "d": // DELETE - use BEFORE, but fallback if missing |
| 177 | + targetStruct = value.getStruct(Envelope.FieldName.BEFORE); |
| 178 | + if (targetStruct == null |
| 179 | + || targetStruct.schema().field(splitFieldName) == null) { |
| 180 | + // For DELETE with missing chunk key, return null to indicate "emit without |
| 181 | + // filtering" |
| 182 | + return null; |
| 183 | + } |
| 184 | + break; |
| 185 | + default: |
| 186 | + throw new IllegalArgumentException("Unknown operation: " + op); |
| 187 | + } |
| 188 | + } |
| 189 | + |
| 190 | + if (targetStruct == null || targetStruct.schema().field(splitFieldName) == null) { |
| 191 | + // Chunk key field not found in value struct |
| 192 | + // This could happen with schema changes or configuration issues |
| 193 | + LOG.debug( |
| 194 | + "Chunk key field '{}' not found in record, emitting without filtering. Table: {}, Operation: {}", |
| 195 | + splitFieldName, |
| 196 | + getTableId(dataRecord), |
| 197 | + // `value` is non-null at this point, so reuse the op
| 198 | + // already read above instead of re-reading the record
| 199 | + op != null ? op : "unknown");
| 200 | + return null; |
| 201 | + } |
| 202 | + |
| 203 | + return new Object[] {targetStruct.get(splitFieldName)}; |
138 | 204 | } |
139 | 205 |
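A minimal, self-contained sketch (not part of this PR) of the envelope shapes the extraction above walks. The column name "seq" and the class name are hypothetical; the envelope field names ("op", "before", "after") follow the Debezium conventions referenced via Envelope.FieldName in the diff. For an UPDATE the method reads AFTER first; for a DELETE whose BEFORE image is missing, the lookup yields null, which the new code surfaces as a null split key.

import org.apache.kafka.connect.data.Schema;
import org.apache.kafka.connect.data.SchemaBuilder;
import org.apache.kafka.connect.data.Struct;

public class ChunkKeyEnvelopeSketch {
    public static void main(String[] args) {
        // Row schema with a hypothetical non-primary-key chunk key column "seq".
        Schema rowSchema = SchemaBuilder.struct()
                .optional()
                .field("id", Schema.INT64_SCHEMA)
                .field("seq", Schema.INT64_SCHEMA)
                .build();
        // Simplified Debezium-style envelope: "op" plus BEFORE/AFTER row images.
        Schema envelopeSchema = SchemaBuilder.struct()
                .field("op", Schema.STRING_SCHEMA)
                .field("before", rowSchema)
                .field("after", rowSchema)
                .build();

        // UPDATE: AFTER is present, so the chunk key is read from it.
        Struct after = new Struct(rowSchema).put("id", 1L).put("seq", 42L);
        Struct update = new Struct(envelopeSchema).put("op", "u").put("after", after);
        System.out.println(update.getStruct("after").get("seq")); // 42

        // DELETE with no BEFORE image: the struct lookup returns null, the case
        // that makes getSplitKeyFromValue return a null split key.
        Struct delete = new Struct(envelopeSchema).put("op", "d");
        System.out.println(delete.getStruct("before")); // null
    }
}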
140 | 206 | /** Returns whether the given key is contained in the split key range. */
141 | 207 | public static boolean splitKeyRangeContains( |
142 | 208 | Object[] key, Object[] splitKeyStart, Object[] splitKeyEnd) { |
| 209 | + // If key is null, chunk key field was not found (e.g., DELETE with non-primary key chunk |
| 210 | + // key) |
| 211 | + // Emit the record without filtering to prevent data loss |
| 212 | + if (key == null) { |
| 213 | + LOG.debug("Chunk key is null, emitting record without filtering"); |
| 214 | + return true; |
| 215 | + } |
| 216 | + |
143 | 217 | // for all range |
144 | 218 | if (splitKeyStart == null && splitKeyEnd == null) { |
145 | 219 | return true; |
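A short usage sketch of the contract the new null guard establishes (illustrative only; the class name is hypothetical and it assumes SourceRecordUtils is importable). A null chunk key, such as a DELETE whose value carries no BEFORE image, is treated as contained in every split, so the record is emitted rather than silently dropped.

public class NullChunkKeySketch {
    public static void main(String[] args) {
        Object[] start = new Object[] {100L};
        Object[] end = new Object[] {200L};

        // Null key (chunk key field missing from the record): always emit.
        System.out.println(SourceRecordUtils.splitKeyRangeContains(null, start, end)); // true

        // Full-range split (both bounds null): also always contains the key.
        System.out.println(SourceRecordUtils.splitKeyRangeContains(new Object[] {150L}, null, null)); // true
    }
}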