Skip to content

Commit 01a91df

Browse files
committed
get all test cases passing for all formats with partitioning
1 parent 039120e commit 01a91df

File tree

7 files changed

+85
-142
lines changed

7 files changed

+85
-142
lines changed

xtable-service/pom.xml

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,12 @@
6666
<groupId>org.apache.spark</groupId>
6767
<artifactId>spark-core_${scala.binary.version}</artifactId>
6868
<scope>provided</scope>
69+
<exclusions>
70+
<exclusion>
71+
<groupId>org.apache.avro</groupId>
72+
<artifactId>avro-mapred</artifactId>
73+
</exclusion>
74+
</exclusions>
6975
</dependency>
7076
<dependency>
7177
<groupId>org.apache.spark</groupId>
@@ -176,21 +182,17 @@
176182
</dependency>
177183

178184
<dependency>
179-
<groupId>org.apache.parquet</groupId>
180-
<artifactId>parquet-avro</artifactId>
181-
<version>${parquet.version}</version>
185+
<groupId>org.apache.hudi</groupId>
186+
<artifactId>hudi-spark${spark.version.prefix}-bundle_${scala.binary.version}</artifactId>
187+
<scope>test</scope>
182188
</dependency>
183-
184189
<dependency>
185-
<groupId>org.apache.avro</groupId>
186-
<artifactId>avro</artifactId>
187-
<version>${avro.version}</version>
190+
<groupId>org.apache.hudi</groupId>
191+
<artifactId>hudi-common</artifactId>
188192
</dependency>
189-
190193
<dependency>
191194
<groupId>org.apache.hudi</groupId>
192-
<artifactId>hudi-spark${spark.version.prefix}-bundle_${scala.binary.version}</artifactId>
193-
<scope>test</scope>
195+
<artifactId>hudi-java-client</artifactId>
194196
</dependency>
195197

196198
<dependency>

xtable-service/src/main/java/org/apache/xtable/service/ConversionService.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -193,9 +193,7 @@ public ConvertTableResponse convertTable(ConvertTableRequest convertTableRequest
193193
if (convertTableRequest.getConfigurations() != null) {
194194
String partitionSpec =
195195
convertTableRequest.getConfigurations().getOrDefault("partition-spec", null);
196-
if (partitionSpec != null
197-
&& (HUDI.equals(convertTableRequest.getSourceFormat())
198-
|| convertTableRequest.getTargetFormats().contains(HUDI))) {
196+
if (partitionSpec != null) {
199197
sourceProperties.put(PARTITION_FIELD_SPEC_CONFIG, partitionSpec);
200198
}
201199
}
@@ -204,6 +202,7 @@ public ConvertTableResponse convertTable(ConvertTableRequest convertTableRequest
204202
SourceTable.builder()
205203
.name(convertTableRequest.getSourceTableName())
206204
.basePath(convertTableRequest.getSourceTablePath())
205+
.dataPath(convertTableRequest.getSourceDataPath())
207206
.formatName(convertTableRequest.getSourceFormat())
208207
.additionalProperties(sourceProperties)
209208
.build();
@@ -213,7 +212,8 @@ public ConvertTableResponse convertTable(ConvertTableRequest convertTableRequest
213212
TargetTable targetTable =
214213
TargetTable.builder()
215214
.name(convertTableRequest.getSourceTableName())
216-
.basePath(convertTableRequest.getSourceTablePath())
215+
// set the metadata path to the data path as the default (required by Hudi)
216+
.basePath(convertTableRequest.getSourceDataPath())
217217
.formatName(targetFormat)
218218
.additionalProperties(sourceProperties)
219219
.build();

xtable-service/src/main/java/org/apache/xtable/service/models/ConvertTableRequest.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ public class ConvertTableRequest {
3939
@JsonProperty("source-table-path")
4040
private String sourceTablePath;
4141

42+
@JsonProperty("source-data-path")
43+
private String sourceDataPath;
44+
4245
@JsonProperty("target-formats")
4346
private List<String> targetFormats;
4447

@@ -52,12 +55,14 @@ public ConvertTableRequest(
5255
@JsonProperty("source-format") String sourceFormat,
5356
@JsonProperty("source-table-name") String sourceTableName,
5457
@JsonProperty("source-table-path") String sourceTablePath,
58+
@JsonProperty("source-data-path") String sourceDataPath,
5559
@JsonProperty("target-format") List<String> targetFormat,
5660
@JsonProperty("configurations") Map<String, String> configurations) {
5761

5862
this.sourceFormat = sourceFormat;
5963
this.sourceTableName = sourceTableName;
6064
this.sourceTablePath = sourceTablePath;
65+
this.sourceDataPath = sourceDataPath;
6166
this.targetFormats = targetFormat;
6267
this.configurations = configurations;
6368
}

xtable-service/src/test/java/org/apache/xtable/service/it/ITConversionService.java renamed to xtable-service/src/test/java/org/apache/xtable/service/ITConversionService.java

Lines changed: 56 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
* limitations under the License.
1717
*/
1818

19-
package org.apache.xtable.service.it;
19+
package org.apache.xtable.service;
2020

2121
import static org.apache.xtable.GenericTable.getTableName;
2222
import static org.apache.xtable.model.storage.TableFormat.DELTA;
@@ -29,12 +29,10 @@
2929

3030
import java.io.File;
3131
import java.io.IOException;
32-
import java.nio.ByteBuffer;
3332
import java.nio.file.Files;
3433
import java.nio.file.Path;
3534
import java.util.ArrayList;
3635
import java.util.Arrays;
37-
import java.util.Base64;
3836
import java.util.Collections;
3937
import java.util.Comparator;
4038
import java.util.HashMap;
@@ -58,30 +56,21 @@
5856

5957
import org.apache.hudi.common.config.HoodieMetadataConfig;
6058

61-
import com.fasterxml.jackson.core.JsonProcessingException;
62-
import com.fasterxml.jackson.databind.JsonNode;
63-
import com.fasterxml.jackson.databind.ObjectMapper;
64-
import com.fasterxml.jackson.databind.node.ObjectNode;
65-
6659
import org.apache.xtable.GenericTable;
6760
import org.apache.xtable.hudi.HudiTestUtil;
6861
import org.apache.xtable.model.storage.TableFormat;
69-
import org.apache.xtable.service.ConversionService;
7062
import org.apache.xtable.service.models.ConvertTableRequest;
7163
import org.apache.xtable.service.models.ConvertTableResponse;
7264
import org.apache.xtable.service.models.ConvertedTable;
7365

7466
import io.quarkus.test.junit.QuarkusTest;
75-
import io.quarkus.test.junit.TestProfile;
7667
import jakarta.inject.Inject;
7768

78-
@TestProfile(ConversionTestProfile.class)
7969
@QuarkusTest
8070
public class ITConversionService {
8171

8272
@Inject ConversionService conversionService;
8373
private static Path tempDir;
84-
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
8574
protected static JavaSparkContext jsc;
8675
protected static SparkSession sparkSession;
8776

@@ -139,11 +128,11 @@ public void testVariousOperations(String sourceTableFormat, boolean isPartitione
139128
sourceTableFormat,
140129
tableName,
141130
table.getBasePath(),
131+
table.getDataPath(),
142132
targetTableFormats,
143133
partitionConfig);
144134
ConvertTableResponse response = conversionService.convertTable(request);
145135
assertConversionResponse(response, targetTableFormats);
146-
147136
checkDatasetEquivalence(sourceTableFormat, table, targetTableFormats, 100);
148137

149138
// Make multiple commits and then sync with service
@@ -160,11 +149,55 @@ public void testVariousOperations(String sourceTableFormat, boolean isPartitione
160149
checkDatasetEquivalenceWithFilter(
161150
sourceTableFormat, table, targetTableFormats, table.getFilterQuery());
162151
}
152+
153+
try (GenericTable tableWithUpdatedSchema =
154+
GenericTable.getInstanceWithAdditionalColumns(
155+
tableName, tempDir, sparkSession, jsc, sourceTableFormat, isPartitioned)) {
156+
ConvertTableRequest request =
157+
createConvertTableRequest(
158+
sourceTableFormat,
159+
tableName,
160+
tableWithUpdatedSchema.getBasePath(),
161+
tableWithUpdatedSchema.getDataPath(),
162+
targetTableFormats,
163+
partitionConfig);
164+
165+
List<Row> insertsAfterSchemaUpdate = tableWithUpdatedSchema.insertRows(100);
166+
tableWithUpdatedSchema.reload();
167+
ConvertTableResponse response = conversionService.convertTable(request);
168+
assertConversionResponse(response, targetTableFormats);
169+
checkDatasetEquivalence(sourceTableFormat, tableWithUpdatedSchema, targetTableFormats, 280);
170+
171+
tableWithUpdatedSchema.deleteRows(insertsAfterSchemaUpdate.subList(60, 90));
172+
response = conversionService.convertTable(request);
173+
assertConversionResponse(response, targetTableFormats);
174+
checkDatasetEquivalence(sourceTableFormat, tableWithUpdatedSchema, targetTableFormats, 250);
175+
176+
if (isPartitioned) {
177+
// Adds new partition.
178+
tableWithUpdatedSchema.insertRecordsForSpecialPartition(50);
179+
response = conversionService.convertTable(request);
180+
assertConversionResponse(response, targetTableFormats);
181+
checkDatasetEquivalence(sourceTableFormat, tableWithUpdatedSchema, targetTableFormats, 300);
182+
183+
// Drops partition.
184+
tableWithUpdatedSchema.deleteSpecialPartition();
185+
response = conversionService.convertTable(request);
186+
assertConversionResponse(response, targetTableFormats);
187+
checkDatasetEquivalence(sourceTableFormat, tableWithUpdatedSchema, targetTableFormats, 250);
188+
189+
// Insert records to the dropped partition again.
190+
tableWithUpdatedSchema.insertRecordsForSpecialPartition(50);
191+
response = conversionService.convertTable(request);
192+
assertConversionResponse(response, targetTableFormats);
193+
checkDatasetEquivalence(sourceTableFormat, tableWithUpdatedSchema, targetTableFormats, 300);
194+
}
195+
}
163196
}
164197

165198
private static Stream<Arguments> generateTestParametersFormatsAndPartitioning() {
166199
List<Arguments> arguments = new ArrayList<>();
167-
for (String sourceTableFormat : Arrays.asList(DELTA, ICEBERG)) {
200+
for (String sourceTableFormat : Arrays.asList(HUDI, DELTA, ICEBERG)) {
168201
for (boolean isPartitioned : new boolean[] {true, false}) {
169202
arguments.add(Arguments.of(sourceTableFormat, isPartitioned));
170203
}
@@ -178,60 +211,6 @@ protected static List<String> getOtherFormats(String sourceTableFormat) {
178211
.collect(Collectors.toList());
179212
}
180213

181-
private void compareDatasetWithUUID(List<String> dataset1Rows, List<String> dataset2Rows) {
182-
for (int i = 0; i < dataset1Rows.size(); i++) {
183-
String row1 = dataset1Rows.get(i);
184-
String row2 = dataset2Rows.get(i);
185-
if (row1.contains("uuid_field") && row2.contains("uuid_field")) {
186-
try {
187-
JsonNode node1 = OBJECT_MAPPER.readTree(row1);
188-
JsonNode node2 = OBJECT_MAPPER.readTree(row2);
189-
190-
// check uuid field
191-
String uuidStr1 = node1.get("uuid_field").asText();
192-
byte[] bytes = Base64.getDecoder().decode(node2.get("uuid_field").asText());
193-
ByteBuffer bb = ByteBuffer.wrap(bytes);
194-
UUID uuid2 = new UUID(bb.getLong(), bb.getLong());
195-
String uuidStr2 = uuid2.toString();
196-
assertEquals(
197-
uuidStr1,
198-
uuidStr2,
199-
String.format(
200-
"Datasets are not equivalent when reading from Spark. Source: %s, Target: %s",
201-
uuidStr1, uuidStr2));
202-
203-
// check other fields
204-
((ObjectNode) node1).remove("uuid_field");
205-
((ObjectNode) node2).remove("uuid_field");
206-
assertEquals(
207-
node1.toString(),
208-
node2.toString(),
209-
String.format(
210-
"Datasets are not equivalent when comparing other fields. Source: %s, Target: %s",
211-
node1, node2));
212-
} catch (JsonProcessingException e) {
213-
throw new RuntimeException(e);
214-
}
215-
} else {
216-
assertEquals(
217-
row1,
218-
row2,
219-
String.format(
220-
"Datasets are not equivalent when reading from Spark. Source: %s, Target: %s",
221-
row1, row2));
222-
}
223-
}
224-
}
225-
226-
private boolean containsUUIDFields(List<String> rows) {
227-
for (String row : rows) {
228-
if (row.contains("\"uuid_field\"")) {
229-
return true;
230-
}
231-
}
232-
return false;
233-
}
234-
235214
protected void checkDatasetEquivalenceWithFilter(
236215
String sourceFormat,
237216
GenericTable<?, ?> sourceTable,
@@ -296,7 +275,7 @@ private void checkDatasetEquivalence(
296275
.read()
297276
.options(finalTargetOptions)
298277
.format(targetFormat.toLowerCase())
299-
.load(sourceTable.getBasePath())
278+
.load(sourceTable.getDataPath())
300279
.orderBy(sourceTable.getOrderByColumn())
301280
.filter(filterCondition);
302281
}));
@@ -320,24 +299,20 @@ private void checkDatasetEquivalence(
320299
// if count is not known ahead of time, ensure datasets are non-empty
321300
assertFalse(dataset1Rows.isEmpty());
322301
}
323-
324-
if (containsUUIDFields(dataset1Rows) && containsUUIDFields(dataset2Rows)) {
325-
compareDatasetWithUUID(dataset1Rows, dataset2Rows);
326-
} else {
327-
assertEquals(
328-
dataset1Rows,
329-
dataset2Rows,
330-
String.format(
331-
"Datasets are not equivalent when reading from Spark. Source: %s, Target: %s",
332-
sourceFormat, format));
333-
}
302+
assertEquals(
303+
dataset1Rows,
304+
dataset2Rows,
305+
String.format(
306+
"Datasets are not equivalent when reading from Spark. Source: %s, Target: %s",
307+
sourceFormat, format));
334308
});
335309
}
336310

337311
private ConvertTableRequest createConvertTableRequest(
338312
String sourceFormat,
339313
String tableName,
340314
String tablePath,
315+
String dataPath,
341316
List<String> targetFormats,
342317
String partitionConfig) {
343318
Map<String, String> configs = new HashMap<>();
@@ -348,6 +323,7 @@ private ConvertTableRequest createConvertTableRequest(
348323
.sourceFormat(sourceFormat)
349324
.sourceTableName(tableName)
350325
.sourceTablePath(tablePath)
326+
.sourceDataPath(dataPath)
351327
.targetFormats(targetFormats)
352328
.configurations(configs)
353329
.build();

xtable-service/src/test/java/org/apache/xtable/service/TestConversionService.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@
5959
class TestConversionService {
6060
private static final String SOURCE_NAME = "users";
6161
private static final String SOURCE_PATH = "s3://bucket/tables/users";
62+
private static final String SOURCE_DATA_PATH = "s3://bucket/tables/users/data";
6263
private static final String HUDI_META_PATH = "s3://bucket/tables/users/.hoodie";
6364
private static final String ICEBERG_META_PATH =
6465
"s3://bucket/tables/users/metadata/v1.metadata.json";
@@ -111,6 +112,7 @@ void convertToTargetHudi() {
111112
.sourceFormat(TableFormat.DELTA)
112113
.sourceTableName(SOURCE_NAME)
113114
.sourceTablePath(SOURCE_PATH)
115+
.sourceDataPath(SOURCE_DATA_PATH)
114116
.targetFormats(Collections.singletonList(TableFormat.HUDI))
115117
.build();
116118

@@ -120,7 +122,7 @@ void convertToTargetHudi() {
120122
when(provider.getConversionSourceInstance(any())).thenReturn(conversionSrc);
121123
when(conversionSrc.getCurrentTable()).thenReturn(internalTbl);
122124

123-
when(internalTbl.getName()).thenReturn(TableFormat.HUDI);
125+
when(internalTbl.getTableFormat()).thenReturn(TableFormat.HUDI);
124126
when(internalTbl.getLatestMetdataPath()).thenReturn(HUDI_META_PATH);
125127
when(internalTbl.getReadSchema()).thenReturn(internalSchema);
126128

@@ -146,6 +148,7 @@ void convertToTargetIceberg() {
146148
.sourceFormat(TableFormat.DELTA)
147149
.sourceTableName(SOURCE_NAME)
148150
.sourceTablePath(SOURCE_PATH)
151+
.sourceDataPath(SOURCE_DATA_PATH)
149152
.targetFormats(Collections.singletonList(TableFormat.ICEBERG))
150153
.build();
151154

@@ -157,7 +160,7 @@ void convertToTargetIceberg() {
157160
when(provider.getConversionSourceInstance(any())).thenReturn(conversionSrc);
158161
when(conversionSrc.getCurrentTable()).thenReturn(internalTbl);
159162

160-
when(internalTbl.getName()).thenReturn(TableFormat.ICEBERG);
163+
when(internalTbl.getTableFormat()).thenReturn(TableFormat.ICEBERG);
161164
when(internalTbl.getLatestMetdataPath()).thenReturn(ICEBERG_META_PATH);
162165
when(internalTbl.getReadSchema()).thenReturn(internalSchema);
163166

@@ -185,6 +188,7 @@ void convertToTargetDelta() {
185188
.sourceFormat(TableFormat.ICEBERG)
186189
.sourceTableName(SOURCE_NAME)
187190
.sourceTablePath(SOURCE_PATH)
191+
.sourceDataPath(SOURCE_DATA_PATH)
188192
.targetFormats(Collections.singletonList(TableFormat.DELTA))
189193
.build();
190194

@@ -194,7 +198,7 @@ void convertToTargetDelta() {
194198
when(provider.getConversionSourceInstance(any())).thenReturn(conversionSrc);
195199
when(conversionSrc.getCurrentTable()).thenReturn(internalTbl);
196200

197-
when(internalTbl.getName()).thenReturn(TableFormat.DELTA);
201+
when(internalTbl.getTableFormat()).thenReturn(TableFormat.DELTA);
198202
when(internalTbl.getLatestMetdataPath()).thenReturn(DELTA_META_PATH);
199203
when(internalTbl.getReadSchema()).thenReturn(internalSchema);
200204

0 commit comments

Comments
 (0)