Skip to content

Commit

Permalink
[#4582] fix(hadoop-catalog): Fix some location without slash issue (#…
Browse files Browse the repository at this point in the history
…5322)

### What changes were proposed in this pull request?

Add the trailing slash to avoid issues when user set illegal location to
catalog and schema. For example, if user set "hdfs://ip:port" as a
catalog or schema location, we need to add a trailing "/" to avoid
issues in creating a `Path`.

### Why are the changes needed?

Users can set Catalog and schema "location" with improper path, like
"hdfs://ip:port", to support this case, we need to add an additional
slash to the end of the location.

Fix: #4582 

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Add a new IT to test.

Co-authored-by: Jerry Shao <jerryshao@datastrato.com>
  • Loading branch information
github-actions[bot] and jerryshao authored Oct 28, 2024
1 parent af5e6a9 commit c0aa14c
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,9 @@ public void initialize(
.getOrDefault(config, HadoopCatalogPropertiesMetadata.LOCATION);
this.catalogStorageLocation =
StringUtils.isNotBlank(catalogLocation)
? Optional.of(catalogLocation).map(Path::new)
? Optional.of(catalogLocation)
.map(s -> s.endsWith(SLASH) ? s : s + SLASH)
.map(Path::new)
: Optional.empty();
}

Expand Down Expand Up @@ -722,6 +724,7 @@ private Path getSchemaPath(String name, Map<String, String> properties) {
.getOrDefault(properties, HadoopSchemaPropertiesMetadata.LOCATION);

return Optional.ofNullable(schemaLocation)
.map(s -> s.endsWith(SLASH) ? s : s + SLASH)
.map(Path::new)
.orElse(catalogStorageLocation.map(p -> new Path(p, name)).orElse(null));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,66 @@ public void testGetFileLocationWithInvalidAuditHeaders() {
}
}

@Test
public void testCreateSchemaAndFilesetWithSpecialLocation() {
String localCatalogName = GravitinoITUtils.genRandomName("local_catalog");
String hdfsLocation =
String.format(
"hdfs://%s:%d",
containerSuite.getHiveContainer().getContainerIpAddress(),
HiveContainer.HDFS_DEFAULTFS_PORT);
Map<String, String> catalogProps = ImmutableMap.of("location", hdfsLocation);

Catalog localCatalog =
metalake.createCatalog(
localCatalogName, Catalog.Type.FILESET, provider, "comment", catalogProps);
Assertions.assertEquals(hdfsLocation, localCatalog.properties().get("location"));

// Create schema without specifying location.
Schema localSchema =
localCatalog
.asSchemas()
.createSchema("local_schema", "comment", ImmutableMap.of("key1", "val1"));

Fileset localFileset =
localCatalog
.asFilesetCatalog()
.createFileset(
NameIdentifier.of(localSchema.name(), "local_fileset"),
"fileset comment",
Fileset.Type.MANAGED,
null,
ImmutableMap.of("k1", "v1"));
Assertions.assertEquals(
hdfsLocation + "/local_schema/local_fileset", localFileset.storageLocation());

// Delete schema
localCatalog.asSchemas().dropSchema(localSchema.name(), true);

// Create schema with specifying location.
Map<String, String> schemaProps = ImmutableMap.of("location", hdfsLocation);
Schema localSchema2 =
localCatalog.asSchemas().createSchema("local_schema2", "comment", schemaProps);
Assertions.assertEquals(hdfsLocation, localSchema2.properties().get("location"));

Fileset localFileset2 =
localCatalog
.asFilesetCatalog()
.createFileset(
NameIdentifier.of(localSchema2.name(), "local_fileset2"),
"fileset comment",
Fileset.Type.MANAGED,
null,
ImmutableMap.of("k1", "v1"));
Assertions.assertEquals(hdfsLocation + "/local_fileset2", localFileset2.storageLocation());

// Delete schema
localCatalog.asSchemas().dropSchema(localSchema2.name(), true);

// Delete catalog
metalake.dropCatalog(localCatalogName, true);
}

protected String generateLocation(String filesetName) {
return String.format(
"hdfs://%s:%d/user/hadoop/%s/%s/%s",
Expand Down

0 comments on commit c0aa14c

Please sign in to comment.