Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ public static Schema getSchema(String schemaURIString) {
public static Schema getSchema(String schemaURIString, String authToken) {
URI schemaURI;
try {
schemaURIString = sanitizeURIString(schemaURIString);
schemaURI = new URI(schemaURIString);
} catch (URISyntaxException e) {
throw new RuntimeException("Schema URI is not valid - '" + schemaURIString + "'", e);
Expand Down Expand Up @@ -161,7 +162,7 @@ public static URI getRelativeOutputPath(URI baseInputDir, URI inputFile, URI out
Preconditions.checkState(relativePath.getPath().length() > 0 && !relativePath.equals(inputFile),
"Unable to extract out the relative path for input file '" + inputFile + "', based on base input path: "
+ baseInputDir);
String outputDirStr = outputDir.toString();
String outputDirStr = sanitizeURIString(outputDir.toString());
outputDir = !outputDirStr.endsWith("/") ? URI.create(outputDirStr.concat("/")) : outputDir;
URI relativeOutputURI = outputDir.resolve(relativePath).resolve(".");
return relativeOutputURI;
Expand Down Expand Up @@ -192,6 +193,7 @@ public static String getFileName(URI inputFileURI) {
*/
public static URI getFileURI(String uriStr, URI fullUriForPathOnlyUriStr)
throws URISyntaxException {
uriStr = sanitizeURIString(uriStr);
URI fileURI = URI.create(uriStr);
if (fileURI.getScheme() == null) {
return new URI(fullUriForPathOnlyUriStr.getScheme(), fullUriForPathOnlyUriStr.getUserInfo(),
Expand All @@ -211,6 +213,7 @@ public static URI getFileURI(String uriStr, URI fullUriForPathOnlyUriStr)
*/
public static URI getDirectoryURI(String uriStr)
throws URISyntaxException {
uriStr = sanitizeURIString(uriStr);
URI uri = new URI(uriStr);
if (uri.getScheme() == null) {
uri = new File(uriStr).toURI();
Expand Down Expand Up @@ -275,7 +278,7 @@ public static List<String> listMatchedFilesWithRecursiveOption(PinotFS pinotFs,
continue;
}
}
if (!pinotFs.isDirectory(new URI(file))) {
if (!pinotFs.isDirectory(new URI(sanitizeURIString(file)))) {
// In case PinotFS implementations list files without a scheme (e.g. hdfs://), then we may lose it in the
// input file path. Call SegmentGenerationUtils.getFileURI() to fix this up.
// getFileURI throws URISyntaxException
Expand All @@ -289,4 +292,8 @@ public static List<String> listMatchedFilesWithRecursiveOption(PinotFS pinotFs,
}
return filteredFiles;
}

/**
 * Makes a URI/path string acceptable to the {@link java.net.URI} parsers by percent-encoding
 * literal space characters.
 *
 * <p>Every {@code ' '} in the input is replaced with {@code "%20"}; all other characters are left
 * exactly as they are. Sequences that are already encoded as {@code "%20"} contain no space and
 * therefore pass through unchanged, so applying this method more than once is harmless.
 *
 * @param path the URI or path string to sanitize; must not be {@code null}
 * @return {@code path} with each space replaced by {@code "%20"}
 * @throws NullPointerException if {@code path} is {@code null}
 */
public static String sanitizeURIString(String path) {
  // Fail fast with an explicit message instead of an implicit NPE from String.replace.
  if (path == null) {
    throw new NullPointerException("path must not be null");
  }
  return path.replace(" ", "%20");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ public void testExtractFileNameFromURI() {
"input.data");
Assert.assertEquals(SegmentGenerationUtils.getFileName(URI.create("hdfs://var/data/myTable/2020/04/06/input.data")),
"input.data");

Assert.assertEquals(SegmentGenerationUtils.getFileName(
URI.create(SegmentGenerationUtils.sanitizeURIString("hdfs://var/data/my Table/2020/04/06/input.data"))),
"input.data");
Assert.assertEquals(SegmentGenerationUtils.getFileName(
URI.create(SegmentGenerationUtils.sanitizeURIString("hdfs://var/data/my Table/2020/04/06/input 2.data"))),
"input 2.data");
}

// Confirm output path generation works with URIs that have authority/userInfo.
Expand Down