Skip to content

HADOOP-19494 [branch-3.4]: [ABFS] Fix Case Sensitivity Issue for hdi_isfolder metadata #7584

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ public void endElement(final String uri,
if (parentNode.equals(AbfsHttpConstants.XML_TAG_METADATA)) {
currentBlobEntry.addMetadata(currentNode, value);
// For Marker blobs hdi_isFolder will be present as metadata
if (AbfsHttpConstants.XML_TAG_HDI_ISFOLDER.equals(currentNode)) {
if (AbfsHttpConstants.XML_TAG_HDI_ISFOLDER.equalsIgnoreCase(currentNode)) {
currentBlobEntry.setIsDirectory(Boolean.valueOf(value));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,22 @@ public Map<String, List<String>> getResponseHeaders() {
return headers;
}

/**{@inheritDoc}*/
@Override
public String getResponseHeaderIgnoreCase(final String headerName) {
Map<String, List<String>> responseHeaders = getResponseHeaders();
if (responseHeaders == null || responseHeaders.isEmpty()) {
return null;
}
// Search for the header value case-insensitively
return responseHeaders.entrySet().stream()
.filter(entry -> entry.getKey() != null
&& entry.getKey().equalsIgnoreCase(headerName))
.flatMap(entry -> entry.getValue().stream())
.findFirst()
.orElse(null); // Return null if no match is found
}

/**{@inheritDoc}*/
@Override
protected InputStream getContentInputStream()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1137,7 +1137,8 @@ public AbfsRestOperation setPathProperties(final String path,
this.createPathRestOp(path, false, false, false, null,
contextEncryptionAdapter, tracingContext);
// Make sure hdi_isFolder is added to the list of properties to be set.
boolean hdiIsFolderExists = properties.containsKey(XML_TAG_HDI_ISFOLDER);
boolean hdiIsFolderExists = properties.keySet()
.stream().anyMatch(XML_TAG_HDI_ISFOLDER::equalsIgnoreCase);
if (!hdiIsFolderExists) {
properties.put(XML_TAG_HDI_ISFOLDER, TRUE);
}
Expand Down Expand Up @@ -1548,7 +1549,7 @@ public AbfsRestOperation deleteBlobPath(final Path blobPath,
*/
@Override
public boolean checkIsDir(AbfsHttpOperation result) {
String resourceType = result.getResponseHeader(X_MS_META_HDI_ISFOLDER);
String resourceType = result.getResponseHeaderIgnoreCase(X_MS_META_HDI_ISFOLDER);
return resourceType != null && resourceType.equals(TRUE);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1630,7 +1630,8 @@ AbfsRestOperation getAbfsRestOperation(final AbfsRestOperationType operationType
* @param requestHeaders The list of HTTP headers for the request.
* @return An AbfsRestOperation instance.
*/
AbfsRestOperation getAbfsRestOperation(final AbfsRestOperationType operationType,
@VisibleForTesting
public AbfsRestOperation getAbfsRestOperation(final AbfsRestOperationType operationType,
final String httpMethod,
final URL url,
final List<AbfsHttpHeader> requestHeaders) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,14 @@ public final InputStream getListResultStream() {

public abstract Map<String, List<String>> getResponseHeaders();

/**
* Get response header value for the given headerKey ignoring case.
*
* @param httpHeader header key.
* @return header value.
*/
public abstract String getResponseHeaderIgnoreCase(String httpHeader);

// Returns a trace message for the request
@Override
public String toString() {
Expand Down Expand Up @@ -743,14 +751,20 @@ public String getTracingContextSuffix() {

@Override
public String getResponseHeader(final String httpHeader) {
return "";
return EMPTY_STRING;
}

@Override
public Map<String, List<String>> getResponseHeaders() {
return new HashMap<>();
}

/**{@inheritDoc}*/
@Override
public String getResponseHeaderIgnoreCase(final String headerName) {
return EMPTY_STRING;
}

@Override
public void sendPayload(final byte[] buffer,
final int offset,
Expand Down Expand Up @@ -787,6 +801,16 @@ public Map<String, List<String>> getResponseHeaders() {
return new HashMap<>();
}

/**{@inheritDoc}*/
@Override
public String getResponseHeaderIgnoreCase(final String httpHeader) {
// Directories on FNS-Blob are identified by a special metadata header.
if (httpHeader.equalsIgnoreCase(X_MS_META_HDI_ISFOLDER)) {
return TRUE;
}
return EMPTY_STRING;
}

@Override
public void processResponse(final byte[] buffer,
final int offset,
Expand Down Expand Up @@ -935,14 +959,20 @@ public String getTracingContextSuffix() {

@Override
public String getResponseHeader(final String httpHeader) {
return "";
return EMPTY_STRING;
}

@Override
public Map<String, List<String>> getResponseHeaders() {
return new HashMap<>();
}

/**{@inheritDoc}*/
@Override
public String getResponseHeaderIgnoreCase(final String headerName) {
return EMPTY_STRING;
}

@Override
public void sendPayload(final byte[] buffer,
final int offset,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,22 @@ public Map<String, List<String>> getResponseHeaders() {
return connection.getHeaderFields();
}

/**{@inheritDoc}*/
@Override
public String getResponseHeaderIgnoreCase(final String headerName) {
Map<String, List<String>> responseHeaders = getResponseHeaders();
if (responseHeaders == null || responseHeaders.isEmpty()) {
return null;
}
// Search for the header value case-insensitively
return responseHeaders.entrySet().stream()
.filter(entry -> entry.getKey() != null
&& entry.getKey().equalsIgnoreCase(headerName))
.flatMap(entry -> entry.getValue().stream())
.findFirst()
.orElse(null); // Return null if no match is found
}

/**{@inheritDoc}*/
public void sendPayload(byte[] buffer, int offset, int length)
throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,12 @@
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient;
import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation;
import org.apache.hadoop.fs.permission.FsPermission;

import static org.apache.hadoop.fs.CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY;
import static org.apache.hadoop.fs.azurebfs.ITestAzureBlobFileSystemListStatus.mockAbfsRestOperation;
import static org.apache.hadoop.fs.azurebfs.ITestAzureBlobFileSystemListStatus.mockIngressClientHandler;
import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathExists;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
import static org.mockito.ArgumentMatchers.any;
Expand Down Expand Up @@ -284,4 +287,61 @@ private void verifyFileNotFound(FileNotFoundException ex, String key) {
Assertions.assertThat(ex).isNotNull();
Assertions.assertThat(ex.getMessage()).contains(key);
}

/**
* Test directory status with different HDI folder configuration,
* verifying the correct header and directory state.
*/
private void testIsDirectory(boolean expected, String... configName) throws Exception {
try (AzureBlobFileSystem fs = Mockito.spy(getFileSystem())) {
assumeBlobServiceType();
AbfsBlobClient abfsBlobClient = mockIngressClientHandler(fs);
// Mock the operation to modify the headers
mockAbfsRestOperation(abfsBlobClient, configName);

// Create the path and invoke mkdirs method
Path path = new Path("/testPath");
fs.mkdirs(path);

// Assert that the response header has the updated value
FileStatus fileStatus = fs.getFileStatus(path);

AbfsHttpOperation op = abfsBlobClient.getPathStatus(
path.toUri().getPath(),
true, getTestTracingContext(fs, true),
null).getResult();

Assertions.assertThat(abfsBlobClient.checkIsDir(op))
.describedAs("Directory should be marked as " + expected)
.isEqualTo(expected);

// Verify the header and directory state
Assertions.assertThat(fileStatus.isDirectory())
.describedAs("Expected directory state: " + expected)
.isEqualTo(expected);

fs.delete(path, true);
}
}

/**
* Test to verify the directory status with different HDI folder configurations.
* Verifying the correct header and directory state.
*/
@Test
public void testIsDirectoryWithDifferentCases() throws Exception {
testIsDirectory(true, "HDI_ISFOLDER");

testIsDirectory(true, "Hdi_ISFOLDER");

testIsDirectory(true, "Hdi_isfolder");

testIsDirectory(true, "hdi_isfolder");

testIsDirectory(false, "Hdi_isfolder1");

testIsDirectory(true, "HDI_ISFOLDER", "Hdi_ISFOLDER", "Hdi_isfolder");

testIsDirectory(true, "HDI_ISFOLDER", "Hdi_ISFOLDER1", "Test");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
Expand All @@ -40,7 +41,13 @@
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.azurebfs.constants.FSOperationType;
import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations;
import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException;
import org.apache.hadoop.fs.azurebfs.services.AbfsBlobClient;
import org.apache.hadoop.fs.azurebfs.services.AbfsClientHandler;
import org.apache.hadoop.fs.azurebfs.services.AbfsHttpHeader;
import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation;
import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperationType;
import org.apache.hadoop.fs.azurebfs.services.ListResponseData;
import org.apache.hadoop.fs.azurebfs.services.AbfsClient;
import org.apache.hadoop.fs.azurebfs.services.AbfsClientTestUtil;
Expand All @@ -51,8 +58,11 @@
import org.apache.hadoop.fs.contract.ContractTestUtils;

import static java.net.HttpURLConnection.HTTP_OK;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH;
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.TRUE;
import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_LIST_MAX_RESULTS;
import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_METADATA_PREFIX;
import static org.apache.hadoop.fs.azurebfs.services.RenameAtomicity.SUFFIX;
import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_JDK_MESSAGE;
import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs;
Expand All @@ -62,6 +72,8 @@

import static org.apache.hadoop.test.LambdaTestUtils.intercept;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyList;
import static org.mockito.ArgumentMatchers.eq;
import static org.mockito.ArgumentMatchers.nullable;
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.when;
Expand Down Expand Up @@ -524,4 +536,106 @@ private void assertDirectoryFileStatus(final FileStatus fileStatus,
Assertions.assertThat(fileStatus.getLen())
.describedAs("Content Length Not as Expected").isEqualTo(0);
}

/**
* Helper method to mock the AbfsRestOperation and modify the request headers.
*
* @param abfsBlobClient the mocked AbfsBlobClient
* @param newHeader the header to add in place of the old one
*/
public static void mockAbfsRestOperation(AbfsBlobClient abfsBlobClient, String... newHeader) {
Mockito.doAnswer(invocation -> {
List<AbfsHttpHeader> requestHeaders = invocation.getArgument(3);

// Remove the actual HDI config header and add the new one
requestHeaders.removeIf(header ->
HttpHeaderConfigurations.X_MS_META_HDI_ISFOLDER.equals(header.getName()));
for (String header : newHeader) {
requestHeaders.add(new AbfsHttpHeader(X_MS_METADATA_PREFIX + header, TRUE));
}

// Call the real method
return invocation.callRealMethod();
}).when(abfsBlobClient).getAbfsRestOperation(eq(AbfsRestOperationType.PutBlob),
eq(HTTP_METHOD_PUT), any(URL.class), anyList());
}

/**
* Helper method to mock the AbfsBlobClient and set up the client handler.
*
* @param fs the AzureBlobFileSystem instance
* @return the mocked AbfsBlobClient
*/
public static AbfsBlobClient mockIngressClientHandler(AzureBlobFileSystem fs) {
AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore());
AbfsClientHandler clientHandler = Mockito.spy(store.getClientHandler());
AbfsBlobClient abfsBlobClient = (AbfsBlobClient) Mockito.spy(
clientHandler.getClient());
fs.getAbfsStore().setClient(abfsBlobClient);
fs.getAbfsStore().setClientHandler(clientHandler);
Mockito.doReturn(abfsBlobClient).when(clientHandler).getIngressClient();
return abfsBlobClient;
}

/**
* Test directory status with different HDI folder configuration,
* verifying the correct header and directory state.
*/
private void testIsDirectory(boolean expected, String... configName) throws Exception {
try (AzureBlobFileSystem fs = Mockito.spy(getFileSystem())) {
assumeBlobServiceType();
AbfsBlobClient abfsBlobClient = mockIngressClientHandler(fs);
// Mock the operation to modify the headers
mockAbfsRestOperation(abfsBlobClient, configName);

// Create the path and invoke mkdirs method
Path path = new Path("/testPath");
fs.mkdirs(path);

// Assert that the response header has the updated value
FileStatus[] fileStatus = fs.listStatus(path.getParent());

AbfsHttpOperation op = abfsBlobClient.getPathStatus(
path.toUri().getPath(),
true, getTestTracingContext(fs, true),
null).getResult();

Assertions.assertThat(abfsBlobClient.checkIsDir(op))
.describedAs("Directory should be marked as " + expected)
.isEqualTo(expected);

// Verify the header and directory state
Assertions.assertThat(fileStatus.length)
.describedAs("Expected directory state: " + expected)
.isEqualTo(1);

// Verify the header and directory state
Assertions.assertThat(fileStatus[0].isDirectory())
.describedAs("Expected directory state: " + expected)
.isEqualTo(expected);

fs.delete(path, true);
}
}

/**
* Test to verify the directory status with different HDI folder configurations.
* Verifying the correct header and directory state.
*/
@Test
public void testIsDirectoryWithDifferentCases() throws Exception {
testIsDirectory(true, "HDI_ISFOLDER");

testIsDirectory(true, "Hdi_ISFOLDER");

testIsDirectory(true, "Hdi_isfolder");

testIsDirectory(true, "hdi_isfolder");

testIsDirectory(false, "Hdi_isfolder1");

testIsDirectory(true, "HDI_ISFOLDER", "Hdi_ISFOLDER", "Hdi_isfolder");

testIsDirectory(true, "HDI_ISFOLDER", "Hdi_ISFOLDER1", "Test");
}
}