From ef6e2f62752aa18e9249a7d3295ddf6c62ee076d Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Fri, 9 Jun 2023 02:26:13 -0700 Subject: [PATCH 1/7] ListStatus FS Call Over Blob Endpoint --- .../fs/azurebfs/AzureBlobFileSystemStore.java | 85 +++++++- .../azurebfs/constants/AbfsHttpConstants.java | 13 ++ .../azurebfs/constants/HttpQueryParams.java | 1 + .../fs/azurebfs/services/AbfsClient.java | 2 + .../azurebfs/services/BlobListXmlParser.java | 159 ++++++++++++--- .../fs/azurebfs/services/BlobProperty.java | 184 +++++++++++++----- .../ITestAzureBlobFileSystemListStatus.java | 2 + .../hadoop/fs/azurebfs/ITestListBlob.java | 4 +- .../services/TestBlobListXmlParser.java | 115 +++++++++++ 9 files changed, 488 insertions(+), 77 deletions(-) create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestBlobListXmlParser.java diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index 3245f8f1d92b2..df866520e5b06 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -635,7 +635,7 @@ BlobCopyProgress handleCopyInProgress(final Path dstPath, throw new AbfsRestOperationException( COPY_BLOB_FAILED.getStatusCode(), COPY_BLOB_FAILED.getErrorCode(), String.format("copy to path %s failed due to: %s", - dstPath.toUri().getPath(), blobProperty.getStatusDescription()), + dstPath.toUri().getPath(), blobProperty.getCopyStatusDescription()), new Exception(COPY_BLOB_FAILED.getErrorCode())); } if (COPY_STATUS_ABORTED.equalsIgnoreCase(blobProperty.getCopyStatus())) { @@ -668,7 +668,7 @@ BlobProperty getBlobProperty(Path blobPath, blobProperty.setCopyId(opResult.getResponseHeader(X_MS_COPY_ID)); blobProperty.setPath(blobPath); blobProperty.setCopySourceUrl(opResult.getResponseHeader(X_MS_COPY_SOURCE)); - blobProperty.setStatusDescription( + blobProperty.setCopyStatusDescription( opResult.getResponseHeader(X_MS_COPY_STATUS_DESCRIPTION)); blobProperty.setCopyStatus(opResult.getResponseHeader(X_MS_COPY_STATUS)); blobProperty.setContentLength( @@ -839,7 +839,7 @@ public List getListBlobs(Path sourceDirBlobPath, } do { AbfsRestOperation op = client.getListBlobs( - nextMarker, prefix, maxResult, tracingContext + nextMarker, prefix, "", maxResult, tracingContext ); BlobList blobList = op.getResult().getBlobList(); nextMarker = blobList.getNextMarker(); @@ -1899,6 +1899,51 @@ public String listStatus(final Path path, final String startFrom, } } + if (getPrefixMode() == PrefixMode.BLOB) { + FileStatus status = getFileStatusOverBlob(path, tracingContext); + if (status.isFile()) { + fileStatuses.add(status); + return continuation; + } + // For blob endpoint continuation will be used as nextMarker. 
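+ // The ListBlobs response body carries a NextMarker element; it is fed back as the
+ // marker of the next ListBlobs call, and listing stops once the server returns an
+ // empty NextMarker.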
+ String prefix = relativePath + ROOT_PATH; + String delimiter = ROOT_PATH; + if (path.isRoot()) { + prefix = null; + } + do { + try (AbfsPerfInfo perfInfo = startTracking("listStatus", "getListBlobs")) { + AbfsRestOperation op = client.getListBlobs( + continuation, prefix, delimiter, abfsConfiguration.getListMaxResults(), + tracingContext + ); + perfInfo.registerResult(op.getResult()); + BlobList blobList = op.getResult().getBlobList(); + continuation = blobList.getNextMarker(); + if (blobList.getBlobPropertyList().isEmpty()) { + throw new AbfsRestOperationException( + AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(), + AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(), + "ListBlobs path not found", + null, op.getResult()); + } + + addBlobListAsFileStatus(blobList, fileStatuses); + + perfInfo.registerSuccess(true); + countAggregate++; + shouldContinue = + fetchAll && continuation != null && !continuation.isEmpty(); + + if (!shouldContinue) { + perfInfo.registerAggregates(startAggregate, countAggregate); + } + } + } while (shouldContinue); + + return continuation; + } + do { try (AbfsPerfInfo perfInfo = startTracking("listStatus", "listPath")) { AbfsRestOperation op = client.listPath(relativePath, false, @@ -1965,6 +2010,40 @@ public String listStatus(final Path path, final String startFrom, return continuation; } + private void addBlobListAsFileStatus(final BlobList blobList, + List fileStatuses) throws IOException { + + List blobProperties = blobList.getBlobPropertyList(); + for (BlobProperty entry: blobProperties) { + final String owner = identityTransformer.transformIdentityForGetRequest( + entry.getOwner(), true, userName); + final String group = identityTransformer.transformIdentityForGetRequest( + entry.getGroup(), false, primaryUserGroup); + final FsPermission fsPermission = entry.getPermission() == null + ? new AbfsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL) + : AbfsPermission.valueOf(entry.getPermission()); + final boolean hasAcl = entry.getAcl() == null ? 
false : true; + long blockSize = abfsConfiguration.getAzureBlockSize(); + + Path entryPath = entry.getPath(); + entryPath = entryPath.makeQualified(this.uri, entryPath); + + fileStatuses.add( + new VersionedFileStatus( + owner, + group, + fsPermission, + hasAcl, + entry.getContentLength(), + entry.getIsDirectory(), + 1, + blockSize, + entry.getLastModifiedTime(), + entryPath, + entry.getETag())); + } + } + // generate continuation token for xns account private String generateContinuationTokenForXns(final String firstEntryName) { Preconditions.checkArgument(!Strings.isNullOrEmpty(firstEntryName) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java index e48ad048b21a0..2d631e52b46a5 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java @@ -151,6 +151,19 @@ public final class AbfsHttpConstants { public static final String COPY_STATUS_ABORTED = "aborted"; public static final String COPY_STATUS_FAILED = "failed"; public static final String HDI_ISFOLDER = "hdi_isfolder"; + public static final String ETAG = "Etag"; + public static final String LAST_MODIFIED_TIME = "Last-Modified"; + public static final String CREATION_TIME = "Creation-Time"; + public static final String OWNER = "Owner"; + public static final String GROUP = "Group"; + public static final String PERMISSIONS = "Permissions"; + public static final String ACL = "Acl"; + public static final String COPY_ID = "CopyId"; + public static final String COPY_STATUS = "CopyStatus"; + public static final String COPY_SOURCE = "CopySource"; + public static final String COPY_PROGRESS = "CopyProgress"; + public static final String COPY_COMPLETION_TIME = "CopyCompletionTime"; + public static final String COPY_STATUS_DESCRIPTION = "CopyStatusDescription"; private AbfsHttpConstants() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpQueryParams.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpQueryParams.java index 9c7c19f7a1e23..818eda8d7c7d8 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpQueryParams.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpQueryParams.java @@ -48,6 +48,7 @@ public final class HttpQueryParams { public static final String QUERY_PARAM_UPN = "upn"; public static final String QUERY_PARAM_BLOBTYPE = "blobtype"; public static final String QUERY_PARAM_BLOCKID = "blockid"; + public static final String QUERY_PARAM_DELIMITER = "delimiter"; //query params for SAS public static final String QUERY_PARAM_SAOID = "saoid"; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 0731b5d55dde9..82d3960990bd6 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -1477,12 +1477,14 @@ public AbfsRestOperation setBlobMetadata(Path blobPath, List met */ public AbfsRestOperation getListBlobs(String marker, String prefix, + 
String delimiter, Integer maxResult, TracingContext tracingContext) throws AzureBlobFileSystemException { AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESTYPE, CONTAINER); abfsUriQueryBuilder.addQuery(QUERY_PARAM_COMP, QUERY_PARAM_COMP_VALUE_LIST); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_DELIMITER, delimiter); abfsUriQueryBuilder.addQuery(QUERY_PARAM_INCLUDE, QUERY_PARAM_INCLUDE_VALUE_METADATA); prefix = getDirectoryQueryParameter(prefix); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobListXmlParser.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobListXmlParser.java index 9a0ad4f4cea38..5ae253d58614c 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobListXmlParser.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobListXmlParser.java @@ -26,8 +26,10 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; +import org.apache.hadoop.fs.azurebfs.utils.DateTimeUtils; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DIRECTORY; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FORWARD_SLASH; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HDI_ISFOLDER; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.INVALID_XML; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH; @@ -37,6 +39,17 @@ * creates a list of {@link BlobProperty}.
* * BlobList API XML response example + * + * string-value + * string-value + * int-value + * string-value + * + * + * + * + * + * */ public class BlobListXmlParser extends DefaultHandler { /** @@ -46,6 +59,21 @@ public class BlobListXmlParser extends DefaultHandler { private final String url; /** * {@link BlobProperty} for which at a given moment, the parsing is going on. + * + * Following XML elements will be parsed and added in currentBlobProperty. + * 1. Blob: for explicit files and directories + * + * blob-name + * + * + * value + * + * + * + * 2. BlobPrefix: for directories both explicit and implicit + * + * blob-prefix + * */ private BlobProperty currentBlobProperty; /** @@ -77,7 +105,7 @@ public void startElement(final String uri, final String qName, final Attributes attributes) throws SAXException { elements.push(localName); - if (AbfsHttpConstants.BLOB.equals(localName)) { + if (AbfsHttpConstants.BLOB.equals(localName) || AbfsHttpConstants.BLOB_PREFIX.equals(localName)) { currentBlobProperty = new BlobProperty(); } } @@ -96,12 +124,12 @@ public void endElement(final String uri, final String qName) throws SAXException { String currentNode = elements.pop(); - /* - * Check if the ending tag is correct to the starting tag in the stack. - */ + + // Check if the ending tag is correct to the starting tag in the stack. if (!currentNode.equals(localName)) { throw new SAXException(INVALID_XML); } + String parentNode = ""; if (elements.size() > 0) { parentNode = elements.peek(); @@ -121,57 +149,136 @@ public void endElement(final String uri, currentBlobProperty = null; } + /* + * If the closing tag is BlobPrefix, there are no more properties to be set in + * currentBlobProperty and this is a directory (implicit or explicit) + * If implicit, it will be added only once/ + * If explicit it will be added with Blob Tag as well. + */ + if (AbfsHttpConstants.BLOB_PREFIX.equals(currentNode)) { + currentBlobProperty.setIsDirectory(true); + blobList.addBlobProperty(currentBlobProperty); + currentBlobProperty = null; + } + + /* + * If the closing tag is Next Marker, it needs to be saved with the + * list of blobs currently fetched + */ if (AbfsHttpConstants.NEXT_MARKER.equals(currentNode)) { blobList.setNextMarker(value); } - if (parentNode.equals(AbfsHttpConstants.BLOB_PREFIX)) { - if (currentNode.equals(AbfsHttpConstants.NAME)) { - currentBlobProperty.setBlobPrefix(value); - } - } /* - * For case: - * - * value - * .... + * If the closing tag is Name, then it is either for a blob + * or for a blob prefix denoting a directory. We will save this + * in current BlobProperty for both */ - if (parentNode.equals(AbfsHttpConstants.BLOB)) { - if (currentNode.equals(AbfsHttpConstants.NAME)) { - currentBlobProperty.setName(value); - currentBlobProperty.setPath(new Path(ROOT_PATH + value)); - currentBlobProperty.setUrl(url + ROOT_PATH + value); + if (currentNode.equals(AbfsHttpConstants.NAME)) { + // In case of BlobPrefix Name will have a slash at the end + // Remove the "/" at the end of name + if (value.endsWith(FORWARD_SLASH)) { + value = value.substring(0, value.length() - 1); } + + currentBlobProperty.setName(value); + currentBlobProperty.setPath(new Path(ROOT_PATH + value)); + currentBlobProperty.setUrl(url + ROOT_PATH + value); } + /* * For case: - * ... - * value...value - * .... + * + * ... + * + * value + * value + * + * ... + * * ParentNode will be Metadata for all key1, key2, ... , keyN. 
*/ if (parentNode.equals(AbfsHttpConstants.METADATA)) { currentBlobProperty.addMetadata(currentNode, value); + // For Marker blobs hdi_isFolder will be present as metadata if (HDI_ISFOLDER.equals(currentNode)) { currentBlobProperty.setIsDirectory(Boolean.valueOf(value)); } } + /* * For case: - * ... - * valuevalue - * .... + * + * ... + * + * date-time-value + * date-time-value + * etag + * owner user id + * owning group id + * permission string + * access control list + * file | directory + * size-in-bytes + * id + * pending | success | aborted | failed + * source url + * bytes copied/bytes total + * datetime + * error string + * + * ... + * * ParentNode will be Properties for Content-Length, ResourceType. */ if (parentNode.equals(AbfsHttpConstants.PROPERTIES)) { - if (currentNode.equals(AbfsHttpConstants.CONTENT_LEN)) { - currentBlobProperty.setContentLength(Long.valueOf(value)); + if (currentNode.equals(AbfsHttpConstants.CREATION_TIME)) { + currentBlobProperty.setCreationTime(DateTimeUtils.parseLastModifiedTime(value)); + } + if (currentNode.equals(AbfsHttpConstants.LAST_MODIFIED_TIME)) { + currentBlobProperty.setLastModifiedTime(DateTimeUtils.parseLastModifiedTime(value)); + } + if (currentNode.equals(AbfsHttpConstants.ETAG)) { + currentBlobProperty.setETag(value); + } + if (currentNode.equals(AbfsHttpConstants.OWNER)) { + currentBlobProperty.setOwner(value); + } + if (currentNode.equals(AbfsHttpConstants.GROUP)) { + currentBlobProperty.setGroup(value); + } + if (currentNode.equals(AbfsHttpConstants.PERMISSIONS)) { + currentBlobProperty.setPermission(value); + } + if (currentNode.equals(AbfsHttpConstants.ACL)) { + currentBlobProperty.setAcl(value); } if (currentNode.equals(AbfsHttpConstants.RESOURCE_TYPE)) { if (DIRECTORY.equals(value)) { currentBlobProperty.setIsDirectory(true); } } + if (currentNode.equals(AbfsHttpConstants.CONTENT_LEN)) { + currentBlobProperty.setContentLength(Long.parseLong(value)); + } + if (currentNode.equals(AbfsHttpConstants.COPY_ID)) { + currentBlobProperty.setCopyId(value); + } + if (currentNode.equals(AbfsHttpConstants.COPY_STATUS)) { + currentBlobProperty.setCopyStatus(value); + } + if (currentNode.equals(AbfsHttpConstants.COPY_SOURCE)) { + currentBlobProperty.setCopySourceUrl(value); + } + if (currentNode.equals(AbfsHttpConstants.COPY_PROGRESS)) { + currentBlobProperty.setCopyProgress(value); + } + if (currentNode.equals(AbfsHttpConstants.COPY_COMPLETION_TIME)) { + currentBlobProperty.setCopyCompletionTime(DateTimeUtils.parseLastModifiedTime(value)); + } + if (currentNode.equals(AbfsHttpConstants.COPY_STATUS_DESCRIPTION)) { + currentBlobProperty.setCopyStatusDescription(value); + } } /* * refresh bld for the next XML-tag value diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobProperty.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobProperty.java index 71bbb55fd9a90..f84725bef3acc 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobProperty.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobProperty.java @@ -21,112 +21,204 @@ import java.util.HashMap; import java.util.Map; +import com.microsoft.azure.storage.blob.BlobType; +import com.microsoft.azure.storage.blob.CopyState; +import com.microsoft.azure.storage.blob.CopyStatus; +import com.microsoft.azure.storage.blob.LeaseDuration; +import com.microsoft.azure.storage.blob.LeaseState; +import com.microsoft.azure.storage.blob.LeaseStatus; 
+ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; /** - * Encapsulates all the information related to a Blob. + * Encapsulates all the information related to a Blob as fetched + * on blob endpoint APIs */ public class BlobProperty { - private Boolean isDirectory = false; private String name; private Path path; private String url; - private String copySourceUrl; + private Boolean isDirectory = false; + private String eTag; + private long lastModifiedTime; + private long creationTime; + private String owner; + private String group; + private String permission; + private String acl; + private Long contentLength = 0L; private String copyId; private String copyStatus; - private String statusDescription; - private Long contentLength = 0L; + private String copySourceUrl; + private String copyProgress; + private String copyStatusDescription; + private long copyCompletionTime; private Map metadata = new HashMap<>(); - private String blobPrefix; + private AzureBlobFileSystemException ex; public BlobProperty() { } - public void setName(String name) { - this.name = name; + public String getName() { + return name; } - public void setUrl(String url) { - this.url = url; + public Path getPath() { + return path; } - public void setBlobPrefix(String blobPrefix) { - this.blobPrefix = blobPrefix; + public String getUrl() { + return url; } - public void addMetadata(String key, String value) { - metadata.put(key, value); + public Boolean getIsDirectory() { + return isDirectory; } - public void setIsDirectory(Boolean isDirectory) { - this.isDirectory = isDirectory; + public String getETag() { + return eTag; } - public void setCopyId(String copyId) { - this.copyId = copyId; + public long getLastModifiedTime() { + return lastModifiedTime; } - public void setCopySourceUrl(String copySourceUrl) { - this.copySourceUrl = copySourceUrl; + public long getCreationTime() { + return creationTime; } - public void setPath(Path path) { - this.path = path; + public String getOwner() { + return owner; } - public void setCopyStatus(String copyStatus) { - this.copyStatus = copyStatus; + public String getGroup() { + return group; + } + + public String getPermission() { + return permission; } - public void setStatusDescription(String statusDescription) { - this.statusDescription = statusDescription; + public String getAcl() { + return acl; } - public void setContentLength(Long length) { - this.contentLength = length; + public Long getContentLength() { + return contentLength; } + public String getCopyId() { + return copyId; + } - public Boolean getIsDirectory() { - return isDirectory; + public String getCopyStatus() { + return copyStatus; } - public AzureBlobFileSystemException getFailureException() { - return ex; + public String getCopySourceUrl() { + return copySourceUrl; } - public Path getPath() { - return path; + public String getCopyProgress() { + return copyProgress; + } + + public String getCopyStatusDescription() { + return copyStatusDescription; + } + + public long getCopyCompletionTime() { + return copyCompletionTime; + } + + public Map getMetadata() { + return metadata; + } + + public AzureBlobFileSystemException getFailureException() { + return ex; } public Path getBlobDstPath(Path dstBlobPath) { return null; } - public String getUrl() { - return url; + public void setName(final String name) { + this.name = name; } - public String getCopySourceUrl() { - return copySourceUrl; + public void setPath(final Path path) { + this.path = path; } - public 
String getCopyId() { - return copyId; + public void setUrl(final String url) { + this.url = url; } - public String getCopyStatus() { - return copyStatus; + public void setIsDirectory(final Boolean isDirectory) { + this.isDirectory = isDirectory; } - public String getStatusDescription() { - return statusDescription; + public void setETag(final String eTag) { + this.eTag = eTag; } - public Long getContentLength() { - return contentLength; + public void setLastModifiedTime(final long lastModifiedTime) { + this.lastModifiedTime = lastModifiedTime; + } + + public void setCreationTime(final long creationTime) { + this.creationTime = creationTime; + } + + public void setOwner(final String owner) { + this.owner = owner; + } + + public void setGroup(final String group) { + this.group = group; + } + + public void setPermission(final String permission) { + this.permission = permission; + } + + public void setAcl(final String acl) { + this.acl = acl; + } + + public void setContentLength(final Long contentLength) { + this.contentLength = contentLength; + } + + public void setCopyId(final String copyId) { + this.copyId = copyId; + } + + public void setCopyStatus(final String copyStatus) { + this.copyStatus = copyStatus; + } + + public void setCopySourceUrl(final String copySourceUrl) { + this.copySourceUrl = copySourceUrl; + } + + public void setCopyProgress(final String copyProgress) { + this.copyProgress = copyProgress; + } + + public void setCopyStatusDescription(final String copyStatusDescription) { + this.copyStatusDescription = copyStatusDescription; + } + + public void setCopyCompletionTime(final long copyCompletionTime) { + this.copyCompletionTime = copyCompletionTime; + } + + public void addMetadata(String key, String value) { + metadata.put(key, value); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index dc9ef9bffb945..c4a5c9bb5ee11 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -27,6 +27,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; +import org.junit.Assume; import org.junit.Test; import org.apache.hadoop.conf.Configuration; @@ -60,6 +61,7 @@ public ITestAzureBlobFileSystemListStatus() throws Exception { @Test public void testListPath() throws Exception { + Assume.assumeTrue(false); Configuration config = new Configuration(this.getRawConfiguration()); config.set(AZURE_LIST_MAX_RESULTS, "5000"); final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestListBlob.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestListBlob.java index 4ae0a6df29c1e..941c36c342732 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestListBlob.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestListBlob.java @@ -119,9 +119,9 @@ public void testListBlobWithMarkers() throws Exception { String prefix = answer.getArgument(1); TracingContext tracingContext = answer.getArgument(3); count[0]++; - return client.getListBlobs(marker, prefix, 1, tracingContext); + return client.getListBlobs(marker, prefix, "", 1, 
tracingContext); }).when(spiedClient).getListBlobs(Mockito.nullable(String.class), - Mockito.anyString(), Mockito.nullable(Integer.class), + Mockito.anyString(), Mockito.anyString(), Mockito.nullable(Integer.class), Mockito.any(TracingContext.class)); List blobProperties = fs.getAbfsStore() diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestBlobListXmlParser.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestBlobListXmlParser.java new file mode 100644 index 0000000000000..8ecc895ec9b22 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestBlobListXmlParser.java @@ -0,0 +1,115 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import org.assertj.core.api.Assertions; +import java.util.List; + +import org.junit.Test; +import org.xml.sax.SAXException; + +public class TestBlobListXmlParser { + @Test + public void testXMLParser() throws Exception { + String xmlResponse = "" + + "" + + "" + + "/" + + "" + + "" + + "Splitting Example.txt" + + "" + + "Tue, 06 Jun 2023 08:33:51 GMT" + + "Tue, 06 Jun 2023 08:33:51 GMT" + + "0x8DB6668C17D3B76" + + "3844" + + "file" + + "BlockBlob" + + "Hot" + + "true" + + "unlocked" + + "available" + + "true" + + "true" + + "true" + + "Tue, 06 Jun 2023 08:33:51 GMT" + + "true" + + "true" + + "" + + "" + + "" + + "" + + "" + + "bye/" + + "" + + "" + + "" + + ""; + byte[] bytes = xmlResponse.getBytes(); + final InputStream stream = new ByteArrayInputStream(bytes);; + final SAXParser saxParser = saxParserThreadLocal.get(); + saxParser.reset(); + BlobList blobList = new BlobList(); + saxParser.parse(stream, new BlobListXmlParser(blobList, "https://sample.url")); + List prop = blobList.getBlobPropertyList(); + Assertions.assertThat(prop.size()).isEqualTo(2); + Assertions.assertThat(prop.get(0).getIsDirectory()).isEqualTo(false); + Assertions.assertThat(prop.get(1).getIsDirectory()).isEqualTo(true); + } + + @Test + public void testEmptyBlobList() throws Exception { + String xmlResponse = "" + + "<" + + "EnumerationResults ServiceEndpoint=\"https://anujtestfns.blob.core.windows.net/\" ContainerName=\"manualtest\">" + + "abc/" + + "/" + + "" + + ""; + byte[] bytes = xmlResponse.getBytes(); + final InputStream stream = new ByteArrayInputStream(bytes);; + final SAXParser saxParser = saxParserThreadLocal.get(); + saxParser.reset(); + BlobList blobList = new BlobList(); + saxParser.parse(stream, new 
BlobListXmlParser(blobList, "https://sample.url")); + List prop = blobList.getBlobPropertyList(); + } + + private static final ThreadLocal saxParserThreadLocal + = new ThreadLocal() { + @Override + public SAXParser initialValue() { + SAXParserFactory factory = SAXParserFactory.newInstance(); + factory.setNamespaceAware(true); + try { + return factory.newSAXParser(); + } catch (SAXException e) { + throw new RuntimeException("Unable to create SAXParser", e); + } catch (ParserConfigurationException e) { + throw new RuntimeException("Check parser configuration", e); + } + } + }; +} From 658b09a15ac742b1d9b105272d772e5c67629cba Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Fri, 9 Jun 2023 02:37:43 -0700 Subject: [PATCH 2/7] Removed Unused Improts --- .../apache/hadoop/fs/azurebfs/services/BlobProperty.java | 7 ------- .../fs/azurebfs/ITestAzureBlobFileSystemListStatus.java | 2 -- .../hadoop/fs/azurebfs/services/TestBlobListXmlParser.java | 1 - 3 files changed, 10 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobProperty.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobProperty.java index f84725bef3acc..ea7351d2120a8 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobProperty.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobProperty.java @@ -21,13 +21,6 @@ import java.util.HashMap; import java.util.Map; -import com.microsoft.azure.storage.blob.BlobType; -import com.microsoft.azure.storage.blob.CopyState; -import com.microsoft.azure.storage.blob.CopyStatus; -import com.microsoft.azure.storage.blob.LeaseDuration; -import com.microsoft.azure.storage.blob.LeaseState; -import com.microsoft.azure.storage.blob.LeaseStatus; - import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index c4a5c9bb5ee11..dc9ef9bffb945 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -27,7 +27,6 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; -import org.junit.Assume; import org.junit.Test; import org.apache.hadoop.conf.Configuration; @@ -61,7 +60,6 @@ public ITestAzureBlobFileSystemListStatus() throws Exception { @Test public void testListPath() throws Exception { - Assume.assumeTrue(false); Configuration config = new Configuration(this.getRawConfiguration()); config.set(AZURE_LIST_MAX_RESULTS, "5000"); final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestBlobListXmlParser.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestBlobListXmlParser.java index 8ecc895ec9b22..ec4cf3fa04ffe 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestBlobListXmlParser.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestBlobListXmlParser.java @@ -22,7 +22,6 @@ import javax.xml.parsers.SAXParser; import 
javax.xml.parsers.SAXParserFactory; import java.io.ByteArrayInputStream; -import java.io.IOException; import java.io.InputStream; import org.assertj.core.api.Assertions; import java.util.List; From bff778701012594c9319fd52c64ea9e384bde484 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Mon, 12 Jun 2023 01:57:17 -0700 Subject: [PATCH 3/7] Added Negative Test Scenarios --- .../fs/azurebfs/AzureBlobFileSystemStore.java | 57 +++++-- .../ITestAzureBlobFileSystemListStatus.java | 161 ++++++++++++++++++ 2 files changed, 204 insertions(+), 14 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index df866520e5b06..353e2f1202e31 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -49,6 +49,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.TreeMap; import java.util.WeakHashMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; @@ -1920,7 +1921,7 @@ public String listStatus(final Path path, final String startFrom, perfInfo.registerResult(op.getResult()); BlobList blobList = op.getResult().getBlobList(); continuation = blobList.getNextMarker(); - if (blobList.getBlobPropertyList().isEmpty()) { + if (blobList.getBlobPropertyList() == null) { throw new AbfsRestOperationException( AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(), AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(), @@ -2013,8 +2014,23 @@ public String listStatus(final Path path, final String startFrom, private void addBlobListAsFileStatus(final BlobList blobList, List fileStatuses) throws IOException { + // Here before adding the data we might have to remove the duplicates. + // List Blobs call over blob endpoint returns two types of entries: Blob + // and BlobPrefix. In the case where ABFS generated the data, + // there will be a marker blob for each "directory" created by driver, + // and we will receive them as a Blob. If there are also files within this + // "directory", we will also receive a BlobPrefix. To further + // complicate matters, the data may not be generated by ABFS Driver, in + // which case we may not have a marker blob for each "directory". In this + // the only way to know there is a directory is using BlobPrefix entry. + // So, sometimes we receive both a Blob and a BlobPrefix for directories, + // and sometimes we receive only BlobPrefix as directory. We remove duplicates + // but prefer Blob over BlobPrefix. 
+ TreeMap fileMetadata = new TreeMap<>(); List blobProperties = blobList.getBlobPropertyList(); + for (BlobProperty entry: blobProperties) { + String blobKey = entry.getName(); final String owner = identityTransformer.transformIdentityForGetRequest( entry.getOwner(), true, userName); final String group = identityTransformer.transformIdentityForGetRequest( @@ -2028,20 +2044,33 @@ private void addBlobListAsFileStatus(final BlobList blobList, Path entryPath = entry.getPath(); entryPath = entryPath.makeQualified(this.uri, entryPath); - fileStatuses.add( - new VersionedFileStatus( - owner, - group, - fsPermission, - hasAcl, - entry.getContentLength(), - entry.getIsDirectory(), - 1, - blockSize, - entry.getLastModifiedTime(), - entryPath, - entry.getETag())); + FileStatus fileStatus = new VersionedFileStatus( + owner, + group, + fsPermission, + hasAcl, + entry.getContentLength(), + entry.getIsDirectory(), + 1, + blockSize, + entry.getLastModifiedTime(), + entryPath, + entry.getETag()); + + if (entry.getETag() != null) { + // This is a blob entry. It is either a file or a marker blob. + // In both cases we will add this. + fileMetadata.put(blobKey, fileStatus); + } + else { + // This is a BlobPrefix entry. It is a directory with file inside + // This might have already been added as a marker blob. + if (!fileMetadata.containsKey(blobKey)) { + fileMetadata.put(blobKey, fileStatus); + } + } } + fileStatuses.addAll(fileMetadata.values()); } // generate continuation token for xns account diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index dc9ef9bffb945..fe309d4a96527 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -27,6 +27,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; +import org.assertj.core.api.Assertions; import org.junit.Test; import org.apache.hadoop.conf.Configuration; @@ -247,4 +248,164 @@ public void testRenameTrailingPeriodFile() throws IOException { assertTrue("Attempt to create file that ended with a dot should" + " throw IllegalArgumentException", exceptionThrown); } + + @Test + public void testListStatusImplicitExplicitChildren() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + fs.setWorkingDirectory(new Path("/")); + Path root = new Path("/"); + AzcopyHelper azcopyHelper = new AzcopyHelper( + getAccountName(), + getFileSystemName(), + getRawConfiguration(), + fs.getAbfsStore().getPrefixMode() + ); + + // Create an implicit directory under root + Path dir1 = new Path("a"); + azcopyHelper.createFolderUsingAzcopy(fs.makeQualified(dir1).toUri().getPath().substring(1)); + assertTrue("Path is implicit.", + BlobDirectoryStateHelper.isImplicitDirectory(dir1, fs)); + + // Assert that implicit directory is returned + FileStatus[] fileStatuses = fs.listStatus(root); + Assertions.assertThat(fileStatuses.length).isEqualTo(1); + assertImplicitDirectoryFileStatus(fileStatuses[0], fs.makeQualified(dir1)); + + // Create a marker blob for the directory. 
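+ // mkdirs on the implicit path creates a marker blob (hdi_isfolder) for the directory, making it explicit.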
+ fs.mkdirs(dir1); + + // Assert that only one entry of explicit directory is returned + fileStatuses = fs.listStatus(root); + Assertions.assertThat(fileStatuses.length).isEqualTo(1); + assertExplicitDirectoryFileStatus(fileStatuses[0], fs.makeQualified(dir1)); + + // Create a file under root + Path file1 = new Path("b"); + fs.create(file1); + + // Assert that two entries are returned in alphabetic order. + fileStatuses = fs.listStatus(root); + Assertions.assertThat(fileStatuses.length).isEqualTo(2); + assertExplicitDirectoryFileStatus(fileStatuses[0], fs.makeQualified(dir1)); + assertFileFileStatus(fileStatuses[1], fs.makeQualified(file1)); + + // Create another implicit directory under root. + Path dir2 = new Path("c"); + azcopyHelper.createFolderUsingAzcopy(fs.makeQualified(dir2).toUri().getPath().substring(1)); + assertTrue("Path is implicit.", + BlobDirectoryStateHelper.isImplicitDirectory(dir2, fs)); + + // Assert that three entries are returned in alphabetic order. + fileStatuses = fs.listStatus(root); + Assertions.assertThat(fileStatuses.length).isEqualTo(3); + assertExplicitDirectoryFileStatus(fileStatuses[0], fs.makeQualified(dir1)); + assertFileFileStatus(fileStatuses[1], fs.makeQualified(file1)); + assertImplicitDirectoryFileStatus(fileStatuses[2], fs.makeQualified(dir2)); + } + + @Test + public void testListStatusOnNonExistingPath() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + Path testPath = new Path("a/b"); + + intercept(FileNotFoundException.class, + () -> fs.listFiles(testPath, false).next()); + } + + @Test + public void testListStatusOnImplicitDirectory() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + AzcopyHelper azcopyHelper = new AzcopyHelper( + getAccountName(), + getFileSystemName(), + getRawConfiguration(), + fs.getAbfsStore().getPrefixMode() + ); + + // Create an implicit directory with another implicit directory inside + Path testPath = new Path("testDir"); + Path childPath = new Path("testDir/azcopy"); + azcopyHelper.createFolderUsingAzcopy( + fs.makeQualified(testPath).toUri().getPath().substring(1)); + assertTrue("Path is implicit.", + BlobDirectoryStateHelper.isImplicitDirectory(testPath, fs)); + + // Assert that one entry is returned as implicit child. + FileStatus[] fileStatuses = fs.listStatus(testPath); + Assertions.assertThat(fileStatuses.length).isEqualTo(1); + assertImplicitDirectoryFileStatus(fileStatuses[0], fs.makeQualified(childPath)); + } + + @Test + public void testListStatusOnExplicitDirectory() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + AzcopyHelper azcopyHelper = new AzcopyHelper( + getAccountName(), + getFileSystemName(), + getRawConfiguration(), + fs.getAbfsStore().getPrefixMode() + ); + + // Create an explicit directory with all kind of children. + Path testPath = new Path("testDir"); + Path explicitChild = new Path ("testDir/a"); + Path fileChild = new Path ("testDir/b"); + Path implicitChild = new Path ("testDir/c"); + fs.mkdirs(explicitChild); + fs.create(fileChild); + azcopyHelper.createFolderUsingAzcopy( + fs.makeQualified(implicitChild).toUri().getPath().substring(1)); + + assertTrue("Test path is explicit", + BlobDirectoryStateHelper.isExplicitDirectory(testPath, fs)); + assertTrue("explicitChild Path is explicit", + BlobDirectoryStateHelper.isExplicitDirectory(explicitChild, fs)); + assertTrue("implicitChild Path is implicit.", + BlobDirectoryStateHelper.isImplicitDirectory(implicitChild, fs)); + + // Assert that three entry is returned. 
+ FileStatus[] fileStatuses = fs.listStatus(testPath); + Assertions.assertThat(fileStatuses.length).isEqualTo(3); + assertExplicitDirectoryFileStatus(fileStatuses[0], fs.makeQualified(explicitChild)); + assertFileFileStatus(fileStatuses[1], fs.makeQualified(fileChild)); + assertImplicitDirectoryFileStatus(fileStatuses[2], fs.makeQualified(implicitChild)); + } + + @Test + public void testListStatusOnEmptyDirectory() throws Exception { + final AzureBlobFileSystem fs = getFileSystem(); + Path testPath = new Path("testPath"); + fs.mkdirs(testPath); + FileStatus[] fileStatuses = fs.listStatus(testPath); + Assertions.assertThat(fileStatuses.length).isEqualTo(0); + } + + private void assertFileFileStatus(final FileStatus fileStatus, + final Path qualifiedPath) { + Assertions.assertThat(fileStatus.getPath()).isEqualTo(qualifiedPath); + Assertions.assertThat(fileStatus.isDirectory()).isEqualTo(false); + Assertions.assertThat(fileStatus.isFile()).isEqualTo(true); + Assertions.assertThat(fileStatus.getModificationTime()).isNotEqualTo(0); + } + + private void assertImplicitDirectoryFileStatus(final FileStatus fileStatus, + final Path qualifiedPath) { + assertDirectoryFileStatus(fileStatus, qualifiedPath); + Assertions.assertThat(fileStatus.getModificationTime()).isEqualTo(0); + } + + private void assertExplicitDirectoryFileStatus(final FileStatus fileStatus, + final Path qualifiedPath) { + assertDirectoryFileStatus(fileStatus, qualifiedPath); + Assertions.assertThat(fileStatus.getModificationTime()).isNotEqualTo(0); + } + + private void assertDirectoryFileStatus(final FileStatus fileStatus, + final Path qualifiedPath) { + Assertions.assertThat(fileStatus.getPath()).isEqualTo(qualifiedPath); + Assertions.assertThat(fileStatus.isDirectory()).isEqualTo(true); + Assertions.assertThat(fileStatus.isFile()).isEqualTo(false); + Assertions.assertThat(fileStatus.getLen()).isEqualTo(0); + } } From 1027a75417257ee2dc798de7fe0945a56f14ca99 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Mon, 12 Jun 2023 02:55:18 -0700 Subject: [PATCH 4/7] Addressing Comments --- .../fs/azurebfs/AzureBlobFileSystemStore.java | 12 ++--- .../fs/azurebfs/services/AbfsClient.java | 6 +++ .../azurebfs/services/BlobListXmlParser.java | 52 ++++++++++--------- 3 files changed, 37 insertions(+), 33 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index 353e2f1202e31..d35340e90a4e6 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -1913,6 +1913,9 @@ public String listStatus(final Path path, final String startFrom, prefix = null; } do { + // List Blob calls will be made with delimiter "/". This will ensure + // that all the children of a folder not listed out separately. Instead, + // a single entry corresponding to the directory name will be returned as BlobPrefix. 
try (AbfsPerfInfo perfInfo = startTracking("listStatus", "getListBlobs")) { AbfsRestOperation op = client.getListBlobs( continuation, prefix, delimiter, abfsConfiguration.getListMaxResults(), @@ -1921,13 +1924,6 @@ public String listStatus(final Path path, final String startFrom, perfInfo.registerResult(op.getResult()); BlobList blobList = op.getResult().getBlobList(); continuation = blobList.getNextMarker(); - if (blobList.getBlobPropertyList() == null) { - throw new AbfsRestOperationException( - AzureServiceErrorCode.PATH_NOT_FOUND.getStatusCode(), - AzureServiceErrorCode.PATH_NOT_FOUND.getErrorCode(), - "ListBlobs path not found", - null, op.getResult()); - } addBlobListAsFileStatus(blobList, fileStatuses); @@ -2038,7 +2034,7 @@ private void addBlobListAsFileStatus(final BlobList blobList, final FsPermission fsPermission = entry.getPermission() == null ? new AbfsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL) : AbfsPermission.valueOf(entry.getPermission()); - final boolean hasAcl = entry.getAcl() == null ? false : true; + final boolean hasAcl = entry.getAcl() != null; long blockSize = abfsConfiguration.getAzureBlockSize(); Path entryPath = entry.getPath(); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 82d3960990bd6..f15961dc6b7b4 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -1465,6 +1465,12 @@ public AbfsRestOperation setBlobMetadata(Path blobPath, List met * @param marker optional value. To be sent in case this method call in a non-first * iteration to the blobList API. Value has to be equal to the field NextMarker in the response * of previous iteration for the same operation. + * @param prefix optional value. Filters the results to return only blobs + * with names that begin with the specified prefix + * @param delimiter Optional. When the request includes this parameter, + * the operation returns a BlobPrefix element in the response body. + * This element acts as a placeholder for all blobs with names that begin + * with the same substring, up to the appearance of the delimiter character. * @param maxResult define how many blobs can client handle in server response. * In case maxResult <= 5000, server sends number of blobs equal to the value. In * case maxResult > 5000, server sends maximum 5000 blobs. 
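For reference, the paging contract described above can be exercised with a loop like the following minimal sketch. It is not part of the patch: it reuses the getListBlobs signature shown in this hunk, and consumeBlobProperty is a hypothetical placeholder for whatever the caller does with each entry.

    // Sketch only: page through ListBlobs results until NextMarker comes back empty.
    String marker = null;
    String prefix = "dir/";    // restrict listing to blobs under "dir"
    String delimiter = "/";    // collapse deeper children into BlobPrefix entries
    do {
      AbfsRestOperation op = client.getListBlobs(
          marker, prefix, delimiter, abfsConfiguration.getListMaxResults(),
          tracingContext);
      BlobList blobList = op.getResult().getBlobList();
      for (BlobProperty property : blobList.getBlobPropertyList()) {
        consumeBlobProperty(property);  // hypothetical consumer of each entry
      }
      marker = blobList.getNextMarker();
    } while (marker != null && !marker.isEmpty());

The same shape is used by listStatus above, with the continuation token playing the role of the marker.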
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobListXmlParser.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobListXmlParser.java index 5ae253d58614c..d00675201a4cf 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobListXmlParser.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/BlobListXmlParser.java @@ -28,29 +28,29 @@ import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.utils.DateTimeUtils; -import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DIRECTORY; -import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FORWARD_SLASH; -import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HDI_ISFOLDER; -import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.INVALID_XML; -import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH; - /** * Parses the response inputSteam and populates an object of {@link BlobList}. Parsing * creates a list of {@link BlobProperty}.
* * BlobList API XML response example - * - * string-value - * string-value - * int-value - * string-value - * - * - * - * - * - * + *
+ *   {@code
+ *   <EnumerationResults ServiceEndpoint="https://account.blob.core.windows.net/" ContainerName="container">
+ *    <Prefix>string-value</Prefix>
+ *    <Marker>string-value</Marker>
+ *    <MaxResults>int-value</MaxResults>
+ *    <Delimiter>string-value</Delimiter>
+ *    <Blobs>
+ *       <Blob />
+ *       <BlobPrefix />
+ *    </Blobs>
+ *    <NextMarker />
+ *   </EnumerationResults>
+ *   }
+ *
*/ + + public class BlobListXmlParser extends DefaultHandler { /** * Object that contains the parsed response. @@ -127,7 +127,7 @@ public void endElement(final String uri, // Check if the ending tag is correct to the starting tag in the stack. if (!currentNode.equals(localName)) { - throw new SAXException(INVALID_XML); + throw new SAXException(AbfsHttpConstants.INVALID_XML); } String parentNode = ""; @@ -174,16 +174,18 @@ public void endElement(final String uri, * or for a blob prefix denoting a directory. We will save this * in current BlobProperty for both */ - if (currentNode.equals(AbfsHttpConstants.NAME)) { + if (currentNode.equals(AbfsHttpConstants.NAME) + && (parentNode.equals(AbfsHttpConstants.BLOB) + || parentNode.equals(AbfsHttpConstants.BLOB_PREFIX))) { // In case of BlobPrefix Name will have a slash at the end // Remove the "/" at the end of name - if (value.endsWith(FORWARD_SLASH)) { + if (value.endsWith(AbfsHttpConstants.FORWARD_SLASH)) { value = value.substring(0, value.length() - 1); } currentBlobProperty.setName(value); - currentBlobProperty.setPath(new Path(ROOT_PATH + value)); - currentBlobProperty.setUrl(url + ROOT_PATH + value); + currentBlobProperty.setPath(new Path(AbfsHttpConstants.ROOT_PATH + value)); + currentBlobProperty.setUrl(url + AbfsHttpConstants.ROOT_PATH + value); } /* @@ -201,7 +203,7 @@ public void endElement(final String uri, if (parentNode.equals(AbfsHttpConstants.METADATA)) { currentBlobProperty.addMetadata(currentNode, value); // For Marker blobs hdi_isFolder will be present as metadata - if (HDI_ISFOLDER.equals(currentNode)) { + if (AbfsHttpConstants.HDI_ISFOLDER.equals(currentNode)) { currentBlobProperty.setIsDirectory(Boolean.valueOf(value)); } } @@ -213,7 +215,7 @@ public void endElement(final String uri, * * date-time-value * date-time-value - * etag + * Etag * owner user id * owning group id * permission string @@ -254,7 +256,7 @@ public void endElement(final String uri, currentBlobProperty.setAcl(value); } if (currentNode.equals(AbfsHttpConstants.RESOURCE_TYPE)) { - if (DIRECTORY.equals(value)) { + if (AbfsHttpConstants.DIRECTORY.equals(value)) { currentBlobProperty.setIsDirectory(true); } } From 638d5ab4ddbdaa65dbf6a218edd5b296877f5752 Mon Sep 17 00:00:00 2001 From: Anuj Modi Date: Tue, 13 Jun 2023 03:00:02 -0700 Subject: [PATCH 5/7] Test For Recursive Listing --- .../fs/azurebfs/AzureBlobFileSystem.java | 4 +- .../fs/azurebfs/AzureBlobFileSystemStore.java | 17 +++--- .../fs/azurebfs/BlobDirectoryStateHelper.java | 2 +- ...ileSystemDelegationSASForBlobEndpoint.java | 4 +- ...reBlobFileSystemExplictImplicitRename.java | 4 +- .../hadoop/fs/azurebfs/ITestListBlob.java | 60 ++++++++++++++++--- 6 files changed, 70 insertions(+), 21 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java index b2c17d344ea26..b817015aa0869 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java @@ -394,7 +394,7 @@ protected CompletableFuture openFileWithOptions( * @param tracingContext The tracingContext. 
*/ private void validatePathOrSubPathDoesNotExist(final Path path, TracingContext tracingContext) throws IOException { - List blobList = abfsStore.getListBlobs(path, null, + List blobList = abfsStore.getListBlobs(path, null, null, tracingContext, 2, true); if (blobList.size() > 0 || abfsStore.checkIsDirectory(path, tracingContext)) { throw new AbfsRestOperationException(HTTP_CONFLICT, @@ -788,7 +788,7 @@ private PathInformation getPathInformation(final Path path, if (getAbfsStore().getAbfsConfiguration().getPrefixMode() == PrefixMode.BLOB) { List blobProperties = getAbfsStore() - .getListBlobs(path, null, tracingContext, 2, true); + .getListBlobs(path, null, null, tracingContext, 2, true); if (blobProperties.size() > 0) { return new PathInformation(true, true); } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index d35340e90a4e6..5732fc56206f3 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -726,7 +726,7 @@ public Hashtable getBlobMetadata(final Path path, // The path does not exist explicitly. // Check here if the path is an implicit dir if (ex.getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND && !path.isRoot()) { - List blobProperties = getListBlobs(path, null, + List blobProperties = getListBlobs(path, null, null, tracingContext, 2, true); if (blobProperties.size() == 0) { throw ex; @@ -826,7 +826,7 @@ private List getRequestHeadersForMetadata(Hashtable getListBlobs(Path sourceDirBlobPath, - String prefix, TracingContext tracingContext, + String prefix, String delimiter, TracingContext tracingContext, final Integer maxResult, final Boolean isDefinitiveDirSearch) throws AzureBlobFileSystemException { List blobProperties = new ArrayList<>(); @@ -838,9 +838,12 @@ public List getListBlobs(Path sourceDirBlobPath, ? ROOT_PATH : EMPTY_STRING); } + if (delimiter == null) { + delimiter = ""; + } do { AbfsRestOperation op = client.getListBlobs( - nextMarker, prefix, "", maxResult, tracingContext + nextMarker, prefix, delimiter, maxResult, tracingContext ); BlobList blobList = op.getResult().getBlobList(); nextMarker = blobList.getNextMarker(); @@ -1249,7 +1252,7 @@ public AbfsInputStream openFileForRead(final Path path, if (e.getStatusCode() != HTTP_NOT_FOUND) { throw e; } - List blobsList = getListBlobs(new Path(relativePath), null, + List blobsList = getListBlobs(new Path(relativePath), null, null, tracingContext, 2, true); if (blobsList.size() > 0) { throw new AbfsRestOperationException( @@ -1340,7 +1343,7 @@ public OutputStream openFileForWrite(final Path path, // The path does not exist explicitly. // Check here if the path is an implicit dir if (getPrefixMode() == PrefixMode.BLOB && ex.getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND) { - List blobProperties = getListBlobs(path, null, + List blobProperties = getListBlobs(path, null, null, tracingContext, 2, true); if (blobProperties.size() != 0) { throw new AbfsRestOperationException( @@ -1439,7 +1442,7 @@ public void rename(final Path source, final Path destination, /* * Fetch the list of blobs in the given sourcePath. 
*/ - List srcBlobProperties = getListBlobs(source, null, + List srcBlobProperties = getListBlobs(source, null, null, tracingContext, null, true); BlobProperty blobPropOnSrc; if (srcBlobProperties.size() > 0) { @@ -1821,7 +1824,7 @@ public FileStatus getFileStatusOverBlob(final Path path, // The path does not exist explicitly. // Check here if the path is an implicit dir if (ex.getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND && !path.isRoot()) { - List blobProperties = getListBlobs(path,null, tracingContext, 2, true); + List blobProperties = getListBlobs(path,null, null, tracingContext, 2, true); if (blobProperties.size() == 0) { throw ex; } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/BlobDirectoryStateHelper.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/BlobDirectoryStateHelper.java index dad4551f65dbd..e8a5f2ab3da7a 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/BlobDirectoryStateHelper.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/BlobDirectoryStateHelper.java @@ -48,7 +48,7 @@ public static boolean isImplicitDirectory(Path path, AzureBlobFileSystem fs) thr path = new Path(fs.makeQualified(path).toUri().getPath()); if (fs.getAbfsStore().getPrefixMode() == PrefixMode.BLOB) { List blobProperties = fs.getAbfsStore() - .getListBlobs(path,null, Mockito.mock(TracingContext.class), 2, true); + .getListBlobs(path,null, null, Mockito.mock(TracingContext.class), 2, true); if (blobProperties.size() == 0) { return false; } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSASForBlobEndpoint.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSASForBlobEndpoint.java index 52880c3d09120..005bd216abce3 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSASForBlobEndpoint.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSASForBlobEndpoint.java @@ -159,7 +159,7 @@ public void testListBlob() throws Exception { i++; } List blobProperties = fs.getAbfsStore() - .getListBlobs(new Path("dir"), null, + .getListBlobs(new Path("dir"), null, null, Mockito.mock(TracingContext.class), null, false); Assertions.assertThat(blobProperties) .describedAs( @@ -167,7 +167,7 @@ public void testListBlob() throws Exception { .hasSize(11); blobProperties = fs.getAbfsStore() - .getListBlobs(new Path("dir"), null, + .getListBlobs(new Path("dir"), null, null, Mockito.mock(TracingContext.class), null, true); Assertions.assertThat(blobProperties) .describedAs( diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemExplictImplicitRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemExplictImplicitRename.java index d6ec19c6b3066..7b279bf19ef27 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemExplictImplicitRename.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemExplictImplicitRename.java @@ -952,12 +952,12 @@ private void explicitImplicitCaseRenameAssert(final Boolean dstExist, } else { Assert.assertFalse(fs.rename(src, dst)); Assert.assertTrue(fs.getAbfsStore() - .getListBlobs(src, null, 
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemExplictImplicitRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemExplictImplicitRename.java
index d6ec19c6b3066..7b279bf19ef27 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemExplictImplicitRename.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemExplictImplicitRename.java
@@ -952,12 +952,12 @@ private void explicitImplicitCaseRenameAssert(final Boolean dstExist,
     } else {
       Assert.assertFalse(fs.rename(src, dst));
       Assert.assertTrue(fs.getAbfsStore()
-          .getListBlobs(src, null, Mockito.mock(TracingContext.class), null,
+          .getListBlobs(src, null, null, Mockito.mock(TracingContext.class), null,
               false)
           .size() > 0);
       if (dstExist) {
         Assert.assertTrue(fs.getAbfsStore()
-            .getListBlobs(dst, null, Mockito.mock(TracingContext.class), null,
+            .getListBlobs(dst, null, null, Mockito.mock(TracingContext.class), null,
                 false)
             .size() > 0);
       }
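
Across the call sites in this series, getListBlobs is invoked with a null prefix and delimiter and maxResult = 2 purely to learn whether anything exists under a path, which is how an implicit directory (one without a marker blob) is detected. Below is a minimal, self-contained sketch of that probe; BlobLister and isImplicitDirectory are hypothetical stand-ins for this example, not the real AbfsClient or store API.

    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    public class ImplicitDirCheckSketch {

      /** Hypothetical listing dependency standing in for the blob service client. */
      interface BlobLister {
        List<String> list(String prefix, int maxResults);
      }

      // A path with no marker blob is still treated as a directory when at least
      // one blob exists under "path/"; asking for at most two results keeps the
      // probe cheap, mirroring the maxResult = 2 used at the call sites above.
      static boolean isImplicitDirectory(BlobLister lister, String path) {
        List<String> children = lister.list(path + "/", 2);
        return !children.isEmpty();
      }

      public static void main(String[] args) {
        BlobLister fake = (prefix, max) -> prefix.equals("dir/")
            ? Arrays.asList("dir/file1")
            : Collections.<String>emptyList();
        System.out.println(isImplicitDirectory(fake, "dir"));   // true
        System.out.println(isImplicitDirectory(fake, "other")); // false
      }
    }
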
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestListBlob.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestListBlob.java
index 941c36c342732..c98739c177cc9 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestListBlob.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestListBlob.java
@@ -27,6 +27,7 @@
 import org.junit.Test;
 import org.mockito.Mockito;

+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.azurebfs.services.AbfsClient;
 import org.apache.hadoop.fs.azurebfs.services.BlobProperty;
@@ -51,7 +52,7 @@ public void testListBlob() throws Exception {
      * results including the directory blob(hdi_isfolder=true).
      */
     blobProperties = fs.getAbfsStore()
-        .getListBlobs(new Path("dir"), null,
+        .getListBlobs(new Path("dir"), null, null,
             Mockito.mock(TracingContext.class), null, false);
     Assertions.assertThat(blobProperties)
         .describedAs(
@@ -63,7 +64,7 @@ public void testListBlob() throws Exception {
      * results excluding the directory blob(hdi_isfolder=true).
      */
     blobProperties = fs.getAbfsStore()
-        .getListBlobs(new Path("dir"), null,
+        .getListBlobs(new Path("dir"), null, null,
             Mockito.mock(TracingContext.class), null, true);
     Assertions.assertThat(blobProperties)
         .describedAs(
@@ -76,7 +77,7 @@ public void testListBlob() throws Exception {
      * the directory blob(hdi_isfolder=true).
      */
     blobProperties = fs.getAbfsStore()
-        .getListBlobs(new Path("dir"), null,
+        .getListBlobs(new Path("dir"), null, null,
             Mockito.mock(TracingContext.class), 13, false);
     Assertions.assertThat(blobProperties)
         .describedAs(
@@ -89,7 +90,7 @@ public void testListBlob() throws Exception {
      * same as the maxResult
      */
     blobProperties = fs.getAbfsStore()
-        .getListBlobs(new Path("dir"), null,
+        .getListBlobs(new Path("dir"), null, null,
             Mockito.mock(TracingContext.class), 5, false);
     Assertions.assertThat(blobProperties)
         .describedAs(
@@ -117,15 +118,16 @@ public void testListBlobWithMarkers() throws Exception {
     Mockito.doAnswer(answer -> {
       String marker = answer.getArgument(0);
       String prefix = answer.getArgument(1);
-      TracingContext tracingContext = answer.getArgument(3);
+      String delimiter = answer.getArgument(2);
+      TracingContext tracingContext = answer.getArgument(4);
       count[0]++;
-      return client.getListBlobs(marker, prefix, "", 1, tracingContext);
+      return client.getListBlobs(marker, prefix, delimiter, 1, tracingContext);
     }).when(spiedClient).getListBlobs(Mockito.nullable(String.class),
         Mockito.anyString(), Mockito.anyString(), Mockito.nullable(Integer.class),
         Mockito.any(TracingContext.class));
     List<BlobProperty> blobProperties = fs.getAbfsStore()
-        .getListBlobs(new Path("dir"), null,
+        .getListBlobs(new Path("dir"), null, null,
             Mockito.mock(TracingContext.class), 5, false);
     Assertions.assertThat(blobProperties)
         .describedAs(
@@ -137,6 +139,50 @@ public void testListBlobWithMarkers() throws Exception {
         .isEqualTo(5);
   }

+  @Test
+  public void testListBlobWithDelimiter() throws Exception {
+    AzureBlobFileSystem fs = getFileSystem();
+    fs.setWorkingDirectory(new Path("/"));
+    assumeNonHnsAccountBlobEndpoint(fs);
+    List<BlobProperty> blobProperties;
+
+    // Create three levels of hierarchy.
+    Path level0 = new Path("a");
+    Path level1 = new Path("a/b");
+    Path level2 = new Path("a/b/c");
+    fs.mkdirs(level0);
+    fs.mkdirs(level1);
+    fs.mkdirs(level2);
+
+    // Without delimiter, the recursive listing returns all children and sub-children.
+    // There will be no BlobPrefix element; only explicit blobs are returned.
+    blobProperties = fs.getAbfsStore()
+        .getListBlobs(level0.getParent(), null, null,
+            Mockito.mock(TracingContext.class), null, true);
+    Assertions.assertThat(blobProperties)
+        .describedAs(
+            "BlobList should return all blobs in hierarchy")
+        .hasSize(3);
+
+    // With delimiter, the non-recursive listing returns only the immediate children.
+    // There will be repetition of marker blobs (a Blob and a BlobPrefix for the same directory).
+    blobProperties = fs.getAbfsStore()
+        .getListBlobs(level0.getParent(), null, "/",
+            Mockito.mock(TracingContext.class), null, true);
+    Assertions.assertThat(blobProperties)
+        .describedAs(
+            "BlobList should return only immediate Children")
+        .hasSize(2);
+
+    // ABFS listing with delimiter is also non-recursive and returns only the immediate children,
+    // with no repetition of marker blobs.
+    FileStatus[] fileStatuses = fs.listStatus(level0.getParent());
+    Assertions.assertThat(fileStatuses)
+        .describedAs(
+            "BlobList should return only immediate Children")
+        .hasSize(1);
+  }
+
   private void assumeNonHnsAccountBlobEndpoint(final AzureBlobFileSystem fs) {
     Assume.assumeTrue("To work on only on non-HNS Blob endpoint",
         fs.getAbfsStore().getAbfsConfiguration().getPrefixMode()

From ba21593f142e7520087b8505429a620ffeba768f Mon Sep 17 00:00:00 2001
From: Anuj Modi
Date: Tue, 13 Jun 2023 03:27:26 -0700
Subject: [PATCH 6/7] startFrom case fallback to DFS

---
 .../apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
index 5732fc56206f3..59fac03057a28 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
@@ -1893,17 +1893,21 @@ public String listStatus(final Path path, final String startFrom,
         startFrom);

     final String relativePath = getRelativePath(path);
+    boolean useBlobEndpointListing = getPrefixMode() == PrefixMode.BLOB;

     if (continuation == null || continuation.isEmpty()) {
       // generate continuation token if a valid startFrom is provided.
       if (startFrom != null && !startFrom.isEmpty()) {
+        // In case startFrom is passed, fallback to DFS for now
+        // TODO: Support startFrom for List Blobs on Blob Endpoint
+        useBlobEndpointListing = false;
         continuation = getIsNamespaceEnabled(tracingContext)
             ? generateContinuationTokenForXns(startFrom)
             : generateContinuationTokenForNonXns(relativePath, startFrom);
       }
     }

-    if (getPrefixMode() == PrefixMode.BLOB) {
+    if (useBlobEndpointListing) {
       FileStatus status = getFileStatusOverBlob(path, tracingContext);
       if (status.isFile()) {
         fileStatuses.add(status);
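
The "Addressed Comments" patch that follows moves listStatus over to a TreeMap keyed by path so that duplicate entries from the Blob endpoint (a marker Blob plus a BlobPrefix for the same directory, as exercised by testListBlobWithDelimiter above) collapse to a single FileStatus, preferring the Blob. A self-contained sketch of that idea is given below; Entry and dedupe are stand-in names for this example, not the ABFS classes.

    import java.util.Arrays;
    import java.util.List;
    import java.util.TreeMap;

    public class BlobListingDedupSketch {

      /** Hypothetical stand-in for a listing entry; kind is "Blob" or "BlobPrefix". */
      static final class Entry {
        final String name;
        final String kind;
        Entry(String name, String kind) { this.name = name; this.kind = kind; }
      }

      // Keep one entry per path, preferring a Blob (marker blob or file) over a
      // BlobPrefix for the same name.
      static TreeMap<String, Entry> dedupe(List<Entry> raw) {
        TreeMap<String, Entry> byName = new TreeMap<>();
        for (Entry e : raw) {
          String key = e.name.endsWith("/")
              ? e.name.substring(0, e.name.length() - 1)
              : e.name;
          if ("Blob".equals(e.kind)) {
            byName.put(key, e);          // A Blob always wins.
          } else {
            byName.putIfAbsent(key, e);  // A BlobPrefix only fills a gap.
          }
        }
        return byName;
      }

      public static void main(String[] args) {
        // A delimiter ("/") listing of "a/" may return both the marker blob "a/b"
        // and the prefix "a/b/" for the same directory.
        List<Entry> raw = Arrays.asList(
            new Entry("a/b", "Blob"), new Entry("a/b/", "BlobPrefix"));
        System.out.println(dedupe(raw).size()); // 1
      }
    }
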
From 73d05cacbfcab12fad80db612fac9a114ca2fa13 Mon Sep 17 00:00:00 2001
From: Anuj Modi
Date: Tue, 13 Jun 2023 05:41:48 -0700
Subject: [PATCH 7/7] Addressed Comments

---
 .../fs/azurebfs/AzureBlobFileSystemStore.java | 48 ++++++++++---------
 1 file changed, 26 insertions(+), 22 deletions(-)

diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
index 59fac03057a28..0d69032976509 100644
--- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
+++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java
@@ -1919,10 +1919,14 @@ public String listStatus(final Path path, final String startFrom,
       if (path.isRoot()) {
         prefix = null;
       }
+
+      TreeMap<String, FileStatus> fileMetadata = new TreeMap<>();
       do {
-      // List Blob calls will be made with delimiter "/". This will ensure
-      // that all the children of a folder not listed out separately. Instead,
-      // a single entry corresponding to the directory name will be returned as BlobPrefix.
+        /*
+         * List Blob calls will be made with delimiter "/". This ensures
+         * that the children of a folder are not listed out separately. Instead,
+         * a single entry corresponding to the directory name is returned as a BlobPrefix.
+         */
         try (AbfsPerfInfo perfInfo = startTracking("listStatus", "getListBlobs")) {
           AbfsRestOperation op = client.getListBlobs(
               continuation, prefix, delimiter, abfsConfiguration.getListMaxResults(),
               tracingContext
           );
           perfInfo.registerResult(op.getResult());
@@ -1932,7 +1936,7 @@ public String listStatus(final Path path, final String startFrom,
           BlobList blobList = op.getResult().getBlobList();
           continuation = blobList.getNextMarker();

-          addBlobListAsFileStatus(blobList, fileStatuses);
+          addBlobListAsFileStatus(blobList, fileMetadata);

           perfInfo.registerSuccess(true);
           countAggregate++;
@@ -1945,6 +1949,7 @@ public String listStatus(final Path path, final String startFrom,
         }
       } while (shouldContinue);

+      fileStatuses.addAll(fileMetadata.values());
       return continuation;
     }

@@ -2015,21 +2020,22 @@ public String listStatus(final Path path, final String startFrom,
   }

   private void addBlobListAsFileStatus(final BlobList blobList,
-      List<FileStatus> fileStatuses) throws IOException {
-
-    // Here before adding the data we might have to remove the duplicates.
-    // List Blobs call over blob endpoint returns two types of entries: Blob
-    // and BlobPrefix. In the case where ABFS generated the data,
-    // there will be a marker blob for each "directory" created by driver,
-    // and we will receive them as a Blob. If there are also files within this
-    // "directory", we will also receive a BlobPrefix. To further
-    // complicate matters, the data may not be generated by ABFS Driver, in
-    // which case we may not have a marker blob for each "directory". In this
-    // the only way to know there is a directory is using BlobPrefix entry.
-    // So, sometimes we receive both a Blob and a BlobPrefix for directories,
-    // and sometimes we receive only BlobPrefix as directory. We remove duplicates
-    // but prefer Blob over BlobPrefix.
-    TreeMap<String, FileStatus> fileMetadata = new TreeMap<>();
+      TreeMap<String, FileStatus> fileMetadata) throws IOException {
+
+    /*
+     * Here, before adding the data, we might have to remove duplicates.
+     * The List Blobs call over the blob endpoint returns two types of entries: Blob
+     * and BlobPrefix. In the case where ABFS generated the data,
+     * there will be a marker blob for each "directory" created by the driver,
+     * and we will receive it as a Blob. If there are also files within this
+     * "directory", we will also receive a BlobPrefix. To further
+     * complicate matters, the data may not be generated by the ABFS driver, in
+     * which case we may not have a marker blob for each "directory". In this case,
+     * the only way to know there is a directory is the BlobPrefix entry.
+     * So, sometimes we receive both a Blob and a BlobPrefix for directories,
+     * and sometimes we receive only a BlobPrefix for a directory. We remove duplicates
+     * but prefer Blob over BlobPrefix.
+     */

     List<BlobProperty> blobProperties = blobList.getBlobPropertyList();
     for (BlobProperty entry: blobProperties) {
@@ -2064,8 +2070,7 @@ private void addBlobListAsFileStatus(final BlobList blobList,
         // This is a blob entry. It is either a file or a marker blob.
         // In both cases we will add this.
         fileMetadata.put(blobKey, fileStatus);
-      }
-      else {
+      } else {
         // This is a BlobPrefix entry. It is a directory with file inside
         // This might have already been added as a marker blob.
         if (!fileMetadata.containsKey(blobKey)) {
@@ -2073,7 +2078,6 @@ private void addBlobListAsFileStatus(final BlobList blobList,
         }
       }
     }
-    fileStatuses.addAll(fileMetadata.values());
   }

   // generate continuation token for xns account
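
One last note on the loop above: the Blob endpoint pages results through NextMarker, and listStatus keeps fetching while fetchAll is set and a non-empty marker comes back, only then converting the aggregated map into FileStatus objects. The sketch below shows just that paging pattern in isolation; Page, Pager and listAll are hypothetical stand-ins rather than driver classes.

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class ListingPagingSketch {

      /** Hypothetical stand-ins for a single page of results and its fetcher. */
      interface Page {
        List<String> entries();
        String nextMarker();
      }

      interface Pager {
        Page fetch(String marker);
      }

      // Drain pages until the service stops returning a marker, mirroring the
      // do/while over nextMarker in the listStatus change above.
      static List<String> listAll(Pager pager, boolean fetchAll) {
        List<String> out = new ArrayList<>();
        String marker = null;
        do {
          Page page = pager.fetch(marker);
          out.addAll(page.entries());
          marker = page.nextMarker();
        } while (fetchAll && marker != null && !marker.isEmpty());
        return out;
      }

      public static void main(String[] args) {
        // Fake two-page listing: the first page carries a marker, the second does not.
        Pager fake = marker -> marker == null
            ? page(Arrays.asList("a/b"), "marker-1")
            : page(Arrays.asList("a/c"), "");
        System.out.println(listAll(fake, true)); // [a/b, a/c]
      }

      static Page page(List<String> entries, String next) {
        return new Page() {
          @Override public List<String> entries() { return entries; }
          @Override public String nextMarker() { return next; }
        };
      }
    }
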