Skip to content

Commit 3418bbb

Browse files
Da Zhou authored and
steveloughran committed
HADOOP-16269. ABFS: add listFileStatus with StartFrom.
Author: Da Zhou
1 parent 9b0aace commit 3418bbb

File tree

6 files changed

+363
-9
lines changed

6 files changed

+363
-9
lines changed

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java

Lines changed: 102 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import java.nio.charset.Charset;
3333
import java.nio.charset.CharsetDecoder;
3434
import java.nio.charset.CharsetEncoder;
35+
import java.nio.charset.StandardCharsets;
3536
import java.text.ParseException;
3637
import java.text.SimpleDateFormat;
3738
import java.util.ArrayList;
@@ -47,6 +48,7 @@
4748

4849
import com.google.common.annotations.VisibleForTesting;
4950
import com.google.common.base.Preconditions;
51+
import com.google.common.base.Strings;
5052

5153
import org.apache.hadoop.classification.InterfaceAudience;
5254
import org.apache.hadoop.classification.InterfaceStability;
@@ -81,6 +83,7 @@
8183
import org.apache.hadoop.fs.azurebfs.services.ExponentialRetryPolicy;
8284
import org.apache.hadoop.fs.azurebfs.services.SharedKeyCredentials;
8385
import org.apache.hadoop.fs.azurebfs.utils.Base64;
86+
import org.apache.hadoop.fs.azurebfs.utils.CRC64;
8487
import org.apache.hadoop.fs.azurebfs.utils.UriUtils;
8588
import org.apache.hadoop.fs.permission.AclEntry;
8689
import org.apache.hadoop.fs.permission.AclStatus;
@@ -92,7 +95,17 @@
9295
import org.slf4j.Logger;
9396
import org.slf4j.LoggerFactory;
9497

98+
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_EQUALS;
99+
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_FORWARD_SLASH;
100+
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_HYPHEN;
101+
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_PLUS;
102+
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_STAR;
103+
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHAR_UNDERSCORE;
104+
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ROOT_PATH;
105+
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.SINGLE_WHITE_SPACE;
106+
import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.TOKEN_VERSION;
95107
import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_ABFS_ENDPOINT;
108+
96109
/**
97110
* Provides the bridging logic between Hadoop's abstract filesystem and Azure Storage.
98111
*/
@@ -106,6 +119,7 @@ public class AzureBlobFileSystemStore implements Closeable {
106119
private String userName;
107120
private String primaryUserGroup;
108121
private static final String DATE_TIME_PATTERN = "E, dd MMM yyyy HH:mm:ss 'GMT'";
122+
private static final String TOKEN_DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ss.SSSSSSS'Z'";
109123
private static final String XMS_PROPERTIES_ENCODING = "ISO-8859-1";
110124
private static final int LIST_MAX_RESULTS = 500;
111125

@@ -522,15 +536,43 @@ public FileStatus getFileStatus(final Path path) throws IOException {
522536
eTag);
523537
}
524538

539+
/**
540+
* @param path The list path.
541+
* @return the entries in the path.
542+
* */
525543
public FileStatus[] listStatus(final Path path) throws IOException {
526-
LOG.debug("listStatus filesystem: {} path: {}",
544+
return listStatus(path, null);
545+
}
546+
547+
/**
548+
* @param path the list path.
549+
* @param startFrom the entry name that list results should start with.
550+
* For example, if folder "/folder" contains four files: "afile", "bfile", "hfile", "ifile".
551+
* Then listStatus(Path("/folder"), "hfile") will return "/folder/hfile" and "/folder/ifile".
552+
* Notice that if startFrom is a non-existent entry name, then the list response contains
553+
* all entries after this non-existent entry in lexical order:
554+
* listStatus(Path("/folder"), "cfile") will return "/folder/hfile" and "/folder/ifile".
555+
*
556+
* @return the entries in the path, starting from "startFrom", in lexical order.
557+
* */
558+
@InterfaceStability.Unstable
559+
public FileStatus[] listStatus(final Path path, final String startFrom) throws IOException {
560+
LOG.debug("listStatus filesystem: {} path: {}, startFrom: {}",
527561
client.getFileSystem(),
528-
path);
562+
path,
563+
startFrom);
529564

530-
String relativePath = path.isRoot() ? AbfsHttpConstants.EMPTY_STRING : getRelativePath(path);
565+
final String relativePath = path.isRoot() ? AbfsHttpConstants.EMPTY_STRING : getRelativePath(path);
531566
String continuation = null;
532-
ArrayList<FileStatus> fileStatuses = new ArrayList<>();
533567

568+
// generate continuation token if a valid startFrom is provided.
569+
if (startFrom != null && !startFrom.isEmpty()) {
570+
continuation = getIsNamespaceEnabled()
571+
? generateContinuationTokenForXns(startFrom)
572+
: generateContinuationTokenForNonXns(path.isRoot() ? ROOT_PATH : relativePath, startFrom);
573+
}
574+
575+
ArrayList<FileStatus> fileStatuses = new ArrayList<>();
534576
do {
535577
AbfsRestOperation op = client.listPath(relativePath, false, LIST_MAX_RESULTS, continuation);
536578
continuation = op.getResult().getResponseHeader(HttpHeaderConfigurations.X_MS_CONTINUATION);
@@ -583,6 +625,61 @@ public FileStatus[] listStatus(final Path path) throws IOException {
583625
return fileStatuses.toArray(new FileStatus[fileStatuses.size()]);
584626
}
585627

628+
// generate continuation token for xns account
629+
private String generateContinuationTokenForXns(final String firstEntryName) {
630+
Preconditions.checkArgument(!Strings.isNullOrEmpty(firstEntryName)
631+
&& !firstEntryName.startsWith(AbfsHttpConstants.ROOT_PATH),
632+
"startFrom must be a dir/file name and it can not be a full path");
633+
634+
StringBuilder sb = new StringBuilder();
635+
sb.append(firstEntryName).append("#$").append("0");
636+
637+
CRC64 crc64 = new CRC64();
638+
StringBuilder token = new StringBuilder();
639+
token.append(crc64.compute(sb.toString().getBytes(StandardCharsets.UTF_8)))
640+
.append(SINGLE_WHITE_SPACE)
641+
.append("0")
642+
.append(SINGLE_WHITE_SPACE)
643+
.append(firstEntryName);
644+
645+
return Base64.encode(token.toString().getBytes(StandardCharsets.UTF_8));
646+
}
647+
648+
// generate continuation token for non-xns account
649+
private String generateContinuationTokenForNonXns(final String path, final String firstEntryName) {
650+
Preconditions.checkArgument(!Strings.isNullOrEmpty(firstEntryName)
651+
&& !firstEntryName.startsWith(AbfsHttpConstants.ROOT_PATH),
652+
"startFrom must be a dir/file name and it can not be a full path");
653+
654+
// Notice: non-xns continuation token requires full path (first "/" is not included) for startFrom
655+
final String startFrom = (path.isEmpty() || path.equals(ROOT_PATH))
656+
? firstEntryName
657+
: path + ROOT_PATH + firstEntryName;
658+
659+
SimpleDateFormat simpleDateFormat = new SimpleDateFormat(TOKEN_DATE_PATTERN, Locale.US);
660+
String date = simpleDateFormat.format(new Date());
661+
String token = String.format("%06d!%s!%06d!%s!%06d!%s!",
662+
path.length(), path, startFrom.length(), startFrom, date.length(), date);
663+
String base64EncodedToken = Base64.encode(token.getBytes(StandardCharsets.UTF_8));
664+
665+
StringBuilder encodedTokenBuilder = new StringBuilder(base64EncodedToken.length() + 5);
666+
encodedTokenBuilder.append(String.format("%s!%d!", TOKEN_VERSION, base64EncodedToken.length()));
667+
668+
for (int i = 0; i < base64EncodedToken.length(); i++) {
669+
char current = base64EncodedToken.charAt(i);
670+
if (CHAR_FORWARD_SLASH == current) {
671+
current = CHAR_UNDERSCORE;
672+
} else if (CHAR_PLUS == current) {
673+
current = CHAR_STAR;
674+
} else if (CHAR_EQUALS == current) {
675+
current = CHAR_HYPHEN;
676+
}
677+
encodedTokenBuilder.append(current);
678+
}
679+
680+
return encodedTokenBuilder.toString();
681+
}
682+
586683
public void setOwner(final Path path, final String owner, final String group) throws
587684
AzureBlobFileSystemException {
588685
if (!getIsNamespaceEnabled()) {
@@ -1002,7 +1099,7 @@ public boolean equals(Object obj) {
10021099

10031100
FileStatus other = (FileStatus) obj;
10041101

1005-
if (!other.equals(this)) {// compare the path
1102+
if (!this.getPath().equals(other.getPath())) {// compare the path
10061103
return false;
10071104
}
10081105

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ public final class AbfsHttpConstants {
3939
public static final String GET_ACCESS_CONTROL = "getAccessControl";
4040
public static final String GET_STATUS = "getStatus";
4141
public static final String DEFAULT_TIMEOUT = "90";
42+
public static final String TOKEN_VERSION = "2";
4243

4344
public static final String JAVA_VERSION = "java.version";
4445
public static final String OS_NAME = "os.name";
@@ -91,5 +92,13 @@ public final class AbfsHttpConstants {
9192
public static final String PERMISSION_FORMAT = "%04d";
9293
public static final String SUPER_USER = "$superuser";
9394

95+
public static final char CHAR_FORWARD_SLASH = '/';
96+
public static final char CHAR_EXCLAMATION_POINT = '!';
97+
public static final char CHAR_UNDERSCORE = '_';
98+
public static final char CHAR_HYPHEN = '-';
99+
public static final char CHAR_EQUALS = '=';
100+
public static final char CHAR_STAR = '*';
101+
public static final char CHAR_PLUS = '+';
102+
94103
private AbfsHttpConstants() {}
95104
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.fs.azurebfs.utils;
19+
20+
/**
 * CRC64 implementation for AzureBlobFileSystem.
 *
 * <p>Table-driven CRC-64 that processes input least-significant-bit first
 * using the polynomial {@code 0x9a6c9329ac4bc9b5}, starts from an all-ones
 * register and returns the bitwise complement of the final register.
 *
 * <p>The lookup table is built once when the class is initialised and is
 * never modified afterwards; {@link #compute(byte[])} keeps its running
 * value in a local variable, so instances hold no mutable state.
 */
public class CRC64 {

  private static final long POLY = 0x9a6c9329ac4bc9b5L;
  private static final int TABLE_LENGTH = 256;
  private static final long[] TABLE = buildTable();

  /**
   * @param input byte arrays.
   * @return long value of the CRC-64 checksum of the data.
   * */
  public long compute(byte[] input) {
    long crc = -1;
    for (byte b : input) {
      // Low 8 bits of (byte XOR register) select the table entry.
      crc = TABLE[(b ^ (int) crc) & 0xFF] ^ (crc >>> 8);
    }
    return ~crc;
  }

  /*
   * Build the lookup table constructed from POLY (0x9a6c9329ac4bc9b5L):
   * entry n is the result of pushing the byte value n through eight
   * shift/xor steps.
   * */
  private static long[] buildTable() {
    final long[] table = new long[TABLE_LENGTH];
    for (int n = 0; n < TABLE_LENGTH; ++n) {
      long crc = n;
      for (int i = 0; i < 8; ++i) {
        if ((crc & 1) == 1) {
          crc = (crc >>> 1) ^ POLY;
        } else {
          crc >>>= 1;
        }
      }
      table[n] = crc;
    }
    return table;
  }
}

hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import java.util.UUID;
2525
import java.util.concurrent.Callable;
2626

27-
import com.google.common.base.Preconditions;
2827
import org.junit.After;
2928
import org.junit.Before;
3029
import org.slf4j.Logger;
@@ -211,9 +210,9 @@ public AzureBlobFileSystem getFileSystem(String abfsUri) throws Exception {
211210
* @throws IOException failure during create/init.
212211
*/
213212
public AzureBlobFileSystem createFileSystem() throws IOException {
214-
Preconditions.checkState(abfs == null,
215-
"existing ABFS instance exists: %s", abfs);
216-
abfs = (AzureBlobFileSystem) FileSystem.newInstance(rawConfig);
213+
if (abfs == null) {
214+
abfs = (AzureBlobFileSystem) FileSystem.newInstance(rawConfig);
215+
}
217216
return abfs;
218217
}
219218

0 commit comments

Comments
 (0)