3232import java .nio .charset .Charset ;
3333import java .nio .charset .CharsetDecoder ;
3434import java .nio .charset .CharsetEncoder ;
35+ import java .nio .charset .StandardCharsets ;
3536import java .text .ParseException ;
3637import java .text .SimpleDateFormat ;
3738import java .util .ArrayList ;
4748
4849import com .google .common .annotations .VisibleForTesting ;
4950import com .google .common .base .Preconditions ;
51+ import com .google .common .base .Strings ;
5052
5153import org .apache .hadoop .classification .InterfaceAudience ;
5254import org .apache .hadoop .classification .InterfaceStability ;
8183import org .apache .hadoop .fs .azurebfs .services .ExponentialRetryPolicy ;
8284import org .apache .hadoop .fs .azurebfs .services .SharedKeyCredentials ;
8385import org .apache .hadoop .fs .azurebfs .utils .Base64 ;
86+ import org .apache .hadoop .fs .azurebfs .utils .CRC64 ;
8487import org .apache .hadoop .fs .azurebfs .utils .UriUtils ;
8588import org .apache .hadoop .fs .permission .AclEntry ;
8689import org .apache .hadoop .fs .permission .AclStatus ;
9295import org .slf4j .Logger ;
9396import org .slf4j .LoggerFactory ;
9497
98+ import static org .apache .hadoop .fs .azurebfs .constants .AbfsHttpConstants .CHAR_EQUALS ;
99+ import static org .apache .hadoop .fs .azurebfs .constants .AbfsHttpConstants .CHAR_FORWARD_SLASH ;
100+ import static org .apache .hadoop .fs .azurebfs .constants .AbfsHttpConstants .CHAR_HYPHEN ;
101+ import static org .apache .hadoop .fs .azurebfs .constants .AbfsHttpConstants .CHAR_PLUS ;
102+ import static org .apache .hadoop .fs .azurebfs .constants .AbfsHttpConstants .CHAR_STAR ;
103+ import static org .apache .hadoop .fs .azurebfs .constants .AbfsHttpConstants .CHAR_UNDERSCORE ;
104+ import static org .apache .hadoop .fs .azurebfs .constants .AbfsHttpConstants .ROOT_PATH ;
105+ import static org .apache .hadoop .fs .azurebfs .constants .AbfsHttpConstants .SINGLE_WHITE_SPACE ;
106+ import static org .apache .hadoop .fs .azurebfs .constants .AbfsHttpConstants .TOKEN_VERSION ;
95107import static org .apache .hadoop .fs .azurebfs .constants .ConfigurationKeys .AZURE_ABFS_ENDPOINT ;
108+
96109/**
97110 * Provides the bridging logic between Hadoop's abstract filesystem and Azure Storage.
98111 */
@@ -106,6 +119,7 @@ public class AzureBlobFileSystemStore implements Closeable {
106119 private String userName ;
107120 private String primaryUserGroup ;
108121 private static final String DATE_TIME_PATTERN = "E, dd MMM yyyy HH:mm:ss 'GMT'" ;
122+ private static final String TOKEN_DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ss.SSSSSSS'Z'" ;
109123 private static final String XMS_PROPERTIES_ENCODING = "ISO-8859-1" ;
110124 private static final int LIST_MAX_RESULTS = 500 ;
111125
@@ -522,15 +536,43 @@ public FileStatus getFileStatus(final Path path) throws IOException {
522536 eTag );
523537 }
524538
539+ /**
540+ * @param path The list path.
541+ * @return the entries in the path.
542+ * */
525543 public FileStatus [] listStatus (final Path path ) throws IOException {
526- LOG .debug ("listStatus filesystem: {} path: {}" ,
544+ return listStatus (path , null );
545+ }
546+
547+ /**
548+ * @param path the path to list.
549+ * @param startFrom the entry name that list results should start with.
550+ * For example, if folder "/folder" contains four files: "afile", "bfile", "hfile", "ifile".
551+ * Then listStatus(Path("/folder"), "hfile") will return "/folder/hfile" and "/folder/ifile".
552+ * Notice that if startFrom is a non-existent entry name, then the list response contains
553+ * all entries after this non-existent entry in lexical order:
554+ * listStatus(Path("/folder"), "cfile") will return "/folder/hfile" and "/folder/ifile".
555+ *
556+ * @return the entries in the path, starting from "startFrom", in lexical order.
557+ * */
558+ @ InterfaceStability .Unstable
559+ public FileStatus [] listStatus (final Path path , final String startFrom ) throws IOException {
560+ LOG .debug ("listStatus filesystem: {} path: {}, startFrom: {}" ,
527561 client .getFileSystem (),
528- path );
562+ path ,
563+ startFrom );
529564
530- String relativePath = path .isRoot () ? AbfsHttpConstants .EMPTY_STRING : getRelativePath (path );
565+ final String relativePath = path .isRoot () ? AbfsHttpConstants .EMPTY_STRING : getRelativePath (path );
531566 String continuation = null ;
532- ArrayList <FileStatus > fileStatuses = new ArrayList <>();
533567
568+ // generate continuation token if a valid startFrom is provided.
569+ if (startFrom != null && !startFrom .isEmpty ()) {
570+ continuation = getIsNamespaceEnabled ()
571+ ? generateContinuationTokenForXns (startFrom )
572+ : generateContinuationTokenForNonXns (path .isRoot () ? ROOT_PATH : relativePath , startFrom );
573+ }
574+
575+ ArrayList <FileStatus > fileStatuses = new ArrayList <>();
534576 do {
535577 AbfsRestOperation op = client .listPath (relativePath , false , LIST_MAX_RESULTS , continuation );
536578 continuation = op .getResult ().getResponseHeader (HttpHeaderConfigurations .X_MS_CONTINUATION );
@@ -583,6 +625,61 @@ public FileStatus[] listStatus(final Path path) throws IOException {
583625 return fileStatuses .toArray (new FileStatus [fileStatuses .size ()]);
584626 }
585627
628+ // generate continuation token for xns account
629+ private String generateContinuationTokenForXns (final String firstEntryName ) {
630+ Preconditions .checkArgument (!Strings .isNullOrEmpty (firstEntryName )
631+ && !firstEntryName .startsWith (AbfsHttpConstants .ROOT_PATH ),
632+ "startFrom must be a dir/file name and it can not be a full path" );
633+
634+ StringBuilder sb = new StringBuilder ();
635+ sb .append (firstEntryName ).append ("#$" ).append ("0" );
636+
637+ CRC64 crc64 = new CRC64 ();
638+ StringBuilder token = new StringBuilder ();
639+ token .append (crc64 .compute (sb .toString ().getBytes (StandardCharsets .UTF_8 )))
640+ .append (SINGLE_WHITE_SPACE )
641+ .append ("0" )
642+ .append (SINGLE_WHITE_SPACE )
643+ .append (firstEntryName );
644+
645+ return Base64 .encode (token .toString ().getBytes (StandardCharsets .UTF_8 ));
646+ }
647+
648+ // generate continuation token for non-xns account
649+ private String generateContinuationTokenForNonXns (final String path , final String firstEntryName ) {
650+ Preconditions .checkArgument (!Strings .isNullOrEmpty (firstEntryName )
651+ && !firstEntryName .startsWith (AbfsHttpConstants .ROOT_PATH ),
652+ "startFrom must be a dir/file name and it can not be a full path" );
653+
654+ // Notice: non-xns continuation token requires full path (first "/" is not included) for startFrom
655+ final String startFrom = (path .isEmpty () || path .equals (ROOT_PATH ))
656+ ? firstEntryName
657+ : path + ROOT_PATH + firstEntryName ;
658+
659+ SimpleDateFormat simpleDateFormat = new SimpleDateFormat (TOKEN_DATE_PATTERN , Locale .US );
660+ String date = simpleDateFormat .format (new Date ());
661+ String token = String .format ("%06d!%s!%06d!%s!%06d!%s!" ,
662+ path .length (), path , startFrom .length (), startFrom , date .length (), date );
663+ String base64EncodedToken = Base64 .encode (token .getBytes (StandardCharsets .UTF_8 ));
664+
665+ StringBuilder encodedTokenBuilder = new StringBuilder (base64EncodedToken .length () + 5 );
666+ encodedTokenBuilder .append (String .format ("%s!%d!" , TOKEN_VERSION , base64EncodedToken .length ()));
667+
668+ for (int i = 0 ; i < base64EncodedToken .length (); i ++) {
669+ char current = base64EncodedToken .charAt (i );
670+ if (CHAR_FORWARD_SLASH == current ) {
671+ current = CHAR_UNDERSCORE ;
672+ } else if (CHAR_PLUS == current ) {
673+ current = CHAR_STAR ;
674+ } else if (CHAR_EQUALS == current ) {
675+ current = CHAR_HYPHEN ;
676+ }
677+ encodedTokenBuilder .append (current );
678+ }
679+
680+ return encodedTokenBuilder .toString ();
681+ }
682+
586683 public void setOwner (final Path path , final String owner , final String group ) throws
587684 AzureBlobFileSystemException {
588685 if (!getIsNamespaceEnabled ()) {
@@ -1002,7 +1099,7 @@ public boolean equals(Object obj) {
10021099
10031100 FileStatus other = (FileStatus ) obj ;
10041101
1005- if (!other . equals (this )) {// compare the path
1102+ if (!this . getPath (). equals (other . getPath () )) {// compare the path
10061103 return false ;
10071104 }
10081105
0 commit comments