Skip to content

HADOOP-16465 listLocatedStatus() optimisation #1943

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4283,41 +4283,68 @@ public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f,
RemoteIterator<? extends LocatedFileStatus> iterator =
once("listLocatedStatus", path.toString(),
() -> {
// lookup dir triggers existence check
final S3AFileStatus fileStatus =
(S3AFileStatus) getFileStatus(path);
if (fileStatus.isFile()) {
// simple case: File
LOG.debug("Path is a file");
return new Listing.SingleStatusRemoteIterator(
filter.accept(path) ? toLocatedFileStatus(fileStatus) : null);
} else {
// directory: trigger a lookup
final String key = maybeAddTrailingSlash(pathToKey(path));
final Listing.FileStatusAcceptor acceptor =
new Listing.AcceptAllButSelfAndS3nDirs(path);
boolean allowAuthoritative = allowAuthoritative(f);
DirListingMetadata meta =
S3Guard.listChildrenWithTtl(metadataStore, path,
ttlTimeProvider, allowAuthoritative);
final RemoteIterator<S3AFileStatus> cachedFileStatusIterator =
listing.createProvidedFileStatusIterator(
S3Guard.dirMetaToStatuses(meta), filter, acceptor);
return (allowAuthoritative && meta != null
&& meta.isAuthoritative())
? listing.createLocatedFileStatusIterator(
cachedFileStatusIterator)
: listing.createLocatedFileStatusIterator(
listing.createFileStatusListingIterator(path,
createListObjectsRequest(key, "/"),
filter,
acceptor,
cachedFileStatusIterator));
// Assuming the path to be a directory,
// trigger a list call directly.
final RemoteIterator<S3ALocatedFileStatus>
locatedFileStatusIteratorForDir =
getLocatedFileStatusIteratorForDir(path, filter);

// If no listing is present then path might be a file.
if (!locatedFileStatusIteratorForDir.hasNext()) {
final S3AFileStatus fileStatus =
(S3AFileStatus) getFileStatus(path);
if (fileStatus.isFile()) {
// simple case: File
LOG.debug("Path is a file");
return new Listing.SingleStatusRemoteIterator(
filter.accept(path)
? toLocatedFileStatus(fileStatus)
: null);
}
}
// Either empty or non-empty directory.
return locatedFileStatusIteratorForDir;
});
return toLocatedFileStatusIterator(iterator);
}

/**
 * Build the located-status iterator used to list a directory.
 * When the cached listing is authoritative, and authoritative mode is
 * allowed for the path, the cached entries are returned directly.
 * Otherwise the cached entries are handed to a list-objects based
 * iterator and the result is wrapped in a tombstone reconciling iterator.
 * @param dir directory to check.
 * @param filter a path filter.
 * @return an iterator that traverses statuses of the given dir.
 * @throws IOException in case of failure.
 */
private RemoteIterator<S3ALocatedFileStatus> getLocatedFileStatusIteratorForDir(
    Path dir, PathFilter filter) throws IOException {
  final String dirKey = maybeAddTrailingSlash(pathToKey(dir));
  final Listing.FileStatusAcceptor statusAcceptor =
      new Listing.AcceptAllButSelfAndS3nDirs(dir);
  final boolean authoritativeAllowed = allowAuthoritative(dir);
  final DirListingMetadata dirMeta = S3Guard.listChildrenWithTtl(
      metadataStore, dir, ttlTimeProvider, authoritativeAllowed);

  // No metadata entry means no tombstone set; null is passed on as-is.
  Set<Path> tombstones = null;
  if (dirMeta != null) {
    tombstones = dirMeta.listTombstones();
  }

  final RemoteIterator<S3AFileStatus> cachedEntries =
      listing.createProvidedFileStatusIterator(
          S3Guard.dirMetaToStatuses(dirMeta), filter, statusAcceptor);

  // Authoritative cached listing: no list-objects request is built.
  if (authoritativeAllowed
      && dirMeta != null
      && dirMeta.isAuthoritative()) {
    return listing.createLocatedFileStatusIterator(cachedEntries);
  }

  // Otherwise issue a list request for the directory key and reconcile
  // the located results against the tombstones gathered above.
  return listing.createTombstoneReconcilingIterator(
      listing.createLocatedFileStatusIterator(
          listing.createFileStatusListingIterator(
              dir,
              createListObjectsRequest(dirKey, "/"),
              filter,
              statusAcceptor,
              cachedEntries)),
      tombstones);
}

/**
* Build a {@link S3ALocatedFileStatus} from a {@link FileStatus} instance.
* @param status file status
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,63 @@ public void setup() throws Exception {
skipDuringFaultInjection(fs);
}

/**
 * Check the request counters after calling listLocatedStatus()
 * on a path which refers to a file.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfLocatedFileStatusOnFile() throws Throwable {
  describe("performing listLocatedStatus on a file");
  final Path target = path(getMethodName() + ".txt");
  final S3AFileSystem s3a = getFileSystem();
  touch(s3a, target);
  // Only the listLocatedStatus() call below should be measured.
  resetMetricDiffs();
  s3a.listLocatedStatus(target);
  final boolean guarded = s3a.hasMetadataStore();
  if (!guarded) {
    // Unguarded FS: exactly one metadata request is expected.
    metadataRequests.assertDiffEquals(1);
  }
  // One list request is expected with or without a metadata store.
  listRequests.assertDiffEquals(1);
}

/**
 * Check the operation counts after calling listLocatedStatus()
 * on a directory created with mkdirs() and left empty.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfListLocatedStatusOnEmptyDir() throws Throwable {
  describe("performing listLocatedStatus on an empty dir");
  final Path emptyDir = path(getMethodName());
  final S3AFileSystem s3a = getFileSystem();
  s3a.mkdirs(emptyDir);
  // Only the listLocatedStatus() call below should be measured.
  resetMetricDiffs();
  s3a.listLocatedStatus(emptyDir);
  // NOTE(review): verifyOperationCount's arguments are presumably
  // (metadata requests, list requests) -- confirm against the helper.
  if (!s3a.hasMetadataStore()) {
    // Unguarded FS.
    verifyOperationCount(2, 1);
  } else if (s3a.allowAuthoritative(emptyDir)) {
    // Guarded FS, authoritative mode allowed for this path.
    verifyOperationCount(0, 0);
  } else {
    // Guarded FS, authoritative mode not allowed for this path.
    verifyOperationCount(0, 1);
  }
}

/**
 * Check the operation counts after calling listLocatedStatus()
 * on a directory which contains a single file.
 * @throws Throwable on any failure.
 */
@Test
public void testCostOfListLocatedStatusOnNonEmptyDir() throws Throwable {
  describe("performing listLocatedStatus on a non empty dir");
  final Path parentDir = path(getMethodName() + "dir");
  final S3AFileSystem s3a = getFileSystem();
  s3a.mkdirs(parentDir);
  final Path child = new Path(parentDir, "file.txt");
  touch(s3a, child);
  // Only the listLocatedStatus() call below should be measured.
  resetMetricDiffs();
  s3a.listLocatedStatus(parentDir);
  // NOTE(review): verifyOperationCount's arguments are presumably
  // (metadata requests, list requests) -- confirm against the helper.
  // The short-circuit keeps allowAuthoritative() from being consulted
  // on an unguarded FS.
  if (s3a.hasMetadataStore() && s3a.allowAuthoritative(parentDir)) {
    // Guarded FS, authoritative mode allowed for this path.
    verifyOperationCount(0, 0);
  } else {
    // Unguarded FS, or guarded without authoritative mode for this path.
    verifyOperationCount(0, 1);
  }
}

@Test
public void testCostOfGetFileStatusOnFile() throws Throwable {
describe("performing getFileStatus on a file");
Expand Down