Skip to content

Commit

Permalink
HADOOP-13345 S3Guard: Improved Consistency for S3A.
Browse files Browse the repository at this point in the history
Contributed by: Chris Nauroth, Aaron Fabbri, Mingliang Liu, Lei (Eddy) Xu,
Sean Mackrory, Steve Loughran and others.
  • Loading branch information
steveloughran committed Sep 1, 2017
1 parent 7a96033 commit 621b43e
Show file tree
Hide file tree
Showing 101 changed files with 13,065 additions and 538 deletions.
13 changes: 13 additions & 0 deletions hadoop-assemblies/src/main/resources/assemblies/hadoop-tools.xml
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,19 @@
<directory>../hadoop-sls/target/hadoop-sls-${project.version}/sls</directory>
<outputDirectory>/share/hadoop/${hadoop.component}/sls</outputDirectory>
</fileSet>
<fileSet>
<directory>../hadoop-aws/src/main/bin</directory>
<outputDirectory>/bin</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
<fileSet>
<directory>../hadoop-aws/src/main/shellprofile.d</directory>
<includes>
<include>*</include>
</includes>
<outputDirectory>/libexec/shellprofile.d</outputDirectory>
<fileMode>0755</fileMode>
</fileSet>
</fileSets>
<dependencySets>
<dependencySet>
Expand Down
5 changes: 5 additions & 0 deletions hadoop-common-project/hadoop-common/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,11 @@
<artifactId>commons-configuration2</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
Expand Down Expand Up @@ -132,6 +133,13 @@ static <T> T newInstance(Class<T> theClass,
CONSTRUCTOR_CACHE.put(theClass, meth);
}
result = meth.newInstance(uri, conf);
} catch (InvocationTargetException e) {
Throwable cause = e.getCause();
if (cause instanceof RuntimeException) {
throw (RuntimeException) cause;
} else {
throw new RuntimeException(cause);
}
} catch (Exception e) {
throw new RuntimeException(e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,15 @@ public AbstractFileSystem run() throws UnsupportedFileSystemException {
return AbstractFileSystem.get(uri, conf);
}
});
} catch (RuntimeException ex) {
// RTEs can wrap other exceptions; if there is an IOException inner,
// throw it direct.
Throwable cause = ex.getCause();
if (cause instanceof IOException) {
throw (IOException) cause;
} else {
throw ex;
}
} catch (InterruptedException ex) {
LOG.error(ex.toString());
throw new IOException("Failed to get the AbstractFileSystem for path: "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1308,12 +1308,120 @@
</description>
</property>

<property>
<name>fs.s3a.metadatastore.authoritative</name>
<value>false</value>
<description>
When true, allow MetadataStore implementations to act as source of
truth for getting file status and directory listings. Even if this
is set to true, MetadataStore implementations may choose not to
return authoritative results. If the configured MetadataStore does
not support being authoritative, this setting will have no effect.
</description>
</property>

<property>
<name>fs.s3a.metadatastore.impl</name>
<value>org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore</value>
<description>
Fully-qualified name of the class that implements the MetadataStore
to be used by s3a. The default class, NullMetadataStore, has no
effect: s3a will continue to treat the backing S3 service as the one
and only source of truth for file and directory metadata.
</description>
</property>

<property>
<name>fs.s3a.s3guard.cli.prune.age</name>
<value>86400000</value>
<description>
Default age (in milliseconds) after which to prune metadata from the
metadatastore when the prune command is run. Can be overridden on the
command-line.
</description>
</property>


<property>
<name>fs.s3a.impl</name>
<value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
<description>The implementation class of the S3A Filesystem</description>
</property>

<property>
<name>fs.s3a.s3guard.ddb.region</name>
<value></value>
<description>
AWS DynamoDB region to connect to. An up-to-date list is
provided in the AWS Documentation: regions and endpoints. Without this
property, S3Guard will operate on the table in the associated S3 bucket region.
</description>
</property>

<property>
<name>fs.s3a.s3guard.ddb.table</name>
<value></value>
<description>
The DynamoDB table name to operate on. Without this property, the respective
S3 bucket name will be used.
</description>
</property>

<property>
<name>fs.s3a.s3guard.ddb.table.create</name>
<value>false</value>
<description>
If true, the S3A client will create the table if it does not already exist.
</description>
</property>

<property>
<name>fs.s3a.s3guard.ddb.table.capacity.read</name>
<value>500</value>
<description>
Provisioned throughput requirements for read operations in terms of capacity
units for the DynamoDB table. This config value will only be used when
creating a new DynamoDB table, though later you can manually provision by
increasing or decreasing read capacity as needed for existing tables.
See DynamoDB documents for more information.
</description>
</property>

<property>
<name>fs.s3a.s3guard.ddb.table.capacity.write</name>
<value>100</value>
<description>
Provisioned throughput requirements for write operations in terms of
capacity units for the DynamoDB table. Refer to related config
fs.s3a.s3guard.ddb.table.capacity.read before usage.
</description>
</property>

<property>
<name>fs.s3a.s3guard.ddb.max.retries</name>
<value>9</value>
<description>
Max retries on batched DynamoDB operations before giving up and
throwing an IOException. Each retry is delayed with an exponential
backoff timer which starts at 100 milliseconds and approximately
doubles each time. The minimum wait before throwing an exception is
sum(100, 200, 400, 800, .. 100*2^N-1 ) == 100 * ((2^N)-1)
So N = 9 yields at least 51.1 seconds (51,100 milliseconds) of blocking
before throwing an IOException.
</description>
</property>

<property>
<name>fs.s3a.s3guard.ddb.background.sleep</name>
<value>25</value>
<description>
Length (in milliseconds) of pause between each batch of deletes when
pruning metadata. Prevents prune operations (which can typically be low
priority background operations) from overly interfering with other I/O
operations.
</description>
</property>

<property>
<name>fs.AbstractFileSystem.s3a.impl</name>
<value>org.apache.hadoop.fs.s3a.S3A</value>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -748,13 +748,27 @@ public void testRenameChildDirForbidden() throws Exception {

/**
* This is a sanity check to make sure that any filesystem's handling of
* renames of empty dirs doesn't cause any regressions.
*/
// Fix: the sibling test added in this same change carries @Test, but this
// one does not — under a JUnit 4 runner an un-annotated method is silently
// skipped. The annotation is a no-op under JUnit 3's name-based discovery,
// so adding it is backward-compatible either way.
@Test
public void testRenameEmptyToDirWithSamePrefixAllowed() throws Throwable {
  assumeTrue(renameSupported());
  // Source: an empty directory; destination name shares its prefix.
  Path parentdir = path("testRenameEmptyToDirWithSamePrefixAllowed");
  fs.mkdirs(parentdir);
  Path dest = path("testRenameEmptyToDirWithSamePrefixAllowedDest");
  // rename(src, dst, renameMustSucceed, srcExists, dstExists)
  rename(parentdir, dest, true, false, true);
}

/**
* This is a sanity check to make sure that any filesystem's handling of
* renames of non-empty dirs doesn't cause any regressions.
*/
@Test
public void testRenameToDirWithSamePrefixAllowed() throws Throwable {
  assumeTrue(renameSupported());
  // Populate the source directory with one child file so the rename has to
  // move real content, not just an empty prefix-sharing directory.
  final Path srcDir = path("testRenameToDirWithSamePrefixAllowed");
  fs.mkdirs(srcDir);
  createFile(new Path(srcDir, "mychild"));
  final Path destDir = path("testRenameToDirWithSamePrefixAllowedDest");
  // rename(src, dst, renameMustSucceed, srcExists, dstExists)
  rename(srcDir, destDir, true, false, true);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -222,4 +222,67 @@ public void testRenameWithNonEmptySubDir() throws Throwable {
assertPathDoesNotExist("not deleted",
new Path(srcDir, "source.txt"));
}

/**
* Test that after renaming, the nested subdirectory is moved along with all
* its ancestors.
*/
@Test
public void testRenamePopulatesDirectoryAncestors() throws IOException {
  // Build a deeply nested directory tree under the source root.
  final FileSystem filesystem = getFileSystem();
  final String nested = "/dir1/dir2/dir3/dir4";
  final Path source = path("testRenamePopulatesDirectoryAncestors/source");
  filesystem.mkdirs(source);
  filesystem.mkdirs(path(source + nested));

  // Rename the root and confirm the leaf plus every intermediate ancestor
  // directory moved with it.
  final Path target = path("testRenamePopulatesDirectoryAncestorsNew");
  filesystem.rename(source, target);
  validateAncestorsMoved(source, target, nested);
}

/**
* Test that after renaming, the nested file is moved along with all its
* ancestors. It is similar to {@link #testRenamePopulatesDirectoryAncestors}.
*/
@Test
public void testRenamePopulatesFileAncestors() throws IOException {
  final FileSystem filesystem = getFileSystem();
  final Path source = path("testRenamePopulatesFileAncestors/source");
  filesystem.mkdirs(source);

  // Write a small file several directory levels below the source root.
  final String nested = "/dir1/dir2/dir3/file4";
  final byte[] data = dataset(256, 'a', 'z');
  writeDataset(filesystem, path(source + nested), data, data.length,
      1024, false);

  // Rename the root and confirm the file and all of its ancestor
  // directories moved with it.
  final Path target = path("testRenamePopulatesFileAncestorsNew");
  filesystem.rename(source, target);
  validateAncestorsMoved(source, target, nested);
}

/**
* Validate that the nested path and its ancestors have been moved.
*
* @param src the source root to move
* @param dst the destination root to move
* @param nestedPath the nested path to move
*/
private void validateAncestorsMoved(Path src, Path dst, String nestedPath)
    throws IOException {
  // The destination root must exist; the leaf must have moved across.
  assertIsDirectory(dst);
  assertPathDoesNotExist("src path should not exist", path(src + nestedPath));
  assertPathExists("dst path should exist", path(dst + nestedPath));

  // Walk up the ancestor chain of the nested path (excluding the root),
  // checking that each level disappeared from src and appeared under dst.
  for (Path ancestor = new Path(nestedPath).getParent();
      ancestor != null && !ancestor.isRoot();
      ancestor = ancestor.getParent()) {
    final Path parentSrc = path(src + ancestor.toString());
    assertPathDoesNotExist(parentSrc + " is not deleted", parentSrc);
    final Path parentDst = path(dst + ancestor.toString());
    assertPathExists(parentDst + " should exist after rename", parentDst);
    assertIsDirectory(parentDst);
  }
}

}
Loading

0 comments on commit 621b43e

Please sign in to comment.