Skip to content

HADOOP-16384: Avoid inconsistencies between DDB and S3 #1003

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
142fde4
HADOOP-16384: prune resilience.
steveloughran Jun 21, 2019
23dad58
HADOOP-16384: prune debugging.
steveloughran Jun 25, 2019
9b179b9
HADOOP-16384: inconsisent DB after test runs.
steveloughran Jun 26, 2019
e83eb0e
HADOOP-16384: fix compilation error; checkstyle was mistaken
steveloughran Jun 26, 2019
6751629
HADOOP-16384 prune resilience: ongoing testing
steveloughran Jul 1, 2019
fbfdd4c
HADOOP-16394 Prune Resilience and root dir test failures
steveloughran Jul 2, 2019
a3cfb61
HADOOP-16384 moving some of the sequential tests around so that the r…
steveloughran Jul 2, 2019
ccce1ba
HADOOP-16406. ITestDynamoDBMetadataStore.testProvisionTable times out…
steveloughran Jul 2, 2019
4d297e3
HADOOP-16384. More work on debugging and diagnostics
steveloughran Jul 3, 2019
c87415a
HADOOP-16384 ITestS3GuardRootOperations does a dump of the full state…
steveloughran Jul 3, 2019
cabe6b4
HADOOP-16384 new Purge entry point for erasing all entries of a files…
steveloughran Jul 4, 2019
817bfa2
HADOOP-16384 tune dump and prune commands for better UX
steveloughran Jul 5, 2019
eee02ef
HADOOP-16384 CTU provides more details on deletion; move onto a non-d…
steveloughran Jul 5, 2019
e68b463
HADOOP-16384: diagnostics
steveloughran Jul 5, 2019
49d4586
HADOOP-16384: remove the pruning from test teardowns
steveloughran Jul 8, 2019
b915e68
HADOOP-16384: track down final cause of the tombstone problem; reenab…
steveloughran Jul 8, 2019
3c0dd91
HADOOP-16384: code review, one more test
steveloughran Jul 9, 2019
66da306
HADOOP-16384 address Gabor and Yetus feedback
steveloughran Jul 10, 2019
7db3b7c
HADOOP-16384 improve dump operation
steveloughran Jul 10, 2019
0f85ac7
HADOOP-16384
steveloughran Jul 10, 2019
a6235e8
HADOOP-16384: remove unused import from AbstractS3GuardDynamoDBDiagno…
steveloughran Jul 11, 2019
6008e09
HADOOP-16384 cut superflous test; correct prune tool usage string
steveloughran Jul 11, 2019
d6f1241
HADOOP-16384 remove the instanceof check (for gabor); address indenta…
steveloughran Jul 11, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,18 @@ public ServiceLaunchException(int exitCode, String format, Object... args) {
}
}

/**
* Create a formatted exception.
* <p>
* This uses {@link String#format(String, Object...)}
* to build the formatted exception in the ENGLISH locale.
* @param exitCode exit code
* @param cause inner cause
* @param format format for message to use in exception
* @param args list of arguments
*/
public ServiceLaunchException(int exitCode, Throwable cause,
String format, Object... args) {
super(exitCode, String.format(Locale.ENGLISH, format, args), cause);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ public ServiceLauncher(String serviceName, String serviceClassName) {
* Get the service.
*
* Null until
* {@link #coreServiceLaunch(Configuration, List, boolean, boolean)}
* {@link #coreServiceLaunch(Configuration, Service, List, boolean, boolean)}
* has completed.
* @return the service
*/
Expand Down Expand Up @@ -303,7 +303,7 @@ public void launchServiceAndExit(List<String> args) {
exitException = e;
noteException(exitException);
}
if (exitException.getExitCode() != 0) {
if (exitException.getExitCode() == LauncherExitCodes.EXIT_USAGE) {
// something went wrong. Print the usage and commands
System.err.println(getUsageMessage());
System.err.println("Command: " + argumentString);
Expand All @@ -328,8 +328,18 @@ protected void bindCommandOptions() {
* @param exitException exception
*/
void noteException(ExitUtil.ExitException exitException) {
LOG.debug("Exception raised", exitException);
serviceExitCode = exitException.getExitCode();
int exitCode = exitException.getExitCode();
if (exitCode != 0) {
LOG.debug("Exception raised with exit code {}",
exitCode,
exitException);
Throwable cause = exitException.getCause();
if (cause != null) {
// log the nested exception in more detail
LOG.warn("{}", cause.toString(), cause);
}
}
serviceExitCode = exitCode;
serviceException = exitException;
}

Expand Down Expand Up @@ -451,17 +461,38 @@ public int loadConfigurationClasses() {
* @param execute execute/wait for the service to stop.
* @return an exit exception, which will have a status code of 0 if it worked
*/
@VisibleForTesting
public ExitUtil.ExitException launchService(Configuration conf,
List<String> processedArgs,
boolean addShutdownHook,
boolean execute) {

return launchService(conf, null, processedArgs, addShutdownHook, execute);
}

/**
* Launch a service catching all exceptions and downgrading them to exit codes
* after logging.
*
* Sets {@link #serviceException} to this value.
* @param conf configuration to use
* @param instance optional instance of the service.
* @param processedArgs command line after the launcher-specific arguments
* have been stripped out.
* @param addShutdownHook should a shutdown hook be added to terminate
* this service on shutdown. Tests should set this to false.
* @param execute execute/wait for the service to stop.
* @return an exit exception, which will have a status code of 0 if it worked
*/
public ExitUtil.ExitException launchService(Configuration conf,
S instance,
List<String> processedArgs,
boolean addShutdownHook,
boolean execute) {

ExitUtil.ExitException exitException;

try {
int exitCode = coreServiceLaunch(conf, processedArgs, addShutdownHook,
execute);
int exitCode = coreServiceLaunch(conf, instance, processedArgs,
addShutdownHook, execute);
if (service != null) {
// check to see if the service failed
Throwable failure = service.getFailureCause();
Expand Down Expand Up @@ -495,6 +526,12 @@ public ExitUtil.ExitException launchService(Configuration conf,
// exit exceptions are passed through unchanged
exitException = ee;
} catch (Throwable thrown) {
// other errors need a full log.
LOG.error("Exception raised {}",
service != null
? (service.toString() + " in state " + service.getServiceState())
: "during service instantiation",
thrown);
exitException = convertToExitException(thrown);
}
noteException(exitException);
Expand All @@ -514,6 +551,7 @@ public ExitUtil.ExitException launchService(Configuration conf,
* {@link #getService()}.
*
* @param conf configuration
* @param instance optional instance of the service.
* @param processedArgs arguments after the configuration parameters
* have been stripped out.
* @param addShutdownHook should a shutdown hook be added to terminate
Expand All @@ -530,12 +568,19 @@ public ExitUtil.ExitException launchService(Configuration conf,
*/

protected int coreServiceLaunch(Configuration conf,
S instance,
List<String> processedArgs,
boolean addShutdownHook,
boolean execute) throws Exception {

// create the service instance
instantiateService(conf);
if (instance == null) {
instantiateService(conf);
} else {
// service already exists, so instantiate
configuration = conf;
service = instance;
}
ServiceShutdownHook shutdownHook = null;

// and the shutdown hook if requested
Expand Down Expand Up @@ -685,8 +730,7 @@ protected static ExitUtil.ExitException convertToExitException(
}
// construct the new exception with the original message and
// an exit code
exitException = new ServiceLaunchException(exitCode, message);
exitException.initCause(thrown);
exitException = new ServiceLaunchException(exitCode, thrown, message);
return exitException;
}

Expand Down Expand Up @@ -917,7 +961,7 @@ protected List<String> parseCommandArgs(Configuration conf,
throw new ServiceLaunchException(EXIT_COMMAND_ARGUMENT_ERROR, e);
} catch (RuntimeException e) {
// lower level issue such as XML parse failure
throw new ServiceLaunchException(EXIT_COMMAND_ARGUMENT_ERROR,
throw new ServiceLaunchException(EXIT_COMMAND_ARGUMENT_ERROR, e,
E_PARSE_FAILED + " %s : %s", argString, e);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,10 @@
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.test.LambdaTestUtils;

import static org.apache.commons.lang3.StringUtils.join;
import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile;
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
import static org.apache.hadoop.fs.contract.ContractTestUtils.deleteChildren;
Expand Down Expand Up @@ -149,14 +151,18 @@ public void testRmRootRecursive() throws Throwable {
Path root = new Path("/");
assertIsDirectory(root);
Path file = new Path("/testRmRootRecursive");
ContractTestUtils.touch(getFileSystem(), file);
boolean deleted = getFileSystem().delete(root, true);
assertIsDirectory(root);
LOG.info("rm -rf / result is {}", deleted);
if (deleted) {
assertPathDoesNotExist("expected file to be deleted", file);
} else {
assertPathExists("expected file to be preserved", file);;
try {
ContractTestUtils.touch(getFileSystem(), file);
boolean deleted = getFileSystem().delete(root, true);
assertIsDirectory(root);
LOG.info("rm -rf / result is {}", deleted);
if (deleted) {
assertPathDoesNotExist("expected file to be deleted", file);
} else {
assertPathExists("expected file to be preserved", file);
}
} finally{
getFileSystem().delete(file, false);
}
}

Expand Down Expand Up @@ -185,28 +191,57 @@ public void testListEmptyRootDirectory() throws IOException {
for (FileStatus status : statuses) {
ContractTestUtils.assertDeleted(fs, status.getPath(), true);
}
assertEquals("listStatus on empty root-directory returned a non-empty list",
0, fs.listStatus(root).length);
assertFalse("listFiles(/, false).hasNext",
fs.listFiles(root, false).hasNext());
assertFalse("listFiles(/, true).hasNext",
fs.listFiles(root, true).hasNext());
assertFalse("listLocatedStatus(/).hasNext",
fs.listLocatedStatus(root).hasNext());
FileStatus[] rootListStatus = fs.listStatus(root);
assertEquals("listStatus on empty root-directory returned found: "
+ join("\n", rootListStatus),
0, rootListStatus.length);
assertNoElements("listFiles(/, false)",
fs.listFiles(root, false));
assertNoElements("listFiles(/, true)",
fs.listFiles(root, true));
assertNoElements("listLocatedStatus(/)",
fs.listLocatedStatus(root));
assertIsDirectory(root);
}

/**
* Assert that an iterator has no elements; the raised exception
* will include the element list.
* @param operation operation for assertion text.
* @param iter iterator
* @throws IOException failure retrieving the values.
*/
protected void assertNoElements(String operation,
RemoteIterator<LocatedFileStatus> iter) throws IOException {
List<LocatedFileStatus> resultList = toList(iter);
if (!resultList.isEmpty()) {
fail("Expected no results from " + operation + ", but got "
+ resultList.size() + " elements:\n"
+ join(resultList, "\n"));
}
}

@Test
public void testSimpleRootListing() throws IOException {
describe("test the nonrecursive root listing calls");
FileSystem fs = getFileSystem();
Path root = new Path("/");
FileStatus[] statuses = fs.listStatus(root);
String listStatusResult = join(statuses, "\n");
List<LocatedFileStatus> locatedStatusList = toList(
fs.listLocatedStatus(root));
assertEquals(statuses.length, locatedStatusList.size());
String locatedStatusResult = join(locatedStatusList, "\n");

assertEquals("listStatus(/) vs listLocatedStatus(/) with \n"
+ "listStatus =" + listStatusResult
+" listLocatedStatus = " + locatedStatusResult,
statuses.length, locatedStatusList.size());
List<LocatedFileStatus> fileList = toList(fs.listFiles(root, false));
assertTrue(fileList.size() <= statuses.length);
String listFilesResult = join(fileList, "\n");
assertTrue("listStatus(/) vs listFiles(/, false) with \n"
+ "listStatus = " + listStatusResult
+ "listFiles = " + listFilesResult,
fileList.size() <= statuses.length);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import org.apache.hadoop.fs.StreamCapabilities;
import org.apache.hadoop.io.IOUtils;
import org.junit.Assert;
import org.junit.internal.AssumptionViolatedException;
import org.junit.AssumptionViolatedException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand Down Expand Up @@ -457,8 +457,10 @@ public static void rejectRootOperation(Path path) throws IOException {
public static FileStatus[] deleteChildren(FileSystem fileSystem,
Path path,
boolean recursive) throws IOException {
LOG.debug("Deleting children of {} (recursive={})", path, recursive);
FileStatus[] children = listChildren(fileSystem, path);
for (FileStatus entry : children) {
LOG.debug("Deleting {}", entry);
fileSystem.delete(entry.getPath(), recursive);
}
return children;
Expand Down
14 changes: 13 additions & 1 deletion hadoop-tools/hadoop-aws/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,12 @@
<exclude>**/ITestS3AHuge*.java</exclude>
<!-- this sets out to overlaod DynamoDB, so must be run standalone -->
<exclude>**/ITestDynamoDBMetadataStoreScale.java</exclude>
<!-- Terasort MR jobs spawn enough processes that they use up all RAM -->
<exclude>**/ITestTerasort*.java</exclude>
<!-- MR jobs spawn enough processes that they use up all RAM -->
<exclude>**/ITest*CommitMRJob.java</exclude>
<!-- operations across the metastore -->
<exclude>**/ITestS3GuardDDBRootOperations.java</exclude>
</excludes>
</configuration>
</execution>
Expand Down Expand Up @@ -215,15 +220,22 @@
<!-- Do a sequential run for tests that cannot handle -->
<!-- parallel execution. -->
<includes>
<include>**/ITestS3AContractRootDir.java</include>
<include>**/ITestS3AFileContextStatistics.java</include>
<!-- large uploads consuming all bandwidth -->
<include>**/ITestS3AHuge*.java</include>
<!-- SSE encrypted files confuse everything else -->
<include>**/ITestS3AEncryptionSSEC*.java</include>
<!-- this sets out to overlaod DynamoDB, so must be run standalone -->
<include>**/ITestDynamoDBMetadataStoreScale.java</include>
<!-- the terasort tests both work with a file in the same path in -->
<!-- the local FS. Running them sequentially guarantees isolation -->
<!-- and that they don't conflict with the other MR jobs for RAM -->
<include>**/ITestTerasort*.java</include>
<!-- MR jobs spawn enough processes that they use up all RAM -->
<include>**/ITest*CommitMRJob.java</include>
<!-- operations across the metastore -->
<include>**/ITestS3AContractRootDir.java</include>
<include>**/ITestS3GuardDDBRootOperations.java</include>
</includes>
</configuration>
</execution>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
import com.amazonaws.AmazonClientException;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import com.google.common.annotations.VisibleForTesting;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
Expand Down Expand Up @@ -50,6 +52,7 @@
/**
* Place for the S3A listing classes; keeps all the small classes under control.
*/
@InterfaceAudience.Private
public class Listing {

private final S3AFileSystem owner;
Expand Down Expand Up @@ -87,7 +90,7 @@ ProvidedFileStatusIterator createProvidedFileStatusIterator(
* @return the iterator
* @throws IOException IO Problems
*/
FileStatusListingIterator createFileStatusListingIterator(
public FileStatusListingIterator createFileStatusListingIterator(
Path listPath,
S3ListRequest request,
PathFilter filter,
Expand All @@ -110,7 +113,7 @@ FileStatusListingIterator createFileStatusListingIterator(
* @throws IOException IO Problems
*/
@Retries.RetryRaw
FileStatusListingIterator createFileStatusListingIterator(
public FileStatusListingIterator createFileStatusListingIterator(
Path listPath,
S3ListRequest request,
PathFilter filter,
Expand All @@ -129,7 +132,7 @@ FileStatusListingIterator createFileStatusListingIterator(
* @return a new remote iterator
*/
@VisibleForTesting
LocatedFileStatusIterator createLocatedFileStatusIterator(
public LocatedFileStatusIterator createLocatedFileStatusIterator(
RemoteIterator<S3AFileStatus> statusIterator) {
return new LocatedFileStatusIterator(statusIterator);
}
Expand Down Expand Up @@ -789,7 +792,7 @@ public boolean accept(FileStatus status) {
* Accept all entries except the base path and those which map to S3N
* pseudo directory markers.
*/
static class AcceptAllButSelfAndS3nDirs implements FileStatusAcceptor {
public static class AcceptAllButSelfAndS3nDirs implements FileStatusAcceptor {

/** Base path. */
private final Path qualifiedPath;
Expand Down
Loading