From 38d276fe9bf9665e013f18de831895bd0909716e Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 24 Jun 2021 16:37:27 +0100 Subject: [PATCH] HADOOP-17771. S3AFS creation fails "Unable to find a region via the region provider chain." (#3133) This addresses the regression in Hadoop 3.3.1 where if no S3 endpoint is set in fs.s3a.endpoint, S3A filesystem creation may fail on non-EC2 deployments, depending on the local host environment setup. * If fs.s3a.endpoint is empty/null, and fs.s3a.endpoint.region is null, the region is set to "us-east-1". * If fs.s3a.endpoint.region is explicitly set to "" then the client falls back to the SDK region resolution chain; this works on EC2 * Details in troubleshooting.md, including a workaround for Hadoop-3.3.1+ * Also contains some minor restructuring of troubleshooting.md Contributed by Steve Loughran. --- .../org/apache/hadoop/fs/s3a/Constants.java | 6 + .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 46 +- .../hadoop/fs/s3a/impl/InternalConstants.java | 5 + .../site/markdown/tools/hadoop-aws/index.md | 12 +- .../tools/hadoop-aws/troubleshooting_s3a.md | 622 ++++++++++-------- .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 99 +++ 6 files changed, 526 insertions(+), 264 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 8dc6bba1ccef5..b741bc2301744 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1087,4 +1087,10 @@ private Constants() { */ public static final String AWS_REGION = "fs.s3a.endpoint.region"; + /** + * The special S3 region which can be used to talk to any bucket. + * Value {@value}. + */ + public static final String AWS_S3_CENTRAL_REGION = "us-east-1"; + } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 6e84497cb99d0..7dc920ce50c1d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -22,6 +22,7 @@ import java.net.URI; import com.amazonaws.ClientConfiguration; +import com.amazonaws.SdkClientException; import com.amazonaws.client.builder.AwsClientBuilder; import com.amazonaws.handlers.RequestHandler2; import com.amazonaws.services.s3.AmazonS3; @@ -41,10 +42,13 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.s3a.statistics.impl.AwsStatisticsCollector; +import org.apache.hadoop.fs.store.LogExactlyOnce; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION; import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING; import static org.apache.hadoop.fs.s3a.Constants.EXPERIMENTAL_AWS_INTERNAL_THROTTLING_DEFAULT; +import static org.apache.hadoop.fs.s3a.S3AUtils.translateException; /** * The default {@link S3ClientFactory} implementation. @@ -64,6 +68,19 @@ public class DefaultS3ClientFactory extends Configured protected static final Logger LOG = LoggerFactory.getLogger(DefaultS3ClientFactory.class); + /** + * A one-off warning of default region chains in use. 
+ */ + private static final LogExactlyOnce WARN_OF_DEFAULT_REGION_CHAIN = + new LogExactlyOnce(LOG); + + /** + * Warning message printed when the SDK Region chain is in use. + */ + private static final String SDK_REGION_CHAIN_IN_USE = + "S3A filesystem client is using" + + " the SDK region resolution chain."; + /** * Create the client by preparing the AwsConf configuration * and then invoking {@code buildAmazonS3Client()}. @@ -94,9 +111,14 @@ public AmazonS3 createS3Client( awsConf.setUserAgentSuffix(parameters.getUserAgentSuffix()); } - return buildAmazonS3Client( - awsConf, - parameters); + try { + return buildAmazonS3Client( + awsConf, + parameters); + } catch (SdkClientException e) { + // SDK refused to build. + throw translateException("creating AWS S3 client", uri.toString(), e); + } } /** @@ -109,6 +131,7 @@ public AmazonS3 createS3Client( * @param awsConf AWS configuration * @param parameters parameters * @return new AmazonS3 client + * @throws SdkClientException if the configuration is invalid. */ protected AmazonS3 buildAmazonS3Client( final ClientConfiguration awsConf, @@ -141,6 +164,21 @@ protected AmazonS3 buildAmazonS3Client( // no idea what the endpoint is, so tell the SDK // to work it out at the cost of an extra HEAD request b.withForceGlobalBucketAccessEnabled(true); + // HADOOP-17771 force set the region so the build process doesn't halt. + String region = getConf().getTrimmed(AWS_REGION, AWS_S3_CENTRAL_REGION); + LOG.debug("fs.s3a.endpoint.region=\"{}\"", region); + if (!region.isEmpty()) { + // there's either an explicit region or we have fallen back + // to the central one. + LOG.debug("Using default endpoint; setting region to {}", region); + b.setRegion(region); + } else { + // no region. + // allow this if people really want it; it is OK to rely on this + // when deployed in EC2. + WARN_OF_DEFAULT_REGION_CHAIN.warn(SDK_REGION_CHAIN_IN_USE); + LOG.debug(SDK_REGION_CHAIN_IN_USE); + } } final AmazonS3 client = b.build(); return client; @@ -206,7 +244,7 @@ protected static AmazonS3 configureAmazonS3Client(AmazonS3 s3, createEndpointConfiguration( final String endpoint, final ClientConfiguration awsConf, String awsRegion) { - LOG.debug("Creating endpoint configuration for {}", endpoint); + LOG.debug("Creating endpoint configuration for \"{}\"", endpoint); if (endpoint == null || endpoint.isEmpty()) { // the default endpoint...we should be using null at this point. LOG.debug("Using default endpoint -no need to generate a configuration"); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java index d6142f49c94a2..cf962b87a4176 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java @@ -121,4 +121,9 @@ private InternalConstants() { */ public static final int DEFAULT_UPLOAD_PART_COUNT_LIMIT = 10000; + /** + * The system property used by the AWS SDK to identify the region. 
+   */
+  public static final String AWS_REGION_SYSPROP = "aws.region";
+
 }
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
index aff7694c52aa7..48b99caf651bf 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md
@@ -438,6 +438,12 @@ you'll need to remove the `profile` prefix from the AWS configuration section he
    aws_session_token = ...
    aws_security_token = ...
 ```
+Note:
+
+1. The `region` setting is only used if `fs.s3a.endpoint.region` is set to the empty string.
+1. For the credentials to be available to applications running in a Hadoop cluster, the
+   configuration files MUST be in the `~/.aws/` directory on the local filesystem of
+   all hosts in the cluster.

 ### Using Session Credentials with `TemporaryAWSCredentialsProvider`

@@ -802,8 +808,10 @@ options are covered in [Testing](./testing.md).
 <property>
   <name>fs.s3a.endpoint.region</name>
   <description>AWS S3 region for a bucket, which bypasses the parsing of
-    fs.s3a.endpoint to know the region. Would be helpful in avoiding errors
-    while using privateLink URL and explicitly set the bucket region.
+    fs.s3a.endpoint to know the region. Would be helpful in avoiding errors
+    while using privateLink URL and explicitly set the bucket region.
+    If set to a blank string (or 1+ space), falls back to the
+    (potentially brittle) SDK region resolution process.
   </description>
 </property>

diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md
index d91607d3fd19d..30047edfb5c38 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md
@@ -18,11 +18,17 @@

 ## Introduction

-Common problems working with S3 are
+Common problems working with S3 are:

-1. Classpath setup
-1. Authentication
-1. Incorrect configuration
+1. [Classpath setup](#classpath)
+1. [Authentication](#authentication)
+1. [Access Denial](#access_denied)
+1. [Connectivity Problems](#connectivity)
+1. [File System Semantics](#semantics)
+1. [Encryption](#encryption)
+1. [Other Errors](#other)
+
+This document also includes some [best practices](#best) to aid troubleshooting.

 Troubleshooting
 IAM Assumed Roles is covered in its
@@ -572,7 +578,7 @@ S3 sts endpoint and region like the following:

 ## Connectivity Problems

-### Error message "The bucket you are attempting to access must be addressed using the specified endpoint"
+### Error "The bucket you are attempting to access must be addressed using the specified endpoint"

 This surfaces when `fs.s3a.endpoint` is configured to use an S3
 service endpoint which is neither the original AWS one, `s3.amazonaws.com` ,
 nor the one where
@@ -611,6 +617,101 @@ can be used:

 Using the explicit endpoint for the region is recommended for speed and to use
 the V4 signing API.

+### `Unable to find a region via the region provider chain`
+
+S3A client creation fails, possibly after a pause of some seconds.
+
+This failure surfaces when _all_ the following conditions are met:
+
+1. Deployment outside EC2.
+1. `fs.s3a.endpoint` is unset.
+1. `fs.s3a.endpoint.region` is set to `""`. (Hadoop 3.3.2+ only)
+1. There is no `~/.aws/config` file, or it does not set a region.
+1. The JVM system property `aws.region` is unset.
+1. The environment variable `AWS_REGION` is unset.
Stack trace (Hadoop 3.3.1):

```
Caused by: com.amazonaws.SdkClientException: Unable to find a region via the region provider chain.
  Must provide an explicit region in the builder or setup environment to supply a region.
  at com.amazonaws.client.builder.AwsClientBuilder.setRegion(AwsClientBuilder.java:462)
  at com.amazonaws.client.builder.AwsClientBuilder.configureMutableProperties(AwsClientBuilder.java:424)
  at com.amazonaws.client.builder.AwsSyncClientBuilder.build(AwsSyncClientBuilder.java:46)
  at org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.buildAmazonS3Client(DefaultS3ClientFactory.java:145)
  at org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.createS3Client(DefaultS3ClientFactory.java:97)
  at org.apache.hadoop.fs.s3a.S3AFileSystem.bindAWSClient(S3AFileSystem.java:788)
  at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:478)
```

On later releases the log includes the one-off warning
"S3A filesystem client is using the SDK region resolution chain."
before the stack trace:

```
2021-06-23 19:56:55,971 [main] WARN  s3a.DefaultS3ClientFactory (LogExactlyOnce.java:warn(39)) -
  S3A filesystem client is using the SDK region resolution chain.

2021-06-23 19:56:56,073 [main] WARN  fs.FileSystem (FileSystem.java:createFileSystem(3464)) -
  Failed to initialize fileystem s3a://osm-pds/planet:
  org.apache.hadoop.fs.s3a.AWSClientIOException: creating AWS S3 client on s3a://osm-pds:
  com.amazonaws.SdkClientException: Unable to find a region via the region provider chain.
  Must provide an explicit region in the builder or setup environment to supply a region.:
  Unable to find a region via the region provider chain.
  Must provide an explicit region in the builder or setup environment to supply a region.
  at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:208)
  at org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.createS3Client(DefaultS3ClientFactory.java:122)
  at org.apache.hadoop.fs.s3a.S3AFileSystem.bindAWSClient(S3AFileSystem.java:788)
  at org.apache.hadoop.fs.s3a.S3AFileSystem.initialize(S3AFileSystem.java:478)
  at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:3460)
  at org.apache.hadoop.fs.FileSystem.access$300(FileSystem.java:172)
  at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:3565)
  at org.apache.hadoop.fs.FileSystem$Cache.getUnique(FileSystem.java:3518)
  at org.apache.hadoop.fs.FileSystem.newInstance(FileSystem.java:592)
Caused by: com.amazonaws.SdkClientException: Unable to find a region via the region provider chain.
  Must provide an explicit region in the builder or setup environment to supply a region.
  at com.amazonaws.client.builder.AwsClientBuilder.setRegion(AwsClientBuilder.java:462)
  at com.amazonaws.client.builder.AwsClientBuilder.configureMutableProperties(AwsClientBuilder.java:424)
  at com.amazonaws.client.builder.AwsSyncClientBuilder.build(AwsSyncClientBuilder.java:46)
  at org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.buildAmazonS3Client(DefaultS3ClientFactory.java:185)
  at org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.createS3Client(DefaultS3ClientFactory.java:117)
  ... 21 more
```

Due to changes in S3 client construction in Hadoop 3.3.1, this failure surfaces in
non-EC2 deployments where no AWS endpoint was declared:
[HADOOP-17771](https://issues.apache.org/jira/browse/HADOOP-17771). On Hadoop
3.3.2 and later it takes active effort to create this stack trace.
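
When diagnosing this, the SDK's own resolution chain can be probed directly on
the affected host. A minimal diagnostic sketch, assuming the AWS SDK v1 bundled
with Hadoop is on the classpath (the class name here is illustrative):

```java
import com.amazonaws.SdkClientException;
import com.amazonaws.regions.DefaultAwsRegionProviderChain;

// Performs (approximately) the same lookup that fails above: the
// "aws.region" system property, the AWS_REGION environment variable,
// ~/.aws/config and, on EC2, the instance metadata service.
public class ProbeRegionChain {
  public static void main(String[] args) {
    try {
      String region = new DefaultAwsRegionProviderChain().getRegion();
      System.out.println("SDK resolved region: " + region);
    } catch (SdkClientException e) {
      // The same condition which S3A surfaces during filesystem creation.
      System.out.println("No region found: " + e.getMessage());
    }
  }
}
```

If this prints a region, the SDK resolution chain will also work for the S3A
client; if it throws, apply one of the fixes below.
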
**Fix: set `fs.s3a.endpoint` to `s3.amazonaws.com`**

Set `fs.s3a.endpoint` to the endpoint where the data is stored
(best), or to `s3.amazonaws.com` (second-best).

```xml
<property>
  <name>fs.s3a.endpoint</name>
  <value>s3.amazonaws.com</value>
</property>
```

For Apache Spark, this can be done in `spark-defaults.conf`:

```
spark.hadoop.fs.s3a.endpoint s3.amazonaws.com
```

Or in Scala, by editing the Spark configuration during setup:

```scala
sc.hadoopConfiguration.set("fs.s3a.endpoint", "s3.amazonaws.com")
```

Tip: set the logging of `org.apache.hadoop.fs.s3a.DefaultS3ClientFactory`
to `DEBUG` to see how the endpoint and region configuration is determined.

```
log4j.logger.org.apache.hadoop.fs.s3a.DefaultS3ClientFactory=DEBUG
```

### "Timeout waiting for connection from pool" when writing data

@@ -792,257 +893,10 @@ Again, we believe this is caused by the connection
to S3 being broken. It may
go away if the operation is retried.


## Other Errors

### `SdkClientException` Unable to verify integrity of data upload

Something has happened to the data as it was uploaded.

```
Caused by: org.apache.hadoop.fs.s3a.AWSClientIOException: saving output on dest/_task_tmp.-ext-10000/_tmp.000000_0:
  com.amazonaws.AmazonClientException: Unable to verify integrity of data upload.
  Client calculated content hash (contentMD5: L75PalQk0CIhTp04MStVOA== in base 64)
  didn't match hash (etag: 37ace01f2c383d6b9b3490933c83bb0f in hex) calculated by Amazon S3.
  You may need to delete the data stored in Amazon S3.
  (metadata.contentMD5: L75PalQk0CIhTp04MStVOA==, md5DigestStream: null,
  bucketName: ext2, key: dest/_task_tmp.-ext-10000/_tmp.000000_0):
  at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:144)
  at org.apache.hadoop.fs.s3a.S3AOutputStream.close(S3AOutputStream.java:121)
  at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
  at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106)
  at org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat$1.close(HiveIgnoreKeyTextOutputFormat.java:99)
  at org.apache.hadoop.hive.ql.exec.FileSinkOperator$FSPaths.closeWriters(FileSinkOperator.java:190)
  ... 22 more
Caused by: com.amazonaws.AmazonClientException: Unable to verify integrity of data upload.
  Client calculated content hash (contentMD5: L75PalQk0CIhTp04MStVOA== in base 64)
  didn't match hash (etag: 37ace01f2c383d6b9b3490933c83bb0f in hex) calculated by Amazon S3.
  You may need to delete the data stored in Amazon S3.
  (metadata.contentMD5: L75PalQk0CIhTp04MStVOA==, md5DigestStream: null,
  bucketName: ext2, key: dest/_task_tmp.-ext-10000/_tmp.000000_0)
  at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1492)
  at com.amazonaws.services.s3.transfer.internal.UploadCallable.uploadInOneChunk(UploadCallable.java:131)
  at com.amazonaws.services.s3.transfer.internal.UploadCallable.call(UploadCallable.java:123)
  at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:139)
  at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:47)
  ... 4 more
```

As it uploads data to S3, the AWS SDK builds up an MD5 checksum of what was
PUT/POSTed. When S3 returns the checksum of the uploaded data, that is compared
with the local checksum. If there is a mismatch, this error is reported.
- -The uploaded data is already on S3 and will stay there, though if this happens -during a multipart upload, it may not be visible (but still billed: clean up your -multipart uploads via the `hadoop s3guard uploads` command). - -Possible causes for this - -1. A (possibly transient) network problem, including hardware faults. -1. A proxy server is doing bad things to the data. -1. Some signing problem, especially with third-party S3-compatible object stores. - -This is a very, very rare occurrence. - -If the problem is a signing one, try changing the signature algorithm. - -```xml - - fs.s3a.signing-algorithm - S3SignerType - -``` - -We cannot make any promises that it will work, -only that it has been known to make the problem go away "once" - -### `AWSS3IOException` The Content-MD5 you specified did not match what we received - -Reads work, but writes, even `mkdir`, fail: - -``` -org.apache.hadoop.fs.s3a.AWSS3IOException: copyFromLocalFile(file:/tmp/hello.txt, s3a://bucket/hello.txt) - on file:/tmp/hello.txt: - The Content-MD5 you specified did not match what we received. - (Service: Amazon S3; Status Code: 400; Error Code: BadDigest; Request ID: 4018131225), - S3 Extended Request ID: null - at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:127) - at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:69) - at org.apache.hadoop.fs.s3a.S3AFileSystem.copyFromLocalFile(S3AFileSystem.java:1494) - at org.apache.hadoop.tools.cloudup.Cloudup.uploadOneFile(Cloudup.java:466) - at org.apache.hadoop.tools.cloudup.Cloudup.access$000(Cloudup.java:63) - at org.apache.hadoop.tools.cloudup.Cloudup$1.call(Cloudup.java:353) - at org.apache.hadoop.tools.cloudup.Cloudup$1.call(Cloudup.java:350) - at java.util.concurrent.FutureTask.run(FutureTask.java:266) - at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) - at java.util.concurrent.FutureTask.run(FutureTask.java:266) - at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) - at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) - at java.lang.Thread.run(Thread.java:748) -Caused by: com.amazonaws.services.s3.model.AmazonS3Exception: - The Content-MD5 you specified did not match what we received. 
- (Service: Amazon S3; Status Code: 400; Error Code: BadDigest; Request ID: 4018131225), - S3 Extended Request ID: null - at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1307) - at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:894) - at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:597) - at com.amazonaws.http.AmazonHttpClient.doExecute(AmazonHttpClient.java:363) - at com.amazonaws.http.AmazonHttpClient.executeWithTimer(AmazonHttpClient.java:329) - at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:308) - at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3659) - at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1422) - at com.amazonaws.services.s3.transfer.internal.UploadCallable.uploadInOneChunk(UploadCallable.java:131) - at com.amazonaws.services.s3.transfer.internal.UploadCallable.call(UploadCallable.java:123) - at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:139) - at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:47) - at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239) - ... 4 more -``` - -This stack trace was seen when interacting with a third-party S3 store whose -expectations of headers related to the AWS V4 signing mechanism was not -compatible with that of the specific AWS SDK Hadoop was using. - -Workaround: revert to V2 signing. - -```xml - - fs.s3a.signing-algorithm - S3SignerType - -``` - -### When writing data: "java.io.FileNotFoundException: Completing multi-part upload" - - -A multipart upload was trying to complete, but failed as there was no upload -with that ID. - -``` -java.io.FileNotFoundException: Completing multi-part upload on fork-5/test/multipart/1c397ca6-9dfb-4ac1-9cf7-db666673246b: - com.amazonaws.services.s3.model.AmazonS3Exception: The specified upload does not exist. - The upload ID may be invalid, or the upload may have been aborted or completed. - (Service: Amazon S3; Status Code: 404; Error Code: NoSuchUpload; - at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1182) - at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:770) - at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489) - at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310) - at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785) - at com.amazonaws.services.s3.AmazonS3Client.completeMultipartUpload(AmazonS3Client.java:2705) - at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload.complete(S3ABlockOutputStream.java:473) - at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload.access$200(S3ABlockOutputStream.java:382) - at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.close(S3ABlockOutputStream.java:272) - at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72) - at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106) -``` - -This can happen when all outstanding uploads have been aborted, including -the active ones. 
- -If the bucket has a lifecycle policy of deleting multipart uploads, make -sure that the expiry time of the deletion is greater than that required -for all open writes to complete the write, -*and for all jobs using the S3A committers to commit their work.* - - -### Application hangs after reading a number of files - - -The pool of https client connections and/or IO threads have been used up, -and none are being freed. - - -1. The pools aren't big enough. See ["Timeout waiting for connection from pool"](#timeout_from_pool) -2. Likely root cause: whatever code is reading files isn't calling `close()` -on the input streams. Make sure your code does this! -And if it's someone else's: make sure you have a recent version; search their -issue trackers to see if its a known/fixed problem. -If not, it's time to work with the developers, or come up with a workaround -(i.e closing the input stream yourself). - - - -### Issue: when writing data, HTTP Exceptions logged at info from `AmazonHttpClient` - -``` -[s3a-transfer-shared-pool4-t6] INFO http.AmazonHttpClient (AmazonHttpClient.java:executeHelper(496)) - - Unable to execute HTTP request: hwdev-steve-ireland-new.s3.amazonaws.com:443 failed to respond -org.apache.http.NoHttpResponseException: bucket.s3.amazonaws.com:443 failed to respond - at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:143) - at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57) - at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:261) - at org.apache.http.impl.AbstractHttpClientConnection.receiveResponseHeader(AbstractHttpClientConnection.java:283) - at org.apache.http.impl.conn.DefaultClientConnection.receiveResponseHeader(DefaultClientConnection.java:259) - at org.apache.http.impl.conn.ManagedClientConnectionImpl.receiveResponseHeader(ManagedClientConnectionImpl.java:209) - at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:272) - at com.amazonaws.http.protocol.SdkHttpRequestExecutor.doReceiveResponse(SdkHttpRequestExecutor.java:66) - at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:124) - at org.apache.http.impl.client.DefaultRequestDirector.tryExecute(DefaultRequestDirector.java:686) - at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:488) - at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:884) - at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82) - at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55) - at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:728) - at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489) - at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310) - at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785) - at com.amazonaws.services.s3.AmazonS3Client.copyPart(AmazonS3Client.java:1731) - at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:41) - at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:28) - at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239) - at java.util.concurrent.FutureTask.run(FutureTask.java:266) - at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) - at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) - at java.lang.Thread.run(Thread.java:745) -``` - -These are HTTP I/O exceptions caught and logged inside the AWS SDK. The client -will attempt to retry the operation; it may just be a transient event. If there -are many such exceptions in logs, it may be a symptom of connectivity or network -problems. - -### `AWSBadRequestException` IllegalLocationConstraintException/The unspecified location constraint is incompatible - -``` - Cause: org.apache.hadoop.fs.s3a.AWSBadRequestException: put on : - com.amazonaws.services.s3.model.AmazonS3Exception: - The unspecified location constraint is incompatible for the region specific - endpoint this request was sent to. - (Service: Amazon S3; Status Code: 400; Error Code: IllegalLocationConstraintException; - - at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:178) - at org.apache.hadoop.fs.s3a.S3ALambda.execute(S3ALambda.java:64) - at org.apache.hadoop.fs.s3a.WriteOperationHelper.uploadObject(WriteOperationHelper.java:451) - at org.apache.hadoop.fs.s3a.commit.magic.MagicCommitTracker.aboutToComplete(MagicCommitTracker.java:128) - at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.close(S3ABlockOutputStream.java:373) - at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72) - at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:101) - at org.apache.hadoop.hive.ql.io.orc.WriterImpl.close(WriterImpl.java:2429) - at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.close(OrcOutputFormat.java:106) - at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.close(OrcOutputFormat.java:91) - ... - Cause: com.amazonaws.services.s3.model.AmazonS3Exception: - The unspecified location constraint is incompatible for the region specific endpoint - this request was sent to. (Service: Amazon S3; Status Code: 400; Error Code: IllegalLocationConstraintException; - Request ID: EEBC5A08BCB3A645) - at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1588) - at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1258) - at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1030) - at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:742) - at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:716) - at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:699) - at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:667) - at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:649) - at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:513) - at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4221) - ... -``` - -Something has been trying to write data to "/". - -## File System Semantics +## File System Semantics These are the issues where S3 does not appear to behave the way a filesystem -"should". +"should". That's because it "isn't". ### File not visible/saved @@ -1185,7 +1039,7 @@ We also recommend using applications/application options which do not rename files when committing work or when copying data to S3, but instead write directly to the final destination. 
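
Where the deployment permits it, an S3A committer avoids the commit-time
rename entirely by uploading task output directly to the final paths. A hedged
sketch of the relevant configuration; the committer and conflict mode chosen
here are illustrative, so consult the committer documentation before adopting
them:

```java
import org.apache.hadoop.conf.Configuration;

public class CommitterSetup {
  public static Configuration withS3ACommitter() {
    Configuration conf = new Configuration();
    // Select a committer which writes directly to the destination
    // instead of renaming files into place at job commit.
    conf.set("fs.s3a.committer.name", "directory");
    // Illustrative: what to do when the destination already has data.
    conf.set("fs.s3a.committer.staging.conflict-mode", "append");
    return conf;
  }
}
```
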
-## Rename not behaving as "expected" +### Rename not behaving as "expected" S3 is not a filesystem. The S3A connector mimics file and directory rename by @@ -1303,7 +1157,7 @@ is used, no encryption is specified, or the SSE-C specified is incorrect. 2. A directory is encrypted with a SSE-C keyA and the user is trying to move a file using configured SSE-C keyB into that structure. -## Message appears in logs "Not all bytes were read from the S3ObjectInputStream" +### Message appears in logs "Not all bytes were read from the S3ObjectInputStream" This is a message which can be generated by the Amazon SDK when the client application @@ -1378,8 +1232,250 @@ The specified bucket does not exist at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1367) ``` +## Other Errors + +### `SdkClientException` Unable to verify integrity of data upload + +Something has happened to the data as it was uploaded. + +``` +Caused by: org.apache.hadoop.fs.s3a.AWSClientIOException: saving output on dest/_task_tmp.-ext-10000/_tmp.000000_0: + com.amazonaws.AmazonClientException: Unable to verify integrity of data upload. + Client calculated content hash (contentMD5: L75PalQk0CIhTp04MStVOA== in base 64) + didn't match hash (etag: 37ace01f2c383d6b9b3490933c83bb0f in hex) calculated by Amazon S3. + You may need to delete the data stored in Amazon S3. + (metadata.contentMD5: L75PalQk0CIhTp04MStVOA==, md5DigestStream: null, + bucketName: ext2, key: dest/_task_tmp.-ext-10000/_tmp.000000_0): + at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:144) + at org.apache.hadoop.fs.s3a.S3AOutputStream.close(S3AOutputStream.java:121) + at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72) + at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106) + at org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat$1.close(HiveIgnoreKeyTextOutputFormat.java:99) + at org.apache.hadoop.hive.ql.exec.FileSinkOperator$FSPaths.closeWriters(FileSinkOperator.java:190) + ... 22 more +Caused by: com.amazonaws.AmazonClientException: Unable to verify integrity of data upload. + Client calculated content hash (contentMD5: L75PalQk0CIhTp04MStVOA== in base 64) + didn't match hash (etag: 37ace01f2c383d6b9b3490933c83bb0f in hex) calculated by Amazon S3. + You may need to delete the data stored in Amazon S3. + (metadata.contentMD5: L75PalQk0CIhTp04MStVOA==, md5DigestStream: null, + bucketName: ext2, key: dest/_task_tmp.-ext-10000/_tmp.000000_0) + at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1492) + at com.amazonaws.services.s3.transfer.internal.UploadCallable.uploadInOneChunk(UploadCallable.java:131) + at com.amazonaws.services.s3.transfer.internal.UploadCallable.call(UploadCallable.java:123) + at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:139) + at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:47) + ... 4 more +``` + +As it uploads data to S3, the AWS SDK builds up an MD5 checksum of what was +PUT/POSTed. When S3 returns the checksum of the uploaded data, that is compared +with the local checksum. If there is a mismatch, this error is reported. + +The uploaded data is already on S3 and will stay there, though if this happens +during a multipart upload, it may not be visible (but still billed: clean up +your multipart uploads via the `hadoop s3guard uploads` command). + +Possible causes for this + +1. 
A (possibly transient) network problem, including hardware faults. +1. A proxy server is doing bad things to the data. +1. Some signing problem, especially with third-party S3-compatible object + stores. + +This is a very, very rare occurrence. + +If the problem is a signing one, try changing the signature algorithm. + +```xml + + fs.s3a.signing-algorithm + S3SignerType + +``` + +We cannot make any promises that it will work, only that it has been known to +make the problem go away "once" + +### `AWSS3IOException` The Content-MD5 you specified did not match what we received + +Reads work, but writes, even `mkdir`, fail: + +``` +org.apache.hadoop.fs.s3a.AWSS3IOException: copyFromLocalFile(file:/tmp/hello.txt, s3a://bucket/hello.txt) + on file:/tmp/hello.txt: + The Content-MD5 you specified did not match what we received. + (Service: Amazon S3; Status Code: 400; Error Code: BadDigest; Request ID: 4018131225), + S3 Extended Request ID: null + at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:127) + at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:69) + at org.apache.hadoop.fs.s3a.S3AFileSystem.copyFromLocalFile(S3AFileSystem.java:1494) + at org.apache.hadoop.tools.cloudup.Cloudup.uploadOneFile(Cloudup.java:466) + at org.apache.hadoop.tools.cloudup.Cloudup.access$000(Cloudup.java:63) + at org.apache.hadoop.tools.cloudup.Cloudup$1.call(Cloudup.java:353) + at org.apache.hadoop.tools.cloudup.Cloudup$1.call(Cloudup.java:350) + at java.util.concurrent.FutureTask.run(FutureTask.java:266) + at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) + at java.util.concurrent.FutureTask.run(FutureTask.java:266) + at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) + at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) + at java.lang.Thread.run(Thread.java:748) +Caused by: com.amazonaws.services.s3.model.AmazonS3Exception: + The Content-MD5 you specified did not match what we received. + (Service: Amazon S3; Status Code: 400; Error Code: BadDigest; Request ID: 4018131225), + S3 Extended Request ID: null + at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1307) + at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:894) + at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:597) + at com.amazonaws.http.AmazonHttpClient.doExecute(AmazonHttpClient.java:363) + at com.amazonaws.http.AmazonHttpClient.executeWithTimer(AmazonHttpClient.java:329) + at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:308) + at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3659) + at com.amazonaws.services.s3.AmazonS3Client.putObject(AmazonS3Client.java:1422) + at com.amazonaws.services.s3.transfer.internal.UploadCallable.uploadInOneChunk(UploadCallable.java:131) + at com.amazonaws.services.s3.transfer.internal.UploadCallable.call(UploadCallable.java:123) + at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:139) + at com.amazonaws.services.s3.transfer.internal.UploadMonitor.call(UploadMonitor.java:47) + at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239) + ... 
4 more
```

This stack trace was seen when interacting with a third-party S3 store whose
expectations of headers related to the AWS V4 signing mechanism were not
compatible with those of the specific AWS SDK Hadoop was using.

Workaround: revert to V2 signing.

```xml
<property>
  <name>fs.s3a.signing-algorithm</name>
  <value>S3SignerType</value>
</property>
```

### When writing data: "java.io.FileNotFoundException: Completing multi-part upload"

A multipart upload was trying to complete, but failed as there was no upload
with that ID.

```
java.io.FileNotFoundException: Completing multi-part upload on fork-5/test/multipart/1c397ca6-9dfb-4ac1-9cf7-db666673246b:
  com.amazonaws.services.s3.model.AmazonS3Exception: The specified upload does not exist.
  The upload ID may be invalid, or the upload may have been aborted or completed.
  (Service: Amazon S3; Status Code: 404; Error Code: NoSuchUpload;
  at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:1182)
  at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:770)
  at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489)
  at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310)
  at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785)
  at com.amazonaws.services.s3.AmazonS3Client.completeMultipartUpload(AmazonS3Client.java:2705)
  at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload.complete(S3ABlockOutputStream.java:473)
  at org.apache.hadoop.fs.s3a.S3ABlockOutputStream$MultiPartUpload.access$200(S3ABlockOutputStream.java:382)
  at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.close(S3ABlockOutputStream.java:272)
  at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
  at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:106)
```

This can happen when all outstanding uploads have been aborted, including the
active ones.

If the bucket has a lifecycle policy of deleting multipart uploads, make sure
that the expiry time of the deletion is greater than that required for all open
writes to complete the write,
*and for all jobs using the S3A committers to commit their work.*

### Application hangs after reading a number of files

The pools of HTTPS client connections and IO threads have been used up, and
none are being freed.

1. The pools aren't big enough.
   See ["Timeout waiting for connection from pool"](#timeout_from_pool)
2. Likely root cause: whatever code is reading files isn't calling `close()`
   on the input streams. Make sure your code does this!
   And if it's someone else's: make sure you have a recent version; search their
   issue trackers to see if it's a known/fixed problem. If not, it's time to work
   with the developers, or come up with a workaround
   (i.e. closing the input stream yourself), as shown in the sketch below.
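
A minimal sketch of that pattern: try-with-resources guarantees that the
stream, and the pooled HTTP connection beneath it, is released even when the
read fails. The bucket and object names are placeholders:

```java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;

public class ReadThenClose {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path path = new Path("s3a://example-bucket/data.csv"); // placeholder
    try (FSDataInputStream in = path.getFileSystem(conf).open(path)) {
      byte[] buffer = new byte[8192];
      int read;
      while ((read = in.read(buffer)) > 0) {
        // process buffer[0..read) here
      }
    } // close() runs here, returning the connection to the pool
  }
}
```
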
+ +### Issue: when writing data, HTTP Exceptions logged at info from `AmazonHttpClient` + +``` +[s3a-transfer-shared-pool4-t6] INFO http.AmazonHttpClient (AmazonHttpClient.java:executeHelper(496)) + - Unable to execute HTTP request: hwdev-steve-ireland-new.s3.amazonaws.com:443 failed to respond +org.apache.http.NoHttpResponseException: bucket.s3.amazonaws.com:443 failed to respond + at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:143) + at org.apache.http.impl.conn.DefaultHttpResponseParser.parseHead(DefaultHttpResponseParser.java:57) + at org.apache.http.impl.io.AbstractMessageParser.parse(AbstractMessageParser.java:261) + at org.apache.http.impl.AbstractHttpClientConnection.receiveResponseHeader(AbstractHttpClientConnection.java:283) + at org.apache.http.impl.conn.DefaultClientConnection.receiveResponseHeader(DefaultClientConnection.java:259) + at org.apache.http.impl.conn.ManagedClientConnectionImpl.receiveResponseHeader(ManagedClientConnectionImpl.java:209) + at org.apache.http.protocol.HttpRequestExecutor.doReceiveResponse(HttpRequestExecutor.java:272) + at com.amazonaws.http.protocol.SdkHttpRequestExecutor.doReceiveResponse(SdkHttpRequestExecutor.java:66) + at org.apache.http.protocol.HttpRequestExecutor.execute(HttpRequestExecutor.java:124) + at org.apache.http.impl.client.DefaultRequestDirector.tryExecute(DefaultRequestDirector.java:686) + at org.apache.http.impl.client.DefaultRequestDirector.execute(DefaultRequestDirector.java:488) + at org.apache.http.impl.client.AbstractHttpClient.doExecute(AbstractHttpClient.java:884) + at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:82) + at org.apache.http.impl.client.CloseableHttpClient.execute(CloseableHttpClient.java:55) + at com.amazonaws.http.AmazonHttpClient.executeOneRequest(AmazonHttpClient.java:728) + at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:489) + at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:310) + at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:3785) + at com.amazonaws.services.s3.AmazonS3Client.copyPart(AmazonS3Client.java:1731) + at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:41) + at com.amazonaws.services.s3.transfer.internal.CopyPartCallable.call(CopyPartCallable.java:28) + at org.apache.hadoop.fs.s3a.BlockingThreadPoolExecutorService$CallableWithPermitRelease.call(BlockingThreadPoolExecutorService.java:239) + at java.util.concurrent.FutureTask.run(FutureTask.java:266) + at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) + at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) + at java.lang.Thread.run(Thread.java:745) +``` + +These are HTTP I/O exceptions caught and logged inside the AWS SDK. The client +will attempt to retry the operation; it may just be a transient event. If there +are many such exceptions in logs, it may be a symptom of connectivity or network +problems. + +### `AWSBadRequestException` IllegalLocationConstraintException/The unspecified location constraint is incompatible -## Other Issues +``` + Cause: org.apache.hadoop.fs.s3a.AWSBadRequestException: put on : + com.amazonaws.services.s3.model.AmazonS3Exception: + The unspecified location constraint is incompatible for the region specific + endpoint this request was sent to. 
(Service: Amazon S3; Status Code: 400; Error Code: IllegalLocationConstraintException;

  at org.apache.hadoop.fs.s3a.S3AUtils.translateException(S3AUtils.java:178)
  at org.apache.hadoop.fs.s3a.S3ALambda.execute(S3ALambda.java:64)
  at org.apache.hadoop.fs.s3a.WriteOperationHelper.uploadObject(WriteOperationHelper.java:451)
  at org.apache.hadoop.fs.s3a.commit.magic.MagicCommitTracker.aboutToComplete(MagicCommitTracker.java:128)
  at org.apache.hadoop.fs.s3a.S3ABlockOutputStream.close(S3ABlockOutputStream.java:373)
  at org.apache.hadoop.fs.FSDataOutputStream$PositionCache.close(FSDataOutputStream.java:72)
  at org.apache.hadoop.fs.FSDataOutputStream.close(FSDataOutputStream.java:101)
  at org.apache.hadoop.hive.ql.io.orc.WriterImpl.close(WriterImpl.java:2429)
  at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.close(OrcOutputFormat.java:106)
  at org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat$OrcRecordWriter.close(OrcOutputFormat.java:91)
  ...
  Cause: com.amazonaws.services.s3.model.AmazonS3Exception:
  The unspecified location constraint is incompatible for the region specific endpoint
  this request was sent to. (Service: Amazon S3; Status Code: 400; Error Code: IllegalLocationConstraintException;
  Request ID: EEBC5A08BCB3A645)
  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1588)
  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1258)
  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1030)
  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:742)
  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:716)
  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:699)
  at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:667)
  at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:649)
  at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:513)
  at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:4221)
  ...
```

Something has been trying to write data to "/".

## Best Practices

### Enabling low-level logging

@@ -1444,10 +1540,20 @@ http.headers (LoggingManagedHttpClientConnection.java:onResponseReceived(127)) - http-outgoing-0 << Content-Length: 0
http.headers (LoggingManagedHttpClientConnection.java:onResponseReceived(127)) - http-outgoing-0 << Server: AmazonS3
execchain.MainClientExec (MainClientExec.java:execute(284)) - Connection can be kept alive for 60000 MILLISECONDS
```

### Enable S3 Server-side Logging

The [Auditing](auditing) feature of the S3A connector can be used to generate
S3 server logs containing information useful for debugging problems
working with S3, such as throttling events.

Consult the [auditing documentation](auditing).
As auditing is enabled by default, enabling S3 logging for a bucket
should be sufficient to collect these logs.

### Reducing failures by configuring retry policy

The S3A client can be configured to retry those operations which are considered
retryable.
That can be because they are idempotent, or diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index abd637a5240cb..761dd558063ba 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -18,13 +18,25 @@ package org.apache.hadoop.fs.s3a; +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + import com.amazonaws.ClientConfiguration; import com.amazonaws.client.builder.AwsClientBuilder; +import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.util.AwsHostNameUtils; import org.assertj.core.api.Assertions; import org.junit.Test; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext; + import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; +import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_CENTRAL_REGION; +import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ENDPOINT; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.AWS_REGION_SYSPROP; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** * Test to check correctness of S3A endpoint regions in @@ -36,6 +48,7 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase { private static final String AWS_ENDPOINT_TEST = "test-endpoint"; private static final String AWS_ENDPOINT_TEST_WITH_REGION = "test-endpoint.some-region.amazonaws.com"; + public static final String MARS_NORTH_2 = "mars-north-2"; /** * Test to verify that setting a region with the config would bypass the @@ -88,4 +101,90 @@ private AwsClientBuilder.EndpointConfiguration createEpr(String endpoint, return DefaultS3ClientFactory.createEndpointConfiguration(endpoint, new ClientConfiguration(), awsRegion); } + + + @Test + public void testInvalidRegionDefaultEndpoint() throws Throwable { + describe("Create a client with an invalid region and the default endpoint"); + Configuration conf = getConfiguration(); + // we are making a big assumption about the timetable for AWS + // region rollout. + // if this test ever fails because this region now exists + // -congratulations! + conf.set(AWS_REGION, MARS_NORTH_2); + createMarsNorth2Client(conf); + } + + @Test + public void testUnsetRegionDefaultEndpoint() throws Throwable { + describe("Create a client with no region and the default endpoint"); + Configuration conf = getConfiguration(); + conf.unset(AWS_REGION); + createS3Client(conf, DEFAULT_ENDPOINT, AWS_S3_CENTRAL_REGION); + } + + /** + * By setting the system property {@code "aws.region"} we can + * guarantee that the SDK region resolution chain will always succeed + * (and fast). + * Clearly there is no validation of the region during the build process. + */ + @Test + public void testBlankRegionTriggersSDKResolution() throws Throwable { + describe("Create a client with a blank region and the default endpoint." + + " This will trigger the SDK Resolution chain"); + Configuration conf = getConfiguration(); + conf.set(AWS_REGION, ""); + System.setProperty(AWS_REGION_SYSPROP, MARS_NORTH_2); + try { + createMarsNorth2Client(conf); + } finally { + System.clearProperty(AWS_REGION_SYSPROP); + } + } + + /** + * Create an S3 client bonded to an invalid region; + * verify that calling {@code getRegion()} triggers + * a failure. 
+ * @param conf configuration to use in the building. + */ + private void createMarsNorth2Client(Configuration conf) throws Exception { + AmazonS3 client = createS3Client(conf, DEFAULT_ENDPOINT, MARS_NORTH_2); + intercept(IllegalArgumentException.class, MARS_NORTH_2, client::getRegion); + } + + /** + * Create an S3 client with the given conf and endpoint. + * The region name must then match that of the expected + * value. + * @param conf configuration to use. + * @param endpoint endpoint. + * @param expectedRegion expected region + * @return the client. + * @throws URISyntaxException parse problems. + * @throws IOException IO problems + */ + private AmazonS3 createS3Client(Configuration conf, + String endpoint, + String expectedRegion) + throws URISyntaxException, IOException { + + DefaultS3ClientFactory factory + = new DefaultS3ClientFactory(); + factory.setConf(conf); + S3ClientFactory.S3ClientCreationParameters parameters + = new S3ClientFactory.S3ClientCreationParameters() + .withCredentialSet(new AnonymousAWSCredentialsProvider()) + .withEndpoint(endpoint) + .withMetrics(new EmptyS3AStatisticsContext() + .newStatisticsFromAwsSdk()); + AmazonS3 client = factory.createS3Client( + new URI("s3a://localhost/"), + parameters); + Assertions.assertThat(client.getRegionName()) + .describedAs("Client region name") + .isEqualTo(expectedRegion); + return client; + } }
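
Finally, for downstream applications the simplest way to stay clear of both
this failure mode and the SDK resolution chain is to pin the endpoint and
region explicitly. A hedged closing sketch; the bucket and region values are
placeholders to adapt:

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class ExplicitRegionClient {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("fs.s3a.endpoint", "s3.amazonaws.com");
    // With an explicit region, neither endpoint parsing nor the SDK
    // region resolution chain needs to be consulted.
    conf.set("fs.s3a.endpoint.region", "us-east-1"); // match the bucket
    try (FileSystem fs = FileSystem.newInstance(
        URI.create("s3a://example-bucket/"), conf)) {
      System.out.println("S3A client created for " + fs.getUri());
    }
  }
}
```
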