Skip to content

Commit 6c1fa24

Browse files
liuml07 authored and Gabor Bota committed
HADOOP-16732. S3Guard to support encrypted DynamoDB table (apache#1752). Contributed by Mingliang Liu.
1 parent 92c5890 commit 6c1fa24

File tree

9 files changed

+247
-10
lines changed

9 files changed

+247
-10
lines changed

hadoop-common-project/hadoop-common/src/main/resources/core-default.xml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1623,6 +1623,27 @@
16231623
</description>
16241624
</property>
16251625

1626+
<property>
1627+
<name>fs.s3a.s3guard.ddb.table.sse.enabled</name>
1628+
<value>false</value>
1629+
<description>
1630+
Whether server-side encryption (SSE) is enabled or disabled on the table.
1631+
By default it's disabled, meaning SSE is set to AWS owned CMK.
1632+
</description>
1633+
</property>
1634+
1635+
<property>
1636+
<name>fs.s3a.s3guard.ddb.table.sse.cmk</name>
1637+
<value/>
1638+
<description>
1639+
The KMS Customer Master Key (CMK) used for the KMS encryption on the table.
1640+
To specify a CMK, this config value can be its key ID, Amazon Resource Name
1641+
(ARN), alias name, or alias ARN. Users only need to provide this config if
1642+
the key is different from the default DynamoDB KMS Master Key, which is
1643+
alias/aws/dynamodb.
1644+
</description>
1645+
</property>
1646+
16261647
<property>
16271648
<name>fs.s3a.s3guard.ddb.max.retries</name>
16281649
<value>9</value>

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -568,6 +568,25 @@ private Constants() {
568568
*/
569569
public static final long S3GUARD_DDB_TABLE_CAPACITY_WRITE_DEFAULT = 0;
570570

571+
/**
572+
* Whether server-side encryption (SSE) is enabled or disabled on the table.
573+
* By default it's disabled, meaning SSE is set to AWS owned CMK.
574+
* @see com.amazonaws.services.dynamodbv2.model.SSESpecification#setEnabled
575+
*/
576+
public static final String S3GUARD_DDB_TABLE_SSE_ENABLED =
577+
"fs.s3a.s3guard.ddb.table.sse.enabled";
578+
579+
/**
580+
* The KMS Customer Master Key (CMK) used for the KMS encryption on the table.
581+
*
582+
* To specify a CMK, this config value can be its key ID, Amazon Resource
583+
* Name (ARN), alias name, or alias ARN. Users only provide this config
584+
* if the key is different from the default DynamoDB KMS Master Key, which is
585+
* alias/aws/dynamodb.
586+
*/
587+
public static final String S3GUARD_DDB_TABLE_SSE_CMK =
588+
"fs.s3a.s3guard.ddb.table.sse.cmk";
589+
571590
/**
572591
* The maximum put or delete requests per BatchWriteItem request.
573592
*

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStore.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1870,7 +1870,9 @@ public Map<String, String> getDiagnostics() throws IOException {
18701870
throughput.getWriteCapacityUnits() == 0
18711871
? BILLING_MODE_PER_REQUEST
18721872
: BILLING_MODE_PROVISIONED);
1873-
map.put(TABLE, desc.toString());
1873+
map.put("sse", desc.getSSEDescription() == null
1874+
? "DISABLED"
1875+
: desc.getSSEDescription().toString());
18741876
map.put(MetadataStoreCapabilities.PERSISTS_AUTHORITATIVE_BIT,
18751877
Boolean.toString(true));
18761878
} else {

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/DynamoDBMetadataStoreTableManager.java

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription;
4545
import com.amazonaws.services.dynamodbv2.model.ResourceInUseException;
4646
import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
47+
import com.amazonaws.services.dynamodbv2.model.SSESpecification;
4748
import com.amazonaws.services.dynamodbv2.model.ScanRequest;
4849
import com.amazonaws.services.dynamodbv2.model.ScanResult;
4950
import com.amazonaws.services.dynamodbv2.model.TableDescription;
@@ -63,12 +64,18 @@
6364
import org.apache.hadoop.io.retry.RetryPolicy;
6465

6566
import static java.lang.String.valueOf;
67+
68+
import static org.apache.commons.lang3.StringUtils.isEmpty;
69+
6670
import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_CAPACITY_READ_DEFAULT;
6771
import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_CAPACITY_READ_KEY;
6872
import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_CAPACITY_WRITE_DEFAULT;
6973
import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY;
7074
import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_CREATE_KEY;
75+
import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_SSE_CMK;
76+
import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_SSE_ENABLED;
7177
import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_TABLE_TAG;
78+
import static org.apache.hadoop.fs.s3a.S3AUtils.lookupPassword;
7279
import static org.apache.hadoop.fs.s3a.S3AUtils.translateDynamoDBException;
7380
import static org.apache.hadoop.fs.s3a.S3AUtils.translateException;
7481
import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.E_ON_DEMAND_NO_SET_CAPACITY;
@@ -102,6 +109,9 @@ public class DynamoDBMetadataStoreTableManager {
102109
public static final String E_INCOMPATIBLE_ITEM_VERSION
103110
= "Database table is from an incompatible S3Guard version based on table ITEM.";
104111

112+
/** The AWS managed CMK for DynamoDB server side encryption. */
113+
public static final String SSE_DEFAULT_MASTER_KEY = "alias/aws/dynamodb";
114+
105115
/** Invoker for IO. Until configured properly, use try-once. */
106116
private Invoker invoker = new Invoker(RetryPolicies.TRY_ONCE_THEN_FAIL,
107117
Invoker.NO_OP
@@ -298,6 +308,7 @@ private void createTable(ProvisionedThroughput capacity) throws IOException {
298308
.withTableName(tableName)
299309
.withKeySchema(keySchema())
300310
.withAttributeDefinitions(attributeDefinitions())
311+
.withSSESpecification(getSseSpecFromConfig())
301312
.withTags(getTableTagsFromConfig());
302313
if (capacity != null) {
303314
mode = String.format("with provisioned read capacity %d and"
@@ -322,6 +333,39 @@ private void createTable(ProvisionedThroughput capacity) throws IOException {
322333
putVersionMarkerItemToTable();
323334
}
324335

336+
/**
337+
* Get DynamoDB table server side encryption (SSE) settings from configuration.
* <p>
* If {@code S3GUARD_DDB_TABLE_SSE_ENABLED} is false (the default used here),
* an empty specification is returned with no option set. Otherwise SSE is
* enabled and, only when a CMK other than {@code SSE_DEFAULT_MASTER_KEY} is
* found under {@code S3GUARD_DDB_TABLE_SSE_CMK}, the SSE type is set to "KMS"
* with that key as the KMS master key ID.
* @return the SSE specification built from the configuration
338+
*/
339+
private SSESpecification getSseSpecFromConfig() {
340+
final SSESpecification sseSpecification = new SSESpecification();
341+
boolean enabled = conf.getBoolean(S3GUARD_DDB_TABLE_SSE_ENABLED, false);
342+
if (!enabled) {
343+
// Do not set other options if SSE is disabled. Otherwise it will throw
344+
// ValidationException.
345+
return sseSpecification;
346+
}
347+
sseSpecification.setEnabled(Boolean.TRUE);
348+
String cmk = null;
349+
try {
350+
// Get DynamoDB table SSE CMK from a configuration/credential provider.
351+
cmk = lookupPassword("", conf, S3GUARD_DDB_TABLE_SSE_CMK);
352+
} catch (IOException e) {
// The lookup failure is only logged, not rethrown: cmk stays null, so the
// empty-CMK branch below returns a spec with SSE enabled and no key set.
353+
LOG.error("Cannot retrieve " + S3GUARD_DDB_TABLE_SSE_CMK, e);
354+
}
355+
if (isEmpty(cmk)) {
356+
// Using Amazon managed default master key for DynamoDB table
357+
return sseSpecification;
358+
}
359+
if (SSE_DEFAULT_MASTER_KEY.equals(cmk)) {
// The default key is deliberately not set on the spec; only a warning is
// logged and the spec is returned with SSE enabled and no explicit key.
360+
LOG.warn("Ignoring default DynamoDB table KMS Master Key {}",
361+
SSE_DEFAULT_MASTER_KEY);
362+
} else {
363+
sseSpecification.setSSEType("KMS");
364+
sseSpecification.setKMSMasterKeyId(cmk);
365+
}
366+
return sseSpecification;
367+
}
368+
325369
/**
326370
* Return tags from configuration and the version marker for adding to
327371
* dynamo table during creation.

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
import static org.apache.hadoop.fs.s3a.S3AUtils.propagateBucketOptions;
7474
import static org.apache.hadoop.fs.s3a.commit.CommitConstants.*;
7575
import static org.apache.hadoop.fs.s3a.commit.staging.StagingCommitterConstants.FILESYSTEM_TEMP_PATH;
76+
import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStoreTableManager.SSE_DEFAULT_MASTER_KEY;
7677
import static org.apache.hadoop.service.launcher.LauncherExitCodes.*;
7778

7879
/**
@@ -143,6 +144,8 @@ public abstract class S3GuardTool extends Configured implements Tool,
143144
public static final String REGION_FLAG = "region";
144145
public static final String READ_FLAG = "read";
145146
public static final String WRITE_FLAG = "write";
147+
public static final String SSE_FLAG = "sse";
148+
public static final String CMK_FLAG = "cmk";
146149
public static final String TAG_FLAG = "tag";
147150

148151
public static final String VERBOSE = "verbose";
@@ -509,6 +512,8 @@ static class Init extends S3GuardTool {
509512
" -" + REGION_FLAG + " REGION - Service region for connections\n" +
510513
" -" + READ_FLAG + " UNIT - Provisioned read throughput units\n" +
511514
" -" + WRITE_FLAG + " UNIT - Provisioned write through put units\n" +
515+
" -" + SSE_FLAG + " - Enable server side encryption\n" +
516+
" -" + CMK_FLAG + " KEY - Customer managed CMK\n" +
512517
" -" + TAG_FLAG + " key=value; list of tags to tag dynamo table\n" +
513518
"\n" +
514519
" URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
@@ -518,11 +523,13 @@ static class Init extends S3GuardTool {
518523
+ "capacities to 0";
519524

520525
Init(Configuration conf) {
521-
super(conf);
526+
super(conf, SSE_FLAG);
522527
// read capacity.
523528
getCommandFormat().addOptionWithValue(READ_FLAG);
524529
// write capacity.
525530
getCommandFormat().addOptionWithValue(WRITE_FLAG);
531+
// customer managed customer master key (CMK) for server side encryption
532+
getCommandFormat().addOptionWithValue(CMK_FLAG);
526533
// tag
527534
getCommandFormat().addOptionWithValue(TAG_FLAG);
528535
}
@@ -546,13 +553,13 @@ public int run(String[] args, PrintStream out) throws Exception {
546553
errorln(USAGE);
547554
throw e;
548555
}
549-
550-
String readCap = getCommandFormat().getOptValue(READ_FLAG);
556+
CommandFormat commands = getCommandFormat();
557+
String readCap = commands.getOptValue(READ_FLAG);
551558
if (readCap != null && !readCap.isEmpty()) {
552559
int readCapacity = Integer.parseInt(readCap);
553560
getConf().setInt(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY, readCapacity);
554561
}
555-
String writeCap = getCommandFormat().getOptValue(WRITE_FLAG);
562+
String writeCap = commands.getOptValue(WRITE_FLAG);
556563
if (writeCap != null && !writeCap.isEmpty()) {
557564
int writeCapacity = Integer.parseInt(writeCap);
558565
getConf().setInt(S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY, writeCapacity);
@@ -565,7 +572,25 @@ public int run(String[] args, PrintStream out) throws Exception {
565572
setConf(bucketConf);
566573
}
567574

568-
String tags = getCommandFormat().getOptValue(TAG_FLAG);
575+
String cmk = commands.getOptValue(CMK_FLAG);
576+
if (commands.getOpt(SSE_FLAG)) {
577+
getConf().setBoolean(S3GUARD_DDB_TABLE_SSE_ENABLED, true);
578+
LOG.debug("SSE flag is passed to command {}", this.getName());
579+
if (!StringUtils.isEmpty(cmk)) {
580+
if (SSE_DEFAULT_MASTER_KEY.equals(cmk)) {
581+
LOG.warn("Ignoring default DynamoDB table KMS Master Key " +
582+
"alias/aws/dynamodb in configuration");
583+
} else {
584+
LOG.debug("Setting customer managed CMK {}", cmk);
585+
getConf().set(S3GUARD_DDB_TABLE_SSE_CMK, cmk);
586+
}
587+
}
588+
} else if (!StringUtils.isEmpty(cmk)) {
589+
throw invalidArgs("Option %s can only be used with option %s",
590+
CMK_FLAG, SSE_FLAG);
591+
}
592+
593+
String tags = commands.getOptValue(TAG_FLAG);
569594
if (tags != null && !tags.isEmpty()) {
570595
String[] stringList = tags.split(";");
571596
Map<String, String> tagsKV = new HashMap<>();

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,39 @@ This is the default, as configured in the default configuration options.
423423
</property>
424424
```
425425

426+
### 8. If creating a table: Enable server side encryption (SSE)
427+
428+
Encryption at rest can help you protect sensitive data in your DynamoDB table.
429+
When creating a new table, you can set server side encryption on the table
430+
using the default AWS owned customer master key (CMK), AWS managed CMK, or
431+
customer managed CMK. S3Guard code accesses the table in the same way whether
432+
SSE is enabled or not. For more details on DynamoDB table server side
433+
encryption, see the AWS page on [Encryption at Rest: How It Works](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/encryption.howitworks.html).
434+
435+
These are the default configuration options, as configured in `core-default.xml`.
436+
437+
```xml
438+
<property>
439+
<name>fs.s3a.s3guard.ddb.table.sse.enabled</name>
440+
<value>false</value>
441+
<description>
442+
Whether server-side encryption (SSE) is enabled or disabled on the table.
443+
By default it's disabled, meaning SSE is set to AWS owned CMK.
444+
</description>
445+
</property>
446+
447+
<property>
448+
<name>fs.s3a.s3guard.ddb.table.sse.cmk</name>
449+
<value/>
450+
<description>
451+
The KMS Customer Master Key (CMK) used for the KMS encryption on the table.
452+
To specify a CMK, this config value can be its key ID, Amazon Resource Name
453+
(ARN), alias name, or alias ARN. Users only need to provide this config if
454+
the key is different from the default DynamoDB KMS Master Key, which is
455+
alias/aws/dynamodb.
456+
</description>
457+
</property>
458+
```
426459

427460
## Authenticating with S3Guard
428461

@@ -583,13 +616,24 @@ of the table.
583616
[-write PROVISIONED_WRITES] [-read PROVISIONED_READS]
584617
```
585618

619+
Server side encryption (SSE) can be enabled with an AWS managed customer master key
620+
(CMK) or a customer managed CMK. By default the DynamoDB table will be encrypted
621+
with an AWS owned CMK. To use a customer managed CMK, you can specify its KMS key
622+
ID, ARN, alias name, or alias ARN. If `-sse` is given without `-cmk`, the default
623+
AWS managed CMK for DynamoDB, "alias/aws/dynamodb", will be used.
624+
625+
```bash
626+
[-sse [-cmk KMS_CMK_ID]]
627+
```
628+
586629
Tag argument can be added with a key=value list of tags. The table for the
587630
metadata store will be created with these tags in DynamoDB.
588631
589632
```bash
590633
[-tag key=value;]
591634
```
592635
636+
593637
Example 1
594638
595639
```bash
@@ -608,6 +652,7 @@ hadoop s3guard init -meta dynamodb://ireland-team -region eu-west-1 --read 0 --w
608652
609653
Creates a table "ireland-team" in the region "eu-west-1.amazonaws.com"
610654
655+
611656
Example 3
612657
613658
```bash
@@ -619,6 +664,17 @@ write capacity will be those of the site configuration's values of
619664
`fs.s3a.s3guard.ddb.table.capacity.read` and `fs.s3a.s3guard.ddb.table.capacity.write`;
620665
if these are both zero then it will be an on-demand table.
621666
667+
668+
Example 4
669+
670+
```bash
671+
hadoop s3guard init -meta dynamodb://ireland-team -sse
672+
```
673+
674+
Creates a table "ireland-team" with server side encryption enabled. The CMK will
674+
be the default AWS managed key, "alias/aws/dynamodb".
676+
677+
622678
### Import a bucket: `s3guard import`
623679
624680
```bash

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,6 +1259,27 @@ during the use of a S3Guarded S3A filesystem are wrapped by retry logic.
12591259
*The best way to verify resilience is to run the entire `hadoop-aws` test suite,
12601260
or even a real application, with throttling enabled.
12611261

1262+
### Testing encrypted DynamoDB tables
1263+
1264+
By default, a DynamoDB table is encrypted using an AWS owned customer master key
1265+
(CMK). You can enable server side encryption (SSE) using an AWS managed CMK or a
1266+
customer managed CMK in KMS before running the S3Guard tests.
1267+
1. To use the AWS managed CMK, set the config
1268+
`fs.s3a.s3guard.ddb.table.sse.enabled` to true in `auth-keys.xml`.
1269+
1. To use a customer managed CMK, create a KMS key and also set the config
1270+
`fs.s3a.s3guard.ddb.table.sse.cmk` in `auth-keys.xml`. The value can be the key ARN or alias. Example:
1271+
```
1272+
<property>
1273+
<name>fs.s3a.s3guard.ddb.table.sse.enabled</name>
1274+
<value>true</value>
1275+
</property>
1276+
<property>
1277+
<name>fs.s3a.s3guard.ddb.table.sse.cmk</name>
1278+
<value>arn:aws:kms:us-west-2:360379543683:key/071a86ff-8881-4ba0-9230-95af6d01ca01</value>
1279+
</property>
1280+
```
1281+
For more details about SSE on DynamoDB tables, please see the [S3Guard documentation](./s3guard.html).
1282+
12621283
### Testing only: Local Metadata Store
12631284

12641285
There is an in-memory Metadata Store for testing.

0 commit comments

Comments
 (0)