Skip to content

Commit f457367

Browse files
committed
HADOOP-17851. tuning content encoding
* change option to fs.s3a.object.content.encoding to line up for support of the other values
* docs
* dir markers do not have an encoding set
* tests to verify that

Change-Id: I4b042d38c9e94e7d4ffa8ee6afdc7a6c26a4c489
1 parent f4fddcb commit f457367

File tree

5 files changed

+64
-19
lines changed

5 files changed

+64
-19
lines changed

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -410,8 +410,11 @@ private Constants() {
410410
public static final String CANNED_ACL = "fs.s3a.acl.default";
411411
public static final String DEFAULT_CANNED_ACL = "";
412412

413-
// gzip, deflate, compress, br, etc.
414-
public static final String CONTENT_ENCODING = "fs.s3a.content.encoding";
413+
/**
414+
* Content encoding: gzip, deflate, compress, br, etc.
415+
* Value {@value}.
416+
*/
417+
public static final String CONTENT_ENCODING = "fs.s3a.object.content.encoding";
415418

416419
// should we try to purge old multipart uploads when starting up
417420
public static final String PURGE_EXISTING_MULTIPART =

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/HeaderProcessing.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ public class HeaderProcessing extends AbstractStoreOperation {
8383
XA_HEADER_PREFIX + Headers.CONTENT_DISPOSITION;
8484

8585
/**
86-
* Standard HTTP header found on some S3 objects: {@value}.
86+
* Content encoding; can be configured: {@value}.
8787
*/
8888
public static final String XA_CONTENT_ENCODING =
8989
XA_HEADER_PREFIX + Headers.CONTENT_ENCODING;

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -251,14 +251,16 @@ protected void setOptionalPutRequestParameters(PutObjectRequest request) {
251251
/**
252252
* Set the optional metadata for an object being created or copied.
253253
* @param metadata to update.
254+
* @param isDirectoryMarker is this for a directory marker?
254255
*/
255-
protected void setOptionalObjectMetadata(ObjectMetadata metadata) {
256+
protected void setOptionalObjectMetadata(ObjectMetadata metadata,
257+
boolean isDirectoryMarker) {
256258
final S3AEncryptionMethods algorithm
257259
= getServerSideEncryptionAlgorithm();
258260
if (S3AEncryptionMethods.SSE_S3 == algorithm) {
259261
metadata.setSSEAlgorithm(algorithm.getMethod());
260262
}
261-
if (contentEncoding != null) {
263+
if (contentEncoding != null && !isDirectoryMarker) {
262264
metadata.setContentEncoding(contentEncoding);
263265
}
264266
}
@@ -273,8 +275,21 @@ protected void setOptionalObjectMetadata(ObjectMetadata metadata) {
273275
*/
274276
@Override
275277
public ObjectMetadata newObjectMetadata(long length) {
278+
return createObjectMetadata(length, false);
279+
}
280+
281+
/**
282+
* Create a new object metadata instance.
283+
* Any standard metadata headers are added here, for example:
284+
* encryption.
285+
*
286+
* @param length length of data to set in header; Ignored if negative
287+
* @param isDirectoryMarker is this for a directory marker?
288+
* @return a new metadata instance
289+
*/
290+
private ObjectMetadata createObjectMetadata(long length, boolean isDirectoryMarker) {
276291
final ObjectMetadata om = new ObjectMetadata();
277-
setOptionalObjectMetadata(om);
292+
setOptionalObjectMetadata(om, isDirectoryMarker);
278293
if (length >= 0) {
279294
om.setContentLength(length);
280295
}
@@ -289,7 +304,7 @@ public CopyObjectRequest newCopyObjectRequest(String srcKey,
289304
new CopyObjectRequest(getBucket(), srcKey, getBucket(), dstKey);
290305
ObjectMetadata dstom = newObjectMetadata(srcom.getContentLength());
291306
HeaderProcessing.cloneObjectMetadata(srcom, dstom);
292-
setOptionalObjectMetadata(dstom);
307+
setOptionalObjectMetadata(dstom, false);
293308
copyEncryptionParameters(srcom, copyObjectRequest);
294309
copyObjectRequest.setCannedAccessControlList(cannedACL);
295310
copyObjectRequest.setNewObjectMetadata(dstom);
@@ -389,7 +404,7 @@ public int read() throws IOException {
389404
}
390405
};
391406
// preparation happens in here
392-
final ObjectMetadata md = newObjectMetadata(0L);
407+
final ObjectMetadata md = createObjectMetadata(0L, true);
393408
md.setContentType(HeaderProcessing.CONTENT_TYPE_X_DIRECTORY);
394409
PutObjectRequest putObjectRequest =
395410
newPutObjectRequest(key, md, im);

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1080,6 +1080,17 @@ options are covered in [Testing](./testing.md).
10801080
client has permission to read the bucket.
10811081
</description>
10821082
</property>
1083+
1084+
<property>
1085+
<name>fs.s3a.object.content.encoding</name>
1086+
<value></value>
1087+
<description>
1088+
Content encoding: gzip, deflate, compress, br, etc.
1089+
This will be set in the "Content-Encoding" header of the object,
1090+
and returned in the responses to HTTP HEAD/GET requests.
1091+
</description>
1092+
</property>
1093+
10831094
```
10841095

10851096
## <a name="retry_and_recovery"></a>Retry and Recovery

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AContentEncoding.java

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,31 +18,33 @@
1818

1919
package org.apache.hadoop.fs.s3a;
2020

21+
import java.io.IOException;
2122
import java.util.Map;
2223

2324
import org.assertj.core.api.Assertions;
2425
import org.junit.Test;
25-
import org.slf4j.Logger;
26-
import org.slf4j.LoggerFactory;
2726

2827
import org.apache.hadoop.conf.Configuration;
2928
import org.apache.hadoop.fs.Path;
3029
import org.apache.hadoop.fs.contract.ContractTestUtils;
31-
import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.XA_CONTENT_ENCODING;
32-
import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.decodeBytes;
33-
import org.apache.hadoop.fs.s3a.impl.StoreContext;
3430

3531
import static org.apache.hadoop.fs.s3a.Constants.CONTENT_ENCODING;
32+
import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
33+
import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.XA_CONTENT_ENCODING;
34+
import static org.apache.hadoop.fs.s3a.impl.HeaderProcessing.decodeBytes;
3635

3736
/**
3837
* Tests of content encoding object meta data.
3938
*/
4039
public class ITestS3AContentEncoding extends AbstractS3ATestBase {
4140

41+
private static final String GZIP = "gzip";
42+
4243
@Override
4344
protected Configuration createConfiguration() {
4445
Configuration conf = super.createConfiguration();
45-
conf.set(CONTENT_ENCODING, "gzip");
46+
removeBaseAndBucketOverrides(conf, CONTENT_ENCODING);
47+
conf.set(CONTENT_ENCODING, GZIP);
4648

4749
return conf;
4850
}
@@ -52,6 +54,11 @@ public void testCreatedObjectsHaveEncoding() throws Throwable {
5254
S3AFileSystem fs = getFileSystem();
5355
Path dir = methodPath();
5456
fs.mkdirs(dir);
57+
// even with content encoding enabled, directories do not have
58+
// encoding.
59+
Assertions.assertThat(getEncoding(dir))
60+
.describedAs("Encoding of object %s", dir)
61+
.isNull();
5562
Path path = new Path(dir, "1");
5663
ContractTestUtils.touch(fs, path);
5764
assertObjectHasEncoding(path);
@@ -63,15 +70,24 @@ public void testCreatedObjectsHaveEncoding() throws Throwable {
6370
/**
6471
* Assert that a given object has gzip encoding specified.
6572
* @param path path
73+
*
6674
*/
6775
private void assertObjectHasEncoding(Path path) throws Throwable {
76+
Assertions.assertThat(getEncoding(path))
77+
.describedAs("Encoding of object %s", path)
78+
.isEqualTo(GZIP);
79+
}
80+
81+
/**
82+
* Get the encoding of a path.
83+
* @param path path
84+
* @return encoding string or null
85+
* @throws IOException IO Failure.
86+
*/
87+
private String getEncoding(Path path) throws IOException {
6888
S3AFileSystem fs = getFileSystem();
6989

70-
StoreContext storeContext = fs.createStoreContext();
7190
Map<String, byte[]> xAttrs = fs.getXAttrs(path);
72-
String encoding = decodeBytes(xAttrs.get(XA_CONTENT_ENCODING));
73-
Assertions.assertThat(encoding)
74-
.describedAs("Encoding of object %s should be gzip, is %s", path, encoding)
75-
.isEqualTo("gzip");
91+
return decodeBytes(xAttrs.get(XA_CONTENT_ENCODING));
7692
}
7793
}

0 commit comments

Comments
 (0)