Commit 4c977f5

dannycjones authored and steveloughran committed
HADOOP-18168. Fix S3A ITestMarkerTool use of purged public bucket. (#4140)
This moves off use of the purged s3a://landsat-pds bucket, so fixing tests which had started failing.

* Adds a new class, PublicDatasetTestUtils, to manage the use of public datasets.
* The new test bucket s3a://usgs-landsat/ is requester pays, so depends upon HADOOP-14661.

Consult the updated test documentation when running against other S3 stores.

Contributed by Daniel Carl Jones

Change-Id: Ie8585e4d9b67667f8cb80b2970225d79a4f8d257
1 parent c2b2494 commit 4c977f5

6 files changed: +170 -18 lines changed


hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md

Lines changed: 12 additions & 0 deletions
@@ -544,6 +544,18 @@ which address issues. In particular, we encourage testing of Hadoop release
candidates, as these third-party endpoints get even less testing than the
S3 endpoint itself.

+### Public datasets used in tests
+
+Some tests rely on the presence of existing public datasets available on Amazon S3.
+You may find a number of these in `org.apache.hadoop.fs.s3a.test.PublicDatasetTestUtils`.
+
+When testing against an endpoint which is not part of Amazon S3's standard commercial partition
+(`aws`), such as third-party implementations or AWS's China regions, replace these configurations
+either with an empty space (` `) to disable the tests, or with an existing path in your object
+store that supports these tests.
+
+Examples include the marker tool tests, which require a bucket with a large number of objects,
+and the requester pays tests, which require requester pays to be enabled on the bucket.

### Disabling the encryption tests

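As a sketch of the guidance just added to testing.md: in a test configuration file (for example `auth-keys.xml`) you might override or blank out these dataset properties. The key names below are assumptions based on the `S3ATestConstants` keys in this commit (`KEY_REQUESTER_PAYS_FILE`, `KEY_BUCKET_WITH_MANY_OBJECTS`) and a `TEST_FS_S3A` prefix of `test.fs.s3a.`; `s3a://my-test-bucket/...` is a placeholder for a store you control, not a value from this commit. The "empty space" trick works because the helper trims the value and then skips the test when it is empty.

```xml
<configuration>
  <!-- Point the requester pays test at an object you control.
       Assumed key name: TEST_FS_S3A + "requester.pays.file". -->
  <property>
    <name>test.fs.s3a.requester.pays.file</name>
    <value>s3a://my-test-bucket/requester-pays/object.json</value>
  </property>

  <!-- Disable the marker tool scale audit by supplying an empty space,
       which the tests treat as "skip this test".
       Assumed key name: TEST_FS_S3A + "bucket-with-many-objects". -->
  <property>
    <name>test.fs.s3a.bucket-with-many-objects</name>
    <value> </value>
  </property>
</configuration>
```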
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ARequesterPays.java

Lines changed: 10 additions & 11 deletions
@@ -26,11 +26,11 @@
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.test.PublicDatasetTestUtils;
import org.apache.hadoop.fs.statistics.IOStatisticAssertions;
import org.apache.hadoop.fs.statistics.StreamStatisticNames;

import static org.apache.hadoop.fs.s3a.Constants.ALLOW_REQUESTER_PAYS;
-import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT;
import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;

@@ -42,10 +42,15 @@ public class ITestS3ARequesterPays extends AbstractS3ATestBase {
  @Override
  protected Configuration createConfiguration() {
    Configuration conf = super.createConfiguration();
-    S3ATestUtils.removeBaseAndBucketOverrides(conf,
+
+    Path requesterPaysPath = getRequesterPaysPath(conf);
+    String requesterPaysBucketName = requesterPaysPath.toUri().getHost();
+    S3ATestUtils.removeBaseAndBucketOverrides(
+        requesterPaysBucketName,
+        conf,
        ALLOW_REQUESTER_PAYS,
-        ENDPOINT,
        S3A_BUCKET_PROBE);
+
    return conf;
  }

@@ -102,14 +107,8 @@ public void testRequesterPaysDisabledFails() throws Throwable {
    }
  }

-  private Path getRequesterPaysPath(Configuration conf) {
-    String requesterPaysFile =
-        conf.getTrimmed(KEY_REQUESTER_PAYS_FILE, DEFAULT_REQUESTER_PAYS_FILE);
-    S3ATestUtils.assume(
-        "Empty test property: " + KEY_REQUESTER_PAYS_FILE,
-        !requesterPaysFile.isEmpty()
-    );
-    return new Path(requesterPaysFile);
+  private static Path getRequesterPaysPath(Configuration conf) {
+    return new Path(PublicDatasetTestUtils.getRequesterPaysObject(conf));
  }

}

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java

Lines changed: 11 additions & 3 deletions
@@ -20,6 +20,9 @@

import java.time.Duration;

+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.s3a.test.PublicDatasetTestUtils;
+
/**
 * Constants for S3A Testing.
 */
@@ -99,14 +102,19 @@ public interface S3ATestConstants {

  /**
   * Configuration key for an existing object in a requester pays bucket: {@value}.
-   * If not set, defaults to {@value DEFAULT_REQUESTER_PAYS_FILE}.
+   *
+   * Accessible via
+   * {@link PublicDatasetTestUtils#getRequesterPaysObject(Configuration)}.
   */
  String KEY_REQUESTER_PAYS_FILE = TEST_FS_S3A + "requester.pays.file";

  /**
-   * Default path for an S3 object inside a requester pays enabled bucket: {@value}.
+   * Configuration key for an existing bucket with many objects: {@value}.
+   *
+   * This is used for tests depending on buckets with a large number of keys.
   */
-  String DEFAULT_REQUESTER_PAYS_FILE = "s3a://usgs-landsat/collection02/catalog.json";
+  String KEY_BUCKET_WITH_MANY_OBJECTS
+      = TEST_FS_S3A + "bucket-with-many-objects";

  /**
   * Name of the property to define the timeout for scale tests: {@value}.
hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/PublicDatasetTestUtils.java

Lines changed: 101 additions & 0 deletions
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.test;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.s3a.S3ATestUtils;
+
+import static org.apache.hadoop.fs.s3a.S3ATestConstants.KEY_BUCKET_WITH_MANY_OBJECTS;
+import static org.apache.hadoop.fs.s3a.S3ATestConstants.KEY_REQUESTER_PAYS_FILE;
+
+/**
+ * Provides S3A filesystem URIs for public data sets for specific use cases.
+ *
+ * This allows for the contract between S3A tests and the existence of data sets
+ * to be explicit and also standardizes access and configuration of
+ * replacements.
+ *
+ * Bucket specific configuration such as endpoint or requester pays should be
+ * configured within "hadoop-tools/hadoop-aws/src/test/resources/core-site.xml".
+ *
+ * Warning: methods may mutate the configuration instance passed in.
+ */
+@InterfaceAudience.Private
+@InterfaceStability.Unstable
+public final class PublicDatasetTestUtils {
+
+  /**
+   * Private constructor for utility class.
+   */
+  private PublicDatasetTestUtils() {}
+
+  /**
+   * Default path for an object inside a requester pays bucket: {@value}.
+   */
+  private static final String DEFAULT_REQUESTER_PAYS_FILE
+      = "s3a://usgs-landsat/collection02/catalog.json";
+
+  /**
+   * Default bucket for an S3A file system with many objects: {@value}.
+   *
+   * We use a subdirectory to ensure we have permissions on all objects
+   * contained within as well as permission to inspect the directory itself.
+   */
+  private static final String DEFAULT_BUCKET_WITH_MANY_OBJECTS
+      = "s3a://usgs-landsat/collection02/level-1/";
+
+  /**
+   * Provide a URI for a directory containing many objects.
+   *
+   * Unless otherwise configured,
+   * this will be {@value DEFAULT_BUCKET_WITH_MANY_OBJECTS}.
+   *
+   * @param conf Hadoop configuration
+   * @return S3A FS URI
+   */
+  public static String getBucketPrefixWithManyObjects(Configuration conf) {
+    return fetchFromConfig(conf,
+        KEY_BUCKET_WITH_MANY_OBJECTS, DEFAULT_BUCKET_WITH_MANY_OBJECTS);
+  }
+
+  /**
+   * Provide a URI to an object within a requester pays enabled bucket.
+   *
+   * Unless otherwise configured,
+   * this will be {@value DEFAULT_REQUESTER_PAYS_FILE}.
+   *
+   * @param conf Hadoop configuration
+   * @return S3A FS URI
+   */
+  public static String getRequesterPaysObject(Configuration conf) {
+    return fetchFromConfig(conf,
+        KEY_REQUESTER_PAYS_FILE, DEFAULT_REQUESTER_PAYS_FILE);
+  }
+
+  private static String fetchFromConfig(Configuration conf, String key, String defaultValue) {
+    String value = conf.getTrimmed(key, defaultValue);
+
+    S3ATestUtils.assume("Empty test property: " + key, !value.isEmpty());
+
+    return value;
+  }
+
+}

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerTool.java

Lines changed: 9 additions & 4 deletions
@@ -28,9 +28,11 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.fs.s3a.test.PublicDatasetTestUtils;
import org.apache.hadoop.fs.s3a.S3AFileSystem;

import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_MARKER_POLICY_AUTHORITATIVE;
@@ -307,22 +309,25 @@ public void testRunLimitedAudit() throws Throwable {
  }

  /**
-   * Run an audit against the landsat bucket.
+   * Run an audit against a bucket with a large number of objects.
   * <p></p>
   * This tests paging/scale against a larger bucket without
   * worrying about setup costs.
   */
  @Test
-  public void testRunLimitedLandsatAudit() throws Throwable {
-    describe("Audit a few thousand landsat objects");
+  public void testRunAuditManyObjectsInBucket() throws Throwable {
+    describe("Audit a few thousand objects");
    final File audit = tempAuditFile();

+    Configuration conf = super.createConfiguration();
+    String bucketUri = PublicDatasetTestUtils.getBucketPrefixWithManyObjects(conf);
+
    runToFailure(EXIT_INTERRUPTED,
        MARKERS,
        AUDIT,
        m(OPT_LIMIT), 3000,
        m(OPT_OUT), audit,
-        LANDSAT_BUCKET);
+        bucketUri);
    readOutput(audit);
  }

hadoop-tools/hadoop-aws/src/test/resources/core-site.xml

Lines changed: 27 additions & 0 deletions
@@ -30,6 +30,8 @@
    <final>false</final>
  </property>

+  <!-- Per-bucket configurations: landsat-pds -->
+
  <property>
    <name>fs.s3a.bucket.landsat-pds.endpoint</name>
    <value>${central.endpoint}</value>
@@ -55,6 +57,31 @@
    <description>Do not add the referrer header to landsat operations</description>
  </property>

+  <!-- Per-bucket configurations: usgs-landsat -->
+
+  <property>
+    <name>fs.s3a.bucket.usgs-landsat.endpoint</name>
+    <value>${central.endpoint}</value>
+  </property>
+
+  <property>
+    <name>fs.s3a.bucket.usgs-landsat.requester.pays.enabled</name>
+    <value>true</value>
+    <description>usgs-landsat requires requester pays enabled</description>
+  </property>
+
+  <property>
+    <name>fs.s3a.bucket.usgs-landsat.multipart.purge</name>
+    <value>false</value>
+    <description>Don't try to purge uploads in the read-only bucket, as
+      it will only create log noise.</description>
+  </property>
+
+  <property>
+    <name>fs.s3a.bucket.usgs-landsat.audit.add.referrer.header</name>
+    <value>false</value>
+  </property>
+

  <!--
    This is the default endpoint, which can be used to interact

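If you substitute a bucket of your own for `usgs-landsat` (for example on a third-party store, as the updated testing documentation suggests), equivalent per-bucket settings would be needed for that bucket. A minimal sketch, where `my-test-bucket` and the endpoint value are placeholders rather than anything defined by this commit:

```xml
<!-- Hypothetical per-bucket configuration for a replacement dataset bucket. -->
<property>
  <name>fs.s3a.bucket.my-test-bucket.endpoint</name>
  <value>https://s3.example.internal</value>
</property>

<property>
  <name>fs.s3a.bucket.my-test-bucket.requester.pays.enabled</name>
  <value>true</value>
  <description>Only needed if the replacement bucket is requester pays.</description>
</property>
```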