Skip to content

HADOOP-13230. Optionally retain directory markers #1861

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.test;

import java.util.concurrent.Callable;

import org.assertj.core.description.Description;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * AssertJ-specific test helpers.
 * They live outside {@link LambdaTestUtils} so that the widely used
 * class does not need AssertJ on its classpath.
 */
public final class AssertExtensions {

  private static final Logger LOG =
      LoggerFactory.getLogger(AssertExtensions.class);

  /** Utility class: never instantiated. */
  private AssertExtensions() {
  }

  /**
   * Build a description for AssertJ "describedAs" clauses whose text is
   * produced by invoking a lambda-expression when the description value
   * is requested; the intent is that this only happens on failure.
   * The lambda-expression must return a string, or null/"" for the
   * description to be skipped.
   * @param eval lambda expression to invoke
   * @return a description for AssertJ
   */
  public static Description dynamicDescription(Callable<String> eval) {
    return new DynamicDescription(eval);
  }

  /**
   * Description whose text comes from a callable invoked on every
   * {@link #value()} call.
   */
  private static final class DynamicDescription extends Description {

    /** Source of the description text. */
    private final Callable<String> textSource;

    private DynamicDescription(final Callable<String> textSource) {
      this.textSource = textSource;
    }

    /**
     * Invoke the callable and return its result.
     * @return the evaluated text, or null if the evaluation raised
     * an exception.
     */
    @Override
    public String value() {
      // stays null on failure so that the description evaluation chain
      // will skip this one
      String text = null;
      try {
        text = textSource.call();
      } catch (Exception e) {
        // one-line summary at warn; the full stack trace only at debug
        LOG.warn("Failed to evaluate description: " + e);
        LOG.debug("Evaluation failure", e);
      }
      return text;
    }
  }

}
43 changes: 43 additions & 0 deletions hadoop-tools/hadoop-aws/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
<!-- Set a longer timeout for integration test (in milliseconds) -->
<test.integration.timeout>200000</test.integration.timeout>

<fs.s3a.directory.marker></fs.s3a.directory.marker>
</properties>

<profiles>
Expand Down Expand Up @@ -122,6 +123,7 @@
<fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
<fs.s3a.scale.test.huge.huge.partitionsize>${fs.s3a.scale.test.huge.partitionsize}</fs.s3a.scale.test.huge.huge.partitionsize>
<fs.s3a.scale.test.timeout>${fs.s3a.scale.test.timeout}</fs.s3a.scale.test.timeout>
<fs.s3a.directory.marker>${fs.s3a.directory.marker}</fs.s3a.directory.marker>
</systemPropertyVariables>
</configuration>
</plugin>
Expand Down Expand Up @@ -162,6 +164,7 @@
<fs.s3a.s3guard.test.enabled>${fs.s3a.s3guard.test.enabled}</fs.s3a.s3guard.test.enabled>
<fs.s3a.s3guard.test.authoritative>${fs.s3a.s3guard.test.authoritative}</fs.s3a.s3guard.test.authoritative>
<fs.s3a.s3guard.test.implementation>${fs.s3a.s3guard.test.implementation}</fs.s3a.s3guard.test.implementation>
<fs.s3a.directory.marker>${fs.s3a.directory.marker}</fs.s3a.directory.marker>

<test.default.timeout>${test.integration.timeout}</test.default.timeout>
</systemPropertyVariables>
Expand Down Expand Up @@ -214,6 +217,7 @@
<fs.s3a.s3guard.test.enabled>${fs.s3a.s3guard.test.enabled}</fs.s3a.s3guard.test.enabled>
<fs.s3a.s3guard.test.implementation>${fs.s3a.s3guard.test.implementation}</fs.s3a.s3guard.test.implementation>
<fs.s3a.s3guard.test.authoritative>${fs.s3a.s3guard.test.authoritative}</fs.s3a.s3guard.test.authoritative>
<fs.s3a.directory.marker>${fs.s3a.directory.marker}</fs.s3a.directory.marker>
</systemPropertyVariables>
<!-- Do a sequential run for tests that cannot handle -->
<!-- parallel execution. -->
Expand Down Expand Up @@ -268,6 +272,7 @@
<fs.s3a.s3guard.test.enabled>${fs.s3a.s3guard.test.enabled}</fs.s3a.s3guard.test.enabled>
<fs.s3a.s3guard.test.implementation>${fs.s3a.s3guard.test.implementation}</fs.s3a.s3guard.test.implementation>
<fs.s3a.s3guard.test.authoritative>${fs.s3a.s3guard.test.authoritative}</fs.s3a.s3guard.test.authoritative>
<fs.s3a.directory.marker>${fs.s3a.directory.marker}</fs.s3a.directory.marker>
</systemPropertyVariables>
<forkedProcessTimeoutInSeconds>${fs.s3a.scale.test.timeout}</forkedProcessTimeoutInSeconds>
</configuration>
Expand Down Expand Up @@ -331,6 +336,44 @@
</properties>
</profile>

<!-- Directory marker retention options, each selected by the -Dmarkers value. -->
<profile>
<!-- Active when the "markers" property is "keep" (e.g. -Dmarkers=keep): -->
<!-- sets the fs.s3a.directory.marker property to "keep". -->
<id>keep-markers</id>
<activation>
<property>
<name>markers</name>
<value>keep</value>
</property>
</activation>
<properties >
<fs.s3a.directory.marker>keep</fs.s3a.directory.marker>
</properties>
</profile>
<profile>
<!-- Active when the "markers" property is "delete" (e.g. -Dmarkers=delete): -->
<!-- sets the fs.s3a.directory.marker property to "delete". -->
<id>delete-markers</id>
<activation>
<property>
<name>markers</name>
<value>delete</value>
</property>
</activation>
<properties >
<fs.s3a.directory.marker>delete</fs.s3a.directory.marker>
</properties>
</profile>
<profile>
<!-- Active when the "markers" property is "authoritative" -->
<!-- (e.g. -Dmarkers=authoritative): sets the fs.s3a.directory.marker -->
<!-- property to "authoritative". -->
<id>auth-markers</id>
<activation>
<property>
<name>markers</name>
<value>authoritative</value>
</property>
</activation>
<properties >
<fs.s3a.directory.marker>authoritative</fs.s3a.directory.marker>
</properties>
</profile>

</profiles>

<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -953,4 +953,40 @@ private Constants() {
* Value: {@value} seconds.
*/
public static final int THREAD_POOL_SHUTDOWN_DELAY_SECONDS = 30;

// NOTE(review): the hadoop-aws pom.xml declares a test property named
// "fs.s3a.directory.marker" (no trailing "s"), which differs from the
// value below -confirm the difference is intended.
/**
* Policy for directory markers.
* This is a new feature of HADOOP-13230 which addresses
* some scale, performance and permissions issues, but
* at the risk of backwards compatibility.
* Value: {@value}.
*/
public static final String DIRECTORY_MARKER_POLICY =
"fs.s3a.directory.markers";

/**
* Directory marker policy: retain directory markers.
* Value: {@value}.
*/
public static final String DIRECTORY_MARKER_POLICY_KEEP =
"keep";

/**
* Directory marker policy: delete directory markers.
* This is the backwards compatible option.
* Value: {@value}.
*/
public static final String DIRECTORY_MARKER_POLICY_DELETE =
"delete";

/**
* Directory marker policy: retain directory markers in
* authoritative directory trees only.
* Value: {@value}.
*/
public static final String DIRECTORY_MARKER_POLICY_AUTHORITATIVE =
"authoritative";

/**
* Default retention policy: {@link #DIRECTORY_MARKER_POLICY_DELETE}.
* Value: {@value}.
*/
public static final String DEFAULT_DIRECTORY_MARKER_POLICY =
DIRECTORY_MARKER_POLICY_DELETE;
}
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,27 @@ public FileStatusListingIterator createFileStatusListingIterator(
Listing.FileStatusAcceptor acceptor,
RemoteIterator<S3AFileStatus> providedStatus) throws IOException {
return new FileStatusListingIterator(
new ObjectListingIterator(listPath, request),
createObjectListingIterator(listPath, request),
filter,
acceptor,
providedStatus);
}

/**
 * Factory for an {@link ObjectListingIterator}: builds one from the
 * path being listed and the initial list object request.
 * @param listPath path of the listing
 * @param request initial request to make
 * @return the newly constructed iterator
 * @throws IOException IO Problems
 */
@Retries.RetryRaw
public ObjectListingIterator createObjectListingIterator(
    final Path listPath,
    final S3ListRequest request) throws IOException {
  final ObjectListingIterator objectListing =
      new ObjectListingIterator(listPath, request);
  return objectListing;
}

/**
* Create a located status iterator over a file status iterator.
* @param statusIterator an iterator over the remote status entries
Expand Down
Loading