Skip to content

Commit da1a2dc

Browse files
committed
HADOOP-18425. ABFS rename resilience through etags
If "fs.azure.enable.rename.resilience" is true, then do a HEAD of the source file before the rename, which can then be used to recover from the failure, as the manifest committer does (HADOOP-18163). Change-Id: Ia417f1501f7274662eb9ff919c6378fb913b476b HADOOP-18425. ABFS rename resilience through etags only get the etag on HNS stores Change-Id: I9faffa78294e1782f0b2db3d1c997ec3fe53637c
1 parent 759ddeb commit da1a2dc

File tree

5 files changed

+44
-8
lines changed

5 files changed

+44
-8
lines changed

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,10 @@ public class AbfsConfiguration{
328328
FS_AZURE_ENABLE_ABFS_LIST_ITERATOR, DefaultValue = DEFAULT_ENABLE_ABFS_LIST_ITERATOR)
329329
private boolean enableAbfsListIterator;
330330

331+
@BooleanConfigurationValidatorAnnotation(ConfigurationKey =
332+
FS_AZURE_ABFS_RENAME_RESILIENCE, DefaultValue = DEFAULT_ENABLE_ABFS_RENAME_RESILIENCE)
333+
private boolean renameResilience;
334+
331335
public AbfsConfiguration(final Configuration rawConfig, String accountName)
332336
throws IllegalAccessException, InvalidConfigurationValueException, IOException {
333337
this.rawConfig = ProviderUtils.excludeIncompatibleCredentialProviders(
@@ -1130,4 +1134,7 @@ public void setEnableAbfsListIterator(boolean enableAbfsListIterator) {
11301134
this.enableAbfsListIterator = enableAbfsListIterator;
11311135
}
11321136

1137+
public boolean getRenameResilience() {
1138+
return renameResilience;
1139+
}
11331140
}

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@
4646
import javax.annotation.Nullable;
4747

4848
import org.apache.hadoop.classification.VisibleForTesting;
49+
import org.apache.hadoop.fs.EtagSource;
50+
import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation;
51+
import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation;
4952
import org.apache.hadoop.util.Preconditions;
5053
import org.slf4j.Logger;
5154
import org.slf4j.LoggerFactory;
@@ -154,6 +157,11 @@ public class AzureBlobFileSystem extends FileSystem
154157
/** Rate limiting for operations which use it to throttle their IO. */
155158
private RateLimiting rateLimiting;
156159

160+
/**
161+
* Enable resilient rename.
162+
*/
163+
private boolean renameResilience;
164+
157165
@Override
158166
public void initialize(URI uri, Configuration configuration)
159167
throws IOException {
@@ -226,6 +234,8 @@ public void initialize(URI uri, Configuration configuration)
226234
}
227235

228236
rateLimiting = RateLimitingFactory.create(abfsConfiguration.getRateLimit());
237+
238+
renameResilience = abfsConfiguration.getRenameResilience();
229239
LOG.debug("Initializing AzureBlobFileSystem for {} complete", uri);
230240
}
231241

@@ -442,10 +452,13 @@ public boolean rename(final Path src, final Path dst) throws IOException {
442452
}
443453

444454
// Non-HNS account need to check dst status on driver side.
445-
if (!abfsStore.getIsNamespaceEnabled(tracingContext) && dstFileStatus == null) {
455+
final boolean isNamespaceEnabled = abfsStore.getIsNamespaceEnabled(tracingContext);
456+
if (!isNamespaceEnabled && dstFileStatus == null) {
446457
dstFileStatus = tryGetFileStatus(qualifiedDstPath, tracingContext);
447458
}
448459

460+
FileStatus sourceFileStatus = null;
461+
449462
try {
450463
String sourceFileName = src.getName();
451464
Path adjustedDst = dst;
@@ -459,10 +472,24 @@ public boolean rename(final Path src, final Path dst) throws IOException {
459472

460473
qualifiedDstPath = makeQualified(adjustedDst);
461474

462-
abfsStore.rename(qualifiedSrcPath, qualifiedDstPath, tracingContext, null);
475+
String etag = null;
476+
if (renameResilience && isNamespaceEnabled) {
477+
// for resilient rename on an HNS store, get the etag before
478+
// attempting the rename, and pass it down
479+
sourceFileStatus = abfsStore.getFileStatus(qualifiedSrcPath, tracingContext);
480+
etag = ((EtagSource) sourceFileStatus).getEtag();
481+
}
482+
boolean recovered = abfsStore.rename(qualifiedSrcPath, qualifiedDstPath, tracingContext,
483+
etag);
484+
if (recovered) {
485+
LOG.info("Recovered from rename failure of {} to {}",
486+
qualifiedSrcPath, qualifiedDstPath);
487+
}
463488
return true;
464489
} catch (AzureBlobFileSystemException ex) {
465-
LOG.debug("Rename operation failed. ", ex);
490+
LOG.debug("Rename {} to {} failed. source {} dest {}",
491+
qualifiedSrcPath, qualifiedDstPath,
492+
sourceFileStatus, dstFileStatus, ex);
466493
checkException(
467494
src,
468495
ex,

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,9 @@ public final class ConfigurationKeys {
233233
/** Key for rate limit capacity, as used by IO operations which try to throttle themselves. */
234234
public static final String FS_AZURE_ABFS_IO_RATE_LIMIT = "fs.azure.io.rate.limit";
235235

236+
/** Add extra resilience to rename failures, at the expense of performance. */
237+
public static final String FS_AZURE_ABFS_RENAME_RESILIENCE = "fs.azure.enable.rename.resilience";
238+
236239
public static String accountProperty(String property, String account) {
237240
return property + "." + account;
238241
}

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ public final class FileSystemConfigurations {
118118

119119
public static final int STREAM_ID_LEN = 12;
120120
public static final boolean DEFAULT_ENABLE_ABFS_LIST_ITERATOR = true;
121+
public static final boolean DEFAULT_ENABLE_ABFS_RENAME_RESILIENCE = true;
121122

122123
/**
123124
* Limit of queued block upload operations before writes

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -560,6 +560,7 @@ public AbfsClientRenameResult renamePath(
560560
if (!op.hasResult()) {
561561
throw e;
562562
}
563+
LOG.debug("Rename of {} to {} failed, attempting recovery", source, destination, e);
563564

564565
// ref: HADOOP-18242. Rename failure occurring due to a rare case of
565566
// tracking metadata being in incomplete state.
@@ -574,18 +575,15 @@ public AbfsClientRenameResult renamePath(
574575
// then we can retry the rename operation.
575576
AbfsRestOperation sourceStatusOp = getPathStatus(source, false,
576577
tracingContext);
577-
isMetadataIncompleteState = true;
578578
// Extract the sourceEtag, using the status Op, and set it
579579
// for future rename recovery.
580580
AbfsHttpOperation sourceStatusResult = sourceStatusOp.getResult();
581581
String sourceEtagAfterFailure = extractEtagHeader(sourceStatusResult);
582582
renamePath(source, destination, continuation, tracingContext,
583-
sourceEtagAfterFailure, isMetadataIncompleteState);
583+
sourceEtagAfterFailure, true);
584584
}
585585
// if we get out of the condition without a successful rename, then
586586
// it isn't metadata incomplete state issue.
587-
isMetadataIncompleteState = false;
588-
589587
boolean etagCheckSucceeded = renameIdempotencyCheckOp(
590588
source,
591589
sourceEtag, op, destination, tracingContext);
@@ -594,7 +592,7 @@ public AbfsClientRenameResult renamePath(
594592
// throw back the exception
595593
throw e;
596594
}
597-
return new AbfsClientRenameResult(op, true, isMetadataIncompleteState);
595+
return new AbfsClientRenameResult(op, true, false);
598596
}
599597
}
600598

0 commit comments

Comments
 (0)