Hadoop-17015. ABFS: Handling Rename and Delete idempotency #2021
File: org/apache/hadoop/fs/azurebfs/services/AbfsClient.java
@@ -21,9 +21,11 @@
import java.io.Closeable;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

@@ -44,9 +46,11 @@
import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider;
import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider;
import org.apache.hadoop.fs.azurebfs.utils.DateTimeUtils;
import org.apache.hadoop.io.IOUtils;

import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.*;
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_DELETE_CONSIDERED_IDEMPOTENT;
import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.HTTPS_SCHEME;
import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.*;
import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.*;
@@ -320,7 +324,51 @@ public AbfsRestOperation renamePath(String source, final String destination, fin
        HTTP_METHOD_PUT,
        url,
        requestHeaders);
    Instant renameRequestStartTime = Instant.now();
    op.execute();

    if (op.getResult().getStatusCode() != HttpURLConnection.HTTP_OK) {
      return renameIdempotencyCheckOp(renameRequestStartTime, op, destination);
    }

    return op;
  }
Review comment thread on the new idempotency-check methods:

Reviewer: Would it be better to move the idempotency-related methods to a separate utility/helper class?

Author: The changes are not utility-related; they are in sync with the handling of the ABFS response. The reason they were added as separate methods was to enable mock testing, which was otherwise not possible. Retaining the change as such.

Reviewer: One option is to restrict the method accessibility to package level and use the @VisibleForTesting annotation. Keeping a method public solely for testing doesn't look like good practice. Also, if you have methods that are merely "assisting", chances are the class is actually doing too much. Moving these methods into separate classes with public interfaces keeps the class that had the assisting methods responsible for one thing and one thing only (see the Single Responsibility Principle), and that move automatically gives a testable structure because the methods must then be public.

Author: The AbfsClient class handles triggering requests to the store backend and returning the AbfsRestOperation. For Rename and Delete, when the request was re-tried, the idempotency logic is what determines the response to return. It would not be right to consider these methods "assisting" or providing a utility service; they are part of the actual flow.
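To make the reviewer's alternative concrete, here is a minimal sketch of the package-private plus @VisibleForTesting pattern. The class and method names are illustrative only and are not part of this PR; the annotation is Guava's, which Hadoop already uses.

```java
import com.google.common.annotations.VisibleForTesting;

// Illustrative only: production callers go through rename(), while tests in
// the same package can drive the package-private helper directly.
class RenameHelperSketch {

  public boolean rename(String source, String destination) {
    // the normal flow invokes the helper internally
    return idempotencyCheck(destination);
  }

  @VisibleForTesting
  boolean idempotencyCheck(String destination) {
    // package-private: reachable from same-package tests without being
    // part of the class's public API
    return destination != null && !destination.isEmpty();
  }
}
```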
  /**
   * Check if the rename request failure is post a retry and if the earlier
   * rename request might have succeeded at the back-end.
   *
   * If there is a parallel rename activity happening from any other store
   * interface, the logic here will attribute the rename to the retried
   * request from this ABFS filesystem instance. This should be a corner case,
   * hence going ahead with the LMT check.
   * @param renameRequestStartTime startTime for the rename request
   * @param op Rename request REST operation response
   * @param destination rename destination path
   * @return REST operation response post idempotency check
   * @throws AzureBlobFileSystemException if GetFileStatus hits any exception
   */
  public AbfsRestOperation renameIdempotencyCheckOp(
      final Instant renameRequestStartTime,
      final AbfsRestOperation op,
      final String destination) throws AzureBlobFileSystemException {
    if ((op.isARetriedRequest())
        && (op.getResult().getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND)) {
      // Server has returned HTTP 404, which means the rename source no longer
      // exists. Check the destination status and whether it has a recent LMT
      // timestamp. If yes, return success; else fall back to the original
      // rename request failure response.
      final AbfsRestOperation destStatusOp = getPathStatus(destination, false);
      if (destStatusOp.getResult().getStatusCode() == HttpURLConnection.HTTP_OK) {
        String lmt = destStatusOp.getResult().getResponseHeader(
            HttpHeaderConfigurations.LAST_MODIFIED);

        if (DateTimeUtils.isRecentlyModified(lmt, renameRequestStartTime)) {
          return destStatusOp;
        }
      }
    }

    return op;
  }
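The author's rationale above is that keeping the method public lets the retry path be exercised with mocked responses. The following is a rough sketch of that idea, not the PR's actual test code: it assumes AbfsHttpOperation is the type returned by AbfsRestOperation#getResult(), that the sketch lives in the same package as AbfsClient so package-level members are visible, and that "/dst" and the timestamps are arbitrary example values.

```java
package org.apache.hadoop.fs.azurebfs.services;

import java.net.HttpURLConnection;
import java.time.Instant;

import org.mockito.Mockito;

import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations;

// Rough sketch only: drive renameIdempotencyCheckOp with mocked responses.
public class RenameIdempotencySketch {
  public static void main(String[] args) throws Exception {
    AbfsClient client = Mockito.mock(AbfsClient.class);

    // The rename itself was retried and finally surfaced a 404.
    AbfsRestOperation renameOp = Mockito.mock(AbfsRestOperation.class);
    AbfsHttpOperation renameResult = Mockito.mock(AbfsHttpOperation.class);
    Mockito.when(renameOp.isARetriedRequest()).thenReturn(true);
    Mockito.when(renameOp.getResult()).thenReturn(renameResult);
    Mockito.when(renameResult.getStatusCode())
        .thenReturn(HttpURLConnection.HTTP_NOT_FOUND);

    // The destination exists and its LMT is just after the rename start time.
    AbfsRestOperation statusOp = Mockito.mock(AbfsRestOperation.class);
    AbfsHttpOperation statusResult = Mockito.mock(AbfsHttpOperation.class);
    Mockito.when(statusOp.getResult()).thenReturn(statusResult);
    Mockito.when(statusResult.getStatusCode())
        .thenReturn(HttpURLConnection.HTTP_OK);
    Mockito.when(statusResult.getResponseHeader(
        HttpHeaderConfigurations.LAST_MODIFIED))
        .thenReturn("Fri, 12 Jun 2020 10:00:05 GMT");
    Mockito.when(client.getPathStatus("/dst", false)).thenReturn(statusOp);

    // Run the real idempotency check against the mocked responses.
    Mockito.when(client.renameIdempotencyCheckOp(
        Mockito.any(Instant.class), Mockito.any(), Mockito.anyString()))
        .thenCallRealMethod();
    AbfsRestOperation outcome = client.renameIdempotencyCheckOp(
        Instant.parse("2020-06-12T10:00:00Z"), renameOp, "/dst");

    // Expected: the destination GetFileStatus op is returned, i.e. the
    // retried rename is treated as having already succeeded.
    System.out.println(outcome == statusOp);
  }
}
```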
@@ -476,6 +524,45 @@ public AbfsRestOperation deletePath(final String path, final boolean recursive,
        url,
        requestHeaders);
    op.execute();

    if (op.getResult().getStatusCode() != HttpURLConnection.HTTP_OK) {
      return deleteIdempotencyCheckOp(op);
    }

    return op;
  }
  /**
   * Check if the delete request failure is post a retry and if the delete
   * failure qualifies to be a success response assuming idempotency.
   *
   * There are scenarios below where a delete could be incorrectly deduced as
   * a success after a request retry:
   * 1. The target did not exist originally and the initial delete request had
   * to be re-tried.
   * 2. A parallel delete was issued from some other store interface rather
   * than from this filesystem instance.
   * These are a few corner cases and usually returning a success at this
   * stage should help the job to continue.
   * @param op Delete request REST operation response
   * @return REST operation response post idempotency check
   */
  public AbfsRestOperation deleteIdempotencyCheckOp(final AbfsRestOperation op) {
    if ((op.isARetriedRequest())
        && (op.getResult().getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND)
        && DEFAULT_DELETE_CONSIDERED_IDEMPOTENT) {
      // Server has returned HTTP 404, which means the path no longer
      // exists. Assuming delete to be idempotent, return success.
      final AbfsRestOperation successOp = new AbfsRestOperation(
          AbfsRestOperationType.DeletePath,
          this,
          HTTP_METHOD_DELETE,
          op.getUrl(),
          op.getRequestHeaders());
      successOp.hardSetResult(HttpURLConnection.HTTP_OK);
      return successOp;
    }

    return op;
  }
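A similar mock-driven sketch for the delete path, again not the PR's test code: it assumes same-package visibility, assumes DEFAULT_DELETE_CONSIDERED_IDEMPOTENT defaults to true (the method requires it), and stubs the URL and request headers only so the synthetic success operation can be constructed from the failed one.

```java
package org.apache.hadoop.fs.azurebfs.services;

import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;

import org.mockito.Mockito;

// Rough sketch only: a retried delete that surfaced 404 should come back
// from deleteIdempotencyCheckOp as a hard-set HTTP 200.
public class DeleteIdempotencySketch {
  public static void main(String[] args) throws Exception {
    AbfsClient client = Mockito.mock(AbfsClient.class);

    AbfsRestOperation deleteOp = Mockito.mock(AbfsRestOperation.class);
    AbfsHttpOperation deleteResult = Mockito.mock(AbfsHttpOperation.class);
    Mockito.when(deleteOp.isARetriedRequest()).thenReturn(true);
    Mockito.when(deleteOp.getResult()).thenReturn(deleteResult);
    Mockito.when(deleteResult.getStatusCode())
        .thenReturn(HttpURLConnection.HTTP_NOT_FOUND);
    // Needed because the check rebuilds a DeletePath operation from the old one.
    Mockito.when(deleteOp.getUrl())
        .thenReturn(new URL("https://account.dfs.core.windows.net/fs/tmp"));
    Mockito.when(deleteOp.getRequestHeaders()).thenReturn(new ArrayList<>());

    Mockito.when(client.deleteIdempotencyCheckOp(Mockito.any()))
        .thenCallRealMethod();
    AbfsRestOperation outcome = client.deleteIdempotencyCheckOp(deleteOp);

    // Expected: a fresh operation whose result is hard-set to HTTP 200.
    System.out.println(
        outcome.getResult().getStatusCode() == HttpURLConnection.HTTP_OK);
  }
}
```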
File: org/apache/hadoop/fs/azurebfs/utils/DateTimeUtils.java (new file)
@@ -0,0 +1,71 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.azurebfs.utils;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.time.Instant;
import java.util.Date;
import java.util.Locale;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_CLOCK_SKEW_WITH_SERVER_IN_MS;

public final class DateTimeUtils {
  private static final Logger LOG = LoggerFactory.getLogger(DateTimeUtils.class);
  private static final String DATE_TIME_PATTERN = "E, dd MMM yyyy HH:mm:ss z";

  public static long parseLastModifiedTime(final String lastModifiedTime) {
    long parsedTime = 0;
    try {
      Date utcDate = new SimpleDateFormat(DATE_TIME_PATTERN, Locale.US)
          .parse(lastModifiedTime);
      parsedTime = utcDate.getTime();
    } catch (ParseException e) {
      LOG.error("Failed to parse the date {}", lastModifiedTime);
    } finally {
      return parsedTime;
    }
  }

  /**
   * Tries to identify if an operation was recently executed based on the LMT
   * of a file or folder. The LMT needs to be more recent than the original
   * request start time. To accommodate any clock skew with the server, an LMT
   * within DEFAULT_CLOCK_SKEW_WITH_SERVER_IN_MS of the request start time is
   * also considered to qualify as a recent operation.
   * @param lastModifiedTime File/Folder LMT
   * @param expectedLMTUpdateTime original request timestamp which should
   * have updated the LMT on the target
   * @return true if the LMT is within the timespan for a recent operation, else false
   */
  public static boolean isRecentlyModified(final String lastModifiedTime,
      final Instant expectedLMTUpdateTime) {
    long lmtEpochTime = DateTimeUtils.parseLastModifiedTime(lastModifiedTime);
    long currentEpochTime = expectedLMTUpdateTime.toEpochMilli();

    return ((lmtEpochTime > currentEpochTime)
        || ((currentEpochTime - lmtEpochTime) <= DEFAULT_CLOCK_SKEW_WITH_SERVER_IN_MS));
  }

  private DateTimeUtils() {
  }
}
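To make the skew handling concrete, here is a small usage sketch. The timestamps are arbitrary example values, and the actual allowance depends on whatever DEFAULT_CLOCK_SKEW_WITH_SERVER_IN_MS is set to in FileSystemConfigurations (assumed here to be on the order of minutes).

```java
import java.time.Instant;

import org.apache.hadoop.fs.azurebfs.utils.DateTimeUtils;

// Usage sketch for DateTimeUtils.isRecentlyModified.
public class RecentlyModifiedExample {
  public static void main(String[] args) {
    Instant renameStart = Instant.parse("2020-06-12T10:00:00Z");

    // LMT after the request start time: clearly updated by (a retry of) this request.
    System.out.println(DateTimeUtils.isRecentlyModified(
        "Fri, 12 Jun 2020 10:00:05 GMT", renameStart));

    // LMT slightly before the start time: still accepted, attributed to clock
    // skew, provided the skew allowance is at least 30 seconds.
    System.out.println(DateTimeUtils.isRecentlyModified(
        "Fri, 12 Jun 2020 09:59:30 GMT", renameStart));

    // LMT two hours older than the start time: set by some other, earlier
    // operation, so false for any reasonable skew allowance.
    System.out.println(DateTimeUtils.isRecentlyModified(
        "Fri, 12 Jun 2020 08:00:00 GMT", renameStart));
  }
}
```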