49
49
import org .apache .hadoop .thirdparty .com .google .common .util .concurrent .MoreExecutors ;
50
50
import org .apache .hadoop .thirdparty .com .google .common .util .concurrent .ThreadFactoryBuilder ;
51
51
52
+ import com .sun .tools .javac .util .Convert ;
52
53
import org .slf4j .Logger ;
53
54
import org .slf4j .LoggerFactory ;
54
55
75
76
import static org .apache .hadoop .fs .azurebfs .AzureBlobFileSystemStore .extractEtagHeader ;
76
77
import static org .apache .hadoop .fs .azurebfs .constants .AbfsHttpConstants .*;
77
78
import static org .apache .hadoop .fs .azurebfs .constants .FileSystemConfigurations .DEFAULT_DELETE_CONSIDERED_IDEMPOTENT ;
79
+ import static org .apache .hadoop .fs .azurebfs .constants .FileSystemConfigurations .ONE_MB ;
78
80
import static org .apache .hadoop .fs .azurebfs .constants .FileSystemConfigurations .SERVER_SIDE_ENCRYPTION_ALGORITHM ;
79
81
import static org .apache .hadoop .fs .azurebfs .constants .FileSystemUriSchemes .HTTPS_SCHEME ;
80
82
import static org .apache .hadoop .fs .azurebfs .constants .HttpHeaderConfigurations .*;
@@ -761,6 +763,8 @@ public AbfsRestOperation append(final String path, final byte[] buffer,
761
763
requestHeaders .add (new AbfsHttpHeader (USER_AGENT , userAgentRetry ));
762
764
}
763
765
766
+ addCheckSumHeaderForWrite (requestHeaders , buffer );
767
+
764
768
// AbfsInputStream/AbfsOutputStream reuse SAS tokens for better performance
765
769
String sasTokenForReuse = appendSASTokenToQuery (path , SASTokenProvider .WRITE_OPERATION ,
766
770
abfsUriQueryBuilder , cachedSasToken );
@@ -978,9 +982,12 @@ public AbfsRestOperation read(final String path, final long position, final byte
978
982
TracingContext tracingContext ) throws AzureBlobFileSystemException {
979
983
final List <AbfsHttpHeader > requestHeaders = createDefaultHeaders ();
980
984
addCustomerProvidedKeyHeaders (requestHeaders );
981
- requestHeaders .add (new AbfsHttpHeader (RANGE ,
982
- String .format ("bytes=%d-%d" , position , position + bufferLength - 1 )));
985
+
986
+ AbfsHttpHeader rangeHeader = new AbfsHttpHeader (RANGE ,
987
+ String .format ("bytes=%d-%d" , position , position + bufferLength - 1 ));
988
+ requestHeaders .add (rangeHeader );
983
989
requestHeaders .add (new AbfsHttpHeader (IF_MATCH , eTag ));
990
+ addCheckSumHeaderForRead (requestHeaders , bufferLength , rangeHeader );
984
991
985
992
final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder ();
986
993
// AbfsInputStream/AbfsOutputStream reuse SAS tokens for better performance
@@ -999,6 +1006,8 @@ public AbfsRestOperation read(final String path, final long position, final byte
999
1006
bufferLength , sasTokenForReuse );
1000
1007
op .execute (tracingContext );
1001
1008
1009
+ verifyCheckSumForRead (buffer , op .getResult ());
1010
+
1002
1011
return op ;
1003
1012
}
1004
1013
@@ -1412,6 +1421,54 @@ private void appendIfNotEmpty(StringBuilder sb, String regEx,
1412
1421
}
1413
1422
}
1414
1423
1424
+ private void addCheckSumHeaderForRead (List <AbfsHttpHeader > requestHeaders ,
1425
+ final int bufferLength , final AbfsHttpHeader rangeHeader ) {
1426
+ if (getAbfsConfiguration ().getIsChecksumEnabled () &&
1427
+ requestHeaders .contains (rangeHeader ) && bufferLength <= 4 * ONE_MB ) {
1428
+ requestHeaders .add (new AbfsHttpHeader (X_MS_RANGE_GET_CONTENT_MD5 , TRUE ));
1429
+ }
1430
+ }
1431
+
1432
+ private void addCheckSumHeaderForWrite (List <AbfsHttpHeader > requestHeaders ,
1433
+ final byte [] buffer ) {
1434
+ if (getAbfsConfiguration ().getIsChecksumEnabled ()) {
1435
+ try {
1436
+ MessageDigest md5Digest = MessageDigest .getInstance ("MD5" );
1437
+ byte [] md5Bytes = md5Digest .digest (buffer );
1438
+ String md5Hash = Base64 .getEncoder ().encodeToString (md5Bytes );
1439
+ requestHeaders .add (new AbfsHttpHeader (CONTENT_MD5 , md5Hash ));
1440
+ } catch (NoSuchAlgorithmException e ) {
1441
+ e .printStackTrace ();
1442
+ }
1443
+ }
1444
+ }
1445
+
1446
+ private void verifyCheckSumForRead (final byte [] buffer , final AbfsHttpOperation result )
1447
+ throws AbfsRestOperationException {
1448
+ if (getAbfsConfiguration ().getIsChecksumEnabled ()) {
1449
+ // Number of bytes returned by server could be less than or equal to what
1450
+ // caller requests. In case it is less, extra bytes will be initialized to 0
1451
+ // Server returned MD5 Hash will be computed on what server returned.
1452
+ // We need to get exact data that server returned and compute its md5 hash
1453
+ // Computed hash should be equal to what server returned
1454
+ int numberOfBytesRead = (int )result .getBytesReceived ();
1455
+ byte [] dataRead = new byte [numberOfBytesRead ];
1456
+ System .arraycopy (buffer , 0 , dataRead , 0 , numberOfBytesRead );
1457
+
1458
+ try {
1459
+ MessageDigest md5Digest = MessageDigest .getInstance ("MD5" );
1460
+ byte [] md5Bytes = md5Digest .digest (dataRead );
1461
+ String md5HashComputed = Base64 .getEncoder ().encodeToString (md5Bytes );
1462
+ String md5HashActual = result .getResponseHeader (CONTENT_MD5 );
1463
+ if (!md5HashComputed .equals (md5HashActual )) {
1464
+ throw new AbfsRestOperationException (-1 , "-1" , "Checksum Check Failed" , new IOException ());
1465
+ }
1466
+ } catch (NoSuchAlgorithmException e ) {
1467
+ e .printStackTrace ();
1468
+ }
1469
+ }
1470
+ }
1471
+
1415
1472
@ VisibleForTesting
1416
1473
URL getBaseUrl () {
1417
1474
return baseUrl ;
0 commit comments