Commit 3eb9f32

Merge branch 'apache:trunk' into YARN-11374
2 parents: e732f13 + b63b777

File tree: 7 files changed, +148 −47 lines

hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java

Lines changed: 4 additions & 2 deletions
@@ -402,7 +402,8 @@ public void setSymlink(final Path p) {
   }
 
   /**
-   * Compare this FileStatus to another FileStatus
+   * Compare this FileStatus to another FileStatus based on lexicographical
+   * order of path.
    * @param o the FileStatus to be compared.
    * @return a negative integer, zero, or a positive integer as this object
    * is less than, equal to, or greater than the specified object.
@@ -412,7 +413,8 @@ public int compareTo(FileStatus o) {
   }
 
   /**
-   * Compare this FileStatus to another FileStatus.
+   * Compare this FileStatus to another FileStatus based on lexicographical
+   * order of path.
    * This method was added back by HADOOP-14683 to keep binary compatibility.
    *
    * @param o the FileStatus to be compared.
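
Note: the clarified javadoc describes how this ordering typically surfaces, namely when a directory listing is sorted. A minimal usage sketch (the class name and the `/tmp` path are illustrative, not part of the commit):

```java
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SortListingSketch {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    // The directory is an arbitrary example.
    FileStatus[] statuses = fs.listStatus(new Path("/tmp"));
    // Relies on FileStatus.compareTo, i.e. lexicographical order of the Path.
    Arrays.sort(statuses);
    for (FileStatus status : statuses) {
      System.out.println(status.getPath());
    }
  }
}
```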

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerCluster.java

Lines changed: 1 addition & 1 deletion
@@ -389,6 +389,6 @@ public DiskBalancerDataNode getNodeByIPAddress(String ipAddresss) {
    * @return DiskBalancerDataNode.
    */
   public DiskBalancerDataNode getNodeByName(String hostName) {
-    return hostNames.get(hostName);
+    return hostNames.get(hostName.toLowerCase(Locale.US));
   }
 }
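
Note: a self-contained sketch of the case-insensitive lookup pattern this fix relies on, assuming the `hostNames` map is populated with lower-cased keys. The class and map contents are illustrative stand-ins for the DiskBalancer data model, not the real code:

```java
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

public class HostLookupSketch {
  // Keys are stored lower-cased, so lookups must normalize the same way.
  private final Map<String, String> hostNames = new HashMap<>();

  public void addNode(String hostName, String node) {
    hostNames.put(hostName.toLowerCase(Locale.US), node);
  }

  public String getNodeByName(String hostName) {
    // Use an explicit locale so locale-sensitive casing (e.g. Turkish dotless i)
    // cannot break the lookup.
    return hostNames.get(hostName.toLowerCase(Locale.US));
  }

  public static void main(String[] args) {
    HostLookupSketch sketch = new HostLookupSketch();
    sketch.addNode("DataNode-01.example.com", "dn1");
    System.out.println(sketch.getNodeByName("datanode-01.EXAMPLE.com")); // dn1
  }
}
```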

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

Lines changed: 7 additions & 4 deletions
@@ -3044,12 +3044,12 @@ LocatedBlock getAdditionalBlock(
 
     LocatedBlock[] onRetryBlock = new LocatedBlock[1];
     FSDirWriteFileOp.ValidateAddBlockResult r;
-    checkOperation(OperationCategory.READ);
+    checkOperation(OperationCategory.WRITE);
     final FSPermissionChecker pc = getPermissionChecker();
     FSPermissionChecker.setOperationType(operationName);
     readLock();
     try {
-      checkOperation(OperationCategory.READ);
+      checkOperation(OperationCategory.WRITE);
       r = FSDirWriteFileOp.validateAddBlock(this, pc, src, fileId, clientName,
           previous, onRetryBlock);
     } finally {
@@ -3095,12 +3095,15 @@ LocatedBlock getAdditionalDatanode(String src, long fileId,
     final byte storagePolicyID;
     final List<DatanodeStorageInfo> chosen;
     final BlockType blockType;
-    checkOperation(OperationCategory.READ);
+    checkOperation(OperationCategory.WRITE);
     final FSPermissionChecker pc = getPermissionChecker();
     FSPermissionChecker.setOperationType(null);
     readLock();
     try {
-      checkOperation(OperationCategory.READ);
+      // Changing this operation category to WRITE instead of making getAdditionalDatanode as a
+      // read method is aim to let Active NameNode to handle this RPC, because Active NameNode
+      // contains a more complete DN selection context than Observer NameNode.
+      checkOperation(OperationCategory.WRITE);
       //check safe mode
       checkNameNodeSafeMode("Cannot add datanode; src=" + src + ", blk=" + blk);
       final INodesInPath iip = dir.resolvePath(pc, src, fileId);
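
Note: a simplified sketch, not the real FSNamesystem/NameNode code, of why the operation category matters in an HA deployment: an Observer NameNode only serves calls categorized as READ, so tagging these RPCs WRITE makes the Observer reject them and the client retry against the Active NameNode. The enum and exception below stand in for the real OperationCategory, HAState, and ObserverRetryOnActiveException/StandbyException types:

```java
enum OperationCategory { READ, WRITE }
enum HAState { ACTIVE, OBSERVER }

class OperationCheckSketch {
  private final HAState state;

  OperationCheckSketch(HAState state) {
    this.state = state;
  }

  void checkOperation(OperationCategory op) {
    if (state == HAState.OBSERVER && op == OperationCategory.WRITE) {
      // In HDFS the client-side proxy sees the retry exception and
      // re-sends the call to the Active NameNode.
      throw new IllegalStateException(
          "Operation category WRITE is not supported in state OBSERVER");
    }
  }

  public static void main(String[] args) {
    new OperationCheckSketch(HAState.ACTIVE).checkOperation(OperationCategory.WRITE);   // allowed
    new OperationCheckSketch(HAState.OBSERVER).checkOperation(OperationCategory.WRITE); // throws
  }
}
```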

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRpcServer.java

Lines changed: 43 additions & 0 deletions
@@ -29,26 +29,32 @@
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
 
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.security.PrivilegedExceptionAction;
+import java.util.EnumSet;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hdfs.AddBlockFlag;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 
+import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
 import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
 import org.apache.hadoop.ipc.CallerContext;
+import org.apache.hadoop.ipc.ObserverRetryOnActiveException;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.test.GenericTestUtils;
+import org.apache.hadoop.test.LambdaTestUtils;
 import org.junit.Test;
 import org.junit.jupiter.api.Timeout;
 
@@ -158,6 +164,43 @@ public void testNamenodeRpcClientIpProxyWithFailBack() throws Exception {
     }
   }
 
+  @Test
+  @Timeout(30000)
+  public void testObserverHandleAddBlock() throws Exception {
+    String baseDir = GenericTestUtils.getRandomizedTempPath();
+    Configuration conf = new HdfsConfiguration();
+    MiniQJMHACluster.Builder builder = new MiniQJMHACluster.Builder(conf).setNumNameNodes(3);
+    builder.getDfsBuilder().numDataNodes(3);
+    try (MiniQJMHACluster qjmhaCluster = builder.baseDir(baseDir).build()) {
+      MiniDFSCluster dfsCluster = qjmhaCluster.getDfsCluster();
+      dfsCluster.waitActive();
+      dfsCluster.transitionToActive(0);
+      dfsCluster.transitionToObserver(2);
+
+      NameNode activeNN = dfsCluster.getNameNode(0);
+      NameNode observerNN = dfsCluster.getNameNode(2);
+
+      // Stop the editLogTailer of Observer NameNode
+      observerNN.getNamesystem().getEditLogTailer().stop();
+      DistributedFileSystem dfs = dfsCluster.getFileSystem(0);
+
+      Path testPath = new Path("/testObserverHandleAddBlock/file.txt");
+      try (FSDataOutputStream ignore = dfs.create(testPath)) {
+        HdfsFileStatus fileStatus = activeNN.getRpcServer().getFileInfo(testPath.toUri().getPath());
+        assertNotNull(fileStatus);
+        assertNull(observerNN.getRpcServer().getFileInfo(testPath.toUri().getPath()));
+
+        LambdaTestUtils.intercept(ObserverRetryOnActiveException.class, () -> {
+          observerNN.getRpcServer().addBlock(testPath.toUri().getPath(),
+              dfs.getClient().getClientName(), null, null,
+              fileStatus.getFileId(), null, EnumSet.noneOf(AddBlockFlag.class));
+        });
+      } finally {
+        dfs.delete(testPath, true);
+      }
+    }
+  }
+
   /**
    * A test to make sure that if an authorized user adds "clientIp:" to their
    * caller context, it will be used to make locality decisions on the NN.

hadoop-project/src/site/markdown/index.md.vm

Lines changed: 50 additions & 27 deletions
@@ -23,11 +23,29 @@ Overview of Changes
 Users are encouraged to read the full set of release notes.
 This page provides an overview of the major changes.
 
+Azure ABFS: Critical Stream Prefetch Fix
+---------------------------------------------
+
+The abfs has a critical bug fix
+[HADOOP-18546](https://issues.apache.org/jira/browse/HADOOP-18546).
+*ABFS. Disable purging list of in-progress reads in abfs stream close().*
+
+All users of the abfs connector in hadoop releases 3.3.2+ MUST either upgrade
+or disable prefetching by setting `fs.azure.readaheadqueue.depth` to `0`
+
+Consult the parent JIRA [HADOOP-18521](https://issues.apache.org/jira/browse/HADOOP-18521)
+*ABFS ReadBufferManager buffer sharing across concurrent HTTP requests*
+for root cause analysis, details on what is affected, and mitigations.
+
+
 Vectored IO API
 ---------------
 
+[HADOOP-18103](https://issues.apache.org/jira/browse/HADOOP-18103).
+*High performance vectored read API in Hadoop*
+
 The `PositionedReadable` interface has now added an operation for
-Vectored (also known as Scatter/Gather IO):
+Vectored IO (also known as Scatter/Gather IO):
 
 ```java
 void readVectored(List<? extends FileRange> ranges, IntFunction<ByteBuffer> allocate)
@@ -38,25 +56,25 @@ possibly in parallel, with results potentially coming in out-of-order.
 
 1. The default implementation uses a series of `readFully()` calls, so delivers
    equivalent performance.
-2. The local filesystem uses java native IO calls for higher performance reads than `readFully()`
+2. The local filesystem uses java native IO calls for higher performance reads than `readFully()`.
 3. The S3A filesystem issues parallel HTTP GET requests in different threads.
 
-Benchmarking of (modified) ORC and Parquet clients through `file://` and `s3a://`
-show tangible improvements in query times.
+Benchmarking of enhanced Apache ORC and Apache Parquet clients through `file://` and `s3a://`
+show significant improvements in query performance.
 
 Further Reading: [FsDataInputStream](./hadoop-project-dist/hadoop-common/filesystem/fsdatainputstream.html).
 
-Manifest Committer for Azure ABFS and google GCS performance
-------------------------------------------------------------
+Mapreduce: Manifest Committer for Azure ABFS and google GCS
+----------------------------------------------------------
 
-A new "intermediate manifest committer" uses a manifest file
+The new _Intermediate Manifest Committer_ uses a manifest file
 to commit the work of successful task attempts, rather than
 renaming directories.
 Job commit is matter of reading all the manifests, creating the
 destination directories (parallelized) and renaming the files,
 again in parallel.
 
-This is fast and correct on Azure Storage and Google GCS,
+This is both fast and correct on Azure Storage and Google GCS,
 and should be used there instead of the classic v1/v2 file
 output committers.
 
@@ -69,24 +87,6 @@ More details are available in the
 [manifest committer](./hadoop-mapreduce-client/hadoop-mapreduce-client-core/manifest_committer.html).
 documentation.
 
-Transitive CVE fixes
---------------------
-
-A lot of dependencies have been upgraded to address recent CVEs.
-Many of the CVEs were not actually exploitable through the Hadoop
-so much of this work is just due diligence.
-However applications which have all the library is on a class path may
-be vulnerable, and the ugprades should also reduce the number of false
-positives security scanners report.
-
-We have not been able to upgrade every single dependency to the latest
-version there is. Some of those changes are just going to be incompatible.
-If you have concerns about the state of a specific library, consult the apache JIRA
-issue tracker to see what discussions have taken place about the library in question.
-
-As an open source project, contributions in this area are always welcome,
-especially in testing the active branches, testing applications downstream of
-those branches and of whether updated dependencies trigger regressions.
 
 HDFS: Router Based Federation
 -----------------------------
@@ -96,7 +96,6 @@ A lot of effort has been invested into stabilizing/improving the HDFS Router Bas
 1. HDFS-13522, HDFS-16767 & Related Jiras: Allow Observer Reads in HDFS Router Based Federation.
 2. HDFS-13248: RBF supports Client Locality
 
-
 HDFS: Dynamic Datanode Reconfiguration
 --------------------------------------
 
@@ -109,6 +108,29 @@ cluster-wide Datanode Restarts.
 See [DataNode.java](https://github.com/apache/hadoop/blob/branch-3.3.5/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java#L346-L361)
 for the list of dynamically reconfigurable attributes.
 
+
+Transitive CVE fixes
+--------------------
+
+A lot of dependencies have been upgraded to address recent CVEs.
+Many of the CVEs were not actually exploitable through the Hadoop
+so much of this work is just due diligence.
+However applications which have all the library is on a class path may
+be vulnerable, and the ugprades should also reduce the number of false
+positives security scanners report.
+
+We have not been able to upgrade every single dependency to the latest
+version there is. Some of those changes are just going to be incompatible.
+If you have concerns about the state of a specific library, consult the pache JIRA
+issue tracker to see whether a JIRA has been filed, discussions have taken place about
+the library in question, and whether or not there is already a fix in the pipeline.
+*Please don't file new JIRAs about dependency-X.Y.Z having a CVE without
+searching for any existing issue first*
+
+As an open source project, contributions in this area are always welcome,
+especially in testing the active branches, testing applications downstream of
+those branches and of whether updated dependencies trigger regressions.
+
 Getting Started
 ===============
 
@@ -119,3 +141,4 @@ which shows you how to set up a single-node Hadoop installation.
 Then move on to the
 [Cluster Setup](./hadoop-project-dist/hadoop-common/ClusterSetup.html)
 to learn how to set up a multi-node Hadoop installation.
+
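
Note: for readers unfamiliar with the vectored read API described in the release notes above, a rough usage sketch. The file path and byte ranges are invented for illustration, and error handling is omitted:

```java
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileRange;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class VectoredReadSketch {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    List<FileRange> ranges = Arrays.asList(
        FileRange.createFileRange(0, 4096),        // first 4 KB
        FileRange.createFileRange(1_048_576, 4096) // 4 KB starting at 1 MB
    );
    try (FSDataInputStream in = fs.open(new Path("/data/example.orc"))) {
      // Ranges may be fetched in parallel and complete out of order.
      in.readVectored(ranges, ByteBuffer::allocate);
      for (FileRange range : ranges) {
        ByteBuffer buffer = range.getData().join(); // per-range future
        System.out.println("read " + buffer.remaining()
            + " bytes at offset " + range.getOffset());
      }
    }
  }
}
```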

hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystemStore.java

Lines changed: 12 additions & 13 deletions
@@ -72,6 +72,7 @@
 import java.util.List;
 import java.util.ListIterator;
 import java.util.NoSuchElementException;
+import java.util.stream.Collectors;
 
 import static org.apache.hadoop.fs.aliyun.oss.Constants.*;
 
@@ -203,31 +204,29 @@ public void deleteObjects(List<String> keysToDelete) throws IOException {
 
     int retry = 10;
     int tries = 0;
-    List<String> deleteFailed = keysToDelete;
-    while(CollectionUtils.isNotEmpty(deleteFailed)) {
+    while (CollectionUtils.isNotEmpty(keysToDelete)) {
       DeleteObjectsRequest deleteRequest = new DeleteObjectsRequest(bucketName);
-      deleteRequest.setKeys(deleteFailed);
+      deleteRequest.setKeys(keysToDelete);
       // There are two modes to do batch delete:
-      // 1. detail mode: DeleteObjectsResult.getDeletedObjects returns objects
-      //    which were deleted successfully.
-      // 2. simple mode: DeleteObjectsResult.getDeletedObjects returns objects
-      //    which were deleted unsuccessfully.
-      // Here, we choose the simple mode to do batch delete.
-      deleteRequest.setQuiet(true);
+      // 1. verbose mode: A list of all deleted objects is returned.
+      // 2. quiet mode: No message body is returned.
+      // Here, we choose the verbose mode to do batch delete.
+      deleteRequest.setQuiet(false);
       DeleteObjectsResult result = ossClient.deleteObjects(deleteRequest);
       statistics.incrementWriteOps(1);
-      deleteFailed = result.getDeletedObjects();
+      final List<String> deletedObjects = result.getDeletedObjects();
+      keysToDelete = keysToDelete.stream().filter(item -> !deletedObjects.contains(item))
+          .collect(Collectors.toList());
       tries++;
       if (tries == retry) {
         break;
       }
     }
 
-    if (tries == retry && CollectionUtils.isNotEmpty(deleteFailed)) {
+    if (tries == retry && CollectionUtils.isNotEmpty(keysToDelete)) {
       // Most of time, it is impossible to try 10 times, expect the
       // Aliyun OSS service problems.
-      throw new IOException("Failed to delete Aliyun OSS objects for " +
-          tries + " times.");
+      throw new IOException("Failed to delete Aliyun OSS objects for " + tries + " times.");
     }
   }
 
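
Note: the reworked loop above boils down to "retry whatever the last round did not delete, up to a fixed number of attempts". A generic, library-free sketch of that pattern, where the `attemptDelete` function is a hypothetical stand-in for the real OSS batch-delete call:

```java
import java.util.Arrays;
import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors;

public class RetryDeleteSketch {

  // attemptDelete takes the pending keys and returns the keys it actually deleted.
  static List<String> deleteWithRetry(List<String> keysToDelete,
      Function<List<String>, List<String>> attemptDelete) {
    final int retry = 10;
    int tries = 0;
    while (!keysToDelete.isEmpty()) {
      List<String> deleted = attemptDelete.apply(keysToDelete);
      // Keep only the keys that are still not deleted and retry them.
      keysToDelete = keysToDelete.stream()
          .filter(key -> !deleted.contains(key))
          .collect(Collectors.toList());
      tries++;
      if (tries == retry) {
        break;
      }
    }
    return keysToDelete; // anything left here failed after all attempts
  }

  public static void main(String[] args) {
    // Pretend the store never manages to delete keys ending in 1 or 3.
    List<String> leftover = deleteWithRetry(
        Arrays.asList("dir/file-1", "dir/file-2", "dir/file-3"),
        keys -> keys.stream()
            .filter(k -> !k.endsWith("1") && !k.endsWith("3"))
            .collect(Collectors.toList()));
    System.out.println("undeleted: " + leftover); // [dir/file-1, dir/file-3]
  }
}
```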

hadoop-tools/hadoop-aliyun/src/test/java/org/apache/hadoop/fs/aliyun/oss/TestAliyunOSSFileSystemStore.java

Lines changed: 31 additions & 0 deletions
@@ -18,9 +18,12 @@
 
 package org.apache.hadoop.fs.aliyun.oss;
 
+import com.aliyun.oss.model.OSSObjectSummary;
 import com.aliyun.oss.model.ObjectMetadata;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.contract.ContractTestUtils;
+
 import org.junit.After;
 import org.junit.Before;
 import org.junit.BeforeClass;
@@ -36,7 +39,10 @@
 import java.security.DigestOutputStream;
 import java.security.MessageDigest;
 import java.security.NoSuchAlgorithmException;
+import java.util.ArrayList;
+import java.util.List;
 
+import static org.apache.hadoop.fs.aliyun.oss.Constants.MAX_PAGING_KEYS_DEFAULT;
 import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
@@ -128,4 +134,29 @@ public void testLargeUpload()
     writeRenameReadCompare(new Path("/test/xlarge"),
         Constants.MULTIPART_UPLOAD_PART_SIZE_DEFAULT + 1);
   }
+
+  @Test
+  public void testDeleteObjects() throws IOException, NoSuchAlgorithmException {
+    // generate test files
+    final int files = 10;
+    final long size = 5 * 1024 * 1024;
+    final String prefix = "dir";
+    for (int i = 0; i < files; i++) {
+      Path path = new Path(String.format("/%s/testFile-%d.txt", prefix, i));
+      ContractTestUtils.generateTestFile(this.fs, path, size, 256, 255);
+    }
+    OSSListRequest listRequest =
+        store.createListObjectsRequest(prefix, MAX_PAGING_KEYS_DEFAULT, null, null, true);
+    List<String> keysToDelete = new ArrayList<>();
+    OSSListResult objects = store.listObjects(listRequest);
+    assertEquals(files, objects.getObjectSummaries().size());
+
+    // test delete files
+    for (OSSObjectSummary objectSummary : objects.getObjectSummaries()) {
+      keysToDelete.add(objectSummary.getKey());
+    }
+    store.deleteObjects(keysToDelete);
+    objects = store.listObjects(listRequest);
+    assertEquals(0, objects.getObjectSummaries().size());
+  }
 }
