Skip to content

Commit b38b00e

Browse files
haiyang1987 and huhaiyang
authored
HDFS-15998. Fix NullPointException In listOpenFiles (apache#3036)
Co-authored-by: huhaiyang <huhaiyang@didichuxing.com>
1 parent 9a0a808 commit b38b00e

File tree

4 files changed

+112
-3
lines changed

4 files changed

+112
-3
lines changed

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2006,8 +2006,14 @@ public BatchedListEntries<OpenFileEntry> getFilesBlockingDecom(long prevId,
20062006
continue;
20072007
}
20082008
Preconditions.checkState(ucFile instanceof INodeFile);
2009-
openFileIds.add(ucFileId);
2009+
20102010
INodeFile inodeFile = ucFile.asFile();
2011+
if (!inodeFile.isUnderConstruction()) {
2012+
LOG.warn("The file {} is not under construction but has lease.",
2013+
inodeFile.getFullPathName());
2014+
continue;
2015+
}
2016+
openFileIds.add(ucFileId);
20112017

20122018
String fullPathName = inodeFile.getFullPathName();
20132019
if (org.apache.commons.lang3.StringUtils.isEmpty(path)

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -300,8 +300,13 @@ public BatchedListEntries<OpenFileEntry> getUnderConstructionFiles(
300300
Iterator<Long> inodeIdIterator = inodeIds.iterator();
301301
while (inodeIdIterator.hasNext()) {
302302
Long inodeId = inodeIdIterator.next();
303-
final INodeFile inodeFile =
304-
fsnamesystem.getFSDirectory().getInode(inodeId).asFile();
303+
INode ucFile = fsnamesystem.getFSDirectory().getInode(inodeId);
304+
if (ucFile == null) {
305+
//probably got deleted
306+
continue;
307+
}
308+
309+
final INodeFile inodeFile = ucFile.asFile();
305310
if (!inodeFile.isUnderConstruction()) {
306311
LOG.warn("The file {} is not under construction but has lease.",
307312
inodeFile.getFullPathName());

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java

Lines changed: 67 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@
3737
import java.util.concurrent.ExecutionException;
3838
import java.util.concurrent.atomic.AtomicBoolean;
3939
import java.util.regex.Pattern;
40+
import java.util.EnumSet;
4041

4142
import java.util.function.Supplier;
4243
import org.apache.hadoop.thirdparty.com.google.common.collect.Lists;
@@ -46,6 +47,7 @@
4647
import org.apache.hadoop.fs.FSDataOutputStream;
4748
import org.apache.hadoop.fs.FileSystem;
4849
import org.apache.hadoop.fs.Path;
50+
import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedEntries;
4951
import org.apache.hadoop.hdfs.client.HdfsDataInputStream;
5052
import org.apache.hadoop.hdfs.client.HdfsDataOutputStream;
5153
import org.apache.hadoop.hdfs.protocol.DatanodeID;
@@ -55,6 +57,8 @@
5557
import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType;
5658
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
5759
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
60+
import org.apache.hadoop.hdfs.protocol.OpenFileEntry;
61+
import org.apache.hadoop.hdfs.protocol.OpenFilesIterator;
5862
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
5963
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
6064
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
@@ -867,6 +871,69 @@ public void run() {
867871
closedFileSet, openFilesMap, 0);
868872
}
869873

874+
/**
875+
* Verify Decommission In Progress with List Open Files
876+
* 1. start decommissioning a node (set LeavingServiceStatus)
877+
* 2. close file with decommissioning
878+
* @throws Exception
879+
*/
880+
@Test(timeout=180000)
881+
public void testDecommissionWithCloseFileAndListOpenFiles()
882+
throws Exception {
883+
LOG.info("Starting test testDecommissionWithCloseFileAndListOpenFiles");
884+
885+
// Disable redundancy monitor check so that open files blocking
886+
// decommission can be listed and verified.
887+
getConf().setInt(
888+
DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1000);
889+
getConf().setLong(
890+
DFSConfigKeys.DFS_NAMENODE_LIST_OPENFILES_NUM_RESPONSES, 1);
891+
892+
startSimpleCluster(1, 3);
893+
FileSystem fileSys = getCluster().getFileSystem(0);
894+
FSNamesystem ns = getCluster().getNamesystem(0);
895+
Path file = new Path("/openFile");
896+
FSDataOutputStream st = AdminStatesBaseTest.writeIncompleteFile(fileSys,
897+
file, (short)3, (short)(fileSize / blockSize));
898+
for (DataNode d: getCluster().getDataNodes()) {
899+
DataNodeTestUtils.triggerBlockReport(d);
900+
}
901+
902+
LocatedBlocks lbs = NameNodeAdapter.getBlockLocations(
903+
getCluster().getNameNode(0), file.toUri().getPath(),
904+
0, blockSize * 10);
905+
DatanodeInfo dnToDecommission = lbs.getLastLocatedBlock().getLocations()[0];
906+
907+
DatanodeManager dm = ns.getBlockManager().getDatanodeManager();
908+
dnToDecommission = dm.getDatanode(dnToDecommission.getDatanodeUuid());
909+
initExcludeHost(dnToDecommission.getXferAddr());
910+
refreshNodes(0);
911+
BlockManagerTestUtil.recheckDecommissionState(dm);
912+
waitNodeState(dnToDecommission, AdminStates.DECOMMISSION_INPROGRESS);
913+
Thread.sleep(3000);
914+
//Make sure DatanodeAdminMonitor(DatanodeAdminBackoffMonitor) At least twice run.
915+
916+
BatchedEntries<OpenFileEntry> batchedListEntries = getCluster().
917+
getNameNodeRpc(0).listOpenFiles(0,
918+
EnumSet.of(OpenFilesIterator.OpenFilesType.BLOCKING_DECOMMISSION),
919+
OpenFilesIterator.FILTER_PATH_DEFAULT);
920+
assertEquals(1, batchedListEntries.size());
921+
st.close(); //close file
922+
923+
try {
924+
batchedListEntries = getCluster().getNameNodeRpc().listOpenFiles(0,
925+
EnumSet.of(OpenFilesIterator.OpenFilesType.BLOCKING_DECOMMISSION),
926+
OpenFilesIterator.FILTER_PATH_DEFAULT);
927+
assertEquals(0, batchedListEntries.size());
928+
} catch (NullPointerException e) {
929+
Assert.fail("Should not throw NPE when the file is not under " +
930+
"construction but has lease!");
931+
}
932+
initExcludeHost("");
933+
refreshNodes(0);
934+
fileSys.delete(file, false);
935+
}
936+
870937
@Test(timeout = 360000)
871938
public void testDecommissionWithOpenFileAndBlockRecovery()
872939
throws IOException, InterruptedException {

hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestListOpenFiles.java

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,9 +54,11 @@
5454
import org.apache.hadoop.hdfs.MiniDFSCluster;
5555
import org.apache.hadoop.hdfs.tools.DFSAdmin;
5656
import org.apache.hadoop.util.ToolRunner;
57+
import org.apache.hadoop.util.ChunkedArrayList;
5758
import org.junit.After;
5859
import org.junit.Before;
5960
import org.junit.Test;
61+
import org.junit.Assert;
6062

6163
/**
6264
* Verify open files listing.
@@ -321,4 +323,33 @@ public void testListOpenFilesWithInvalidPathClientSide() throws Exception {
321323
"hdfs://non-cluster/"));
322324
fs.listOpenFiles(EnumSet.of(OpenFilesType.ALL_OPEN_FILES), "/path");
323325
}
326+
327+
@Test
328+
public void testListOpenFilesWithDeletedPath() throws Exception {
329+
HashMap<Path, FSDataOutputStream> openFiles = new HashMap<>();
330+
openFiles.putAll(
331+
DFSTestUtil.createOpenFiles(fs, new Path("/"), "open-1", 1));
332+
BatchedEntries<OpenFileEntry> openFileEntryBatchedEntries = nnRpc
333+
.listOpenFiles(0, EnumSet.of(OpenFilesType.ALL_OPEN_FILES),
334+
OpenFilesIterator.FILTER_PATH_DEFAULT);
335+
assertEquals(1, openFileEntryBatchedEntries.size());
336+
String path = openFileEntryBatchedEntries.get(0).getFilePath();
337+
FSNamesystem fsNamesystem = cluster.getNamesystem();
338+
FSDirectory dir = fsNamesystem.getFSDirectory();
339+
List<INode> removedINodes = new ChunkedArrayList<>();
340+
removedINodes.add(dir.getINode(path));
341+
fsNamesystem.writeLock();
342+
try {
343+
dir.removeFromInodeMap(removedINodes);
344+
openFileEntryBatchedEntries = nnRpc
345+
.listOpenFiles(0, EnumSet.of(OpenFilesType.ALL_OPEN_FILES),
346+
OpenFilesIterator.FILTER_PATH_DEFAULT);
347+
assertEquals(0, openFileEntryBatchedEntries.size());
348+
fsNamesystem.leaseManager.removeLease(dir.getINode(path).getId());
349+
} catch (NullPointerException e) {
350+
Assert.fail("Should not throw NPE when the file is deleted but has lease!");
351+
} finally {
352+
fsNamesystem.writeUnlock();
353+
}
354+
}
324355
}

0 commit comments

Comments (0)