
Commit 7028fd6

fermiguangxuCheng authored and committed
THADOOP-187 HDFS-13671. Namenode deletes large dir slowly caused by FoldedTreeSet#removeAndGet (apache#3065) (merge request !395)

Squash merge branch 'THADOOP-187' into 'release-3.2.1-tq-0.2'
* add UT testBlockListMoveToHead
* HDFS-13671. Namenode deletes large dir slowly caused by FoldedTreeSet#removeAndGet (apache#3065)
1 parent 2a1f2e3 commit 7028fd6

29 files changed (+616, -2573 lines)

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java

Lines changed: 0 additions & 12 deletions
@@ -259,18 +259,6 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final int DFS_NAMENODE_REPLICATION_MAX_STREAMS_DEFAULT = 2;
   public static final String DFS_NAMENODE_REPLICATION_STREAMS_HARD_LIMIT_KEY = "dfs.namenode.replication.max-streams-hard-limit";
   public static final int DFS_NAMENODE_REPLICATION_STREAMS_HARD_LIMIT_DEFAULT = 4;
-  public static final String DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_INTERVAL_MS_KEY
-      = "dfs.namenode.storageinfo.defragment.interval.ms";
-  public static final int
-      DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_INTERVAL_MS_DEFAULT = 10 * 60 * 1000;
-  public static final String DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_TIMEOUT_MS_KEY
-      = "dfs.namenode.storageinfo.defragment.timeout.ms";
-  public static final int
-      DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_TIMEOUT_MS_DEFAULT = 4;
-  public static final String DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_RATIO_KEY
-      = "dfs.namenode.storageinfo.defragment.ratio";
-  public static final double
-      DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_RATIO_DEFAULT = 0.75;
   public static final String DFS_NAMENODE_BLOCKREPORT_QUEUE_SIZE_KEY
       = "dfs.namenode.blockreport.queue.size";
   public static final int DFS_NAMENODE_BLOCKREPORT_QUEUE_SIZE_DEFAULT

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java

Lines changed: 2 additions & 3 deletions
@@ -967,8 +967,8 @@ public static JournalInfoProto convert(JournalInfo j) {


  public static BlockReportContext convert(BlockReportContextProto proto) {
-    return new BlockReportContext(proto.getTotalRpcs(), proto.getCurRpc(),
-        proto.getId(), proto.getLeaseId(), proto.getSorted());
+    return new BlockReportContext(proto.getTotalRpcs(),
+        proto.getCurRpc(), proto.getId(), proto.getLeaseId());
  }

  public static BlockReportContextProto convert(BlockReportContext context) {
@@ -977,7 +977,6 @@ public static BlockReportContextProto convert(BlockReportContext context) {
        setCurRpc(context.getCurRpc()).
        setId(context.getReportId()).
        setLeaseId(context.getLeaseId()).
-        setSorted(context.isSorted()).
        build();
  }


hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfo.java

Lines changed: 152 additions & 37 deletions
@@ -19,8 +19,8 @@

 import java.io.IOException;
 import java.util.Iterator;
+import java.util.LinkedList;
 import java.util.List;
-import java.util.NoSuchElementException;

 import com.google.common.base.Preconditions;
 import org.apache.hadoop.classification.InterfaceAudience;
@@ -57,9 +57,19 @@ public abstract class BlockInfo extends Block
  /** For implementing {@link LightWeightGSet.LinkedElement} interface. */
  private LightWeightGSet.LinkedElement nextLinkedElement;

-
-  // Storages this block is replicated on
-  protected DatanodeStorageInfo[] storages;
+  /**
+   * This array contains triplets of references. For each i-th storage, the
+   * block belongs to triplets[3*i] is the reference to the
+   * {@link DatanodeStorageInfo} and triplets[3*i+1] and triplets[3*i+2] are
+   * references to the previous and the next blocks, respectively, in the list
+   * of blocks belonging to this storage.
+   *
+   * Using previous and next in Object triplets is done instead of a
+   * {@link LinkedList} list to efficiently use memory. With LinkedList the cost
+   * per replica is 42 bytes (LinkedList#Entry object per replica) versus 16
+   * bytes using the triplets.
+   */
+  protected Object[] triplets;

  private BlockUnderConstructionFeature uc;

@@ -69,14 +79,14 @@ public abstract class BlockInfo extends Block
   * in the block group
   */
  public BlockInfo(short size) {
-    this.storages = new DatanodeStorageInfo[size];
+    this.triplets = new Object[3 * size];
    this.bcId = INVALID_INODE_ID;
    this.replication = isStriped() ? 0 : size;
  }

  public BlockInfo(Block blk, short size) {
    super(blk);
-    this.storages = new DatanodeStorageInfo[size];
+    this.triplets = new Object[3*size];
    this.bcId = INVALID_INODE_ID;
    this.replication = isStriped() ? 0 : size;
  }
@@ -106,31 +116,7 @@ public boolean isDeleted() {
  }

  public Iterator<DatanodeStorageInfo> getStorageInfos() {
-    return new Iterator<DatanodeStorageInfo>() {
-
-      private int index = 0;
-
-      @Override
-      public boolean hasNext() {
-        while (index < storages.length && storages[index] == null) {
-          index++;
-        }
-        return index < storages.length;
-      }
-
-      @Override
-      public DatanodeStorageInfo next() {
-        if (!hasNext()) {
-          throw new NoSuchElementException();
-        }
-        return storages[index++];
-      }
-
-      @Override
-      public void remove() {
-        throw new UnsupportedOperationException("Sorry. can't remove.");
-      }
-    };
+    return new BlocksMap.StorageIterator(this);
  }

  public DatanodeDescriptor getDatanode(int index) {
@@ -139,18 +125,73 @@ public DatanodeDescriptor getDatanode(int index) {
  }

  DatanodeStorageInfo getStorageInfo(int index) {
-    assert this.storages != null : "BlockInfo is not initialized";
-    return storages[index];
+    assert this.triplets != null : "BlockInfo is not initialized";
+    assert index >= 0 && index*3 < triplets.length : "Index is out of bound";
+    return (DatanodeStorageInfo)triplets[index*3];
+  }
+
+  BlockInfo getPrevious(int index) {
+    assert this.triplets != null : "BlockInfo is not initialized";
+    assert index >= 0 && index*3+1 < triplets.length : "Index is out of bound";
+    BlockInfo info = (BlockInfo)triplets[index*3+1];
+    assert info == null ||
+        info.getClass().getName().startsWith(BlockInfo.class.getName()) :
+        "BlockInfo is expected at " + index*3;
+    return info;
+  }
+
+  BlockInfo getNext(int index) {
+    assert this.triplets != null : "BlockInfo is not initialized";
+    assert index >= 0 && index*3+2 < triplets.length : "Index is out of bound";
+    BlockInfo info = (BlockInfo)triplets[index*3+2];
+    assert info == null || info.getClass().getName().startsWith(
+        BlockInfo.class.getName()) :
+        "BlockInfo is expected at " + index*3;
+    return info;
  }

  void setStorageInfo(int index, DatanodeStorageInfo storage) {
-    assert this.storages != null : "BlockInfo is not initialized";
-    this.storages[index] = storage;
+    assert this.triplets != null : "BlockInfo is not initialized";
+    assert index >= 0 && index*3 < triplets.length : "Index is out of bound";
+    triplets[index*3] = storage;
+  }
+
+  /**
+   * Return the previous block on the block list for the datanode at
+   * position index. Set the previous block on the list to "to".
+   *
+   * @param index - the datanode index
+   * @param to - block to be set to previous on the list of blocks
+   * @return current previous block on the list of blocks
+   */
+  BlockInfo setPrevious(int index, BlockInfo to) {
+    assert this.triplets != null : "BlockInfo is not initialized";
+    assert index >= 0 && index*3+1 < triplets.length : "Index is out of bound";
+    BlockInfo info = (BlockInfo) triplets[index*3+1];
+    triplets[index*3+1] = to;
+    return info;
+  }
+
+  /**
+   * Return the next block on the block list for the datanode at
+   * position index. Set the next block on the list to "to".
+   *
+   * @param index - the datanode index
+   * @param to - block to be set to next on the list of blocks
+   * @return current next block on the list of blocks
+   */
+  BlockInfo setNext(int index, BlockInfo to) {
+    assert this.triplets != null : "BlockInfo is not initialized";
+    assert index >= 0 && index*3+2 < triplets.length : "Index is out of bound";
+    BlockInfo info = (BlockInfo) triplets[index*3+2];
+    triplets[index*3+2] = to;
+    return info;
  }

  public int getCapacity() {
-    assert this.storages != null : "BlockInfo is not initialized";
-    return storages.length;
+    assert this.triplets != null : "BlockInfo is not initialized";
+    assert triplets.length % 3 == 0 : "Malformed BlockInfo";
+    return triplets.length / 3;
  }

  /**
@@ -221,6 +262,80 @@ int findStorageInfo(DatanodeStorageInfo storageInfo) {
    return -1;
  }

+  /**
+   * Insert this block into the head of the list of blocks
+   * related to the specified DatanodeStorageInfo.
+   * If the head is null then form a new list.
+   * @return current block as the new head of the list.
+   */
+  BlockInfo listInsert(BlockInfo head, DatanodeStorageInfo storage) {
+    int dnIndex = this.findStorageInfo(storage);
+    assert dnIndex >= 0 : "Data node is not found: current";
+    assert getPrevious(dnIndex) == null && getNext(dnIndex) == null :
+        "Block is already in the list and cannot be inserted.";
+    this.setPrevious(dnIndex, null);
+    this.setNext(dnIndex, head);
+    if (head != null) {
+      head.setPrevious(head.findStorageInfo(storage), this);
+    }
+    return this;
+  }
+
+  /**
+   * Remove this block from the list of blocks
+   * related to the specified DatanodeStorageInfo.
+   * If this block is the head of the list then return the next block as
+   * the new head.
+   * @return the new head of the list or null if the list becomes
+   * empy after deletion.
+   */
+  BlockInfo listRemove(BlockInfo head, DatanodeStorageInfo storage) {
+    if (head == null) {
+      return null;
+    }
+    int dnIndex = this.findStorageInfo(storage);
+    if (dnIndex < 0) { // this block is not on the data-node list
+      return head;
+    }
+
+    BlockInfo next = this.getNext(dnIndex);
+    BlockInfo prev = this.getPrevious(dnIndex);
+    this.setNext(dnIndex, null);
+    this.setPrevious(dnIndex, null);
+    if (prev != null) {
+      prev.setNext(prev.findStorageInfo(storage), next);
+    }
+    if (next != null) {
+      next.setPrevious(next.findStorageInfo(storage), prev);
+    }
+    if (this == head) { // removing the head
+      head = next;
+    }
+    return head;
+  }
+
+  /**
+   * Remove this block from the list of blocks related to the specified
+   * DatanodeDescriptor. Insert it into the head of the list of blocks.
+   *
+   * @return the new head of the list.
+   */
+  public BlockInfo moveBlockToHead(BlockInfo head, DatanodeStorageInfo storage,
+      int curIndex, int headIndex) {
+    if (head == this) {
+      return this;
+    }
+    BlockInfo next = this.setNext(curIndex, head);
+    BlockInfo prev = this.setPrevious(curIndex, null);
+
+    head.setPrevious(headIndex, this);
+    prev.setNext(prev.findStorageInfo(storage), next);
+    if (next != null) {
+      next.setPrevious(next.findStorageInfo(storage), prev);
+    }
+    return this;
+  }
+
  @Override
  public int hashCode() {
    // Super implementation is sufficient
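For readers unfamiliar with the triplets layout this commit restores, the following is a minimal, self-contained sketch of the idea rather than the real HDFS classes: for each occupied slot i, the array keeps the storage reference at 3*i and the previous/next blocks of that storage's doubly-linked block list at 3*i+1 and 3*i+2. The names TripletDemo, Storage and Blk below are illustrative assumptions, not the actual API.

```java
// Illustrative sketch of the triplets layout (hypothetical names, not the HDFS API).
public class TripletDemo {

  static class Storage {
    final String id;
    Blk head;                       // head of this storage's block list
    Storage(String id) { this.id = id; }
  }

  static class Blk {
    final long blockId;
    final Object[] triplets;        // [storage, prev, next] per replica slot
    Blk(long blockId, int replication) {
      this.blockId = blockId;
      this.triplets = new Object[3 * replication];
    }

    // Index of the slot whose storage reference equals s (pass null to find a free slot).
    int findStorage(Storage s) {
      for (int i = 0; i < triplets.length / 3; i++) {
        if (triplets[3 * i] == s) {
          return i;
        }
      }
      return -1;
    }

    // Attach this block to a free slot and push it onto the head of the storage's
    // list, mirroring what addStorage plus listInsert do together.
    void addToStorage(Storage s) {
      int free = findStorage(null);
      triplets[3 * free] = s;          // storage reference
      triplets[3 * free + 1] = null;   // prev: a new head has no predecessor
      triplets[3 * free + 2] = s.head; // next: the old head
      if (s.head != null) {
        s.head.triplets[3 * s.head.findStorage(s) + 1] = this;
      }
      s.head = this;
    }
  }

  public static void main(String[] args) {
    Storage s = new Storage("DS-1");
    for (long id = 1; id <= 3; id++) {
      new Blk(id, 3).addToStorage(s);
    }
    // Head insertion means the newest block is visited first: 3, 2, 1.
    for (Blk b = s.head; b != null; b = (Blk) b.triplets[3 * b.findStorage(s) + 2]) {
      System.out.println("block " + b.blockId + " on " + s.id);
    }
  }
}
```

Compared with one LinkedList entry object per replica, the three array slots per replica need no separate node allocation, which is the memory argument made in the triplets javadoc above.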

hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockInfoContiguous.java

Lines changed: 19 additions & 10 deletions
@@ -37,20 +37,20 @@ public BlockInfoContiguous(Block blk, short size) {
  }

  /**
-   * Ensure that there is enough space to include num more storages.
-   * @return first free storage index.
+   * Ensure that there is enough space to include num more triplets.
+   * @return first free triplet index.
   */
  private int ensureCapacity(int num) {
-    assert this.storages != null : "BlockInfo is not initialized";
+    assert this.triplets != null : "BlockInfo is not initialized";
    int last = numNodes();
-    if (storages.length >= (last+num)) {
+    if (triplets.length >= (last+num)*3) {
      return last;
    }
    /* Not enough space left. Create a new array. Should normally
     * happen only when replication is manually increased by the user. */
-    DatanodeStorageInfo[] old = storages;
-    storages = new DatanodeStorageInfo[(last+num)];
-    System.arraycopy(old, 0, storages, 0, last);
+    Object[] old = triplets;
+    triplets = new Object[(last+num)*3];
+    System.arraycopy(old, 0, triplets, 0, last * 3);
    return last;
  }

@@ -62,6 +62,8 @@ boolean addStorage(DatanodeStorageInfo storage, Block reportedBlock) {
    // find the last null node
    int lastNode = ensureCapacity(1);
    setStorageInfo(lastNode, storage);
+    setNext(lastNode, null);
+    setPrevious(lastNode, null);
    return true;
  }

@@ -71,18 +73,25 @@ boolean removeStorage(DatanodeStorageInfo storage) {
    if (dnIndex < 0) { // the node is not found
      return false;
    }
+    assert getPrevious(dnIndex) == null && getNext(dnIndex) == null :
+        "Block is still in the list and must be removed first.";
    // find the last not null node
    int lastNode = numNodes()-1;
-    // replace current node entry by the lastNode one
+    // replace current node triplet by the lastNode one
    setStorageInfo(dnIndex, getStorageInfo(lastNode));
-    // set the last entry to null
+    setNext(dnIndex, getNext(lastNode));
+    setPrevious(dnIndex, getPrevious(lastNode));
+    // set the last triplet to null
    setStorageInfo(lastNode, null);
+    setNext(lastNode, null);
+    setPrevious(lastNode, null);
    return true;
  }

  @Override
  public int numNodes() {
-    assert this.storages != null : "BlockInfo is not initialized";
+    assert this.triplets != null : "BlockInfo is not initialized";
+    assert triplets.length % 3 == 0 : "Malformed BlockInfo";

    for (int idx = getCapacity()-1; idx >= 0; idx--) {
      if (getDatanode(idx) != null) {
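The ensureCapacity change above keeps the array sized as a multiple of 3 and copies only the occupied slots when replication is raised. A standalone sketch of that growth rule, with hypothetical names rather than the real class, might look like this:

```java
// Standalone sketch of the (last + num) * 3 growth rule; names are illustrative.
public class EnsureCapacityDemo {

  // Grow a triplets array so it can hold `num` more storages beyond the `numNodes` in use.
  static Object[] ensureCapacity(Object[] triplets, int numNodes, int num) {
    if (triplets.length >= (numNodes + num) * 3) {
      return triplets;                              // enough free slots already
    }
    Object[] grown = new Object[(numNodes + num) * 3];
    System.arraycopy(triplets, 0, grown, 0, numNodes * 3);
    return grown;
  }

  public static void main(String[] args) {
    Object[] t = new Object[3 * 3];                 // room for 3 replicas
    t = ensureCapacity(t, 3, 2);                    // replication raised by 2
    System.out.println("capacity: " + t.length / 3 + " replicas");  // prints 5
  }
}
```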
