Skip to content

HDFS-13671. Namenode deletes large dir slowly caused by FoldedTreeSet#removeAndGet #3113

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -291,18 +291,6 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final int DFS_NAMENODE_REPLICATION_MAX_STREAMS_DEFAULT = 2;
public static final String DFS_NAMENODE_REPLICATION_STREAMS_HARD_LIMIT_KEY = "dfs.namenode.replication.max-streams-hard-limit";
public static final int DFS_NAMENODE_REPLICATION_STREAMS_HARD_LIMIT_DEFAULT = 4;
public static final String DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_INTERVAL_MS_KEY
= "dfs.namenode.storageinfo.defragment.interval.ms";
public static final int
DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_INTERVAL_MS_DEFAULT = 10 * 60 * 1000;
public static final String DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_TIMEOUT_MS_KEY
= "dfs.namenode.storageinfo.defragment.timeout.ms";
public static final int
DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_TIMEOUT_MS_DEFAULT = 4;
public static final String DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_RATIO_KEY
= "dfs.namenode.storageinfo.defragment.ratio";
public static final double
DFS_NAMENODE_STORAGEINFO_DEFRAGMENT_RATIO_DEFAULT = 0.75;
public static final String DFS_NAMENODE_BLOCKREPORT_QUEUE_SIZE_KEY
= "dfs.namenode.blockreport.queue.size";
public static final int DFS_NAMENODE_BLOCKREPORT_QUEUE_SIZE_DEFAULT
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -967,8 +967,8 @@ public static JournalInfoProto convert(JournalInfo j) {


public static BlockReportContext convert(BlockReportContextProto proto) {
return new BlockReportContext(proto.getTotalRpcs(), proto.getCurRpc(),
proto.getId(), proto.getLeaseId(), proto.getSorted());
return new BlockReportContext(proto.getTotalRpcs(),
proto.getCurRpc(), proto.getId(), proto.getLeaseId());
}

public static BlockReportContextProto convert(BlockReportContext context) {
Expand All @@ -977,7 +977,6 @@ public static BlockReportContextProto convert(BlockReportContext context) {
setCurRpc(context.getCurRpc()).
setId(context.getReportId()).
setLeaseId(context.getLeaseId()).
setSorted(context.isSorted()).
build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@

import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;

import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
import org.apache.hadoop.classification.InterfaceAudience;
Expand Down Expand Up @@ -57,9 +57,19 @@ public abstract class BlockInfo extends Block
/** For implementing {@link LightWeightGSet.LinkedElement} interface. */
private LightWeightGSet.LinkedElement nextLinkedElement;


// Storages this block is replicated on
protected DatanodeStorageInfo[] storages;
/**
* This array contains triplets of references. For each i-th storage, the
* block belongs to triplets[3*i] is the reference to the
* {@link DatanodeStorageInfo} and triplets[3*i+1] and triplets[3*i+2] are
* references to the previous and the next blocks, respectively, in the list
* of blocks belonging to this storage.
*
* Using previous and next in Object triplets is done instead of a
* {@link LinkedList} list to efficiently use memory. With LinkedList the cost
* per replica is 42 bytes (LinkedList#Entry object per replica) versus 16
* bytes using the triplets.
*/
protected Object[] triplets;

private BlockUnderConstructionFeature uc;

Expand All @@ -69,14 +79,14 @@ public abstract class BlockInfo extends Block
* in the block group
*/
public BlockInfo(short size) {
this.storages = new DatanodeStorageInfo[size];
this.triplets = new Object[3 * size];
this.bcId = INVALID_INODE_ID;
this.replication = isStriped() ? 0 : size;
}

public BlockInfo(Block blk, short size) {
super(blk);
this.storages = new DatanodeStorageInfo[size];
this.triplets = new Object[3*size];
this.bcId = INVALID_INODE_ID;
this.replication = isStriped() ? 0 : size;
}
Expand Down Expand Up @@ -106,31 +116,7 @@ public boolean isDeleted() {
}

public Iterator<DatanodeStorageInfo> getStorageInfos() {
return new Iterator<DatanodeStorageInfo>() {

private int index = 0;

@Override
public boolean hasNext() {
while (index < storages.length && storages[index] == null) {
index++;
}
return index < storages.length;
}

@Override
public DatanodeStorageInfo next() {
if (!hasNext()) {
throw new NoSuchElementException();
}
return storages[index++];
}

@Override
public void remove() {
throw new UnsupportedOperationException("Sorry. can't remove.");
}
};
return new BlocksMap.StorageIterator(this);
}

public DatanodeDescriptor getDatanode(int index) {
Expand All @@ -139,18 +125,73 @@ public DatanodeDescriptor getDatanode(int index) {
}

DatanodeStorageInfo getStorageInfo(int index) {
assert this.storages != null : "BlockInfo is not initialized";
return storages[index];
assert this.triplets != null : "BlockInfo is not initialized";
assert index >= 0 && index*3 < triplets.length : "Index is out of bound";
return (DatanodeStorageInfo)triplets[index*3];
}

/**
 * Return the previous block in the doubly linked list of blocks belonging
 * to the storage at position {@code index}, or null if this block is the
 * head of that storage's list.
 *
 * @param index the storage index; the previous-pointer lives in
 *              triplets[index*3+1]
 * @return the previous BlockInfo on the list, or null at the head
 */
BlockInfo getPrevious(int index) {
  assert this.triplets != null : "BlockInfo is not initialized";
  assert index >= 0 && index*3+1 < triplets.length : "Index is out of bound";
  BlockInfo info = (BlockInfo)triplets[index*3+1];
  // The slot actually read is index*3+1, so report that slot on failure
  // (the original message incorrectly pointed at index*3, the storage slot).
  assert info == null ||
      info.getClass().getName().startsWith(BlockInfo.class.getName()) :
      "BlockInfo is expected at " + (index*3+1);
  return info;
}

/**
 * Return the next block in the doubly linked list of blocks belonging
 * to the storage at position {@code index}, or null if this block is the
 * tail of that storage's list.
 *
 * @param index the storage index; the next-pointer lives in
 *              triplets[index*3+2]
 * @return the next BlockInfo on the list, or null at the tail
 */
BlockInfo getNext(int index) {
  assert this.triplets != null : "BlockInfo is not initialized";
  assert index >= 0 && index*3+2 < triplets.length : "Index is out of bound";
  BlockInfo info = (BlockInfo)triplets[index*3+2];
  // The slot actually read is index*3+2, so report that slot on failure
  // (the original message incorrectly pointed at index*3, the storage slot).
  assert info == null || info.getClass().getName().startsWith(
      BlockInfo.class.getName()) :
      "BlockInfo is expected at " + (index*3+2);
  return info;
}

void setStorageInfo(int index, DatanodeStorageInfo storage) {
assert this.storages != null : "BlockInfo is not initialized";
this.storages[index] = storage;
assert this.triplets != null : "BlockInfo is not initialized";
assert index >= 0 && index*3 < triplets.length : "Index is out of bound";
triplets[index*3] = storage;
}

/**
 * Replace the previous-block pointer for the storage at position
 * {@code index} with {@code to}, returning whatever pointer was there
 * before the swap.
 *
 * @param index - the datanode index
 * @param to - block to be set to previous on the list of blocks
 * @return current previous block on the list of blocks
 */
BlockInfo setPrevious(int index, BlockInfo to) {
  assert this.triplets != null : "BlockInfo is not initialized";
  final int slot = index * 3 + 1;
  assert index >= 0 && slot < triplets.length : "Index is out of bound";
  final BlockInfo previous = (BlockInfo) triplets[slot];
  triplets[slot] = to;
  return previous;
}

/**
 * Replace the next-block pointer for the storage at position
 * {@code index} with {@code to}, returning whatever pointer was there
 * before the swap.
 *
 * @param index - the datanode index
 * @param to - block to be set to next on the list of blocks
 * @return current next block on the list of blocks
 */
BlockInfo setNext(int index, BlockInfo to) {
  assert this.triplets != null : "BlockInfo is not initialized";
  final int slot = index * 3 + 2;
  assert index >= 0 && slot < triplets.length : "Index is out of bound";
  final BlockInfo next = (BlockInfo) triplets[slot];
  triplets[slot] = to;
  return next;
}

public int getCapacity() {
assert this.storages != null : "BlockInfo is not initialized";
return storages.length;
assert this.triplets != null : "BlockInfo is not initialized";
assert triplets.length % 3 == 0 : "Malformed BlockInfo";
return triplets.length / 3;
}

/**
Expand Down Expand Up @@ -227,6 +268,80 @@ int findStorageInfo(DatanodeStorageInfo storageInfo) {
return -1;
}

/**
 * Insert this block at the head of the list of blocks belonging to the
 * given DatanodeStorageInfo. When {@code head} is null a new
 * single-element list is formed.
 *
 * @param head current head of the storage's block list, may be null
 * @param storage the storage whose list this block joins
 * @return current block as the new head of the list
 */
BlockInfo listInsert(BlockInfo head, DatanodeStorageInfo storage) {
  final int idx = this.findStorageInfo(storage);
  assert idx >= 0 : "Data node is not found: current";
  assert getPrevious(idx) == null && getNext(idx) == null :
      "Block is already in the list and cannot be inserted.";
  // Become the new head: no predecessor, old head as successor.
  this.setPrevious(idx, null);
  this.setNext(idx, head);
  if (head == null) {
    return this;
  }
  // Back-link the old head to us, using its own index for this storage.
  head.setPrevious(head.findStorageInfo(storage), this);
  return this;
}

/**
 * Remove this block from the list of blocks
 * related to the specified DatanodeStorageInfo.
 * If this block is the head of the list then return the next block as
 * the new head.
 * @return the new head of the list or null if the list becomes
 * empty after deletion.
 */
BlockInfo listRemove(BlockInfo head, DatanodeStorageInfo storage) {
  if (head == null) {
    return null;
  }
  int dnIndex = this.findStorageInfo(storage);
  if (dnIndex < 0) { // this block is not on the data-node list
    return head;
  }

  // Capture neighbors before clearing our own pointers, then unlink.
  BlockInfo next = this.getNext(dnIndex);
  BlockInfo prev = this.getPrevious(dnIndex);
  this.setNext(dnIndex, null);
  this.setPrevious(dnIndex, null);
  // Each neighbor keeps its pointers at its OWN index for this storage,
  // hence the per-neighbor findStorageInfo lookups.
  if (prev != null) {
    prev.setNext(prev.findStorageInfo(storage), next);
  }
  if (next != null) {
    next.setPrevious(next.findStorageInfo(storage), prev);
  }
  if (this == head) { // removing the head
    head = next;
  }
  return head;
}

/**
 * Remove this block from the list of blocks related to the specified
 * DatanodeDescriptor. Insert it into the head of the list of blocks.
 *
 * @param head current head of the storage's block list
 * @param storage the storage whose list is being reordered
 * @param curIndex this block's index for {@code storage}
 * @param headIndex the head block's index for {@code storage}
 * @return the new head of the list.
 */
public BlockInfo moveBlockToHead(BlockInfo head, DatanodeStorageInfo storage,
    int curIndex, int headIndex) {
  if (head == this) {
    return this;
  }
  // Point ourselves at the old head and drop our predecessor link;
  // setNext/setPrevious return the values being replaced.
  BlockInfo next = this.setNext(curIndex, head);
  BlockInfo prev = this.setPrevious(curIndex, null);

  head.setPrevious(headIndex, this);
  // NOTE(review): prev is dereferenced unconditionally — this assumes the
  // block is already on the list and is not the head (so prev != null);
  // confirm callers guarantee that invariant.
  prev.setNext(prev.findStorageInfo(storage), next);
  if (next != null) {
    next.setPrevious(next.findStorageInfo(storage), prev);
  }
  return this;
}

@Override
public int hashCode() {
// Super implementation is sufficient
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,20 @@ public BlockInfoContiguous(Block blk, short size) {
}

/**
* Ensure that there is enough space to include num more storages.
* @return first free storage index.
* Ensure that there is enough space to include num more triplets.
* @return first free triplet index.
*/
private int ensureCapacity(int num) {
assert this.storages != null : "BlockInfo is not initialized";
assert this.triplets != null : "BlockInfo is not initialized";
int last = numNodes();
if (storages.length >= (last+num)) {
if (triplets.length >= (last+num)*3) {
return last;
}
/* Not enough space left. Create a new array. Should normally
* happen only when replication is manually increased by the user. */
DatanodeStorageInfo[] old = storages;
storages = new DatanodeStorageInfo[(last+num)];
System.arraycopy(old, 0, storages, 0, last);
Object[] old = triplets;
triplets = new Object[(last+num)*3];
System.arraycopy(old, 0, triplets, 0, last * 3);
return last;
}

Expand All @@ -63,6 +63,8 @@ boolean addStorage(DatanodeStorageInfo storage, Block reportedBlock) {
// find the last null node
int lastNode = ensureCapacity(1);
setStorageInfo(lastNode, storage);
setNext(lastNode, null);
setPrevious(lastNode, null);
return true;
}

Expand All @@ -72,12 +74,18 @@ boolean removeStorage(DatanodeStorageInfo storage) {
if (dnIndex < 0) { // the node is not found
return false;
}
assert getPrevious(dnIndex) == null && getNext(dnIndex) == null :
"Block is still in the list and must be removed first.";
// find the last not null node
int lastNode = numNodes()-1;
// replace current node entry by the lastNode one
// replace current node triplet by the lastNode one
setStorageInfo(dnIndex, getStorageInfo(lastNode));
// set the last entry to null
setNext(dnIndex, getNext(lastNode));
setPrevious(dnIndex, getPrevious(lastNode));
// set the last triplet to null
setStorageInfo(lastNode, null);
setNext(lastNode, null);
setPrevious(lastNode, null);
return true;
}

Expand All @@ -96,7 +104,8 @@ boolean isProvided() {

@Override
public int numNodes() {
assert this.storages != null : "BlockInfo is not initialized";
assert this.triplets != null : "BlockInfo is not initialized";
assert triplets.length % 3 == 0 : "Malformed BlockInfo";

for (int idx = getCapacity()-1; idx >= 0; idx--) {
if (getDatanode(idx) != null) {
Expand Down
Loading