Skip to content

Commit 9fe22e2

Browse files
committed
incorporating review comments
1 parent 86a1644 commit 9fe22e2

File tree

7 files changed

+89
-40
lines changed

7 files changed

+89
-40
lines changed

hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1473,7 +1473,7 @@ public enum OperationStatusCode {
14731473

14741474
// Regions Recovery based on high storeFileRefCount threshold value
14751475
public static final String STORE_FILE_REF_COUNT_THRESHOLD =
1476-
"hbase.regions.recovery.store.file.count";
1476+
"hbase.regions.recovery.store.file.ref.count";
14771477

14781478
// default -1 indicates there is no threshold on high storeRefCount
14791479
public static final int DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD = -1;

hbase-common/src/main/resources/hbase-default.xml

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1912,15 +1912,22 @@ possible configurations would overwhelm and obscure the important.
19121912
</description>
19131913
</property>
19141914
<property>
1915-
<name>hbase.regions.recovery.store.file.count</name>
1915+
<name>hbase.regions.recovery.store.file.ref.count</name>
19161916
<value>-1</value>
19171917
<description>
1918-
Store files Ref Count threshold value considered
1919-
for reopening regions. Any region with store files
1920-
ref count > this value would be eligible for
1921-
reopening by master. Default value -1 indicates
1922-
this feature is turned off. Only positive integer
1923-
value should be provided to enable the feature.
1918+
Very large ref count on a file indicates
1919+
that it is a ref leak on that object. Such files
1920+
can not be removed even after it is invalidated
1921+
via compaction. Only way to recover in such
1922+
scenario is to reopen the region which can
1923+
release all resources, like the refcount, leases, etc.
1924+
This config represents Store files Ref Count threshold
1925+
value considered for reopening regions.
1926+
Any region with store files ref count > this value
1927+
would be eligible for reopening by master.
1928+
Default value -1 indicates this feature is turned off.
1929+
Only positive integer value should be provided to enable
1930+
this feature.
19241931
</description>
19251932
</property>
19261933
</configuration>

hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1471,7 +1471,7 @@ private void startServiceThreads() throws IOException {
14711471
getChoreService().scheduleChore(hfileCleaner);
14721472

14731473
// Regions Reopen based on very high storeFileRefCount is considered enabled
1474-
// only if hbase.regions.recovery.store.file.count has value > 0
1474+
// only if hbase.regions.recovery.store.file.ref.count has value > 0
14751475
final int maxStoreFileRefCount = conf.getInt(
14761476
HConstants.STORE_FILE_REF_COUNT_THRESHOLD,
14771477
HConstants.DEFAULT_STORE_FILE_REF_COUNT_THRESHOLD);

hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionsRecoveryChore.java

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -141,9 +141,10 @@ private Map<TableName, List<byte[]>> getTableToRegionsByRefCount(
141141
// Here, we take max ref count of all store files and not the cumulative
142142
// count of all store files
143143
final int maxStoreFileRefCount = regionMetrics.getMaxStoreFileRefCount();
144-
// ignore store file ref count threshold <= 0 (default is -1 i.e. disabled)
145-
if (storeFileRefCountThreshold > 0 && maxStoreFileRefCount > storeFileRefCountThreshold) {
146-
prepareTableToReopenRegionsMap(tableToReopenRegionsMap, regionMetrics,
144+
145+
if (maxStoreFileRefCount > storeFileRefCountThreshold) {
146+
final byte[] regionName = regionMetrics.getRegionName();
147+
prepareTableToReopenRegionsMap(tableToReopenRegionsMap, regionName,
147148
maxStoreFileRefCount);
148149
}
149150
}
@@ -154,9 +155,8 @@ private Map<TableName, List<byte[]>> getTableToRegionsByRefCount(
154155

155156
private void prepareTableToReopenRegionsMap(
156157
final Map<TableName, List<byte[]>> tableToReopenRegionsMap,
157-
final RegionMetrics regionMetrics, final int regionStoreRefCount) {
158+
final byte[] regionName, final int regionStoreRefCount) {
158159

159-
final byte[] regionName = regionMetrics.getRegionName();
160160
final RegionInfo regionInfo = hMaster.getAssignmentManager().getRegionInfo(regionName);
161161
final TableName tableName = regionInfo.getTable();
162162
if (TableName.isMetaTableName(tableName)) {
@@ -166,9 +166,7 @@ private void prepareTableToReopenRegionsMap(
166166
}
167167
LOG.warn("Region {} for Table {} has high storeFileRefCount {}, considering it for reopen..",
168168
regionInfo.getRegionNameAsString(), tableName, regionStoreRefCount);
169-
if (!tableToReopenRegionsMap.containsKey(tableName)) {
170-
tableToReopenRegionsMap.put(tableName, new ArrayList<>());
171-
}
169+
tableToReopenRegionsMap.putIfAbsent(tableName, new ArrayList<>());
172170
tableToReopenRegionsMap.get(tableName).add(regionName);
173171

174172
}

hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ReopenTableRegionsProcedure.java

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -57,25 +57,25 @@ public class ReopenTableRegionsProcedure
5757

5858
// Specify specific regions of a table to reopen.
5959
// if specified null, all regions of the table will be reopened.
60-
private final List<byte[]> regionNamesList;
60+
private final List<byte[]> regionNames;
6161

6262
private List<HRegionLocation> regions = Collections.emptyList();
6363

6464
private RetryCounter retryCounter;
6565

6666
public ReopenTableRegionsProcedure() {
67-
regionNamesList = null;
67+
regionNames = null;
6868
}
6969

7070
public ReopenTableRegionsProcedure(TableName tableName) {
7171
this.tableName = tableName;
72-
this.regionNamesList = null;
72+
this.regionNames = null;
7373
}
7474

7575
public ReopenTableRegionsProcedure(final TableName tableName,
76-
final List<byte[]> regionNamesList) {
76+
final List<byte[]> regionNames) {
7777
this.tableName = tableName;
78-
this.regionNamesList = regionNamesList;
78+
this.regionNames = regionNames;
7979
}
8080

8181
@Override
@@ -109,9 +109,9 @@ protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState
109109
LOG.info("Table {} is disabled, give up reopening its regions", tableName);
110110
return Flow.NO_MORE_STATE;
111111
}
112-
List<HRegionLocation> tableRegionsForReopen = env.getAssignmentManager()
112+
List<HRegionLocation> tableRegions = env.getAssignmentManager()
113113
.getRegionStates().getRegionsOfTableForReopen(tableName);
114-
regions = prepareRegionsForReopen(tableRegionsForReopen);
114+
regions = getRegionLocationsForReopen(tableRegions);
115115
setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
116116
return Flow.HAS_MORE_STATE;
117117
case REOPEN_TABLE_REGIONS_REOPEN_REGIONS:
@@ -167,13 +167,13 @@ protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState
167167
}
168168
}
169169

170-
private List<HRegionLocation> prepareRegionsForReopen(
170+
private List<HRegionLocation> getRegionLocationsForReopen(
171171
List<HRegionLocation> tableRegionsForReopen) {
172172

173173
List<HRegionLocation> regionsToReopen = new ArrayList<>();
174-
if (CollectionUtils.isNotEmpty(regionNamesList) &&
174+
if (CollectionUtils.isNotEmpty(regionNames) &&
175175
CollectionUtils.isNotEmpty(tableRegionsForReopen)) {
176-
for (byte[] regionName : regionNamesList) {
176+
for (byte[] regionName : regionNames) {
177177
for (HRegionLocation hRegionLocation : tableRegionsForReopen) {
178178
if (Bytes.equals(regionName, hRegionLocation.getRegion().getRegionName())) {
179179
regionsToReopen.add(hRegionLocation);

hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ public void testRegionReopensWithStoreRefConfig() throws Exception {
129129
}
130130
Stoppable stoppable = new StoppableImplementation();
131131
Configuration configuration = getCustomConf();
132-
configuration.setInt("hbase.regions.recovery.store.file.count", 300);
132+
configuration.setInt("hbase.regions.recovery.store.file.ref.count", 300);
133133
regionsRecoveryChore = new RegionsRecoveryChore(stoppable, configuration, hMaster);
134134
regionsRecoveryChore.chore();
135135

@@ -144,6 +144,43 @@ public void testRegionReopensWithStoreRefConfig() throws Exception {
144144
.getRegionInfo(Mockito.any());
145145
}
146146

147+
@Test
148+
public void testRegionReopensWithLessThreshold() throws Exception {
149+
regionNo = 0;
150+
ClusterMetrics clusterMetrics = TestRegionsRecoveryChore.getClusterMetrics(4);
151+
final Map<ServerName, ServerMetrics> serverMetricsMap =
152+
clusterMetrics.getLiveServerMetrics();
153+
LOG.debug("All Region Names with refCount....");
154+
for (ServerMetrics serverMetrics : serverMetricsMap.values()) {
155+
Map<byte[], RegionMetrics> regionMetricsMap = serverMetrics.getRegionMetrics();
156+
for (RegionMetrics regionMetrics : regionMetricsMap.values()) {
157+
LOG.debug("name: " + new String(regionMetrics.getRegionName()) + " refCount: " +
158+
regionMetrics.getStoreRefCount());
159+
}
160+
}
161+
Mockito.when(hMaster.getClusterMetrics()).thenReturn(clusterMetrics);
162+
Mockito.when(hMaster.getAssignmentManager()).thenReturn(assignmentManager);
163+
for (byte[] regionName : REGION_NAME_LIST) {
164+
Mockito.when(assignmentManager.getRegionInfo(regionName))
165+
.thenReturn(TestRegionsRecoveryChore.getRegionInfo(regionName));
166+
}
167+
Stoppable stoppable = new StoppableImplementation();
168+
Configuration configuration = getCustomConf();
169+
configuration.setInt("hbase.regions.recovery.store.file.ref.count", 400);
170+
regionsRecoveryChore = new RegionsRecoveryChore(stoppable, configuration, hMaster);
171+
regionsRecoveryChore.chore();
172+
173+
// Verify that we need to reopen regions of only 1 table
174+
Mockito.verify(hMaster, Mockito.times(1)).reopenRegions(Mockito.any(), Mockito.anyList(),
175+
Mockito.anyLong(), Mockito.anyLong());
176+
Mockito.verify(hMaster, Mockito.times(1)).getClusterMetrics();
177+
178+
// Verify that we need to reopen only 1 region with refCount > 400
179+
Mockito.verify(hMaster, Mockito.times(1)).getAssignmentManager();
180+
Mockito.verify(assignmentManager, Mockito.times(1))
181+
.getRegionInfo(Mockito.any());
182+
}
183+
147184
@Test
148185
public void testRegionReopensWithoutStoreRefConfig() throws Exception {
149186
regionNo = 0;
@@ -166,7 +203,7 @@ public void testRegionReopensWithoutStoreRefConfig() throws Exception {
166203
}
167204
Stoppable stoppable = new StoppableImplementation();
168205
Configuration configuration = getCustomConf();
169-
configuration.unset("hbase.regions.recovery.store.file.count");
206+
configuration.unset("hbase.regions.recovery.store.file.ref.count");
170207
regionsRecoveryChore = new RegionsRecoveryChore(stoppable, configuration, hMaster);
171208
regionsRecoveryChore.chore();
172209

src/main/asciidoc/_chapters/hbase-default.adoc

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2178,17 +2178,24 @@ The percent of region server RPC threads failed to abort RS.
21782178
`1200000`
21792179

21802180

2181-
[[hbase.regions.recovery.store.file.count]]
2182-
*`hbase.regions.recovery.store.file.count`*::
2183-
+
2184-
.Description
2185-
2186-
Store files Ref Count threshold value considered
2187-
for reopening regions. Any region with store files
2188-
ref count > this value would be eligible for
2189-
reopening by master. Default value -1 indicates
2190-
this feature is turned off. Only positive integer
2191-
value should be provided to enable the feature.
2181+
[[hbase.regions.recovery.store.file.ref.count]]
2182+
*`hbase.regions.recovery.store.file.ref.count`*::
2183+
+
2184+
.Description
2185+
2186+
Very large ref count on a file indicates
2187+
that it is a ref leak on that object. Such files
2188+
can not be removed even after it is invalidated
2189+
via compaction. Only way to recover in such
2190+
scenario is to reopen the region which can
2191+
release all resources, like the refcount, leases, etc.
2192+
This config represents Store files Ref Count threshold
2193+
value considered for reopening regions.
2194+
Any region with store files ref count > this value
2195+
would be eligible for reopening by master.
2196+
Default value -1 indicates this feature is turned off.
2197+
Only positive integer value should be provided to enable
2198+
this feature.
21922199

21932200
+
21942201
.Default

0 commit comments

Comments
 (0)