Skip to content

Commit 56a883c

Browse files
busbeyJenkins
authored andcommitted
HBASE-23222 MOB compaction supportability improvements
* better logging on MOB compaction process * HFileCleanerDelegate to optionally halt removal of mob hfiles * use archiving when removing committed mob file after bulkload ref failure closes apache#763 Signed-off-by: Wellington Chevreuil <wchevreuil@apache.org> Signed-off-by: Balazs Meszaros <meszibalu@apache.org> (cherry picked from commit 8dc26c9) Change-Id: I453e0188931ee4f4b170464cbc306f1be3c0d5cd
1 parent 41ba62c commit 56a883c

File tree

5 files changed

+180
-16
lines changed

5 files changed

+180
-16
lines changed
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
package org.apache.hadoop.hbase.mob;
19+
20+
import java.util.concurrent.ConcurrentHashMap;
21+
import java.util.concurrent.ConcurrentMap;
22+
23+
import org.apache.hadoop.fs.FileStatus;
24+
import org.apache.hadoop.fs.Path;
25+
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
26+
import org.apache.hadoop.hbase.TableName;
27+
import org.apache.hadoop.hbase.master.cleaner.BaseHFileCleanerDelegate;
28+
import org.apache.hadoop.hbase.util.FSUtils;
29+
import org.apache.yetus.audience.InterfaceAudience;
30+
import org.slf4j.Logger;
31+
import org.slf4j.LoggerFactory;
32+
33+
/**
34+
* {@link BaseHFileCleanerDelegate} that prevents cleaning HFiles from a mob region
35+
*
36+
* keeps a map of table name strings to mob region name strings over the life of
37+
* a JVM instance. if there's churn of unique table names we'll eat memory until
38+
* Master restart.
39+
*/
40+
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
41+
public class ManualMobMaintHFileCleaner extends BaseHFileCleanerDelegate {
42+
43+
private static final Logger LOG = LoggerFactory.getLogger(ManualMobMaintHFileCleaner.class);
44+
45+
// We need to avoid making HRegionInfo objects for every table we check.
46+
private static final ConcurrentMap<TableName, String> MOB_REGIONS = new ConcurrentHashMap<>();
47+
48+
@Override
49+
public boolean isFileDeletable(FileStatus fStat) {
50+
try {
51+
// if its a directory, then it can be deleted
52+
if (fStat.isDirectory()) {
53+
return true;
54+
}
55+
56+
Path file = fStat.getPath();
57+
58+
// we need the table and region to determine if this is from a mob region
59+
// we don't need to worry about hfilelink back references, because the hfilelink cleaner will
60+
// retain them.
61+
Path family = file.getParent();
62+
Path region = family.getParent();
63+
Path table = region.getParent();
64+
65+
TableName tableName = FSUtils.getTableName(table);
66+
67+
String mobRegion = MOB_REGIONS.get(tableName);
68+
if (mobRegion == null) {
69+
String tmp = MobUtils.getMobRegionInfo(tableName).getEncodedName();
70+
if (tmp == null) {
71+
LOG.error("couldn't determine mob region for table {} keeping files just in case.",
72+
tableName);
73+
return false;
74+
}
75+
mobRegion = MOB_REGIONS.putIfAbsent(tableName, tmp);
76+
// a return of null means that tmp is now in the map for future lookups.
77+
if (mobRegion == null) {
78+
mobRegion = tmp;
79+
}
80+
LOG.debug("Had to calculate name of mob region for table {} and it is {}", tableName,
81+
mobRegion);
82+
}
83+
84+
boolean ret = !mobRegion.equals(region.getName());
85+
if (LOG.isDebugEnabled() && !ret) {
86+
LOG.debug("Keeping file '{}' because it is from mob dir", fStat.getPath());
87+
}
88+
return ret;
89+
} catch (RuntimeException e) {
90+
LOG.error("Failed to determine mob status of '{}', keeping it just in case.", fStat.getPath(),
91+
e);
92+
return false;
93+
}
94+
}
95+
96+
}

hbase-server/src/main/java/org/apache/hadoop/hbase/mob/MobUtils.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -855,11 +855,15 @@ public static void doMobCompaction(Configuration conf, FileSystem fs, TableName
855855
// with major compaction in mob-enabled column.
856856
try {
857857
lock.acquire();
858+
LOG.info("start MOB compaction of files for table='{}', column='{}', allFiles={}, " +
859+
"compactor='{}'", tableName, hcd.getNameAsString(), allFiles, compactor.getClass());
858860
compactor.compact(allFiles);
859861
} catch (Exception e) {
860862
LOG.error("Failed to compact the mob files for the column " + hcd.getNameAsString()
861863
+ " in the table " + tableName.getNameAsString(), e);
862864
} finally {
865+
LOG.info("end MOB compaction of files for table='{}', column='{}', allFiles={}, " +
866+
"compactor='{}'", tableName, hcd.getNameAsString(), allFiles, compactor.getClass());
863867
lock.release();
864868
}
865869
}

hbase-server/src/main/java/org/apache/hadoop/hbase/mob/compactions/PartitionedMobCompactor.java

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -305,9 +305,9 @@ protected PartitionedMobCompactionRequest select(List<FileStatus> candidates,
305305
// all the files are selected
306306
request.setCompactionType(CompactionType.ALL_FILES);
307307
}
308-
LOG.info("The compaction type is " + request.getCompactionType() + ", the request has "
309-
+ totalDelFiles + " del files, " + selectedFileCount + " selected files, and "
310-
+ irrelevantFileCount + " irrelevant files");
308+
LOG.info("The compaction type is {}, the request has {} del files, {} selected files, and {} " +
309+
"irrelevant files table '{}' and column '{}'", request.getCompactionType(), totalDelFiles,
310+
selectedFileCount, irrelevantFileCount, tableName, column.getNameAsString());
311311
return request;
312312
}
313313

@@ -347,10 +347,12 @@ protected List<Path> performCompaction(PartitionedMobCompactionRequest request)
347347
totalDelFileCount++;
348348
}
349349
}
350-
LOG.info("After merging, there are " + totalDelFileCount + " del files");
350+
LOG.info("After merging, there are {} del files. table='{}' column='{}'", totalDelFileCount,
351+
tableName, column.getNameAsString());
351352
// compact the mob files by partitions.
352353
paths = compactMobFiles(request);
353-
LOG.info("After compaction, there are " + paths.size() + " mob files");
354+
LOG.info("After compaction, there are {} mob files. table='{}' column='{}'", paths.size(),
355+
tableName, column.getNameAsString());
354356
} finally {
355357
for (CompactionDelPartition delPartition : request.getDelPartitions()) {
356358
closeStoreFileReaders(delPartition.getStoreFiles());
@@ -359,15 +361,17 @@ protected List<Path> performCompaction(PartitionedMobCompactionRequest request)
359361

360362
// archive the del files if all the mob files are selected.
361363
if (request.type == CompactionType.ALL_FILES && !request.getDelPartitions().isEmpty()) {
362-
LOG.info(
363-
"After a mob compaction with all files selected, archiving the del files ");
364+
LOG.info("After a mob compaction with all files selected, archiving the del files for " +
365+
"table='{}' and column='{}'", tableName, column.getNameAsString());
364366
for (CompactionDelPartition delPartition : request.getDelPartitions()) {
365367
LOG.info(Objects.toString(delPartition.listDelFiles()));
366368
try {
367369
MobUtils.removeMobFiles(conf, fs, tableName, mobTableDir, column.getName(),
368370
delPartition.getStoreFiles());
369371
} catch (IOException e) {
370-
LOG.error("Failed to archive the del files " + delPartition.getStoreFiles(), e);
372+
LOG.error("Failed to archive the del files {} for partition {} table='{}' and " +
373+
"column='{}'", delPartition.getStoreFiles(), delPartition.getId(), tableName,
374+
column.getNameAsString(), e);
371375
}
372376
}
373377
}
@@ -461,7 +465,8 @@ protected List<Path> compactMobFiles(final PartitionedMobCompactionRequest reque
461465
throws IOException {
462466
Collection<CompactionPartition> partitions = request.compactionPartitions;
463467
if (partitions == null || partitions.isEmpty()) {
464-
LOG.info("No partitions of mob files");
468+
LOG.info("No partitions of mob files in table='{}' and column='{}'", tableName,
469+
column.getNameAsString());
465470
return Collections.emptyList();
466471
}
467472
List<Path> paths = new ArrayList<>();
@@ -483,7 +488,8 @@ protected List<Path> compactMobFiles(final PartitionedMobCompactionRequest reque
483488
results.put(partition.getPartitionId(), pool.submit(new Callable<List<Path>>() {
484489
@Override
485490
public List<Path> call() throws Exception {
486-
LOG.info("Compacting mob files for partition " + partition.getPartitionId());
491+
LOG.info("Compacting mob files for partition {} for table='{}' and column='{}'",
492+
partition.getPartitionId(), tableName, column.getNameAsString());
487493
return compactMobFilePartition(request, partition, delFiles, c, table);
488494
}
489495
}));
@@ -495,13 +501,15 @@ public List<Path> call() throws Exception {
495501
paths.addAll(result.getValue().get());
496502
} catch (Exception e) {
497503
// just log the error
498-
LOG.error("Failed to compact the partition " + result.getKey(), e);
504+
LOG.error("Failed to compact the partition {} for table='{}' and column='{}'",
505+
result.getKey(), tableName, column.getNameAsString(), e);
499506
failedPartitions.add(result.getKey());
500507
}
501508
}
502509
if (!failedPartitions.isEmpty()) {
503510
// if any partition fails in the compaction, directly throw an exception.
504-
throw new IOException("Failed to compact the partitions " + failedPartitions);
511+
throw new IOException("Failed to compact the partitions " + failedPartitions +
512+
" for table='" + tableName + "' column='" + column.getNameAsString() + "'");
505513
}
506514
} finally {
507515
try {
@@ -567,8 +575,9 @@ private List<Path> compactMobFilePartition(PartitionedMobCompactionRequest reque
567575
// move to the next batch.
568576
offset += batch;
569577
}
570-
LOG.info("Compaction is finished. The number of mob files is changed from " + files.size()
571-
+ " to " + newFiles.size());
578+
LOG.info("Compaction is finished. The number of mob files is changed from {} to {} for " +
579+
"partition={} for table='{}' and column='{}'", files.size(), newFiles.size(),
580+
partition.getPartitionId(), tableName, column.getNameAsString());
572581
return newFiles;
573582
}
574583

@@ -675,8 +684,12 @@ private void compactMobFilesInBatch(PartitionedMobCompactionRequest request,
675684
cleanupTmpMobFile = false;
676685
cleanupCommittedMobFile = true;
677686
// bulkload the ref file
687+
LOG.info("start MOB ref bulkload for partition {} table='{}' column='{}'",
688+
partition.getPartitionId(), tableName, column.getNameAsString());
678689
bulkloadRefFile(connection, table, bulkloadPathOfPartition, filePath.getName());
679690
cleanupCommittedMobFile = false;
691+
LOG.info("end MOB ref bulkload for partition {} table='{}' column='{}'",
692+
partition.getPartitionId(), tableName, column.getNameAsString());
680693
newFiles.add(new Path(mobFamilyDir, filePath.getName()));
681694
}
682695

@@ -703,7 +716,11 @@ private void compactMobFilesInBatch(PartitionedMobCompactionRequest request,
703716
}
704717

705718
if (cleanupCommittedMobFile) {
706-
deletePath(new Path(mobFamilyDir, filePath.getName()));
719+
LOG.error("failed MOB ref bulkload for partition {} table='{}' column='{}'",
720+
partition.getPartitionId(), tableName, column.getNameAsString());
721+
MobUtils.removeMobFiles(conf, fs, tableName, mobTableDir, column.getName(),
722+
Collections.singletonList(new HStoreFile(fs, new Path(mobFamilyDir, filePath.getName()),
723+
conf, compactionCacheConfig, BloomType.NONE, true)));
707724
}
708725
}
709726
}
@@ -904,6 +921,7 @@ private Pair<Long, Long> getFileInfo(List<HStoreFile> storeFiles) throws IOExcep
904921
* @param path The path of the file to be deleted.
905922
*/
906923
private void deletePath(Path path) {
924+
LOG.debug("Cleanup, delete path '{}'", path);
907925
try {
908926
if (path != null) {
909927
fs.delete(path, true);

hbase-server/src/test/java/org/apache/hadoop/hbase/master/cleaner/TestHFileCleaner.java

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,14 +34,19 @@
3434
import org.apache.hadoop.hbase.HBaseClassTestRule;
3535
import org.apache.hadoop.hbase.HBaseTestingUtility;
3636
import org.apache.hadoop.hbase.HConstants;
37+
import org.apache.hadoop.hbase.HRegionInfo;
3738
import org.apache.hadoop.hbase.Server;
3839
import org.apache.hadoop.hbase.ServerName;
40+
import org.apache.hadoop.hbase.TableName;
3941
import org.apache.hadoop.hbase.client.ClusterConnection;
4042
import org.apache.hadoop.hbase.client.Connection;
43+
import org.apache.hadoop.hbase.mob.ManualMobMaintHFileCleaner;
44+
import org.apache.hadoop.hbase.mob.MobUtils;
4145
import org.apache.hadoop.hbase.testclassification.MasterTests;
4246
import org.apache.hadoop.hbase.testclassification.MediumTests;
4347
import org.apache.hadoop.hbase.util.EnvironmentEdge;
4448
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
49+
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
4550
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
4651
import org.junit.AfterClass;
4752
import org.junit.Assert;
@@ -96,6 +101,44 @@ public void testTTLCleaner() throws IOException, InterruptedException {
96101
+ " with create time:" + createTime, cleaner.isFileDeletable(fs.getFileStatus(file)));
97102
}
98103

104+
@Test
105+
public void testManualMobCleanerStopsMobRemoval() throws IOException {
106+
FileSystem fs = UTIL.getDFSCluster().getFileSystem();
107+
Path root = UTIL.getDataTestDirOnTestFS();
108+
TableName table = TableName.valueOf("testManualMobCleanerStopsMobRemoval");
109+
Path mob = HFileArchiveUtil.getRegionArchiveDir(root, table,
110+
MobUtils.getMobRegionInfo(table).getEncodedName());
111+
Path family= new Path(mob, "family");
112+
113+
Path file = new Path(family, "someHFileThatWouldBeAUUID");
114+
fs.createNewFile(file);
115+
assertTrue("Test file not created!", fs.exists(file));
116+
117+
ManualMobMaintHFileCleaner cleaner = new ManualMobMaintHFileCleaner();
118+
119+
assertFalse("Mob File shouldn't have been deletable. check path. '"+file+"'",
120+
cleaner.isFileDeletable(fs.getFileStatus(file)));
121+
}
122+
123+
@Test
124+
public void testManualMobCleanerLetsNonMobGo() throws IOException {
125+
FileSystem fs = UTIL.getDFSCluster().getFileSystem();
126+
Path root = UTIL.getDataTestDirOnTestFS();
127+
TableName table = TableName.valueOf("testManualMobCleanerLetsNonMobGo");
128+
Path nonmob = HFileArchiveUtil.getRegionArchiveDir(root, table,
129+
new HRegionInfo(table).getEncodedName());
130+
Path family= new Path(nonmob, "family");
131+
132+
Path file = new Path(family, "someHFileThatWouldBeAUUID");
133+
fs.createNewFile(file);
134+
assertTrue("Test file not created!", fs.exists(file));
135+
136+
ManualMobMaintHFileCleaner cleaner = new ManualMobMaintHFileCleaner();
137+
138+
assertTrue("Non-Mob File should have been deletable. check path. '"+file+"'",
139+
cleaner.isFileDeletable(fs.getFileStatus(file)));
140+
}
141+
99142
/**
100143
* @param file to check
101144
* @return loggable information about the file
@@ -114,7 +157,8 @@ public void testHFileCleaning() throws Exception {
114157
// set TTL
115158
long ttl = 2000;
116159
conf.set(HFileCleaner.MASTER_HFILE_CLEANER_PLUGINS,
117-
"org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner");
160+
"org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner," +
161+
"org.apache.hadoop.hbase.mob.ManualMobMaintHFileCleaner");
118162
conf.setLong(TimeToLiveHFileCleaner.TTL_CONF_KEY, ttl);
119163
Server server = new DummyServer();
120164
Path archivedHfileDir =

hbase-server/src/test/resources/log4j.properties

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ log4j.logger.org.apache.hadoop=WARN
5858
log4j.logger.org.apache.zookeeper=ERROR
5959
log4j.logger.org.apache.hadoop.hbase=DEBUG
6060

61+
log4j.logger.org.apache.hadoop.hbase.mob.ManualMobMaintHFileCleaner=DEBUG
62+
6163
#These settings are workarounds against spurious logs from the minicluster.
6264
#See HBASE-4709
6365
log4j.logger.org.apache.hadoop.metrics2.impl.MetricsConfig=WARN

0 commit comments

Comments
 (0)