Skip to content

Commit 67a91db

Browse files
committed
HBASE-26640 Reimplement master local region initialization to better work with SFT
1 parent cd45cad commit 67a91db

File tree

9 files changed

+368
-36
lines changed

9 files changed

+368
-36
lines changed

hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -799,7 +799,7 @@ private void tryMigrateMetaLocationsFromZooKeeper() throws IOException, KeeperEx
799799
// done with in a one row put, which means if we have data in catalog family then we can
800800
// make sure that the migration is done.
801801
LOG.info("The {} family in master local region already has data in it, skip migrating...",
802-
HConstants.CATALOG_FAMILY);
802+
HConstants.CATALOG_FAMILY_STR);
803803
return;
804804
}
805805
}
@@ -4077,7 +4077,7 @@ public MetaLocationSyncer getMetaLocationSyncer() {
40774077

40784078
@RestrictedApi(explanation = "Should only be called in tests", link = "",
40794079
allowedOnPath = ".*/src/test/.*")
4080-
MasterRegion getMasterRegion() {
4080+
public MasterRegion getMasterRegion() {
40814081
return masterRegion;
40824082
}
40834083

hbase-server/src/main/java/org/apache/hadoop/hbase/master/region/MasterRegion.java

Lines changed: 112 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,27 +20,34 @@
2020
import static org.apache.hadoop.hbase.HConstants.HREGION_LOGDIR_NAME;
2121

2222
import java.io.IOException;
23+
import java.util.List;
2324
import org.apache.hadoop.conf.Configuration;
2425
import org.apache.hadoop.fs.FileStatus;
2526
import org.apache.hadoop.fs.FileSystem;
2627
import org.apache.hadoop.fs.Path;
2728
import org.apache.hadoop.hbase.HBaseIOException;
2829
import org.apache.hadoop.hbase.Server;
2930
import org.apache.hadoop.hbase.TableName;
31+
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
3032
import org.apache.hadoop.hbase.client.Get;
3133
import org.apache.hadoop.hbase.client.RegionInfo;
3234
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
3335
import org.apache.hadoop.hbase.client.Result;
3436
import org.apache.hadoop.hbase.client.ResultScanner;
3537
import org.apache.hadoop.hbase.client.Scan;
3638
import org.apache.hadoop.hbase.client.TableDescriptor;
39+
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
3740
import org.apache.hadoop.hbase.regionserver.HRegion;
3841
import org.apache.hadoop.hbase.regionserver.HRegion.FlushResult;
3942
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
4043
import org.apache.hadoop.hbase.regionserver.RegionScanner;
44+
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
45+
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
46+
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
4147
import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL;
4248
import org.apache.hadoop.hbase.util.Bytes;
4349
import org.apache.hadoop.hbase.util.CommonFSUtils;
50+
import org.apache.hadoop.hbase.util.FSTableDescriptors;
4451
import org.apache.hadoop.hbase.util.FSUtils;
4552
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
4653
import org.apache.hadoop.hbase.util.RecoverLeaseFSUtils;
@@ -92,6 +99,10 @@ public final class MasterRegion {
9299

93100
private static final String DEAD_WAL_DIR_SUFFIX = "-dead";
94101

102+
static final String INITIALIZING_FLAG = ".initializing";
103+
104+
static final String INITIALIZED_FLAG = ".initialized";
105+
95106
private static final int REGION_ID = 1;
96107

97108
private final WALFactory walFactory;
@@ -196,32 +207,37 @@ private static WAL createWAL(WALFactory walFactory, MasterRegionWALRoller walRol
196207

197208
private static HRegion bootstrap(Configuration conf, TableDescriptor td, FileSystem fs,
198209
Path rootDir, FileSystem walFs, Path walRootDir, WALFactory walFactory,
199-
MasterRegionWALRoller walRoller, String serverName) throws IOException {
210+
MasterRegionWALRoller walRoller, String serverName, boolean touchInitializingFlag)
211+
throws IOException {
200212
TableName tn = td.getTableName();
201213
RegionInfo regionInfo = RegionInfoBuilder.newBuilder(tn).setRegionId(REGION_ID).build();
202-
Path tmpTableDir = CommonFSUtils.getTableDir(rootDir,
203-
TableName.valueOf(tn.getNamespaceAsString(), tn.getQualifierAsString() + "-tmp"));
204-
if (fs.exists(tmpTableDir) && !fs.delete(tmpTableDir, true)) {
205-
throw new IOException("Can not delete partial created proc region " + tmpTableDir);
206-
}
207-
HRegion.createHRegion(conf, regionInfo, fs, tmpTableDir, td).close();
208214
Path tableDir = CommonFSUtils.getTableDir(rootDir, tn);
209-
if (!fs.rename(tmpTableDir, tableDir)) {
210-
throw new IOException("Can not rename " + tmpTableDir + " to " + tableDir);
215+
// persist table descriptor
216+
FSTableDescriptors.createTableDescriptorForTableDirectory(fs, tableDir, td, true);
217+
HRegion.createHRegion(conf, regionInfo, fs, tableDir, td).close();
218+
Path initializedFlag = new Path(tableDir, INITIALIZED_FLAG);
219+
if (!fs.mkdirs(initializedFlag)) {
220+
throw new IOException("Can not touch initialized flag: " + initializedFlag);
221+
}
222+
Path initializingFlag = new Path(tableDir, INITIALIZING_FLAG);
223+
if (!fs.delete(initializingFlag, true)) {
224+
LOG.warn("failed to clean up initializing flag: " + initializingFlag);
211225
}
212226
WAL wal = createWAL(walFactory, walRoller, serverName, walFs, walRootDir, regionInfo);
213227
return HRegion.openHRegionFromTableDir(conf, fs, tableDir, regionInfo, td, wal, null, null);
214228
}
215229

216-
private static HRegion open(Configuration conf, TableDescriptor td, FileSystem fs, Path rootDir,
217-
FileSystem walFs, Path walRootDir, WALFactory walFactory, MasterRegionWALRoller walRoller,
218-
String serverName) throws IOException {
219-
Path tableDir = CommonFSUtils.getTableDir(rootDir, td.getTableName());
230+
private static RegionInfo loadRegionInfo(FileSystem fs, Path tableDir) throws IOException {
220231
Path regionDir =
221232
fs.listStatus(tableDir, p -> RegionInfo.isEncodedRegionName(Bytes.toBytes(p.getName())))[0]
222233
.getPath();
223-
RegionInfo regionInfo = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
234+
return HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
235+
}
224236

237+
private static HRegion open(Configuration conf, TableDescriptor td, RegionInfo regionInfo,
238+
FileSystem fs, Path rootDir, FileSystem walFs, Path walRootDir, WALFactory walFactory,
239+
MasterRegionWALRoller walRoller, String serverName) throws IOException {
240+
Path tableDir = CommonFSUtils.getTableDir(rootDir, td.getTableName());
225241
Path walRegionDir = FSUtils.getRegionDirFromRootDir(walRootDir, regionInfo);
226242
Path replayEditsDir = new Path(walRegionDir, REPLAY_EDITS_DIR);
227243
if (!walFs.exists(replayEditsDir) && !walFs.mkdirs(replayEditsDir)) {
@@ -287,6 +303,39 @@ private static void replayWALs(Configuration conf, FileSystem walFs, Path walRoo
287303
}
288304
}
289305

306+
private static void tryMigrate(Configuration conf, FileSystem fs, Path tableDir,
307+
RegionInfo regionInfo, TableDescriptor oldTd, TableDescriptor newTd) throws IOException {
308+
Class<? extends StoreFileTracker> oldSft =
309+
StoreFileTrackerFactory.getTrackerClass(oldTd.getValue(StoreFileTrackerFactory.TRACKER_IMPL));
310+
Class<? extends StoreFileTracker> newSft =
311+
StoreFileTrackerFactory.getTrackerClass(newTd.getValue(StoreFileTrackerFactory.TRACKER_IMPL));
312+
if (oldSft.equals(newSft)) {
313+
LOG.debug("old store file tracker {} is the same with new store file tracker, skip migration",
314+
StoreFileTrackerFactory.getStoreFileTrackerName(oldSft));
315+
if (!oldTd.equals(newTd)) {
316+
// we may change other things such as adding a new family, so here we still need to persist
317+
// the new table descriptor
318+
LOG.info("Update table descriptor from {} to {}", oldTd, newTd);
319+
FSTableDescriptors.createTableDescriptorForTableDirectory(fs, tableDir, newTd, true);
320+
}
321+
return;
322+
}
323+
LOG.info("Migrate store file tracker from {} to {}", oldSft.getSimpleName(),
324+
newSft.getSimpleName());
325+
HRegionFileSystem hfs =
326+
HRegionFileSystem.openRegionFromFileSystem(conf, fs, tableDir, regionInfo, false);
327+
for (ColumnFamilyDescriptor oldCfd : oldTd.getColumnFamilies()) {
328+
StoreFileTracker oldTracker = StoreFileTrackerFactory.create(conf, oldTd, oldCfd, hfs);
329+
StoreFileTracker newTracker = StoreFileTrackerFactory.create(conf, oldTd, oldCfd, hfs);
330+
List<StoreFileInfo> files = oldTracker.load();
331+
LOG.debug("Store file list for {}: {}", oldCfd.getNameAsString(), files);
332+
newTracker.set(oldTracker.load());
333+
}
334+
// persist the new table descriptor after migration
335+
LOG.info("Update table descriptor from {} to {}", oldTd, newTd);
336+
FSTableDescriptors.createTableDescriptorForTableDirectory(fs, tableDir, newTd, true);
337+
}
338+
290339
public static MasterRegion create(MasterRegionParams params) throws IOException {
291340
TableDescriptor td = params.tableDescriptor();
292341
LOG.info("Create or load local region for table " + td);
@@ -321,16 +370,58 @@ public static MasterRegion create(MasterRegionParams params) throws IOException
321370

322371
WALFactory walFactory = new WALFactory(conf, server.getServerName().toString(), server, false);
323372
Path tableDir = CommonFSUtils.getTableDir(rootDir, td.getTableName());
373+
Path initializingFlag = new Path(tableDir, INITIALIZING_FLAG);
374+
Path initializedFlag = new Path(tableDir, INITIALIZED_FLAG);
324375
HRegion region;
325-
if (fs.exists(tableDir)) {
326-
// load the existing region.
327-
region = open(conf, td, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
328-
server.getServerName().toString());
329-
} else {
330-
// bootstrapping...
376+
if (!fs.exists(tableDir)) {
377+
// bootstrap, no doubt
378+
if (!fs.mkdirs(initializedFlag)) {
379+
throw new IOException("Can not touch initialized flag");
380+
}
331381
region = bootstrap(conf, td, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
332-
server.getServerName().toString());
382+
server.getServerName().toString(), true);
383+
} else {
384+
if (!fs.exists(initializedFlag)) {
385+
if (!fs.exists(initializingFlag)) {
386+
// should be old style, where we do not have the initializing or initialized file, persist
387+
// the table descriptor, touch the initialized flag and then open the region.
388+
// the store file tracker must be DEFAULT
389+
LOG.info("No {} or {} file, try upgrading", INITIALIZING_FLAG, INITIALIZED_FLAG);
390+
TableDescriptor oldTd =
391+
TableDescriptorBuilder.newBuilder(td).setValue(StoreFileTrackerFactory.TRACKER_IMPL,
392+
StoreFileTrackerFactory.Trackers.DEFAULT.name()).build();
393+
FSTableDescriptors.createTableDescriptorForTableDirectory(fs, tableDir, oldTd, true);
394+
if (!fs.mkdirs(initializedFlag)) {
395+
throw new IOException("Can not touch initialized flag: " + initializedFlag);
396+
}
397+
RegionInfo regionInfo = loadRegionInfo(fs, tableDir);
398+
tryMigrate(conf, fs, tableDir, regionInfo, oldTd, td);
399+
region = open(conf, td, regionInfo, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
400+
server.getServerName().toString());
401+
} else {
402+
// delete all contents besides the initializing flag, here we can make sure tableDir
403+
// exists(unless someone delete it manually...), so we do not do null check here.
404+
for (FileStatus status : fs.listStatus(tableDir)) {
405+
if (!status.getPath().getName().equals(INITIALIZING_FLAG)) {
406+
fs.delete(status.getPath(), true);
407+
}
408+
}
409+
region = bootstrap(conf, td, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
410+
server.getServerName().toString(), false);
411+
}
412+
} else {
413+
if (fs.exists(initializingFlag) && !fs.delete(initializingFlag, true)) {
414+
LOG.warn("failed to clean up initializing flag: " + initializingFlag);
415+
}
416+
// open it, make sure to load the table descriptor from fs
417+
TableDescriptor oldTd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
418+
RegionInfo regionInfo = loadRegionInfo(fs, tableDir);
419+
tryMigrate(conf, fs, tableDir, regionInfo, oldTd, td);
420+
region = open(conf, td, regionInfo, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
421+
server.getServerName().toString());
422+
}
333423
}
424+
334425
Path globalArchiveDir = HFileArchiveUtil.getArchivePath(baseConf);
335426
MasterRegionFlusherAndCompactor flusherAndCompactor = new MasterRegionFlusherAndCompactor(conf,
336427
server, region, params.flushSize(), params.flushPerChanges(), params.flushIntervalMs(),

hbase-server/src/main/java/org/apache/hadoop/hbase/master/region/MasterRegionFactory.java

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,10 @@
2828
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
2929
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
3030
import org.apache.hadoop.hbase.regionserver.BloomType;
31+
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
32+
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
3133
import org.apache.hadoop.hbase.util.Bytes;
34+
import org.apache.hadoop.hbase.util.ReflectionUtils;
3235
import org.apache.yetus.audience.InterfaceAudience;
3336

3437
/**
@@ -78,6 +81,8 @@ public final class MasterRegionFactory {
7881

7982
private static final int DEFAULT_RING_BUFFER_SLOT_COUNT = 128;
8083

84+
public static final String TRACKER_IMPL = "hbase.master.store.region.file-tracker.impl";
85+
8186
public static final TableName TABLE_NAME = TableName.valueOf("master:store");
8287

8388
public static final byte[] PROC_FAMILY = Bytes.toBytes("proc");
@@ -89,10 +94,23 @@ public final class MasterRegionFactory {
8994
.setDataBlockEncoding(DataBlockEncoding.ROW_INDEX_V1).build())
9095
.setColumnFamily(ColumnFamilyDescriptorBuilder.of(PROC_FAMILY)).build();
9196

97+
private static TableDescriptor withTrackerConfigs(Configuration conf) {
98+
String trackerImpl = conf.get(TRACKER_IMPL, conf.get(StoreFileTrackerFactory.TRACKER_IMPL,
99+
StoreFileTrackerFactory.Trackers.DEFAULT.name()));
100+
Class<? extends StoreFileTracker> trackerClass =
101+
StoreFileTrackerFactory.getTrackerClass(trackerImpl);
102+
if (StoreFileTrackerFactory.isMigration(trackerClass)) {
103+
throw new IllegalArgumentException("Should not set store file tracker to " +
104+
StoreFileTrackerFactory.Trackers.MIGRATION.name() + " for master local region");
105+
}
106+
StoreFileTracker tracker = ReflectionUtils.newInstance(trackerClass, conf, true, null);
107+
return tracker.updateWithTrackerConfigs(TableDescriptorBuilder.newBuilder(TABLE_DESC)).build();
108+
}
109+
92110
public static MasterRegion create(Server server) throws IOException {
93-
MasterRegionParams params = new MasterRegionParams().server(server)
94-
.regionDirName(MASTER_STORE_DIR).tableDescriptor(TABLE_DESC);
95111
Configuration conf = server.getConfiguration();
112+
MasterRegionParams params = new MasterRegionParams().server(server)
113+
.regionDirName(MASTER_STORE_DIR).tableDescriptor(withTrackerConfigs(conf));
96114
long flushSize = conf.getLong(FLUSH_SIZE_KEY, DEFAULT_FLUSH_SIZE);
97115
long flushPerChanges = conf.getLong(FLUSH_PER_CHANGES_KEY, DEFAULT_FLUSH_PER_CHANGES);
98116
long flushIntervalMs = conf.getLong(FLUSH_INTERVAL_MS_KEY, DEFAULT_FLUSH_INTERVAL_MS);

hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/storefiletracker/StoreFileTrackerFactory.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ public static String getStoreFileTrackerName(Configuration conf) {
8484
return conf.get(TRACKER_IMPL, Trackers.DEFAULT.name());
8585
}
8686

87-
static String getStoreFileTrackerName(Class<? extends StoreFileTracker> clazz) {
87+
public static String getStoreFileTrackerName(Class<? extends StoreFileTracker> clazz) {
8888
Trackers name = CLASS_TO_ENUM.get(clazz);
8989
return name != null ? name.name() : clazz.getName();
9090
}
@@ -184,4 +184,8 @@ public static TableDescriptor updateWithTrackerConfigs(Configuration conf,
184184
}
185185
return descriptor;
186186
}
187+
188+
public static boolean isMigration(Class<?> clazz) {
189+
return MigrationStoreFileTracker.class.isAssignableFrom(clazz);
190+
}
187191
}

hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSTableDescriptors.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,13 @@ private static Optional<Pair<FileStatus, TableDescriptor>> getTableDescriptorFro
520520
return td != null ? Optional.of(Pair.newPair(descFile, td)) : Optional.empty();
521521
}
522522

523+
@RestrictedApi(explanation = "Should only be called in tests", link = "",
524+
allowedOnPath = ".*/src/test/.*")
525+
public static void deleteTableDescriptors(FileSystem fs, Path tableDir) throws IOException {
526+
Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
527+
deleteTableDescriptorFiles(fs, tableInfoDir, Integer.MAX_VALUE);
528+
}
529+
523530
/**
524531
* Deletes files matching the table info file pattern within the given directory whose sequenceId
525532
* is at most the given max sequenceId.

hbase-server/src/test/java/org/apache/hadoop/hbase/master/region/MasterRegionTestBase.java

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
3535
import org.apache.hadoop.hbase.master.cleaner.DirScanPool;
3636
import org.apache.hadoop.hbase.regionserver.MemStoreLAB;
37+
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
3738
import org.apache.hadoop.hbase.util.Bytes;
3839
import org.apache.hadoop.hbase.util.CommonFSUtils;
3940
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
@@ -87,20 +88,25 @@ public void setUp() throws IOException {
8788
/**
8889
* Creates a new MasterRegion using an existing {@code htu} on this class.
8990
*/
90-
protected void createMasterRegion() throws IOException {
91-
configure(htu.getConfiguration());
91+
protected final void createMasterRegion() throws IOException {
92+
Configuration conf = htu.getConfiguration();
93+
configure(conf);
9294
choreService = new ChoreService(getClass().getSimpleName());
93-
hfileCleanerPool = DirScanPool.getHFileCleanerScanPool(htu.getConfiguration());
94-
logCleanerPool = DirScanPool.getLogCleanerScanPool(htu.getConfiguration());
95+
hfileCleanerPool = DirScanPool.getHFileCleanerScanPool(conf);
96+
logCleanerPool = DirScanPool.getLogCleanerScanPool(conf);
9597
Server server = mock(Server.class);
96-
when(server.getConfiguration()).thenReturn(htu.getConfiguration());
98+
when(server.getConfiguration()).thenReturn(conf);
9799
when(server.getServerName())
98100
.thenReturn(ServerName.valueOf("localhost", 12345, EnvironmentEdgeManager.currentTime()));
99101
when(server.getChoreService()).thenReturn(choreService);
100102
Path testDir = htu.getDataTestDir();
101-
CommonFSUtils.setRootDir(htu.getConfiguration(), testDir);
103+
CommonFSUtils.setRootDir(conf, testDir);
102104
MasterRegionParams params = new MasterRegionParams();
103-
params.server(server).regionDirName(REGION_DIR_NAME).tableDescriptor(TD)
105+
TableDescriptor td = TableDescriptorBuilder
106+
.newBuilder(TD).setValue(StoreFileTrackerFactory.TRACKER_IMPL, conf
107+
.get(StoreFileTrackerFactory.TRACKER_IMPL, StoreFileTrackerFactory.Trackers.DEFAULT.name()))
108+
.build();
109+
params.server(server).regionDirName(REGION_DIR_NAME).tableDescriptor(td)
104110
.flushSize(TableDescriptorBuilder.DEFAULT_MEMSTORE_FLUSH_SIZE).flushPerChanges(1_000_000)
105111
.flushIntervalMs(TimeUnit.MINUTES.toMillis(15)).compactMin(4).maxWals(32).useHsync(false)
106112
.ringBufferSlotCount(16).rollPeriodMs(TimeUnit.MINUTES.toMillis(15))

0 commit comments

Comments
 (0)