Skip to content

Commit

Permalink
HBASE-25213 Should request Compaction after bulkLoadHFiles is done
Browse files Browse the repository at this point in the history
  • Loading branch information
niuyulin committed Nov 13, 2020
1 parent 09aaa68 commit 6cf8aab
Show file tree
Hide file tree
Showing 4 changed files with 398 additions and 233 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7096,6 +7096,19 @@ public Map<byte[], List<Path>> bulkLoadHFiles(Collection<Pair<byte[], String>> f
}

isSuccessful = true;
//request compaction
familyWithFinalPath.keySet().forEach(family -> {
HStore store = getStore(family);
try {
if (this.rsServices != null && store.needsCompaction()) {
this.rsServices.getCompactionRequestor().requestCompaction(this, store,
"bulkload hfiles request compaction", Store.PRIORITY_USER + 1,
CompactionLifeCycleTracker.DUMMY, null);
}
} catch (IOException e) {
LOG.error("bulkload hfiles request compaction error ", e);
}
});
} finally {
if (wal != null && !storeFiles.isEmpty()) {
// Write a bulk load event for hfiles that are loaded
Expand Down Expand Up @@ -7835,20 +7848,19 @@ public void run() throws IOException {

// Utility methods
/**
* A utility method to create new instances of HRegion based on the
* {@link HConstants#REGION_IMPL} configuration property.
* @param tableDir qualified path of directory where region should be located,
* usually the table directory.
* @param wal The WAL is the outbound log for any updates to the HRegion
* The wal file is a logfile from the previous execution that's
* custom-computed for this HRegion. The HRegionServer computes and sorts the
* appropriate wal info for this HRegion. If there is a previous file
* (implying that the HRegion has been written-to before), then read it from
* the supplied path.
* A utility method to create new instances of HRegion based on the {@link HConstants#REGION_IMPL}
* configuration property.
* @param tableDir qualified path of directory where region should be located, usually the table
* directory.
* @param wal The WAL is the outbound log for any updates to the HRegion. The wal file is a
* logfile from the previous execution that's custom-computed for this HRegion. The
* HRegionServer computes and sorts the appropriate wal info for this HRegion. If there is a
* previous file (implying that the HRegion has been written-to before), then read it from
* the supplied path.
* @param fs is the filesystem.
* @param conf is global configuration settings.
* @param regionInfo - RegionInfo that describes the region
* is new), then read them from the supplied path.
* @param regionInfo - RegionInfo that describes the region
* @param htd the table descriptor
* @return the new instance
*/
Expand All @@ -7874,22 +7886,37 @@ public static HRegion newHRegion(Path tableDir, WAL wal, FileSystem fs,

/**
* Convenience method creating new HRegions. Used by createTable.
*
* @param info Info for region to create.
* @param rootDir Root directory for HBase instance
* @param wal shared WAL
* @param initialize - true to initialize the region
* @return new HRegion
*/
public static HRegion createHRegion(final RegionInfo info, final Path rootDir,
final Configuration conf, final TableDescriptor hTableDescriptor, final WAL wal,
final boolean initialize) throws IOException {
LOG.info("creating " + info + ", tableDescriptor=" +
(hTableDescriptor == null ? "null" : hTableDescriptor) + ", regionDir=" + rootDir);
final Configuration conf, final TableDescriptor hTableDescriptor, final WAL wal,
final boolean initialize) throws IOException {
return createHRegion(info, rootDir, conf, hTableDescriptor, wal, initialize, null);
}

/**
* Convenience method creating new HRegions. Used by createTable.
* @param info Info for region to create.
* @param rootDir Root directory for HBase instance
* @param wal shared WAL
* @param initialize - true to initialize the region
* @param rsRpcServices An interface we can request flushes against.
* @return new HRegion
*/
public static HRegion createHRegion(final RegionInfo info, final Path rootDir,
final Configuration conf, final TableDescriptor hTableDescriptor, final WAL wal,
final boolean initialize, RegionServerServices rsRpcServices) throws IOException {
LOG.info("creating " + info + ", tableDescriptor="
+ (hTableDescriptor == null ? "null" : hTableDescriptor) + ", regionDir=" + rootDir);
createRegionDir(conf, info, rootDir);
FileSystem fs = rootDir.getFileSystem(conf);
Path tableDir = CommonFSUtils.getTableDir(rootDir, info.getTable());
HRegion region = HRegion.newHRegion(tableDir, wal, fs, conf, info, hTableDescriptor, null);
HRegion region =
HRegion.newHRegion(tableDir, wal, fs, conf, info, hTableDescriptor, rsRpcServices);
if (initialize) {
region.initialize(null);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,95 +18,42 @@
package org.apache.hadoop.hbase.regionserver;

import static java.util.Arrays.asList;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyLong;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import static org.mockito.hamcrest.MockitoHamcrest.argThat;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.CellBuilderType;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.ExtendedCellBuilderFactory;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.testclassification.SmallTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALEdit;
import org.apache.hadoop.hbase.wal.WALKeyImpl;
import org.hamcrest.Description;
import org.hamcrest.Matcher;
import org.hamcrest.TypeSafeMatcher;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.TemporaryFolder;
import org.junit.rules.TestName;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor;

/**
* This class attempts to unit test bulk HLog loading.
*/
@Category(SmallTests.class)
public class TestBulkLoad {
public class TestBulkLoad extends TestBulkloadBase {

@ClassRule
public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestBulkLoad.class);

@ClassRule
public static TemporaryFolder testFolder = new TemporaryFolder();
private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
private final WAL log = mock(WAL.class);
private final Configuration conf = HBaseConfiguration.create();
private final Random random = new Random();
private final byte[] randomBytes = new byte[100];
private final byte[] family1 = Bytes.toBytes("family1");
private final byte[] family2 = Bytes.toBytes("family2");
private final byte[] family3 = Bytes.toBytes("family3");

@Rule
public TestName name = new TestName();

@Before
public void before() throws IOException {
random.nextBytes(randomBytes);
// Mockito.when(log.append(htd, info, key, edits, inMemstore));
}

@Test
public void verifyBulkLoadEvent() throws IOException {
TableName tableName = TableName.valueOf("test", "test");
Expand Down Expand Up @@ -142,19 +89,19 @@ public void bulkHLogShouldThrowNoErrorAndWriteMarkerWithBlankInput() throws IOEx

@Test
public void shouldBulkLoadSingleFamilyHLog() throws IOException {
when(log.appendMarker(any(),
any(), argThat(bulkLogWalEditType(WALEdit.BULK_LOAD)))).thenAnswer(new Answer() {
@Override
public Object answer(InvocationOnMock invocation) {
WALKeyImpl walKey = invocation.getArgument(1);
MultiVersionConcurrencyControl mvcc = walKey.getMvcc();
if (mvcc != null) {
MultiVersionConcurrencyControl.WriteEntry we = mvcc.begin();
walKey.setWriteEntry(we);
}
return 01L;
}
});
when(log.appendMarker(any(), any(), argThat(bulkLogWalEditType(WALEdit.BULK_LOAD))))
.thenAnswer(new Answer() {
@Override
public Object answer(InvocationOnMock invocation) {
WALKeyImpl walKey = invocation.getArgument(1);
MultiVersionConcurrencyControl mvcc = walKey.getMvcc();
if (mvcc != null) {
MultiVersionConcurrencyControl.WriteEntry we = mvcc.begin();
walKey.setWriteEntry(we);
}
return 01L;
}
});
testRegionWithFamilies(family1).bulkLoadHFiles(withFamilyPathsFor(family1), false, null);
verify(log).sync(anyLong());
}
Expand All @@ -181,19 +128,19 @@ public Object answer(InvocationOnMock invocation) {

@Test
public void shouldBulkLoadManyFamilyHLogEvenWhenTableNameNamespaceSpecified() throws IOException {
when(log.appendMarker(any(),
any(), argThat(bulkLogWalEditType(WALEdit.BULK_LOAD)))).thenAnswer(new Answer() {
@Override
public Object answer(InvocationOnMock invocation) {
WALKeyImpl walKey = invocation.getArgument(1);
MultiVersionConcurrencyControl mvcc = walKey.getMvcc();
if (mvcc != null) {
MultiVersionConcurrencyControl.WriteEntry we = mvcc.begin();
walKey.setWriteEntry(we);
}
return 01L;
}
});
when(log.appendMarker(any(), any(), argThat(bulkLogWalEditType(WALEdit.BULK_LOAD))))
.thenAnswer(new Answer() {
@Override
public Object answer(InvocationOnMock invocation) {
WALKeyImpl walKey = invocation.getArgument(1);
MultiVersionConcurrencyControl mvcc = walKey.getMvcc();
if (mvcc != null) {
MultiVersionConcurrencyControl.WriteEntry we = mvcc.begin();
walKey.setWriteEntry(we);
}
return 01L;
}
});
TableName tableName = TableName.valueOf("test", "test");
testRegionWithFamiliesAndSpecifiedTableName(tableName, family1, family2)
.bulkLoadHFiles(withFamilyPathsFor(family1, family2), false, null);
Expand Down Expand Up @@ -240,138 +187,4 @@ public void shouldThrowErrorIfMultiHFileDoesNotExist() throws IOException {
list.addAll(asList(withMissingHFileForFamily(family2)));
testRegionWithFamilies(family1, family2).bulkLoadHFiles(list, false, null);
}

/**
 * Builds a family/path pair whose path comes from {@link #getNotExistFilePath()}.
 * @param family column family to place in the pair
 * @return the family paired with the "does_not_exist" path
 */
private Pair<byte[], String> withMissingHFileForFamily(byte[] family) {
  String missingPath = getNotExistFilePath();
  return new Pair<>(family, missingPath);
}

/**
 * Resolves the entry named "does_not_exist" under the test utility's data test directory.
 * @return the path component of that location's URI
 */
private String getNotExistFilePath() {
  return new Path(TEST_UTIL.getDataTestDir(), "does_not_exist").toUri().getPath();
}

/**
 * Builds a family/path pair that names a column family which is not one of the test families
 * but points at an HFile that really exists.
 * <p>
 * The HFile is written for {@code family}, and its location is returned so that only the
 * family name in the pair is invalid. Previously the created file's path was discarded and a
 * non-existent path was returned instead.
 * @param family family used to write the backing HFile
 * @return pair of the fixed bytes {0x00, 0x01, 0x02} as family and the created HFile's path
 * @throws IOException if the HFile cannot be written
 */
private Pair<byte[], String> withInvalidColumnFamilyButProperHFileLocation(byte[] family)
  throws IOException {
  String hFilePath = createHFileForFamilies(family);
  return new Pair<>(new byte[] { 0x00, 0x01, 0x02 }, hFilePath);
}

/**
 * Creates an HRegion for {@code tableName} containing one column family per entry of
 * {@code families}, rooted in a fresh folder of the temporary test folder and using the
 * mocked WAL {@code log}.
 * @param tableName table the region belongs to
 * @param families column families to declare on the table descriptor
 * @return the region returned by {@code HRegion.createHRegion}
 * @throws IOException if the folder or the region cannot be created
 */
private HRegion testRegionWithFamiliesAndSpecifiedTableName(TableName tableName,
  byte[]... families) throws IOException {
  RegionInfo regionInfo = RegionInfoBuilder.newBuilder(tableName).build();

  TableDescriptorBuilder descriptorBuilder = TableDescriptorBuilder.newBuilder(tableName);
  Arrays.stream(families)
    .forEach(cf -> descriptorBuilder.setColumnFamily(ColumnFamilyDescriptorBuilder.of(cf)));

  ChunkCreator.initialize(MemStoreLAB.CHUNK_SIZE_DEFAULT, false, 0, 0, 0, null,
    MemStoreLAB.INDEX_CHUNK_SIZE_PERCENTAGE_DEFAULT);

  // TODO We need a way to do this without creating files
  Path regionRoot = new Path(testFolder.newFolder().toURI());
  return HRegion.createHRegion(regionInfo, regionRoot, conf, descriptorBuilder.build(), log);
}

/**
 * Creates a test region whose table is named after the currently running test method.
 * @param families column families to declare on the table
 * @return the created region
 * @throws IOException if region creation fails
 */
private HRegion testRegionWithFamilies(byte[]... families) throws IOException {
  return testRegionWithFamiliesAndSpecifiedTableName(TableName.valueOf(name.getMethodName()),
    families);
}

/**
 * @return a new, empty, mutable list of family/path pairs
 */
private List<Pair<byte[], String>> getBlankFamilyPaths() {
  List<Pair<byte[], String>> noPaths = new ArrayList<>();
  return noPaths;
}

/**
 * Writes one HFile per given family and collects the resulting family/path pairs.
 * @param families families to create HFiles for, processed in argument order
 * @return list holding one pair per family, in the same order
 * @throws IOException if any HFile cannot be written
 */
private List<Pair<byte[], String>> withFamilyPathsFor(byte[]... families) throws IOException {
  List<Pair<byte[], String>> result = getBlankFamilyPaths();
  for (int i = 0; i < families.length; i++) {
    byte[] cf = families[i];
    String hFilePath = createHFileForFamilies(cf);
    result.add(new Pair<>(cf, hFilePath));
  }
  return result;
}

/**
 * Writes a single-cell HFile for {@code family} into a new file of the temporary test folder.
 * <p>
 * The cell uses {@code randomBytes} as row, qualifier and value, timestamp 0 and type Put.
 * Both the output stream and the HFile writer are managed with try-with-resources, so they
 * are closed in reverse order of creation and a failure while closing is recorded as a
 * suppressed exception instead of replacing the original one.
 * @param family column family of the cell written into the HFile
 * @return absolute path of the created HFile
 * @throws IOException if the file cannot be created or written
 */
private String createHFileForFamilies(byte[] family) throws IOException {
  HFile.WriterFactory hFileFactory = HFile.getWriterFactoryNoCache(conf);
  // TODO We need a way to do this without creating files
  File hFileLocation = testFolder.newFile();
  try (FSDataOutputStream out =
    new FSDataOutputStream(new FileOutputStream(hFileLocation), null)) {
    hFileFactory.withOutputStream(out);
    hFileFactory.withFileContext(new HFileContextBuilder().build());
    // The writer is declared inside the stream's scope so it is closed before the stream.
    try (HFile.Writer writer = hFileFactory.create()) {
      writer.append(new KeyValue(ExtendedCellBuilderFactory.create(CellBuilderType.DEEP_COPY)
        .setRow(randomBytes)
        .setFamily(family)
        .setQualifier(randomBytes)
        .setTimestamp(0L)
        .setType(KeyValue.Type.Put.getCode())
        .setValue(randomBytes)
        .build()));
    }
  }
  return hFileLocation.getAbsoluteFile().getAbsolutePath();
}

/**
 * Creates a matcher that checks only the qualifier of a WALEdit's first cell.
 * @param typeBytes expected qualifier bytes
 * @return a {@link WalMatcher} with no table, family or store-file expectations
 */
private static Matcher<WALEdit> bulkLogWalEditType(byte[] typeBytes) {
  Matcher<WALEdit> typeOnly = new WalMatcher(typeBytes);
  return typeOnly;
}

/**
 * Creates a matcher that checks the qualifier, table name, family and store-file count
 * carried by a bulk load WALEdit.
 * @param typeBytes expected qualifier bytes of the first cell
 * @param tableName expected table name bytes
 * @param familyName expected family name bytes
 * @param storeFileNames expected store file names; only the list size is compared
 * @return a fully configured {@link WalMatcher}
 */
private static Matcher<WALEdit> bulkLogWalEdit(byte[] typeBytes, byte[] tableName,
  byte[] familyName, List<String> storeFileNames) {
  Matcher<WALEdit> fullMatcher =
    new WalMatcher(typeBytes, tableName, familyName, storeFileNames);
  return fullMatcher;
}

/**
 * Hamcrest matcher for bulk load {@link WALEdit}s.
 * <p>
 * Expectations are checked with JUnit assertions, so a mismatch raises an
 * {@link AssertionError} from inside {@link #matchesSafely(WALEdit)} rather than returning
 * {@code false}. The only {@code false} result is when the bulk load descriptor cannot be
 * read from the first cell.
 */
private static class WalMatcher extends TypeSafeMatcher<WALEdit> {
  /** Expected qualifier of the edit's first cell. */
  private final byte[] typeBytes;
  /** Expected table name; {@code null} skips the table check. */
  private final byte[] tableName;
  /** Expected family name; only consulted when {@link #storeFileNames} is non-null. */
  private final byte[] familyName;
  /** Expected store file names; {@code null} skips the store checks. */
  private final List<String> storeFileNames;

  /**
   * Creates a matcher that checks only the qualifier of the first cell.
   * @param typeBytes expected qualifier bytes
   */
  public WalMatcher(byte[] typeBytes) {
    this(typeBytes, null, null, null);
  }

  /**
   * Creates a matcher with the full set of expectations.
   * @param typeBytes expected qualifier bytes of the first cell
   * @param tableName expected table name, or {@code null} to skip that check
   * @param familyName expected family name, compared when {@code storeFileNames} is non-null
   * @param storeFileNames expected store files, or {@code null} to skip the store checks
   */
  public WalMatcher(byte[] typeBytes, byte[] tableName, byte[] familyName,
    List<String> storeFileNames) {
    this.typeBytes = typeBytes;
    this.tableName = tableName;
    this.familyName = familyName;
    this.storeFileNames = storeFileNames;
  }

  @Override
  protected boolean matchesSafely(WALEdit item) {
    assertTrue(Arrays.equals(CellUtil.cloneQualifier(item.getCells().get(0)), typeBytes));
    BulkLoadDescriptor desc;
    try {
      desc = WALEdit.getBulkLoadDescriptor(item.getCells().get(0));
    } catch (IOException e) {
      return false;
    }
    assertNotNull(desc);

    if (tableName != null) {
      assertTrue(Bytes.equals(ProtobufUtil.toTableName(desc.getTableName()).getName(),
        tableName));
    }

    if (storeFileNames != null) {
      // Only the first store descriptor is inspected, and only the number of store files
      // is compared (not their names).
      StoreDescriptor store = desc.getStores(0);
      assertTrue(Bytes.equals(store.getFamilyName().toByteArray(), familyName));
      assertTrue(Bytes.equals(Bytes.toBytes(store.getStoreHomeDir()), familyName));
      assertEquals(storeFileNames.size(), store.getStoreFileCount());
    }

    return true;
  }

  /**
   * Describes the configured expectations so that mismatch reports are readable.
   */
  @Override
  public void describeTo(Description description) {
    description.appendText("a bulk load WALEdit with qualifier ")
      .appendText(Bytes.toStringBinary(typeBytes));
    if (tableName != null) {
      description.appendText(", table ").appendText(Bytes.toStringBinary(tableName));
    }
    if (storeFileNames != null) {
      description.appendText(", family ").appendText(Bytes.toStringBinary(familyName))
        .appendText(" and ").appendValue(storeFileNames.size()).appendText(" store file(s)");
    }
  }
}
}
Loading

0 comments on commit 6cf8aab

Please sign in to comment.