Commit
KYLIN-1726 Scalable streaming cubing
shaofengshi committed Aug 10, 2016
1 parent acde339 commit 23407e3
Showing 7 changed files with 40 additions and 16 deletions.
11 changes: 4 additions & 7 deletions assembly/src/test/java/org/apache/kylin/job/DeployUtil.java
@@ -18,12 +18,8 @@
 
 package org.apache.kylin.job;
 
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.IOException;
-import java.io.InputStream;
+import java.io.*;
+import java.nio.ByteBuffer;
 import java.util.List;
 
 import org.apache.commons.io.IOUtils;
@@ -165,7 +161,7 @@ public static void prepareTestDataForStreamingCube(long startTime, long endTime,
         TimedJsonStreamParser timedJsonStreamParser = new TimedJsonStreamParser(tableColumns, "formatTs=true");
         StringBuilder sb = new StringBuilder();
         for (String json : data) {
-            List<String> rowColumns = timedJsonStreamParser.parse((new MessageAndOffset(new Message(json.getBytes()), 0)).message().payload()).getData();
+            List<String> rowColumns = timedJsonStreamParser.parse(ByteBuffer.wrap(json.getBytes())).getData();
             sb.append(StringUtils.join(rowColumns, ","));
             sb.append(System.getProperty("line.separator"));
         }
@@ -183,6 +179,7 @@ public static void overrideFactTableData(String factTableContent, String factTab
         in.close();
     }
 
+
     private static void deployHiveTables() throws Exception {
 
         MetadataManager metaMgr = MetadataManager.getInstance(config());
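
The net effect on the test: TimedJsonStreamParser is now fed a plain java.nio.ByteBuffer, so DeployUtil no longer has to build Kafka Message/MessageAndOffset wrappers just to exercise the parser. A minimal sketch of the new call path, assuming UTF-8 payloads (the class name and sample JSON are illustrative, not from the commit):

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class ParserInputSketch {

    // Mirrors the updated test line: wrap the raw JSON bytes directly instead of
    // constructing a Kafka MessageAndOffset and extracting its payload.
    static ByteBuffer toParserInput(String json) {
        return ByteBuffer.wrap(json.getBytes(StandardCharsets.UTF_8));
    }

    public static void main(String[] args) {
        ByteBuffer payload = toParserInput("{\"ts\": 1470787200000, \"user\": \"u1\"}");
        // timedJsonStreamParser.parse(payload).getData() would return the row columns.
        System.out.println("payload holds " + payload.remaining() + " bytes");
    }
}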
15 changes: 7 additions & 8 deletions core-cube/src/main/java/org/apache/kylin/cube/CubeSegment.java
@@ -121,19 +121,18 @@ public CubeDesc getCubeDesc() {
      * returns "yyyyMMddHHmmss_yyyyMMddHHmmss"
      */
     public static String makeSegmentName(long startDate, long endDate, long startOffset, long endOffset) {
-        if (startOffset == 0 && endOffset == 0) {
-            startOffset = startDate;
-            endOffset = endDate;
-        }
+        if (startOffset != 0 || endOffset != 0) {
+            if (startOffset == 0 && (endOffset == 0 || endOffset == Long.MAX_VALUE)) {
+                return "FULL_BUILD";
+            }
 
-        if (startOffset == 0 && (endOffset == 0 || endOffset == Long.MAX_VALUE)) {
-            return "FULL_BUILD";
+            return startOffset + "_" + endOffset;
         }
 
         // using time
         SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss");
         dateFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
 
-        return dateFormat.format(startOffset) + "_" + dateFormat.format(endOffset);
+        return dateFormat.format(startDate) + "_" + dateFormat.format(endDate);
     }
 
     // ============================================================================
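
To make the revised naming rule concrete: offset-based (streaming) segments are now named by their offsets, a 0-to-Long.MAX_VALUE offset range collapses to FULL_BUILD, and date-based segments format startDate/endDate directly instead of relying on the removed aliasing of the offset variables. A self-contained sketch, with the method body copied from the hunk above and illustrative sample values:

import java.text.SimpleDateFormat;
import java.util.TimeZone;

public class SegmentNameSketch {

    static String makeSegmentName(long startDate, long endDate, long startOffset, long endOffset) {
        // Offset-based (streaming) segment: name by offsets.
        if (startOffset != 0 || endOffset != 0) {
            if (startOffset == 0 && (endOffset == 0 || endOffset == Long.MAX_VALUE)) {
                return "FULL_BUILD";
            }
            return startOffset + "_" + endOffset;
        }
        // Date-based segment: name by GMT timestamps.
        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss");
        dateFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
        return dateFormat.format(startDate) + "_" + dateFormat.format(endDate);
    }

    public static void main(String[] args) {
        System.out.println(makeSegmentName(1470787200000L, 1470873600000L, 0, 0)); // 20160810000000_20160811000000
        System.out.println(makeSegmentName(0, 0, 1000, 2000));                     // 1000_2000
        System.out.println(makeSegmentName(0, 0, 0, Long.MAX_VALUE));              // FULL_BUILD
    }
}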
engine-mr/src/main/java/org/apache/kylin/engine/mr/BatchMergeJobBuilder2.java
@@ -36,10 +36,12 @@ public class BatchMergeJobBuilder2 extends JobBuilderSupport {
     private static final Logger logger = LoggerFactory.getLogger(BatchMergeJobBuilder2.class);
 
     private final IMROutput2.IMRBatchMergeOutputSide2 outputSide;
+    private final IMRInput.IMRBatchMergeInputSide inputSide;
 
     public BatchMergeJobBuilder2(CubeSegment mergeSegment, String submitter) {
         super(mergeSegment, submitter);
-        this.outputSide = MRUtil.getBatchMergeOutputSide2((CubeSegment) seg);
+        this.outputSide = MRUtil.getBatchMergeOutputSide2(seg);
+        this.inputSide = MRUtil.getBatchMergeInputSide(seg);
     }
 
     public CubingJob build() {
@@ -57,6 +59,7 @@ public CubingJob build() {
         }
 
         // Phase 1: Merge Dictionary
+        inputSide.addStepPhase1_MergeDictionary(result);
         result.addTask(createMergeDictionaryStep(mergingSegmentIds));
         result.addTask(createMergeStatisticsStep(cubeSegment, mergingSegmentIds, getStatisticsPath(jobId)));
         outputSide.addStepPhase1_MergeDictionary(result);
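
With the input side wired in, phase 1 of the merge job now queues steps in this order: the source's addStepPhase1_MergeDictionary hook, Kylin's merge-dictionary step, the merge-statistics step, and then the output side's phase-1 steps, so a source can run preparatory work before any dictionary is merged.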
10 changes: 10 additions & 0 deletions engine-mr/src/main/java/org/apache/kylin/engine/mr/IMRInput.java
@@ -34,6 +34,9 @@ public interface IMRInput {
     /** Return an InputFormat that reads from specified table. */
     public IMRTableInputFormat getTableInputFormat(TableDesc table);
 
+    /** Return a helper to participate in batch cubing merge job flow. */
+    public IMRBatchMergeInputSide getBatchMergeInputSide(CubeSegment seg);
+
     /**
      * Utility that configures mapper to read from a table.
      */
@@ -66,4 +69,11 @@ public interface IMRBatchCubingInputSide {
         /** Add step that does necessary clean up, like delete the intermediate flat table */
         public void addStepPhase4_Cleanup(DefaultChainedExecutable jobFlow);
     }
+
+    public interface IMRBatchMergeInputSide {
+
+        /** Add step that executes before merge dictionary and before merge cube. */
+        public void addStepPhase1_MergeDictionary(DefaultChainedExecutable jobFlow);
+
+    }
 }
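
Every IMRInput implementation now has to supply this merge-input side. As a hedged sketch of what a non-trivial (e.g. streaming) source might do with the hook; apart from the Kylin types, all names here are hypothetical:

import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.engine.mr.IMRInput;
import org.apache.kylin.job.execution.DefaultChainedExecutable;

// Hypothetical source-side implementation: a streaming source could schedule
// work that must run before Kylin's own merge-dictionary step.
public class StreamingMergeInputSideSketch implements IMRInput.IMRBatchMergeInputSide {

    private final CubeSegment seg;

    public StreamingMergeInputSideSketch(CubeSegment seg) {
        this.seg = seg;
    }

    @Override
    public void addStepPhase1_MergeDictionary(DefaultChainedExecutable jobFlow) {
        // e.g. jobFlow.addTask(new PrepareDictionaryStep(seg)); where
        // PrepareDictionaryStep would be an AbstractExecutable that readies
        // per-segment dictionaries from source data (illustrative name only).
    }
}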
engine-mr/src/main/java/org/apache/kylin/engine/mr/MRUtil.java
@@ -68,6 +68,10 @@ public static IMRBatchMergeOutputSide2 getBatchMergeOutputSide2(CubeSegment seg)
         return StorageFactory.createEngineAdapter(seg, IMROutput2.class).getBatchMergeOutputSide(seg);
     }
 
+    public static IMRInput.IMRBatchMergeInputSide getBatchMergeInputSide(CubeSegment seg) {
+        return SourceFactory.createEngineAdapter(seg, IMRInput.class).getBatchMergeInputSide(seg);
+    }
+
     // use this method instead of ToolRunner.run() because ToolRunner.run() is not thread-safe
     // Refer to: http://stackoverflow.com/questions/22462665/is-hadoops-toorunner-thread-safe
     public static int runMRJob(Tool tool, String[] args) throws Exception {
@@ -0,0 +1 @@
+
source-hive/src/main/java/org/apache/kylin/source/hive/HiveMRInput.java
@@ -65,6 +65,16 @@ public IMRTableInputFormat getTableInputFormat(TableDesc table) {
         return new HiveTableInputFormat(table.getIdentity());
     }
 
+    @Override
+    public IMRBatchMergeInputSide getBatchMergeInputSide(CubeSegment seg) {
+        return new IMRBatchMergeInputSide() {
+            @Override
+            public void addStepPhase1_MergeDictionary(DefaultChainedExecutable jobFlow) {
+                // doing nothing
+            }
+        };
+    }
+
     public static class HiveTableInputFormat implements IMRTableInputFormat {
         final String dbName;
         final String tableName;
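
Hive is a pure batch source, so its merge-input side contributes no step; the override simply satisfies the new IMRInput contract, leaving the hook free for sources that need pre-merge work, presumably the streaming/Kafka path that KYLIN-1726 targets.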
