Skip to content

Commit

Permalink
[Enhancement](metric) add current edit log metric (apache#15657)
Browse files Browse the repository at this point in the history
  • Loading branch information
yongjinhou authored Jan 10, 2023
1 parent 503b6ee commit a67cea2
Show file tree
Hide file tree
Showing 8 changed files with 50 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,11 @@ curl http://be_host:webserver_port/metrics?type=json
|`doris_fe_counter_hit_sql_block_rule`|| Num| 被 SQL BLOCK RULE 拦截的查询数量 | | |
|`doris_fe_edit_log_clean`| {type="failed"} | Num| 清理历史元数据日志失败的次数 | 不应失败,如失败,需人工介入 | P0|
|| {type="success"} | Num| 清理历史元数据日志成功的次数 | |
|`doris_fe_edit_log`| {type="bytes"} |字节 | 元数据日志写入量的累计值 | 通过计算斜率可以获得写入速率,来观察是否元数据写入有延迟 | P0 |
|`doris_fe_edit_log`| {type="accumulated_bytes"} |字节 | 元数据日志写入量的累计值 | 通过计算斜率可以获得写入速率,来观察是否元数据写入有延迟 | P0 |
|| {type="current_bytes"} |字节 | 当前元数据日志的大小(自上次清理历史元数据日志以来累计写入的字节数) | 用于监控 editlog 大小。如果大小超限,需人工介入 | P0 |
|| {type="read"} |Num| 元数据日志读取次数的计数 | 通过斜率观察元数据读取频率是否正常 |P0 |
|| {type="write"} |Num | 元数据日志写入次数的计数 |通过斜率观察元数据写入频率是否正常 |P0 |
|| {type="current"} |Num | 当前元数据日志的数量 |用于监控 editlog 数量。如果数量超限,需人工介入 |P0 |
|`doris_fe_editlog_write_latency_ms`| | 毫秒| 元数据日志写入延迟的百分位统计。如 {quantile="0.75"} 表示 75 分位的写入延迟 | |
|`doris_fe_image_clean`|{type="failed"} | Num | 清理历史元数据镜像文件失败的次数 | 不应失败,如失败,需人工介入 | P0|
||{type="success"} | Num | 清理历史元数据镜像文件成功的次数 | |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ public interface Journal {
// Write a journal and sync to disk
public void write(short op, Writable writable) throws IOException;

// Get current journal number.
// NOTE(review): presumably the number of journal entries currently held by the
// implementation; the exact meaning is up to each implementing class -- confirm.
public long getJournalNum();

// Delete journals whose max id is less than deleteToJournalId
public void deleteJournals(long deleteJournalToId);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ public synchronized void write(short op, Writable writable) throws IOException {
DatabaseEntry theData = new DatabaseEntry(buffer.getData());
if (MetricRepo.isInit) {
MetricRepo.COUNTER_EDIT_LOG_SIZE_BYTES.increase((long) theData.getSize());
MetricRepo.COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES.increase((long) theData.getSize());
}
LOG.debug("opCode = {}, journal size = {}", op, theData.getSize());
// Write the key value pair to bdb.
Expand Down Expand Up @@ -367,6 +368,11 @@ private void reSetupBdbEnvironment(InsufficientLogException insufficientLogEx) {
helperNode.first + ":" + helperNode.second, Env.getServingEnv().isElectable());
}

/**
 * Reports the current journal number by delegating to {@code currentJournalDB.count()}.
 *
 * @return the value returned by {@code currentJournalDB.count()}
 */
@Override
public long getJournalNum() {
    final long journalCount = currentJournalDB.count();
    return journalCount;
}

@Override
public void deleteJournals(long deleteToJournalId) {
List<Long> dbNames = getDatabaseNames();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,11 @@ public long getMinJournalId() {
return 0;
}

/**
 * Always reports a journal number of {@code 0} for this implementation.
 *
 * @return {@code 0}
 */
@Override
public long getJournalNum() {
    final long journalCount = 0L;
    return journalCount;
}

@Override
public void close() {
if (outputStream == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,8 @@ public synchronized void doCheckpoint() throws CheckpointException {
editLog.deleteJournals(deleteVersion + 1);
if (MetricRepo.isInit) {
MetricRepo.COUNTER_EDIT_LOG_CLEAN_SUCCESS.increase(1L);
MetricRepo.COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES.reset();
MetricRepo.COUNTER_EDIT_LOG_CURRENT.update(editLog.getEditLogNum());
}
LOG.info("journals <= {} are deleted. image version {}, other nodes min version {}",
deleteVersion, checkPointVersion, minOtherNodesJournalId);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,13 @@ public void increase(Long delta) {
public Long getValue() {
    // Snapshot the accumulator as a primitive, then box it for the caller.
    final long current = value.longValue();
    return Long.valueOf(current);
}

/**
 * Resets the underlying {@code value} accumulator.
 */
public void reset() {
    this.value.reset();
}

/**
 * Replaces the counter's value: resets {@code value} and then adds {@code delta} to it.
 *
 * <p>Despite the parameter name, {@code delta} is effectively the new value, because
 * the accumulator is reset before the addition.
 *
 * <p>The reset and the add are two separate calls, not one atomic operation.
 * NOTE(review): if {@code value} is a {@code java.util.concurrent.atomic.LongAdder}
 * (presumably, given the reset/add/longValue calls), a concurrent {@code increase}
 * or read may observe or interleave with the intermediate zero state -- confirm that
 * callers tolerate this.
 *
 * @param delta the value the counter should hold afterwards; must not be {@code null}
 * @throws NullPointerException if {@code delta} is {@code null}; the counter is left
 *         unchanged in that case
 */
public void update(Long delta) {
    // Unbox before touching the counter so a null argument fails fast instead of
    // leaving the counter zeroed by a reset that is never followed by the add.
    final long newValue = delta.longValue();
    value.reset();
    value.add(newValue);
}
}
15 changes: 13 additions & 2 deletions fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,9 @@ public final class MetricRepo {

// Edit log metrics. All are assigned during metric initialization; they stay null until then.
public static LongCounterMetric COUNTER_EDIT_LOG_WRITE;
public static LongCounterMetric COUNTER_EDIT_LOG_READ;
// Current number of edit logs: incremented when an edit is logged and overwritten with
// the journal's reported number after a checkpoint deletes old journals.
// NOTE(review): this value can decrease, which is gauge-like rather than counter-like --
// confirm that exposing it through a counter metric type is intended.
public static LongCounterMetric COUNTER_EDIT_LOG_CURRENT;
// Accumulated bytes of edit log written.
public static LongCounterMetric COUNTER_EDIT_LOG_SIZE_BYTES;
// Bytes of edit log written since the value was last reset; it is reset after a
// checkpoint deletes old journals.
// NOTE(review): also non-monotonic (reset to zero) -- confirm counter type is intended.
public static LongCounterMetric COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES;
public static LongCounterMetric COUNTER_EDIT_LOG_CLEAN_SUCCESS;
public static LongCounterMetric COUNTER_EDIT_LOG_CLEAN_FAILED;
public static Histogram HISTO_EDIT_LOG_WRITE_LATENCY;
Expand Down Expand Up @@ -345,9 +347,18 @@ public Long getValue() {
"counter of edit log read from bdbje");
COUNTER_EDIT_LOG_READ.addLabel(new MetricLabel("type", "read"));
DORIS_METRIC_REGISTER.addMetrics(COUNTER_EDIT_LOG_READ);
COUNTER_EDIT_LOG_SIZE_BYTES = new LongCounterMetric("edit_log", MetricUnit.BYTES, "size of edit log");
COUNTER_EDIT_LOG_SIZE_BYTES.addLabel(new MetricLabel("type", "bytes"));
COUNTER_EDIT_LOG_CURRENT = new LongCounterMetric("edit_log", MetricUnit.OPERATIONS,
"counter of current edit log in bdbje");
COUNTER_EDIT_LOG_CURRENT.addLabel(new MetricLabel("type", "current"));
DORIS_METRIC_REGISTER.addMetrics(COUNTER_EDIT_LOG_CURRENT);
COUNTER_EDIT_LOG_SIZE_BYTES = new LongCounterMetric("edit_log", MetricUnit.BYTES,
"size of accumulated edit log");
COUNTER_EDIT_LOG_SIZE_BYTES.addLabel(new MetricLabel("type", "accumulated_bytes"));
DORIS_METRIC_REGISTER.addMetrics(COUNTER_EDIT_LOG_SIZE_BYTES);
COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES = new LongCounterMetric("edit_log", MetricUnit.BYTES,
"size of current edit log");
COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES.addLabel(new MetricLabel("type", "current_bytes"));
DORIS_METRIC_REGISTER.addMetrics(COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES);
HISTO_EDIT_LOG_WRITE_LATENCY = METRIC_REGISTER.histogram(
MetricRegistry.name("editlog", "write", "latency", "ms"));

Expand Down
10 changes: 9 additions & 1 deletion fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java
Original file line number Diff line number Diff line change
Expand Up @@ -1044,6 +1044,7 @@ private synchronized void logEdit(short op, Writable writable) {
totalTimeTransactions += (end - start);
if (MetricRepo.isInit) {
MetricRepo.HISTO_EDIT_LOG_WRITE_LATENCY.update((end - start));
MetricRepo.COUNTER_EDIT_LOG_CURRENT.increase(1L);
}

if (LOG.isDebugEnabled()) {
Expand All @@ -1066,10 +1067,17 @@ private synchronized void logEdit(short op, Writable writable) {
/**
* Return the size of the current EditLog
*/
synchronized long getEditLogSize() throws IOException {
public synchronized long getEditLogSize() throws IOException {
    // The size is whatever the underlying edit stream reports as its length.
    final long streamLength = editStream.length();
    return streamLength;
}

/**
 * Returns the current journal number as reported by the underlying {@code journal}.
 *
 * @return the result of {@code journal.getJournalNum()}
 * @throws IOException declared in the signature for callers to handle
 */
public synchronized long getEditLogNum() throws IOException {
    final long journalNum = journal.getJournalNum();
    return journalNum;
}

public synchronized long getTxId() {
    // Read while holding the instance monitor (the method is synchronized).
    final long currentTxId = this.txId;
    return currentTxId;
}
Expand Down

0 comments on commit a67cea2

Please sign in to comment.