Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.x] Remove HeapAlloc_rate as HotShardRCA's upstream node. #412

Merged
merged 1 commit into from
Jun 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion config/rca.conf
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@
//hot shard rca
"hot-shard-rca": {
"cpu-utilization" : 0.015,
"heap-alloc-rate-in-bytes" : 1400000.0,
"top-k-consumers" : 50
},
// field data cache rca
Expand Down
4 changes: 1 addition & 3 deletions config/rca_cluster_manager.conf
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,11 @@
//hot shard rca
"hot-shard-rca": {
"cpu-utilization" : 0.015,
"heap-alloc-rate-in-bytes" : 1400000.0,
"top-k-consumers" : 50
},
//hot shard cluster rca
"hot-shard-cluster-rca": {
"cpu-utilization-cluster-percentage" : 0.3,
"heap-alloc-rate-cluster-percentage" : 0.3
"cpu-utilization-cluster-percentage" : 0.3
},
// field data cache rca
"field-data-cache-rca": {
Expand Down
4 changes: 1 addition & 3 deletions config/rca_idle_cluster_manager.conf
Original file line number Diff line number Diff line change
Expand Up @@ -60,13 +60,11 @@
//hot shard rca
"hot-shard-rca": {
"cpu-utilization" : 0.015,
"heap-alloc-rate-in-bytes" : 1400000.0,
"top-k-consumers" : 50
},
//hot shard cluster rca
"hot-shard-cluster-rca": {
"cpu-utilization-cluster-percentage" : 0.3,
"heap-alloc-rate-cluster-percentage" : 0.3
"cpu-utilization-cluster-percentage" : 0.3
},
// field data cache rca
"field-data-cache-rca": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,8 @@ public class HotShardClusterRcaConfig {
public static final String CONFIG_NAME = "hot-shard-cluster-rca";

private Double cpuUtilizationClusterThreshold;
private Double heapAllocRateClusterThreshold;

public static final double DEFAULT_CPU_UTILIZATION_CLUSTER_THRESHOLD = 0.3;
public static final double DEFAULT_HEAP_ALLOC_RATE_CLUSTER_THRESHOLD = 0.3;

public HotShardClusterRcaConfig(final RcaConf rcaConf) {
cpuUtilizationClusterThreshold =
Expand All @@ -26,28 +24,14 @@ public HotShardClusterRcaConfig(final RcaConf rcaConf) {
DEFAULT_CPU_UTILIZATION_CLUSTER_THRESHOLD,
(s) -> (s > 0),
Double.class);
heapAllocRateClusterThreshold =
rcaConf.readRcaConfig(
CONFIG_NAME,
HotShardClusterRcaConfig.RCA_CONF_KEY_CONSTANTS
.HEAP_ALLOC_RATE_CLUSTER_THRESHOLD,
DEFAULT_HEAP_ALLOC_RATE_CLUSTER_THRESHOLD,
(s) -> (s > 0),
Double.class);
}

public double getCpuUtilizationClusterThreshold() {
return cpuUtilizationClusterThreshold;
}

public double getHeapAllocRateClusterThreshold() {
return heapAllocRateClusterThreshold;
}

public static class RCA_CONF_KEY_CONSTANTS {
private static final String CPU_UTILIZATION_CLUSTER_THRESHOLD =
"cpu-utilization-cluster-percentage";
private static final String HEAP_ALLOC_RATE_CLUSTER_THRESHOLD =
"heap-alloc-rate-cluster-percentage";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,9 @@ public class HotShardRcaConfig {
public static final String CONFIG_NAME = "hot-shard-rca";

private final Double cpuUtilizationThreshold;
private final Double heapAllocRateThreshold;

private final Integer maxConsumersToSend;

public static final double DEFAULT_CPU_UTILIZATION_THRESHOLD = 0.015;
public static final double DEFAULT_HEAP_ALLOC_RATE_THRESHOLD_IN_BYTE_PER_SEC = 1400000.0;
public static final int DEFAULT_TOP_K_CONSUMERS = 50;

public HotShardRcaConfig(final RcaConf rcaConf) {
Expand All @@ -28,13 +25,6 @@ public HotShardRcaConfig(final RcaConf rcaConf) {
DEFAULT_CPU_UTILIZATION_THRESHOLD,
(s) -> (s > 0),
Double.class);
heapAllocRateThreshold =
rcaConf.readRcaConfig(
CONFIG_NAME,
HotShardRcaConfig.RCA_CONF_KEY_CONSTANTS.HEAP_ALLOC_RATE_THRESHOLD_IN_BYTES,
DEFAULT_HEAP_ALLOC_RATE_THRESHOLD_IN_BYTE_PER_SEC,
(s) -> (s > 0),
Double.class);
maxConsumersToSend =
rcaConf.readRcaConfig(
CONFIG_NAME,
Expand All @@ -48,17 +38,12 @@ public double getCpuUtilizationThreshold() {
return cpuUtilizationThreshold;
}

public double getHeapAllocRateThreshold() {
return heapAllocRateThreshold;
}

public int getMaximumConsumersToSend() {
return maxConsumersToSend;
}

public static class RCA_CONF_KEY_CONSTANTS {
public static final String CPU_UTILIZATION_THRESHOLD = "cpu-utilization";
public static final String HEAP_ALLOC_RATE_THRESHOLD_IN_BYTES = "heap-alloc-rate-in-bytes";
public static final String TOP_K_CONSUMERS = "top-k-consumers";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@

/**
* HotShardSummary contains information such as the index_name, shard_id, node_id, cpu_utilization,
* heap_alloc_rate, criteria and time_period.
* criteria and time_period.
*
* <p>The hot shard summary is created by node level and cluster level RCAs running on data nodes
* and elected cluster_manager node resp. This object is persisted in SQLite table. Table name :
* HotClusterSummary
*
* <p>schema : | ID(primary key) | index_name | shard_id | node_id | cpu_utilization |
* heap_alloc_rate| ID in FlowUnit(foreign key)
* <p>schema : | ID(primary key) | index_name | shard_id | node_id | cpu_utilization | ID in
* FlowUnit(foreign key)
*/
public class HotShardSummary extends GenericSummary {

Expand All @@ -43,7 +43,6 @@ public class HotShardSummary extends GenericSummary {
private final String nodeId;
private CriteriaEnum criteria;
private double cpuUtilization;
private double heapAllocRate;
private int timePeriodInSeconds;

public HotShardSummary(String indexName, String shardId, String nodeId, int timePeriod) {
Expand All @@ -58,10 +57,6 @@ public void setCpuUtilization(final double cpuUtilization) {
this.cpuUtilization = cpuUtilization;
}

public void setHeapAllocRate(final double heapAllocRate) {
this.heapAllocRate = heapAllocRate;
}

public void setCriteria(final CriteriaEnum criteria) {
this.criteria = criteria;
}
Expand All @@ -82,10 +77,6 @@ public double getCpuUtilization() {
return this.cpuUtilization;
}

public double getHeapAllocRate() {
return this.heapAllocRate;
}

public CriteriaEnum getCriteria() {
return this.criteria;
}
Expand All @@ -98,7 +89,6 @@ public HotShardSummaryMessage buildSummaryMessage() {
summaryMessageBuilder.setShardId(this.shardId);
summaryMessageBuilder.setNodeId(this.nodeId);
summaryMessageBuilder.setCpuUtilization(this.cpuUtilization);
summaryMessageBuilder.setHeapAllocRate(this.heapAllocRate);
summaryMessageBuilder.setCriteria(this.criteria);
summaryMessageBuilder.setTimePeriod(this.timePeriodInSeconds);
return summaryMessageBuilder.build();
Expand All @@ -117,7 +107,6 @@ public static HotShardSummary buildHotShardSummaryFromMessage(HotShardSummaryMes
message.getNodeId(),
message.getTimePeriod());
summary.setCpuUtilization(message.getCpuUtilization());
summary.setHeapAllocRate(message.getHeapAllocRate());
summary.setCriteria(message.getCriteria());
return summary;
}
Expand All @@ -131,7 +120,6 @@ public String toString() {
this.shardId,
this.nodeId,
String.valueOf(this.cpuUtilization),
String.valueOf(this.heapAllocRate),
String.valueOf(this.criteria)
});
}
Expand All @@ -148,7 +136,6 @@ public List<Field<?>> getSqlSchema() {
schema.add(HotShardSummaryField.SHARD_ID_FIELD.getField());
schema.add(HotShardSummaryField.NODE_ID_FIELD.getField());
schema.add(HotShardSummaryField.CPU_UTILIZATION_FIELD.getField());
schema.add(HotShardSummaryField.HEAP_ALLOC_RATE_FIELD.getField());
schema.add(HotShardSummaryField.CRITERIA_FIELD.getField());
schema.add(HotShardSummaryField.TIME_PERIOD_FIELD.getField());
return schema;
Expand All @@ -161,7 +148,6 @@ public List<Object> getSqlValue() {
value.add(this.shardId);
value.add(this.nodeId);
value.add(this.cpuUtilization);
value.add(this.heapAllocRate);
value.add(this.criteria.getNumber());
value.add(Integer.valueOf(this.timePeriodInSeconds));
return value;
Expand All @@ -179,7 +165,6 @@ public JsonElement toJson() {
summaryObj.addProperty(SQL_SCHEMA_CONSTANTS.SHARD_ID_COL_NAME, this.shardId);
summaryObj.addProperty(SQL_SCHEMA_CONSTANTS.NODE_ID_COL_NAME, this.nodeId);
summaryObj.addProperty(SQL_SCHEMA_CONSTANTS.CPU_UTILIZATION_COL_NAME, this.cpuUtilization);
summaryObj.addProperty(SQL_SCHEMA_CONSTANTS.HEAP_ALLOC_RATE_COL_NAME, this.heapAllocRate);
summaryObj.addProperty(SQL_SCHEMA_CONSTANTS.CRITERIA_COL_NAME, this.criteria.toString());
summaryObj.addProperty(SQL_SCHEMA_CONSTANTS.TIME_PERIOD_COL_NAME, this.timePeriodInSeconds);
return summaryObj;
Expand All @@ -190,7 +175,6 @@ public static class SQL_SCHEMA_CONSTANTS {
public static final String SHARD_ID_COL_NAME = "shard_id";
public static final String NODE_ID_COL_NAME = "node_id";
public static final String CPU_UTILIZATION_COL_NAME = "cpu_utilization";
public static final String HEAP_ALLOC_RATE_COL_NAME = "heap_alloc_rate";
public static final String CRITERIA_COL_NAME = "criteria";
public static final String TIME_PERIOD_COL_NAME = "time_period";
}
Expand All @@ -201,7 +185,6 @@ public enum HotShardSummaryField implements JooqFieldValue {
SHARD_ID_FIELD(SQL_SCHEMA_CONSTANTS.SHARD_ID_COL_NAME, String.class),
NODE_ID_FIELD(SQL_SCHEMA_CONSTANTS.NODE_ID_COL_NAME, String.class),
CPU_UTILIZATION_FIELD(SQL_SCHEMA_CONSTANTS.CPU_UTILIZATION_COL_NAME, Double.class),
HEAP_ALLOC_RATE_FIELD(SQL_SCHEMA_CONSTANTS.HEAP_ALLOC_RATE_COL_NAME, Double.class),
CRITERIA_FIELD(SQL_SCHEMA_CONSTANTS.CRITERIA_COL_NAME, Integer.class),
TIME_PERIOD_FIELD(SQL_SCHEMA_CONSTANTS.TIME_PERIOD_COL_NAME, Integer.class);

Expand Down Expand Up @@ -244,29 +227,22 @@ public static HotShardSummary buildSummary(final Record record) {
String nodeId = record.get(HotShardSummaryField.NODE_ID_FIELD.getField(), String.class);
Double cpuUtilization =
record.get(HotShardSummaryField.CPU_UTILIZATION_FIELD.getField(), Double.class);
Double heapAllocRate =
record.get(HotShardSummaryField.HEAP_ALLOC_RATE_FIELD.getField(), Double.class);
Integer criteria =
record.get(HotShardSummaryField.CRITERIA_FIELD.getField(), Integer.class);

Integer timePeriod =
record.get(HotShardSummaryField.TIME_PERIOD_FIELD.getField(), Integer.class);
if (timePeriod == null
|| cpuUtilization == null
|| heapAllocRate == null
|| criteria == null) {
if (timePeriod == null || cpuUtilization == null || criteria == null) {
LOG.warn(
"read null object from SQL, timePeriod: {}, cpuUtilization: {}, heapAllocRate: {},"
"read null object from SQL, timePeriod: {}, cpuUtilization: {},"
+ " criteria: {}",
timePeriod,
cpuUtilization,
heapAllocRate,
criteria);
return null;
}
summary = new HotShardSummary(indexName, shardId, nodeId, timePeriod);
summary.setCpuUtilization(cpuUtilization);
summary.setHeapAllocRate(heapAllocRate);
summary.setCriteria(CriteriaEnum.forNumber(criteria));
} catch (IllegalArgumentException ie) {
LOG.error("Some fields might not be found in record, cause : {}", ie.getMessage());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
import org.opensearch.performanceanalyzer.rca.framework.api.metrics.GC_Collection_Event;
import org.opensearch.performanceanalyzer.rca.framework.api.metrics.GC_Collection_Time;
import org.opensearch.performanceanalyzer.rca.framework.api.metrics.GC_Type;
import org.opensearch.performanceanalyzer.rca.framework.api.metrics.Heap_AllocRate;
import org.opensearch.performanceanalyzer.rca.framework.api.metrics.Heap_Max;
import org.opensearch.performanceanalyzer.rca.framework.api.metrics.Heap_Used;
import org.opensearch.performanceanalyzer.rca.framework.api.metrics.IndexWriter_Memory;
Expand Down Expand Up @@ -499,26 +498,20 @@ private AdmissionControlDecider buildAdmissionControlDecider(Metric heapUsed, Me

private void constructShardResourceUsageGraph() {
Metric cpuUtilization = new CPU_Utilization(EVALUATION_INTERVAL_SECONDS);
Metric heapAllocRate = new Heap_AllocRate(EVALUATION_INTERVAL_SECONDS);

cpuUtilization.addTag(
RcaConsts.RcaTagConstants.TAG_LOCUS,
RcaConsts.RcaTagConstants.LOCUS_DATA_CLUSTER_MANAGER_NODE);
heapAllocRate.addTag(
RcaConsts.RcaTagConstants.TAG_LOCUS,
RcaConsts.RcaTagConstants.LOCUS_DATA_CLUSTER_MANAGER_NODE);

addLeaf(cpuUtilization);
addLeaf(heapAllocRate);

// High CPU Utilization RCA
HotShardRca hotShardRca =
new HotShardRca(
EVALUATION_INTERVAL_SECONDS, RCA_PERIOD, cpuUtilization, heapAllocRate);
new HotShardRca(EVALUATION_INTERVAL_SECONDS, RCA_PERIOD, cpuUtilization);
hotShardRca.addTag(
RcaConsts.RcaTagConstants.TAG_LOCUS,
RcaConsts.RcaTagConstants.LOCUS_DATA_CLUSTER_MANAGER_NODE);
hotShardRca.addAllUpstreams(Arrays.asList(cpuUtilization, heapAllocRate));
hotShardRca.addAllUpstreams(Arrays.asList(cpuUtilization));

// Hot Shard Cluster RCA which consumes the above
HotShardClusterRca hotShardClusterRca = new HotShardClusterRca(RCA_PERIOD, hotShardRca);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ public class HotShardClusterRca extends Rca<ResourceFlowUnit<HotClusterSummary>>
private static final int SLIDING_WINDOW_IN_SECONDS = 60;

private double cpuUtilizationClusterThreshold;
private double heapAllocRateClusterThreshold;

private final Rca<ResourceFlowUnit<HotNodeSummary>> hotShardRca;
private int rcaPeriod;
private int counter;
Expand All @@ -56,7 +54,6 @@ public class HotShardClusterRca extends Rca<ResourceFlowUnit<HotClusterSummary>>
// TODO: Use the fact that we're getting at max topK*2 consumers from each node and perform
// further optimization.
private Table<String, NodeShardKey, Double> cpuUtilizationInfoTable;
private Table<String, NodeShardKey, Double> heapAllocRateInfoTable;

public <R extends Rca<ResourceFlowUnit<HotNodeSummary>>> HotShardClusterRca(
final int rcaPeriod, final R hotShardRca) {
Expand All @@ -66,11 +63,8 @@ public <R extends Rca<ResourceFlowUnit<HotNodeSummary>>> HotShardClusterRca(
this.counter = 0;
this.unhealthyNodes = new HashSet<>();
this.cpuUtilizationInfoTable = HashBasedTable.create();
this.heapAllocRateInfoTable = HashBasedTable.create();
this.cpuUtilizationClusterThreshold =
HotShardClusterRcaConfig.DEFAULT_CPU_UTILIZATION_CLUSTER_THRESHOLD;
this.heapAllocRateClusterThreshold =
HotShardClusterRcaConfig.DEFAULT_HEAP_ALLOC_RATE_CLUSTER_THRESHOLD;
}

private void populateResourceInfoTable(
Expand All @@ -81,8 +75,8 @@ private void populateResourceInfoTable(
if (null == metricMap.get(indexName, nodeShardKey)) {
metricMap.put(indexName, nodeShardKey, metricValue);
} else {
double existingOccurence = metricMap.get(indexName, nodeShardKey);
metricMap.put(indexName, nodeShardKey, existingOccurence + metricValue);
double existingOccurrence = metricMap.get(indexName, nodeShardKey);
metricMap.put(indexName, nodeShardKey, existingOccurrence + metricValue);
}
}

Expand All @@ -105,14 +99,6 @@ private void consumeFlowUnit(ResourceFlowUnit<HotNodeSummary> resourceFlowUnit)
hotShardSummary.getCpuUtilization(),
cpuUtilizationInfoTable);
}
if (CriteriaEnum.HEAP_ALLOC_RATE_CRITERIA.equals(criteria)
|| CriteriaEnum.DOUBLE_CRITERIA.equals(criteria)) {
populateResourceInfoTable(
indexName,
nodeShardKey,
hotShardSummary.getHeapAllocRate(),
heapAllocRateInfoTable);
}
}
}
}
Expand Down Expand Up @@ -217,12 +203,6 @@ public ResourceFlowUnit<HotClusterSummary> operate() {
hotShardSummaryList,
ResourceUtil.CPU_USAGE);

findHotShardAndCreateSummary(
heapAllocRateInfoTable,
heapAllocRateClusterThreshold,
hotShardSummaryList,
ResourceUtil.HEAP_ALLOC_RATE);

if (hotShardSummaryList.isEmpty()) {
context = new ResourceContext(Resources.State.HEALTHY);
} else {
Expand All @@ -243,7 +223,6 @@ public ResourceFlowUnit<HotClusterSummary> operate() {
counter = 0;
this.unhealthyNodes.clear();
this.cpuUtilizationInfoTable.clear();
this.heapAllocRateInfoTable.clear();
LOG.debug("Hot Shard Cluster RCA Context : " + context.toString());
return new ResourceFlowUnit<>(System.currentTimeMillis(), context, summary, true);
} else {
Expand All @@ -261,7 +240,6 @@ public ResourceFlowUnit<HotClusterSummary> operate() {
public void readRcaConf(RcaConf conf) {
HotShardClusterRcaConfig configObj = conf.getHotShardClusterRcaConfig();
cpuUtilizationClusterThreshold = configObj.getCpuUtilizationClusterThreshold();
heapAllocRateClusterThreshold = configObj.getHeapAllocRateClusterThreshold();
}

/**
Expand Down
Loading