Skip to content

Commit

Permalink
calibrate: refactor metrics error (#44451)
Browse files Browse the repository at this point in the history
ref #43212
  • Loading branch information
CabinfeverB authored Jun 16, 2023
1 parent 052c17f commit 841aed8
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 26 deletions.
20 changes: 11 additions & 9 deletions executor/calibrate_resource.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,11 @@ func (e *calibrateResourceExec) Next(ctx context.Context, req *chunk.Chunk) erro
return e.staticCalibrate(ctx, req, exec)
}

var (
errLowUsage = errors.Errorf("The workload in selected time window is too low, with which TiDB is unable to reach a capacity estimation; please select another time window with higher workload, or calibrate resource by hardware instead")
errNoCPUQuotaMetrics = errors.Normalize("There is no CPU quota metrics, %v")
)

func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk.Chunk, exec sqlexec.RestrictedSQLExecutor) error {
startTs, endTs, err := e.parseCalibrateDuration(ctx)
if err != nil {
Expand All @@ -193,11 +198,11 @@ func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk

totalKVCPUQuota, err := getTiKVTotalCPUQuota(ctx, exec)
if err != nil {
return err
return errNoCPUQuotaMetrics.FastGenByArgs(err.Error())
}
totalTiDBCPU, err := getTiDBTotalCPUQuota(ctx, exec)
if err != nil {
return err
return errNoCPUQuotaMetrics.FastGenByArgs(err.Error())
}
rus, err := getRUPerSec(ctx, e.ctx, exec, startTime, endTime)
if err != nil {
Expand Down Expand Up @@ -243,10 +248,10 @@ func (e *calibrateResourceExec) dynamicCalibrate(ctx context.Context, req *chunk
tikvCPUs.next()
}
if len(quotas) < 5 {
return errors.Errorf("There are too few metrics points available in selected time window")
return errLowUsage
}
if float64(len(quotas))/float64(len(quotas)+lowCount) <= percentOfPass {
return errors.Errorf("The workload in selected time window is too low, with which TiDB is unable to reach a capacity estimation; please select another time window with higher workload, or calibrate resource by hardware instead")
return errLowUsage
}
sort.Slice(quotas, func(i, j int) bool {
return quotas[i] > quotas[j]
Expand Down Expand Up @@ -274,11 +279,11 @@ func (e *calibrateResourceExec) staticCalibrate(ctx context.Context, req *chunk.

totalKVCPUQuota, err := getTiKVTotalCPUQuota(ctx, exec)
if err != nil {
return err
return errNoCPUQuotaMetrics.FastGenByArgs(err.Error())
}
totalTiDBCPU, err := getTiDBTotalCPUQuota(ctx, exec)
if err != nil {
return err
return errNoCPUQuotaMetrics.FastGenByArgs(err.Error())
}

// The default workload to calculate the RU capacity.
Expand Down Expand Up @@ -378,9 +383,6 @@ func getValuesFromMetrics(ctx context.Context, sctx sessionctx.Context, exec sql
if err != nil {
return nil, errors.Trace(err)
}
if len(rows) == 0 {
return nil, errors.Errorf("metrics '%s' is empty", metrics)
}
ret := make([]*timePointValue, 0, len(rows))
for _, row := range rows {
if tp, err := row.GetTime(0).AdjustedGoTime(sctx.GetSessionVars().Location()); err == nil {
Expand Down
47 changes: 30 additions & 17 deletions executor/calibrate_resource_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,24 +89,30 @@ func TestCalibrateResource(t *testing.T) {
return time
}

mockData := map[string][][]types.Datum{
"tikv_cpu_quota": {
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", 8.0),
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-1", 8.0),
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-2", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-2", 8.0),
},
"tidb_server_maxprocs": {
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", 40.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", 40.0),
},
}
mockData := make(map[string][][]types.Datum)
ctx := context.WithValue(context.Background(), "__mockMetricsTableData", mockData)
ctx = failpoint.WithHook(ctx, func(_ context.Context, fpname string) bool {
return fpName == fpname
})
rs, err = tk.Exec("CALIBRATE RESOURCE")
require.NoError(t, err)
require.NotNil(t, rs)
err = rs.Next(ctx, rs.NewChunk(nil))
// When the mock metrics are empty, the error is always `pd unavailable`, so don't check further details.
require.ErrorContains(t, err, "There is no CPU quota metrics, query metric error: pd unavailable")

mockData["tikv_cpu_quota"] = [][]types.Datum{
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-0", 8.0),
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-1", 8.0),
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tikv-2", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-0", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-1", 8.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tikv-2", 8.0),
}
mockData["tidb_server_maxprocs"] = [][]types.Datum{
types.MakeDatums(datetime("2020-02-12 10:35:00"), "tidb-0", 40.0),
types.MakeDatums(datetime("2020-02-12 10:36:00"), "tidb-0", 40.0),
}
tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE").Check(testkit.Rows("69768"))
tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE WORKLOAD TPCC").Check(testkit.Rows("69768"))
tk.MustQueryWithContext(ctx, "CALIBRATE RESOURCE WORKLOAD OLTP_READ_WRITE").Check(testkit.Rows("55823"))
Expand Down Expand Up @@ -396,7 +402,7 @@ func TestCalibrateResource(t *testing.T) {
require.NoError(t, err)
require.NotNil(t, rs)
err = rs.Next(ctx, rs.NewChunk(nil))
require.ErrorContains(t, err, "There are too few metrics points available in selected time window")
require.ErrorContains(t, err, "The workload in selected time window is too low")

ru3 := [][]types.Datum{
types.MakeDatums(datetime("2020-02-12 10:25:00"), 2200.0),
Expand Down Expand Up @@ -436,7 +442,7 @@ func TestCalibrateResource(t *testing.T) {
require.NoError(t, err)
require.NotNil(t, rs)
err = rs.Next(ctx, rs.NewChunk(nil))
require.ErrorContains(t, err, "There are too few metrics points available in selected time window")
require.ErrorContains(t, err, "The workload in selected time window is too low")

// flash back to init data.
mockData["resource_manager_resource_unit"] = ru1
Expand Down Expand Up @@ -547,7 +553,14 @@ func TestCalibrateResource(t *testing.T) {
require.NoError(t, err)
require.NotNil(t, rs)
err = rs.Next(ctx, rs.NewChunk(nil))
require.ErrorContains(t, err, "There are too few metrics points available in selected time window")
require.ErrorContains(t, err, "The workload in selected time window is too low")

delete(mockData, "process_cpu_usage")
rs, err = tk.Exec("CALIBRATE RESOURCE START_TIME '2020-02-12 10:35:00' END_TIME '2020-02-12 10:45:00'")
require.NoError(t, err)
require.NotNil(t, rs)
err = rs.Next(ctx, rs.NewChunk(nil))
require.ErrorContains(t, err, "query metric error: pd unavailable")
}

type mockResourceGroupProvider struct {
Expand Down

0 comments on commit 841aed8

Please sign in to comment.