Skip to content

Commit bd54017

Browse files
committed
This commit is a deep refactoring of the AQO machinery and the machine learning core.
Many weaknesses were removed. Comments were added. At this moment we know about possible problems with the algorithm that collects clauses and selectivities. We need to move the AQO knowledge base out of the database and into file storage. In any case, we need to get rid of disk access during the prediction and learning procedures.
1 parent 1d45280 commit bd54017

File tree

10 files changed

+470
-344
lines changed

10 files changed

+470
-344
lines changed

aqo.c

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ static const struct config_enum_entry format_options[] = {
1414
{"forced", AQO_MODE_FORCED, false},
1515
{"controlled", AQO_MODE_CONTROLLED, false},
1616
{"learn", AQO_MODE_LEARN, false},
17+
{"fixed", AQO_MODE_FIXED, false},
1718
{"disabled", AQO_MODE_DISABLED, false},
1819
{NULL, 0, false}
1920
};
@@ -28,11 +29,24 @@ int auto_tuning_infinite_loop = 8;
2829
/* stat_size > infinite_loop + window_size + 3 is required for auto_tuning*/
2930

3031
/* Machine learning parameters */
31-
double object_selection_prediction_threshold = 0.3;
32-
double object_selection_object_threshold = 0.1;
33-
double learning_rate = 1e-1;
32+
33+
/*
34+
* Defines where we do not perform the learning procedure.
35+
*/
36+
const double object_selection_prediction_threshold = 0.3;
37+
38+
/*
39+
* This parameter tells us that the new learning sample object is at a very
40+
* small distance from one whose features are already stored in the matrix.
41+
* In this case we will not add a new line to the matrix, but will modify
42+
* the nearest neighbor's features and cardinality with linear smoothing by
43+
* the learning_rate coefficient.
44+
*/
45+
const double object_selection_threshold = 0.1;
46+
const double learning_rate = 1e-1;
47+
48+
/* The number of nearest neighbors which will be chosen for ML-operations */
3449
int aqo_k = 3;
35-
int aqo_K = 30;
3650
double log_selectivity_lower_bound = -30;
3751

3852
/*
@@ -70,7 +84,7 @@ _PG_init(void)
7084
&aqo_mode,
7185
AQO_MODE_CONTROLLED,
7286
format_options,
73-
PGC_SUSET,
87+
PGC_USERSET,
7488
0,
7589
NULL,
7690
NULL,
@@ -83,7 +97,7 @@ _PG_init(void)
8397
prev_ExecutorStart_hook = ExecutorStart_hook;
8498
ExecutorStart_hook = aqo_ExecutorStart;
8599
prev_ExecutorEnd_hook = ExecutorEnd_hook;
86-
ExecutorEnd_hook = learn_query_stat;
100+
ExecutorEnd_hook = aqo_ExecutorEnd;
87101
prev_set_baserel_rows_estimate_hook = set_baserel_rows_estimate_hook;
88102
set_baserel_rows_estimate_hook = aqo_set_baserel_rows_estimate;
89103
prev_get_parameterized_baserel_size_hook = get_parameterized_baserel_size_hook;
@@ -96,6 +110,7 @@ _PG_init(void)
96110
copy_generic_path_info_hook = aqo_copy_generic_path_info;
97111
prev_ExplainOnePlan_hook = ExplainOnePlan_hook;
98112
ExplainOnePlan_hook = print_into_explain;
113+
parampathinfo_postinit_hook = ppi_hook;
99114

100115
init_deactivated_queries_storage();
101116
AQOMemoryContext = AllocSetContextCreate(TopMemoryContext, "AQOMemoryContext", ALLOCSET_DEFAULT_SIZES);

aqo.h

Lines changed: 33 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@
133133
#include "executor/execdesc.h"
134134
#include "nodes/makefuncs.h"
135135
#include "nodes/nodeFuncs.h"
136+
#include "optimizer/pathnode.h"
136137
#include "optimizer/planmain.h"
137138
#include "optimizer/planner.h"
138139
#include "optimizer/cost.h"
@@ -144,7 +145,6 @@
144145
#include "utils/hsearch.h"
145146
#include "utils/memutils.h"
146147
#include "utils/rel.h"
147-
#include "utils/tqual.h"
148148
#include "utils/fmgroids.h"
149149
#include "utils/snapmgr.h"
150150

@@ -165,11 +165,19 @@ typedef enum
165165
AQO_MODE_CONTROLLED,
166166
/* Creates new feature space for each query type without auto-tuning */
167167
AQO_MODE_LEARN,
168+
/* Use only current AQO estimations, without learning or tuning */
169+
AQO_MODE_FIXED,
168170
/* Aqo is disabled for all queries */
169171
AQO_MODE_DISABLED,
170172
} AQO_MODE;
171173
extern int aqo_mode;
172174

175+
/*
176+
* It is mostly needed for auto-tuning of a query. In auto-tuning mode, AQO
177+
* checks the stability of the last executions of the query, any bad influence
178+
* of strong cardinality estimation on query execution (planner bug?), and so
179+
* on. It can induce AQO to suppress machine learning for this query.
180+
*/
173181
typedef struct
174182
{
175183
double *execution_time_with_aqo;
@@ -178,12 +186,14 @@ typedef struct
178186
double *planning_time_without_aqo;
179187
double *cardinality_error_with_aqo;
180188
double *cardinality_error_without_aqo;
189+
181190
int execution_time_with_aqo_size;
182191
int execution_time_without_aqo_size;
183192
int planning_time_with_aqo_size;
184193
int planning_time_without_aqo_size;
185194
int cardinality_error_with_aqo_size;
186195
int cardinality_error_without_aqo_size;
196+
187197
int64 executions_with_aqo;
188198
int64 executions_without_aqo;
189199
} QueryStat;
@@ -205,6 +215,9 @@ typedef struct QueryContextData
205215
double query_planning_time;
206216
} QueryContextData;
207217

218+
extern double predicted_ppi_rows;
219+
extern double fss_ppi_hash;
220+
208221
/* Parameters of autotuning */
209222
extern int aqo_stat_size;
210223
extern int auto_tuning_window_size;
@@ -213,11 +226,12 @@ extern int auto_tuning_max_iterations;
213226
extern int auto_tuning_infinite_loop;
214227

215228
/* Machine learning parameters */
216-
extern double object_selection_prediction_threshold;
217-
extern double object_selection_object_threshold;
218-
extern double learning_rate;
229+
#define aqo_K (30)
230+
231+
extern const double object_selection_prediction_threshold;
232+
extern const double object_selection_threshold;
233+
extern const double learning_rate;
219234
extern int aqo_k;
220-
extern int aqo_K;
221235
extern double log_selectivity_lower_bound;
222236

223237
/* Parameters for current query */
@@ -244,11 +258,12 @@ extern copy_generic_path_info_hook_type
244258
prev_copy_generic_path_info_hook;
245259
extern ExplainOnePlan_hook_type prev_ExplainOnePlan_hook;
246260

261+
extern void ppi_hook(ParamPathInfo *ppi);
247262

248263
/* Hash functions */
249264
int get_query_hash(Query *parse, const char *query_text);
250-
void get_fss_for_object(List *clauselist, List *selectivities, List *relidslist,
251-
int *nfeatures, int *fss_hash, double **features);
265+
extern int get_fss_for_object(List *clauselist, List *selectivities,
266+
List *relidslist, int *nfeatures, double **features);
252267
void get_eclasses(List *clauselist, int *nargs, int **args_hash, int **eclass_hash);
253268
int get_clause_hash(Expr *clause, int nargs, int *args_hash, int *eclass_hash);
254269

@@ -264,9 +279,8 @@ bool update_query(int query_hash, bool learn_aqo, bool use_aqo,
264279
bool add_query_text(int query_hash, const char *query_text);
265280
bool load_fss(int fss_hash, int ncols,
266281
double **matrix, double *targets, int *rows);
267-
bool update_fss(int fss_hash, int nrows, int ncols,
268-
double **matrix, double *targets,
269-
int old_nrows, List *changed_rows);
282+
extern bool update_fss(int fss_hash, int nrows, int ncols,
283+
double **matrix, double *targets);
270284
QueryStat *get_aqo_stat(int query_hash);
271285
void update_aqo_stat(int query_hash, QueryStat * stat);
272286
void init_deactivated_queries_storage(void);
@@ -288,7 +302,7 @@ void print_into_explain(PlannedStmt *plannedstmt, IntoClause *into,
288302
void disable_aqo_for_query(void);
289303

290304
/* Cardinality estimation hooks */
291-
void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel);
305+
extern void aqo_set_baserel_rows_estimate(PlannerInfo *root, RelOptInfo *rel);
292306
double aqo_get_parameterized_baserel_size(PlannerInfo *root,
293307
RelOptInfo *rel,
294308
List *param_clauses);
@@ -314,22 +328,21 @@ List *get_list_of_relids(PlannerInfo *root, Relids relids);
314328
List *get_path_clauses(Path *path, PlannerInfo *root, List **selectivities);
315329

316330
/* Cardinality estimation */
317-
double predict_for_relation(List *restrict_clauses,
318-
List *selectivities,
319-
List *relids);
331+
double predict_for_relation(List *restrict_clauses, List *selectivities,
332+
List *relids, int *fss_hash);
320333

321334
/* Query execution statistics collecting hooks */
322335
void aqo_ExecutorStart(QueryDesc *queryDesc, int eflags);
323336
void aqo_copy_generic_path_info(PlannerInfo *root, Plan *dest, Path *src);
324-
void learn_query_stat(QueryDesc *queryDesc);
337+
void aqo_ExecutorEnd(QueryDesc *queryDesc);
325338

326339
/* Machine learning techniques */
327-
double OkNNr_predict(int matrix_rows, int matrix_cols,
328-
double **matrix, double *targets,
329-
double *nw_features);
330-
List *OkNNr_learn(int matrix_rows, int matrix_cols,
340+
extern double OkNNr_predict(int nrows, int ncols,
341+
double **matrix, const double *targets,
342+
double *features);
343+
extern int OkNNr_learn(int matrix_rows, int matrix_cols,
331344
double **matrix, double *targets,
332-
double *nw_features, double nw_target);
345+
double *features, double target);
333346

334347
/* Automatic query tuning */
335348
void automatical_query_tuning(int query_hash, QueryStat * stat);

auto_tuning.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,8 @@ automatical_query_tuning(int query_hash, QueryStat * stat)
168168
}
169169

170170
if (num_iterations <= auto_tuning_max_iterations || p_use > 0.5)
171-
update_query(query_hash, query_context.learn_aqo, query_context.use_aqo, query_context.fspace_hash, true);
171+
update_query(query_hash, query_context.learn_aqo, query_context.use_aqo,
172+
query_context.fspace_hash, true);
172173
else
173174
update_query(query_hash, false, false, query_context.fspace_hash, false);
174175
}

cardinality_estimation.c

Lines changed: 28 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -13,38 +13,42 @@
1313
* General method for prediction the cardinality of given relation.
1414
*/
1515
double
16-
predict_for_relation(List *restrict_clauses, List *selectivities, List *relids)
16+
predict_for_relation(List *restrict_clauses, List *selectivities, List *relids, int *fss_hash)
1717
{
18-
int nfeatures;
19-
int fss_hash;
20-
double **matrix;
21-
double *target;
22-
double *features;
23-
double result;
24-
int rows;
25-
int i;
18+
int nfeatures;
19+
double *matrix[aqo_K];
20+
double targets[aqo_K];
21+
double *features;
22+
double result;
23+
int rows;
24+
int i;
2625

27-
get_fss_for_object(restrict_clauses, selectivities, relids,
28-
&nfeatures, &fss_hash, &features);
26+
*fss_hash = get_fss_for_object(restrict_clauses, selectivities, relids,
27+
&nfeatures, &features);
2928

30-
matrix = palloc(sizeof(*matrix) * aqo_K);
31-
for (i = 0; i < aqo_K; ++i)
32-
matrix[i] = palloc0(sizeof(**matrix) * nfeatures);
33-
target = palloc0(sizeof(*target) * aqo_K);
29+
if (nfeatures > 0)
30+
for (i = 0; i < aqo_K; ++i)
31+
matrix[i] = palloc0(sizeof(**matrix) * nfeatures);
3432

35-
if (load_fss(fss_hash, nfeatures, matrix, target, &rows))
36-
result = OkNNr_predict(rows, nfeatures, matrix, target, features);
33+
if (load_fss(*fss_hash, nfeatures, matrix, targets, &rows))
34+
result = OkNNr_predict(rows, nfeatures, matrix, targets, features);
3735
else
36+
{
37+
/*
38+
* During planning, the optimizer tries to build many alternative paths. Many
39+
* of these are not used in the final query execution path. Consequently, only
40+
* a small part of the paths has been used for AQO learning and put into the
41+
* AQO knowledge base.
42+
*/
3843
result = -1;
44+
}
3945

4046
pfree(features);
41-
for (i = 0; i < aqo_K; ++i)
42-
pfree(matrix[i]);
43-
pfree(matrix);
44-
pfree(target);
45-
list_free_deep(selectivities);
46-
list_free(restrict_clauses);
47-
list_free(relids);
47+
if (nfeatures > 0)
48+
{
49+
for (i = 0; i < aqo_K; ++i)
50+
pfree(matrix[i]);
51+
}
4852

4953
if (result < 0)
5054
return -1;

0 commit comments

Comments
 (0)