@@ -22,40 +22,53 @@ void InferStatisticsForReadTable(const TExprNode::TPtr& input, TTypeAnnotationCo
22
22
const TKqpOptimizeContext& kqpCtx) {
23
23
24
24
auto inputNode = TExprBase (input);
25
- double nRows = 0 ;
26
- int nAttrs = 0 ;
25
+ std::shared_ptr<TOptimizerStatistics> inputStats;
27
26
28
- const TExprNode* path;
27
+ int nAttrs = 0 ;
28
+ bool readRange = false ;
29
29
30
30
if (auto readTable = inputNode.Maybe <TKqlReadTableBase>()) {
31
- path = readTable.Cast ().Table ().Path (). Raw ();
31
+ inputStats = typeCtx-> GetStats ( readTable.Cast ().Table ().Raw () );
32
32
nAttrs = readTable.Cast ().Columns ().Size ();
33
+
34
+ auto range = readTable.Cast ().Range ();
35
+ auto rangeFrom = range.From ().Maybe <TKqlKeyTuple>();
36
+ auto rangeTo = range.To ().Maybe <TKqlKeyTuple>();
37
+ if (rangeFrom && rangeTo) {
38
+ readRange = true ;
39
+ }
33
40
} else if (auto readRanges = inputNode.Maybe <TKqlReadTableRangesBase>()) {
34
- path = readRanges.Cast ().Table ().Path (). Raw ();
41
+ inputStats = typeCtx-> GetStats ( readRanges.Cast ().Table ().Raw () );
35
42
nAttrs = readRanges.Cast ().Columns ().Size ();
36
43
} else {
37
44
Y_ENSURE (false , " Invalid node type for InferStatisticsForReadTable" );
38
45
}
39
46
40
- const auto & tableData = kqpCtx. Tables -> ExistingTable (kqpCtx. Cluster , path-> Content ());
41
- int totalAttrs = tableData. Metadata -> Columns . size ();
42
- nRows = tableData. Metadata -> RecordsCount ;
43
-
44
- double byteSize = tableData. Metadata -> DataSize * (nAttrs / ( double )totalAttrs);
45
-
46
- auto keyColumns = TIntrusivePtr<TOptimizerStatistics::TKeyColumns>( new TOptimizerStatistics::TKeyColumns (tableData. Metadata -> KeyColumnNames )) ;
47
- auto stats = std::make_shared<TOptimizerStatistics>(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0.0 , keyColumns) ;
48
- if (kqpCtx. Config -> OverrideStatistics . Get () ) {
49
- stats = OverrideStatistics (*stats, path-> Content (), *kqpCtx. Config -> OverrideStatistics . Get ()) ;
47
+ /* *
48
+ * We need index statistics to calculate this in the future
49
+ * Right now we use very small estimates to make sure CBO picks Lookup Joins
50
+ * I.e. there can be a chain of lookup joins in OLTP scenario and we want to make
51
+ * sure the cardinality doesn't blow up and lookup joins are still being picked
52
+ */
53
+ double inputRows = inputStats-> Nrows ;
54
+ double nRows = inputRows ;
55
+ if (readRange ) {
56
+ nRows = 1 ;
50
57
}
51
58
52
- if (stats->ColumnStatistics ) {
53
- for (const auto & [columnName, metaData]: tableData.Metadata ->Columns ) {
54
- stats->ColumnStatistics ->Data [columnName].Type = metaData.Type ;
55
- }
56
- }
59
+ double sizePerRow = inputStats->ByteSize / (inputRows==0 ?1 :inputRows);
60
+ double byteSize = nRows * sizePerRow * (nAttrs / (double )inputStats->Ncols );
57
61
58
- YQL_CLOG (TRACE, CoreDq) << " Infer statistics for read table, nrows: " << stats->Nrows << " , nattrs: " << stats->Ncols ;
62
+ auto stats = std::make_shared<TOptimizerStatistics>(
63
+ EStatisticsType::BaseTable,
64
+ nRows,
65
+ nAttrs,
66
+ byteSize,
67
+ 0.0 ,
68
+ inputStats->KeyColumns ,
69
+ inputStats->ColumnStatistics );
70
+
71
+ YQL_CLOG (TRACE, CoreDq) << " Infer statistics for read table, nrows: " << stats->Nrows << " , nattrs: " << stats->Ncols << " , byteSize: " << stats->ByteSize ;
59
72
60
73
typeCtx->SetStats (input.Get (), stats);
61
74
}
@@ -81,7 +94,7 @@ void InferStatisticsForKqpTable(const TExprNode::TPtr& input, TTypeAnnotationCon
81
94
stats = OverrideStatistics (*stats, path.Value (), *kqpCtx.Config ->OverrideStatistics .Get ());
82
95
}
83
96
84
- YQL_CLOG (TRACE, CoreDq) << " Infer statistics for table: " << path.Value () << " , nrows: " << stats->Nrows << " , nattrs: " << stats->Ncols << " , nKeyColumns: " << stats->KeyColumns ->Data .size ();
97
+ YQL_CLOG (TRACE, CoreDq) << " Infer statistics for table: " << path.Value () << " , nrows: " << stats->Nrows << " , nattrs: " << stats->Ncols << " , byteSize: " << stats-> ByteSize << " , nKeyColumns: " << stats->KeyColumns ->Data .size ();
85
98
86
99
typeCtx->SetStats (input.Get (), stats);
87
100
}
@@ -103,7 +116,14 @@ void InferStatisticsForSteamLookup(const TExprNode::TPtr& input, TTypeAnnotation
103
116
auto inputStats = typeCtx->GetStats (streamLookup.Table ().Raw ());
104
117
auto byteSize = inputStats->ByteSize * (nAttrs / (double ) inputStats->Ncols );
105
118
106
- typeCtx->SetStats (input.Get (), std::make_shared<TOptimizerStatistics>(EStatisticsType::BaseTable, inputStats->Nrows , nAttrs, byteSize, 0 , inputStats->KeyColumns ));
119
+ typeCtx->SetStats (input.Get (), std::make_shared<TOptimizerStatistics>(
120
+ EStatisticsType::BaseTable,
121
+ inputStats->Nrows ,
122
+ nAttrs,
123
+ byteSize,
124
+ 0 ,
125
+ inputStats->KeyColumns ,
126
+ inputStats->ColumnStatistics ));
107
127
}
108
128
109
129
/* *
@@ -134,7 +154,14 @@ void InferStatisticsForLookupTable(const TExprNode::TPtr& input, TTypeAnnotation
134
154
byteSize = 10 ;
135
155
}
136
156
137
- typeCtx->SetStats (input.Get (), std::make_shared<TOptimizerStatistics>(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, 0 , inputStats->KeyColumns ));
157
+ typeCtx->SetStats (input.Get (), std::make_shared<TOptimizerStatistics>(
158
+ EStatisticsType::BaseTable,
159
+ nRows,
160
+ nAttrs,
161
+ byteSize,
162
+ 0 ,
163
+ inputStats->KeyColumns ,
164
+ inputStats->ColumnStatistics ));
138
165
}
139
166
140
167
/* *
@@ -151,7 +178,8 @@ void InferStatisticsForRowsSourceSettings(const TExprNode::TPtr& input, TTypeAnn
151
178
return ;
152
179
}
153
180
154
- double nRows = inputStats->Nrows ;
181
+ double inputRows = inputStats->Nrows ;
182
+ double nRows = inputRows;
155
183
156
184
// Check if we have a range expression, in that case just assign a single row to this read
157
185
// We don't currently check the size of an index lookup
@@ -165,10 +193,19 @@ void InferStatisticsForRowsSourceSettings(const TExprNode::TPtr& input, TTypeAnn
165
193
}
166
194
167
195
int nAttrs = sourceSettings.Columns ().Size ();
196
+
197
+ double sizePerRow = inputStats->ByteSize / (inputRows==0 ?1 :inputRows);
198
+ double byteSize = nRows * sizePerRow * (nAttrs / (double )inputStats->Ncols );
168
199
double cost = inputStats->Cost ;
169
- double byteSize = inputStats->ByteSize * (nAttrs / (double )inputStats->Ncols );
170
200
171
- typeCtx->SetStats (input.Get (), std::make_shared<TOptimizerStatistics>(EStatisticsType::BaseTable, nRows, nAttrs, byteSize, cost, inputStats->KeyColumns ));
201
+ typeCtx->SetStats (input.Get (), std::make_shared<TOptimizerStatistics>(
202
+ EStatisticsType::BaseTable,
203
+ nRows,
204
+ nAttrs,
205
+ byteSize,
206
+ cost,
207
+ inputStats->KeyColumns ,
208
+ inputStats->ColumnStatistics ));
172
209
}
173
210
174
211
/* *
@@ -199,7 +236,8 @@ void InferStatisticsForReadTableIndexRanges(const TExprNode::TPtr& input, TTypeA
199
236
inputStats->Ncols ,
200
237
inputStats->ByteSize ,
201
238
inputStats->Cost ,
202
- indexColumnsPtr);
239
+ indexColumnsPtr,
240
+ inputStats->ColumnStatistics );
203
241
204
242
typeCtx->SetStats (input.Get (), stats);
205
243
0 commit comments