|
// The definitions in this file rely on proto2 features (`optional` labels with
// `[default = ...]`, an enum whose first value is non-zero), so the syntax is
// stated explicitly instead of relying on protoc's implicit proto2 fallback.
syntax = "proto2";

package NKikimrSSA;
option java_package = "ru.yandex.kikimr.proto";
| 3 | + |
// Program to push down to ColumnShard.
//
// A program is a sequence of commands (see TCommand). Examples:
//
// > 'SELECT y, z WHERE x > 10'
//      PROJECTION x, y, z
//      ASSIGN tmp = x > 10
//      FILTER BY tmp
//      PROJECTION y, z
//
// > 'SELECT min(x), sum(y) GROUP BY z'
//      PROJECTION x, y, z
//      ASSIGN agg1 = min(x)
//      ASSIGN agg2 = sum(y)
//      GROUP BY z
//      PROJECTION agg1, agg2
//
message TProgram {
    // Reference to a column, carried as a numeric id and/or a name.
    // NOTE(review): which of the two takes precedence when both are set is
    // decided by the consumer and is not visible in this file.
    message TColumn {
        optional uint64 Id = 1;
        optional string Name = 2;
    }

    // A literal value. At most one member of the oneof is set.
    message TConstant {
        oneof value {
            bool Bool = 1;
            int32 Int32 = 2;
            uint32 Uint32 = 3;
            int64 Int64 = 4;
            uint64 Uint64 = 5;
            float Float = 6;
            double Double = 7;
            bytes Bytes = 8;
            string Text = 9;
            // Protobuf has no 8/16-bit scalar types, so the narrow integer
            // constants below are carried in 32-bit fields.
            int32 Int8 = 10;
            uint32 Uint8 = 11;
            int32 Int16 = 12;
            uint32 Uint16 = 13;
            // NOTE(review): time unit is not stated here - confirm with the
            // consumer before relying on it.
            uint64 Timestamp = 14;
        }
    }

    // Payload for the BloomFilter implementation of TOlapIndexChecker.
    message TBloomFilterChecker {
        // NOTE(review): presumably the hashes to probe the filter with -
        // confirm against the code that fills this field.
        repeated uint64 HashValues = 1;
    }

    // Index check description. Common attributes live in IndexId/ClassName;
    // the concrete checker is selected through the Implementation oneof.
    message TOlapIndexChecker {
        optional uint32 IndexId = 1;
        optional string ClassName = 2;

        // Groups several checkers. The type is recursive (children are
        // TOlapIndexChecker again), so nesting depth is limited only by the
        // parser's recursion limit.
        // NOTE(review): how children are combined is not encoded here -
        // presumably selected via ClassName; confirm.
        message TCompositeChecker {
            repeated TOlapIndexChecker ChildrenCheckers = 1;
        }

        oneof Implementation {
            TBloomFilterChecker BloomFilter = 40;
            TCompositeChecker Composite = 41;
        }
    }

    // Named parameter usable as an assignment expression.
    // NOTE(review): presumably resolved against TOlapProgram.Parameters -
    // confirm.
    message TParameter {
        optional string Name = 1;
    }

    // Kind of function implementation. There is no zero value; fields of this
    // type declare SIMPLE_ARROW as their explicit default.
    enum EFunctionType {
        SIMPLE_ARROW = 1;
        YQL_KERNEL = 2;
    }

    // "ASSIGN Column = expression" command.
    message TAssignment {
        // Built-in function identifiers; TFunction.Id carries one of these
        // values as a plain uint32.
        enum EFunction {
            FUNC_UNSPECIFIED = 0;
            FUNC_CMP_EQUAL = 1;
            FUNC_CMP_NOT_EQUAL = 2;
            FUNC_CMP_LESS = 3;
            FUNC_CMP_LESS_EQUAL = 4;
            FUNC_CMP_GREATER = 5;
            FUNC_CMP_GREATER_EQUAL = 6;
            FUNC_IS_NULL = 7;
            FUNC_STR_LENGTH = 8;
            FUNC_STR_MATCH = 9;
            FUNC_BINARY_NOT = 10;
            FUNC_BINARY_AND = 11;
            FUNC_BINARY_OR = 12;
            FUNC_BINARY_XOR = 13;
            FUNC_MATH_ADD = 14;
            FUNC_MATH_SUBTRACT = 15;
            FUNC_MATH_MULTIPLY = 16;
            FUNC_MATH_DIVIDE = 17;
            FUNC_CAST_TO_BOOLEAN = 18;
            FUNC_CAST_TO_INT8 = 19;
            FUNC_CAST_TO_INT16 = 20;
            FUNC_CAST_TO_INT32 = 21;
            FUNC_CAST_TO_INT64 = 22;
            FUNC_CAST_TO_UINT8 = 23;
            FUNC_CAST_TO_UINT16 = 24;
            FUNC_CAST_TO_UINT32 = 25;
            FUNC_CAST_TO_UINT64 = 26;
            FUNC_CAST_TO_FLOAT = 27;
            FUNC_CAST_TO_DOUBLE = 28;
            FUNC_CAST_TO_BINARY = 29;
            FUNC_CAST_TO_FIXED_SIZE_BINARY = 30;
            FUNC_CAST_TO_TIMESTAMP = 31;
            FUNC_STR_MATCH_LIKE = 32;
            FUNC_STR_STARTS_WITH = 33;
            FUNC_STR_ENDS_WITH = 34;
            FUNC_STR_MATCH_IGNORE_CASE = 35;
            FUNC_STR_STARTS_WITH_IGNORE_CASE = 36;
            FUNC_STR_ENDS_WITH_IGNORE_CASE = 37;
        }

        // Call of a built-in function over column arguments.
        message TFunction {
            optional uint32 Id = 1; // EFunction
            repeated TColumn Arguments = 2;
            optional EFunctionType FunctionType = 3 [default = SIMPLE_ARROW];
            // NOTE(review): presumably an index into TProgram.Kernels, used
            // with YQL_KERNEL functions - confirm.
            optional uint32 KernelIdx = 4;
            optional uint32 YqlOperationId = 5; // TKernelRequestBuilder::EBinaryOp
        }

        // Call of a function identified by name rather than by EFunction id.
        message TExternalFunction {
            optional string Name = 1;
            repeated TColumn Arguments = 2;
        }

        // Column that receives the value of the expression.
        optional TColumn Column = 1;
        // Right-hand side of the assignment; at most one member is set.
        oneof expression {
            TFunction Function = 2;
            TExternalFunction ExternalFunction = 3;
            TConstant Constant = 4;
            bool Null = 5;
            TParameter Parameter = 6;
        }
    }

    // Assignment of an aggregate function result to a column; used as an
    // element of TGroupBy.Aggregates.
    message TAggregateAssignment {
        // Aggregate function identifiers; TAggregateFunction.Id carries one of
        // these values as a plain uint32. The commented-out entries are not
        // part of the schema.
        enum EAggregateFunction {
            AGG_UNSPECIFIED = 0;
            AGG_SOME = 1;
            AGG_COUNT = 2;
            AGG_MIN = 3;
            AGG_MAX = 4;
            AGG_SUM = 5;
            //AGG_AVG = 6;
            //AGG_VAR = 7;
            //AGG_COVAR = 8;
            //AGG_STDDEV = 9;
            //AGG_CORR = 10;
            //AGG_ARG_MIN = 11;
            //AGG_ARG_MAX = 12;
            //AGG_COUNT_DISTINCT = 13;
            //AGG_QUANTILES = 14;
            //AGG_TOP_COUNT = 15;
            //AGG_TOP_SUM = 16;
        }

        // Call of an aggregate function over column arguments.
        message TAggregateFunction {
            optional uint32 Id = 1; // EAggregateFunction
            repeated TColumn Arguments = 2;
            optional string Variant = 3; // e.g. POP/SAMP for AGG_VAR, AGG_COVAR, AGG_STDDEV
            optional EFunctionType FunctionType = 4 [default = SIMPLE_ARROW];
            // NOTE(review): presumably an index into TProgram.Kernels -
            // confirm.
            optional uint32 KernelIdx = 5;
            // TODO: Parameters, i.e. N for topK(N)(arg)
        }

        // Column that receives the aggregate result.
        optional TColumn Column = 1;
        optional TAggregateFunction Function = 2;
    }

    // "PROJECTION c1, c2, ..." command: the list of columns to keep.
    message TProjection {
        repeated TColumn Columns = 1;
    }

    // "FILTER BY predicate" command.
    message TFilter {
        // Predicate should be a bool column:
        // true - keep the row
        // false - remove the row
        optional TColumn Predicate = 1;
    }

    // "GROUP BY" command: aggregates to compute and the grouping key columns.
    message TGroupBy {
        repeated TAggregateAssignment Aggregates = 1;
        repeated TColumn KeyColumns = 2;
    }

    // One step of the program; at most one member of the oneof is set.
    message TCommand {
        oneof line {
            TAssignment Assign = 1;
            TProjection Projection = 2;
            TFilter Filter = 3;
            TGroupBy GroupBy = 4;
            // TODO: ORDER BY, LIMIT
        }
    }

    // Steps of the program, in the order they are listed.
    repeated TCommand Command = 1;
    optional uint32 Version = 2;
    // Opaque blob.
    // NOTE(review): presumably the serialized kernels that KernelIdx refers
    // to - confirm.
    optional bytes Kernels = 3;
}
| 200 | + |
// Envelope that carries an OLAP program together with its parameters and an
// optional index checker.
message TOlapProgram {
    // Store OLAP program in serialized format in case we do not need to deserialize it in TScanTaskMeta
    // Note: when this message exists the program must be present.
    optional bytes Program = 1;
    // RecordBatch deserialization requires arrow::Schema, thus store it here
    optional bytes ParametersSchema = 2;
    // NOTE(review): presumably a serialized arrow RecordBatch described by
    // ParametersSchema - confirm.
    optional bytes Parameters = 3;
    optional TProgram.TOlapIndexChecker IndexChecker = 4;
}
0 commit comments