2
2
#include " yql_generic_read_actor.h"
3
3
#include " yql_generic_token_provider.h"
4
4
5
+ #include < util/string/join.h>
5
6
#include < ydb/library/actors/core/actor_bootstrapped.h>
6
7
#include < ydb/library/actors/core/actorsystem.h>
7
8
#include < ydb/library/actors/core/event_local.h>
@@ -46,11 +47,13 @@ namespace NYql::NDq {
46
47
TGenericTokenProvider::TPtr tokenProvider,
47
48
Generic::TSource&& source,
48
49
const NActors::TActorId& computeActorId,
49
- const NKikimr::NMiniKQL::THolderFactory& holderFactory)
50
+ const NKikimr::NMiniKQL::THolderFactory& holderFactory,
51
+ TVector<TString>&& splitDescriptions)
50
52
: InputIndex_(inputIndex)
51
53
, ComputeActorId_(computeActorId)
52
54
, Client_(std::move(client))
53
55
, TokenProvider_(std::move(tokenProvider))
56
+ , SplitDescriptions_(std::move(splitDescriptions))
54
57
, HolderFactory_(holderFactory)
55
58
, Source_(source)
56
59
{
@@ -59,7 +62,7 @@ namespace NYql::NDq {
59
62
60
63
void Bootstrap () {
61
64
Become (&TGenericReadActor::StateFunc);
62
- auto issue = InitSplitsListing ();
65
+ auto issue = InitSplitsReading ();
63
66
if (issue) {
64
67
return NotifyComputeActorWithIssue (
65
68
TActivationContext::ActorSystem (),
@@ -72,145 +75,43 @@ namespace NYql::NDq {
72
75
static constexpr char ActorName[] = " GENERIC_READ_ACTOR" ;
73
76
74
77
private:
75
- // TODO: make two different states
76
78
// clang-format off
77
79
STRICT_STFUNC (StateFunc,
78
- hFunc (TEvListSplitsIterator, Handle);
79
- hFunc (TEvListSplitsPart, Handle);
80
- hFunc (TEvListSplitsFinished, Handle);
81
80
hFunc (TEvReadSplitsIterator, Handle);
82
81
hFunc (TEvReadSplitsPart, Handle);
83
82
hFunc (TEvReadSplitsFinished, Handle);
84
83
)
85
84
// clang-format on
86
85
87
- // ListSplits
88
-
89
- TMaybe<TIssue> InitSplitsListing () {
90
- YQL_CLOG (DEBUG, ProviderGeneric) << " Start splits listing" ;
91
-
92
- // Prepare request
93
- NConnector::NApi::TListSplitsRequest request;
94
- NConnector::NApi::TSelect select = Source_.select (); // copy TSelect from source
95
-
96
- auto error = TokenProvider_->MaybeFillToken (*select.mutable_data_source_instance ());
97
- if (error) {
98
- return TIssue (error);
99
- }
100
-
101
- *request.mutable_selects ()->Add () = std::move (select);
102
-
103
- // Initialize stream
104
- Client_->ListSplits (request).Subscribe (
105
- [actorSystem = TActivationContext::ActorSystem (),
106
- selfId = SelfId (),
107
- computeActorId = ComputeActorId_,
108
- inputIndex = InputIndex_](
109
- const NConnector::TListSplitsStreamIteratorAsyncResult& future) {
110
- AwaitIterator<
111
- NConnector::TListSplitsStreamIteratorAsyncResult,
112
- TEvListSplitsIterator>(
113
- actorSystem, selfId, computeActorId, inputIndex, future);
114
- });
115
-
116
- return Nothing ();
117
- }
118
-
119
- void Handle (TEvListSplitsIterator::TPtr& ev) {
120
- ListSplitsIterator_ = std::move (ev->Get ()->Iterator );
121
-
122
- AwaitNextStreamItem<NConnector::IListSplitsStreamIterator,
123
- TEvListSplitsPart,
124
- TEvListSplitsFinished>(ListSplitsIterator_);
125
- }
126
-
127
- void Handle (TEvListSplitsPart::TPtr& ev) {
128
- auto & response = ev->Get ()->Response ;
129
- YQL_CLOG (TRACE, ProviderGeneric) << " Handle :: EvListSplitsPart :: event handling started"
130
- << " : splits_size=" << response.splits ().size ();
131
-
132
- if (!NConnector::IsSuccess (response)) {
133
- return NotifyComputeActorWithError (
134
- TActivationContext::ActorSystem (),
135
- ComputeActorId_,
136
- InputIndex_,
137
- response.error ());
138
- }
139
-
140
- // Save splits for the further usage
141
- Splits_.insert (
142
- Splits_.end (),
143
- std::move_iterator (response.mutable_splits ()->begin ()),
144
- std::move_iterator (response.mutable_splits ()->end ()));
145
-
146
- // ask for next stream message
147
- AwaitNextStreamItem<NConnector::IListSplitsStreamIterator,
148
- TEvListSplitsPart,
149
- TEvListSplitsFinished>(ListSplitsIterator_);
150
-
151
- YQL_CLOG (TRACE, ProviderGeneric) << " Handle :: EvListSplitsPart :: event handling finished" ;
152
- }
153
-
154
- void Handle (TEvListSplitsFinished::TPtr& ev) {
155
- const auto & status = ev->Get ()->Status ;
156
-
157
- YQL_CLOG (TRACE, ProviderGeneric) << " Handle :: EvListSplitsFinished :: event handling started: " ;
158
-
159
- // Server sent EOF, now we are ready to start splits reading
160
- if (NConnector::GrpcStatusEndOfStream (status)) {
161
- YQL_CLOG (DEBUG, ProviderGeneric) << " Handle :: EvListSplitsFinished :: last message was reached, start data reading" ;
162
- auto issue = InitSplitsReading ();
163
- if (issue) {
164
- return NotifyComputeActorWithIssue (
165
- TActivationContext::ActorSystem (),
166
- ComputeActorId_,
167
- InputIndex_,
168
- std::move (*issue));
169
- }
170
-
171
- return ;
172
- }
173
-
174
- // Server temporary failure
175
- if (NConnector::GrpcStatusNeedsRetry (status)) {
176
- YQL_CLOG (WARN, ProviderGeneric) << " Handle :: EvListSplitsFinished :: you should retry your operation due to '"
177
- << status.ToDebugString () << " ' error" ;
178
- // TODO: retry
179
- }
180
-
181
- return NotifyComputeActorWithError (
182
- TActivationContext::ActorSystem (),
183
- ComputeActorId_,
184
- InputIndex_,
185
- NConnector::ErrorFromGRPCStatus (status));
186
- }
187
-
188
86
// ReadSplits
189
87
TMaybe<TIssue> InitSplitsReading () {
190
88
YQL_CLOG (DEBUG, ProviderGeneric) << " Start splits reading" ;
191
89
192
- if (Splits_ .empty ()) {
90
+ if (SplitDescriptions_ .empty ()) {
193
91
YQL_CLOG (WARN, ProviderGeneric) << " Accumulated empty list of splits" ;
194
92
ReadSplitsFinished_ = true ;
195
93
NotifyComputeActorWithData ();
196
94
return Nothing ();
197
95
}
198
96
199
- // Prepare request
97
+ // Prepare ReadSplits request. For the sake of simplicity,
98
+ // all the splits will be packed into a single ReadSplits call.
200
99
NConnector::NApi::TReadSplitsRequest request;
201
100
request.set_format (NConnector::NApi::TReadSplitsRequest::ARROW_IPC_STREAMING);
202
101
request.set_filtering (NConnector::NApi::TReadSplitsRequest::FILTERING_OPTIONAL);
203
- request.mutable_splits ()->Reserve (Splits_ .size ());
102
+ request.mutable_splits ()->Reserve (SplitDescriptions_ .size ());
204
103
205
- for (const auto & split : Splits_) {
206
- NConnector::NApi::TSplit splitCopy = split;
104
+ for (const auto & splitDescription : SplitDescriptions_) {
105
+ NConnector::NApi::TSplit split;
106
+ split.mutable_select ()->CopyFrom (Source_.select ());
107
+ split.set_description (splitDescription);
207
108
208
- auto error = TokenProvider_->MaybeFillToken (*splitCopy .mutable_select ()->mutable_data_source_instance ());
109
+ auto error = TokenProvider_->MaybeFillToken (*split .mutable_select ()->mutable_data_source_instance ());
209
110
if (error) {
210
111
return TIssue (std::move (error));
211
112
}
212
113
213
- *request.mutable_splits ()->Add () = std::move (splitCopy );
114
+ *request.mutable_splits ()->Add () = std::move (split );
214
115
}
215
116
216
117
// Start streaming
@@ -471,8 +372,9 @@ namespace NYql::NDq {
471
372
472
373
NConnector::IClient::TPtr Client_;
473
374
TGenericTokenProvider::TPtr TokenProvider_;
474
- NConnector::IListSplitsStreamIterator::TPtr ListSplitsIterator_;
475
- TVector<NConnector::NApi::TSplit> Splits_; // accumulated list of table splits
375
+
376
+ const TVector<TString> SplitDescriptions_;
377
+
476
378
NConnector::IReadSplitsStreamIterator::TPtr ReadSplitsIterator_;
477
379
std::optional<NConnector::NApi::TReadSplitsResponse> LastReadSplitsResponse_;
478
380
bool ReadSplitsFinished_ = false ;
@@ -482,24 +384,48 @@ namespace NYql::NDq {
482
384
Generic::TSource Source_;
483
385
};
484
386
387
+ void ExtractSplitDescriptions (
388
+ TVector<TString>& splitDescriptions,
389
+ const THashMap<TString, TString>& taskParams, // ranges are here in v1
390
+ const TVector<TString>& srcReadRanges // ranges are here in v2
391
+ ) {
392
+ if (srcReadRanges.size () > 0 ) {
393
+ splitDescriptions = srcReadRanges;
394
+ } else {
395
+ const auto & range = taskParams.find (GenericProviderName);
396
+ if (range != taskParams.end ()) {
397
+ splitDescriptions.push_back (range->second );
398
+ }
399
+ }
400
+
401
+ Y_ENSURE (splitDescriptions.size () > 0 , " read ranges must not be empty" );
402
+ }
403
+
485
404
std::pair<NYql::NDq::IDqComputeActorAsyncInput*, IActor*>
486
405
CreateGenericReadActor (NConnector::IClient::TPtr genericClient,
487
406
Generic::TSource&& source,
488
407
ui64 inputIndex,
489
408
TCollectStatsLevel statsLevel,
490
409
const THashMap<TString, TString>& /* secureParams*/ ,
491
- const THashMap<TString, TString>& /* taskParams*/ ,
410
+ const ui64 taskId,
411
+ const THashMap<TString, TString>& taskParams,
412
+ const TVector<TString>& readRanges,
492
413
const NActors::TActorId& computeActorId,
493
414
ISecuredServiceAccountCredentialsFactory::TPtr credentialsFactory,
494
415
const NKikimr::NMiniKQL::THolderFactory& holderFactory)
495
416
{
417
+ TVector<TString> splitDescriptions;
418
+ ExtractSplitDescriptions (splitDescriptions, taskParams, readRanges);
419
+
496
420
const auto dsi = source.select ().data_source_instance ();
497
421
YQL_CLOG (INFO, ProviderGeneric) << " Creating read actor with params:"
498
422
<< " kind=" << NYql::EGenericDataSourceKind_Name (dsi.kind ())
499
423
<< " , endpoint=" << dsi.endpoint ().ShortDebugString ()
500
424
<< " , database=" << dsi.database ()
501
425
<< " , use_tls=" << ToString (dsi.use_tls ())
502
- << " , protocol=" << NYql::EGenericProtocol_Name (dsi.protocol ());
426
+ << " , protocol=" << NYql::EGenericProtocol_Name (dsi.protocol ())
427
+ << " , taskId=" << taskId
428
+ << " , splitDescriptions=" << JoinSeq (" ," , splitDescriptions);
503
429
504
430
// FIXME: strange piece of logic - authToken is created but not used:
505
431
// https://a.yandex-team.ru/arcadia/ydb/library/yql/providers/clickhouse/actors/yql_ch_read_actor.cpp?rev=r11550199#L140
@@ -512,20 +438,6 @@ namespace NYql::NDq {
512
438
YQL_ENSURE(one != TString::npos && two != TString::npos && one < two, "Bad token format:" << token);
513
439
*/
514
440
515
- // Obtain token to access remote data source if necessary
516
- // TODO: partitioning is not implemented now, but this code will be useful for the further research:
517
- /*
518
- TStringBuilder part;
519
- if (const auto taskParamsIt = taskParams.find(GenericProviderName); taskParamsIt != taskParams.cend()) {
520
- Generic::TRange range;
521
- TStringInput input(taskParamsIt->second);
522
- range.Load(&input);
523
- if (const auto& r = range.GetRange(); !r.empty())
524
- part << ' ' << r;
525
- }
526
- part << ';';
527
- */
528
-
529
441
auto tokenProvider = CreateGenericTokenProvider (
530
442
source.GetToken (),
531
443
source.GetServiceAccountId (),
@@ -539,7 +451,8 @@ namespace NYql::NDq {
539
451
std::move (tokenProvider),
540
452
std::move (source),
541
453
computeActorId,
542
- holderFactory);
454
+ holderFactory,
455
+ std::move (splitDescriptions));
543
456
544
457
return {actor, actor};
545
458
}
0 commit comments