@@ -195,6 +195,27 @@ inline TType OptionFromNode(const NYT::TNode& value) {
195195 }
196196}
197197
198+ void PopulatePathStatResult (IYtGateway::TPathStatResult& out, int index, NYT::TTableColumnarStatistics& extendedStat) {
199+ for (const auto & entry : extendedStat.ColumnDataWeight ) {
200+ out.DataSize [index] += entry.second ;
201+ }
202+ out.Extended [index] = IYtGateway::TPathStatResult::TExtendedResult{
203+ .DataWeight = extendedStat.ColumnDataWeight ,
204+ .EstimatedUniqueCounts = extendedStat.ColumnEstimatedUniqueCounts
205+ };
206+ }
207+
208+ TString DebugPath (NYT::TRichYPath path) {
209+ constexpr int maxDebugColumns = 20 ;
210+ if (!path.Columns_ || std::ssize (path.Columns_ ->Parts_ ) <= maxDebugColumns) {
211+ return NYT::NodeToCanonicalYsonString (NYT::PathToNode (path), NYT::NYson::EYsonFormat::Text);
212+ }
213+ int numColumns = std::ssize (path.Columns_ ->Parts_ );
214+ path.Columns_ ->Parts_ .erase (path.Columns_ ->Parts_ .begin () + maxDebugColumns, path.Columns_ ->Parts_ .end ());
215+ path.Columns_ ->Parts_ .push_back (" ..." );
216+ return NYT::NodeToCanonicalYsonString (NYT::PathToNode (path), NYT::NYson::EYsonFormat::Text) + " (" + std::to_string (numColumns) + " columns)" ;
217+ }
218+
198219} // unnamed
199220
200221// /////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -4505,13 +4526,15 @@ class TYtNativeGateway : public IYtGateway {
45054526 try {
45064527 TPathStatResult res;
45074528 res.DataSize .resize (execCtx->Options_ .Paths ().size (), 0 );
4529+ res.Extended .resize (execCtx->Options_ .Paths ().size ());
45084530
45094531 auto entry = execCtx->GetOrCreateEntry ();
45104532 auto tx = entry->Tx ;
45114533 const TString tmpFolder = GetTablesTmpFolder (*execCtx->Options_ .Config ());
45124534 const NYT::EOptimizeForAttr tmpOptimizeFor = execCtx->Options_ .Config ()->OptimizeFor .Get (execCtx->Cluster_ ).GetOrElse (NYT::EOptimizeForAttr::OF_LOOKUP_ATTR);
45134535 TVector<NYT::TRichYPath> ytPaths (Reserve (execCtx->Options_ .Paths ().size ()));
45144536 TVector<size_t > pathMap;
4537+ bool extended = execCtx->Options_ .Extended ();
45154538
45164539 auto extractSysColumns = [] (NYT::TRichYPath& ytPath) -> TVector<TString> {
45174540 TVector<TString> res;
@@ -4555,16 +4578,19 @@ class TYtNativeGateway : public IYtGateway {
45554578 YQL_CLOG (INFO, ProviderYt) << " Adding stat for " << col << " : " << size << " (virtual)" ;
45564579 }
45574580 }
4558- if (auto val = entry->GetColumnarStat (ytPath)) {
4559- res.DataSize [i] += *val;
4560- YQL_CLOG (INFO, ProviderYt) << " Stat for " << req.Path ().Path_ << " : " << res.DataSize [i] << " (from cache)" ;
4581+ TMaybe<ui64> cachedStat;
4582+ TMaybe<NYT::TTableColumnarStatistics> cachedExtendedStat;
4583+ if (!extended && (cachedStat = entry->GetColumnarStat (ytPath))) {
4584+ res.DataSize [i] += *cachedStat;
4585+ YQL_CLOG (INFO, ProviderYt) << " Stat for " << DebugPath (req.Path ()) << " : " << res.DataSize [i] << " (from cache, extended: false)" ;
4586+ } else if (extended && (cachedExtendedStat = entry->GetExtendedColumnarStat (ytPath))) {
4587+ PopulatePathStatResult (res, i, *cachedExtendedStat);
4588+ YQL_CLOG (INFO, ProviderYt) << " Stat for " << DebugPath (req.Path ()) << " (from cache, extended: true)" ;
45614589 } else if (onlyCached) {
4562- YQL_CLOG (INFO, ProviderYt) << " Stat for " << req.Path (). Path_ << " is missing in cache - sync path stat failed" ;
4590+ YQL_CLOG (INFO, ProviderYt) << " Stat for " << DebugPath ( req.Path ()) << " is missing in cache - sync path stat failed (extended: " << extended << " ) " ;
45634591 return res;
4564- } else if (NYT::EOptimizeForAttr::OF_SCAN_ATTR == tmpOptimizeFor) {
4565- pathMap.push_back (i);
4566- ytPaths.push_back (ytPath);
4567- } else {
4592+ } else if (NYT::EOptimizeForAttr::OF_SCAN_ATTR != tmpOptimizeFor && !extended) {
4593+
45684594 // Use entire table size for lookup tables (YQL-7257)
45694595 if (attrs.IsUndefined ()) {
45704596 attrs = tx->Get (ytPath.Path_ + " /@" , NYT::TGetOptions ().AttributeFilter (
@@ -4576,7 +4602,10 @@ class TYtNativeGateway : public IYtGateway {
45764602 auto size = CalcDataSize (ytPath, attrs);
45774603 res.DataSize [i] += size;
45784604 entry->UpdateColumnarStat (ytPath, size);
4579- YQL_CLOG (INFO, ProviderYt) << " Stat for " << req.Path ().Path_ << " : " << res.DataSize [i] << " (uncompressed_data_size for lookup)" ;
4605+ YQL_CLOG (INFO, ProviderYt) << " Stat for " << DebugPath (req.Path ()) << " : " << res.DataSize [i] << " (uncompressed_data_size for lookup, extended: false)" ;
4606+ } else {
4607+ ytPaths.push_back (ytPath);
4608+ pathMap.push_back (i);
45804609 }
45814610 } else {
45824611 auto p = entry->Snapshots .FindPtr (std::make_pair (tablePath, req.Epoch ()));
@@ -4607,11 +4636,19 @@ class TYtNativeGateway : public IYtGateway {
46074636 YQL_CLOG (INFO, ProviderYt) << " Adding stat for " << col << " : " << size << " (virtual)" ;
46084637 }
46094638 }
4610- if (auto val = entry->GetColumnarStat (ytPath)) {
4611- res.DataSize [i] += *val;
4612- YQL_CLOG (INFO, ProviderYt) << " Stat for " << req.Path ().Path_ << " (epoch=" << req.Epoch () << " ): " << res.DataSize [i] << " (from cache)" ;
4639+ TMaybe<ui64> cachedStat;
4640+ TMaybe<NYT::TTableColumnarStatistics> cachedExtendedStat;
4641+ if (!extended && (cachedStat = entry->GetColumnarStat (ytPath))) {
4642+ res.DataSize [i] += *cachedStat;
4643+ YQL_CLOG (INFO, ProviderYt) << " Stat for " << DebugPath (req.Path ()) << " (epoch=" << req.Epoch () << " ): " << res.DataSize [i] << " (from cache, extended: false)" ;
4644+ } else if (extended && (cachedExtendedStat = entry->GetExtendedColumnarStat (ytPath))) {
4645+ PopulatePathStatResult (res, i, *cachedExtendedStat);
4646+ YQL_CLOG (INFO, ProviderYt) << " Stat for " << DebugPath (req.Path ()) << " (from cache, extended: true)" ;
46134647 } else if (onlyCached) {
4614- YQL_CLOG (INFO, ProviderYt) << " Stat for " << req.Path ().Path_ << " (epoch=" << req.Epoch () << " ) is missing in cache - sync path stat failed" ;
4648+ YQL_CLOG (INFO, ProviderYt)
4649+ << " Stat for " << DebugPath (req.Path ())
4650+ << " (epoch=" << req.Epoch () << " , extended: " << extended
4651+ << " ) is missing in cache - sync path stat failed" ;
46154652 return res;
46164653 } else {
46174654 if (attrs.IsUndefined ()) {
@@ -4623,40 +4660,46 @@ class TYtNativeGateway : public IYtGateway {
46234660 .AddAttribute (TString (" schema" ))
46244661 ));
46254662 }
4626- if (attrs.HasKey (" optimize_for" ) && attrs[" optimize_for" ] == " scan" &&
4627- AllPathColumnsAreInSchema (req.Path (), attrs))
4663+ if (extended ||
4664+ (attrs.HasKey (" optimize_for" ) && attrs[" optimize_for" ] == " scan" &&
4665+ AllPathColumnsAreInSchema (req.Path (), attrs)))
46284666 {
46294667 pathMap.push_back (i);
46304668 ytPaths.push_back (ytPath);
4631- YQL_CLOG (INFO, ProviderYt) << " Stat for " << req.Path (). Path_ << " (epoch=" << req.Epoch () << " ) add for request with path " << ytPath.Path_ ;
4669+ YQL_CLOG (INFO, ProviderYt) << " Stat for " << DebugPath ( req.Path ()) << " (epoch=" << req.Epoch () << " ) add for request with path " << ytPath.Path_ << " (extended: " << extended << " ) " ;
46324670 } else {
46334671 // Use entire table size for lookup tables (YQL-7257)
46344672 auto size = CalcDataSize (ytPath, attrs);
46354673 res.DataSize [i] += size;
46364674 entry->UpdateColumnarStat (ytPath, size);
4637- YQL_CLOG (INFO, ProviderYt) << " Stat for " << req.Path (). Path_ << " (epoch=" << req.Epoch () << " ): " << res.DataSize [i] << " (uncompressed_data_size for lookup)" ;
4675+ YQL_CLOG (INFO, ProviderYt) << " Stat for " << DebugPath ( req.Path ()) << " (epoch=" << req.Epoch () << " ): " << res.DataSize [i] << " (uncompressed_data_size for lookup)" ;
46384676 }
46394677 }
46404678 }
46414679 }
46424680
46434681 if (ytPaths) {
46444682 YQL_ENSURE (!onlyCached);
4645- auto fetchMode = execCtx->Options_ .Config ()->JoinColumnarStatisticsFetcherMode .Get ().GetOrElse (NYT::EColumnarStatisticsFetcherMode::Fallback);
4683+ auto fetchMode = extended ?
4684+ NYT::EColumnarStatisticsFetcherMode::FromNodes :
4685+ execCtx->Options_ .Config ()->JoinColumnarStatisticsFetcherMode .Get ().GetOrElse (NYT::EColumnarStatisticsFetcherMode::Fallback);
46464686 auto columnStats = tx->GetTableColumnarStatistics (ytPaths, NYT::TGetTableColumnarStatisticsOptions ().FetcherMode (fetchMode));
46474687 YQL_ENSURE (pathMap.size () == columnStats.size ());
4648- for (size_t i: xrange (columnStats.size ())) {
4688+ for (size_t i: xrange (columnStats.size ())) {
46494689 auto & columnStat = columnStats[i];
46504690 const ui64 weight = columnStat.LegacyChunksDataWeight +
46514691 Accumulate (columnStat.ColumnDataWeight .begin (), columnStat.ColumnDataWeight .end (), 0ull ,
46524692 [](ui64 sum, decltype (*columnStat.ColumnDataWeight .begin ())& v) { return sum + v.second ; });
46534693
4694+ if (extended) {
4695+ PopulatePathStatResult (res, pathMap[i], columnStat);
4696+ }
4697+
46544698 res.DataSize [pathMap[i]] += weight;
4655- entry->UpdateColumnarStat (ytPaths[i], columnStat);
4699+ entry->UpdateColumnarStat (ytPaths[i], columnStat, extended );
46564700 YQL_CLOG (INFO, ProviderYt) << " Stat for " << execCtx->Options_ .Paths ()[pathMap[i]].Path ().Path_ << " : " << weight << " (fetched)" ;
46574701 }
46584702 }
4659-
46604703 res.SetSuccess ();
46614704 return res;
46624705 } catch (...) {
0 commit comments