@@ -195,6 +195,27 @@ inline TType OptionFromNode(const NYT::TNode& value) {
195195 }
196196}
197197
198+ void PopulatePathStatResult (IYtGateway::TPathStatResult& out, int index, NYT::TTableColumnarStatistics& extendedStat) {
199+ for (const auto & entry : extendedStat.ColumnDataWeight ) {
200+ out.DataSize [index] += entry.second ;
201+ }
202+ out.Extended [index] = IYtGateway::TPathStatResult::TExtendedResult{
203+ .DataWeight = extendedStat.ColumnDataWeight ,
204+ .EstimatedUniqueCounts = extendedStat.ColumnEstimatedUniqueCounts
205+ };
206+ }
207+
208+ TString DebugPath (NYT::TRichYPath path) {
209+ constexpr int maxDebugColumns = 20 ;
210+ if (!path.Columns_ || std::ssize (path.Columns_ ->Parts_ ) <= maxDebugColumns) {
211+ return NYT::NodeToCanonicalYsonString (NYT::PathToNode (path), NYT::NYson::EYsonFormat::Text);
212+ }
213+ int numColumns = std::ssize (path.Columns_ ->Parts_ );
214+ path.Columns_ ->Parts_ .erase (path.Columns_ ->Parts_ .begin () + maxDebugColumns, path.Columns_ ->Parts_ .end ());
215+ path.Columns_ ->Parts_ .push_back (" ..." );
216+ return NYT::NodeToCanonicalYsonString (NYT::PathToNode (path), NYT::NYson::EYsonFormat::Text) + " (" + std::to_string (numColumns) + " columns)" ;
217+ }
218+
198219} // unnamed
199220
200221// /////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -4495,13 +4516,15 @@ class TYtNativeGateway : public IYtGateway {
44954516 try {
44964517 TPathStatResult res;
44974518 res.DataSize .resize (execCtx->Options_ .Paths ().size (), 0 );
4519+ res.Extended .resize (execCtx->Options_ .Paths ().size ());
44984520
44994521 auto entry = execCtx->GetOrCreateEntry ();
45004522 auto tx = entry->Tx ;
45014523 const TString tmpFolder = GetTablesTmpFolder (*execCtx->Options_ .Config ());
45024524 const NYT::EOptimizeForAttr tmpOptimizeFor = execCtx->Options_ .Config ()->OptimizeFor .Get (execCtx->Cluster_ ).GetOrElse (NYT::EOptimizeForAttr::OF_LOOKUP_ATTR);
45034525 TVector<NYT::TRichYPath> ytPaths (Reserve (execCtx->Options_ .Paths ().size ()));
45044526 TVector<size_t > pathMap;
4527+ bool extended = execCtx->Options_ .Extended ();
45054528
45064529 auto extractSysColumns = [] (NYT::TRichYPath& ytPath) -> TVector<TString> {
45074530 TVector<TString> res;
@@ -4545,16 +4568,19 @@ class TYtNativeGateway : public IYtGateway {
45454568 YQL_CLOG (INFO, ProviderYt) << " Adding stat for " << col << " : " << size << " (virtual)" ;
45464569 }
45474570 }
4548- if (auto val = entry->GetColumnarStat (ytPath)) {
4549- res.DataSize [i] += *val;
4550- YQL_CLOG (INFO, ProviderYt) << " Stat for " << req.Path ().Path_ << " : " << res.DataSize [i] << " (from cache)" ;
4571+ TMaybe<ui64> cachedStat;
4572+ TMaybe<NYT::TTableColumnarStatistics> cachedExtendedStat;
4573+ if (!extended && (cachedStat = entry->GetColumnarStat (ytPath))) {
4574+ res.DataSize [i] += *cachedStat;
4575+ YQL_CLOG (INFO, ProviderYt) << " Stat for " << DebugPath (req.Path ()) << " : " << res.DataSize [i] << " (from cache, extended: false)" ;
4576+ } else if (extended && (cachedExtendedStat = entry->GetExtendedColumnarStat (ytPath))) {
4577+ PopulatePathStatResult (res, i, *cachedExtendedStat);
4578+ YQL_CLOG (INFO, ProviderYt) << " Stat for " << DebugPath (req.Path ()) << " (from cache, extended: true)" ;
45514579 } else if (onlyCached) {
4552- YQL_CLOG (INFO, ProviderYt) << " Stat for " << req.Path (). Path_ << " is missing in cache - sync path stat failed" ;
4580+ YQL_CLOG (INFO, ProviderYt) << " Stat for " << DebugPath ( req.Path ()) << " is missing in cache - sync path stat failed (extended: " << extended << " ) " ;
45534581 return res;
4554- } else if (NYT::EOptimizeForAttr::OF_SCAN_ATTR == tmpOptimizeFor) {
4555- pathMap.push_back (i);
4556- ytPaths.push_back (ytPath);
4557- } else {
4582+ } else if (NYT::EOptimizeForAttr::OF_SCAN_ATTR != tmpOptimizeFor && !extended) {
4583+
45584584 // Use entire table size for lookup tables (YQL-7257)
45594585 if (attrs.IsUndefined ()) {
45604586 attrs = tx->Get (ytPath.Path_ + " /@" , NYT::TGetOptions ().AttributeFilter (
@@ -4566,7 +4592,10 @@ class TYtNativeGateway : public IYtGateway {
45664592 auto size = CalcDataSize (ytPath, attrs);
45674593 res.DataSize [i] += size;
45684594 entry->UpdateColumnarStat (ytPath, size);
4569- YQL_CLOG (INFO, ProviderYt) << " Stat for " << req.Path ().Path_ << " : " << res.DataSize [i] << " (uncompressed_data_size for lookup)" ;
4595+ YQL_CLOG (INFO, ProviderYt) << " Stat for " << DebugPath (req.Path ()) << " : " << res.DataSize [i] << " (uncompressed_data_size for lookup, extended: false)" ;
4596+ } else {
4597+ ytPaths.push_back (ytPath);
4598+ pathMap.push_back (i);
45704599 }
45714600 } else {
45724601 auto p = entry->Snapshots .FindPtr (std::make_pair (tablePath, req.Epoch ()));
@@ -4597,11 +4626,19 @@ class TYtNativeGateway : public IYtGateway {
45974626 YQL_CLOG (INFO, ProviderYt) << " Adding stat for " << col << " : " << size << " (virtual)" ;
45984627 }
45994628 }
4600- if (auto val = entry->GetColumnarStat (ytPath)) {
4601- res.DataSize [i] += *val;
4602- YQL_CLOG (INFO, ProviderYt) << " Stat for " << req.Path ().Path_ << " (epoch=" << req.Epoch () << " ): " << res.DataSize [i] << " (from cache)" ;
4629+ TMaybe<ui64> cachedStat;
4630+ TMaybe<NYT::TTableColumnarStatistics> cachedExtendedStat;
4631+ if (!extended && (cachedStat = entry->GetColumnarStat (ytPath))) {
4632+ res.DataSize [i] += *cachedStat;
4633+ YQL_CLOG (INFO, ProviderYt) << " Stat for " << DebugPath (req.Path ()) << " (epoch=" << req.Epoch () << " ): " << res.DataSize [i] << " (from cache, extended: false)" ;
4634+ } else if (extended && (cachedExtendedStat = entry->GetExtendedColumnarStat (ytPath))) {
4635+ PopulatePathStatResult (res, i, *cachedExtendedStat);
4636+ YQL_CLOG (INFO, ProviderYt) << " Stat for " << DebugPath (req.Path ()) << " (from cache, extended: true)" ;
46034637 } else if (onlyCached) {
4604- YQL_CLOG (INFO, ProviderYt) << " Stat for " << req.Path ().Path_ << " (epoch=" << req.Epoch () << " ) is missing in cache - sync path stat failed" ;
4638+ YQL_CLOG (INFO, ProviderYt)
4639+ << " Stat for " << DebugPath (req.Path ())
4640+ << " (epoch=" << req.Epoch () << " , extended: " << extended
4641+ << " ) is missing in cache - sync path stat failed" ;
46054642 return res;
46064643 } else {
46074644 if (attrs.IsUndefined ()) {
@@ -4613,36 +4650,43 @@ class TYtNativeGateway : public IYtGateway {
46134650 .AddAttribute (TString (" schema" ))
46144651 ));
46154652 }
4616- if (attrs.HasKey (" optimize_for" ) && attrs[" optimize_for" ] == " scan" &&
4617- AllPathColumnsAreInSchema (req.Path (), attrs))
4653+ if (extended ||
4654+ (attrs.HasKey (" optimize_for" ) && attrs[" optimize_for" ] == " scan" &&
4655+ AllPathColumnsAreInSchema (req.Path (), attrs)))
46184656 {
46194657 pathMap.push_back (i);
46204658 ytPaths.push_back (ytPath);
4621- YQL_CLOG (INFO, ProviderYt) << " Stat for " << req.Path (). Path_ << " (epoch=" << req.Epoch () << " ) add for request with path " << ytPath.Path_ ;
4659+ YQL_CLOG (INFO, ProviderYt) << " Stat for " << DebugPath ( req.Path ()) << " (epoch=" << req.Epoch () << " ) add for request with path " << ytPath.Path_ << " (extended: " << extended << " ) " ;
46224660 } else {
46234661 // Use entire table size for lookup tables (YQL-7257)
46244662 auto size = CalcDataSize (ytPath, attrs);
46254663 res.DataSize [i] += size;
46264664 entry->UpdateColumnarStat (ytPath, size);
4627- YQL_CLOG (INFO, ProviderYt) << " Stat for " << req.Path (). Path_ << " (epoch=" << req.Epoch () << " ): " << res.DataSize [i] << " (uncompressed_data_size for lookup)" ;
4665+ YQL_CLOG (INFO, ProviderYt) << " Stat for " << DebugPath ( req.Path ()) << " (epoch=" << req.Epoch () << " ): " << res.DataSize [i] << " (uncompressed_data_size for lookup)" ;
46284666 }
46294667 }
46304668 }
46314669 }
46324670
46334671 if (ytPaths) {
46344672 YQL_ENSURE (!onlyCached);
4635- auto fetchMode = execCtx->Options_ .Config ()->JoinColumnarStatisticsFetcherMode .Get ().GetOrElse (NYT::EColumnarStatisticsFetcherMode::Fallback);
4673+ auto fetchMode = extended ?
4674+ NYT::EColumnarStatisticsFetcherMode::FromNodes :
4675+ execCtx->Options_ .Config ()->JoinColumnarStatisticsFetcherMode .Get ().GetOrElse (NYT::EColumnarStatisticsFetcherMode::Fallback);
46364676 auto columnStats = tx->GetTableColumnarStatistics (ytPaths, NYT::TGetTableColumnarStatisticsOptions ().FetcherMode (fetchMode));
46374677 YQL_ENSURE (pathMap.size () == columnStats.size ());
4638- for (size_t i: xrange (columnStats.size ())) {
4678+ for (size_t i: xrange (columnStats.size ())) {
46394679 auto & columnStat = columnStats[i];
46404680 const ui64 weight = columnStat.LegacyChunksDataWeight +
46414681 Accumulate (columnStat.ColumnDataWeight .begin (), columnStat.ColumnDataWeight .end (), 0ull ,
46424682 [](ui64 sum, decltype (*columnStat.ColumnDataWeight .begin ())& v) { return sum + v.second ; });
46434683
4684+ if (extended) {
4685+ PopulatePathStatResult (res, pathMap[i], columnStat);
4686+ }
4687+
46444688 res.DataSize [pathMap[i]] += weight;
4645- entry->UpdateColumnarStat (ytPaths[i], columnStat);
4689+ entry->UpdateColumnarStat (ytPaths[i], columnStat, extended );
46464690 YQL_CLOG (INFO, ProviderYt) << " Stat for " << execCtx->Options_ .Paths ()[pathMap[i]].Path ().Path_ << " : " << weight << " (fetched)" ;
46474691 }
46484692 }
0 commit comments