From 06c7837edc59e82871d5ab3c9dfe4b6c0ea31baa Mon Sep 17 00:00:00 2001 From: Jialiang Tan Date: Wed, 8 May 2024 13:57:53 -0700 Subject: [PATCH] Add memory and cache stats to PeriodicStatsReporter (#9723) Summary: Added memory allocator and cache stats to PeriodicStatsReporter. Added documentation to the monitoring doc. Reviewed By: xiaoxmeng, zacw7 Differential Revision: D57085696 Pulled By: tanjialiang --- velox/common/base/Counters.cpp | 184 +++++++++++++++++ velox/common/base/Counters.h | 134 +++++++++++++ velox/common/base/PeriodicStatsReporter.cpp | 143 ++++++++++++- velox/common/base/PeriodicStatsReporter.h | 21 +- velox/common/base/tests/StatsReporterTest.cpp | 188 +++++++++++++++++- velox/common/caching/AsyncDataCache.cpp | 19 ++ velox/common/caching/AsyncDataCache.h | 133 +++++++------ velox/common/caching/SsdFile.h | 41 +++- velox/docs/monitoring/metrics.rst | 166 ++++++++++++++++ 9 files changed, 953 insertions(+), 76 deletions(-) diff --git a/velox/common/base/Counters.cpp b/velox/common/base/Counters.cpp index f8b138db1de6..cb431b827b3a 100644 --- a/velox/common/base/Counters.cpp +++ b/velox/common/base/Counters.cpp @@ -46,6 +46,190 @@ void registerVeloxMetrics() { DEFINE_HISTOGRAM_METRIC( kMetricCacheShrinkTimeMs, 10'000, 0, 100'000, 50, 90, 99, 100); + /// ================== Memory Allocator Counters ================= + + // Number of bytes currently mapped in MemoryAllocator. These are the bytes + // that are either currently allocated or were allocated in the past and + // have not yet been returned to the operating system, in the form of + // 'Allocation' or 'ContiguousAllocation'. + DEFINE_METRIC(kMetricMappedMemoryBytes, facebook::velox::StatType::AVG); + + // Number of bytes currently allocated (used) from MemoryAllocator in the form + // of 'Allocation' or 'ContiguousAllocation'. 
+ DEFINE_METRIC(kMetricAllocatedMemoryBytes, facebook::velox::StatType::AVG); + + // Number of bytes currently mapped in MmapAllocator, in the form of + // 'ContiguousAllocation'. + // + // NOTE: This applies only to MmapAllocator + DEFINE_METRIC(kMetricMmapExternalMappedBytes, facebook::velox::StatType::AVG); + + // Number of bytes currently allocated from MmapAllocator directly from raw + // allocateBytes() interface, and internally allocated by malloc. Only small + // chunks of memory are delegated to malloc. + // + // NOTE: This applies only to MmapAllocator + DEFINE_METRIC(kMetricMmapDelegatedAllocBytes, facebook::velox::StatType::AVG); + + /// ================== AsyncDataCache Counters ================= + + // Max possible age of AsyncDataCache and SsdCache entries since the raw file + // was opened to load the cache. + DEFINE_METRIC(kMetricCacheMaxAgeSecs, facebook::velox::StatType::AVG); + + // Total number of cache entries. + DEFINE_METRIC(kMetricMemoryCacheNumEntries, facebook::velox::StatType::AVG); + + // Total number of cache entries that do not cache anything. + DEFINE_METRIC( + kMetricMemoryCacheNumEmptyEntries, facebook::velox::StatType::AVG); + + // Total number of cache entries that are pinned for shared access. + DEFINE_METRIC( + kMetricMemoryCacheNumSharedEntries, facebook::velox::StatType::AVG); + + // Total number of cache entries that are pinned for exclusive access. + DEFINE_METRIC( + kMetricMemoryCacheNumExclusiveEntries, facebook::velox::StatType::AVG); + + // Total number of cache entries that are being or have been prefetched but + // have not been hit. + DEFINE_METRIC( + kMetricMemoryCacheNumPrefetchedEntries, facebook::velox::StatType::AVG); + + // Total number of bytes of the cached data that is much smaller than + // kTinyDataSize. + DEFINE_METRIC( + kMetricMemoryCacheTotalTinyBytes, facebook::velox::StatType::AVG); + + // Total number of bytes of the cached data excluding + // 'kMetricMemoryCacheTotalTinyBytes'. 
+ DEFINE_METRIC( + kMetricMemoryCacheTotalLargeBytes, facebook::velox::StatType::AVG); + + // Total unused capacity bytes in 'kMetricMemoryCacheTotalTinyBytes'. + DEFINE_METRIC( + kMetricMemoryCacheTotalTinyPaddingBytes, facebook::velox::StatType::AVG); + + // Total unused capacity bytes in 'kMetricMemoryCacheTotalLargeBytes'. + DEFINE_METRIC( + kMetricMemoryCacheTotalLargePaddingBytes, facebook::velox::StatType::AVG); + + // Total bytes of cache entries in prefetch state. + DEFINE_METRIC( + kMetricMemoryCacheTotalPrefetchBytes, facebook::velox::StatType::AVG); + + // Sum of scores of evicted entries. This serves to infer an average lifetime + // for entries in cache. + DEFINE_METRIC( + kMetricMemoryCacheSumEvictScore, facebook::velox::StatType::SUM); + + // Number of hits (saved IO) since last counter retrieval. The first hit to a + // prefetched entry does not count. + DEFINE_METRIC(kMetricMemoryCacheNumHits, facebook::velox::StatType::SUM); + + // Amount of hit bytes (saved IO) since last counter retrieval. The first hit + // to a prefetched entry does not count. + DEFINE_METRIC(kMetricMemoryCacheHitBytes, facebook::velox::StatType::SUM); + + // Number of new entries created since last counter retrieval. + DEFINE_METRIC(kMetricMemoryCacheNumNew, facebook::velox::StatType::SUM); + + // Number of times a valid entry was removed in order to make space, since + // last counter retrieval. + DEFINE_METRIC(kMetricMemoryCacheNumEvicts, facebook::velox::StatType::SUM); + + // Number of entries considered for evicting, since last counter retrieval. + DEFINE_METRIC( + kMetricMemoryCacheNumEvictChecks, facebook::velox::StatType::SUM); + + // Number of times a user waited for an entry to transit from exclusive to + // shared mode, since last counter retrieval. 
+ DEFINE_METRIC( + kMetricMemoryCacheNumWaitExclusive, facebook::velox::StatType::SUM); + + // Clocks spent in allocating or freeing memory for backing cache entries, + // since last counter retrieval + DEFINE_METRIC( + kMetricMemoryCacheNumAllocClocks, facebook::velox::StatType::SUM); + + // Number of AsyncDataCache entries that are aged out and evicted + // given configured TTL. + DEFINE_METRIC( + kMetricMemoryCacheNumAgedOutEntries, facebook::velox::StatType::SUM); + + /// ================== SsdCache Counters ================== + + // Number of regions currently cached by SSD. + DEFINE_METRIC(kMetricSsdCacheCachedRegions, facebook::velox::StatType::AVG); + + // Number of entries currently cached by SSD. + DEFINE_METRIC(kMetricSsdCacheCachedEntries, facebook::velox::StatType::AVG); + + // Total bytes currently cached by SSD. + DEFINE_METRIC(kMetricSsdCacheCachedBytes, facebook::velox::StatType::AVG); + + // Total number of entries read from SSD. + DEFINE_METRIC(kMetricSsdCacheReadEntries, facebook::velox::StatType::SUM); + + // Total number of bytes read from SSD. + DEFINE_METRIC(kMetricSsdCacheReadBytes, facebook::velox::StatType::SUM); + + // Total number of entries written to SSD. + DEFINE_METRIC(kMetricSsdCacheWrittenEntries, facebook::velox::StatType::SUM); + + // Total number of bytes written to SSD. + DEFINE_METRIC(kMetricSsdCacheWrittenBytes, facebook::velox::StatType::SUM); + + // Total number of SsdCache entries that are aged out and evicted given + // configured TTL. + DEFINE_METRIC(kMetricSsdCacheAgedOutEntries, facebook::velox::StatType::SUM); + + // Total number of SsdCache regions that are aged out and evicted given + // configured TTL. + DEFINE_METRIC(kMetricSsdCacheAgedOutRegions, facebook::velox::StatType::SUM); + + // Total number of SSD file open errors. + DEFINE_METRIC(kMetricSsdCacheOpenSsdErrors, facebook::velox::StatType::SUM); + + // Total number of SSD checkpoint file open errors. 
+ DEFINE_METRIC( + kMetricSsdCacheOpenCheckpointErrors, facebook::velox::StatType::SUM); + + // Total number of SSD evict log file open errors. + DEFINE_METRIC(kMetricSsdCacheOpenLogErrors, facebook::velox::StatType::SUM); + + // Total number of errors while deleting SSD checkpoint files. + DEFINE_METRIC( + kMetricSsdCacheDeleteCheckpointErrors, facebook::velox::StatType::SUM); + + // Total number of errors while growing SSD cache files. + DEFINE_METRIC(kMetricSsdCacheGrowFileErrors, facebook::velox::StatType::SUM); + + // Total number of errors while writing to SSD cache files. + DEFINE_METRIC(kMetricSsdCacheWriteSsdErrors, facebook::velox::StatType::SUM); + + // Total number of errors while writing SSD checkpoint file. + DEFINE_METRIC( + kMetricSsdCacheWriteCheckpointErrors, facebook::velox::StatType::SUM); + + // Total number of errors while reading from SSD cache files. + DEFINE_METRIC(kMetricSsdCacheReadSsdErrors, facebook::velox::StatType::SUM); + + // Total number of errors while reading from SSD checkpoint files. + DEFINE_METRIC( + kMetricSsdCacheReadCheckpointErrors, facebook::velox::StatType::SUM); + + // Total number of checkpoints read. + DEFINE_METRIC(kMetricSsdCacheCheckpointsRead, facebook::velox::StatType::SUM); + + // Total number of checkpoints written. + DEFINE_METRIC( + kMetricSsdCacheCheckpointsWritten, facebook::velox::StatType::SUM); + + // Total number of cache regions evicted. + DEFINE_METRIC(kMetricSsdCacheRegionsEvicted, facebook::velox::StatType::SUM); + /// ================== Memory Arbitration Counters ================= // The number of arbitration requests. 
diff --git a/velox/common/base/Counters.h b/velox/common/base/Counters.h index 321639d886e2..522bc4f48495 100644 --- a/velox/common/base/Counters.h +++ b/velox/common/base/Counters.h @@ -131,4 +131,138 @@ constexpr folly::StringPiece kMetricFileWriterEarlyFlushedRawBytes{ constexpr folly::StringPiece kMetricArbitratorRequestsCount{ "velox.arbitrator_requests_count"}; + +constexpr folly::StringPiece kMetricMappedMemoryBytes{ + "velox.memory_allocator_mapped_bytes"}; + +constexpr folly::StringPiece kMetricAllocatedMemoryBytes{ + "velox.memory_allocator_alloc_bytes"}; + +constexpr folly::StringPiece kMetricMmapExternalMappedBytes{ + "velox.mmap_allocator_external_mapped_bytes"}; + +constexpr folly::StringPiece kMetricMmapDelegatedAllocBytes{ + "velox.mmap_allocator_delegated_alloc_bytes"}; + +constexpr folly::StringPiece kMetricCacheMaxAgeSecs{"velox.cache_max_age_secs"}; + +constexpr folly::StringPiece kMetricMemoryCacheNumEntries{ + "velox.memory_cache_num_entries"}; + +constexpr folly::StringPiece kMetricMemoryCacheNumEmptyEntries{ + "velox.memory_cache_num_empty_entries"}; + +constexpr folly::StringPiece kMetricMemoryCacheNumSharedEntries{ + "velox.memory_cache_num_shared_entries"}; + +constexpr folly::StringPiece kMetricMemoryCacheNumExclusiveEntries{ + "velox.memory_cache_num_exclusive_entries"}; + +constexpr folly::StringPiece kMetricMemoryCacheNumPrefetchedEntries{ + "velox.memory_cache_num_prefetched_entries"}; + +constexpr folly::StringPiece kMetricMemoryCacheTotalTinyBytes{ + "velox.memory_cache_total_tiny_bytes"}; + +constexpr folly::StringPiece kMetricMemoryCacheTotalLargeBytes{ + "velox.memory_cache_total_large_bytes"}; + +constexpr folly::StringPiece kMetricMemoryCacheTotalTinyPaddingBytes{ + "velox.memory_cache_total_tiny_padding_bytes"}; + +constexpr folly::StringPiece kMetricMemoryCacheTotalLargePaddingBytes{ + "velox.memory_cache_total_large_padding_bytes"}; + +constexpr folly::StringPiece kMetricMemoryCacheTotalPrefetchBytes{ + 
"velox.memory_cache_total_prefetched_bytes"}; + +constexpr folly::StringPiece kMetricMemoryCacheSumEvictScore{ + "velox.memory_cache_sum_evict_score"}; + +constexpr folly::StringPiece kMetricMemoryCacheNumHits{ + "velox.memory_cache_num_hits"}; + +constexpr folly::StringPiece kMetricMemoryCacheHitBytes{ + "velox.memory_cache_hit_bytes"}; + +constexpr folly::StringPiece kMetricMemoryCacheNumNew{ + "velox.memory_cache_num_new"}; + +constexpr folly::StringPiece kMetricMemoryCacheNumEvicts{ + "velox.memory_cache_num_evicts"}; + +constexpr folly::StringPiece kMetricMemoryCacheNumEvictChecks{ + "velox.memory_cache_num_evict_checks"}; + +constexpr folly::StringPiece kMetricMemoryCacheNumWaitExclusive{ + "velox.memory_cache_num_wait_exclusive"}; + +constexpr folly::StringPiece kMetricMemoryCacheNumAllocClocks{ + "velox.memory_cache_num_alloc_clocks"}; + +constexpr folly::StringPiece kMetricMemoryCacheNumAgedOutEntries{ + "velox.memory_cache_num_aged_out_entries"}; + +constexpr folly::StringPiece kMetricSsdCacheCachedRegions{ + "velox.ssd_cache_cached_regions"}; + +constexpr folly::StringPiece kMetricSsdCacheCachedEntries{ + "velox.ssd_cache_cached_entries"}; + +constexpr folly::StringPiece kMetricSsdCacheCachedBytes{ + "velox.ssd_cache_cached_bytes"}; + +constexpr folly::StringPiece kMetricSsdCacheReadEntries{ + "velox.ssd_cache_read_entries"}; + +constexpr folly::StringPiece kMetricSsdCacheReadBytes{ + "velox.ssd_cache_read_bytes"}; + +constexpr folly::StringPiece kMetricSsdCacheWrittenEntries{ + "velox.ssd_cache_written_entries"}; + +constexpr folly::StringPiece kMetricSsdCacheWrittenBytes{ + "velox.ssd_cache_written_bytes"}; + +constexpr folly::StringPiece kMetricSsdCacheAgedOutEntries{ + "velox.ssd_cache_aged_out_entries"}; + +constexpr folly::StringPiece kMetricSsdCacheAgedOutRegions{ + "velox.ssd_cache_aged_out_regions"}; + +constexpr folly::StringPiece kMetricSsdCacheOpenSsdErrors{ + "velox.ssd_cache_open_ssd_errors"}; + +constexpr folly::StringPiece 
kMetricSsdCacheOpenCheckpointErrors{ + "velox.ssd_cache_open_checkpoint_errors"}; + +constexpr folly::StringPiece kMetricSsdCacheOpenLogErrors{ + "velox.ssd_cache_open_log_errors"}; + +constexpr folly::StringPiece kMetricSsdCacheDeleteCheckpointErrors{ + "velox.ssd_cache_delete_checkpoint_errors"}; + +constexpr folly::StringPiece kMetricSsdCacheGrowFileErrors{ + "velox.ssd_cache_grow_file_errors"}; + +constexpr folly::StringPiece kMetricSsdCacheWriteSsdErrors{ + "velox.ssd_cache_write_ssd_errors"}; + +constexpr folly::StringPiece kMetricSsdCacheWriteCheckpointErrors{ + "velox.ssd_cache_write_checkpoint_errors"}; + +constexpr folly::StringPiece kMetricSsdCacheReadSsdErrors{ + "velox.ssd_cache_read_ssd_errors"}; + +constexpr folly::StringPiece kMetricSsdCacheReadCheckpointErrors{ + "velox.ssd_cache_read_checkpoint_errors"}; + +constexpr folly::StringPiece kMetricSsdCacheCheckpointsRead{ + "velox.ssd_cache_checkpoints_read"}; + +constexpr folly::StringPiece kMetricSsdCacheCheckpointsWritten{ + "velox.ssd_cache_checkpoints_written"}; + +constexpr folly::StringPiece kMetricSsdCacheRegionsEvicted{ + "velox.ssd_cache_regions_evicted"}; } // namespace facebook::velox diff --git a/velox/common/base/PeriodicStatsReporter.cpp b/velox/common/base/PeriodicStatsReporter.cpp index f3c3b6e5b608..23fdfc2309d7 100644 --- a/velox/common/base/PeriodicStatsReporter.cpp +++ b/velox/common/base/PeriodicStatsReporter.cpp @@ -17,7 +17,9 @@ #include "velox/common/base/PeriodicStatsReporter.h" #include "velox/common/base/Counters.h" #include "velox/common/base/StatsReporter.h" +#include "velox/common/caching/CacheTTLController.h" #include "velox/common/memory/Memory.h" +#include "velox/common/memory/MmapAllocator.h" namespace facebook::velox { @@ -57,11 +59,22 @@ void stopPeriodicStatsReporter() { } PeriodicStatsReporter::PeriodicStatsReporter(const Options& options) - : arbitrator_(options.arbitrator), options_(options) {} + : allocator_(options.allocator), + cache_(options.cache), + 
arbitrator_(options.arbitrator), + options_(options) {} void PeriodicStatsReporter::start() { LOG(INFO) << "Starting PeriodicStatsReporter with options " << options_.toString(); + addTask( + "report_allocator_stats", + [this]() { reportAllocatorStats(); }, + options_.allocatorStatsIntervalMs); + addTask( + "report_cache_stats", + [this]() { reportCacheStats(); }, + options_.cacheStatsIntervalMs); addTask( "report_arbitrator_stats", [this]() { reportArbitratorStats(); }, @@ -87,4 +100,132 @@ void PeriodicStatsReporter::reportArbitratorStats() { stats.freeReservedCapacityBytes); } +void PeriodicStatsReporter::reportAllocatorStats() { + if (allocator_ == nullptr) { + return; + } + RECORD_METRIC_VALUE( + kMetricMappedMemoryBytes, + (velox::memory::AllocationTraits::pageBytes(allocator_->numMapped()))); + RECORD_METRIC_VALUE( + kMetricAllocatedMemoryBytes, + (velox::memory::AllocationTraits::pageBytes(allocator_->numAllocated()))); + // TODO(jtan6): Remove condition after T150019700 is done + if (auto* mmapAllocator = + dynamic_cast<const velox::memory::MmapAllocator*>(allocator_)) { + RECORD_METRIC_VALUE( + kMetricMmapDelegatedAllocBytes, (mmapAllocator->numMallocBytes())); + RECORD_METRIC_VALUE( + kMetricMmapExternalMappedBytes, + velox::memory::AllocationTraits::pageBytes( + (mmapAllocator->numExternalMapped()))); + } + // TODO(xiaoxmeng): add memory allocation size stats. +} + +void PeriodicStatsReporter::reportCacheStats() { + if (cache_ == nullptr) { + return; + } + const auto cacheStats = cache_->refreshStats(); + + // Memory cache snapshot stats. 
+ RECORD_METRIC_VALUE(kMetricMemoryCacheNumEntries, cacheStats.numEntries); + RECORD_METRIC_VALUE( + kMetricMemoryCacheNumEmptyEntries, cacheStats.numEmptyEntries); + RECORD_METRIC_VALUE(kMetricMemoryCacheNumSharedEntries, cacheStats.numShared); + RECORD_METRIC_VALUE( + kMetricMemoryCacheNumExclusiveEntries, cacheStats.numExclusive); + RECORD_METRIC_VALUE( + kMetricMemoryCacheNumPrefetchedEntries, cacheStats.numPrefetch); + RECORD_METRIC_VALUE(kMetricMemoryCacheTotalTinyBytes, cacheStats.tinySize); + RECORD_METRIC_VALUE(kMetricMemoryCacheTotalLargeBytes, cacheStats.largeSize); + RECORD_METRIC_VALUE( + kMetricMemoryCacheTotalTinyPaddingBytes, cacheStats.tinyPadding); + RECORD_METRIC_VALUE( + kMetricMemoryCacheTotalLargePaddingBytes, cacheStats.largePadding); + RECORD_METRIC_VALUE( + kMetricMemoryCacheTotalPrefetchBytes, cacheStats.prefetchBytes); + + // Memory cache cumulative stats. + const auto deltaCacheStats = cacheStats - lastCacheStats_; + + REPORT_IF_NOT_ZERO(kMetricMemoryCacheNumHits, deltaCacheStats.numHit); + REPORT_IF_NOT_ZERO(kMetricMemoryCacheHitBytes, deltaCacheStats.hitBytes); + REPORT_IF_NOT_ZERO(kMetricMemoryCacheNumNew, deltaCacheStats.numNew); + REPORT_IF_NOT_ZERO(kMetricMemoryCacheNumEvicts, deltaCacheStats.numEvict); + REPORT_IF_NOT_ZERO( + kMetricMemoryCacheNumEvictChecks, deltaCacheStats.numEvictChecks); + REPORT_IF_NOT_ZERO( + kMetricMemoryCacheNumWaitExclusive, deltaCacheStats.numWaitExclusive); + REPORT_IF_NOT_ZERO( + kMetricMemoryCacheNumAllocClocks, deltaCacheStats.allocClocks); + REPORT_IF_NOT_ZERO( + kMetricMemoryCacheNumAgedOutEntries, deltaCacheStats.numAgedOut); + REPORT_IF_NOT_ZERO( + kMetricMemoryCacheSumEvictScore, deltaCacheStats.sumEvictScore); + + // SSD cache snapshot stats. 
+ if (cacheStats.ssdStats != nullptr) { + RECORD_METRIC_VALUE( + kMetricSsdCacheCachedEntries, cacheStats.ssdStats->entriesCached); + RECORD_METRIC_VALUE( + kMetricSsdCacheCachedRegions, cacheStats.ssdStats->regionsCached); + RECORD_METRIC_VALUE( + kMetricSsdCacheCachedBytes, cacheStats.ssdStats->bytesCached); + } + + // SSD cache cumulative stats. + if (deltaCacheStats.ssdStats != nullptr) { + const auto deltaSsdStats = *deltaCacheStats.ssdStats; + REPORT_IF_NOT_ZERO(kMetricSsdCacheReadEntries, deltaSsdStats.entriesRead) + REPORT_IF_NOT_ZERO(kMetricSsdCacheReadBytes, deltaSsdStats.bytesRead); + REPORT_IF_NOT_ZERO( + kMetricSsdCacheWrittenEntries, deltaSsdStats.entriesWritten); + REPORT_IF_NOT_ZERO(kMetricSsdCacheWrittenBytes, deltaSsdStats.bytesWritten); + REPORT_IF_NOT_ZERO( + kMetricSsdCacheOpenSsdErrors, deltaSsdStats.openFileErrors); + REPORT_IF_NOT_ZERO( + kMetricSsdCacheOpenCheckpointErrors, + deltaSsdStats.openCheckpointErrors); + REPORT_IF_NOT_ZERO( + kMetricSsdCacheOpenLogErrors, deltaSsdStats.openLogErrors); + REPORT_IF_NOT_ZERO( + kMetricSsdCacheDeleteCheckpointErrors, + deltaSsdStats.deleteCheckpointErrors); + REPORT_IF_NOT_ZERO( + kMetricSsdCacheGrowFileErrors, deltaSsdStats.growFileErrors); + REPORT_IF_NOT_ZERO( + kMetricSsdCacheWriteSsdErrors, deltaSsdStats.writeSsdErrors); + REPORT_IF_NOT_ZERO( + kMetricSsdCacheWriteCheckpointErrors, + deltaSsdStats.writeCheckpointErrors); + REPORT_IF_NOT_ZERO( + kMetricSsdCacheReadSsdErrors, deltaSsdStats.readSsdErrors); + REPORT_IF_NOT_ZERO( + kMetricSsdCacheReadCheckpointErrors, + deltaSsdStats.readCheckpointErrors); + REPORT_IF_NOT_ZERO( + kMetricSsdCacheCheckpointsRead, deltaSsdStats.checkpointsRead); + REPORT_IF_NOT_ZERO( + kMetricSsdCacheCheckpointsWritten, deltaSsdStats.checkpointsWritten); + REPORT_IF_NOT_ZERO( + kMetricSsdCacheRegionsEvicted, deltaSsdStats.regionsEvicted); + REPORT_IF_NOT_ZERO( + kMetricSsdCacheAgedOutEntries, deltaSsdStats.entriesAgedOut) + REPORT_IF_NOT_ZERO( + 
kMetricSsdCacheAgedOutRegions, deltaSsdStats.regionsAgedOut); + } + + // TTL controller snapshot stats. + if (auto* cacheTTLController = + velox::cache::CacheTTLController::getInstance()) { + RECORD_METRIC_VALUE( + kMetricCacheMaxAgeSecs, + cacheTTLController->getCacheAgeStats().maxAgeSecs); + } + + lastCacheStats_ = cacheStats; +} + } // namespace facebook::velox diff --git a/velox/common/base/PeriodicStatsReporter.h b/velox/common/base/PeriodicStatsReporter.h index 7621ac99a01c..2e5a01a15001 100644 --- a/velox/common/base/PeriodicStatsReporter.h +++ b/velox/common/base/PeriodicStatsReporter.h @@ -17,6 +17,8 @@ #pragma once #include +#include "velox/common/caching/AsyncDataCache.h" +#include "velox/common/caching/SsdFile.h" #include "velox/common/memory/MemoryArbitrator.h" namespace folly { @@ -39,13 +41,22 @@ class PeriodicStatsReporter { struct Options { Options() {} - const memory::MemoryArbitrator* arbitrator{nullptr}; + const velox::memory::MemoryAllocator* allocator{nullptr}; + uint64_t allocatorStatsIntervalMs{2'000}; + + const velox::cache::AsyncDataCache* cache{nullptr}; + uint64_t cacheStatsIntervalMs{60'000}; + const memory::MemoryArbitrator* arbitrator{nullptr}; uint64_t arbitratorStatsIntervalMs{60'000}; std::string toString() const { return fmt::format( - "arbitratorStatsIntervalMs:{}", arbitratorStatsIntervalMs); + "allocatorStatsIntervalMs:{}, cacheStatsIntervalMs:{}, " + "arbitratorStatsIntervalMs:{}", + allocatorStatsIntervalMs, + cacheStatsIntervalMs, + arbitratorStatsIntervalMs); } }; @@ -76,11 +87,17 @@ class PeriodicStatsReporter { }); } + void reportCacheStats(); + void reportAllocatorStats(); void reportArbitratorStats(); + const velox::memory::MemoryAllocator* const allocator_{nullptr}; + const velox::cache::AsyncDataCache* const cache_{nullptr}; const velox::memory::MemoryArbitrator* const arbitrator_{nullptr}; const Options options_; + cache::CacheStats lastCacheStats_; + folly::ThreadedRepeatingFunctionRunner scheduler_; }; diff --git 
a/velox/common/base/tests/StatsReporterTest.cpp b/velox/common/base/tests/StatsReporterTest.cpp index fd7e89a6f35f..2c21f6bc81d3 100644 --- a/velox/common/base/tests/StatsReporterTest.cpp +++ b/velox/common/base/tests/StatsReporterTest.cpp @@ -24,6 +24,10 @@ #include "velox/common/base/Counters.h" #include "velox/common/base/PeriodicStatsReporter.h" #include "velox/common/base/tests/GTestUtils.h" +#include "velox/common/caching/AsyncDataCache.h" +#include "velox/common/caching/CacheTTLController.h" +#include "velox/common/caching/SsdCache.h" +#include "velox/common/memory/MmapAllocator.h" namespace facebook::velox { @@ -141,6 +145,59 @@ TEST_F(StatsReporterTest, trivialReporter) { class PeriodicStatsReporterTest : public StatsReporterTest {}; +class TestStatsReportMmapAllocator : public memory::MmapAllocator { + public: + TestStatsReportMmapAllocator( + memory::MachinePageCount numMapped, + memory::MachinePageCount numAllocated, + memory::MachinePageCount numMallocBytes, + memory::MachinePageCount numExternalMapped) + : memory::MmapAllocator({.capacity = 1024}), + numMapped_(numMapped), + numAllocated_(numAllocated), + numMallocBytes_(numMallocBytes), + numExternalMapped_(numExternalMapped) {} + + memory::MachinePageCount numMapped() const override { + return numMapped_; + } + + memory::MachinePageCount numAllocated() const override { + return numAllocated_; + } + + uint64_t numMallocBytes() const { + return numMallocBytes_; + } + + memory::MachinePageCount numExternalMapped() const { + return numExternalMapped_; + } + + private: + memory::MachinePageCount numMapped_; + memory::MachinePageCount numAllocated_; + memory::MachinePageCount numMallocBytes_; + memory::MachinePageCount numExternalMapped_; +}; + +class TestStatsReportAsyncDataCache : public cache::AsyncDataCache { + public: + TestStatsReportAsyncDataCache(cache::CacheStats stats) + : cache::AsyncDataCache(nullptr, nullptr), stats_(stats) {} + + cache::CacheStats refreshStats() const override { + return 
stats_; + } + + void updateStats(cache::CacheStats stats) { + stats_ = stats; + } + + private: + cache::CacheStats stats_; +}; + class TestStatsReportMemoryArbitrator : public memory::MemoryArbitrator { public: explicit TestStatsReportMemoryArbitrator( @@ -195,23 +252,146 @@ class TestStatsReportMemoryArbitrator : public memory::MemoryArbitrator { }; TEST_F(PeriodicStatsReporterTest, basic) { + TestStatsReportMmapAllocator allocator(1, 1, 1, 1); + TestStatsReportAsyncDataCache cache( + {.ssdStats = std::make_shared()}); + cache::CacheTTLController::create(cache); TestStatsReportMemoryArbitrator arbitrator({}); PeriodicStatsReporter::Options options; + options.cache = &cache; + options.cacheStatsIntervalMs = 4'000; + options.allocator = &allocator; + options.allocatorStatsIntervalMs = 4'000; options.arbitrator = &arbitrator; options.arbitratorStatsIntervalMs = 4'000; PeriodicStatsReporter periodicReporter(options); periodicReporter.start(); std::this_thread::sleep_for(std::chrono::milliseconds(2'000)); - // Stop right after sufficient wait to ensure the following reads from main - // thread does not trigger TSAN failures. 
- periodicReporter.stop(); + // Check snapshot stats const auto& counterMap = reporter_->counterMap; - ASSERT_EQ(counterMap.size(), 2); ASSERT_EQ(counterMap.count(kMetricArbitratorFreeCapacityBytes.str()), 1); ASSERT_EQ( counterMap.count(kMetricArbitratorFreeReservedCapacityBytes.str()), 1); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheNumEntries.str()), 1); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheNumEmptyEntries.str()), 1); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheNumSharedEntries.str()), 1); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheNumExclusiveEntries.str()), 1); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheNumPrefetchedEntries.str()), 1); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheTotalTinyBytes.str()), 1); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheTotalLargeBytes.str()), 1); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheTotalTinyPaddingBytes.str()), 1); + ASSERT_EQ( + counterMap.count(kMetricMemoryCacheTotalLargePaddingBytes.str()), 1); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheTotalPrefetchBytes.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSsdCacheCachedEntries.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSsdCacheCachedRegions.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSsdCacheCachedBytes.str()), 1); + ASSERT_EQ(counterMap.count(kMetricCacheMaxAgeSecs.str()), 1); + ASSERT_EQ(counterMap.count(kMetricMappedMemoryBytes.str()), 1); + ASSERT_EQ(counterMap.count(kMetricAllocatedMemoryBytes.str()), 1); + ASSERT_EQ(counterMap.count(kMetricMmapDelegatedAllocBytes.str()), 1); + ASSERT_EQ(counterMap.count(kMetricMmapExternalMappedBytes.str()), 1); + // Check deltas are not reported + ASSERT_EQ(counterMap.count(kMetricMemoryCacheNumHits.str()), 0); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheHitBytes.str()), 0); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheNumNew.str()), 0); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheNumEvicts.str()), 0); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheNumEvictChecks.str()), 0); + 
ASSERT_EQ(counterMap.count(kMetricMemoryCacheNumWaitExclusive.str()), 0); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheNumAllocClocks.str()), 0); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheNumAgedOutEntries.str()), 0); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheSumEvictScore.str()), 0); + ASSERT_EQ(counterMap.count(kMetricSsdCacheReadEntries.str()), 0); + ASSERT_EQ(counterMap.count(kMetricSsdCacheReadBytes.str()), 0); + ASSERT_EQ(counterMap.count(kMetricSsdCacheWrittenEntries.str()), 0); + ASSERT_EQ(counterMap.count(kMetricSsdCacheWrittenBytes.str()), 0); + ASSERT_EQ(counterMap.count(kMetricSsdCacheOpenSsdErrors.str()), 0); + ASSERT_EQ(counterMap.count(kMetricSsdCacheOpenCheckpointErrors.str()), 0); + ASSERT_EQ(counterMap.count(kMetricSsdCacheOpenLogErrors.str()), 0); + ASSERT_EQ(counterMap.count(kMetricSsdCacheDeleteCheckpointErrors.str()), 0); + ASSERT_EQ(counterMap.count(kMetricSsdCacheGrowFileErrors.str()), 0); + ASSERT_EQ(counterMap.count(kMetricSsdCacheWriteSsdErrors.str()), 0); + ASSERT_EQ(counterMap.count(kMetricSsdCacheWriteCheckpointErrors.str()), 0); + ASSERT_EQ(counterMap.count(kMetricSsdCacheReadSsdErrors.str()), 0); + ASSERT_EQ(counterMap.count(kMetricSsdCacheReadCheckpointErrors.str()), 0); + ASSERT_EQ(counterMap.count(kMetricSsdCacheCheckpointsRead.str()), 0); + ASSERT_EQ(counterMap.count(kMetricSsdCacheCheckpointsWritten.str()), 0); + ASSERT_EQ(counterMap.count(kMetricSsdCacheRegionsEvicted.str()), 0); + ASSERT_EQ(counterMap.count(kMetricSsdCacheAgedOutEntries.str()), 0); + ASSERT_EQ(counterMap.count(kMetricSsdCacheAgedOutRegions.str()), 0); + ASSERT_EQ(counterMap.size(), 20); + + // Update stats + auto newSsdStats = std::make_shared(); + newSsdStats->entriesWritten = 10; + newSsdStats->bytesWritten = 10; + newSsdStats->checkpointsWritten = 10; + newSsdStats->entriesRead = 10; + newSsdStats->bytesRead = 10; + newSsdStats->checkpointsRead = 10; + newSsdStats->entriesAgedOut = 10; + newSsdStats->regionsAgedOut = 10; + 
newSsdStats->regionsEvicted = 10; + newSsdStats->numPins = 10; + newSsdStats->openFileErrors = 10; + newSsdStats->openCheckpointErrors = 10; + newSsdStats->openLogErrors = 10; + newSsdStats->deleteCheckpointErrors = 10; + newSsdStats->growFileErrors = 10; + newSsdStats->writeSsdErrors = 10; + newSsdStats->writeCheckpointErrors = 10; + newSsdStats->readSsdErrors = 10; + newSsdStats->readCheckpointErrors = 10; + cache.updateStats( + {.numHit = 10, + .hitBytes = 10, + .numNew = 10, + .numEvict = 10, + .numEvictChecks = 10, + .numWaitExclusive = 10, + .numAgedOut = 10, + .allocClocks = 10, + .sumEvictScore = 10, + .ssdStats = newSsdStats}); + arbitrator.updateStats(memory::MemoryArbitrator::Stats( + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10)); + std::this_thread::sleep_for(std::chrono::milliseconds(4'000)); + + // Stop right after sufficient wait to ensure the following reads from main + // thread does not trigger TSAN failures. + periodicReporter.stop(); + + // Check delta stats are reported + ASSERT_EQ(counterMap.count(kMetricMemoryCacheNumHits.str()), 1); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheHitBytes.str()), 1); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheNumNew.str()), 1); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheNumEvicts.str()), 1); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheNumEvictChecks.str()), 1); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheNumWaitExclusive.str()), 1); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheNumAllocClocks.str()), 1); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheNumAgedOutEntries.str()), 1); + ASSERT_EQ(counterMap.count(kMetricMemoryCacheSumEvictScore.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSsdCacheReadEntries.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSsdCacheReadBytes.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSsdCacheWrittenEntries.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSsdCacheWrittenBytes.str()), 1); + 
ASSERT_EQ(counterMap.count(kMetricSsdCacheOpenSsdErrors.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSsdCacheOpenCheckpointErrors.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSsdCacheOpenLogErrors.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSsdCacheDeleteCheckpointErrors.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSsdCacheGrowFileErrors.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSsdCacheWriteSsdErrors.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSsdCacheWriteCheckpointErrors.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSsdCacheReadSsdErrors.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSsdCacheReadCheckpointErrors.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSsdCacheCheckpointsRead.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSsdCacheCheckpointsWritten.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSsdCacheRegionsEvicted.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSsdCacheAgedOutEntries.str()), 1); + ASSERT_EQ(counterMap.count(kMetricSsdCacheAgedOutRegions.str()), 1); + ASSERT_EQ(counterMap.size(), 47); } TEST_F(PeriodicStatsReporterTest, globalInstance) { diff --git a/velox/common/caching/AsyncDataCache.cpp b/velox/common/caching/AsyncDataCache.cpp index 6fa2e9cc0ee4..d2196c033a5f 100644 --- a/velox/common/caching/AsyncDataCache.cpp +++ b/velox/common/caching/AsyncDataCache.cpp @@ -17,6 +17,7 @@ #include "velox/common/caching/AsyncDataCache.h" #include "velox/common/caching/FileIds.h" #include "velox/common/caching/SsdCache.h" +#include "velox/common/caching/SsdFile.h" #include "velox/common/base/Counters.h" #include "velox/common/base/Exceptions.h" @@ -588,6 +589,24 @@ bool CacheShard::removeFileEntries( return true; } +CacheStats CacheStats::operator-(CacheStats& other) const { + CacheStats result; + result.numHit = numHit - other.numHit; + result.hitBytes = hitBytes - other.hitBytes; + result.numNew = numNew - other.numNew; + result.numEvict = numEvict - other.numEvict; + result.numEvictChecks = numEvictChecks - 
other.numEvictChecks; + result.numWaitExclusive = numWaitExclusive - other.numWaitExclusive; + result.numAgedOut = numAgedOut - other.numAgedOut; + result.allocClocks = allocClocks - other.allocClocks; + result.sumEvictScore = sumEvictScore - other.sumEvictScore; + if (ssdStats != nullptr && other.ssdStats != nullptr) { + result.ssdStats = + std::make_shared(*ssdStats - *other.ssdStats); + } + return result; +} + AsyncDataCache::AsyncDataCache( memory::MemoryAllocator* allocator, std::unique_ptr ssdCache) diff --git a/velox/common/caching/AsyncDataCache.h b/velox/common/caching/AsyncDataCache.h index b1e9cf06c7b7..f205e5392254 100644 --- a/velox/common/caching/AsyncDataCache.h +++ b/velox/common/caching/AsyncDataCache.h @@ -134,17 +134,17 @@ struct hash<::facebook::velox::cache::RawFileCacheKey> { namespace facebook::velox::cache { -// Represents a contiguous range of bytes cached from a file. This -// is the primary unit of access. These are typically owned via -// CachePin and can be in shared or exclusive mode. 'numPins_' -// counts the shared leases, the special value kExclusive means that -// this is being written to by another thread. It is possible to -// wait for the exclusive mode to finish, at which time one can -// retry getting access. Entries belong to one CacheShard at a -// time. The CacheShard serializes the mapping from a key to the -// entry and the setting entries to exclusive mode. An unpinned -// entry is evictable. CacheShard decides the eviction policy and -// serializes eviction with other access. +/// Represents a contiguous range of bytes cached from a file. This +/// is the primary unit of access. These are typically owned via +/// CachePin and can be in shared or exclusive mode. 'numPins_' +/// counts the shared leases, the special value kExclusive means that +/// this is being written to by another thread. It is possible to +/// wait for the exclusive mode to finish, at which time one can +/// retry getting access. 
Entries belong to one CacheShard at a +/// time. The CacheShard serializes the mapping from a key to the +/// entry and the setting entries to exclusive mode. An unpinned +/// entry is evictable. CacheShard decides the eviction policy and +/// serializes eviction with other access. class AsyncDataCacheEntry { public: static constexpr int32_t kExclusive = -10000; @@ -153,9 +153,9 @@ class AsyncDataCacheEntry { explicit AsyncDataCacheEntry(CacheShard* shard); ~AsyncDataCacheEntry(); - // Sets the key and allocates the entry's memory. Resets - // all other state. The entry must be held exclusively and must - // hold no memory when calling this. + /// Sets the key and allocates the entry's memory. Resets + /// all other state. The entry must be held exclusively and must + /// hold no memory when calling this. void initialize(FileCacheKey key); memory::Allocation& data() { @@ -254,9 +254,9 @@ class AsyncDataCacheEntry { /// Sets access stats so that this is immediately evictable. void makeEvictable(); - // Moves the promise out of 'this'. Used in order to handle the - // promise within the lock of the cache shard, so not within private - // methods of 'this'. + /// Moves the promise out of 'this'. Used in order to handle the + /// promise within the lock of the cache shard, so not within private + /// methods of 'this'. std::unique_ptr> movePromise() { return std::move(promise_); } @@ -471,59 +471,66 @@ class CoalescedLoad { std::vector sizes_; }; -// Struct for CacheShard stats. Stats from all shards are added into -// this struct to provide a snapshot of state. +/// Struct for CacheShard stats. Stats from all shards are added into +/// this struct to provide a snapshot of state. struct CacheStats { - // Total size in 'tinyData_' + /// ============= Snapshot stats ============= + + /// Total size in 'tinyData_' int64_t tinySize{0}; - // Total size in 'data_' + /// Total size in 'data_' int64_t largeSize{0}; - // Unused capacity in 'tinyData_'. 
+ /// Unused capacity in 'tinyData_'. int64_t tinyPadding{0}; - // Unused capacity in 'data_'. + /// Unused capacity in 'data_'. int64_t largePadding{0}; - // Total number of entries. + /// Total number of entries. int32_t numEntries{0}; - // Number of entries that do not cache anything. + /// Number of entries that do not cache anything. int32_t numEmptyEntries{0}; - // Number of entries pinned for shared access. + /// Number of entries pinned for shared access. int32_t numShared{0}; - // Number of entries pinned for exclusive access. + /// Number of entries pinned for exclusive access. int32_t numExclusive{0}; - // Number of entries that are being or have been prefetched but have not been - // hit. + /// Number of entries that are being or have been prefetched but have not been + /// hit. int32_t numPrefetch{0}; - // Total size of entries in prefetch state. + /// Total size of entries in prefetch state. int64_t prefetchBytes{0}; - // Number of hits (saved IO). The first hit to a prefetched entry does not - // count. + /// Total size of shared/exclusive pinned entries. + int64_t sharedPinnedBytes{0}; + int64_t exclusivePinnedBytes{0}; + + /// ============= Cumulative stats ============= + + /// Number of hits (saved IO). The first hit to a prefetched entry does not + /// count. int64_t numHit{0}; - // Sum of sizes of entries counted in 'numHit'. + /// Sum of sizes of entries counted in 'numHit'. int64_t hitBytes{0}; - // Number of new entries created. + /// Number of new entries created. int64_t numNew{0}; - // Number of times a valid entry was removed in order to make space. + /// Number of times a valid entry was removed in order to make space. int64_t numEvict{0}; - // Number of entries considered for evicting. + /// Number of entries considered for evicting. int64_t numEvictChecks{0}; - // Number of times a user waited for an entry to transit from exclusive to - // shared mode. 
+ /// Number of times a user waited for an entry to transit from exclusive to + /// shared mode. int64_t numWaitExclusive{0}; - // Total number of entries that are aged out and beyond TTL. + /// Total number of entries that are aged out and beyond TTL. int64_t numAgedOut{}; - // Cumulative clocks spent in allocating or freeing memory for backing cache - // entries. + /// Cumulative clocks spent in allocating or freeing memory for backing cache + /// entries. uint64_t allocClocks{0}; - // Sum of scores of evicted entries. This serves to infer an average - // lifetime for entries in cache. + /// Sum of scores of evicted entries. This serves to infer an average + /// lifetime for entries in cache. int64_t sumEvictScore{0}; - // Total size of shared/exclusive pinned entries. - int64_t sharedPinnedBytes{0}; - int64_t exclusivePinnedBytes{0}; - + /// Ssd cache stats that include both snapshot and cumulative stats. std::shared_ptr ssdStats = nullptr; + CacheStats operator-(CacheStats& other) const; + std::string toString() const; }; @@ -569,20 +576,20 @@ class CacheShard { memory::MachinePageCount pagesToAcquire, memory::Allocation& acquiredAllocation); - // Removes 'entry' from 'this'. Removes a possible promise from the entry - // inside the shard mutex and returns it so that it can be realized outside of - // the mutex. + /// Removes 'entry' from 'this'. Removes a possible promise from the entry + /// inside the shard mutex and returns it so that it can be realized outside + /// of the mutex. std::unique_ptr> removeEntry( AsyncDataCacheEntry* entry); - // Adds the stats of 'this' to 'stats'. + /// Adds the stats of 'this' to 'stats'. void updateStats(CacheStats& stats); - // Appends a batch of non-saved SSD savable entries in 'this' to - // 'pins'. This may have to be called several times since this keeps - // limits on the batch to write at one time. The savable entries - // are pinned for read. 
'pins' should be written or dropped before - // calling this a second time. + /// Appends a batch of non-saved SSD savable entries in 'this' to + /// 'pins'. This may have to be called several times since this keeps + /// limits on the batch to write at one time. The savable entries + /// are pinned for read. 'pins' should be written or dropped before + /// calling this a second time. void appendSsdSaveable(std::vector& pins); /// Remove cache entries from this shard for files in the fileNum set @@ -637,24 +644,24 @@ class CacheShard { int32_t evictionThreshold_{kNoThreshold}; // Cumulative count of cache hits. uint64_t numHit_{0}; - // Sum of bytes in cache hits. + // Cumulative Sum of bytes in cache hits. uint64_t hitBytes_{0}; // Cumulative count of hits on entries held in exclusive mode. uint64_t numWaitExclusive_{0}; // Cumulative count of new entry creation. uint64_t numNew_{0}; - // Count of entries evicted. + // Cumulative count of entries evicted. uint64_t numEvict_{0}; - // Count of entries considered for eviction. This divided by + // Cumulative count of entries considered for eviction. This divided by // 'numEvict_' measured efficiency of eviction. uint64_t numEvictChecks_{0}; - // Count of entries aged out due to TTL. + // Cumulative count of entries aged out due to TTL. uint64_t numAgedOut_{}; - // Sum of evict scores. This divided by 'numEvict_' correlates to + // Cumulative sum of evict scores. This divided by 'numEvict_' correlates to // time data stays in cache. uint64_t sumEvictScore_{0}; - // Tracker of time spent in allocating/freeing MemoryAllocator space - // for backing cached data. + // Tracker of cumulative time spent in allocating/freeing MemoryAllocator + // space for backing cached data. 
std::atomic allocClocks_{0}; }; diff --git a/velox/common/caching/SsdFile.h b/velox/common/caching/SsdFile.h index b078e23f57a8..99c860fe040c 100644 --- a/velox/common/caching/SsdFile.h +++ b/velox/common/caching/SsdFile.h @@ -118,7 +118,7 @@ class SsdPin { SsdRun run_; }; -// Metrics for SSD cache. Maintained by SsdFile and aggregated by SsdCache. +/// Metrics for SSD cache. Maintained by SsdFile and aggregated by SsdCache. struct SsdCacheStats { SsdCacheStats() {} @@ -152,20 +152,49 @@ struct SsdCacheStats { readCheckpointErrors = tsanAtomicValue(other.readCheckpointErrors); } + SsdCacheStats operator-(const SsdCacheStats& other) const { + SsdCacheStats result; + result.entriesWritten = entriesWritten - other.entriesWritten; + result.bytesWritten = bytesWritten - other.bytesWritten; + result.checkpointsWritten = checkpointsWritten - other.checkpointsWritten; + result.entriesRead = entriesRead - other.entriesRead; + result.bytesRead = bytesRead - other.bytesRead; + result.checkpointsRead = checkpointsRead - other.checkpointsRead; + result.entriesAgedOut = entriesAgedOut - other.entriesAgedOut; + result.regionsAgedOut = regionsAgedOut - other.regionsAgedOut; + result.regionsEvicted = regionsEvicted - other.regionsEvicted; + result.openFileErrors = openFileErrors - other.openFileErrors; + result.openCheckpointErrors = + openCheckpointErrors - other.openCheckpointErrors; + result.openLogErrors = openLogErrors - other.openLogErrors; + result.deleteCheckpointErrors = + deleteCheckpointErrors - other.deleteCheckpointErrors; + result.growFileErrors = growFileErrors - other.growFileErrors; + result.writeSsdErrors = writeSsdErrors - other.writeSsdErrors; + result.writeCheckpointErrors = + writeCheckpointErrors - other.writeCheckpointErrors; + result.readSsdErrors = readSsdErrors - other.readSsdErrors; + result.readCheckpointErrors = + readCheckpointErrors - other.readCheckpointErrors; + return result; + } + + /// Snapshot stats + tsan_atomic entriesCached{0}; + tsan_atomic 
regionsCached{0}; + tsan_atomic bytesCached{0}; + tsan_atomic numPins{0}; + + /// Cumulative stats tsan_atomic entriesWritten{0}; tsan_atomic bytesWritten{0}; tsan_atomic checkpointsWritten{0}; tsan_atomic entriesRead{0}; tsan_atomic bytesRead{0}; tsan_atomic checkpointsRead{0}; - tsan_atomic entriesCached{0}; - tsan_atomic regionsCached{0}; - tsan_atomic bytesCached{0}; tsan_atomic entriesAgedOut{0}; tsan_atomic regionsAgedOut{0}; tsan_atomic regionsEvicted{0}; - tsan_atomic numPins{0}; - tsan_atomic openFileErrors{0}; tsan_atomic openCheckpointErrors{0}; tsan_atomic openLogErrors{0}; diff --git a/velox/docs/monitoring/metrics.rst b/velox/docs/monitoring/metrics.rst index 681e78e724d4..273d13246c91 100644 --- a/velox/docs/monitoring/metrics.rst +++ b/velox/docs/monitoring/metrics.rst @@ -188,6 +188,172 @@ Memory Management - Tracks the count of double frees in memory allocator, indicating the possibility of buffer ownership issues when a buffer is freed more than once. + * - memory_allocator_mapped_bytes + - Avg + - Number of bytes currently mapped in MemoryAllocator. These bytes represent + the bytes that are either currently being allocated or were in the past + allocated, not yet been returned back to the operating system, in the + form of 'Allocation' or 'ContiguousAllocation'. + * - memory_allocator_alloc_bytes + - Avg + - Number of bytes currently allocated (used) from MemoryAllocator in the form + of 'Allocation' or 'ContiguousAllocation'. + * - mmap_allocator_external_mapped_bytes + - Avg + - Number of bytes currently mapped in MmapAllocator, in the form of + 'ContiguousAllocation'. + NOTE: This applies only to MmapAllocator + * - mmap_allocator_delegated_alloc_bytes + - Avg + - Number of bytes currently allocated from MmapAllocator directly from raw + allocateBytes() interface, and internally allocated by malloc. Only small + chunks of memory are delegated to malloc + NOTE: This applies only to MmapAllocator + +Cache +-------------- + +.. 
list-table:: + :widths: 40 10 50 + :header-rows: 1 + + * - Metric Name + - Type + - Description + * - cache_max_age_secs + - Avg + - Max possible age of AsyncDataCache and SsdCache entries since the raw file + was opened to load the cache. + * - memory_cache_num_entries + - Avg + - Total number of cache entries. + * - memory_cache_num_empty_entries + - Avg + - Total number of cache entries that do not cache anything. + * - memory_cache_num_shared_entries + - Avg + - Total number of cache entries that are pinned for shared access. + * - memory_cache_num_exclusive_entries + - Avg + - Total number of cache entries that are pinned for exclusive access. + * - memory_cache_num_prefetched_entries + - Avg + - Total number of cache entries that are being or have been prefetched but + have not been hit. + * - memory_cache_total_tiny_bytes + - Avg + - Total number of bytes of the cached data that is much smaller than kTinyDataSize. + * - memory_cache_total_large_bytes + - Avg + - Total number of bytes of the cached data excluding 'memory_cache_total_tiny_bytes' + * - memory_cache_total_tiny_padding_bytes + - Avg + - Total unused capacity bytes in 'memory_cache_total_tiny_bytes'. + * - memory_cache_total_large_padding_bytes + - Avg + - Total unused capacity bytes in 'memory_cache_total_large_bytes'. + * - memory_cache_total_prefetched_bytes + - Avg + - Total bytes of cache entries in prefetch state. + * - memory_cache_sum_evict_score + - Sum + - Sum of scores of evicted entries. This serves to infer an average lifetime + for entries in cache. + * - memory_cache_num_hits + - Sum + - Number of hits (saved IO) since last counter retrieval. The first hit to a + prefetched entry does not count. + * - memory_cache_hit_bytes + - Sum + - Amount of hit bytes (saved IO) since last counter retrieval. The first hit + to a prefetched entry does not count. + * - memory_cache_num_new + - Sum + - Number of new entries created since last counter retrieval. 
+ * - memory_cache_num_evicts + - Sum + - Number of times a valid entry was removed in order to make space, since + last counter retrieval. + * - memory_cache_num_evict_checks + - Sum + - Number of entries considered for evicting, since last counter retrieval. + * - memory_cache_num_wait_exclusive + - Sum + - Number of times a user waited for an entry to transit from exclusive to + shared mode, since last counter retrieval. + * - memory_cache_num_alloc_clocks + - Sum + - Clocks spent in allocating or freeing memory for backing cache entries, + since last counter retrieval. + * - memory_cache_num_aged_out_entries + - Sum + - Number of AsyncDataCache entries that are aged out and evicted + given configured TTL. + * - ssd_cache_cached_regions + - Avg + - Number of regions currently cached by SSD. + * - ssd_cache_cached_entries + - Avg + - Number of entries currently cached by SSD. + * - ssd_cache_cached_bytes + - Avg + - Total bytes currently cached by SSD. + * - ssd_cache_read_entries + - Sum + - Total number of entries read from SSD. + * - ssd_cache_read_bytes + - Sum + - Total number of bytes read from SSD. + * - ssd_cache_written_entries + - Sum + - Total number of entries written to SSD. + * - ssd_cache_written_bytes + - Sum + - Total number of bytes written to SSD. + * - ssd_cache_aged_out_entries + - Sum + - Total number of SsdCache entries that are aged out and evicted given + configured TTL. + * - ssd_cache_aged_out_regions + - Sum + - Total number of SsdCache regions that are aged out and evicted given + configured TTL. + * - ssd_cache_open_ssd_errors + - Sum + - Total number of SSD file open errors. + * - ssd_cache_open_checkpoint_errors + - Sum + - Total number of SSD checkpoint file open errors. + * - ssd_cache_open_log_errors + - Sum + - Total number of SSD evict log file open errors. + * - ssd_cache_delete_checkpoint_errors + - Sum + - Total number of errors while deleting SSD checkpoint files. 
+ * - ssd_cache_grow_file_errors + - Sum + - Total number of errors while growing SSD cache files. + * - ssd_cache_write_ssd_errors + - Sum + - Total number of errors while writing to SSD cache files. + * - ssd_cache_write_checkpoint_errors + - Sum + - Total number of errors while writing SSD checkpoint file. + * - ssd_cache_read_ssd_errors + - Sum + - Total number of errors while reading from SSD cache files. + * - ssd_cache_read_checkpoint_errors + - Sum + - Total number of errors while reading from SSD checkpoint files. + * - ssd_cache_checkpoints_read + - Sum + - Total number of checkpoints read. + * - ssd_cache_checkpoints_written + - Sum + - Total number of checkpoints written. + * - ssd_cache_regions_evicted + - Sum + - Total number of cache regions evicted. Spilling --------