Skip to content

Background eviction using dml #88

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 22 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions accel-config.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#!/bin/bash
#
# Reconfigure one DSA shared work queue through accel-config.
#
# Usage: accel-config.sh <DSA id> <shared WQ id> <engine count>
#   e.g. "0 1 4" targets dsa0, work queue dsa0/wq0.1 and engines
#   dsa0/engine0.0 .. dsa0/engine0.3.
#
# Sequence: disable the WQ and the device, put the requested engines and the
# WQ into group 0, apply the WQ settings, then re-enable device and WQ.
# Every accel-config call is run through sudo.

if [ "$#" -ne 3 ]; then
    echo "ERROR: Pass DSA id, SHARED WQ id and ENGINE count as args."
    echo "e.g. 0 for dsa0, 1 for dsa0/wq0.1 and 4 for engine count"
    exit 1
fi

# The arguments end up inside sudo command lines and inside an arithmetic
# context, so refuse anything that is not a plain non-negative integer.
for arg in "$@"; do
    if ! [[ "${arg}" =~ ^[0-9]+$ ]]; then
        echo "ERROR: '${arg}' is not a non-negative integer." >&2
        exit 1
    fi
done

DEVID="${1}"
SWQID="${2}"
NENGS="${3}"

DEV="dsa${DEVID}"
SWQ="${DEV}/wq${DEVID}.${SWQID}"

# The queue and the device are disabled before any setting is changed.
echo "=> ${SWQ}:"
sudo accel-config disable-wq "${SWQ}"

echo "=> ${DEV}:"
sudo accel-config disable-device "${DEV}"

# Assign the first NENGS engines of the device to group 0.
for ((i = 0; i < NENGS; i++))
do
    echo "=> ${DEV}/engine${DEVID}.${i}"
    echo "configured"
    sudo accel-config config-engine "${DEV}/engine${DEVID}.${i}" --group-id=0
done

# Work queue settings, applied one option per invocation.
sudo accel-config config-wq "${SWQ}" --group-id=0
sudo accel-config config-wq "${SWQ}" --priority=1
sudo accel-config config-wq "${SWQ}" --wq-size=128
sudo accel-config config-wq "${SWQ}" --max-batch-size=1024
sudo accel-config config-wq "${SWQ}" --max-transfer-size=4194304
sudo accel-config config-wq "${SWQ}" --block-on-fault=0
sudo accel-config config-wq "${SWQ}" --type=user
sudo accel-config config-wq "${SWQ}" --name="dsa-test"
sudo accel-config config-wq "${SWQ}" --mode=shared
sudo accel-config config-wq "${SWQ}" --threshold=127
sudo accel-config config-wq "${SWQ}" --driver-name="user"

# Bring the device back first, then the queue that lives on it.
echo "=> ${DEV}:"
sudo accel-config enable-device "${DEV}"

echo "=> ${SWQ}:"
sudo accel-config enable-wq "${SWQ}"

5 changes: 4 additions & 1 deletion cachelib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ set(PACKAGE_BUGREPORT "https://github.com/facebook/TBD")
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

option(BUILD_TESTS "If enabled, compile the tests." ON)

option(WITH_DTO "If enabled, build with DSA transparent offloading." OFF)
option(WITH_DML_WRAPPER "If enabled, build with DML wrapper" OFF)

set(BIN_INSTALL_DIR bin CACHE STRING
"The subdirectory where binaries should be installed")
Expand Down Expand Up @@ -351,6 +352,8 @@ target_link_libraries(cachelib INTERFACE
cachelib_shm
cachelib_navy
cachelib_allocator
dmlhl
dl
)
target_include_directories(
cachelib
Expand Down
99 changes: 72 additions & 27 deletions cachelib/allocator/BackgroundMover-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,37 @@ template <typename CacheT>
BackgroundMover<CacheT>::BackgroundMover(
Cache& cache,
std::shared_ptr<BackgroundMoverStrategy> strategy,
MoverDir direction)
MoverDir direction,
bool dsaEnabled)
: cache_(cache), strategy_(strategy), direction_(direction) {
if (direction_ == MoverDir::Evict) {
moverFunc = BackgroundMoverAPIWrapper<CacheT>::traverseAndEvictItems;

if (dsaEnabled) {
moverFunc =
BackgroundMoverAPIWrapper<CacheT>::traverseAndEvictItemsUsingDsa;
} else {
moverFunc = BackgroundMoverAPIWrapper<CacheT>::traverseAndEvictItems;
}
} else if (direction_ == MoverDir::Promote) {
moverFunc = BackgroundMoverAPIWrapper<CacheT>::traverseAndPromoteItems;
}
}

// Records one completed traversal and the time it took.
//
// Updates the last/min/max/total durations and bumps numTraversals_, which
// backs getNumTraversals(); without that increment the getter would always
// report 0.
//
// The min/max updates are a separate load followed by a store, so they are
// only exact when a single thread records traversals.
// NOTE(review): checkAndRun() passes a difference of util::getCurrentTimeNs()
// readings here, which looks like nanoseconds despite the "Ms" naming —
// confirm the intended unit.
//
// @param msTaken  duration of the traversal that just finished
template <typename CacheT>
void BackgroundMover<CacheT>::TraversalStats::recordTraversalTime(uint64_t msTaken) {
  lastTraversalTimeMs_.store(msTaken, std::memory_order_relaxed);
  minTraversalTimeMs_.store(std::min(minTraversalTimeMs_.load(), msTaken),
                            std::memory_order_relaxed);
  maxTraversalTimeMs_.store(std::max(maxTraversalTimeMs_.load(), msTaken),
                            std::memory_order_relaxed);
  totalTraversalTimeMs_.fetch_add(msTaken, std::memory_order_relaxed);
  // Count the traversal itself so getNumTraversals() reflects recorded runs.
  numTraversals_.fetch_add(1, std::memory_order_relaxed);
}

// Average traversal time: the accumulated total divided by the supplied
// traversal count. Yields 0 when the count is 0 so the division never happens.
//
// @param numTraversals  number of traversals to average over
// @return               total recorded time / numTraversals, or 0
template <typename CacheT>
uint64_t BackgroundMover<CacheT>::TraversalStats::getAvgTraversalTimeMs(
    uint64_t numTraversals) const {
  if (numTraversals == 0) {
    return 0;
  }
  const uint64_t accumulated = totalTraversalTimeMs_.load();
  return accumulated / numTraversals;
}

template <typename CacheT>
BackgroundMover<CacheT>::~BackgroundMover() {
stop(std::chrono::seconds(0));
Expand Down Expand Up @@ -64,46 +85,70 @@ template <typename CacheT>
void BackgroundMover<CacheT>::checkAndRun() {
auto assignedMemory = mutex.lock_combine([this] { return assignedMemory_; });

unsigned int moves = 0;
std::set<ClassId> classes{};
auto batches = strategy_->calculateBatchSizes(cache_, assignedMemory);

for (size_t i = 0; i < batches.size(); i++) {
const auto [tid, pid, cid] = assignedMemory[i];
const auto batch = batches[i];

classes.insert(cid);
const auto& mpStats = cache_.getPoolByTid(pid, tid).getStats();

if (!batch) {
continue;
while (true) {
unsigned int moves = 0;
const auto begin = util::getCurrentTimeNs();
for (size_t i = 0; i < assignedMemory.size(); i++) {
const auto [tid, pid, cid] = assignedMemory[i];
uint32_t batch = 0;
if (direction_ == MoverDir::Promote) {
batch = 10;
} else {
batch =
BackgroundMoverAPIWrapper<CacheT>::getBatchForTarget(cache_, tid, pid, cid, 0.95);
if (batch > 1024) {
batch = 1024;
}
}

//classes.insert(cid);

if (batch == 0) {
continue;
}

// try moving BATCH items from the class in order to reach free target
auto moved = moverFunc(cache_, tid, pid, cid, batch);
moves += moved;
moves_per_class_[assignedMemory[i]] += moved;
//totalBytesMoved.add(moved * mpStats.acStats.at(cid).allocSize);
}

// try moving BATCH items from the class in order to reach free target
auto moved = moverFunc(cache_, tid, pid, cid, batch);
moves += moved;
moves_per_class_[tid][pid][cid] += moved;
totalBytesMoved.add(moved * mpStats.acStats.at(cid).allocSize);
auto end = util::getCurrentTimeNs();
if (moves > 0) {
traversalStats_.recordTraversalTime(end > begin ? end - begin : 0);
numMovedItems.add(moves);
numTraversals.inc();
//totalClasses.add(classes.size());
}
//we didn't move that many objects done with this run
if (moves == 0 || moves < (assignedMemory.size()/2) || shouldStopWork()) {
break;
}
break;
}

numTraversals.inc();
numMovedItems.add(moves);
totalClasses.add(classes.size());
}

template <typename CacheT>
BackgroundMoverStats BackgroundMover<CacheT>::getStats() const noexcept {
BackgroundMoverStats stats;
stats.numMovedItems = numMovedItems.get();
stats.runCount = numTraversals.get();
stats.totalBytesMoved = totalBytesMoved.get();
stats.totalClasses = totalClasses.get();
auto runCount = getRunCount();
stats.runCount = runCount;
stats.numTraversals = numTraversals.get();
stats.avgItemsMoved = (double) stats.numMovedItems / (double)runCount;
stats.lastTraversalTimeMs = traversalStats_.getLastTraversalTimeMs();
stats.avgTraversalTimeMs = traversalStats_.getAvgTraversalTimeMs(runCount);
stats.minTraversalTimeMs = traversalStats_.getMinTraversalTimeMs();
stats.maxTraversalTimeMs = traversalStats_.getMaxTraversalTimeMs();

return stats;
}

template <typename CacheT>
std::map<TierId, std::map<PoolId, std::map<ClassId, uint64_t>>>
std::map<MemoryDescriptorType,uint64_t>
BackgroundMover<CacheT>::getClassStats() const noexcept {
return moves_per_class_;
}
Expand Down
49 changes: 42 additions & 7 deletions cachelib/allocator/BackgroundMover.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,28 @@ struct BackgroundMoverAPIWrapper {
return cache.traverseAndEvictItems(tid, pid, cid, batch);
}

// Forwards to cache.traverseAndEvictItemsUsingDsa(tid, pid, cid, batch) and
// returns whatever that call returns. BackgroundMover's constructor installs
// this as moverFunc when the direction is Evict and dsaEnabled is true.
//
// @param cache  cache instance to operate on
// @param tid    tier id passed through to the cache
// @param pid    pool id passed through to the cache
// @param cid    class id passed through to the cache
// @param batch  batch size passed through to the cache
// @return       result of the cache call (checkAndRun adds it to its moved
//               item counters)
static size_t traverseAndEvictItemsUsingDsa(C& cache,
unsigned int tid,
unsigned int pid,
unsigned int cid,
size_t batch) {
return cache.traverseAndEvictItemsUsingDsa(tid, pid, cid, batch);
}

// Forwards to cache.traverseAndPromoteItems(tid, pid, cid, batch) and returns
// whatever that call returns. BackgroundMover's constructor installs this as
// moverFunc when the direction is Promote.
//
// @param cache  cache instance to operate on
// @param tid    tier id passed through to the cache
// @param pid    pool id passed through to the cache
// @param cid    class id passed through to the cache
// @param batch  batch size passed through to the cache
// @return       result of the cache call
static size_t traverseAndPromoteItems(C& cache,
unsigned int tid,
unsigned int pid,
unsigned int cid,
size_t batch) {
return cache.traverseAndPromoteItems(tid, pid, cid, batch);
}
// Looks up the pool via cache.getPoolByTid(pid, tid) -- note the pool id comes
// first in that call -- and returns the pool's getBatchForTarget(cid, target).
//
// @param cache   cache instance to query
// @param tid     tier id used for the pool lookup
// @param pid     pool id used for the pool lookup
// @param cid     class id handed to the pool
// @param target  target value handed to the pool (checkAndRun passes 0.95)
// @return        the pool's answer converted to size_t; checkAndRun skips the
//                class when it is 0 and caps it at 1024
static size_t getBatchForTarget(C& cache,
unsigned int tid,
unsigned int pid,
unsigned int cid,
double target) {
return cache.getPoolByTid(pid,tid).getBatchForTarget(cid, target);
}
};

enum class MoverDir { Evict = 0, Promote };
Expand All @@ -60,20 +75,40 @@ class BackgroundMover : public PeriodicWorker {
// (promoted vs. evicted and how much)
BackgroundMover(Cache& cache,
std::shared_ptr<BackgroundMoverStrategy> strategy,
MoverDir direction_);
MoverDir direction_,
bool dsaEnabled = false);

~BackgroundMover() override;

BackgroundMoverStats getStats() const noexcept;
std::map<TierId, std::map<PoolId, std::map<ClassId, uint64_t>>>
getClassStats() const noexcept;
std::map<MemoryDescriptorType,uint64_t> getClassStats() const noexcept;

void setAssignedMemory(
std::vector<MemoryDescriptorType>&& assignedMemory);
void setAssignedMemory(std::vector<MemoryDescriptorType>&& assignedMemory);

private:
std::map<TierId, std::map<PoolId, std::map<ClassId, uint64_t>>>
moves_per_class_;
std::map<MemoryDescriptorType,uint64_t> moves_per_class_;

// Timing bookkeeping for traversals. All counters are atomics; the getters
// return a snapshot of the corresponding counter.
struct TraversalStats {
  // Registers one traversal together with the time it took.
  void recordTraversalTime(uint64_t msTaken);

  // Accumulated traversal time divided by numTraversals; 0 when the count
  // is 0.
  uint64_t getAvgTraversalTimeMs(uint64_t numTraversals) const;

  // Smallest recorded duration; stays at the uint64_t maximum until a
  // traversal has been recorded.
  uint64_t getMinTraversalTimeMs() const {
    return minTraversalTimeMs_.load();
  }

  // Largest recorded duration; 0 until a traversal has been recorded.
  uint64_t getMaxTraversalTimeMs() const {
    return maxTraversalTimeMs_.load();
  }

  // Duration of the most recently recorded traversal.
  uint64_t getLastTraversalTimeMs() const {
    return lastTraversalTimeMs_.load();
  }

  // Current value of the numTraversals_ counter.
  uint64_t getNumTraversals() const {
    return numTraversals_.load();
  }

 private:
  // How long the latest traversal of the cache took.
  std::atomic<uint64_t> lastTraversalTimeMs_{0};
  // Starts at the largest representable value so the first sample wins.
  std::atomic<uint64_t> minTraversalTimeMs_{
      std::numeric_limits<uint64_t>::max()};
  std::atomic<uint64_t> maxTraversalTimeMs_{0};
  // Sum of all recorded durations; the numerator of the average.
  std::atomic<uint64_t> totalTraversalTimeMs_{0};
  std::atomic<uint64_t> numTraversals_{0};
};

TraversalStats traversalStats_;
// cache allocator's interface for evicting
using Item = typename Cache::Item;

Expand Down
34 changes: 27 additions & 7 deletions cachelib/allocator/BackgroundMoverStrategy.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,6 @@
namespace facebook {
namespace cachelib {

struct MemoryDescriptorType {
MemoryDescriptorType(TierId tid, PoolId pid, ClassId cid) :
tid_(tid), pid_(pid), cid_(cid) {}
TierId tid_;
PoolId pid_;
ClassId cid_;
};

// Base class for background eviction strategy.
class BackgroundMoverStrategy {
Expand All @@ -38,5 +31,32 @@ class BackgroundMoverStrategy {
std::vector<MemoryDescriptorType> acVec) = 0;
};

class DefaultBackgroundMoverStrategy : public BackgroundMoverStrategy {
public:
DefaultBackgroundMoverStrategy(uint64_t batchSize)
: batchSize_(batchSize) {}
~DefaultBackgroundMoverStrategy() {}

std::vector<size_t> calculateBatchSizes(
const CacheBase& cache,
std::vector<MemoryDescriptorType> acVec) {
std::vector<size_t> batches{};
for (auto [tid, pid, cid] : acVec) {
double usage = cache.getPoolByTid(pid, tid).getApproxUsage(cid);
uint32_t perSlab = cache.getPoolByTid(pid, tid).getPerSlab(cid);
if (usage >= 0.95) {
uint32_t batch = batchSize_ > perSlab ? perSlab : batchSize_;
//uint32_t batch = 10;
batches.push_back(batch);
} else {
batches.push_back(0);
}
}
return batches;
}
private:
uint64_t batchSize_{100};
};

} // namespace cachelib
} // namespace facebook
3 changes: 3 additions & 0 deletions cachelib/allocator/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ add_library (cachelib_allocator
PoolOptimizeStrategy.cpp
PoolRebalancer.cpp
PoolResizer.cpp
PrivateMemoryManager.cpp
RebalanceStrategy.cpp
SlabReleaseStats.cpp
TempShmMapping.cpp
Expand All @@ -64,6 +65,8 @@ target_link_libraries(cachelib_allocator PUBLIC
cachelib_navy
cachelib_common
cachelib_shm
dmlhl
dl
)

if ((CMAKE_SYSTEM_NAME STREQUAL Linux) AND
Expand Down
32 changes: 32 additions & 0 deletions cachelib/allocator/Cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,38 @@ enum class DestructorContext {
kRemovedFromNVM
};

// Identifies one allocation class by its (tier, pool, class) ids.
//
// Instances are used as std::map keys, so operator< has to be a strict weak
// ordering. The ordering is lexicographic: tier id first, then pool id, then
// class id.
struct MemoryDescriptorType {
  MemoryDescriptorType(TierId tid, PoolId pid, ClassId cid)
      : tid_(tid), pid_(pid), cid_(cid) {}
  TierId tid_;
  PoolId pid_;
  ClassId cid_;

  // Member form, kept for callers that compare through a non-const object.
  // It deliberately stays non-const: a const member with this signature would
  // be ambiguous with the free operator< below. Comparing through a const
  // reference selects the free function, so both share one implementation.
  bool operator<(const MemoryDescriptorType& rhs) {
    const MemoryDescriptorType& self = *this;
    return self < rhs;
  }

  friend bool operator<(const MemoryDescriptorType& lhs,
                        const MemoryDescriptorType& rhs);
};

// Lexicographic "less than" over (tid_, pid_, cid_).
//
// A later field is consulted only when all earlier fields are equal, which
// keeps the relation irreflexive, asymmetric and transitive. Two descriptors
// are therefore equivalent only when all three ids match.
inline bool operator<(const MemoryDescriptorType& lhs,
                      const MemoryDescriptorType& rhs) {
  if (lhs.tid_ != rhs.tid_) {
    return lhs.tid_ < rhs.tid_;
  }
  if (lhs.pid_ != rhs.pid_) {
    return lhs.pid_ < rhs.pid_;
  }
  return lhs.cid_ < rhs.cid_;
}

// A base class of cache exposing members and status agnostic of template type.
class CacheBase {
public:
Expand Down
Loading