Skip to content

Commit

Permalink
chore: update realm allocator impl
Browse files Browse the repository at this point in the history
  • Loading branch information
chenzhuofu committed Feb 27, 2025
1 parent 6c84fb3 commit d6aa7ad
Show file tree
Hide file tree
Showing 13 changed files with 99 additions and 418 deletions.
30 changes: 0 additions & 30 deletions lib/realm-backend/include/realm-backend/allocated_tensors.h

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ using PerLayerElapsedTime =
struct ModelTrainingInstance {
ModelTrainingInstance(RealmTrainingBacking const &,
tensor_guid_t const &logit_tensor,
TensorShape const &label_tensor_shape,
loss_tensor_t const &label_tensor,
LossAttrs const &,
OptimizerAttrs const &);

Expand Down
37 changes: 6 additions & 31 deletions lib/realm-backend/include/realm-backend/realm_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,56 +3,31 @@

#include "realm-backend/driver.h"
#include "realm.h"
#include "kernels/allocation.h"
#include <realm/event.h>

namespace FlexFlow {

struct RealmAllocatorImpl;

// Handle describing one Realm allocation: the region instance itself plus a
// pointer to the allocator implementation associated with it.
// NOTE(review): this page is a rendered commit diff without +/- markers; this
// struct appears to be on the removed side of the change — confirm against
// the repository before relying on it.
struct RealmRegion {
Realm::RegionInstance instance;
RealmAllocatorImpl *allocator;
};

// Allocator implementation constructed from a Realm processor.
// NOTE(review): rendered diff without +/- markers. Where two near-identical
// lines appear back to back (struct header, allocate/deallocate, `ptrs`), the
// first looks like the pre-commit line and the second its replacement —
// confirm against the repository.
struct RealmAllocatorImpl {
struct RealmAllocatorImpl : public IAllocator {
// Default construction, copying and moving are all disabled; the remaining
// constructor takes the processor.
RealmAllocatorImpl() = delete;
RealmAllocatorImpl(RealmAllocatorImpl const &) = delete;
RealmAllocatorImpl(RealmAllocatorImpl &&) = delete;
RealmAllocatorImpl(Realm::Processor);
// NOTE(review): defaulted destructor — nothing here destroys region instances
// still recorded in `ptrs`; whether that happens elsewhere is not visible.
~RealmAllocatorImpl() = default;

// Allocation interface; the argument of allocate is the requested size.
RealmRegion allocate(size_t);
void deallocate(RealmRegion);
void *allocate(size_t) override;
void deallocate(void *) override;

private:
// Bookkeeping between region instances and their pointers. The two variants
// differ in which side is the key.
std::unordered_map<Realm::RegionInstance, void *> ptrs;
std::unordered_map<void *, Realm::RegionInstance> ptrs;
Realm::Processor proc;
Realm::Memory mem;
// One field of sizeof(char) bytes; allocate() passes this to
// RegionInstance::create_instance together with 1-D bounds derived from the
// requested size.
std::vector<size_t> field_sizes = {sizeof(char)};
};

// Copyable handle that holds a shared_ptr to a RealmAllocatorImpl.
// NOTE(review): rendered diff without +/- markers; this wrapper appears to be
// on the removed side of the change (its uses elsewhere on the page are
// replaced by `Allocator`) — confirm against the repository.
struct RealmAllocator {
RealmAllocator() = delete;

RealmRegion allocate(size_t);
void deallocate(RealmRegion);

// Factory: builds a T from the forwarded arguments via std::make_shared and
// wraps it. Only participates in overload resolution when T derives from
// (or is) RealmAllocatorImpl.
template <typename T, typename... Args>
static typename std::enable_if<std::is_base_of<RealmAllocatorImpl, T>::value,
RealmAllocator>::type
create(Args &&...args) {
return RealmAllocator(std::make_shared<T>(std::forward<Args>(args)...));
}

// Wraps an existing implementation pointer; the copy constructor shares the
// same implementation object rather than duplicating it.
RealmAllocator(std::shared_ptr<RealmAllocatorImpl> ptr) : i_allocator(ptr) {};
RealmAllocator(RealmAllocator const &allocator)
: i_allocator(allocator.i_allocator) {};

private:
std::shared_ptr<RealmAllocatorImpl> i_allocator;
};

RealmAllocator create_realm_memory_allocator(Realm::Processor);
Allocator create_realm_memory_allocator(Realm::Processor);

} // namespace FlexFlow

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ using TensorSlotsBacking = std::unordered_map<
using ArgSlotsBacking = std::unordered_map<slot_id_t, ConcreteArgSpec>;

struct RealmTaskArgumentAccessor : public ITaskArgumentAccessor {
RealmTaskArgumentAccessor(RealmAllocator const &allocator,
RealmTaskArgumentAccessor(Allocator const &allocator,
TensorSlotsBacking const &tensor_slots_backing,
ArgSlotsBacking const &arg_slots_backing);

Expand All @@ -35,7 +35,7 @@ struct RealmTaskArgumentAccessor : public ITaskArgumentAccessor {
size_t get_device_idx() const override;

private:
RealmAllocator allocator;
Allocator allocator;
TensorSlotsBacking tensor_slots_backing;
ArgSlotsBacking arg_slots_backing;
};
Expand Down
12 changes: 5 additions & 7 deletions lib/realm-backend/include/realm-backend/realm_tensor_backing.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,21 @@
#include "pcg/computation_graph.dtg.h"
#include "pcg/layer_guid_t.dtg.h"
#include "pcg/optimizer_attrs.dtg.h"
#include "realm-backend/allocated_tensors.dtg.h"
#include "local-execution/allocated_tensors.dtg.h"
#include "realm-backend/realm_allocator.h"
#include "realm-backend/realm_task_argument_accessor.h"
#include "realm-backend/unallocated_tensors.dtg.h"
#include "local-execution/unallocated_tensors.dtg.h"
#include "task-spec/lowered_tensor_t.dtg.h"
#include "task-spec/task_invocation.dtg.h"
#include "task-spec/tensor_role.dtg.h"

namespace FlexFlow {

using TensorBackingMap = std::unordered_map<lowered_tensor_t, std::pair<RealmRegion, TensorShape>>;
using TensorBackingMap = std::unordered_map<lowered_tensor_t, GenericTensorAccessorW>;

struct RealmTensorBacking {
RealmTensorBacking(AllocatedTensors const &, UnallocatedTensors const &,
RealmAllocator const &);
Allocator const &);

public:
GenericTensorAccessorW get_tensor(TensorTypeVariant const &) const;
Expand All @@ -45,15 +45,13 @@ struct RealmTensorBacking {
std::unordered_map<tensor_guid_t, std::vector<optimizer_tensor_t>>
tensor_optimizer_mapping;

RealmAllocator allocator;
Allocator allocator;

private:
lowered_tensor_t insert_tensor(TensorTypeVariant const &);
LoweredTensorSource lowered_tensor_source;
};

GenericTensorAccessorW wrappup_tensor_accessor(std::pair<RealmRegion, TensorShape> const &);

UnallocatedTensors generate_unallocated_tensors(
AllocatedTensors const &,
std::unordered_map<tensor_guid_t, TensorAttrs> const &,
Expand Down
17 changes: 12 additions & 5 deletions lib/realm-backend/include/realm-backend/realm_training_backing.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include "op-attrs/ops/loss_functions/loss_attrs.dtg.h"
#include "pcg/computation_graph.dtg.h"
#include "pcg/optimizer_attrs.dtg.h"
#include "realm-backend/allocated_tensors.dtg.h"
#include "local-execution/allocated_tensors.dtg.h"
#include "realm-backend/driver.h"
#include "realm-backend/realm_allocator.h"
#include "realm-backend/realm_args_backing.h"
Expand All @@ -19,20 +19,27 @@ using PerLayerElapsedTime =
std::unordered_map<layer_guid_t, std::optional<float>>;

struct RealmTrainingBacking {
RealmTrainingBacking(Realm::Processor, AllocatedTensors const &,
RealmTrainingBacking(Realm::Processor,
std::vector<Realm::Processor> const &,
std::vector<Allocator> const &,
AllocatedTensors const &,
ComputationGraph const &, RuntimeArgConfig const &);

RealmTrainingBacking(Realm::Processor, AllocatedTensors const &,
RealmTrainingBacking(Realm::Processor,
std::vector<Realm::Processor> const &,
std::vector<Allocator> const &,
AllocatedTensors const &,
ComputationGraph const &, RuntimeArgConfig const &,
OptimizerAttrs const &);

public:
// runtime
Realm::Processor master_proc;
Realm::Event master_event;
Realm::Memory master_mem;
std::vector<Realm::Processor> worker_procs;
std::unordered_map<Realm::Processor, Realm::Event> proc_events;
std::vector<RealmAllocator> allocators;
std::vector<Realm::Event> worker_events;
std::vector<Allocator> allocators;

RealmTensorBacking realm_tensor_backing;
RealmArgsBacking realm_args_backing;
Expand Down

This file was deleted.

2 changes: 1 addition & 1 deletion lib/realm-backend/src/allocated_tensors.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include "realm-backend/allocated_tensors.h"
#include "local-execution/allocated_tensors.h"
#include "pcg/optimizer_attrs.h"
#include "utils/containers/keys.h"
#include "utils/containers/set_union.h"
Expand Down
29 changes: 9 additions & 20 deletions lib/realm-backend/src/realm_allocator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,40 +15,29 @@ RealmAllocatorImpl::RealmAllocatorImpl(Processor proc) : proc(proc) {
}

// TODO: for now, each region instance corresponds to exactly one tensor.
//
// Creates a region instance in `mem` over the 1-D bounds
// [0, requested_memory_size - 1] using `field_sizes`, blocks until creation
// has finished, records the instance together with its pointer in `ptrs`, and
// returns the result.
// NOTE(review): rendered diff without +/- markers — the paired signature,
// insert and return lines look like old line followed by its replacement;
// confirm against the repository.
RealmRegion RealmAllocatorImpl::allocate(size_t requested_memory_size) {
void *RealmAllocatorImpl::allocate(size_t requested_memory_size) {
// NOTE(review): `requested_memory_size - 1` is evaluated in size_t, so a
// request of 0 wraps around before being converted to a Point coordinate —
// confirm callers never pass 0.
Rect<1> bounds(Point<1>(0), Point<1>(requested_memory_size - 1));
RegionInstance requested_instance = RegionInstance::NO_INST;
// create_instance returns something that is waited on here, so the call below
// does not return until that wait completes.
RegionInstance::create_instance(requested_instance, mem, bounds, field_sizes,
/*SOA*/ 1, ProfilingRequestSet())
.wait();
// presumably the base address of the instance's storage — TODO confirm the
// meaning of the (0, 0) arguments against the Realm API.
void *ptr = requested_instance.pointer_untyped(0, 0);
this->ptrs.insert({requested_instance, ptr});
return {requested_instance, this};
this->ptrs.insert({ptr, requested_instance});
return ptr;
}

// Destroys the region instance recorded in `ptrs` for the given allocation.
// Throws std::runtime_error when the argument is not tracked by this
// allocator.
// NOTE(review): rendered diff without +/- markers — the first signature and
// the three lines after it look like the pre-commit version, followed by the
// replacement signature and body; confirm against the repository.
void RealmAllocatorImpl::deallocate(RealmRegion region) {
if (region.allocator == this and contains_key(this->ptrs, region.instance)) {
RegionInstance instance = this->ptrs.at(region.instance);
instance.destroy();
void RealmAllocatorImpl::deallocate(void *ptr) {
if (this->ptrs.count(ptr)) {
RegionInstance region = this->ptrs.at(ptr);
// NOTE(review): the entry is not erased from `ptrs` in the lines shown, so a
// second deallocate of the same pointer would pass the check above and call
// destroy() again — confirm whether an erase is intended here.
region.destroy();
} else {
throw std::runtime_error(
"Deallocating a pointer that was not allocated by this Allocator");
}
}


/*********** RealmAllocator ***********/

// Forwards the request to the wrapped implementation object.
// NOTE(review): appears to be on the removed side of this diff — confirm.
RealmRegion RealmAllocator::allocate(size_t mem_size) {
return this->i_allocator->allocate(mem_size);
}

// Forwards the release to the wrapped implementation object.
// NOTE(review): appears to be on the removed side of this diff — confirm.
void RealmAllocator::deallocate(RealmRegion region) {
this->i_allocator->deallocate(region);
}

// Factory: constructs a RealmAllocatorImpl for `proc` through the handle
// type's create<>() helper and returns the resulting handle.
// NOTE(review): rendered diff without +/- markers — the first signature/return
// pair looks like the pre-commit version (RealmAllocator) and the second its
// replacement (Allocator); confirm against the repository.
RealmAllocator create_realm_memory_allocator(Processor proc) {
return RealmAllocator::create<RealmAllocatorImpl>(proc);
Allocator create_realm_memory_allocator(Processor proc) {
return Allocator::create<RealmAllocatorImpl>(proc);
}

} // namespace FlexFlow
Loading

0 comments on commit d6aa7ad

Please sign in to comment.