diff --git a/lib/realm-backend/include/realm-backend/allocated_tensors.h b/lib/realm-backend/include/realm-backend/allocated_tensors.h deleted file mode 100644 index 8effd06954..0000000000 --- a/lib/realm-backend/include/realm-backend/allocated_tensors.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef _FLEXFLOW_LOCAL_EXECUTION_ALLOCATED_TENSORS_H -#define _FLEXFLOW_LOCAL_EXECUTION_ALLOCATED_TENSORS_H - -#include "realm-backend/allocated_tensors.dtg.h" -#include "pcg/computation_graph.h" - -namespace FlexFlow { - -bool are_allocated_forward_tensors_valid( - AllocatedTensors const &, - std::unordered_map const &); -bool are_allocated_gradient_tensors_valid( - AllocatedTensors const &, - std::unordered_map const &); -bool are_allocated_optimizer_tensors_valid( - AllocatedTensors const &, - std::unordered_map const &); - -bool are_allocated_tensors_valid( - AllocatedTensors const &, - std::unordered_map const &); - -bool is_allocated_tensor_backing_valid( - TensorTypeVariant const &, - std::unordered_map const &, - ArrayShape const &); - -} // namespace FlexFlow - -#endif diff --git a/lib/realm-backend/include/realm-backend/allocated_tensors.struct.toml b/lib/realm-backend/include/realm-backend/allocated_tensors.struct.toml deleted file mode 100644 index d459027e5d..0000000000 --- a/lib/realm-backend/include/realm-backend/allocated_tensors.struct.toml +++ /dev/null @@ -1,32 +0,0 @@ -namespace = "FlexFlow" -name = "AllocatedTensors" -features = [ - "eq", - "fmt", - "hash", -] - -includes = [ - "task-spec/tensor_type_t.dtg.h", - "kernels/accessor.h", - "realm-backend/realm_allocator.h" -] - -src_includes = [ - "utils/hash/unordered_map.h", - "utils/fmt/unordered_map.h", - "utils/hash/vector.h", - "utils/fmt/vector.h" -] - -[[fields]] -name = "tensor_type_backings" -type = "std::unordered_map<::FlexFlow::TensorTypeVariant, std::pair<::FlexFlow::RealmRegion,::FlexFlow::TensorShape>>" - -[[fields]] -name = "gradient_mapping" -type = "std::unordered_map<::FlexFlow::tensor_guid_t, ::FlexFlow::gradient_tensor_t>" - -[[fields]] -name = "optimizer_mapping" -type = "std::unordered_map<::FlexFlow::tensor_guid_t, std::vector<::FlexFlow::optimizer_tensor_t>>" diff --git a/lib/realm-backend/include/realm-backend/model_training_instance.h b/lib/realm-backend/include/realm-backend/model_training_instance.h index 62d8311ccb..e30ae7a9a8 100644 --- a/lib/realm-backend/include/realm-backend/model_training_instance.h +++ b/lib/realm-backend/include/realm-backend/model_training_instance.h @@ -14,7 +14,7 @@ using PerLayerElapsedTime = struct ModelTrainingInstance { ModelTrainingInstance(RealmTrainingBacking const &, tensor_guid_t const &logit_tensor, - TensorShape const &label_tensor_shape, + loss_tensor_t const &label_tensor, LossAttrs const &, OptimizerAttrs const &); diff --git a/lib/realm-backend/include/realm-backend/realm_allocator.h b/lib/realm-backend/include/realm-backend/realm_allocator.h index 1e0c7b23c4..304ca38e32 100644 --- a/lib/realm-backend/include/realm-backend/realm_allocator.h +++ b/lib/realm-backend/include/realm-backend/realm_allocator.h @@ -3,56 +3,31 @@ #include "realm-backend/driver.h" #include "realm.h" +#include "kernels/allocation.h" #include namespace FlexFlow { struct RealmAllocatorImpl; -struct RealmRegion { - Realm::RegionInstance instance; - RealmAllocatorImpl *allocator; -}; - -struct RealmAllocatorImpl { +struct RealmAllocatorImpl : public IAllocator { RealmAllocatorImpl() = delete; RealmAllocatorImpl(RealmAllocatorImpl const &) = delete; RealmAllocatorImpl(RealmAllocatorImpl &&) = delete; RealmAllocatorImpl(Realm::Processor); ~RealmAllocatorImpl() = default; - RealmRegion allocate(size_t); - void deallocate(RealmRegion); + void *allocate(size_t) override; + void deallocate(void *) override; private: - std::unordered_map ptrs; + std::unordered_map ptrs; Realm::Processor proc; Realm::Memory mem; std::vector field_sizes = {sizeof(char)}; }; -struct RealmAllocator { - RealmAllocator() = delete; - - RealmRegion allocate(size_t); - void deallocate(RealmRegion); - - template - static typename std::enable_if::value, - RealmAllocator>::type - create(Args &&...args) { - return RealmAllocator(std::make_shared(std::forward(args)...)); - } - - RealmAllocator(std::shared_ptr ptr) : i_allocator(ptr) {}; - RealmAllocator(RealmAllocator const &allocator) - : i_allocator(allocator.i_allocator) {}; - -private: - std::shared_ptr i_allocator; -}; - -RealmAllocator create_realm_memory_allocator(Realm::Processor); +Allocator create_realm_memory_allocator(Realm::Processor); } // namespace FlexFlow diff --git a/lib/realm-backend/include/realm-backend/realm_task_argument_accessor.h b/lib/realm-backend/include/realm-backend/realm_task_argument_accessor.h index ce826e162e..d5c1a63b48 100644 --- a/lib/realm-backend/include/realm-backend/realm_task_argument_accessor.h +++ b/lib/realm-backend/include/realm-backend/realm_task_argument_accessor.h @@ -15,7 +15,7 @@ using TensorSlotsBacking = std::unordered_map< using ArgSlotsBacking = std::unordered_map; struct RealmTaskArgumentAccessor : public ITaskArgumentAccessor { - RealmTaskArgumentAccessor(RealmAllocator const &allocator, + RealmTaskArgumentAccessor(Allocator const &allocator, TensorSlotsBacking const &tensor_slots_backing, ArgSlotsBacking const &arg_slots_backing); @@ -35,7 +35,7 @@ struct RealmTaskArgumentAccessor : public ITaskArgumentAccessor { size_t get_device_idx() const override; private: - RealmAllocator allocator; + Allocator allocator; TensorSlotsBacking tensor_slots_backing; ArgSlotsBacking arg_slots_backing; }; diff --git a/lib/realm-backend/include/realm-backend/realm_tensor_backing.h b/lib/realm-backend/include/realm-backend/realm_tensor_backing.h index 25136ad2ff..dac93c84b0 100644 --- a/lib/realm-backend/include/realm-backend/realm_tensor_backing.h +++ b/lib/realm-backend/include/realm-backend/realm_tensor_backing.h @@ -10,21 +10,21 @@ #include "pcg/computation_graph.dtg.h" #include "pcg/layer_guid_t.dtg.h" #include "pcg/optimizer_attrs.dtg.h" -#include "realm-backend/allocated_tensors.dtg.h" +#include "local-execution/allocated_tensors.dtg.h" #include "realm-backend/realm_allocator.h" #include "realm-backend/realm_task_argument_accessor.h" -#include "realm-backend/unallocated_tensors.dtg.h" +#include "local-execution/unallocated_tensors.dtg.h" #include "task-spec/lowered_tensor_t.dtg.h" #include "task-spec/task_invocation.dtg.h" #include "task-spec/tensor_role.dtg.h" namespace FlexFlow { -using TensorBackingMap = std::unordered_map>; +using TensorBackingMap = std::unordered_map; struct RealmTensorBacking { RealmTensorBacking(AllocatedTensors const &, UnallocatedTensors const &, - RealmAllocator const &); + Allocator const &); public: GenericTensorAccessorW get_tensor(TensorTypeVariant const &) const; @@ -45,15 +45,13 @@ struct RealmTensorBacking { std::unordered_map> tensor_optimizer_mapping; - RealmAllocator allocator; + Allocator allocator; private: lowered_tensor_t insert_tensor(TensorTypeVariant const &); LoweredTensorSource lowered_tensor_source; }; -GenericTensorAccessorW wrappup_tensor_accessor(std::pair const &); - UnallocatedTensors generate_unallocated_tensors( AllocatedTensors const &, std::unordered_map const &, diff --git a/lib/realm-backend/include/realm-backend/realm_training_backing.h b/lib/realm-backend/include/realm-backend/realm_training_backing.h index 81df422b7a..45285464b8 100644 --- a/lib/realm-backend/include/realm-backend/realm_training_backing.h +++ b/lib/realm-backend/include/realm-backend/realm_training_backing.h @@ -6,7 +6,7 @@ #include "op-attrs/ops/loss_functions/loss_attrs.dtg.h" #include "pcg/computation_graph.dtg.h" #include "pcg/optimizer_attrs.dtg.h" -#include "realm-backend/allocated_tensors.dtg.h" +#include "local-execution/allocated_tensors.dtg.h" #include "realm-backend/driver.h" #include "realm-backend/realm_allocator.h" #include "realm-backend/realm_args_backing.h" @@ -19,20 +19,27 @@ using PerLayerElapsedTime = std::unordered_map>; struct RealmTrainingBacking { - RealmTrainingBacking(Realm::Processor, AllocatedTensors const &, + RealmTrainingBacking(Realm::Processor, + std::vector const &, + std::vector const &, + AllocatedTensors const &, ComputationGraph const &, RuntimeArgConfig const &); - RealmTrainingBacking(Realm::Processor, AllocatedTensors const &, + RealmTrainingBacking(Realm::Processor, + std::vector const &, + std::vector const &, + AllocatedTensors const &, ComputationGraph const &, RuntimeArgConfig const &, OptimizerAttrs const &); public: // runtime Realm::Processor master_proc; + Realm::Event master_event; Realm::Memory master_mem; std::vector worker_procs; - std::unordered_map proc_events; - std::vector allocators; + std::vector worker_events; + std::vector allocators; RealmTensorBacking realm_tensor_backing; RealmArgsBacking realm_args_backing; diff --git a/lib/realm-backend/include/realm-backend/unallocated_tensors.struct.toml b/lib/realm-backend/include/realm-backend/unallocated_tensors.struct.toml deleted file mode 100644 index e86cc2a532..0000000000 --- a/lib/realm-backend/include/realm-backend/unallocated_tensors.struct.toml +++ /dev/null @@ -1,31 +0,0 @@ -namespace = "FlexFlow" -name = "UnallocatedTensors" -features = [ - "eq", - "fmt", - "hash", -] - -includes = [ - "task-spec/tensor_type_t.dtg.h", - "op-attrs/tensor_shape.dtg.h" -] - -src_includes = [ - "utils/hash/unordered_map.h", - "utils/fmt/unordered_map.h", - "utils/hash/vector.h", - "utils/fmt/vector.h" -] - -[[fields]] -name = "tensor_type_shapes" -type = "std::unordered_map<::FlexFlow::TensorTypeVariant, ::FlexFlow::TensorShape>" - -[[fields]] -name = "gradient_mapping" -type = "std::unordered_map<::FlexFlow::tensor_guid_t, ::FlexFlow::gradient_tensor_t>" - -[[fields]] -name = "optimizer_mapping" -type = "std::unordered_map<::FlexFlow::tensor_guid_t, std::vector<::FlexFlow::optimizer_tensor_t>>" diff --git a/lib/realm-backend/src/allocated_tensors.cc b/lib/realm-backend/src/allocated_tensors.cc index f27db14643..3e249bf6d1 100644 --- a/lib/realm-backend/src/allocated_tensors.cc +++ b/lib/realm-backend/src/allocated_tensors.cc @@ -1,4 +1,4 @@ -#include "realm-backend/allocated_tensors.h" +#include "local-execution/allocated_tensors.h" #include "pcg/optimizer_attrs.h" #include "utils/containers/keys.h" #include "utils/containers/set_union.h" diff --git a/lib/realm-backend/src/realm_allocator.cc b/lib/realm-backend/src/realm_allocator.cc index fadc7f5719..d7139210bc 100644 --- a/lib/realm-backend/src/realm_allocator.cc +++ b/lib/realm-backend/src/realm_allocator.cc @@ -15,40 +15,29 @@ RealmAllocatorImpl::RealmAllocatorImpl(Processor proc) : proc(proc) { } // TODO: now the region instance only corresponds to one tensor -RealmRegion RealmAllocatorImpl::allocate(size_t requested_memory_size) { +void *RealmAllocatorImpl::allocate(size_t requested_memory_size) { Rect<1> bounds(Point<1>(0), Point<1>(requested_memory_size - 1)); RegionInstance requested_instance = RegionInstance::NO_INST; RegionInstance::create_instance(requested_instance, mem, bounds, field_sizes, /*SOA*/ 1, ProfilingRequestSet()) .wait(); void *ptr = requested_instance.pointer_untyped(0, 0); - this->ptrs.insert({requested_instance, ptr}); - return {requested_instance, this}; + this->ptrs.insert({ptr, requested_instance}); + return ptr; } -void RealmAllocatorImpl::deallocate(RealmRegion region) { - if (region.allocator == this and contains_key(this->ptrs, region.instance)) { - RegionInstance instance = this->ptrs.at(region.instance); - instance.destroy(); +void RealmAllocatorImpl::deallocate(void *ptr) { + if (this->ptrs.count(ptr)) { + RegionInstance region = this->ptrs.at(ptr); + region.destroy(); } else { throw std::runtime_error( "Deallocating a pointer that was not allocated by this Allocator"); } } - -/*********** RealmAllocator ***********/ - -RealmRegion RealmAllocator::allocate(size_t mem_size) { - return this->i_allocator->allocate(mem_size); -} - -void RealmAllocator::deallocate(RealmRegion region) { - this->i_allocator->deallocate(region); -} - -RealmAllocator create_realm_memory_allocator(Processor proc) { - return RealmAllocator::create(proc); +Allocator create_realm_memory_allocator(Processor proc) { + return Allocator::create(proc); } } // namespace FlexFlow diff --git a/lib/realm-backend/src/realm_tensor_backing copy.cc b/lib/realm-backend/src/realm_tensor_backing copy.cc deleted file mode 100644 index bac16c6b69..0000000000 --- a/lib/realm-backend/src/realm_tensor_backing copy.cc +++ /dev/null @@ -1,142 +0,0 @@ -#include "task-spec/slot_grad_id.dtg.h" - -#include "op-attrs/parallel_tensor_shape.h" -#include "pcg/computation_graph.h" -#include "pcg/optimizer_attrs.h" -#include "realm-backend/realm_allocator.h" -#include "realm-backend/realm_tensor_backing.h" -#include "utils/containers/contains_key.h" -#include "utils/containers/keys.h" -#include "utils/overload.h" - -namespace FlexFlow { - -RealmTensorBacking::RealmTensorBacking() {}; - -void RealmTensorBacking::allocate_layer_tensors( - layer_guid_t const &layer_guid, ComputationGraph const &computation_graph, - RealmAllocator &allocator) { - this->allocate_tensors_by_role(TensorRole::INPUT, layer_guid, - computation_graph, allocator); - this->allocate_tensors_by_role(TensorRole::WEIGHT, layer_guid, - computation_graph, allocator); - this->allocate_tensors_by_role(TensorRole::OUTPUT, layer_guid, - computation_graph, allocator); -} - -void RealmTensorBacking::allocate_tensors_by_role( - TensorRole const &role, layer_guid_t const &layer_guid, - ComputationGraph const &computation_graph, RealmAllocator &allocator) { - std::vector tensors; - switch (role) { - case TensorRole::INPUT: - tensors = get_incoming_inputs(computation_graph, layer_guid); - break; - case TensorRole::WEIGHT: - tensors = get_incoming_weights(computation_graph, layer_guid); - break; - case TensorRole::OUTPUT: - tensors = get_outgoing_tensors(computation_graph, layer_guid); - break; - default: - throw mk_runtime_error("Invalid tensor role, got {}", role); - } - - for (tensor_guid_t const &tensor : tensors) { - TensorAttrs tensor_attrs = get_tensor_attrs(computation_graph, tensor); - // tensor allocation - if (!contains_key(this->tensor_lowering_mapping, tensor)) { - lowered_tensor_t reduced_tensor = - this->lowered_tensor_source.new_lowered_tensor(); - this->tensor_lowering_mapping.insert({tensor, reduced_tensor}); - RealmRegion region = - allocator.allocate(get_size_in_bytes(tensor_attrs.shape)); - this->tensor_regions.insert({reduced_tensor, region}); - this->tensor_shapes.insert({reduced_tensor, tensor_attrs.shape}); - } - - // gradient tensor allocation - if (tensor_attrs.create_gradients == CreateGrad::YES && - !contains_key(this->gradient_tensor_lowering_mapping, tensor)) { - lowered_tensor_t reduced_tensor = - this->lowered_tensor_source.new_lowered_tensor(); - this->gradient_tensor_lowering_mapping.insert({tensor, reduced_tensor}); - RealmRegion region = - allocator.allocate(get_size_in_bytes(tensor_attrs.shape)); - this->tensor_regions.insert({reduced_tensor, region}); - this->tensor_shapes.insert({reduced_tensor, tensor_attrs.shape}); - } - } -} - -void RealmTensorBacking::allocate_optimizer_tensors( - tensor_guid_t const &weight, - std::vector const &optimizer_tensors, - RealmAllocator &allocator) { - GenericTensorAccessorW weight_backing = - this->get_tensor_backing(this->tensor_lowering_mapping.at(weight)); - for (optimizer_tensor_t const &optimizer_tensor : optimizer_tensors) { - // optimizer tensor allocation - if (!contains_key(this->optimizer_tensor_lowering_mapping, - optimizer_tensor)) { - lowered_tensor_t buffer_tensor = - this->lowered_tensor_source.new_lowered_tensor(); - this->optimizer_tensor_lowering_mapping.insert( - {optimizer_tensor, buffer_tensor}); - TensorShape tensor_shape = - get_tensor_shape(weight_backing.shape, weight_backing.data_type); - RealmRegion region = allocator.allocate(get_size_in_bytes(tensor_shape)); - this->tensor_regions.insert({buffer_tensor, region}); - this->tensor_shapes.insert({buffer_tensor, tensor_shape}); - } - } -} - -bool RealmTensorBacking::is_tensor_allocated( - lowered_tensor_t const &tensor_id) const { - return contains_key(tensor_regions, tensor_id); -} - -GenericTensorAccessorW const &RealmTensorBacking::get_tensor_backing( - lowered_tensor_t const &tensor_id) const { - void *ptr = this->tensor_regions.at(tensor_id).instance.pointer_untyped(0, 0); - TensorShape shape = this->tensor_shapes.at(tensor_id); - return {shape.data_type, ArrayShape{shape}, ptr}; -} - -TensorSlotsBacking RealmTensorBacking::construct_tensor_slots_backing( - TaskBinding const &binding) const { - TensorSlotsBacking mapping; - - for (auto const &tensor_binding : binding.get_tensor_bindings()) { - SlotTensorTypeId slot_tensor_type_id = tensor_binding.first; - - lowered_tensor_t tensor_id = [&] { - TensorTypeVariant tensor_type = tensor_binding.second; - if (tensor_type.has() and - slot_tensor_type_id.tensor_type == TensorType::FORWARD) { - return this->tensor_lowering_mapping.at( - tensor_type.get()); - } else if (tensor_type.has() and - slot_tensor_type_id.tensor_type == TensorType::GRADIENT) { - return this->gradient_tensor_lowering_mapping.at( - tensor_type.get()); - } else if (tensor_type.has()) { - return this->optimizer_tensor_lowering_mapping.at( - tensor_type.get()); - } else if (tensor_type.has()) { - return this->loss_tensor_lowering_mapping.at( - tensor_type.get()); - } else { - throw mk_runtime_error(fmt::format("Tensor binding has invalid type")); - } - }(); - - GenericTensorAccessorW accessor = this->get_tensor_backing(tensor_id); - mapping.insert({slot_tensor_type_id, accessor}); - } - - return mapping; -} - -} // namespace FlexFlow diff --git a/lib/realm-backend/src/realm_tensor_backing.cc b/lib/realm-backend/src/realm_tensor_backing.cc index 8f8f828821..12d0973fba 100644 --- a/lib/realm-backend/src/realm_tensor_backing.cc +++ b/lib/realm-backend/src/realm_tensor_backing.cc @@ -2,7 +2,7 @@ #include "op-attrs/tensor_shape.h" #include "pcg/computation_graph.h" #include "pcg/optimizer_attrs.h" -#include "realm-backend/allocated_tensors.h" +#include "local-execution/allocated_tensors.h" #include "realm-backend/realm_allocator.h" #include "realm-backend/realm_tensor_backing.h" #include "task-spec/slot_grad_id.dtg.h" @@ -12,23 +12,16 @@ namespace FlexFlow { -GenericTensorAccessorW wrappup_tensor_accessor( - std::pair const &tensor_region_shape) { - void *ptr = tensor_region_shape.first.instance.pointer_untyped(0, 0); - TensorShape shape = tensor_region_shape.second; - return {shape.data_type, ArrayShape{shape}, ptr}; -} - RealmTensorBacking::RealmTensorBacking( AllocatedTensors const &allocated_tensors, UnallocatedTensors const &unallocated_tensors, - RealmAllocator const &allocator) + Allocator const &allocator) : tensor_gradient_mapping(allocated_tensors.gradient_mapping), tensor_optimizer_mapping(allocated_tensors.optimizer_mapping), allocator(allocator) { // handle already-allocated tensors - for (std::pair> const + for (std::pair const &tensor_type_backing : allocated_tensors.tensor_type_backings) { lowered_tensor_t lowered_tensor = this->insert_tensor(tensor_type_backing.first); @@ -59,10 +52,9 @@ RealmTensorBacking::RealmTensorBacking( unallocated_tensors.tensor_type_shapes) { lowered_tensor_t lowered_tensor = this->insert_tensor(tensor_type_shape.first); - RealmRegion region = allocator.allocate( - get_size_in_bytes(tensor_type_shape.second).unwrap_nonnegative()); - this->tensor_backings.insert( - {lowered_tensor, {region, tensor_type_shape.second}}); + GenericTensorAccessorW tensor_backing = + this->allocator.allocate_tensor(tensor_type_shape.second); + this->tensor_backings.insert({lowered_tensor, tensor_backing}); } }; @@ -117,7 +109,7 @@ RealmTensorBacking::get_tensor(TensorTypeVariant const &tensor_type) const { throw mk_runtime_error( fmt::format("Unhandled tensor type {}", any_tensor)); }}); - return wrappup_tensor_accessor(this->tensor_backings.at(lowered_tensor)); + return this->tensor_backings.at(lowered_tensor); } UnallocatedTensors generate_unallocated_tensors( diff --git a/lib/realm-backend/src/realm_training_backing.cc b/lib/realm-backend/src/realm_training_backing.cc index f6b516e303..225a376cf3 100644 --- a/lib/realm-backend/src/realm_training_backing.cc +++ b/lib/realm-backend/src/realm_training_backing.cc @@ -23,26 +23,34 @@ namespace FlexFlow { using namespace Realm; RealmTrainingBacking::RealmTrainingBacking( - Processor master_proc, AllocatedTensors const &allocated_tensors, + Processor master_proc, std::vector const &worker_procs, + std::vector const &allocators, + AllocatedTensors const &allocated_tensors, ComputationGraph const &computation_graph, RuntimeArgConfig const &runtime_arg_config) - : computation_graph(computation_graph), + : master_proc(master_proc), worker_procs(worker_procs), + allocators(allocators), computation_graph(computation_graph), task_registry(construct_task_registry( - get_layer_attrs_mapping(this->computation_graph)))) { - master_proc = master_proc; - proc_events.insert({master_proc, Realm::Event::NO_EVENT}); + get_layer_attrs_mapping(this->computation_graph))), + realm_tensor_backing(RealmTensorBacking( // TODO: multi gpu + allocated_tensors, + generate_unallocated_tensors( + allocated_tensors, get_all_tensor_attrs(this->computation_graph), + this->gradient_tensor_source), + this->allocators[0])), + realm_args_backing(initialize_args_backing(this, runtime_arg_config)) { + master_event = Realm::Event::NO_EVENT; master_mem = Machine::MemoryQuery(Machine::get_machine()) .only_kind(Memory::SYSTEM_MEM) .best_affinity_to(master_proc) .first(); - Machine::ProcessorQuery pq = Machine::ProcessorQuery(Machine::get_machine()) - .only_kind(Processor::TOC_PROC); - for (Processor p : pq) { - worker_procs.push_back(p); - proc_events.insert({p, Realm::Event::NO_EVENT}); - allocators.push_back(RealmAllocator::create(p)); + for (Processor p : worker_procs) { + worker_events.push_back(Realm::Event::NO_EVENT); } - assert(worker_procs.size() > 0); + // Machine::ProcessorQuery pq = + // Machine::ProcessorQuery(Machine::get_machine()) + // .only_kind(Processor::TOC_PROC); + // allocators.push_back(create_realm_memory_allocator(p)); // register tasks for realm for (layer_guid_t const &node : @@ -60,41 +68,35 @@ RealmTrainingBacking::RealmTrainingBacking( } } } - - // TODO: multi gpu - realm_tensor_backing = RealmTensorBacking( - allocated_tensors, - generate_unallocated_tensors( - allocated_tensors, get_all_tensor_attrs(this->computation_graph), - this->gradient_tensor_source), - allocators[0]); - realm_args_backing = - initialize_args_backing(this->task_registry, this->computation_graph, - runtime_arg_config, this->realm_tensor_backing); } RealmTrainingBacking::RealmTrainingBacking( - Processor master_proc, AllocatedTensors const &allocated_tensors, - ComputationGraph const &computation_graph, - RuntimeArgConfig const &runtime_arg_config, - OptimizerAttrs const &optimizer_attrs) - : computation_graph(computation_graph), - task_registry(construct_task_registry( - get_layer_attrs_mapping(this->computation_graph)))) { - master_proc = master_proc; - proc_events.insert({master_proc, Realm::Event::NO_EVENT}); + Processor master_proc, std::vector const &worker_procs, + std::vector const &allocators, + AllocatedTensors const &allocated_tensors, + ComputationGraph const &computation_graph, + RuntimeArgConfig const &runtime_arg_config, + OptimizerAttrs const &optimizer_attrs) + : master_proc(master_proc), worker_procs(worker_procs), + allocators(allocators), computation_graph(computation_graph), + task_registry(construct_task_registry( + get_layer_attrs_mapping(this->computation_graph))), + realm_tensor_backing(RealmTensorBacking( // TODO: multi gpu + allocated_tensors, + generate_unallocated_tensors_with_optimizer( + allocated_tensors, get_all_tensor_attrs(this->computation_graph), + this->gradient_tensor_source, this->optimizer_tensor_source, + optimizer_attrs), + this->allocators[0])), + realm_args_backing(initialize_args_backing(this, runtime_arg_config)) { + master_event = Realm::Event::NO_EVENT; master_mem = Machine::MemoryQuery(Machine::get_machine()) .only_kind(Memory::SYSTEM_MEM) .best_affinity_to(master_proc) .first(); - Machine::ProcessorQuery pq = Machine::ProcessorQuery(Machine::get_machine()) - .only_kind(Processor::TOC_PROC); - for (Processor p : pq) { - worker_procs.push_back(p); - proc_events.insert({p, Realm::Event::NO_EVENT}); - allocators.push_back(RealmAllocator::create(p)); + for (Processor p : worker_procs) { + worker_events.push_back(Realm::Event::NO_EVENT); } - assert(worker_procs.size() > 0); // register tasks for realm for (layer_guid_t const &node : @@ -112,16 +114,6 @@ RealmTrainingBacking::RealmTrainingBacking( } } } - - // TODO: multi gpu - realm_tensor_backing = RealmTensorBacking( - allocated_tensors, - generate_unallocated_tensors_with_optimizer( - allocated_tensors, get_all_tensor_attrs(this->computation_graph), - this->gradient_tensor_source, this->optimizer_tensor_source, - optimizer_attrs), - allocators[0]); - realm_args_backing = initialize_args_backing(this, runtime_arg_config); } RealmArgsBacking @@ -140,7 +132,7 @@ initialize_args_backing(RealmTrainingBacking *backing, Processor master_proc = backing->master_proc; Memory master_mem = backing->master_mem; std::vector &worker_procs = backing->worker_procs; - std::unordered_map &proc_events = backing->proc_events; + std::vector &worker_events = backing->worker_events; for (layer_guid_t const &node : topological_ordering(cg)) { if (registry_contains_task_for_layer(task_registry, node, @@ -164,10 +156,10 @@ initialize_args_backing(RealmTrainingBacking *backing, Future future = promise.get_future(); RealmTaskArgs args{ task_id, impl_function, accessor, std::move(promise)}; - Event e = worker_procs[0].spawn( - static_cast(task_id), &args, sizeof(args), - proc_events[worker_procs[0]]); - proc_events[worker_procs[0]] = e; + Event e = + worker_procs[0].spawn(static_cast(task_id), + &args, sizeof(args), worker_events[0]); + worker_events[0] = e; future.set_event(e); per_device_op_states.insert({node, std::move(future.get())}); } @@ -176,35 +168,6 @@ initialize_args_backing(RealmTrainingBacking *backing, return RealmArgsBacking{runtime_arg_config, per_device_op_states}; } -// void RealmTrainingBacking::register_and_allocate_layer( -// layer_guid_t const &node) { -// ComputationGraphOpAttrs attrs = -// get_layer_attrs(this->computation_graph, node).attrs; -// this->realm_tensor_backing.allocate_layer_tensors( -// node, this->computation_graph, this->allocators[0]); -// } - -// void RealmTrainingBacking::allocate_layer_optimizer_tensors( -// layer_guid_t const &node, OptimizerAttrs const &optimizer_attrs) { -// ComputationGraphOpAttrs attrs = -// get_layer_attrs(this->computation_graph, node).attrs; -// if (attrs.has()) { -// TaskSignature sig = get_update_signature(optimizer_attrs); -// tensor_guid_t weight_tensor = -// get_only(get_outgoing_tensors(this->computation_graph, node)); - -// std::vector optimizer_tensors; -// for (TensorTypeSlotSpec const &tensor_type_slot_spec : -// values(sig.tensor_guid_slots)) { -// optimizer_tensors.push_back( -// this->optimizer_tensor_source.new_optimizer_tensor()); -// } -// this->layer_optimizer_tensor_ids.insert({node, optimizer_tensors}); -// this->realm_tensor_backing.allocate_optimizer_tensors( -// weight_tensor, optimizer_tensors, this->allocators[0]); -// } -// } - Future> execute_forward(RealmTrainingBacking &realm_training_backing, layer_guid_t const &operator_node) { @@ -242,10 +205,8 @@ execute_forward(RealmTrainingBacking &realm_training_backing, std::move(promise)}; Event e = realm_training_backing.worker_procs[0].spawn( static_cast(task_id), &args, sizeof(args), - realm_training_backing - .proc_events[realm_training_backing.worker_procs[0]]); - realm_training_backing.proc_events[realm_training_backing.worker_procs[0]] = - e; + realm_training_backing.worker_events[0]); + realm_training_backing.worker_events[0] = e; future.set_event(e); return future; } else { @@ -290,10 +251,8 @@ execute_backward(RealmTrainingBacking &realm_training_backing, std::move(promise)}; Event e = realm_training_backing.worker_procs[0].spawn( static_cast(task_id), &args, sizeof(args), - realm_training_backing - .proc_events[realm_training_backing.worker_procs[0]]); - realm_training_backing.proc_events[realm_training_backing.worker_procs[0]] = - e; + realm_training_backing.worker_events[0]); + realm_training_backing.worker_events[0] = e; future.set_event(e); return future; } else { @@ -301,7 +260,7 @@ execute_backward(RealmTrainingBacking &realm_training_backing, } } -Future execute_update(RealmTrainingBacking const &realm_training_backing, +Future execute_update(RealmTrainingBacking &realm_training_backing, layer_guid_t const &node, OptimizerAttrs const &optimizer_attrs) { LayerAttrs layer_attrs = @@ -341,10 +300,8 @@ Future execute_update(RealmTrainingBacking const &realm_training_backing, std::move(promise)}; Event e = realm_training_backing.worker_procs[0].spawn( static_cast(task_id), &args, sizeof(args), - realm_training_backing - .proc_events[realm_training_backing.worker_procs[0]]); - realm_training_backing.proc_events[realm_training_backing.worker_procs[0]] = - e; + realm_training_backing.worker_events[0]); + realm_training_backing.worker_events[0] = e; future.set_event(e); return future; } else { @@ -352,7 +309,7 @@ Future execute_update(RealmTrainingBacking const &realm_training_backing, } } -Future compute_loss(RealmTrainingBacking const &realm_training_backing, +Future compute_loss(RealmTrainingBacking &realm_training_backing, LossAttrs const &loss_attrs, tensor_guid_t const &logit_tensor, loss_tensor_t const &label_tensor) { @@ -377,10 +334,8 @@ Future compute_loss(RealmTrainingBacking const &realm_training_backing, std::move(promise)}; Event e = realm_training_backing.worker_procs[0].spawn( static_cast(task_id), &args, sizeof(args), - realm_training_backing - .proc_events[realm_training_backing.worker_procs[0]]); - realm_training_backing.proc_events[realm_training_backing.worker_procs[0]] = - e; + realm_training_backing.worker_events[0]); + realm_training_backing.worker_events[0] = e; future.set_event(e); return future; }