Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
6a74af0
Add make_dynamic_open_dataflow_graph_from_pcg.
elliottslaughter Feb 4, 2026
587e08e
Empty skeleton of the realm-execution backend.
elliottslaughter Feb 4, 2026
50f6ec6
More Realm execution skeleton.
elliottslaughter Feb 4, 2026
984aae5
Stub creation.
elliottslaughter Feb 4, 2026
e9e1105
More passes.
elliottslaughter Feb 4, 2026
90b92c7
Add Realm manager and test it.
elliottslaughter Feb 5, 2026
ef92d6f
Do not expose raw runtime and properly wait in test.
elliottslaughter Feb 5, 2026
01e23cd
Sketch more Realm manager APIs.
elliottslaughter Feb 5, 2026
3e7d841
Add controller functionality.
elliottslaughter Feb 5, 2026
b9a30a6
Fix Realm tests.
elliottslaughter Feb 5, 2026
814e13f
Support passing closure arguments to controllers.
elliottslaughter Feb 5, 2026
3d0298c
Move task IDs into Realm and assign IDs to remaining tasks.
elliottslaughter Feb 5, 2026
d702afe
Avoid pulling in the entire invocation.
elliottslaughter Feb 5, 2026
4fcde77
Conversion into Realm task IDs.
elliottslaughter Feb 5, 2026
e51b04e
Add a top-level PRealm switch.
elliottslaughter Feb 5, 2026
895de33
Some work on Realm task registry.
elliottslaughter Feb 6, 2026
09fde7d
Split out the Realm context.
elliottslaughter Feb 6, 2026
c5a0ea9
Switch to mapped PCG.
elliottslaughter Feb 6, 2026
a587e53
Add shard expansion pass (and implement shard expansion pass).
elliottslaughter Feb 6, 2026
62b49f7
Add instance field to dynamic graph, more task IDs.
elliottslaughter Feb 6, 2026
ce403d4
Fix filename.
elliottslaughter Feb 6, 2026
a4183dd
Some work in instance allocation and registry/manager.
elliottslaughter Feb 6, 2026
0274dd0
Instance allocation.
elliottslaughter Feb 6, 2026
9d24b3d
Simplify dims and use constructors.
elliottslaughter Feb 6, 2026
60989fe
Refactor.
elliottslaughter Feb 6, 2026
8d46441
Sketch out device mapping.
elliottslaughter Feb 6, 2026
0dfa1a3
Move instance backing to a separate map, remove realm from task-spec.
elliottslaughter Feb 6, 2026
a4bc84e
Implement processor queries.
elliottslaughter Feb 7, 2026
02b71a8
Enable PRealm.
elliottslaughter Feb 7, 2026
b144d6d
Move tasks to dedicated file, stub out device state init, shuffle dir…
elliottslaughter Feb 10, 2026
4d43a7b
Make use of task args struct.
elliottslaughter Feb 10, 2026
4991911
Use task args struct.
elliottslaughter Feb 10, 2026
6f65c51
Refactor task APIs.
elliottslaughter Feb 10, 2026
fce23cf
Finish implementation of device init task.
elliottslaughter Feb 10, 2026
6fc3b9b
Finish implementation of device state initialization.
elliottslaughter Feb 10, 2026
2de3516
Block on initialization.
elliottslaughter Feb 10, 2026
2a174e0
Wire up rest of Realm implementation.
elliottslaughter Feb 11, 2026
7e78e3f
Implement Realm device idx.
elliottslaughter Feb 11, 2026
5ffc1dd
Updates to compile against latest local-execution.
elliottslaughter Feb 12, 2026
e1b6fca
Fix up function arguments.
elliottslaughter Feb 12, 2026
e2ccf4a
Rename PCGInstance and add dependency set.
elliottslaughter Feb 12, 2026
ffd2738
Dependency tracking.
elliottslaughter Feb 12, 2026
81cc485
Add event argument to controller.
elliottslaughter Feb 12, 2026
bb0ea6b
Implement the allocator.
elliottslaughter Feb 12, 2026
feb5897
Implement device handle.
elliottslaughter Feb 12, 2026
202889f
Distributed device handle initialization.
elliottslaughter Feb 12, 2026
8f816f0
Distributed device handle initialization.
elliottslaughter Feb 13, 2026
37beaa4
Test distributed device handle.
elliottslaughter Feb 13, 2026
c616040
Guard the kinds of procs we run on.
elliottslaughter Feb 13, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 0 additions & 48 deletions .flake/pkgs/legion.nix

This file was deleted.

46 changes: 46 additions & 0 deletions .flake/pkgs/realm.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Nix derivation for the Realm runtime: fetches a pinned revision of
# StanfordLegion/realm from GitHub and builds it with CMake as a shared
# library with CUDA and PRealm enabled.
{ lib
, stdenv
, fetchFromGitHub
, cmake
, cudaPackages ? { }
, zlib
# Forwarded to CMake as REALM_MAX_DIM (see cmakeFlags below).
, maxDim ? 5
}:

let
# cudatoolkit is taken from the cudaPackages argument and used as a build input.
inherit (cudaPackages) cudatoolkit;
in

stdenv.mkDerivation rec {
pname = "realm";
version = "2026-02-06";

# Source is pinned to an exact commit; sha256 must be updated together with rev.
src = fetchFromGitHub {
owner = "StanfordLegion";
repo = "realm";
rev = "0405b67ca14b586f7dec0dcddee194cecee7efa6";
sha256 = "sha256-iUPVV1rh3QuyDKgXuu8aDlaZGlNwcpPvPsSVLWp8tr4=";
};

nativeBuildInputs = [
cmake
];

# CUDA and PRealm are switched on unconditionally; only the maximum
# dimension count is configurable through the maxDim argument.
cmakeFlags = [
"-DBUILD_SHARED_LIBS=ON"
"-DREALM_ENABLE_CUDA=ON"
"-DREALM_ENABLE_PREALM=ON"
"-DREALM_MAX_DIM=${toString maxDim}"
];

buildInputs = [
cudatoolkit
zlib
];

meta = with lib; {
description = "Realm is a distributed, event–based tasking runtime for building high-performance applications that span clusters of CPUs, GPUs, and other accelerators";
homepage = "https://legion.stanford.edu/realm";
license = licenses.asl20;
};
}
7 changes: 7 additions & 0 deletions .proj.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,13 @@ has-cpu-only-benchmarks = false
has-cuda-tests = true
has-cuda-benchmarks = false

# realm-execution library target: declares CPU-only and CUDA tests, and no
# benchmarks of either kind.
[targets.realm-execution]
type = "lib"
has-cpu-only-tests = true
has-cpu-only-benchmarks = false
has-cuda-tests = true
has-cuda-benchmarks = false

# [targets.local-pcg-execution]
# type = "lib"
# has-cpu-only-tests = true
Expand Down
21 changes: 10 additions & 11 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
};
};

outputs = { self, nixpkgs, flake-utils, proj-repo, nixGL, ... }: flake-utils.lib.eachSystem [ "x86_64-linux" ] (system:
let
outputs = { self, nixpkgs, flake-utils, proj-repo, nixGL, ... }: flake-utils.lib.eachSystem [ "x86_64-linux" ] (system:
let
pkgs = import nixpkgs {
inherit system;
config.allowUnfree = true;
Expand All @@ -41,21 +41,21 @@
mkShell = attrs: pkgs.mkShell.override {
stdenv = pkgs.cudaPackages.backendStdenv;
} (attrs // {
hardeningDisable = ["all"]; # disable nixpkgs default compiler arguments, otherwise ubsan doesn't catch
# signed overflows due to the signedoverflow hardening setting.
# for more details, see the following (long-running) nixpkgs github issues:
hardeningDisable = ["all"]; # disable nixpkgs default compiler arguments, otherwise ubsan doesn't catch
# signed overflows due to the signedoverflow hardening setting.
# for more details, see the following (long-running) nixpkgs github issues:
# - https://github.com/NixOS/nixpkgs/issues/18995
# - https://github.com/NixOS/nixpkgs/issues/60919
});

proj = proj-repo.packages.${system}.proj;
in
in
{
packages = rec {
libdwarf-lite = pkgs.callPackage ./.flake/pkgs/libdwarf-lite.nix { };
cpptrace = pkgs.callPackage ./.flake/pkgs/cpptrace.nix { inherit libdwarf-lite; };
libassert = pkgs.callPackage ./.flake/pkgs/libassert.nix { inherit cpptrace; };
legion = pkgs.callPackage ./.flake/pkgs/legion.nix { };
realm = pkgs.callPackage ./.flake/pkgs/realm.nix { };
bencher-cli = pkgs.callPackage ./.flake/pkgs/bencher-cli.nix { };
ffdb = pkgs.callPackage ./.flake/pkgs/ffdb { inherit proj; };
hpp2plantuml = pkgs.python3Packages.callPackage ./.flake/pkgs/hpp2plantuml.nix { };
Expand Down Expand Up @@ -83,8 +83,7 @@
shellHook = ''
export PATH="$HOME/ff/.scripts/:$PATH"
export RC_PARAMS="max_discard_ratio=100"
export CMAKE_FLAGS="-DFF_USE_EXTERNAL_LEGION=ON \
-DFF_USE_EXTERNAL_NCCL=ON \
export CMAKE_FLAGS="-DFF_USE_EXTERNAL_NCCL=ON \
-DFF_USE_EXTERNAL_JSON=ON \
-DFF_USE_EXTERNAL_FMT=ON \
-DFF_USE_EXTERNAL_SPDLOG=ON \
Expand All @@ -94,7 +93,7 @@
-DFF_USE_EXTERNAL_GBENCHMARK=ON \
-DFF_USE_EXTERNAL_LIBASSERT=ON"
'';

buildInputs = builtins.concatLists [
(with pkgs; [
zlib
Expand Down Expand Up @@ -125,7 +124,7 @@
])
(with self.packages.${system}; [
libassert
legion
realm
rapidcheckFull
doctest
])
Expand Down
1 change: 1 addition & 0 deletions lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ add_subdirectory(op-attrs)
add_subdirectory(kernels)
add_subdirectory(local-execution)
add_subdirectory(local-pcg-execution)
add_subdirectory(realm-execution)
add_subdirectory(task-spec)
add_subdirectory(utils)
add_subdirectory(ffi)
Expand Down
3 changes: 3 additions & 0 deletions lib/kernels/include/kernels/device_handle_t.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ namespace FlexFlow {
device_handle_t device_handle_t_from_managed_handle(
std::optional<ManagedPerDeviceFFHandle> const &managed_handle);

// Pointer-taking variant: when managed_handle holds a pointer, returns the GPU
// device handle built from the pointee's raw_handle(); when it is empty,
// returns the CPU device handle. A contained pointer is dereferenced without
// a null check.
device_handle_t device_handle_t_from_managed_handle_ptr(
std::optional<ManagedPerDeviceFFHandle *> const &managed_handle);

device_handle_t gpu_make_device_handle_t(PerDeviceFFHandle const &ff_handle);
device_handle_t cpu_make_device_handle_t();

Expand Down
9 changes: 9 additions & 0 deletions lib/kernels/src/kernels/device_handle_t.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,15 @@ device_handle_t device_handle_t_from_managed_handle(
}
}

// Converts an optional pointer to a managed handle into a device_handle_t.
// An empty optional yields the CPU handle; otherwise the pointee's raw handle
// is wrapped into a GPU handle. A contained pointer is dereferenced as-is,
// without a null check.
device_handle_t device_handle_t_from_managed_handle_ptr(
    std::optional<ManagedPerDeviceFFHandle *> const &managed_handle) {
  if (!managed_handle.has_value()) {
    return cpu_make_device_handle_t();
  }

  ManagedPerDeviceFFHandle *const handle_ptr = managed_handle.value();
  return gpu_make_device_handle_t(handle_ptr->raw_handle());
}

device_handle_t gpu_make_device_handle_t(PerDeviceFFHandle const &ff_handle) {
return device_handle_t{
ff_handle,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ std::unordered_map<TensorSlotName, ParallelComputationGraphEdge>
std::unordered_set<parallel_layer_guid_t>
get_initial_layers(ParallelComputationGraph const &);

// Returns the outgoing dataflow outputs of the given layer, keyed by
// TensorSlotName, with each output wrapped as a parallel_tensor_guid_t.
std::unordered_map<TensorSlotName, parallel_tensor_guid_t>
get_outgoing_tensors(ParallelComputationGraph const &,
parallel_layer_guid_t const &);
std::unordered_map<TensorSlotName, parallel_tensor_guid_t>
get_incoming_tensors(ParallelComputationGraph const &,
parallel_layer_guid_t const &);
Expand Down Expand Up @@ -107,6 +110,9 @@ ParallelTensorShape get_parallel_tensor_shape(ParallelComputationGraph const &,
std::vector<parallel_layer_guid_t>
topological_ordering(ParallelComputationGraph const &);

// Returns a map from every layer reported by get_parallel_layers(pcg) to the
// ParallelLayerAttrs that get_parallel_layer_attrs returns for it.
std::unordered_map<parallel_layer_guid_t, ParallelLayerAttrs>
get_parallel_layer_attrs_mapping(ParallelComputationGraph const &pcg);

parallel_layer_guid_t
get_parallel_layer_by_name(ParallelComputationGraph const &pcg,
std::string const &name);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,16 @@ std::unordered_set<parallel_layer_guid_t>
[](Node const &n) { return parallel_layer_guid_t{n}; });
}

// Collects the outgoing dataflow outputs of layer `l`, keyed by slot name, and
// wraps each one as a parallel_tensor_guid_t.
std::unordered_map<TensorSlotName, parallel_tensor_guid_t>
get_outgoing_tensors(ParallelComputationGraph const &pcg,
                     parallel_layer_guid_t const &l) {
  // Wraps a raw dataflow output in the PCG-level tensor guid type.
  auto const wrap_output =
      [](KwargDataflowOutput<TensorSlotName> const &output) {
        return parallel_tensor_guid_t{output};
      };

  auto const raw_outputs = get_outgoing_kwarg_dataflow_outputs_for_node(
      pcg.raw_graph, l.raw_graph_node);

  return map_values(raw_outputs, wrap_output);
}

std::unordered_map<TensorSlotName, parallel_tensor_guid_t>
get_incoming_tensors(ParallelComputationGraph const &pcg,
parallel_layer_guid_t const &l) {
Expand Down Expand Up @@ -378,6 +388,17 @@ std::vector<parallel_layer_guid_t>
[](Node const &n) { return parallel_layer_guid_t{n}; });
}

// Builds a lookup table from each parallel layer of `pcg` to its attributes.
std::unordered_map<parallel_layer_guid_t, ParallelLayerAttrs>
get_parallel_layer_attrs_mapping(ParallelComputationGraph const &pcg) {
  std::unordered_map<parallel_layer_guid_t, ParallelLayerAttrs> result;

  for (parallel_layer_guid_t const &layer : get_parallel_layers(pcg)) {
    // Layer guids are unique keys, so each emplace adds a new entry.
    result.emplace(layer, get_parallel_layer_attrs(pcg, layer));
  }

  return result;
}

parallel_layer_guid_t
get_parallel_layer_by_name(ParallelComputationGraph const &pcg,
std::string const &name) {
Expand Down
22 changes: 22 additions & 0 deletions lib/realm-execution/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Declares the realm-execution library: sources matching src/*.cc, public
# headers under include/, private headers under src/, and the listed DEPS
# (project libraries plus spdlog and Realm::Realm).
# NOTE(review): ff_add_library is defined elsewhere in the build system; the
# exact meaning of each keyword should be confirmed there.
ff_add_library(
NAME
realm-execution
SRC_PATTERNS
src/*.cc
PUBLIC_INCLUDE
include/
PRIVATE_INCLUDE
src/
DEPS
compiler
kernels
local-execution
op-attrs
pcg
spdlog
task-spec
utils
Realm::Realm
)

add_subdirectory(test)
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_ATOMIC_DEPENDENCY_SET_H
#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_ATOMIC_DEPENDENCY_SET_H

#include "realm-execution/realm.h"
#include <vector>

namespace FlexFlow {

// Holds the Realm events of a single writer and any number of readers.
// NOTE(review): the member functions are defined outside this header; the
// comments below are inferred from names and signatures only and should be
// confirmed against the implementation.
struct AtomicDependencySet {
public:
// Not default-constructible: a precondition event must always be supplied.
AtomicDependencySet() = delete;
explicit AtomicDependencySet(Realm::Event precondition);

// Record the event of a new writer / reader.
void add_writer(Realm::Event writer);
void add_reader(Realm::Event reader);

// Presumably the event a new writer / reader has to wait on before it may
// run — TODO confirm.
Realm::Event get_dependency_for_writer() const;
Realm::Event get_dependency_for_reader() const;

private:
// One writer event plus a list of reader events.
Realm::Event writer;
std::vector<Realm::Event> readers;
};

} // namespace FlexFlow

#endif
34 changes: 34 additions & 0 deletions lib/realm-execution/include/realm-execution/dependency_set.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_DEPENDENCY_SET_H
#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_DEPENDENCY_SET_H

#include "realm-execution/atomic_dependency_set.h"
#include "realm-execution/realm.h"
#include "task-spec/dynamic_graph/dynamic_value_attrs.dtg.h"
#include <unordered_map>

namespace FlexFlow {

// Per-value dependency tracking: keeps one AtomicDependencySet for each
// DynamicValueAttrs key, alongside a precondition event.
// NOTE(review): the member functions are defined outside this header; the
// comments below are inferred from names and signatures only and should be
// confirmed against the implementation.
struct DependencySet {
public:
// Not default-constructible: a precondition event must always be supplied.
DependencySet() = delete;
explicit DependencySet(Realm::Event precondition);

// Record the event of a new writer / reader of `value`.
void add_writer(DynamicValueAttrs const &value, Realm::Event writer);
void add_reader(DynamicValueAttrs const &value, Realm::Event reader);

// Presumably the event a new writer / reader of `value` has to wait on
// before it may run — TODO confirm.
Realm::Event get_dependency_for_writer(DynamicValueAttrs const &value) const;
Realm::Event get_dependency_for_reader(DynamicValueAttrs const &value) const;

private:
// Non-const accessor for the AtomicDependencySet associated with `value`.
// NOTE(review): presumably creates the entry from `precondition` when it is
// missing — confirm.
AtomicDependencySet &
get_atomic_dependency_set(DynamicValueAttrs const &value);

private:
// Precondition event passed at construction.
Realm::Event precondition;
// One atomic dependency set per tracked value.
std::unordered_map<DynamicValueAttrs, AtomicDependencySet>
atomic_dependencies;
};

} // namespace FlexFlow

#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Struct specification for FlexFlow::DeviceSpecificManagedPerDeviceFFHandle.
# It has a single field, `handle`, holding a DeviceSpecific-wrapped optional
# pointer to a ManagedPerDeviceFFHandle.
# NOTE(review): presumably consumed by the project's code generator to produce
# the matching .dtg.h header — confirm.
namespace = "FlexFlow"
name = "DeviceSpecificManagedPerDeviceFFHandle"
type = "struct"
features = [
"eq",
]

# Headers needed by the field type below.
includes = [
"<optional>",
"kernels/managed_per_device_ff_handle.h",
"task-spec/device_specific.h",
]

[[fields]]
name = "handle"
type = "::FlexFlow::DeviceSpecific<std::optional<::FlexFlow::ManagedPerDeviceFFHandle *>>"
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_DEVICE_SPECIFIC_MANAGED_PER_DEVICE_FF_HANDLE_H
#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_DEVICE_SPECIFIC_MANAGED_PER_DEVICE_FF_HANDLE_H

#include "kernels/device_handle_t.dtg.h"
#include "kernels/managed_per_device_ff_handle.h"
#include "pcg/device_id_t.dtg.h"
#include "realm-execution/device_specific_managed_per_device_ff_handle.dtg.h"

namespace FlexFlow {

// Builds a DeviceSpecificManagedPerDeviceFFHandle from a device id and an
// optional pointer to a managed handle.
// NOTE(review): definition is not in this header; presumably the result is
// tied to the given device id — confirm against the implementation.
DeviceSpecificManagedPerDeviceFFHandle make_device_specific_managed_handle(
device_id_t const &, std::optional<ManagedPerDeviceFFHandle *> const &);

// Produces a device_handle_t from a device-specific managed handle and a
// device id.
// NOTE(review): definition is not in this header; presumably the device id is
// used to access the wrapped handle on the matching device — confirm.
device_handle_t device_handle_t_from_device_specific_managed_handle(
DeviceSpecificManagedPerDeviceFFHandle const &, device_id_t);

} // namespace FlexFlow

#endif
Loading
Loading