CMakeLists.txt: 1 addition, 0 deletions
@@ -30,6 +30,7 @@ option(GGML_NO_ACCELERATE "ggml: disable Accelerate framework" OFF)
option(GGML_OPENBLAS "ggml: use OpenBLAS" OFF)
option(GGML_CLBLAST "ggml: use clBLAST" OFF)
option(GGML_CUBLAS "ggml: use cuBLAS" OFF)
option(GGML_MPI "ggml: use MPI" OFF)

# sanitizers

include/ggml/ggml.h: 4 additions, 2 deletions
@@ -381,6 +381,9 @@ extern "C" {
        GGML_OP_CROSS_ENTROPY_LOSS_BACK,

        GGML_OP_COUNT,

        GGML_OP_SEND,
        GGML_OP_RECV,
    };


@@ -431,8 +434,7 @@ extern "C" {
        char name[GGML_MAX_NAME];

        void * extra; // extra things e.g. for ggml-cuda.cu

        char padding[4];
        int tag; // custom metadata that doesn't require a full pointer
    };

    static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
src/CMakeLists.txt: 13 additions, 0 deletions
@@ -202,6 +202,19 @@ if (GGML_CUBLAS)
message(WARNING "cuBLAS not found")
endif()
endif()
if (GGML_MPI)
cmake_minimum_required(VERSION 3.10)
find_package(MPI)
if (MPI_C_FOUND)
message(STATUS "MPI found")
add_compile_definitions(GGML_USE_MPI)
add_compile_definitions(${MPI_C_COMPILE_DEFINITIONS})
set(GGML_EXTRA_LIBS ${GGML_EXTRA_LIBS} ${MPI_C_LIBRARIES})
set(GGML_EXTRA_INCS ${GGML_EXTRA_INCS} ${MPI_C_INCLUDE_DIRS})
else()
message(WARNING "MPI not found")
endif()
endif()


if (GGML_PERF)
src/ggml.c: 102 additions, 1 deletion
@@ -26,6 +26,10 @@
#include <limits.h>
#include <stdarg.h>

#ifdef GGML_USE_MPI
#include <mpi.h>
#endif

#ifdef GGML_USE_METAL
#include <unistd.h>
#endif
@@ -4587,7 +4591,7 @@ struct ggml_tensor * ggml_new_tensor_impl(
        /*.data =*/ (data == NULL && !ctx->no_alloc) ? (void *)(result + 1) : data,
        /*.name =*/ { 0 },
        /*.extra =*/ NULL,
        /*.pad =*/ { 0 },
        /*.tag =*/ 0,
    };

    // TODO: this should not be needed as long as we don't rely on aligned SIMD loads
@@ -4681,6 +4685,36 @@ struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
    return ggml_new_tensor_impl(ctx, src->type, src->n_dims, src->ne, NULL);
}

struct ggml_tensor * ggml_send_tensor(
        struct ggml_context * ctx,
        struct ggml_tensor * src,
        int dst_rank) {

    struct ggml_tensor * result = ggml_new_i32(ctx, 0);

    result->op = GGML_OP_SEND;
    result->src0 = src;
    result->tag = dst_rank;

    return result;
}

struct ggml_tensor * ggml_recv_tensor(
        struct ggml_context * ctx,
        struct ggml_tensor * parent,
        struct ggml_tensor * dst,
        int src_rank) {
    UNUSED(ctx);

    struct ggml_tensor * result = dst;

    result->op = GGML_OP_RECV;
    result->src0 = parent; // just used for graph computation
    result->tag = src_rank;

    return result;
}
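
// Usage sketch (editor's illustration, not part of this change): one way these two
// constructors could be wired into a simple two-rank pipeline. It assumes MPI_Init has
// already been called, the program runs under mpirun with at least two ranks, and the
// tensor sizes below are hypothetical; data initialization is omitted.
//
//     int rank;
//     MPI_Comm_rank(MPI_COMM_WORLD, &rank);
//
//     struct ggml_cgraph gf = {0};
//     gf.n_threads = 1;
//
//     if (rank == 0) {
//         // rank 0 computes an activation and ships it to rank 1
//         struct ggml_tensor * x   = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 64);
//         struct ggml_tensor * act = ggml_mul(ctx, x, x);
//         ggml_build_forward_expand(&gf, ggml_send_tensor(ctx, act, /*dst_rank =*/ 1));
//     } else {
//         // rank 1 receives into a pre-allocated buffer and continues the computation
//         struct ggml_tensor * buf = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 64);
//         struct ggml_tensor * in  = ggml_recv_tensor(ctx, NULL, buf, /*src_rank =*/ 0);
//         ggml_build_forward_expand(&gf, ggml_sqr(ctx, in));
//     }
//
//     ggml_graph_compute(ctx, &gf);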

struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
    memset(tensor->data, 0, ggml_nbytes(tensor));
    return tensor;
@@ -8304,6 +8338,52 @@ static void ggml_compute_forward_dup(
    }
}

// ggml_compute_forward_recv

static void ggml_compute_forward_recv(
        const struct ggml_compute_params * params,
        struct ggml_tensor * dst) {
    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return;
    }
    GGML_ASSERT(dst->type == GGML_TYPE_F32);
#ifdef GGML_USE_MPI
    MPI_Status status;
    int my_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    UNUSED(my_rank); // only referenced by the commented-out debug prints below
    // fprintf(stderr, "(%d) Receiving from (%d)\n", my_rank, (int)dst->tag);
    int retval = MPI_Recv(dst->data, ggml_nelements(dst), MPI_FLOAT, (int)dst->tag, MPI_ANY_TAG, MPI_COMM_WORLD, &status);
    // fprintf(stderr, "(%d) Received from (%d)\n", my_rank, (int)dst->tag);
    GGML_ASSERT(retval == MPI_SUCCESS);
#else
    GGML_ASSERT(false);
#endif
}

// ggml_compute_forward_send

static void ggml_compute_forward_send(
        const struct ggml_compute_params * params,
        struct ggml_tensor * src,
        struct ggml_tensor * dst) {
    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return;
    }
    GGML_ASSERT(src->type == GGML_TYPE_F32);
    GGML_ASSERT(dst->type == GGML_TYPE_I32);
#ifdef GGML_USE_MPI
    int my_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
    UNUSED(my_rank); // only referenced by the commented-out debug prints below
    // fprintf(stderr, "(%d) Sending to (%d)\n", my_rank, (int)dst->tag);
    int retval = MPI_Send(src->data, ggml_nelements(src), MPI_FLOAT, (int)dst->tag, 0, MPI_COMM_WORLD);
    // fprintf(stderr, "(%d) Sent to (%d)\n", my_rank, (int)dst->tag);
    ggml_set_i32(dst, retval);
    GGML_ASSERT(retval == MPI_SUCCESS);
#else
    GGML_ASSERT(false);
#endif
}
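
// Note (editor's addition): MPI_Send and MPI_Recv above are blocking, so every SEND node
// evaluated on one rank needs a matching RECV node on the peer rank, evaluated in a
// compatible order, or the two processes will deadlock. Both sides must also agree on the
// tensor's type (F32 here) and element count, since the receive count is taken from the
// pre-allocated dst tensor. A matching pair, schematically:
//
//     rank 0 graph: ... -> ggml_send_tensor(ctx, t, /*dst_rank =*/ 1)
//     rank 1 graph: ggml_recv_tensor(ctx, NULL, buf, /*src_rank =*/ 0) -> ...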

// ggml_compute_forward_add

static void ggml_compute_forward_add_f32(
@@ -14931,6 +15011,14 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
            {
                ggml_compute_forward_dup(params, tensor->src0, tensor);
            } break;
        case GGML_OP_SEND:
            {
                ggml_compute_forward_send(params, tensor->src0, tensor);
            } break;
        case GGML_OP_RECV:
            {
                ggml_compute_forward_recv(params, tensor);
            } break;
        case GGML_OP_ADD:
            {
                ggml_compute_forward_add(params, tensor->src0, tensor->src1, tensor);
@@ -15229,6 +15317,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor * tensor, bool inplace) {
                    src0->grad = ggml_add_impl(ctx, src0->grad, tensor->grad, inplace);
                }
            } break;
        case GGML_OP_SEND:
            {
                GGML_ASSERT(false); // TODO: not implemented
            } break;
        case GGML_OP_RECV:
            {
                GGML_ASSERT(false); // TODO: not implemented
            } break;
        case GGML_OP_ADD:
            {
                if (src0->grad) {
@@ -16459,6 +16555,11 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)

                        work_size = MAX(work_size, cur);
                    } break;
                case GGML_OP_SEND:
                case GGML_OP_RECV:
                    {
                        node->n_tasks = 1;
                    } break;
                case GGML_OP_ADD:
                case GGML_OP_ADD1:
                    {