Skip to content

Open MPI appears to ignore the max_msg_size and related fields reported by OFI. #6976

Closed
@hppritcha

Description

@hppritcha

This issue tracks a discussion on the users mailing list:

https://www.mail-archive.com/users@lists.open-mpi.org//msg33397.html

The test case works with the ob1 PML, fails with a PSM2 error when using the PSM2 MTL, and fails silently when using the OFI MTL (most likely with the PSM2 provider).
Test case:

#define _GNU_SOURCE
#include <limits.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>

/* Byte offset, within the gathered buffer, of the first zero byte found by
 * the result check in main(); stays 0 when no such byte is found. */
long failed_offset = 0;

/* Size in bytes of one element of the contiguous MPI datatype built in
 * main(). Default is 64 KiB; may be overridden from the command line. */
size_t chunk_size = 1 << 16;
/* Number of such elements each rank contributes to the MPI_Allgather.
 * With the defaults, one rank's contribution is 2^32 + 2^16 bytes, i.e.
 * just over 4 GiB. May be overridden from the command line. */
size_t nchunks = (1 << 16) + 1;

int main(int argc, char * argv[])
{
    if (argc >= 2) chunk_size = atol(argv[1]);
    if (argc >= 3) nchunks = atol(argv[1]);

    MPI_Init(&argc, &argv);
    /*
     * This function returns:
     *  0 on success.
     *  a non-zero MPI Error code if MPI_Allgather returned one.
     *  -1 if no MPI Error code was returned, but the result of Allgather
     *  was wrong.
     *  -2 if memory allocation failed.
     *
     * (note that the MPI document guarantees that MPI error codes are
     * positive integers)
     */

    int size, rank;
    MPI_Comm_size(MPI_COMM_WORLD, &size);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    int err;

    char * check_text;
    int rc = asprintf(&check_text, "MPI_Allgather, %d nodes, 0x%zx chunks of 0x%zx bytes, total %d * 0x%zx bytes", size, nchunks, chunk_size, size, chunk_size * nchunks);
    if (rc < 0) abort();

    if (!rank) printf("%s: ...\n", check_text);

    MPI_Datatype mpi_ft;
    MPI_Type_contiguous(chunk_size, MPI_BYTE, &mpi_ft);
    MPI_Type_commit(&mpi_ft);
    MPI_Comm_set_errhandler(MPI_COMM_WORLD, MPI_ERRORS_RETURN);
    void * data = malloc(nchunks * size * chunk_size);
    memset(data, 0, nchunks * size * chunk_size);
    int alloc_ok = data != NULL;
    MPI_Allreduce(MPI_IN_PLACE, &alloc_ok, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD);
    if (alloc_ok) {
        memset(((char*)data) + nchunks * chunk_size * rank, 0x42, nchunks * chunk_size);
        err = MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL,
                data, nchunks,
                mpi_ft, MPI_COMM_WORLD);
        if (err == 0) {
            void * p = memchr(data, 0, nchunks * size * chunk_size);
            if (p != NULL) {
                /* We found a zero, we shouldn't ! */
                err = -1;
                failed_offset = ((char*)p)-(char*)data;
            }
        }
    } else {
        err = -2;
    }
    if (data) free(data);
    MPI_Type_free(&mpi_ft);

    if (!rank) {
        printf("%s: %s\n", check_text, err == 0 ? "ok" : "NOK");
    }
    if (err == -2) {
        puts("Could not allocate memory buffer");
    } else if (err != 0) {
        int someone_has_minusone = (err == -1);
        MPI_Allreduce(MPI_IN_PLACE, &someone_has_minusone, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
        if (someone_has_minusone) {
            long * offsets = malloc(size * sizeof(long));
            offsets[rank] = failed_offset;
            MPI_Gather(&failed_offset, 1, MPI_LONG,
                    offsets, 1, MPI_LONG, 0, MPI_COMM_WORLD);
            if (!rank) {
                for(int i = 0 ; i < size ; i++) {
                    printf("node %d failed_offset = 0x%lx\n", i, offsets[i]);
                }
            }
            free(offsets);
        }

        if (!rank) {
            if (err > 0) { /* return an MPI Error if we've got one. */
                /* we often get MPI_ERR_OTHER... mostly useless */
                char error[1024];
                int errorlen = sizeof(error);
                MPI_Error_string(err, error, &errorlen);
                printf("MPI error returned:\n%s\n", error);
            }
        }
    }
    free(check_text);
    MPI_Finalize();
}

Metadata

Metadata

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions