Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,52 @@ ompi/mpi/fortran/use-mpi-tkr/mpi_kinds.ompi_module
ompi/mpi/fortran/use-mpi-tkr/mpi-tkr-sizeof.f90
ompi/mpi/fortran/use-mpi-tkr/mpi-tkr-sizeof.h

ompi/mpiext/pcollreq/c/profile/pallgather_init.c
ompi/mpiext/pcollreq/c/profile/pallgatherv_init.c
ompi/mpiext/pcollreq/c/profile/pallreduce_init.c
ompi/mpiext/pcollreq/c/profile/palltoall_init.c
ompi/mpiext/pcollreq/c/profile/palltoallv_init.c
ompi/mpiext/pcollreq/c/profile/palltoallw_init.c
ompi/mpiext/pcollreq/c/profile/pbarrier_init.c
ompi/mpiext/pcollreq/c/profile/pbcast_init.c
ompi/mpiext/pcollreq/c/profile/pexscan_init.c
ompi/mpiext/pcollreq/c/profile/pgather_init.c
ompi/mpiext/pcollreq/c/profile/pgatherv_init.c
ompi/mpiext/pcollreq/c/profile/pmpiext_pcollreq_c.h
ompi/mpiext/pcollreq/c/profile/pneighbor_allgather_init.c
ompi/mpiext/pcollreq/c/profile/pneighbor_allgatherv_init.c
ompi/mpiext/pcollreq/c/profile/pneighbor_alltoall_init.c
ompi/mpiext/pcollreq/c/profile/pneighbor_alltoallv_init.c
ompi/mpiext/pcollreq/c/profile/pneighbor_alltoallw_init.c
ompi/mpiext/pcollreq/c/profile/preduce_init.c
ompi/mpiext/pcollreq/c/profile/preduce_scatter_block_init.c
ompi/mpiext/pcollreq/c/profile/preduce_scatter_init.c
ompi/mpiext/pcollreq/c/profile/pscan_init.c
ompi/mpiext/pcollreq/c/profile/pscatter_init.c
ompi/mpiext/pcollreq/c/profile/pscatterv_init.c
ompi/mpiext/pcollreq/mpif-h/profile/pallgather_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/pallgatherv_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/pallreduce_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/palltoall_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/palltoallv_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/palltoallw_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/pbarrier_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/pbcast_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/pexscan_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/pgather_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/pgatherv_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/pneighbor_allgather_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/pneighbor_allgatherv_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/pneighbor_alltoall_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/pneighbor_alltoallv_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/pneighbor_alltoallw_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/preduce_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/preduce_scatter_block_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/preduce_scatter_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/pscan_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/pscatter_init_f.c
ompi/mpiext/pcollreq/mpif-h/profile/pscatterv_init_f.c

ompi/mpi/java/java/mpi
ompi/mpi/java/java/*.jar
ompi/mpi/java/java/*.h
Expand Down
3 changes: 2 additions & 1 deletion opal/include/opal/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,8 @@ enum {
OPAL_ERR_HEARTBEAT_ALERT = (OPAL_ERR_BASE - 67),
OPAL_ERR_FILE_ALERT = (OPAL_ERR_BASE - 68),
OPAL_ERR_MODEL_DECLARED = (OPAL_ERR_BASE - 69),
OPAL_PMIX_LAUNCH_DIRECTIVE = (OPAL_ERR_BASE - 70)
OPAL_PMIX_LAUNCH_DIRECTIVE = (OPAL_ERR_BASE - 70),
OPAL_OPERATION_SUCCEEDED = (OPAL_ERR_BASE - 71)
};

#define OPAL_ERR_MAX (OPAL_ERR_BASE - 100)
Expand Down
1 change: 1 addition & 0 deletions opal/mca/pmix/ext1x/pmix1x.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ pmix_status_t pmix1_convert_opalrc(int rc)
case OPAL_ERROR:
return PMIX_ERROR;
case OPAL_SUCCESS:
case OPAL_OPERATION_SUCCEEDED:
return PMIX_SUCCESS;
default:
return PMIX_ERROR;
Expand Down
1 change: 1 addition & 0 deletions opal/mca/pmix/ext2x/ext2x.c
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,7 @@ pmix_status_t ext2x_convert_opalrc(int rc)
case OPAL_ERROR:
return PMIX_ERROR;
case OPAL_SUCCESS:
case OPAL_OPERATION_SUCCEEDED:
return PMIX_SUCCESS;
default:
return rc;
Expand Down
26 changes: 24 additions & 2 deletions opal/mca/pmix/pmix3x/pmix/NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,21 @@ example, a bug might be fixed in the master, and then moved to the
current release as well as the "stable" bug fix release branch.


3.0.0 -- TBD
3.0.1 -- 23 Aug 2018
----------------------
**** DEPRECATION WARNING: The pmix_info_array_t struct was
**** initially marked for deprecation in the v2.x series.
**** We failed to provide clear warning at that time. This
**** therefore serves as warning of intended removal of
**** pmix_info_array_t in the future v4 release series.
- Fixed memory corruption bug in event notification
system due to uninitialized variable
- Add numeric version field to pmix_version.h
- Transfer all cached data to client dstore upon first connect
- Implement missing job control and sensor APIs


3.0.0 -- 6 July 2018
------------------------------------
**** NOTE: This release implements the complete PMIX v3.0 Standard
**** and therefore includes a number of new APIs and features. These
Expand Down Expand Up @@ -63,7 +77,15 @@ current release as well as the "stable" bug fix release branch.
- Fix several memory and file descriptor leaks


2.1.2 -- TBD
2.1.3 -- 23 Aug 2018
----------------------
- Fixed memory corruption bug in event notification
system due to uninitialized variable
- Add numeric version definition
- Transfer all cached data to client dstore upon first connect


2.1.2 -- 6 July 2018
----------------------
- Added PMIX_VERSION_RELEASE string to pmix_version.h
- Added PMIX_SPAWNED and PMIX_PARENT_ID keys to all procs
Expand Down
8 changes: 4 additions & 4 deletions opal/mca/pmix/pmix3x/pmix/VERSION
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

major=3
minor=0
release=0
release=1

# greek is used for alpha or beta release tags. If it is non-empty,
# it will be appended to the version number. It does not have to be
Expand All @@ -30,7 +30,7 @@ greek=
# command, or with the date (if "git describe" fails) in the form of
# "date<date>".

repo_rev=gitffba520
repo_rev=gitbf30a5f

# If tarball_version is not empty, it is used as the version string in
# the tarball filename, regardless of all other versions listed in
Expand All @@ -44,7 +44,7 @@ tarball_version=

# The date when this release was created

date="Jul 01, 2018"
date="Aug 20, 2018"

# The shared library version of each of PMIx's public libraries.
# These versions are maintained in accordance with the "Library
Expand Down Expand Up @@ -75,6 +75,6 @@ date="Jul 01, 2018"
# Version numbers are described in the Libtool current:revision:age
# format.

libpmix_so_version=4:0:2
libpmix_so_version=4:1:2
libpmi_so_version=1:0:0
libpmi2_so_version=1:0:0
2 changes: 1 addition & 1 deletion opal/mca/pmix/pmix3x/pmix/contrib/pmix.spec
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@

Summary: An extended/exascale implementation of PMI
Name: %{?_name:%{_name}}%{!?_name:pmix}
Version: 3.0.0
Version: 3.0.1
Release: 1%{?dist}
License: BSD
Group: Development/Libraries
Expand Down
1 change: 1 addition & 0 deletions opal/mca/pmix/pmix3x/pmix/include/pmix_common.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,7 @@ typedef int pmix_status_t;
#define PMIX_LAUNCH_DIRECTIVE (PMIX_ERR_OP_BASE - 24)
#define PMIX_LAUNCHER_READY (PMIX_ERR_OP_BASE - 25)
#define PMIX_OPERATION_IN_PROGRESS (PMIX_ERR_OP_BASE - 26)
#define PMIX_OPERATION_SUCCEEDED (PMIX_ERR_OP_BASE - 27)


/* define a starting point for system error constants so
Expand Down
3 changes: 3 additions & 0 deletions opal/mca/pmix/pmix3x/pmix/include/pmix_version.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
* Copyright (c) 2016 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2018 IBM Corporation. All rights reserved.
* Copyright (c) 2018 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -17,4 +18,6 @@
#define PMIX_VERSION_MAJOR @pmixmajor@
#define PMIX_VERSION_MINOR @pmixminor@
#define PMIX_VERSION_RELEASE @pmixrelease@

#define PMIX_NUMERIC_VERSION 0x00030001
#endif
21 changes: 20 additions & 1 deletion opal/mca/pmix/pmix3x/pmix/src/common/pmix_control.c
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ static void query_cbfunc(struct pmix_peer_t *peer,
/* unpack any returned data */
cnt = 1;
PMIX_BFROPS_UNPACK(rc, peer, buf, &results->ninfo, &cnt, PMIX_SIZE);
if (PMIX_SUCCESS != rc) {
if (PMIX_SUCCESS != rc && PMIX_ERR_UNPACK_READ_PAST_END_OF_BUFFER != rc) {
PMIX_ERROR_LOG(rc);
goto complete;
}
Expand Down Expand Up @@ -332,6 +332,12 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm
return PMIX_ERR_INIT;
}

/* sanity check */
if (NULL == monitor) {
PMIX_RELEASE_THREAD(&pmix_global_lock);
return PMIX_ERR_BAD_PARAM;
}

/* if we are the server, then we just issue the request and
* return the response */
if (PMIX_PROC_IS_SERVER(pmix_globals.mypeer) &&
Expand All @@ -355,6 +361,19 @@ PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pm
}
PMIX_RELEASE_THREAD(&pmix_global_lock);

/* if the monitor is PMIX_SEND_HEARTBEAT, then send it */
if (0 == strncmp(monitor->key, PMIX_SEND_HEARTBEAT, PMIX_MAX_KEYLEN)) {
msg = PMIX_NEW(pmix_buffer_t);
if (NULL == msg) {
return PMIX_ERR_NOMEM;
}
PMIX_PTL_SEND_ONEWAY(rc, pmix_client_globals.myserver, msg, PMIX_PTL_TAG_HEARTBEAT);
if (PMIX_SUCCESS != rc) {
PMIX_RELEASE(msg);
}
return rc;
}

/* if we are a client, then relay this request to the server */
msg = PMIX_NEW(pmix_buffer_t);
/* pack the cmd */
Expand Down
8 changes: 4 additions & 4 deletions opal/mca/pmix/pmix3x/pmix/src/mca/gds/ds12/gds_dstore.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
* Copyright (c) 2016 IBM Corporation. All rights reserved.
* Copyright (c) 2016-2018 IBM Corporation. All rights reserved.
* Copyright (c) 2016-2017 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2018 Research Organization for Information Science
Expand Down Expand Up @@ -3170,7 +3170,7 @@ static pmix_status_t dstore_register_job_info(struct pmix_peer_t *pr,
char *msg;
pmix_status_t rc;
pmix_proc_t proc;
pmix_rank_info_t *rinfo;
pmix_rank_t rank;

pmix_output_verbose(2, pmix_gds_base_framework.framework_output,
"[%s:%d] gds:dstore:register_job_info for peer [%s:%d]",
Expand All @@ -3187,8 +3187,8 @@ static pmix_status_t dstore_register_job_info(struct pmix_peer_t *pr,
return rc;
}

PMIX_LIST_FOREACH(rinfo, &ns->ranks, pmix_rank_info_t) {
proc.rank = rinfo->pname.rank;
for (rank=0; rank < ns->nprocs; rank++) {
proc.rank = rank;
rc = _store_job_info(&proc);
if (PMIX_SUCCESS != rc) {
PMIX_ERROR_LOG(rc);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
*
* $COPYRIGHT$
*
Expand All @@ -24,6 +24,7 @@ pmix_status_t pmix_psensor_base_start(pmix_peer_t *requestor, pmix_status_t erro
{
pmix_psensor_active_module_t *mod;
pmix_status_t rc;
bool didit = false;

pmix_output_verbose(5, pmix_psensor_base_framework.framework_output,
"%s:%d sensor:base: starting sensors",
Expand All @@ -36,17 +37,25 @@ pmix_status_t pmix_psensor_base_start(pmix_peer_t *requestor, pmix_status_t erro
if (PMIX_SUCCESS != rc && PMIX_ERR_TAKE_NEXT_OPTION != rc) {
return rc;
}
didit = true;
}
}

/* if none of the components could do it, then report
* not supported upwards so the server knows to ask
* the host to try */
if (!didit) {
return PMIX_ERR_NOT_SUPPORTED;
}

return PMIX_SUCCESS;
}

pmix_status_t pmix_psensor_base_stop(pmix_peer_t *requestor,
char *id)
{
pmix_psensor_active_module_t *mod;
pmix_status_t rc;
pmix_status_t rc, ret = PMIX_SUCCESS;

pmix_output_verbose(5, pmix_psensor_base_framework.framework_output,
"%s:%d sensor:base: stopping sensors",
Expand All @@ -57,10 +66,14 @@ pmix_status_t pmix_psensor_base_stop(pmix_peer_t *requestor,
if (NULL != mod->module->stop) {
rc = mod->module->stop(requestor, id);
if (PMIX_SUCCESS != rc && PMIX_ERR_TAKE_NEXT_OPTION != rc) {
return rc;
if (PMIX_SUCCESS == ret) {
ret = rc;
}
/* need to continue to ensure that all
* sensors have been stopped */
}
}
}

return PMIX_SUCCESS;
return ret;
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
* reserved.
*
* Copyright (c) 2017 Intel, Inc. All rights reserved.
* Copyright (c) 2017-2018 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -30,7 +30,7 @@
#include "src/util/output.h"
#include "src/util/show_help.h"
#include "src/include/pmix_globals.h"
#include "src/mca/ptl/ptl.h"
#include "src/mca/ptl/base/base.h"

#include "src/mca/psensor/base/base.h"
#include "psensor_heartbeat.h"
Expand Down Expand Up @@ -168,6 +168,7 @@ static pmix_status_t heartbeat_start(pmix_peer_t *requestor, pmix_status_t error
{
pmix_heartbeat_trkr_t *ft;
size_t n;
pmix_ptl_posted_recv_t *rcv;

PMIX_OUTPUT_VERBOSE((1, pmix_psensor_base_framework.framework_output,
"[%s:%d] checking heartbeat monitoring for requestor %s:%d",
Expand Down Expand Up @@ -202,6 +203,17 @@ static pmix_status_t heartbeat_start(pmix_peer_t *requestor, pmix_status_t error
return PMIX_ERR_BAD_PARAM;
}

/* if the recv hasn't been posted, do so now */
if (!mca_psensor_heartbeat_component.recv_active) {
/* setup to receive heartbeats */
rcv = PMIX_NEW(pmix_ptl_posted_recv_t);
rcv->tag = PMIX_PTL_TAG_HEARTBEAT;
rcv->cbfunc = pmix_psensor_heartbeat_recv_beats;
/* add it to the beginning of the list of recvs */
pmix_list_prepend(&pmix_ptl_globals.posted_recvs, &rcv->super);
mca_psensor_heartbeat_component.recv_active = true;
}

/* need to push into our event base to add this to our trackers */
pmix_event_assign(&ft->cdev, pmix_psensor_base.evbase, -1,
EV_WRITE, add_tracker, ft);
Expand Down Expand Up @@ -241,7 +253,7 @@ static pmix_status_t heartbeat_stop(pmix_peer_t *requestor, char *id)
cd->requestor = requestor;
cd->id = strdup(id);

/* need to push into our event base to add this to our trackers */
/* need to push into our event base to remove this from our trackers */
pmix_event_assign(&cd->ev, pmix_psensor_base.evbase, -1,
EV_WRITE, del_tracker, cd);
PMIX_POST_OBJECT(cd);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
*
* Copyright (c) 2017 Intel, Inc. All rights reserved.
* Copyright (c) 2017-2018 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -28,6 +28,7 @@ BEGIN_C_DECLS

/* Component structure for the heartbeat psensor: embeds the base
 * psensor component as its first member and adds heartbeat state. */
typedef struct {
pmix_psensor_base_component_t super;
/* flag recording whether the heartbeat receive has been posted;
 * NOTE(review): appears to be checked and set when monitoring starts
 * so the receive is posted only once -- confirm against the component source */
bool recv_active;
/* NOTE(review): presumably the list of active heartbeat trackers -- confirm */
pmix_list_t trackers;
} pmix_psensor_heartbeat_component_t;

Expand Down
Loading