WHAT:    Merge the PMIx branch into the devel repo, creating a new
               OPAL “pmix” framework to abstract PMI support for all RTEs.
               Replace the ORTE daemon-level collectives with a new PMIx
               server, and update the ORTE grpcomm framework to support
               server-to-server collectives.

WHY:      We’ve had problems dealing with variations in PMI implementations,
               and need to extend the existing PMI definitions to meet exascale
               requirements.

WHEN:   Mon, Aug 25

WHERE:  https://github.com/rhc54/ompi-svn-mirror.git

Several community members have been working on a refactoring of the current PMI support within OMPI. Although the APIs are common, Slurm and Cray implement a different range of capabilities, and package them differently. For example, Cray provides an integrated PMI-1/2 library, while Slurm separates the two and requires the user to specify the one to be used at runtime. In addition, several bugs in the Slurm implementations have caused problems requiring extra coding.

All this has led to a slew of #if’s in the PMI code and bugs when the corner-case logic for one implementation accidentally traps the other. Extending this support to other implementations would have increased this complexity to an unacceptable level.

Accordingly, we have:

* Created a new OPAL “pmix” framework to abstract the PMI support, with separate components for Cray, Slurm PMI-1, and Slurm PMI-2 implementations.

* Replaced the current ORTE grpcomm daemon-based collective operation with an integrated PMIx server, and updated the grpcomm APIs to provide more flexible, multi-algorithm support for collective operations. At this time, only the xcast and allgather operations are supported.

* Replaced the current global collective id with a signature based on the names of the participating procs. This allows an unlimited number of collectives to be executed by any group of processes, subject to the requirement that only one collective can be active at a time for any given combination of procs. Note that a proc can be involved in any number of simultaneous collectives - it is the specific combination of procs that is subject to the constraint (sketched after this list).

* Removed the prior OMPI/OPAL modex code.

* Added new macros for executing modex send/recv operations to simplify use of the new APIs (see the sketch after this list). The send macros allow the caller to specify whether or not the BTL supports async modex operations - if so, the non-blocking “fence” operation is used, provided the active PMIx component supports it. Otherwise, the default is a full blocking modex exchange, as we currently perform.

* Retained the current flag that directs us to use a blocking fence operation, but only to retrieve data upon demand.
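
As a minimal, hedged C sketch of the signature-based collective described above (illustrative code, not code from the commit; the helper name fence_over_group is hypothetical), a caller builds the ordered array of participant identifiers that forms the collective's signature and then fences across exactly that group, mirroring the dpm changes further down in this diff:

#include <stdlib.h>
#include <string.h>

#include "opal/constants.h"
#include "opal/class/opal_list.h"
#include "opal/mca/pmix/pmix.h"
#include "orte/util/name_fns.h"   /* orte_namelist_t; header location assumed */

/* Fence across the procs named in an already-sorted participant list.
 * The ordering matters: every participant must compute the identical
 * signature for this particular combination of procs. */
static int fence_over_group(opal_list_t *participants)
{
    orte_namelist_t *nm;
    size_t n = opal_list_get_size(participants);
    opal_identifier_t *ids;
    size_t i = 0;

    ids = (opal_identifier_t*)malloc(n * sizeof(opal_identifier_t));
    if (NULL == ids) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }
    /* copy the participant names into the flat array of identifiers */
    OPAL_LIST_FOREACH(nm, participants, orte_namelist_t) {
        memcpy(&ids[i++], &nm->name, sizeof(opal_identifier_t));
    }
    /* blocking fence across exactly this combination of procs */
    opal_pmix.fence(ids, i);
    free(ids);
    return OPAL_SUCCESS;
}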
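
And a minimal sketch of the new modex send/recv macros described above, following the pattern the MTL components use later in this diff; the key name "example-tag" and the uint32_t payload are purely illustrative, and the sync/scope constants are the ones these macros are invoked with elsewhere in this commit:

#include <stddef.h>
#include <stdint.h>

#include "opal/constants.h"
#include "opal/mca/pmix/pmix.h"
#include "ompi/proc/proc.h"

/* Publish a small per-proc value during startup, then fetch a peer's
 * copy on demand once the modex/fence has completed. */
static int modex_example(ompi_proc_t *peer)
{
    int rc;
    uint32_t my_value = 42;       /* hypothetical per-proc datum */
    uint32_t *peer_value = NULL;
    size_t size;

    /* make our value visible to all remote procs; PMIX_SYNC_REQD flags
     * that the data must be exchanged before it can be needed */
    OPAL_MODEX_SEND_STRING(rc, PMIX_SYNC_REQD, PMIX_GLOBAL,
                           "example-tag", &my_value, sizeof(my_value));
    if (OPAL_SUCCESS != rc) {
        return rc;
    }

    /* later: retrieve the same key as published by a specific peer */
    OPAL_MODEX_RECV_STRING(rc, "example-tag", &peer->super,
                           (uint8_t**)&peer_value, &size);
    return rc;
}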

This commit was SVN r32570.
This commit is contained in:
Ralph Castain 2014-08-21 18:56:47 +00:00
parent c3beb0472e
commit aec5cd08bd
210 changed files with 13626 additions and 9104 deletions

View file

@ -21,6 +21,37 @@
# $HEADER$
#
#
# special check for cray pmi, uses macro(s) from pkg.m4
#
# OPAL_CHECK_CRAY_PMI(prefix, [action-if-found], [action-if-not-found])
# --------------------------------------------------------
AC_DEFUN([OPAL_CHECK_CRAY_PMI],[
# set defaults
opal_have_pmi1=0
opal_enable_pmi2=0
PKG_CHECK_MODULES([CRAY_PMI], [cray-pmi],
[$1_LDFLAGS="$CRAY_PMI_LIBS"
$1_CPPFLAGS="$CRAY_PMI_CFLAGS"
$1_LIBS="$1_LDFLAGS"
opal_have_pmi1=1
opal_enable_pmi2=1
$2],
[AC_MSG_RESULT([no])
$3])
AC_DEFINE_UNQUOTED([WANT_PMI_SUPPORT],
[$opal_enable_pmi],
[Whether we want PMI support])
AC_DEFINE_UNQUOTED([WANT_PMI2_SUPPORT],
[$opal_have_pmi2],
[Whether we have PMI2 support])
AM_CONDITIONAL(WANT_PMI_SUPPORT, [test "$opal_enable_pmi" = 1])
AM_CONDITIONAL(WANT_PMI2_SUPPORT, [test "$opal_have_pmi2" = 1])
])
# OPAL_CHECK_PMI(prefix, [action-if-found], [action-if-not-found])
# --------------------------------------------------------
AC_DEFUN([OPAL_CHECK_PMI],[
@ -45,7 +76,6 @@ AC_DEFUN([OPAL_CHECK_PMI],[
opal_check_pmi_$1_LIBS=
AC_MSG_CHECKING([if user requested PMI support])
opal_have_pmi_support=no
AS_IF([test "$with_pmi" = "no"],
[AC_MSG_RESULT([no])
$3],

View file

@ -20,6 +20,7 @@
* All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -33,7 +34,7 @@
#include "ompi/constants.h"
#include "opal/mca/hwloc/base/base.h"
#include "opal/mca/dstore/dstore.h"
#include "opal/dss/dss.h"
#include "ompi/proc/proc.h"
@ -1526,6 +1527,8 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm,
char *recvbuf;
ompi_proc_t **proc_list=NULL;
int i;
opal_list_t myvals;
opal_value_t *kv;
local_rank = ompi_comm_rank (local_comm);
local_size = ompi_comm_size (local_comm);
@ -1634,7 +1637,22 @@ ompi_proc_t **ompi_comm_get_rprocs ( ompi_communicator_t *local_comm,
/* set the locality of the remote procs */
for (i=0; i < rsize; i++) {
ompi_proc_set_locality(rprocs[i]);
/* get the locality information - do not use modex recv for
* this request as that will automatically cause the hostname
* to be loaded as well. All RTEs are required to provide this
* information at startup for procs on our node. Thus, not
* finding the info indicates that the proc is non-local.
*/
OBJ_CONSTRUCT(&myvals, opal_list_t);
if (OMPI_SUCCESS != opal_dstore.fetch(opal_dstore_internal,
(opal_identifier_t*)&rprocs[i]->super.proc_name,
OPAL_DSTORE_LOCALITY, &myvals)) {
rprocs[i]->super.proc_flags = OPAL_PROC_NON_LOCAL;
} else {
kv = (opal_value_t*)opal_list_get_first(&myvals);
rprocs[i]->super.proc_flags = kv->data.uint16;
}
OPAL_LIST_DESTRUCT(&myvals);
}
/* And now add the information into the database */

View file

@ -17,7 +17,7 @@
* Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -28,6 +28,8 @@
#include "ompi_config.h"
#include "opal/dss/dss.h"
#include "opal/mca/pmix/pmix.h"
#include "ompi/proc/proc.h"
#include "ompi/communicator/communicator.h"
#include "ompi/op/op.h"
@ -38,7 +40,6 @@
#include "ompi/mca/rte/rte.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/request/request.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "ompi/runtime/mpiruntime.h"
BEGIN_C_DECLS
@ -147,7 +148,7 @@ int ompi_comm_cid_init (void)
#if OMPI_ENABLE_THREAD_MULTIPLE
ompi_proc_t **procs, *thisproc;
uint8_t thread_level;
void *tlpointer;
uint8_t *tlpointer;
int ret;
size_t i, size, numprocs;
@ -161,7 +162,7 @@ int ompi_comm_cid_init (void)
for ( i=0; i<numprocs; i++ ) {
thisproc = procs[i];
ret = ompi_modex_recv_string("MPI_THREAD_LEVEL", thisproc, &tlpointer, &size);
OPAL_MODEX_RECV_STRING(ret, "MPI_THREAD_LEVEL", &thisproc->super, (uint8_t**)&tlpointer, &size);
if (OMPI_SUCCESS == ret) {
thread_level = *((uint8_t *) tlpointer);
if ( OMPI_THREADLEVEL_IS_MULTIPLE (thread_level) ) {

View file

@ -39,7 +39,9 @@
#include "opal/util/argv.h"
#include "opal/util/opal_getcwd.h"
#include "opal/dss/dss.h"
#include "opal/mca/dstore/dstore.h"
#include "opal/mca/hwloc/base/base.h"
#include "opal/mca/pmix/pmix.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/grpcomm/base/base.h"
@ -126,7 +128,6 @@ typedef struct {
bool event_active;
uint32_t id;
uint32_t cid;
orte_grpcomm_coll_id_t disconnectid;
orte_rml_tag_t tag;
ompi_dpm_base_paccept_connect_callback_fn_t cbfunc;
void *cbdata;
@ -181,12 +182,9 @@ static int connect_accept(ompi_communicator_t *comm, int root,
int32_t i,j, new_proc_len;
ompi_group_t *new_group_pointer;
orte_grpcomm_coll_id_t id[2];
orte_grpcomm_collective_t modex;
orte_namelist_t *nm;
orte_rml_recv_cb_t xfer;
orte_process_name_t carport;
orte_dpm_prequest_t *preq;
OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_framework.framework_output,
"%s dpm:orte:connect_accept with port %s %s",
@ -226,82 +224,6 @@ static int connect_accept(ompi_communicator_t *comm, int root,
}
if ( rank == root ) {
OBJ_CONSTRUCT(&xfer, orte_rml_recv_cb_t);
if (send_first) {
/* Get a collective id for the modex we need later on - we
* have to get a globally unique id for this purpose as
* multiple threads can do simultaneous connect/accept,
* and the same processes can be engaged in multiple
* connect/accepts at the same time. Only one side
* needs to do this, so have it be send_first
*/
nbuf = OBJ_NEW(opal_buffer_t);
if (NULL == nbuf) {
return OMPI_ERROR;
}
/* tell the HNP how many id's we need - we need one for
* executing the connect_accept, and another when we
* disconnect
*/
i = 2;
if (OPAL_SUCCESS != (rc = opal_dss.pack(nbuf, &i, 1, OPAL_INT32))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(nbuf);
return OMPI_ERROR;
}
/* send the request */
rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, nbuf,
ORTE_RML_TAG_COLL_ID_REQ,
orte_rml_send_callback, NULL);
/* wait for the id's */
xfer.active = true;
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_COLL_ID,
ORTE_RML_NON_PERSISTENT,
orte_rml_recv_callback, &xfer);
/* wait for response */
OMPI_WAIT_FOR_COMPLETION(xfer.active);
/* create a buffer to send to the other side */
nbuf = OBJ_NEW(opal_buffer_t);
if (NULL == nbuf) {
return OMPI_ERROR;
}
/* unpack the id's */
i=2;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(&xfer.data, id, &i, ORTE_GRPCOMM_COLL_ID_T))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&xfer);
return OMPI_ERROR;
}
/* send them to my peer on the other side */
if (ORTE_SUCCESS != (rc = opal_dss.pack(nbuf, id, 2, ORTE_GRPCOMM_COLL_ID_T))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&xfer);
return OMPI_ERROR;
}
OBJ_DESTRUCT(&xfer); // done with the received data
rc = orte_rml.send_buffer_nb(&port, nbuf, tag, orte_rml_send_callback, NULL);
} else {
/* wait to recv the collective id's */
xfer.active = true;
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, tag,
ORTE_RML_NON_PERSISTENT,
orte_rml_recv_callback, &xfer);
/* wait for response */
OMPI_WAIT_FOR_COMPLETION(xfer.active);
/* unpack the id's */
i=2;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(&xfer.data, id, &i, ORTE_GRPCOMM_COLL_ID_T))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&xfer);
return OMPI_ERROR;
}
OBJ_DESTRUCT(&xfer);
}
OPAL_OUTPUT_VERBOSE((1, ompi_dpm_base_framework.framework_output,
"%s dpm:orte:connect_accept working with new collective ids %u %u",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), id[0], id[1]));
/* Generate the message buffer containing the number of processes and the list of
participating processes */
nbuf = OBJ_NEW(opal_buffer_t);
@ -309,11 +231,6 @@ static int connect_accept(ompi_communicator_t *comm, int root,
return OMPI_ERROR;
}
/* pass the collective id's so we can all use them */
if (ORTE_SUCCESS != (rc = opal_dss.pack(nbuf, id, 2, ORTE_GRPCOMM_COLL_ID_T))) {
ORTE_ERROR_LOG(rc);
goto exit;
}
if (OPAL_SUCCESS != (rc = opal_dss.pack(nbuf, &size, 1, OPAL_INT))) {
ORTE_ERROR_LOG(rc);
goto exit;
@ -453,19 +370,11 @@ static int connect_accept(ompi_communicator_t *comm, int root,
ORTE_ERROR_LOG(rc);
goto exit;
}
/* unload collective id's */
num_vals = 2;
if (ORTE_SUCCESS != (rc = opal_dss.unpack(nrbuf, id, &num_vals, ORTE_GRPCOMM_COLL_ID_T))) {
ORTE_ERROR_LOG(rc);
goto exit;
}
num_vals = 1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(nrbuf, &rsize, &num_vals, OPAL_INT))) {
ORTE_ERROR_LOG(rc);
goto exit;
}
rc = ompi_proc_unpack(nrbuf, rsize, &rprocs, false, &new_proc_len, &new_proc_list);
if ( OMPI_SUCCESS != rc ) {
goto exit;
@ -480,6 +389,9 @@ static int connect_accept(ompi_communicator_t *comm, int root,
if (new_proc_len > 0) {
opal_list_t all_procs;
orte_namelist_t *name;
opal_identifier_t *ids;
opal_list_t myvals;
opal_value_t *kv;
/* we first need to give the wireup info to our routed module.
* Not every routed module will need it, but some do require
@ -539,43 +451,34 @@ static int connect_accept(ompi_communicator_t *comm, int root,
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* setup the modex */
OBJ_CONSTRUCT(&modex, orte_grpcomm_collective_t);
modex.id = id[0];
modex.active = true;
ids = (opal_identifier_t*)malloc(opal_list_get_size(&all_procs) * sizeof(opal_identifier_t));
/* copy across the list of participants */
i=0;
OPAL_LIST_FOREACH(nm, &all_procs, orte_namelist_t) {
name = OBJ_NEW(orte_namelist_t);
name->name = nm->name;
opal_list_append(&modex.participants, &name->super);
memcpy(&ids[i++], &nm->name, sizeof(opal_identifier_t));
}
OPAL_LIST_DESTRUCT(&all_procs);
/* perform it */
if (OMPI_SUCCESS != (rc = orte_grpcomm.modex(&modex))) {
ORTE_ERROR_LOG(rc);
goto exit;
}
OMPI_WAIT_FOR_COMPLETION(modex.active);
OBJ_DESTRUCT(&modex);
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output,
"%s dpm:orte:connect_accept modex complete",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/*
while (NULL != (item = opal_list_remove_first(&all_procs))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&all_procs);
*/
opal_pmix.fence(ids, i);
free(ids);
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output,
"%s dpm:orte:connect_accept adding procs",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* set the locality of the new procs */
/* set the locality of the new procs - the required info should
* have been included in the data exchange */
for (j=0; j < new_proc_len; j++) {
ompi_proc_set_locality(new_proc_list[j]);
OBJ_CONSTRUCT(&myvals, opal_list_t);
if (OMPI_SUCCESS != (rc = opal_dstore.fetch(opal_dstore_internal,
(opal_identifier_t*)&new_proc_list[j]->super.proc_name,
OPAL_DSTORE_LOCALITY, &myvals))) {
new_proc_list[j]->super.proc_flags = OPAL_PROC_NON_LOCAL;
} else {
kv = (opal_value_t*)opal_list_get_first(&myvals);
new_proc_list[j]->super.proc_flags = kv->data.uint16;
}
OPAL_LIST_DESTRUCT(&myvals);
}
if (OMPI_SUCCESS != (rc = MCA_PML_CALL(add_procs(new_proc_list, new_proc_len)))) {
@ -651,12 +554,6 @@ static int connect_accept(ompi_communicator_t *comm, int root,
goto exit;
}
/* track this communicator's disconnect collective id */
preq = OBJ_NEW(orte_dpm_prequest_t);
preq->cid = newcomp->c_contextid;
preq->disconnectid = id[1];
opal_list_append(&dynamics, &preq->super);
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output,
"%s dpm:orte:connect_accept activate comm",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
@ -703,7 +600,7 @@ static int connect_accept(ompi_communicator_t *comm, int root,
static int construct_peers(ompi_group_t *group, opal_list_t *peers)
{
int i;
orte_namelist_t *nm;
orte_namelist_t *nm, *n2;
ompi_proc_t *proct;
if (OMPI_GROUP_IS_DENSE(group)) {
@ -722,7 +619,19 @@ static int construct_peers(ompi_group_t *group, opal_list_t *peers)
ORTE_NAME_PRINT((const orte_process_name_t *)&proct->super.proc_name)));
nm = OBJ_NEW(orte_namelist_t);
nm->name = *(orte_process_name_t*)&proct->super.proc_name;
opal_list_append(peers, &nm->super);
/* need to maintain an ordered list to ensure the tracker signatures
* match across all procs */
OPAL_LIST_FOREACH(n2, peers, orte_namelist_t) {
if (*(opal_identifier_t*)&nm->name < *(opal_identifier_t*)&n2->name) {
opal_list_insert_pos(peers, &n2->super, &nm->super);
nm = NULL;
break;
}
}
if (NULL != nm) {
/* append to the end */
opal_list_append(peers, &nm->super);
}
}
} else {
for (i=0; i < group->grp_proc_count; i++) {
@ -738,7 +647,19 @@ static int construct_peers(ompi_group_t *group, opal_list_t *peers)
ORTE_NAME_PRINT((const orte_process_name_t *)&proct->super.proc_name)));
nm = OBJ_NEW(orte_namelist_t);
nm->name = *(orte_process_name_t*)&proct->super.proc_name;
opal_list_append(peers, &nm->super);
/* need to maintain an ordered list to ensure the tracker signatures
* match across all procs */
OPAL_LIST_FOREACH(n2, peers, orte_namelist_t) {
if (*(opal_identifier_t*)&nm->name < *(opal_identifier_t*)&n2->name) {
opal_list_insert_pos(peers, &n2->super, &nm->super);
nm = NULL;
break;
}
}
if (NULL != nm) {
/* append to the end */
opal_list_append(peers, &nm->super);
}
}
}
return ORTE_SUCCESS;
@ -746,10 +667,11 @@ static int construct_peers(ompi_group_t *group, opal_list_t *peers)
static int disconnect(ompi_communicator_t *comm)
{
int ret;
ompi_rte_collective_t *coll;
orte_dpm_prequest_t *req, *preq;
int ret, i;
ompi_group_t *group;
opal_list_t coll;
orte_namelist_t *nm;
opal_identifier_t *ids;
/* Note that we explicitly use an RTE-based barrier (vs. an MPI
barrier). See a lengthy comment in
@ -760,63 +682,42 @@ static int disconnect(ompi_communicator_t *comm)
"%s dpm:orte:disconnect comm_cid %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), comm->c_contextid));
/* find this communicator's conn-accept request */
req = NULL;
OPAL_LIST_FOREACH(preq, &dynamics, orte_dpm_prequest_t) {
if (preq->cid == comm->c_contextid) {
req = preq;
break;
}
}
if (NULL == req) {
/* we are hosed */
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output,
"%s dpm:orte:disconnect collective tracker for comm_cid %d NOT FOUND",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), comm->c_contextid));
return OMPI_ERROR;
}
/* setup the collective */
coll = OBJ_NEW(ompi_rte_collective_t);
coll->id = req->disconnectid;
/* the daemons will have no knowledge of this collective, so
* it must be done across the peers in the communicator.
* RHC: assuming for now that this must flow across all
OBJ_CONSTRUCT(&coll, opal_list_t);
/* RHC: assuming for now that this must flow across all
* local and remote group members */
group = comm->c_local_group;
if (ORTE_SUCCESS != (ret = construct_peers(group, &coll->participants))) {
if (ORTE_SUCCESS != (ret = construct_peers(group, &coll))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(coll);
OPAL_LIST_DESTRUCT(&coll);
return ret;
}
/* do the same for the remote group */
group = comm->c_remote_group;
if (ORTE_SUCCESS != (ret = construct_peers(group, &coll->participants))) {
if (ORTE_SUCCESS != (ret = construct_peers(group, &coll))) {
ORTE_ERROR_LOG(ret);
OBJ_RELEASE(coll);
return ret;
}
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output,
"%s dpm:orte:disconnect calling barrier on comm_cid %d using id %d with %d participants",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), comm->c_contextid, (int)coll->id,
(int)opal_list_get_size(&coll->participants)));
coll->active = true;
if (OMPI_SUCCESS != (ret = ompi_rte_barrier(coll))) {
OMPI_ERROR_LOG(ret);
OPAL_LIST_DESTRUCT(&coll);
return ret;
}
/* wait for barrier to complete */
OMPI_WAIT_FOR_COMPLETION(coll->active);
OBJ_RELEASE(coll);
/* setup the ids */
ids = (opal_identifier_t*)malloc(opal_list_get_size(&coll) * sizeof(opal_identifier_t));
i=0;
OPAL_LIST_FOREACH(nm, &coll, orte_namelist_t) {
memcpy(&ids[i++], &nm->name, sizeof(opal_identifier_t));
}
OPAL_LIST_DESTRUCT(&coll);
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output,
"%s dpm:orte:disconnect calling barrier on comm_cid %d with %d participants",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), comm->c_contextid, i));
opal_pmix.fence(ids, i);
free(ids);
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output,
"%s dpm:orte:disconnect barrier complete for comm_cid %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), comm->c_contextid));
/* release this tracker */
opal_list_remove_item(&dynamics, &req->super);
OBJ_RELEASE(req);
return OMPI_SUCCESS;
}
@ -1511,7 +1412,6 @@ static void process_request(orte_process_name_t* sender,
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output,
"%s dpm:pconprocess: process modex",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
orte_grpcomm_base_store_modex(buffer, NULL);
OPAL_OUTPUT_VERBOSE((3, ompi_dpm_base_framework.framework_output,
"%s dpm:pconprocess: adding procs",

View file

@ -1,6 +1,6 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* Copyright (c) 2013 Intel, Inc. All rights reserved
* Copyright (C) 2001-2011 Mellanox Technologies Ltd. ALL RIGHTS RESERVED.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -12,12 +12,12 @@
#include "ompi_config.h"
#include "ompi/mca/mtl/mtl.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
#include "ompi/proc/proc.h"
#include "ompi/communicator/communicator.h"
#include "opal/memoryhooks/memory.h"
#include "opal/util/show_help.h"
#include "opal/mca/pmix/pmix.h"
#include "mtl_mxm.h"
#include "mtl_mxm_types.h"
@ -173,7 +173,8 @@ static int ompi_mtl_mxm_send_ep_address(void *address, size_t address_len)
/* Send address length */
sprintf(modex_name, "%s-len", modex_component_name);
rc = ompi_modex_send_string((const char *)modex_name, &address_len, sizeof(address_len));
OPAL_MODEX_SEND_STRING(rc, PMIX_SYNC_REQD, PMIX_GLOBAL,
modex_name, &address_len, sizeof(address_len));
if (OMPI_SUCCESS != rc) {
MXM_ERROR("failed to send address length");
goto bail;
@ -187,7 +188,8 @@ static int ompi_mtl_mxm_send_ep_address(void *address, size_t address_len)
while (modex_buf_size) {
sprintf(modex_name, "%s-%d", modex_component_name, modex_name_id);
modex_cur_size = (modex_buf_size < modex_max_size) ? modex_buf_size : modex_max_size;
rc = ompi_modex_send_string(modex_name, modex_buf_ptr, modex_cur_size);
OPAL_MODEX_SEND_STRING(rc, PMIX_SYNC_REQD, PMIX_GLOBAL,
modex_name, modex_buf_ptr, modex_cur_size);
if (OMPI_SUCCESS != rc) {
MXM_ERROR("Open MPI couldn't distribute EP connection details");
goto bail;
@ -226,8 +228,9 @@ static int ompi_mtl_mxm_recv_ep_address(ompi_proc_t *source_proc, void **address
/* Receive address length */
sprintf(modex_name, "%s-len", modex_component_name);
rc = ompi_modex_recv_string(modex_name, source_proc, (void**)&address_len_buf_ptr,
&modex_cur_size);
OPAL_MODEX_RECV_STRING(rc, modex_name, &source_proc->super,
(char**)&address_len_buf_ptr,
&modex_cur_size);
if (OMPI_SUCCESS != rc) {
MXM_ERROR("Failed to receive ep address length");
goto bail;
@ -246,8 +249,9 @@ static int ompi_mtl_mxm_recv_ep_address(ompi_proc_t *source_proc, void **address
modex_buf_size = 0;
while (modex_buf_size < *address_len_p) {
sprintf(modex_name, "%s-%d", modex_component_name, modex_name_id);
rc = ompi_modex_recv_string(modex_name, source_proc, (void**)&modex_buf_ptr,
&modex_cur_size);
OPAL_MODEX_RECV_STRING(rc, modex_name, &source_proc->super,
(char**)&modex_buf_ptr,
&modex_cur_size);
if (OMPI_SUCCESS != rc) {
MXM_ERROR("Open MPI couldn't distribute EP connection details");
goto bail;

View file

@ -10,6 +10,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010-2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -24,7 +25,7 @@
#include "ompi/proc/proc.h"
#include "ompi/mca/mtl/mtl.h"
#include "opal/class/opal_list.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "opal/mca/pmix/pmix.h"
#include "mtl_portals4.h"
#include "mtl_portals4_recv_short.h"
@ -84,8 +85,8 @@ ompi_mtl_portals4_add_procs(struct mca_mtl_base_module_t *mtl,
return OMPI_ERR_NOT_SUPPORTED;
}
ret = ompi_modex_recv(&mca_mtl_portals4_component.mtl_version,
procs[i], (void**) &modex_id, &size);
OMPI_MODEX_RECV(ret, &mca_mtl_portals4_component.mtl_version,
&procs[i]->super, (char**)&modex_id, &size);
if (OMPI_SUCCESS != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: ompi_modex_recv failed: %d\n",

View file

@ -10,6 +10,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010-2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -21,7 +22,7 @@
#include "opal/mca/event/event.h"
#include "opal/util/output.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "opal/mca/pmix/pmix.h"
#include "mtl_portals4.h"
#include "mtl_portals4_request.h"
@ -268,8 +269,9 @@ ompi_mtl_portals4_component_init(bool enable_progress_threads,
goto error;
}
ret = ompi_modex_send(&mca_mtl_portals4_component.mtl_version,
&id, sizeof(id));
OPAL_MODEX_SEND(ret, PMIX_SYNC_REQD, OPAL_PMIX_GLOBAL,
&mca_mtl_portals4_component.mtl_version,
&id, sizeof(id));
if (OMPI_SUCCESS != ret) {
opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
"%s:%d: ompi_modex_send failed: %d\n",

View file

@ -11,7 +11,7 @@
* Copyright (c) 2004-2006 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 QLogic Corporation. All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
@ -23,8 +23,8 @@
#include "ompi_config.h"
#include "opal/mca/pmix/pmix.h"
#include "ompi/mca/mtl/mtl.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
#include "opal/util/show_help.h"
#include "ompi/proc/proc.h"
@ -97,7 +97,8 @@ int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) {
unsigned long long *uu = (unsigned long long *) unique_job_key;
char *generated_key;
char env_string[256];
int rc;
generated_key = getenv("OMPI_MCA_orte_precondition_transports");
memset(uu, 0, sizeof(psm_uuid_t));
@ -173,10 +174,12 @@ int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) {
ompi_mtl_psm.epid = epid;
ompi_mtl_psm.mq = mq;
if (OMPI_SUCCESS !=
ompi_modex_send( &mca_mtl_psm_component.super.mtl_version,
&ompi_mtl_psm.epid,
sizeof(psm_epid_t))) {
OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_GLOBAL,
&mca_mtl_psm_component.super.mtl_version,
&ompi_mtl_psm.epid,
sizeof(psm_epid_t));
if (OMPI_SUCCESS != rc) {
opal_output(0, "Open MPI couldn't send PSM epid to head node process");
return OMPI_ERROR;
}
@ -281,8 +284,8 @@ ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl,
/* Get the epids for all the processes from modex */
for (i = 0; i < (int) nprocs; i++) {
rc = ompi_modex_recv(&mca_mtl_psm_component.super.mtl_version,
procs[i], (void**)&epid, &size);
OPAL_MODEX_RECV(rc, &mca_mtl_psm_component.super.mtl_version,
&procs[i]->super, (void**)&epid, &size);
if (rc != OMPI_SUCCESS || size != sizeof(psm_epid_t)) {
return OMPI_ERROR;
}

View file

@ -12,9 +12,7 @@
* All rights reserved.
* Copyright (c) 2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -35,12 +33,12 @@
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/runtime/opal.h"
#include "opal/mca/pmix/pmix.h"
#include "ompi/constants.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/pml/base/base.h"
#include "ompi/proc/proc.h"
#include "ompi/runtime/ompi_module_exchange.h"
typedef struct opened_component_t {
opal_list_item_t super;
@ -168,7 +166,7 @@ int mca_pml_base_select(bool enable_progress_threads,
/* Finished querying all components. Check for the bozo case. */
if( NULL == best_component ) {
opal_show_help("help-mca-base.txt", "find-available:not-valid", true, "pml");
opal_show_help("help-mca-base.txt", "find-available:none-found", true, "pml");
for( i = 0; i < opal_pointer_array_get_size(&mca_pml_base_pml); i++) {
char * tmp_val = NULL;
tmp_val = (char *) opal_pointer_array_get_item(&mca_pml_base_pml, i);
@ -312,7 +310,10 @@ static mca_base_component_t pml_base_component = {
int
mca_pml_base_pml_selected(const char *name)
{
return ompi_modex_send(&pml_base_component, name, strlen(name) + 1);
int rc;
OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_GLOBAL, &pml_base_component, name, strlen(name) + 1);
return rc;
}
int
@ -341,9 +342,8 @@ mca_pml_base_pml_check_selected(const char *my_pml,
}
/* get the name of the PML module selected by rank=0 */
ret = ompi_modex_recv(&pml_base_component,
procs[0],
(void**) &remote_pml, &size);
OPAL_MODEX_RECV(ret, &pml_base_component,
&procs[0]->super, (void**) &remote_pml, &size);
/* if this key wasn't found, then just assume all is well... */
if (OMPI_SUCCESS != ret) {

View file

@ -1,5 +1,6 @@
#
# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2014 Intel, Inc. All rights reserved
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -30,8 +31,7 @@ mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_pubsub_pmi_la_SOURCES = $(local_sources)
mca_pubsub_pmi_la_LDFLAGS = -module -avoid-version $(pubsub_pmi_LDFLAGS)
mca_pubsub_pmi_la_LIBADD = $(pubsub_pmi_LIBS) \
$(OMPI_TOP_BUILDDIR)/opal/mca/common/pmi/libmca_common_pmi.la
mca_pubsub_pmi_la_LIBADD = $(pubsub_pmi_LIBS)
noinst_LTLIBRARIES = $(component_noinst)
libmca_pubsub_pmi_la_SOURCES = $(local_sources)

View file

@ -1,7 +1,8 @@
/*
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Los Alamos National Security, LLC.
* All rights reserved.
* All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -13,7 +14,7 @@
#include "ompi/constants.h"
#include "opal/mca/common/pmi/common_pmi.h"
#include "opal/mca/pmix/pmix.h"
#include "ompi/info/info.h"
#include "ompi/mca/rte/rte.h"
@ -24,38 +25,86 @@
* Init the module
*/
static int init(void)
{
{
// did the pmix.init in the component
return OMPI_SUCCESS;
}
/*
* publish the port_name for the specified service_name.
*/
static int publish ( const char *service_name, ompi_info_t *info, const char *port_name )
static int publish(const char *service_name, ompi_info_t *info, const char *port_name)
{
return mca_common_pmi_publish(service_name,port_name);
pmix_info_t *p;
opal_list_t xfer;
ompi_info_entry_t *ie;
int rc;
/* transfer the ompi_info_t data to an array of pmix_info_t structs */
OBJ_CONSTRUCT(&xfer, opal_list_t);
OPAL_LIST_FOREACH(ie, &info->super, ompi_info_entry_t) {
p = OBJ_NEW(pmix_info_t);
strncpy(p->key, ie->ie_key, PMIX_MAX_INFO_KEY);
strncpy(p->value, ie->ie_value, PMIX_MAX_INFO_VAL);
opal_list_append(&xfer, &p->super);
}
rc = opal_pmix.publish(service_name, &xfer, port_name);
OPAL_LIST_DESTRUCT(&xfer);
return rc;
}
static char* lookup ( const char *service_name, ompi_info_t *info )
static char* lookup(const char *service_name, ompi_info_t *info)
{
char *port=NULL;
int rc = mca_common_pmi_lookup(service_name, &port);
char port[PMIX_MAX_VALLEN], *ret;
pmix_info_t *p;
opal_list_t xfer;
ompi_info_entry_t *ie;
int rc;
/* transfer the ompi_info_t data to an array of pmix_info_t structs */
OBJ_CONSTRUCT(&xfer, opal_list_t);
OPAL_LIST_FOREACH(ie, &info->super, ompi_info_entry_t) {
p = OBJ_NEW(pmix_info_t);
strncpy(p->key, ie->ie_key, PMIX_MAX_INFO_KEY);
strncpy(p->value, ie->ie_value, PMIX_MAX_INFO_VAL);
opal_list_append(&xfer, &p->super);
}
rc = opal_pmix.lookup(service_name, &xfer, port, PMIX_MAX_VALLEN);
OPAL_LIST_DESTRUCT(&xfer);
/* in error case port will be set to NULL
* this is what our callers expect to see
* In future maybe som error handling need?
* In future maybe some error handling need?
*/
if( rc != OPAL_SUCCESS ){
// improove error processing
return port; // NULL ?
// improve error processing
return NULL;
}
return port;
ret = strdup(port);
return ret;
}
/*
* delete the entry */
static int unpublish ( const char *service_name, ompi_info_t *info )
static int unpublish(const char *service_name, ompi_info_t *info)
{
return mca_common_pmi_unpublish( service_name );
pmix_info_t *p;
opal_list_t xfer;
ompi_info_entry_t *ie;
int rc;
/* transfer the ompi_info_t data to an array of pmix_info_t structs */
OBJ_CONSTRUCT(&xfer, opal_list_t);
OPAL_LIST_FOREACH(ie, &info->super, ompi_info_entry_t) {
p = OBJ_NEW(pmix_info_t);
strncpy(p->key, ie->ie_key, PMIX_MAX_INFO_KEY);
strncpy(p->value, ie->ie_value, PMIX_MAX_INFO_VAL);
opal_list_append(&xfer, &p->super);
}
rc = opal_pmix.unpublish(service_name, &xfer);
OPAL_LIST_DESTRUCT(&xfer);
return rc;
}

View file

@ -3,6 +3,7 @@
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -13,7 +14,7 @@
#include "ompi_config.h"
#include "opal/runtime/opal_params.h"
#include "opal/mca/common/pmi/common_pmi.h"
#include "opal/mca/pmix/pmix.h"
#include "ompi/constants.h"
#include "ompi/mca/rte/rte.h"
@ -66,20 +67,21 @@ static int pubsub_pmi_component_open(void)
static int pubsub_pmi_component_close(void)
{
if (NULL != opal_pmix.finalize) {
opal_pmix.finalize();
}
return OMPI_SUCCESS;
}
static int pubsub_pmi_component_query(mca_base_module_t **module, int *priority)
{
/* if we are indirectly launched via orted, the
* selection will have been turned "off" for us
*/
int rc = mca_common_pmi_init (opal_pmi_version);
if (NULL != opal_pmix.init) {
if ( OPAL_SUCCESS == rc ) {
*priority = my_priority;
*module = (mca_base_module_t *)&ompi_pubsub_pmi_module;
return OMPI_SUCCESS;
if (OPAL_SUCCESS == opal_pmix.init()) {
*priority = my_priority;
*module = (mca_base_module_t *)&ompi_pubsub_pmi_module;
return OMPI_SUCCESS;
}
}
/* we can't run */

View file

@ -28,7 +28,6 @@ struct opal_proc_t;
#include "orte/types.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/grpcomm/grpcomm.h"
#include "orte/mca/rml/base/rml_contact.h"
#include "orte/mca/rml/rml.h"
#include "orte/mca/routed/routed.h"
@ -73,13 +72,6 @@ static inline orte_process_name_t * OMPI_CAST_RTE_NAME(opal_process_name_t * nam
#define OMPI_CAST_RTE_NAME(a) ((orte_process_name_t*)(a))
#endif
/* Collective objects and operations */
#define ompi_rte_collective_t orte_grpcomm_collective_t
typedef orte_grpcomm_coll_id_t ompi_rte_collective_id_t;
OMPI_DECLSPEC int ompi_rte_modex(ompi_rte_collective_t *coll);
#define ompi_rte_barrier(a) orte_grpcomm.barrier(a)
OMPI_DECLSPEC orte_grpcomm_coll_id_t ompi_rte_get_collective_id(const struct ompi_communicator_t *comm);
/* Process info struct and values */
typedef orte_node_rank_t ompi_node_rank_t;
typedef orte_local_rank_t ompi_local_rank_t;
@ -103,12 +95,6 @@ typedef orte_error_t ompi_rte_error_report_t;
#define ompi_rte_finalize() orte_finalize()
OMPI_DECLSPEC void ompi_rte_wait_for_debugger(void);
/* Database operations */
OMPI_DECLSPEC int ompi_rte_db_store(const ompi_process_name_t *nm, const char* key,
const void *data, opal_data_type_t type);
OMPI_DECLSPEC int ompi_rte_db_fetch(const struct ompi_proc_t *proc,
const char *key,
void **data, opal_data_type_t type);
#define OMPI_DB_HOSTNAME ORTE_DB_HOSTNAME
#define OMPI_DB_LOCALITY ORTE_DB_LOCALITY
#define OMPI_DB_GLOBAL_RANK ORTE_DB_GLOBAL_RANK
@ -132,7 +118,6 @@ typedef orte_rml_tag_t ompi_rml_tag_t;
typedef struct {
ompi_rte_component_t super;
bool direct_modex;
opal_mutex_t lock;
opal_list_t modx_reqs;
} ompi_rte_orte_component_t;
@ -151,6 +136,9 @@ static inline orte_process_name_t * OMPI_CAST_RTE_NAME(opal_process_name_t * nam
return (orte_process_name_t *)name;
}
#endif
#define ompi_hostname_cutoff orte_full_modex_cutoff
END_C_DECLS
#endif /* MCA_OMPI_RTE_ORTE_H */

View file

@ -27,8 +27,6 @@
#include "ompi/mca/rte/rte.h"
#include "rte_orte.h"
bool ompi_rte_orte_direct_modex;
/*
* Public string showing the component version number
*/
@ -40,7 +38,6 @@ const char *ompi_rte_orte_component_version_string =
*/
static int rte_orte_open(void);
static int rte_orte_close(void);
static int rte_orte_register(void);
/*
* Instantiate the public struct with all of our public information
@ -65,7 +62,7 @@ ompi_rte_orte_component_t mca_rte_orte_component = {
rte_orte_open,
rte_orte_close,
NULL,
rte_orte_register
NULL
},
{
/* The component is checkpoint ready */
@ -91,17 +88,6 @@ static int rte_orte_close(void)
return OMPI_SUCCESS;
}
static int rte_orte_register(void)
{
mca_rte_orte_component.direct_modex = false;
(void) mca_base_component_var_register (&mca_rte_orte_component.super.base_version,
"direct_modex", "Enable direct modex (default: false)",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &mca_rte_orte_component.direct_modex);
return OMPI_SUCCESS;
}
static void con(ompi_orte_tracker_t *p)
{
p->active = true;

View file

@ -20,6 +20,7 @@
#include "opal/util/proc.h"
#include "opal/util/opal_getcwd.h"
#include "opal/mca/dstore/dstore.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/threads/threads.h"
#include "opal/class/opal_list.h"
#include "opal/dss/dss.h"
@ -51,9 +52,6 @@
#include "ompi/communicator/communicator.h"
extern ompi_rte_orte_component_t mca_rte_orte_component;
static void recv_callback(int status, orte_process_name_t* sender,
opal_buffer_t *buffer,
orte_rml_tag_t tag, void *cbdata);
void ompi_rte_abort(int error_code, char *fmt, ...)
{
@ -156,322 +154,3 @@ void ompi_rte_wait_for_debugger(void)
OMPI_WAIT_FOR_COMPLETION(xfer.active);
}
}
static bool direct_modex_enabled = false;
int ompi_rte_modex(ompi_rte_collective_t *coll)
{
/* mark that this process reached modex */
orte_grpcomm_base.modex_ready = true;
/* let the datastore commit any data we provided that needs
* to be shared with our peers, if required
*/
opal_dstore.commit(opal_dstore_peer, (opal_identifier_t*)ORTE_PROC_MY_NAME);
if ((orte_process_info.num_procs < ompi_hostname_cutoff) ||
!mca_rte_orte_component.direct_modex ||
orte_standalone_operation) {
/* if we are direct launched and/or below a user-specified
* cutoff value, then we just fall thru to the ORTE modex
*/
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output,
"%s running modex",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
return orte_grpcomm.modex(coll);
}
/* if the user defined a cutoff value that we are larger
* than, and if we were not direct launched, then skip
* the modex operation. We already have all the RTE-level
* info we need, and we will retrieve the MPI-level info
* only as requested. This will provide a faster startup
* time since we won't do a massive allgather operation,
* but will make first-message connections slower.
*/
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output,
"%s using direct modex",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
direct_modex_enabled = true;
/* if direct modex was enabled, setup the receive for it */
orte_rml.recv_buffer_nb(OMPI_NAME_WILDCARD,
ORTE_RML_TAG_DIRECT_MODEX_RESP,
ORTE_RML_PERSISTENT,
recv_callback, NULL);
/* process any pending requests for our data */
ORTE_ACTIVATE_PROC_STATE(ORTE_PROC_MY_NAME, ORTE_PROC_STATE_MODEX_READY);
/* release the barrier */
if (NULL != coll->cbfunc) {
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output,
"%s CALLING MODEX RELEASE",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
coll->cbfunc(NULL, coll->cbdata);
} else {
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output,
"%s NO MODEX RELEASE CBFUNC",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
}
/* flag the collective as complete */
coll->active = false;
return OMPI_SUCCESS;
}
int ompi_rte_db_store(const orte_process_name_t *nm, const char* key,
const void *data, opal_data_type_t type)
{
opal_value_t kv;
int rc;
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = strdup(key);
if (OPAL_SUCCESS != (rc = opal_value_load(&kv, (void*)data, type))) {
OBJ_DESTRUCT(&kv);
return rc;
}
/* MPI connection data is to be shared with ALL other processes */
rc = opal_dstore.store(opal_dstore_peer, (opal_identifier_t*)nm, &kv);
OBJ_DESTRUCT(&kv);
return rc;
}
static int direct_modex(orte_process_name_t *peer)
{
int rc;
ompi_orte_tracker_t *req;
opal_buffer_t *buf;
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output,
"%s requesting direct modex from %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(peer)));
buf = OBJ_NEW(opal_buffer_t);
/* create a tracker for this request */
req = OBJ_NEW(ompi_orte_tracker_t);
req->peer = *peer;
/* add this to our list of requests - protect it as the
* callback that returns data comes in the ORTE event base
*/
opal_mutex_lock(&mca_rte_orte_component.lock);
opal_list_append(&mca_rte_orte_component.modx_reqs, &req->super);
opal_mutex_unlock(&mca_rte_orte_component.lock);
/* send the request */
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(peer, buf,
ORTE_RML_TAG_DIRECT_MODEX,
orte_rml_send_callback, NULL))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(buf);
opal_mutex_lock(&mca_rte_orte_component.lock);
opal_list_remove_item(&mca_rte_orte_component.modx_reqs, &req->super);
opal_mutex_unlock(&mca_rte_orte_component.lock);
OBJ_RELEASE(req);
return rc;
}
/* wait for the response */
opal_mutex_lock(&req->lock);
while (req->active) {
opal_condition_wait(&req->cond, &req->lock);
}
/* now can safely destruct the request */
OBJ_RELEASE(req);
return ORTE_SUCCESS;
}
int ompi_rte_db_fetch(const struct ompi_proc_t *proc,
const char *key,
void **data, opal_data_type_t type)
{
int rc;
opal_list_t myvals;
opal_value_t *kv;
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output,
"%s fetch data from %s for %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT((const orte_process_name_t *)&proc->super.proc_name), key));
OBJ_CONSTRUCT(&myvals, opal_list_t);
/* the peer dstore contains our own data that will be shared
* with our peers - the nonpeer dstore contains data we received
* that would only be shared with nonpeer procs
*/
if (OPAL_SUCCESS != (rc = opal_dstore.fetch(opal_dstore_nonpeer,
(opal_identifier_t*)(&proc->super.proc_name),
key, &myvals))) {
if (direct_modex_enabled) {
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output,
"%s requesting direct modex from %s for %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT((const orte_process_name_t *)&proc->super.proc_name), key));
/* if we couldn't fetch the data via the db, then we will attempt
* to retrieve it from the target proc
*/
if (ORTE_SUCCESS != (rc = direct_modex((orte_process_name_t*)(&proc->super.proc_name)))) {
ORTE_ERROR_LOG(rc);
OPAL_LIST_DESTRUCT(&myvals);
return rc;
}
/* now retrieve the requested piece */
if (OPAL_SUCCESS != (rc = opal_dstore.fetch(opal_dstore_nonpeer,
(opal_identifier_t*)(&proc->super.proc_name),
key, &myvals))) {
ORTE_ERROR_LOG(rc);
OPAL_LIST_DESTRUCT(&myvals);
return rc;
}
} else {
/* see if we can find it in the internal dstore */
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output,
"%s searching nonpeer dstore for %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), key));
if (OPAL_SUCCESS != (rc = opal_dstore.fetch(opal_dstore_internal,
(opal_identifier_t*)(&proc->super.proc_name),
key, &myvals))) {
/* try one last place - the peer dstore in case it got stuck there for some reason */
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output,
"%s searching internal dstore for %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), key));
if (OPAL_SUCCESS != (rc = opal_dstore.fetch(opal_dstore_peer,
(opal_identifier_t*)(&proc->super.proc_name),
key, &myvals))) {
OPAL_LIST_DESTRUCT(&myvals);
return rc;
}
}
}
}
/* only one value should have been returned */
kv = (opal_value_t*)opal_list_get_first(&myvals);
if (NULL == kv) {
return OMPI_ERROR;
}
opal_value_unload(kv, data, type);
OPAL_LIST_DESTRUCT(&myvals);
/* update the hostname upon first call to modex-recv for this proc */
if (NULL == proc->super.proc_hostname) {
OBJ_CONSTRUCT(&myvals, opal_list_t);
if (OPAL_SUCCESS == opal_dstore.fetch(opal_dstore_internal, (opal_identifier_t*)(&proc->super.proc_name), ORTE_DB_HOSTNAME, &myvals)) {
kv = (opal_value_t*)opal_list_get_first(&myvals);
if (NULL != kv) {
opal_value_unload(kv, (void**)&proc->super.proc_hostname, OPAL_STRING);
}
}
OPAL_LIST_DESTRUCT(&myvals);
}
return OMPI_SUCCESS;
}
/* this function executes in the RML event base, and so
* we must take care to protect against threading conflicts
* with the MPI layer
*/
static void recv_callback(int status, orte_process_name_t* sender,
opal_buffer_t *buffer,
orte_rml_tag_t tag, void *cbdata)
{
ompi_orte_tracker_t *req, *nxt;
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_framework.framework_output,
"%s received direct modex data from %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(sender)));
/* this is a std modex package, so unpack it with the
* grpcomm function and cache it locally so we can quickly get
* more pieces if necessary - don't need to thread protect
* here as only one RML callback can be active at a time
*/
orte_grpcomm_base_store_modex(buffer, NULL);
/* protect */
opal_mutex_lock(&mca_rte_orte_component.lock);
/* find all requests against this sender and release them */
OPAL_LIST_FOREACH_SAFE(req, nxt, &mca_rte_orte_component.modx_reqs, ompi_orte_tracker_t) {
if (req->peer.jobid == sender->jobid &&
req->peer.vpid == sender->vpid) {
/* remove the request from the list */
opal_list_remove_item(&mca_rte_orte_component.modx_reqs, &req->super);
/* wake up the waiting thread */
req->active = false;
opal_condition_signal(&req->cond);
}
}
/* release */
opal_mutex_unlock(&mca_rte_orte_component.lock);
}
/* everybody involved in the collective has to call this function. However,
* only the "root" process (i.e., rank=0 in this communicator) will send
* the collective id request to the HNP. The HNP will then xcast the
* assigned value to all daemons so that every daemon knows about it. This
* will ensure that daemons properly handle the request. The daemons will
* relay the received ID to their local procs */
orte_grpcomm_coll_id_t ompi_rte_get_collective_id(const struct ompi_communicator_t *comm)
{
opal_buffer_t *nbuf;
int32_t i, rc;
orte_rml_recv_cb_t xfer;
orte_grpcomm_coll_id_t id;
uint8_t flag=1;
/* everybody waits for the id */
OBJ_CONSTRUCT(&xfer, orte_rml_recv_cb_t);
xfer.active = true;
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD,
ORTE_RML_TAG_FULL_COLL_ID,
ORTE_RML_NON_PERSISTENT,
orte_rml_recv_callback, &xfer);
/* the lowest member of the communicator requests the communicator
* id from mpirun */
if (0 == ompi_comm_rank((ompi_communicator_t*)comm)) {
nbuf = OBJ_NEW(opal_buffer_t);
if (NULL == nbuf) {
return OMPI_ERROR;
}
/* tell the HNP we want one id */
i = 1;
if (OPAL_SUCCESS != (rc = opal_dss.pack(nbuf, &i, 1, OPAL_INT32))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(nbuf);
return OMPI_ERROR;
}
/* tell the HNP this is to be a global value */
if (OPAL_SUCCESS != (rc = opal_dss.pack(nbuf, &flag, 1, OPAL_UINT8))) {
ORTE_ERROR_LOG(rc);
OBJ_RELEASE(nbuf);
return OMPI_ERROR;
}
/* send the request */
rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, nbuf,
ORTE_RML_TAG_COLL_ID_REQ,
orte_rml_send_callback, NULL);
}
/* wait for response */
OMPI_WAIT_FOR_COMPLETION(xfer.active);
/* extract the id */
i=1;
if (OPAL_SUCCESS != (rc = opal_dss.unpack(&xfer.data, &id, &i, ORTE_GRPCOMM_COLL_ID_T))) {
ORTE_ERROR_LOG(rc);
OBJ_DESTRUCT(&xfer);
return OMPI_ERROR;
}
OBJ_DESTRUCT(&xfer); // done with the received data
return id;
}

View file

@ -35,11 +35,11 @@
#include "opal/util/show_help.h"
#include "opal/mca/dstore/dstore.h"
#include "opal/mca/hwloc/base/base.h"
#include "opal/mca/pmix/pmix.h"
#include "ompi/proc/proc.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/runtime/mpiruntime.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "ompi/runtime/params.h"
static opal_list_t ompi_proc_list;
@ -115,7 +115,9 @@ int ompi_proc_init(void)
opal_proc_local_set(&proc->super);
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
/* add our arch to the modex */
if (OMPI_SUCCESS != (ret = ompi_modex_send_key_value("OMPI_ARCH", &proc->super.proc_arch, OPAL_UINT32))) {
OPAL_MODEX_SEND_STRING(ret, PMIX_SYNC_REQD, PMIX_REMOTE, OPAL_DSTORE_ARCH,
&proc->super.proc_arch, OPAL_UINT32);
if (OPAL_SUCCESS != ret) {
return ret;
}
#endif
@ -125,124 +127,6 @@ int ompi_proc_init(void)
return OMPI_SUCCESS;
}
int ompi_proc_set_locality(ompi_proc_t *proc)
{
opal_hwloc_locality_t locality;
ompi_vpid_t vpid;
int ret;
opal_list_t myvals;
opal_value_t *kv, kvn;
/* get the locality information - do not use modex recv for
* this request as that will automatically cause the hostname
* to be loaded as well
*/
OBJ_CONSTRUCT(&myvals, opal_list_t);
if (OMPI_SUCCESS == opal_dstore.fetch(opal_dstore_internal,
(opal_identifier_t*)&proc->super.proc_name,
OPAL_DSTORE_LOCALITY, &myvals)) {
kv = (opal_value_t*)opal_list_get_first(&myvals);
proc->super.proc_flags = kv->data.uint16;
OPAL_LIST_DESTRUCT(&myvals);
return OMPI_SUCCESS;
}
OPAL_LIST_DESTRUCT(&myvals);
/* if we don't already have it, compute and save it for future use */
OBJ_CONSTRUCT(&myvals, opal_list_t);
if (OMPI_SUCCESS != (ret = opal_dstore.fetch(opal_dstore_nonpeer,
(opal_identifier_t*)&proc->super.proc_name,
OMPI_RTE_NODE_ID, &myvals))) {
if (OMPI_SUCCESS != (ret = opal_dstore.fetch(opal_dstore_peer,
(opal_identifier_t*)&proc->super.proc_name,
OMPI_RTE_NODE_ID, &myvals))) {
OPAL_LIST_DESTRUCT(&myvals);
return ret;
}
}
kv = (opal_value_t*)opal_list_get_first(&myvals);
vpid = kv->data.uint32;
OPAL_LIST_DESTRUCT(&myvals);
/* if we are on different nodes, then we are probably non-local */
if (vpid != OMPI_RTE_MY_NODEID) {
locality = OPAL_PROC_NON_LOCAL;
#ifdef OMPI_RTE_HOST_ID
/* see if coprocessors were detected - if the hostid isn't
* present, then no coprocessors were detected and we can
* ignore this test
*/
OBJ_CONSTRUCT(&myvals, opal_list_t);
if (OMPI_SUCCESS == opal_dstore.fetch(opal_dstore_internal,
(opal_identifier_t*)&proc->super.proc_name,
OMPI_RTE_HOST_ID, &myvals)) {
kv = (opal_value_t*)opal_list_get_first(&myvals);
vpid = kv->data.uint32;
/* if this matches my host id, then we are on the same host,
* but not on the same board
*/
if (vpid == ompi_process_info.my_hostid) {
locality = OPAL_PROC_ON_HOST;
} else {
locality = OPAL_PROC_NON_LOCAL;
}
}
OPAL_LIST_DESTRUCT(&myvals);
#endif
} else {
#if OPAL_HAVE_HWLOC
{
char *cpu_bitmap;
/* retrieve the binding for the other proc */
OBJ_CONSTRUCT(&myvals, opal_list_t);
if (OMPI_SUCCESS != (ret = opal_dstore.fetch(opal_dstore_internal,
(opal_identifier_t*)&proc->super.proc_name,
OPAL_DSTORE_CPUSET, &myvals))) {
/* check the nonpeer data in case of comm_spawn */
if (OMPI_SUCCESS != ( ret = opal_dstore.fetch(opal_dstore_nonpeer,
(opal_identifier_t*)&proc->super.proc_name,
OPAL_DSTORE_CPUSET, &myvals))) {
ret = opal_dstore.fetch(opal_dstore_peer,
(opal_identifier_t*)&proc->super.proc_name,
OPAL_DSTORE_CPUSET, &myvals);
}
}
if (OMPI_SUCCESS != ret) {
/* we don't know their cpuset, so nothing more we can say */
locality = OPAL_PROC_ON_NODE;
} else {
kv = (opal_value_t*)opal_list_get_first(&myvals);
cpu_bitmap = kv->data.string;
if (NULL == cpu_bitmap || NULL == ompi_process_info.cpuset) {
/* one or both of us is not bound, so all we can say is we are on the
* same node
*/
locality = OPAL_PROC_ON_NODE;
} else {
/* we share a node - see what else we share */
locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
ompi_process_info.cpuset,
cpu_bitmap);
}
}
OPAL_LIST_DESTRUCT(&myvals);
}
#else
/* all we know is that we share this node */
locality = OPAL_PROC_ON_NODE;
#endif
}
OBJ_CONSTRUCT(&kvn, opal_value_t);
kvn.key = strdup(OPAL_DSTORE_LOCALITY);
kvn.type = OPAL_HWLOC_LOCALITY_T;
kvn.data.uint16 = locality;
ret = opal_dstore.store(opal_dstore_internal, (opal_identifier_t*)&proc->super.proc_name, &kvn);
OBJ_DESTRUCT(&kvn);
/* set the proc's local value as well */
proc->super.proc_flags = locality;
return ret;
}
/**
* The process creation is split into two steps. The second step
@ -255,24 +139,31 @@ int ompi_proc_set_locality(ompi_proc_t *proc)
*/
int ompi_proc_complete_init(void)
{
ompi_proc_t *proc = NULL;
opal_list_item_t *item = NULL;
ompi_proc_t *proc;
int ret, errcode = OMPI_SUCCESS;
opal_list_t myvals;
opal_value_t *kv;
OPAL_THREAD_LOCK(&ompi_proc_lock);
for( item = opal_list_get_first(&ompi_proc_list);
item != opal_list_get_end(&ompi_proc_list);
item = opal_list_get_next(item)) {
proc = (ompi_proc_t*)item;
OPAL_LIST_FOREACH(proc, &ompi_proc_list, ompi_proc_t) {
if (OMPI_CAST_RTE_NAME(&proc->super.proc_name)->vpid != OMPI_PROC_MY_NAME->vpid) {
/* get the locality information */
ret = ompi_proc_set_locality(proc);
if (OMPI_SUCCESS != ret) {
errcode = ret;
break;
/* get the locality information - do not use modex recv for
* this request as that will automatically cause the hostname
* to be loaded as well. All RTEs are required to provide this
* information at startup for procs on our node. Thus, not
* finding the info indicates that the proc is non-local.
*/
OBJ_CONSTRUCT(&myvals, opal_list_t);
if (OMPI_SUCCESS != (ret = opal_dstore.fetch(opal_dstore_internal,
(opal_identifier_t*)&proc->super.proc_name,
OPAL_DSTORE_LOCALITY, &myvals))) {
proc->super.proc_flags = OPAL_PROC_NON_LOCAL;
} else {
kv = (opal_value_t*)opal_list_get_first(&myvals);
proc->super.proc_flags = kv->data.uint16;
}
OPAL_LIST_DESTRUCT(&myvals);
if (ompi_process_info.num_procs < ompi_hostname_cutoff) {
/* IF the number of procs falls below the specified cutoff,
@ -281,8 +172,9 @@ int ompi_proc_complete_init(void)
* ALL modex info for this proc) will have no appreciable
* impact on launch scaling
*/
ret = ompi_modex_recv_key_value(OMPI_DB_HOSTNAME, proc, (void**)&(proc->super.proc_hostname), OPAL_STRING);
if (OMPI_SUCCESS != ret) {
OPAL_MODEX_RECV_VALUE(ret, OPAL_DSTORE_HOSTNAME, (opal_proc_t*)&proc->super,
(char**)&(proc->super.proc_hostname), OPAL_STRING);
if (OPAL_SUCCESS != ret) {
errcode = ret;
break;
}
@ -297,12 +189,14 @@ int ompi_proc_complete_init(void)
proc->super.proc_hostname = NULL;
}
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
/* get the remote architecture */
/* get the remote architecture - this might force a modex except
* for those environments where the RM provides it */
{
uint32_t *ui32ptr;
ui32ptr = &(proc->super.proc_arch);
ret = ompi_modex_recv_key_value("OMPI_ARCH", proc, (void**)&ui32ptr, OPAL_UINT32);
if (OMPI_SUCCESS == ret) {
OPAL_MODEX_RECV_VALUE(ret, OPAL_DSTORE_ARCH, (opal_proc_t*)&proc->super,
(void**)&ui32ptr, OPAL_UINT32);
if (OPAL_SUCCESS == ret) {
/* if arch is different than mine, create a new convertor for this proc */
if (proc->super.proc_arch != opal_local_arch) {
OBJ_RELEASE(proc->super.proc_convertor);
@ -499,6 +393,8 @@ int ompi_proc_refresh(void)
opal_list_item_t *item = NULL;
ompi_vpid_t i = 0;
int ret=OMPI_SUCCESS;
opal_list_t myvals;
opal_value_t *kv;
OPAL_THREAD_LOCK(&ompi_proc_lock);
@ -520,11 +416,23 @@ int ompi_proc_refresh(void)
proc->super.proc_arch = opal_local_arch;
opal_proc_local_set(&proc->super);
} else {
/* get the locality information */
ret = ompi_proc_set_locality(proc);
if (OMPI_SUCCESS != ret) {
break;
/* get the locality information - do not use modex recv for
* this request as that will automatically cause the hostname
* to be loaded as well. All RTEs are required to provide this
* information at startup for procs on our node. Thus, not
* finding the info indicates that the proc is non-local.
*/
OBJ_CONSTRUCT(&myvals, opal_list_t);
if (OMPI_SUCCESS != (ret = opal_dstore.fetch(opal_dstore_internal,
(opal_identifier_t*)&proc->super.proc_name,
OPAL_DSTORE_LOCALITY, &myvals))) {
proc->super.proc_flags = OPAL_PROC_NON_LOCAL;
} else {
kv = (opal_value_t*)opal_list_get_first(&myvals);
proc->super.proc_flags = kv->data.uint16;
}
OPAL_LIST_DESTRUCT(&myvals);
if (ompi_process_info.num_procs < ompi_hostname_cutoff) {
/* IF the number of procs falls below the specified cutoff,
* then we assume the job is small enough that retrieving
@ -532,7 +440,8 @@ int ompi_proc_refresh(void)
* ALL modex info for this proc) will have no appreciable
* impact on launch scaling
*/
ret = ompi_modex_recv_key_value(OMPI_DB_HOSTNAME, proc, (void**)&(proc->super.proc_hostname), OPAL_STRING);
OPAL_MODEX_RECV_VALUE(ret, OPAL_DSTORE_HOSTNAME, (opal_proc_t*)&proc->super,
(char**)&(proc->super.proc_hostname), OPAL_STRING);
if (OMPI_SUCCESS != ret) {
break;
}
@ -550,7 +459,8 @@ int ompi_proc_refresh(void)
{
/* get the remote architecture */
uint32_t* uiptr = &(proc->super.proc_arch);
ret = ompi_modex_recv_key_value("OMPI_ARCH", proc, (void**)&uiptr, OPAL_UINT32);
OPAL_MODEX_RECV_VALUE(ret, OPAL_DSTORE_ARCH, (opal_proc_t*)&proc->super,
(void**)&uiptr, OPAL_UINT32);
if (OMPI_SUCCESS != ret) {
break;
}
@ -605,30 +515,21 @@ ompi_proc_pack(ompi_proc_t **proclist, int proclistsize,
opal_value_t *kv;
opal_list_t data;
/* fetch all global info we know about the peer - while
/* fetch all info we know about the peer - while
* the remote procs may already know some of it, we cannot
* be certain they do. So we must include a full dump of
* everything we know about this proc, excluding INTERNAL
* data that each process computes about its peers
* everything we know about this proc
*/
OBJ_CONSTRUCT(&data, opal_list_t);
rc = opal_dstore.fetch(opal_dstore_peer,
rc = opal_dstore.fetch(opal_dstore_internal,
(opal_identifier_t*)&proclist[i]->super.proc_name,
NULL, &data);
if (OPAL_SUCCESS != rc) {
OMPI_ERROR_LOG(rc);
num_entries = 0;
} else {
rc = opal_dstore.fetch(opal_dstore_nonpeer,
(opal_identifier_t*)&proclist[i]->super.proc_name,
NULL, &data);
if (OPAL_SUCCESS != rc) {
OMPI_ERROR_LOG(rc);
num_entries = 0;
} else {
/* count the number of entries we will send */
num_entries = opal_list_get_size(&data);
}
/* count the number of entries we will send */
num_entries = opal_list_get_size(&data);
}
/* put the number of entries into the buffer */
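Illustrative note (not part of the commit): the hunk above stops just before the serialization step. A minimal sketch of how the counted entries would typically be packed with the OPAL DSS API follows; "buf" stands for the opal_buffer_t parameter being filled, and the exact packing code is not shown in this excerpt.
    /* sketch only: pack the count, then each fetched opal_value_t */
    if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &num_entries, 1, OPAL_INT32))) {
        OMPI_ERROR_LOG(rc);
        break;
    }
    if (0 < num_entries) {
        OPAL_LIST_FOREACH(kv, &data, opal_value_t) {
            if (OPAL_SUCCESS != (rc = opal_dss.pack(buf, &kv, 1, OPAL_VALUE))) {
                OMPI_ERROR_LOG(rc);
                break;
            }
        }
    }
    OPAL_LIST_DESTRUCT(&data);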
@ -799,18 +700,19 @@ ompi_proc_unpack(opal_buffer_t* buf,
if (OPAL_EQUAL != ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL,
OMPI_PROC_MY_NAME, &new_name)) {
/* store it in the database */
if (OPAL_SUCCESS != (rc = opal_dstore.store(opal_dstore_peer,
if (OPAL_SUCCESS != (rc = opal_dstore.store(opal_dstore_internal,
(opal_identifier_t*)&new_name, kv))) {
OMPI_ERROR_LOG(rc);
}
}
OBJ_RELEASE(kv);
}
/* RHC: compute locality */
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
OBJ_CONSTRUCT(&myvals, opal_list_t);
rc = opal_dstore.fetch(opal_dstore_peer,
rc = opal_dstore.fetch(opal_dstore_internal,
(opal_identifier_t*)&new_name,
"OMPI_ARCH", &myvals);
OPAL_DSTORE_ARCH, &myvals);
if( OPAL_SUCCESS == rc ) {
kv = (opal_value_t*)opal_list_get_first(&myvals);
new_arch = kv->data.uint32;
@ -824,9 +726,9 @@ ompi_proc_unpack(opal_buffer_t* buf,
if (ompi_process_info.num_procs < ompi_hostname_cutoff) {
/* retrieve the hostname */
OBJ_CONSTRUCT(&myvals, opal_list_t);
rc = opal_dstore.fetch(opal_dstore_peer,
rc = opal_dstore.fetch(opal_dstore_internal,
(opal_identifier_t*)&new_name,
OMPI_DB_HOSTNAME, &myvals);
OPAL_DSTORE_HOSTNAME, &myvals);
if( OPAL_SUCCESS == rc ) {
kv = (opal_value_t*)opal_list_get_first(&myvals);
new_hostname = strdup(kv->data.string);



@ -12,7 +12,7 @@
* Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -302,11 +302,6 @@ OMPI_DECLSPEC int ompi_proc_unpack(opal_buffer_t *buf,
OMPI_DECLSPEC int ompi_proc_refresh(void);
/**
* Set the locality of a proc relative to me
*/
OMPI_DECLSPEC int ompi_proc_set_locality(ompi_proc_t *proc);
END_C_DECLS
#endif /* OMPI_PROC_PROC_H */


@ -10,6 +10,7 @@
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2006-2009 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2014 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@ -25,7 +26,6 @@ headers += \
runtime/mpiruntime.h \
runtime/ompi_cr.h \
runtime/params.h \
runtime/ompi_module_exchange.h \
runtime/ompi_info_support.h
libmpi_la_SOURCES += \
@ -35,5 +35,4 @@ libmpi_la_SOURCES += \
runtime/ompi_mpi_params.c \
runtime/ompi_mpi_preconnect.c \
runtime/ompi_cr.c \
runtime/ompi_module_exchange.c \
runtime/ompi_info_support.c


@ -1,158 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/constants.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/dss/dss.h"
#include "ompi/mca/rte/rte.h"
#include "ompi/proc/proc.h"
#include "ompi/runtime/ompi_module_exchange.h"
int ompi_modex_send(const mca_base_component_t *source_component,
const void *data, size_t size)
{
int rc;
char *key;
opal_byte_object_t bo;
key = mca_base_component_to_string(source_component);
if (NULL == key) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
bo.bytes = (uint8_t *)data;
bo.size = size;
/* the store API makes a copy of the provided data */
rc = ompi_rte_db_store(OMPI_PROC_MY_NAME, key, &bo, OPAL_BYTE_OBJECT);
free(key);
return rc;
}
int
ompi_modex_recv(const mca_base_component_t *component,
const ompi_proc_t *proc,
void **buffer,
size_t *size)
{
int rc;
char *key;
opal_byte_object_t *boptr;
/* set defaults */
*buffer = NULL;
*size = 0;
key = mca_base_component_to_string(component);
if (NULL == key) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
/* the fetch API returns a pointer to the data */
rc = ompi_rte_db_fetch(proc, key, (void**)&boptr, OPAL_BYTE_OBJECT);
if (OMPI_SUCCESS == rc) {
/* xfer the data - it was allocated in the call */
*buffer = (void*)boptr->bytes;
*size = boptr->size;
/* we no longer require the struct itself since all we
* wanted was the data inside it
*/
free(boptr);
}
free(key);
return rc;
}
int
ompi_modex_send_string(const char* key,
const void *buffer, size_t size)
{
int rc;
opal_byte_object_t bo;
bo.bytes = (uint8_t *)buffer;
bo.size = size;
/* the store API makes a copy of the provided data */
rc = ompi_rte_db_store(OMPI_PROC_MY_NAME, key, &bo, OPAL_BYTE_OBJECT);
return rc;
}
int
ompi_modex_recv_string(const char* key,
const ompi_proc_t *source_proc,
void **buffer, size_t *size)
{
int rc;
opal_byte_object_t *boptr;
/* set defaults */
*buffer = NULL;
*size = 0;
/* the fetch API returns a copy of the data */
rc = ompi_rte_db_fetch(source_proc, key, (void**)&boptr, OPAL_BYTE_OBJECT);
if (OMPI_SUCCESS == rc) {
/* xfer the data for local use */
*buffer = boptr->bytes;
*size = boptr->size;
}
/* we no longer require the struct itself since all we
* wanted was the data inside it
*/
free(boptr);
return rc;
}
int
ompi_modex_send_key_value(const char* key,
const void *value,
opal_data_type_t dtype)
{
int rc;
/* the store API makes a copy of the provided data */
rc = ompi_rte_db_store(OMPI_PROC_MY_NAME, key, value, dtype);
return rc;
}
int ompi_modex_recv_key_value(const char* key,
const ompi_proc_t *source_proc,
void **value, opal_data_type_t type)
{
int rc;
/* the fetch API returns the data */
rc = ompi_rte_db_fetch(source_proc, key, (void**)value, type);
return rc;
}
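The helpers above are replaced throughout this commit by OPAL-level macros. A hedged before/after sketch of the call-site conversion follows; "component" and "peer" are placeholders, the scope flag varies per BTL as the later hunks show, and <stdlib.h>/<stdint.h> are assumed for free() and uint8_t.
/* sketch only, not part of the commit */
static int example_modex_conversion(mca_base_component_t *component,
                                    opal_proc_t *peer)
{
    int rc;
    uint8_t payload[16] = {0};
    uint8_t *peer_payload = NULL;
    size_t size;
    /* was: rc = ompi_modex_send(component, payload, sizeof(payload)); */
    OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_REMOTE,
                    component, payload, sizeof(payload));
    if (OPAL_SUCCESS != rc) {
        return rc;
    }
    /* was: rc = ompi_modex_recv(component, peer, (void**)&peer_payload, &size); */
    OPAL_MODEX_RECV(rc, component, peer, &peer_payload, &size);
    if (OPAL_SUCCESS == rc) {
        free(peer_payload);  /* the old API documented caller-owned copies; assumed unchanged */
    }
    return rc;
}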


@ -1,252 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2012 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file
* Open MPI module-related data transfer mechanism
*
* A system for publishing module-related data for global
* initialization. Known simply as the "modex", this interface
* provides a system for sharing data, particularly data related to
* modules and their availability on the system.
*
* The modex system is tightly integrated into the general run-time
* initialization system and takes advantage of global update periods
* to minimize the amount of network traffic. All updates are also
* stored in the general purpose registry, and can be read at any time
* during the life of the process. Care should be taken to not call
* the blocking receive during the first stage of global
* initialization, as data will not be available and the process will
* likely hang.
*
* @note For the purpose of this interface, two components are
* "corresponding" if:
* - they share the same major and minor MCA version number
* - they have the same type name string
* - they share the same major and minor type version number
* - they have the same component name string
* - they share the same major and minor component version number
*/
#ifndef MCA_OMPI_MODULE_EXCHANGE_H
#define MCA_OMPI_MODULE_EXCHANGE_H
#include "ompi_config.h"
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#include "opal/dss/dss_types.h"
#include "opal/mca/mca.h"
#include "ompi/proc/proc.h"
BEGIN_C_DECLS
/**
* Send a module-specific buffer to all other corresponding MCA
* modules in peer processes
*
* This function takes a contiguous buffer of network-ordered data
* and makes it available to all other MCA processes during the
* selection process. Modules sent by one source_component can only
* be received by a corresponding module with the same
* component name.
*
* This function is intended to be used during MCA module
* initialization \em before \em selection (the selection process is
* defined differently for each component type). Each module will
* provide a buffer containing meta information and/or parameters
* that it wants to share with its corresponding modules in peer
* processes. This information typically contains location /
* contact information for establishing communication between
* processes (in a manner that is specific to that module). For
* example, a TCP-based module could provide its IP address and TCP
* port where it is waiting on listen(). The peer process receiving
* this buffer can therefore open a socket to the indicated IP
* address and TCP port.
*
* During the selection process, the MCA framework will effectively
* perform an "allgather" operation of all modex buffers; every
* buffer will be available to every peer process (see
* ompi_modex_recv()).
*
* The buffer is copied during the send call and may be modified or
* free()'ed immediately after the return from this function call.
*
* @note Buffer contents are transparent to the MCA framework -- they \em
* must already either be in network order or in some format that
* peer processes will be able to read, regardless of pointer sizes
* or endian bias.
*
* @param[in] source_component A pointer to this module's component
* structure
* @param[in] buffer A pointer to the beginning of the buffer to send
* @param[in] size Number of bytes in the buffer
*
* @retval OMPI_SUCCESS On success
* @retval OMPI_ERROR An unspecified error occurred
*/
OMPI_DECLSPEC int ompi_modex_send(const mca_base_component_t *source_component,
const void *buffer, size_t size);
/**
* Send a buffer to all other corresponding peer process
*
* Similar to ompi_modex_send(), but uses a char* key instead of a
* component name for indexing. All other semantics apply.
*
* @note Buffer contents are transparent to the modex -- they \em must
* already either be in network order or in some format that peer
* processes will be able to read, regardless of pointer sizes or
* endian bias.
*
* @param[in] key A unique key for data storage / lookup
* @param[in] buffer A pointer to the beginning of the buffer to send
* @param[in] size Number of bytes in the buffer
*
* @retval OMPI_SUCCESS On success
* @retval OMPI_ERROR An unspecified error occurred
*/
OMPI_DECLSPEC int ompi_modex_send_string(const char* key,
const void *buffer, size_t size);
/**
* Send a value to all other corresponding peer process
*
* Similar to ompi_modex_send(), but uses a char* key instead of a
* component name for indexing, and performs all required conditioning
* to deal with heterogeneity.
*
* @param[in] key A unique key for data storage / lookup
* @param[in] value A pointer to data value
* @param[in] dtype Data type of the value
*
* @retval OMPI_SUCCESS On success
* @retval OMPI_ERROR An unspecified error occurred
*/
OMPI_DECLSPEC int ompi_modex_send_key_value(const char* key,
const void *value,
opal_data_type_t dtype);
/**
* Receive a module-specific buffer from a corresponding MCA module
* in a specific peer process
*
* This is the corresponding "get" call to ompi_modex_send().
* After selection, modules can call this function to receive the
* buffer sent by their corresponding module on the process
* source_proc.
*
* If a buffer from a corresponding module is found, buffer will be
* filled with a pointer to a copy of the buffer that was sent by
* the peer process. It is the caller's responsibility to free this
* buffer. The size will be filled in with the total size of the
* buffer.
*
* @note If the modex system has received information from a given
* process, but has not yet received information for the given
* component, ompi_modex_recv() will return no data. This
* cannot happen to a process that has gone through the normal
* startup procedure, but if you believe this can happen with your
* component, you should use ompi_modex_recv_nb() to receive updates
* when the information becomes available.
*
* @param[in] dest_component A pointer to this module's component struct
* @param[in] source_proc Peer process to receive from
* @param[out] buffer A pointer to a (void*) that will be filled
* with a pointer to the received buffer
* @param[out] size Pointer to a size_t that will be filled with
* the number of bytes in the buffer
*
* @retval OMPI_SUCCESS If a corresponding module buffer is found and
* successfully returned to the caller.
* @retval OMPI_ERR_NOT_IMPLEMENTED Modex support is not available in
* this build of Open MPI (systems like the Cray XT)
* @retval OMPI_ERR_OUT_OF_RESOURCE No memory could be allocated for the
* buffer.
*/
OMPI_DECLSPEC int ompi_modex_recv(const mca_base_component_t *dest_component,
const ompi_proc_t *source_proc,
void **buffer, size_t *size);
/**
* Receive a buffer from a given peer
*
* Similar to ompi_modex_recv(), but uses a char* key instead of a
* component name for indexing. All other semantics apply.
*
* @note If the modex system has received information from a given
* process, but has not yet received information for the given
* component, ompi_modex_recv_string() will return no data. This
* cannot happen to a process that has gone through the normal startup
* procedure, but if you believe this can happen with your component,
* you should use ompi_modex_recv_string_nb() to receive updates when
* the information becomes available.
*
* @param[in] key A unique key for data storage / lookup
* @param[in] source_proc Peer process to receive from
* @param[out] buffer A pointer to a (void*) that will be filled
* with a pointer to the received buffer
* @param[out] size Pointer to a size_t that will be filled with
* the number of bytes in the buffer
*
* @retval OMPI_SUCCESS If a corresponding module buffer is found and
* successfully returned to the caller.
* @retval OMPI_ERR_NOT_IMPLEMENTED Modex support is not available in
* this build of Open MPI (systems like the Cray XT)
* @retval OMPI_ERR_OUT_OF_RESOURCE No memory could be allocated for the
* buffer.
*/
OMPI_DECLSPEC int ompi_modex_recv_string(const char* key,
const ompi_proc_t *source_proc,
void **buffer, size_t *size);
/**
* Recv a value from a given peer
*
* Similar to ompi_modex_recv(), but uses a char* key instead of a
* component name for indexing, and performs all required conditioning
* to deal with heterogeneity.
*
* @param[in] key A unique key for data storage / lookup
* @param[in] source_proc Peer process to receive from
* @param[in] value A pointer to the address where the data
* value will be stored
* @param[in] dtype Data type of the value
*
* @retval OMPI_SUCCESS If a corresponding module value is found and
* successfully returned to the caller.
* @retval OMPI_ERR_NOT_IMPLEMENTED Modex support is not available in
* this build of Open MPI (systems like the Cray XT)
*/
OMPI_DECLSPEC int ompi_modex_recv_key_value(const char* key,
const ompi_proc_t *source_proc,
void **value,
opal_data_type_t dtype);
END_C_DECLS
#endif /* MCA_OMPI_MODULE_EXCHANGE_H */
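For reference, a minimal sketch of how a component used this now-removed interface, following the documentation above: publish a contact blob before selection, then look up a peer's blob after the modex allgather. Names such as "old_modex_usage", "my_component", "peer", and the contact struct are hypothetical; <stdlib.h> is assumed for free().
static int old_modex_usage(const mca_base_component_t *my_component,
                           const ompi_proc_t *peer)
{
    int rc;
    /* fields would be converted to network order per the note above */
    struct { uint32_t addr; uint16_t port; } contact = { 0, 0 };
    void *blob = NULL;
    size_t blob_size = 0;
    /* during component init, before selection */
    rc = ompi_modex_send(my_component, &contact, sizeof(contact));
    if (OMPI_SUCCESS != rc) {
        return rc;
    }
    /* after the allgather, e.g. in add_procs() */
    rc = ompi_modex_recv(my_component, peer, &blob, &blob_size);
    if (OMPI_SUCCESS == rc) {
        free(blob);   /* the docs above make the caller responsible for the copy */
    }
    return rc;
}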


@ -51,6 +51,7 @@
#include "opal/mca/mpool/base/mpool_base_tree.h"
#include "opal/mca/rcache/base/base.h"
#include "opal/mca/allocator/base/base.h"
#include "opal/mca/pmix/pmix.h"
#include "mpi.h"
#include "ompi/constants.h"
@ -94,7 +95,6 @@ int ompi_mpi_finalize(void)
static int32_t finalize_has_already_started = 0;
opal_list_item_t *item;
struct timeval ompistart, ompistop;
ompi_rte_collective_t *coll;
ompi_proc_t** procs;
size_t nprocs;
@ -227,17 +227,7 @@ int ompi_mpi_finalize(void)
del_procs behavior around May of 2014 (see
https://svn.open-mpi.org/trac/ompi/ticket/4669#comment:4 for
more details). */
coll = OBJ_NEW(ompi_rte_collective_t);
coll->id = ompi_process_info.peer_fini_barrier;
coll->active = true;
if (OMPI_SUCCESS != (ret = ompi_rte_barrier(coll))) {
OMPI_ERROR_LOG(ret);
return ret;
}
/* wait for barrier to complete */
OMPI_LAZY_WAIT_FOR_COMPLETION(coll->active);
OBJ_RELEASE(coll);
opal_pmix.fence(NULL, 0);
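/* NULL proc array + 0 count: fence across all procs in our own job, per the
 * rationale given with the matching fence call in ompi_mpi_init() later in
 * this commit */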
/* check for timing request - get stop time and report elapsed
time if so */


@ -55,12 +55,12 @@
#include "opal/mca/rcache/base/base.h"
#include "opal/mca/rcache/rcache.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/pmix/pmix.h"
#include "ompi/constants.h"
#include "ompi/mpi/fortran/base/constants.h"
#include "ompi/runtime/mpiruntime.h"
#include "ompi/runtime/params.h"
#include "ompi/runtime/ompi_module_exchange.h"
#include "ompi/communicator/communicator.h"
#include "ompi/info/info.h"
#include "ompi/errhandler/errcode.h"
@ -387,7 +387,6 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
size_t nprocs;
char *error = NULL;
struct timeval ompistart, ompistop;
ompi_rte_collective_t *coll;
char *cmd=NULL, *av=NULL;
/* bitflag of the thread level support provided. To be used
@ -533,11 +532,15 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
memset ( &threadlevel_bf, 0, sizeof(uint8_t));
OMPI_THREADLEVEL_SET_BITFLAG ( ompi_mpi_thread_provided, threadlevel_bf );
#if OMPI_ENABLE_THREAD_MULTIPLE
/* add this bitflag to the modex */
if ( OMPI_SUCCESS != (ret = ompi_modex_send_string("MPI_THREAD_LEVEL", &threadlevel_bf, sizeof(uint8_t)))) {
OPAL_MODEX_SEND_STRING(ret, PMIX_SYNC_REQD, PMIX_GLOBAL,
"MPI_THREAD_LEVEL", &threadlevel_bf, sizeof(uint8_t));
if (OPAL_SUCCESS != ret) {
error = "ompi_mpi_init: modex send thread level";
goto error;
}
#endif
/* initialize datatypes. This step should be done early as it will
* create the local convertor and local arch used in the proc
@ -644,22 +647,12 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
gettimeofday(&ompistart, NULL);
}
/* exchange connection info - this function also acts as a barrier
* as it will not return until the exchange is complete
/* exchange connection info - this function may also act as a barrier
* if data exchange is required. The modex occurs solely across procs
* in our job, so no proc array is passed. If a barrier is required,
* the "fence" function will perform it internally
*/
coll = OBJ_NEW(ompi_rte_collective_t);
coll->id = ompi_process_info.peer_modex;
coll->active = true;
if (OMPI_SUCCESS != (ret = ompi_rte_modex(coll))) {
error = "rte_modex failed";
goto error;
}
/* wait for modex to complete - this may be moved anywhere in mpi_init
* so long as it occurs prior to calling a function that needs
* the modex info!
*/
OMPI_WAIT_FOR_COMPLETION(coll->active);
OBJ_RELEASE(coll);
OPAL_FENCE(NULL, 0, NULL, NULL);
if (ompi_enable_timing && 0 == OMPI_PROC_MY_NAME->vpid) {
gettimeofday(&ompistop, NULL);
@ -835,17 +828,10 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided)
gettimeofday(&ompistart, NULL);
}
/* wait for everyone to reach this point */
coll = OBJ_NEW(ompi_rte_collective_t);
coll->id = ompi_process_info.peer_init_barrier;
coll->active = true;
if (OMPI_SUCCESS != (ret = ompi_rte_barrier(coll))) {
error = "rte_barrier failed";
goto error;
}
/* wait for barrier to complete */
OMPI_WAIT_FOR_COMPLETION(coll->active);
OBJ_RELEASE(coll);
/* wait for everyone to reach this point - this is a hard
* barrier requirement at this time, though we hope to relax
* it at a later point */
opal_pmix.fence(NULL, 0);
/* check for timing request - get stop time and report elapsed
time if so, then start the clock again */


@ -14,7 +14,7 @@
* Copyright (c) 2007-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -63,7 +63,6 @@ bool ompi_mpi_keep_fqdn_hostnames = false;
bool ompi_have_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);
bool ompi_use_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);
uint32_t ompi_hostname_cutoff = UINT32_MAX;
bool ompi_mpi_yield_when_idle = true;
int ompi_mpi_event_tick_rate = -1;
char *ompi_mpi_show_mca_params_string = NULL;
@ -304,15 +303,6 @@ int ompi_mpi_register_params(void)
mca_base_var_register_synonym(value, "opal", "opal", NULL, "cuda_support",
MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
/* cutoff for retrieving hostnames */
ompi_hostname_cutoff = UINT32_MAX;
(void) mca_base_var_register ("ompi", "ompi", NULL, "hostname_cutoff",
"If the number of processes in the application exceeds the provided value,"
"hostnames for remote processes will not be retrieved by applications [default: UINT32_MAX]",
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&ompi_hostname_cutoff);
return OMPI_SUCCESS;
}


@ -51,10 +51,6 @@ typedef struct {
uint8_t *bytes;
} opal_byte_object_t;
/* define an RTE-agnostic process identifier */
typedef uint64_t opal_identifier_t;
#define OPAL_ID_T OPAL_UINT64
/* Type defines for packing and unpacking */
#define OPAL_UNDEF (opal_data_type_t) 0 /**< type hasn't been defined yet */
#define OPAL_BYTE (opal_data_type_t) 1 /**< a byte of data */


@ -77,7 +77,9 @@ enum {
OPAL_ERR_PROC_ENTRY_NOT_FOUND = (OPAL_ERR_BASE - 47),
OPAL_ERR_DATA_VALUE_NOT_FOUND = (OPAL_ERR_BASE - 48),
OPAL_ERR_CONNECTION_FAILED = (OPAL_ERR_BASE - 49),
OPAL_ERR_AUTHENTICATION_FAILED = (OPAL_ERR_BASE - 50)
OPAL_ERR_AUTHENTICATION_FAILED = (OPAL_ERR_BASE - 50),
OPAL_ERR_COMM_FAILURE = (OPAL_ERR_BASE - 51),
OPAL_ERR_SERVER_NOT_AVAIL = (OPAL_ERR_BASE - 52)
};
#define OPAL_ERR_MAX (OPAL_ERR_BASE - 100)


@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -44,6 +45,11 @@
#include "opal/util/output.h"
#endif
/* define an RTE-agnostic process identifier */
typedef uint64_t opal_identifier_t;
#define OPAL_ID_T OPAL_UINT64
#define OPAL_ID_INVALID UINT64_MAX
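Elsewhere in this commit, process names are passed to the dstore by casting to this 64-bit identifier. A hedged illustration of that convention follows; it assumes the process name structure occupies exactly 64 bits, and "example_name_to_id"/"name" are hypothetical.
static inline opal_identifier_t example_name_to_id(const void *name)
{
    opal_identifier_t id = OPAL_ID_INVALID;
    memcpy(&id, name, sizeof(opal_identifier_t));   /* requires <string.h> */
    return id;
}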
/*
* portable assignment of pointer to int


@ -18,7 +18,7 @@
* Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011-2014 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2012 Oak Ridge National Laboratory. All rights reserved
* Copyright (c) 2013 Intel, Inc. All rights reserved
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@ -83,6 +83,7 @@
#include "opal/mca/common/verbs/common_verbs.h"
#include "opal/runtime/opal_params.h"
#include "opal/runtime/opal.h"
#include "opal/mca/pmix/pmix.h"
#include "btl_openib.h"
#include "btl_openib_frag.h"
@ -444,8 +445,9 @@ static int btl_openib_modex_send(void)
}
/* All done -- send it! */
rc = opal_modex_send(&mca_btl_openib_component.super.btl_version,
message, msg_size);
OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_REMOTE,
&mca_btl_openib_component.super.btl_version,
message, msg_size);
free(message);
opal_output(-1, "Modex sent! %d calculated, %d actual\n", (int) msg_size, (int) (offset - message));


@ -11,6 +11,7 @@
* All rights reserved.
* Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2006-2007 Voltaire All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -21,6 +22,7 @@
#include "opal_config.h"
#include "opal/util/arch.h"
#include "opal/mca/pmix/pmix.h"
#include "btl_openib.h"
#include "btl_openib_proc.h"
@ -145,10 +147,8 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_create(opal_proc_t* proc)
module_proc->proc_opal = proc;
/* query for the peer address info */
rc = opal_modex_recv(&mca_btl_openib_component.super.btl_version,
proc,
&message,
&msg_size);
OPAL_MODEX_RECV(rc, &mca_btl_openib_component.super.btl_version,
proc, &message, &msg_size);
if (OPAL_SUCCESS != rc) {
BTL_ERROR(("[%s:%d] opal_modex_recv failed for peer %s",
__FILE__, __LINE__,


@ -13,6 +13,7 @@
* Copyright (c) 2010-2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -32,6 +33,7 @@
#include "opal/mca/btl/btl.h"
#include "opal/datatype/opal_convertor.h"
#include "opal/util/proc.h"
#include "opal/mca/pmix/pmix.h"
#include "btl_portals4.h"
#include "btl_portals4_recv.h"
@ -86,8 +88,8 @@ mca_btl_portals4_add_procs(struct mca_btl_base_module_t* btl_base,
continue;
}
ret = opal_modex_recv(&mca_btl_portals4_component.super.btl_version,
curr_proc, (void**) &id, &size);
OPAL_MODEX_RECV(ret, &mca_btl_portals4_component.super.btl_version,
curr_proc, (void**) &id, &size);
if (OPAL_SUCCESS != ret) {
opal_output_verbose(0, opal_btl_base_framework.framework_output,


@ -11,6 +11,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2010-2012 Sandia National Laboratories. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -20,6 +21,9 @@
#include "opal_config.h"
#include "opal/mca/event/event.h"
#include "opal/util/output.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/util/show_help.h"
#include "opal/mca/btl/btl.h"
#include "opal/mca/btl/base/base.h"
@ -530,8 +534,9 @@ static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls,
interface, portals4_btl->portals_ni_h,
ptl_process_ids[interface].phys.nid, ptl_process_ids[interface].phys.pid));
}
ret = opal_modex_send(&mca_btl_portals4_component.super.btl_version,
ptl_process_ids, mca_btl_portals4_component.num_btls * sizeof(ptl_process_t));
OPAL_MODEX_SEND(ret, PMIX_SYNC_REQD, PMIX_REMOTE,
&mca_btl_portals4_component.super.btl_version,
ptl_process_ids, mca_btl_portals4_component.num_btls * sizeof(ptl_process_t));
if (OPAL_SUCCESS != ret) {
opal_output_verbose(1, opal_btl_base_framework.framework_output,
"%s:%d: opal_modex_send failed: %d\n",


@ -4,6 +4,7 @@
* reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -17,6 +18,7 @@
#include "opal/runtime/opal_params.h"
#include "opal/include/opal/align.h"
#include "opal/memoryhooks/memory.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/mca/base/mca_base_pvar.h"
@ -209,10 +211,15 @@ static void mca_btl_scif_autoset_leave_pinned (void) {
static int mca_btl_scif_modex_send (void)
{
mca_btl_scif_modex_t modex;
int rc;
memset(&modex, 0, sizeof(mca_btl_scif_modex_t));
modex.port_id = mca_btl_scif_module.port_id;
return opal_modex_send (&mca_btl_scif_component.super.btl_version, &modex, sizeof (modex));
OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_LOCAL,
&mca_btl_scif_component.super.btl_version,
&modex, sizeof (modex));
return rc;
}


@ -2,6 +2,7 @@
/*
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -13,6 +14,7 @@
#define MCA_BTL_SCIF_ENDPOINT_H
#include "btl_scif.h"
#include "opal/mca/pmix/pmix.h"
typedef enum mca_btl_scif_endpoint_state_t {
MCA_BTL_SCIF_EP_STATE_INIT,
@ -78,8 +80,8 @@ static inline int mca_btl_scif_ep_init (mca_btl_scif_endpoint_t *endpoint,
OBJ_CONSTRUCT(endpoint, mca_btl_scif_endpoint_t);
endpoint->state = MCA_BTL_SCIF_EP_STATE_INIT;
rc = opal_modex_recv (&mca_btl_scif_component.super.btl_version, peer_proc,
(void **) &modex, &msg_size);
OPAL_MODEX_RECV(rc, &mca_btl_scif_component.super.btl_version,
peer_proc, (void **) &modex, &msg_size);
if (OPAL_SUCCESS != rc) {
return rc;
}


@ -16,6 +16,7 @@
* Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2013-2014 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -63,6 +64,7 @@
#include "opal/mca/btl/base/base.h"
#include "opal/mca/mpool/base/base.h"
#include "opal/mca/btl/base/btl_base_error.h"
#include "opal/mca/pmix/pmix.h"
#include "btl_tcp.h"
#include "btl_tcp_addr.h"
@ -939,8 +941,9 @@ static int mca_btl_tcp_component_exchange(void)
#endif
} /* end of for opal_ifbegin() */
} /* end of for tcp_num_btls */
rc = opal_modex_send(&mca_btl_tcp_component.super.btl_version,
addrs, xfer_size);
OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_GLOBAL,
&mca_btl_tcp_component.super.btl_version,
addrs, xfer_size);
free(addrs);
} /* end if */
return rc;


@ -11,7 +11,7 @@
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008-2010 Oracle and/or its affiliates. All rights reserved
* Copyright (c) 2013 Intel, Inc. All rights reserved
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -30,6 +30,7 @@
#include "opal/class/opal_hash_table.h"
#include "opal/mca/btl/base/btl_base_error.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/util/arch.h"
#include "opal/util/argv.h"
#include "opal/util/if.h"
@ -121,10 +122,8 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(const opal_proc_t* proc)
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
/* lookup tcp parameters exported by this proc */
rc = opal_modex_recv( &mca_btl_tcp_component.super.btl_version,
proc,
(void**)&btl_proc->proc_addrs,
&size );
OPAL_MODEX_RECV(rc, &mca_btl_tcp_component.super.btl_version,
proc, (uint8_t**)&btl_proc->proc_addrs, &size);
if(rc != OPAL_SUCCESS) {
if(OPAL_ERR_NOT_FOUND != rc)
BTL_ERROR(("opal_modex_recv: failed with return value=%d", rc));


@ -15,6 +15,7 @@
* Copyright (c) 2008-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -60,6 +61,7 @@
#include "opal/mca/btl/base/base.h"
#include "opal/util/proc.h"
#include "opal/mca/common/verbs/common_verbs.h"
#include "opal/mca/pmix/pmix.h"
#include "btl_usnic.h"
#include "btl_usnic_connectivity.h"
@ -253,8 +255,9 @@ static int usnic_modex_send(void)
}
}
rc = opal_modex_send(&mca_btl_usnic_component.super.btl_version,
addrs, size);
OPAL_MODEX_SEND(rc, PMIX_SYNC_REQD, PMIX_REMOTE,
&mca_btl_usnic_component.super.btl_version,
addrs, size);
if (NULL != addrs) {
free(addrs);
}


@ -12,7 +12,7 @@
* Copyright (c) 2006 Sandia National Laboratories. All rights
* reserved.
* Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -28,6 +28,7 @@
#include "opal/util/arch.h"
#include "opal/util/show_help.h"
#include "opal/constants.h"
#include "opal/mca/pmix/pmix.h"
#include "btl_usnic.h"
#include "btl_usnic_proc.h"
@ -191,9 +192,8 @@ static int create_proc(opal_proc_t *opal_proc,
proc->proc_opal = opal_proc;
/* query for the peer address info */
rc = opal_modex_recv(&mca_btl_usnic_component.super.btl_version,
opal_proc, (void*)&proc->proc_modex,
&size);
OPAL_MODEX_RECV(rc, &mca_btl_usnic_component.super.btl_version,
opal_proc, (uint8_t**)&proc->proc_modex, &size);
/* If this proc simply doesn't have this key, then they're not
running the usnic BTL -- just ignore them. Otherwise, show an


@ -15,6 +15,7 @@
* Copyright (c) 2010-2014 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2011 NVIDIA Corporation. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -25,6 +26,7 @@
#include "opal/util/output.h"
#include "opal/mca/btl/base/btl_base_error.h"
#include "opal/mca/pmix/pmix.h"
#include "btl_vader.h"
#include "btl_vader_frag.h"
@ -215,6 +217,7 @@ static int mca_btl_base_vader_modex_send (void)
{
struct vader_modex_t modex;
int modex_size;
int rc;
#if OPAL_BTL_VADER_HAVE_XPMEM
modex.seg_id = mca_btl_vader_component.my_seg_id;
@ -226,7 +229,10 @@ static int mca_btl_base_vader_modex_send (void)
memmove (&modex.seg_ds, &mca_btl_vader_component.seg_ds, modex_size);
#endif
return opal_modex_send(&mca_btl_vader_component.super.btl_version, &modex, modex_size);
OPAL_MODEX_SEND(rc, PMIX_ASYNC_RDY, PMIX_LOCAL,
&mca_btl_vader_component.super.btl_version,
&modex, modex_size);
return rc;
}
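/* Editorial note on the flags used with OPAL_MODEX_SEND across the converted
 * BTLs in this commit (a hedged reading of the call sites, not a definition):
 *   PMIX_LOCAL   - payload only consumed by procs on the same node (scif, vader)
 *   PMIX_REMOTE  - payload only consumed by procs on other nodes (openib,
 *                  portals4, usnic, ugni)
 *   PMIX_GLOBAL  - payload needed by all procs (tcp, the MPI_THREAD_LEVEL string)
 *   PMIX_SYNC_REQD vs PMIX_ASYNC_RDY - whether the sender requires the blocking
 *                  fence before peers may read the data, or is ready for the
 *                  asynchronous exchange
 */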
/*


@ -14,6 +14,7 @@
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2010-2014 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -23,6 +24,8 @@
#include "opal_config.h"
#include "opal/mca/pmix/pmix.h"
#include "btl_vader.h"
#include "btl_vader_endpoint.h"
#include "btl_vader_fifo.h"
@ -171,8 +174,9 @@ static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_
ep->peer_smp_rank = remote_rank;
if (remote_rank != MCA_BTL_VADER_LOCAL_RANK) {
if (OPAL_SUCCESS != (rc = opal_modex_recv(&component->super.btl_version,
proc, (void *)&modex, &msg_size))) {
OPAL_MODEX_RECV(rc, &component->super.btl_version,
proc, (uint8_t**)&modex, &msg_size);
if (OPAL_SUCCESS != rc) {
return rc;
}


@ -1,47 +0,0 @@
#
# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2012 Sandia National Laboratories. All rights reserved.
# Copyright (c) 2013 Los Alamos National Security, LLC. All rights
# reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This library is shared among all the consumers of PMI to provide a
# single point for initializing and shutting down PMI.
#
# This is not a component in the traditional sense. It is a library
# (either rolled into libmpi if the components are rolled into libmpi
# or a stand-alone library). Users of this "component" must
# explicitly link against libmca_common_pmi.la when building
# stand-alone libraries (it won't hurt to do so in all cases).
#
# Note that building this common component statically and linking
# against other dynamic components is *not* supported!
AM_CPPFLAGS = $(common_pmi_CPPFLAGS)
dist_opaldata_DATA = help-common-pmi.txt
# control whether building an installed library or a convenience
# (noinst) library
if MCA_BUILD_opal_common_pmi_DSO
component_noinst =
component_install = libmca_common_pmi.la
else
component_noinst = libmca_common_pmi.la
component_install =
endif
lib_LTLIBRARIES = $(component_install)
noinst_LTLIBRARIES = $(component_noinst)
libmca_common_pmi_la_SOURCES = common_pmi.h common_pmi.c
if WANT_PMI2_SUPPORT
libmca_common_pmi_la_SOURCES += pmi2_pmap_parser.h pmi2_pmap_parser.c
endif
libmca_common_pmi_la_LDFLAGS = $(common_pmi_LDFLAGS) -version-info $(libmca_opal_common_pmi_so_version)
libmca_common_pmi_la_LIBADD = $(common_pmi_LIBS)


@ -1,625 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All
* rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "opal/constants.h"
#include "opal/types.h"
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "pmi2_pmap_parser.h"
#include <string.h>
#include <pmi.h>
#if WANT_PMI2_SUPPORT
#include <pmi2.h>
#if !defined(PMI2_SUCCESS)
#define PMI2_SUCCESS PMI_SUCCESS
#endif
#endif
#include "common_pmi.h"
// usage accounting
static int mca_common_pmi_init_count = 0;
// per-launch selection between PMI versions
static int mca_common_pmi_version = 0;
// PMI constant values:
static int pmi_kvslen_max = 0;
static int pmi_keylen_max = 0;
static int pmi_vallen_max = 0;
// Job environment description
static int pmi_size = 0;
static int pmi_rank = 0;
static int pmi_appnum = 0;
static int pmi_usize = 0;
static char *pmi_kvs_name = NULL;
#if WANT_PMI2_SUPPORT
static int mca_initialize_pmi_v2(void)
{
int spawned, size, rank, appnum;
int rc, ret = OPAL_ERROR;
/* deal with a Slurm bug by first checking if we were
* even launched by a PMI server before attempting
* to use PMI */
if (NULL == getenv("PMI_FD")) {
return OPAL_ERROR;
}
/* if PMI2 is already initialized, there is nothing more for us to do */
if ( PMI2_Initialized () ) {
return OPAL_SUCCESS;
}
size = -1;
rank = -1;
appnum = -1;
if (PMI2_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) {
opal_show_help("help-common-pmi.txt", "pmi2-init-failed", true, rc);
return OPAL_ERROR;
}
if( size < 0 || rank < 0 ){
opal_output(0, "SIZE %d RANK %d", size, rank);
opal_show_help("help-common-pmi.txt", "pmi2-init-returned-bad-values", true);
goto err_exit;
}
pmi_size = size;
pmi_rank = rank;
pmi_appnum = appnum;
pmi_vallen_max = PMI2_MAX_VALLEN;
pmi_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility?
pmi_keylen_max = PMI2_MAX_KEYLEN;
char buf[16];
int found;
rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found);
if( PMI2_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI_Get_universe_size");
goto err_exit;
}
pmi_usize = atoi(buf);
pmi_kvs_name = (char*)malloc(pmi_kvslen_max);
if( pmi_kvs_name == NULL ){
PMI2_Finalize();
ret = OPAL_ERR_OUT_OF_RESOURCE;
goto err_exit;
}
rc = PMI2_Job_GetId(pmi_kvs_name, pmi_kvslen_max);
if( PMI2_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
goto err_exit;
}
return OPAL_SUCCESS;
err_exit:
PMI2_Finalize();
return ret;
}
#endif
static int mca_initialize_pmi_v1(void)
{
PMI_BOOL initialized;
int spawned;
int rc, ret = OPAL_ERROR;
if (PMI_SUCCESS != (rc = PMI_Initialized(&initialized))) {
OPAL_PMI_ERROR(rc, "PMI_Initialized");
return OPAL_ERROR;
}
if( PMI_TRUE != initialized && PMI_SUCCESS != (rc = PMI_Init(&spawned)) ) {
OPAL_PMI_ERROR(rc, "PMI_Init");
return OPAL_ERROR;
}
// Initialize space demands
rc = PMI_KVS_Get_value_length_max(&pmi_vallen_max);
if( PMI_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI_KVS_Get_value_length_max");
goto err_exit;
}
rc = PMI_KVS_Get_name_length_max(&pmi_kvslen_max);
if (PMI_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI_KVS_Get_name_length_max");
goto err_exit;
}
rc = PMI_KVS_Get_key_length_max(&pmi_keylen_max);
if( PMI_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI_KVS_Get_key_length_max");
goto err_exit;
}
// Initialize job environment information
rc = PMI_Get_rank(&pmi_rank);
if( PMI_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI_Get_rank");
return OPAL_ERROR;
}
rc = PMI_Get_universe_size(&pmi_usize);
if( PMI_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI_Get_universe_size");
goto err_exit;
}
rc = PMI_Get_size(&pmi_size);
if( PMI_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI_Get_size");
goto err_exit;
}
rc = PMI_Get_appnum(&pmi_appnum);
if( PMI_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI_Get_appnum");
goto err_exit;
}
pmi_kvs_name = (char*)malloc(pmi_kvslen_max);
if( pmi_kvs_name == NULL ){
ret = OPAL_ERR_OUT_OF_RESOURCE;
goto err_exit;
}
rc = PMI_KVS_Get_my_name(pmi_kvs_name,pmi_kvslen_max);
if( PMI_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
goto err_exit;
}
return OPAL_SUCCESS;
err_exit:
PMI_Finalize();
return ret;
}
int mca_common_pmi_init (int preferred_version) {
int rc = OPAL_SUCCESS;
if (0 < mca_common_pmi_init_count++) {
return rc;
}
// Decide what version of PMI we want
#if WANT_PMI2_SUPPORT
{
bool auto_select = !(preferred_version >= 1 && preferred_version <= 2);
if( auto_select ){
// choose PMIv2
mca_common_pmi_version = 2;
}else{
mca_common_pmi_version = preferred_version;
}
if( mca_common_pmi_version == 2 ){
rc = mca_initialize_pmi_v2();
if( !auto_select || rc == OPAL_SUCCESS ){
// If we want exactly PMIv2 or we succeed
if( rc != OPAL_SUCCESS ){
mca_common_pmi_init_count--;
}
return rc;
}
}
}
#endif
mca_common_pmi_version = 1;
if( OPAL_SUCCESS != (rc = mca_initialize_pmi_v1()) ){
mca_common_pmi_init_count--;
}
return rc;
}
void mca_common_pmi_finalize (void) {
if (0 == mca_common_pmi_init_count) {
return;
}
if (0 == --mca_common_pmi_init_count) {
#if WANT_PMI2_SUPPORT
if( mca_common_pmi_version == 2){
PMI2_Finalize ();
}
else
#endif
{
PMI_Finalize ();
}
}
}
/* useful util */
char* opal_errmgr_base_pmi_error(int pmi_err)
{
char * err_msg;
switch(pmi_err) {
case PMI_FAIL: err_msg = "Operation failed"; break;
case PMI_ERR_INIT: err_msg = "PMI is not initialized"; break;
case PMI_ERR_NOMEM: err_msg = "Input buffer not large enough"; break;
case PMI_ERR_INVALID_ARG: err_msg = "Invalid argument"; break;
case PMI_ERR_INVALID_KEY: err_msg = "Invalid key argument"; break;
case PMI_ERR_INVALID_KEY_LENGTH: err_msg = "Invalid key length argument"; break;
case PMI_ERR_INVALID_VAL: err_msg = "Invalid value argument"; break;
case PMI_ERR_INVALID_VAL_LENGTH: err_msg = "Invalid value length argument"; break;
case PMI_ERR_INVALID_LENGTH: err_msg = "Invalid length argument"; break;
case PMI_ERR_INVALID_NUM_ARGS: err_msg = "Invalid number of arguments"; break;
case PMI_ERR_INVALID_ARGS: err_msg = "Invalid args argument"; break;
case PMI_ERR_INVALID_NUM_PARSED: err_msg = "Invalid num_parsed length argument"; break;
case PMI_ERR_INVALID_KEYVALP: err_msg = "Invalid keyvalp argument"; break;
case PMI_ERR_INVALID_SIZE: err_msg = "Invalid size argument"; break;
#if defined(PMI_ERR_INVALID_KVS)
/* pmi.h calls this a valid return code but mpich doesn't define it (slurm does). */
case PMI_ERR_INVALID_KVS: err_msg = "Invalid kvs argument"; break;
#endif
case PMI_SUCCESS: err_msg = "Success"; break;
default: err_msg = "Unkown error";
}
return err_msg;
}
int mca_common_pmi_rank()
{
return pmi_rank;
}
int mca_common_pmi_size()
{
return pmi_size;
}
int mca_common_pmi_appnum()
{
return pmi_appnum;
}
int mca_common_pmi_universe()
{
return pmi_usize;
}
int mca_common_pmi_kvslen() {
return pmi_kvslen_max;
}
int mca_common_pmi_keylen()
{
return pmi_keylen_max;
}
int mca_common_pmi_vallen()
{
return pmi_vallen_max;
}
int mca_common_pmi_kvsname(char *buf, int len)
{
int i;
if( (unsigned)len <= strnlen(pmi_kvs_name,pmi_kvslen_max) ){
return OPAL_ERR_BAD_PARAM;
}
for(i = 0; pmi_kvs_name[i]; i++){
buf[i] = pmi_kvs_name[i];
}
buf[i] = '\0';
return OPAL_SUCCESS;
}
int mca_common_pmi_id(char **pmi_id_ret, char **error){
char *pmi_id = NULL;
int rc;
// Default values
*pmi_id_ret = pmi_id;
*error = NULL;
#if WANT_PMI2_SUPPORT
if( mca_common_pmi_version == 2 ){
// TODO: add proper error handling
pmi_id = (char*)malloc(PMI2_MAX_VALLEN);
if( pmi_id == NULL ){
*error = "mca_common_pmi_id: could not get memory for PMIv2 ID";
return OPAL_ERR_OUT_OF_RESOURCE;
}
strncpy(pmi_id, pmi_kvs_name, pmi_kvslen_max);
}
else
#endif
{
int pmi_maxlen;
/* get our PMI id length */
if (PMI_SUCCESS != (rc = PMI_Get_id_length_max(&pmi_maxlen))) {
*error = "PMI_Get_id_length_max";
return OPAL_ERROR;
}
// TODO: add proper error handling
pmi_id = (char*)malloc(pmi_maxlen);
if( pmi_id == NULL ){
*error = "mca_common_pmi_id: could not get memory for PMIv1 ID";
return OPAL_ERR_OUT_OF_RESOURCE;
}
/* Get domain id */
if (PMI_SUCCESS != (rc = PMI_Get_kvs_domain_id(pmi_id, pmi_maxlen))) {
free(pmi_id);
*error = "PMI_Get_kvs_domain_id";
return OPAL_ERROR;
}
}
*pmi_id_ret = pmi_id;
return OPAL_SUCCESS;
}
int mca_common_pmi_local_info(int vpid, int **ranks_ret,
int *procs_ret, char **error)
{
int *ranks;
int procs = -1;
int rc;
#if WANT_PMI2_SUPPORT
if(mca_common_pmi_version == 2){
{
char *pmapping = (char*)malloc(PMI2_MAX_VALLEN);
if( pmapping == NULL ){
*error = "mca_common_pmi_local_info: could not get memory for PMIv2 process mapping";
return OPAL_ERR_OUT_OF_RESOURCE;
}
int found;
int my_node;
rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found);
if( !found || PMI2_SUCCESS != rc ) {
/* can't check PMI2_SUCCESS as some folks (i.e., Cray) don't define it */
OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr");
*error = "mca_common_pmi_local_info: could not get PMI_process_mapping";
return OPAL_ERROR;
}
ranks = mca_common_pmi2_parse_pmap(pmapping, vpid, &my_node, &procs);
if (NULL == ranks) {
*error = "mca_common_pmi_local_info: could not get memory for PMIv2 local ranks";
return OPAL_ERR_OUT_OF_RESOURCE;
}
free(pmapping);
}
}
else
#endif
{
/* get our local proc info to find our local rank */
if (PMI_SUCCESS != (rc = PMI_Get_clique_size(&procs))) {
OPAL_PMI_ERROR(rc, "PMI_Get_clique_size");
*error = "mca_common_pmi_local_info: could not get PMI clique size";
return OPAL_ERROR;
}
/* now get the specific ranks */
ranks = (int*)calloc(procs, sizeof(int));
if (NULL == ranks) {
*error = "mca_common_pmi_local_info: could not get memory for local ranks";
return OPAL_ERR_OUT_OF_RESOURCE;
}
if (PMI_SUCCESS != (rc = PMI_Get_clique_ranks(ranks, procs))) {
OPAL_PMI_ERROR(rc, "PMI_Get_clique_ranks");
*error = "mca_common_pmi_local_info: could not get clique ranks";
return OPAL_ERROR;
}
}
*ranks_ret = ranks;
*procs_ret = procs;
return OPAL_SUCCESS;
}
void mca_common_pmi_abort(int status, char *msg)
{
#if WANT_PMI2_SUPPORT
if( mca_common_pmi_version == 2){
PMI2_Abort(status, msg);
}
else
#endif
{
PMI_Abort(status, msg);
}
}
int rc;
int mca_common_pmi_publish(const char *service_name, const char *port_name)
{
#if WANT_PMI2_SUPPORT
if( mca_common_pmi_version == 2){
if (PMI2_SUCCESS != (rc = PMI2_Nameserv_publish(service_name, NULL, port_name))) {
OPAL_PMI_ERROR(rc, "PMI2_Nameserv_publish");
return OPAL_ERROR;
}
}
else
#endif
{
if (PMI_SUCCESS != (rc = PMI_Publish_name(service_name, port_name))) {
OPAL_PMI_ERROR(rc, "PMI_Publish_name");
return OPAL_ERROR;
}
}
return OPAL_SUCCESS;
}
int mca_common_pmi_lookup(const char *service_name, char **port_ret)
{
// FIXME:
// 1. Why don't we malloc memory for the port for PMI v1?
// 2. Maybe error handling is needed in pubsub?
// 3. Is it legal to call OPAL_PMI_ERROR for PMIv2 rc?
char *port = NULL;
*port_ret = port;
int rc;
#if WANT_PMI2_SUPPORT
if( mca_common_pmi_version == 2 ){
port = (char*)malloc(1024*sizeof(char)); /* arbitrary size */
if( port == NULL ){
return OPAL_ERR_OUT_OF_RESOURCE;
}
if (PMI_SUCCESS != (rc = PMI2_Nameserv_lookup(service_name, NULL, port, 1024))) {
OPAL_PMI_ERROR(rc, "PMI2_Nameserv_lookup");
free(port);
return OPAL_ERROR;
}
}
else
#endif
{
// Allocate mem for port here? Otherwise we won't get success!
// SLURM PMIv1 doesn't implement this function
if (PMI_SUCCESS != (rc = PMI_Lookup_name(service_name, port))) {
OPAL_PMI_ERROR(rc, "PMI_Lookup_name");
return OPAL_ERROR;
}
}
*port_ret = port;
return OPAL_SUCCESS;
}
int mca_common_pmi_unpublish ( const char *service_name )
{
int rc;
#if WANT_PMI2_SUPPORT
if( mca_common_pmi_version == 2 ){
if (PMI2_SUCCESS != (rc = PMI2_Nameserv_unpublish(service_name, NULL))) {
OPAL_PMI_ERROR(rc, "PMI2_Nameserv_unpublish");
return OPAL_ERROR;
}
}
else
#endif
{
if (PMI_SUCCESS != (rc = PMI_Unpublish_name(service_name))) {
OPAL_PMI_ERROR(rc, "PMI2_Nameserv_unpublish");
return OPAL_ERROR;
}
}
return OPAL_SUCCESS;
}
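A hedged usage sketch of the publish/lookup/unpublish trio above; the service and port strings are hypothetical, and note that the PMI-1 lookup path still carries the allocation FIXME noted earlier, so the free() applies to the PMI-2 copy.
static int example_pubsub(void)
{
    char *port = NULL;
    int ret;
    ret = mca_common_pmi_publish("ompi.example.svc", "tcp://10.0.0.1:7000");
    if (OPAL_SUCCESS != ret) {
        return ret;
    }
    if (OPAL_SUCCESS == mca_common_pmi_lookup("ompi.example.svc", &port) &&
        NULL != port) {
        /* use the port string to connect, then release the PMI-2 copy */
        free(port);
    }
    return mca_common_pmi_unpublish("ompi.example.svc");
}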
int mca_common_pmi_barrier()
{
#if WANT_PMI2_SUPPORT
if( mca_common_pmi_version == 2 ){
/* PMI2 doesn't provide a barrier, so use the Fence function here */
if (PMI2_SUCCESS != (rc = PMI2_KVS_Fence())) {
// FIX ME: OPAL_PMI2_ERROR(rc, "PMI2_KVS_Fence");
return OPAL_ERROR;
}
}
else
#endif
{
/* use the PMI barrier function */
if (PMI_SUCCESS != (rc = PMI_Barrier())) {
OPAL_PMI_ERROR(rc, "PMI_Barrier");
return OPAL_ERROR;
}
}
return OPAL_SUCCESS;
}
int mca_common_pmi_put(const char *kvs_name,
const char *key, const char *value)
{
int rc;
#if WANT_PMI2_SUPPORT
if( mca_common_pmi_version == 2 ){
if( PMI2_SUCCESS != PMI2_KVS_Put(key, value) ){
// FIXME: OPAL_PMI2_ERROR(rc, "PMI2_KVS_Put");
return OPAL_ERROR;
}
}
else
#endif
{
rc = PMI_KVS_Put(kvs_name, key, value);
if( PMI_SUCCESS != rc ){
OPAL_PMI_ERROR(rc, "PMI_KVS_Put");
return OPAL_ERROR;
}
}
return OPAL_SUCCESS;
}
int mca_common_pmi_get(const char *kvs_name, const char *key,
char *value, int valuelen)
{
int rc;
#if WANT_PMI2_SUPPORT
if( mca_common_pmi_version == 2 ){
int len;
rc = PMI2_KVS_Get(kvs_name, PMI2_ID_NULL, key, value, valuelen, &len);
if( PMI2_SUCCESS != rc ){
// OPAL_PMI2_ERROR(rc, "PMI_KVS_Put");
return OPAL_ERROR;
}
}
else
#endif
{
rc = PMI_KVS_Get(kvs_name, key, value, valuelen);
if( PMI_SUCCESS != rc ){
OPAL_PMI_ERROR(rc, "PMI_KVS_Put");
return OPAL_ERROR;
}
}
return OPAL_SUCCESS;
}
int mca_common_pmi_commit(char *kvs_name)
{
if( mca_common_pmi_version == 1 ){
if (PMI_SUCCESS != (rc = PMI_KVS_Commit(kvs_name))) {
OPAL_PMI_ERROR(rc, "PMI_KVS_Commit");
return OPAL_ERROR;
}
}
return mca_common_pmi_barrier();
}
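A hedged end-to-end sketch of the KVS path through this removed wrapper: init, put, commit (which also performs the barrier/fence), get, finalize. The key and value strings are hypothetical, and the fixed buffer sizes stand in for the limits reported by the kvslen/keylen/vallen accessors.
static int example_kvs_exchange(void)
{
    char kvsname[256], value[256];
    int ret;
    if (OPAL_SUCCESS != (ret = mca_common_pmi_init(2))) {   /* prefer PMI-2 */
        return ret;
    }
    if (OPAL_SUCCESS != (ret = mca_common_pmi_kvsname(kvsname, sizeof(kvsname)))) {
        goto done;
    }
    if (OPAL_SUCCESS != (ret = mca_common_pmi_put(kvsname, "btl.example.0",
                                                  "10.0.0.1:4242"))) {
        goto done;
    }
    if (OPAL_SUCCESS != (ret = mca_common_pmi_commit(kvsname))) {
        goto done;
    }
    ret = mca_common_pmi_get(kvsname, "btl.example.0", value, sizeof(value));
 done:
    mca_common_pmi_finalize();
    return ret;
}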


@ -1,86 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All
* rights reserved.
* Copyright (c) 2013 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef COMMON_PMI_H
#define COMMON_PMI_H
#include <pmi.h>
#if WANT_PMI2_SUPPORT
#include <pmi2.h>
#endif
#if !defined(OPAL_MCA_COMMON_PMI)
#define OPAL_MCA_COMMON_PMI
#include "opal/util/output.h"
/**
* mca_common_pmi_init:
*
* Attempt to initialize PMI
*
* @retval true PMI successfully initialized
* @retval false PMI could not be initialized
*/
int mca_common_pmi_init (int preferred_version);
/**
* mca_common_pmi_finalize:
*
* Finalize PMI. PMI initialization is reference counted. The last
* caller to mca_common_pmi_finalize will cause PMI to be finalized.
*/
void mca_common_pmi_finalize (void);
#define OPAL_PMI_ERROR(pmi_err, pmi_func) \
do { \
opal_output(0, "%s [%s:%d:%s]: %s\n", \
pmi_func, __FILE__, __LINE__, __func__, \
opal_errmgr_base_pmi_error(pmi_err)); \
} while(0);
OPAL_DECLSPEC char* opal_errmgr_base_pmi_error(int pmi_err);
int mca_common_pmi_rank(void);
int mca_common_pmi_size(void);
int mca_common_pmi_appnum(void);
int mca_common_pmi_universe(void);
int mca_common_pmi_kvsname(char *buf, int len);
int mca_common_pmi_kvslen(void);
int mca_common_pmi_keylen(void);
int mca_common_pmi_vallen(void);
int mca_common_pmi_id(char **pmi_id_ret, char **error);
int mca_common_pmi_local_info(int vpid, int **ranks_ret,
int *procs_ret, char **error);
void mca_common_pmi_abort(int status, char *msg);
// Publish-subscribe operations
int mca_common_pmi_publish(const char *service_name, const char *port_name);
int mca_common_pmi_lookup(const char *service_name, char **port_ret);
int mca_common_pmi_unpublish ( const char *service_name );
// KVS put/get
int mca_common_pmi_put(const char *kvs_name,
const char *key, const char *value);
int mca_common_pmi_get(const char *kvs_name, const char *key,
char *value, int valuelen);
int mca_common_pmi_commit(char *kvs_name);
int mca_common_pmi_barrier(void);
#endif
#endif


@ -1,30 +0,0 @@
# -*- shell-script -*-
#
# Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011-2013 Los Alamos National Security, LLC.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_common_pmi_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_opal_common_pmi_CONFIG], [
AC_CONFIG_FILES([opal/mca/common/pmi/Makefile])
OPAL_CHECK_PMI([common_pmi], [common_pmi_good=1], [common_pmi_good=0])
# Evaluate succeed / fail
AS_IF([test "$common_pmi_good" = 1],
[$1],
[$2])
# set build flags to use in makefile
AC_SUBST([common_pmi_CPPFLAGS])
AC_SUBST([common_pmi_LDFLAGS])
AC_SUBST([common_pmi_LIBS])
])


@ -1,24 +0,0 @@
# -*- text -*-
#
# Copyright (c) 2014 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
[pmi2-init-failed]
PMI2 failed to initialize, returning an error code of %d.
We cannot use PMI2 at this time, and your job will
likely abort.
#
[pmi2-init-returned-bad-values]
PMI2 initialized but returned bad values for size and rank.
This is symptomatic of either a failure to use the
"--mpi=pmi2" flag in SLURM, or a borked PMI2 installation.
If running under SLURM, try adding "-mpi=pmi2" to your
srun command line. If that doesn't work, or if you are
not running under SLURM, try removing or renaming the
pmi2.h header file so PMI2 support will not automatically
be built, reconfigure and build OMPI, and then try again
with only PMI1 support enabled.


@ -14,9 +14,9 @@
#include "common_ugni.h"
#include "opal/mca/dstore/dstore.h"
#include "opal/class/opal_list.h"
#include "opal/dss/dss.h"
#include "opal/mca/pmix/pmix.h"
/* NTH: we need some options from the btl */
#include "opal/mca/btl/ugni/btl_ugni.h"
@ -186,8 +186,9 @@ static int opal_common_ugni_send_modex (int my_cdm_id)
msg_offset += modex_size;
}
rc = opal_modex_send(&opal_common_ugni_component,
modex_msg, total_msg_size);
OPAL_MODEX_SEND(rc, PMIX_ASYNC_RDY, PMIX_REMOTE,
&opal_common_ugni_component,
modex_msg, total_msg_size);
free(modex_msg);
@ -246,23 +247,12 @@ int opal_common_ugni_init (void)
#if 0
#if defined(OMPI_DB_GLOBAL_RANK)
{
opal_list_t myvals;
opal_value_t *kv;
ptr = &my_rank;
OBJ_CONSTRUCT(&myvals, opal_list_t);
rc = opal_dstore.fetch (opal_dstore_internal,
(opal_identifier_t *)&my_proc->proc_name,
OMPI_DB_GLOBAL_RANK,
&myvals);
if (OPAL_SUCCESS == rc) {
kv = (opal_value_t*)opal_list_get_first(&myvals);
if (OPAL_SUCCESS != opal_value_unload(kv, (void**)&ptr, OPAL_UINT32)) {
my_rank = my_proc->proc_name.vpid;
}
} else {
OPAL_MODEX_RECV_VALUE(rc, (opal_identifier_t *)&my_proc->proc_name,
OMPI_DB_GLOBAL_RANK, (void**)&ptr, OPAL_UINT32);
if (OPAL_SUCCESS != rc) {
my_rank = my_proc->proc_name.vpid;
}
}
OPAL_LIST_DESTRUCT(&myvals);
}
#else


@ -3,6 +3,7 @@
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@ -11,6 +12,7 @@
*/
#include "common_ugni.h"
#include "opal/mca/pmix/pmix.h"
OBJ_CLASS_INSTANCE(opal_common_ugni_endpoint_t, opal_object_t, NULL, NULL);
@ -31,8 +33,8 @@ int opal_common_ugni_endpoint_for_proc (opal_common_ugni_device_t *dev, opal_pro
}
/* Receive the modex */
rc = opal_modex_recv(&opal_common_ugni_component, peer_proc,
(void *) &modex, &msg_size);
OPAL_MODEX_RECV(rc, &opal_common_ugni_component,
peer_proc, (void *) &modex, &msg_size);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
OPAL_OUTPUT((-1, "btl/ugni error receiving modex"));
return rc;


@ -36,9 +36,23 @@ OPAL_DECLSPEC extern mca_base_framework_t opal_dstore_base_framework;
*/
OPAL_DECLSPEC int opal_dstore_base_select(void);
/* DSTORE is an oddball framework in that it has:
*
* an active storage component that issues handle-specific
* modules. This is done to provide separate storage areas that
* are isolated from each other, and thus don't have to worry
* about overlapping keys
*
* a backfill module used to attempt to retrieve data that has
* been requested, but that the handle-specific storage module
* does not contain. This is used in situations where data has
* not been provided at startup, and we need to retrieve it
* solely on-demand
*/
typedef struct {
opal_dstore_base_component_t *active; // active component
opal_pointer_array_t handles; // array of open datastore handles
opal_dstore_base_component_t *storage_component;
opal_dstore_base_module_t *backfill_module;
opal_pointer_array_t handles; // array of open datastore handles
} opal_dstore_base_t;
OPAL_DECLSPEC extern opal_dstore_base_t opal_dstore_base;
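A hedged sketch of the lookup flow this structure implies: consult the handle-specific storage module first, then fall back to the backfill module for keys that were not provided at startup. The handle's "module" member and the module-level fetch signature are assumptions based on the finalize call pattern shown later in this commit, not code from the commit itself.
static int sketch_dstore_fetch(opal_dstore_handle_t *hdl,
                               const opal_identifier_t *id,
                               const char *key, opal_list_t *kvs)
{
    int rc = hdl->module->fetch((struct opal_dstore_base_module_t*)hdl->module,
                                id, key, kvs);
    if (OPAL_SUCCESS != rc && NULL != opal_dstore_base.backfill_module) {
        /* on-demand retrieval (e.g. a direct PMI get) for missing data */
        rc = opal_dstore_base.backfill_module->fetch(
                 (struct opal_dstore_base_module_t*)opal_dstore_base.backfill_module,
                 id, key, kvs);
    }
    return rc;
}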
@ -70,8 +84,6 @@ OPAL_DECLSPEC int opal_dstore_base_close(int dstorehandle);
OPAL_DECLSPEC int opal_dstore_base_store(int dstorehandle,
const opal_identifier_t *id,
opal_value_t *kv);
OPAL_DECLSPEC void opal_dstore_base_commit(int dstorehandle,
const opal_identifier_t *id);
OPAL_DECLSPEC int opal_dstore_base_fetch(int dstorehandle,
const opal_identifier_t *id,
const char *key,


@ -33,15 +33,12 @@ opal_dstore_base_API_t opal_dstore = {
opal_dstore_base_open,
opal_dstore_base_close,
opal_dstore_base_store,
opal_dstore_base_commit,
opal_dstore_base_fetch,
opal_dstore_base_remove_data
};
opal_dstore_base_t opal_dstore_base;
int opal_dstore_peer = -1;
int opal_dstore_internal = -1;
int opal_dstore_nonpeer = -1;
static int opal_dstore_base_frame_close(void)
{
@ -58,9 +55,9 @@ static int opal_dstore_base_frame_close(void)
}
OBJ_DESTRUCT(&opal_dstore_base.handles);
/* let the active component to finalize, should it wish to do so */
if (NULL != opal_dstore_base.active && NULL != opal_dstore_base.active->finalize) {
opal_dstore_base.active->finalize();
/* let the backfill module finalize, should it wish to do so */
if (NULL != opal_dstore_base.backfill_module && NULL != opal_dstore_base.backfill_module->finalize) {
opal_dstore_base.backfill_module->finalize((struct opal_dstore_base_module_t*)opal_dstore_base.backfill_module);
}
return mca_base_framework_components_close(&opal_dstore_base_framework, NULL);
@ -80,6 +77,7 @@ MCA_BASE_FRAMEWORK_DECLARE(opal, dstore, NULL, NULL,
opal_dstore_base_frame_close,
mca_dstore_base_static_components, 0);
/*** CLASS INSTANCES ***/
static void hdl_con(opal_dstore_handle_t *p)
{
p->name = NULL;


@ -26,8 +26,11 @@ int
opal_dstore_base_select(void)
{
mca_base_component_list_item_t *cli;
opal_dstore_base_component_t *component, *best=NULL;
int pri = -1000;
mca_base_component_t *cmp;
mca_base_module_t *md;
int priority, cmp_pri, mod_pri;
opal_dstore_base_module_t *mod=NULL;
opal_dstore_base_component_t *comp=NULL;
if (selected) {
/* ensure we don't do this twice */
@ -36,48 +39,62 @@ opal_dstore_base_select(void)
selected = true;
/* Query all available components and ask if they have a module */
cmp_pri = -100000;
mod_pri = -100000;
OPAL_LIST_FOREACH(cli, &opal_dstore_base_framework.framework_components, mca_base_component_list_item_t) {
component = (opal_dstore_base_component_t*)cli->cli_component;
cmp = (mca_base_component_t*)cli->cli_component;
opal_output_verbose(5, opal_dstore_base_framework.framework_output,
"mca:dstore:select: checking available component %s",
component->base_version.mca_component_name);
cmp->mca_component_name);
/* If there's no query function, skip it */
if (NULL == component->available) {
if (NULL == cmp->mca_query_component) {
opal_output_verbose(5, opal_dstore_base_framework.framework_output,
"mca:dstore:select: Skipping component [%s]. It does not implement a query function",
component->base_version.mca_component_name );
cmp->mca_component_name );
continue;
}
/* Query the component */
opal_output_verbose(5, opal_dstore_base_framework.framework_output,
opal_output_verbose(5, opal_dstore_base_framework.framework_output,
"mca:dstore:select: Querying component [%s]",
component->base_version.mca_component_name);
cmp->mca_component_name);
/* If the component is not available, then skip it as
* it has no available interfaces
*/
if (!component->available()) {
/* If the component reports failure, then skip component - however,
* it is okay to return a NULL module */
if (OPAL_SUCCESS != cmp->mca_query_component(&md, &priority)) {
opal_output_verbose(5, opal_dstore_base_framework.framework_output,
"mca:dstore:select: Skipping component [%s] - not available",
component->base_version.mca_component_name );
cmp->mca_component_name );
continue;
}
/* keep only the highest priority component */
if (pri < component->priority) {
best = component;
pri = component->priority;
/* track the highest priority component that returned a NULL module - this
* will become our storage element */
if (NULL == md) {
if (0 < priority && priority > cmp_pri) {
comp = (opal_dstore_base_component_t*)cmp;
cmp_pri = priority;
}
} else {
/* track the highest priority module that was returned - this
* will become our backfill element */
if (priority > mod_pri) {
mod = (opal_dstore_base_module_t*)md;
mod_pri = priority;
}
}
}
/* if no components are available, that is an error */
if (NULL == best) {
return OPAL_ERR_NOT_FOUND;
if (NULL == comp) {
/* no components available - that's bad */
return OPAL_ERROR;
}
opal_dstore_base.storage_component = comp;
/* it's okay not to have a backfill module */
opal_dstore_base.backfill_module = mod;
opal_dstore_base.active = best;
return OPAL_SUCCESS;
}
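
The selection contract above distinguishes the two roles purely by what a component's query function returns: a NULL module marks the component as the storage-handle factory, while a non-NULL module volunteers as the backfill element. A minimal sketch of a hypothetical backfill-capable component's query function follows (illustrative only, not part of this commit; the module name is assumed):

    #include "opal/constants.h"
    #include "opal/mca/base/base.h"
    #include "opal/mca/dstore/base/base.h"

    /* hypothetical module instance provided elsewhere by the component */
    extern opal_dstore_base_module_t my_backfill_module;

    static int my_backfill_query(mca_base_module_t **module, int *priority)
    {
        /* returning a non-NULL module offers this component as the
         * backfill element; the highest-priority module wins */
        *priority = 10;
        *module = (mca_base_module_t*)&my_backfill_module;
        return OPAL_SUCCESS;
    }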


@ -28,8 +28,8 @@ int opal_dstore_base_open(const char *name)
int index;
opal_dstore_base_module_t *mod;
/* create the module */
if (NULL != (mod = opal_dstore_base.active->create_handle())) {
/* ask the storage component for a module */
if (NULL != (mod = opal_dstore_base.storage_component->create_handle())) {
/* have our module, so create a new dstore_handle */
hdl = OBJ_NEW(opal_dstore_handle_t);
if (NULL != name) {
@ -98,35 +98,13 @@ int opal_dstore_base_store(int dstorehandle,
return hdl->module->store((struct opal_dstore_base_module_t*)hdl->module, id, kv);
}
void opal_dstore_base_commit(int dstorehandle,
const opal_identifier_t *id)
{
opal_dstore_handle_t *hdl;
if (dstorehandle < 0) {
OPAL_ERROR_LOG(OPAL_ERR_NOT_INITIALIZED);
return;
}
if (NULL == (hdl = (opal_dstore_handle_t*)opal_pointer_array_get_item(&opal_dstore_base.handles, dstorehandle))) {
OPAL_ERROR_LOG(OPAL_ERR_NOT_FOUND);
return;
}
if (NULL != hdl->module->commit) {
opal_output_verbose(1, opal_dstore_base_framework.framework_output,
"committing data in %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name);
hdl->module->commit((struct opal_dstore_base_module_t*)hdl->module, id);
}
}
int opal_dstore_base_fetch(int dstorehandle,
const opal_identifier_t *id,
const char *key,
opal_list_t *kvs)
{
opal_dstore_handle_t *hdl;
int rc;
if (dstorehandle < 0) {
return OPAL_ERR_NOT_INITIALIZED;
@ -140,7 +118,17 @@ int opal_dstore_base_fetch(int dstorehandle,
opal_output_verbose(1, opal_dstore_base_framework.framework_output,
"fetching data from %s dstore", (NULL == hdl->name) ? "NULL" : hdl->name);
return hdl->module->fetch((struct opal_dstore_base_module_t*)hdl->module, id, key, kvs);
if (OPAL_SUCCESS == (rc = hdl->module->fetch((struct opal_dstore_base_module_t*)hdl->module, id, key, kvs))) {
/* found the data, so we can just return it */
return rc;
}
/* if the storage module didn't find it, then let the backfill module try
* to retrieve it if we have one */
if (NULL != opal_dstore_base.backfill_module) {
rc = opal_dstore_base.backfill_module->fetch((struct opal_dstore_base_module_t*)opal_dstore_base.backfill_module, id, key, kvs);
}
return rc;
}
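
As a usage illustration (an editorial sketch, not part of the diff): a caller opens a handle, stores a value, and later fetches it by key; if the storage module misses, the fetch above silently falls through to the backfill module. The handle name and key below are assumptions.

    #include <string.h>
    #include "opal/constants.h"
    #include "opal/class/opal_list.h"
    #include "opal/dss/dss_types.h"
    #include "opal/mca/dstore/dstore.h"

    /* "myproc" is assumed to be this proc's opal_identifier_t */
    static int example_store_and_fetch(opal_identifier_t myproc)
    {
        int handle, rc;
        opal_value_t kv, *res;
        opal_list_t vals;

        /* open a named datastore handle backed by the storage component */
        handle = opal_dstore.open("example");
        if (0 > handle) {
            return handle;
        }

        /* store one key-value pair for this proc */
        OBJ_CONSTRUCT(&kv, opal_value_t);
        kv.key = strdup("example.key");
        kv.type = OPAL_UINT32;
        kv.data.uint32 = 42;
        rc = opal_dstore.store(handle, &myproc, &kv);
        OBJ_DESTRUCT(&kv);

        /* fetch it back - a miss in the storage module falls through
         * to the backfill module, if one was selected */
        OBJ_CONSTRUCT(&vals, opal_list_t);
        rc = opal_dstore.fetch(handle, &myproc, "example.key", &vals);
        if (OPAL_SUCCESS == rc) {
            res = (opal_value_t*)opal_list_get_first(&vals);
            /* use res->data.uint32 ... */
            (void)res;
        }
        OPAL_LIST_DESTRUCT(&vals);
        (void)opal_dstore.close(handle);
        return rc;
    }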
int opal_dstore_base_remove_data(int dstorehandle,


@ -36,10 +36,12 @@
BEGIN_C_DECLS
/* declare a pair of global handles until such time
/* declare a global handle until such time
* as someone figures out how to separate the various
* datastore channels
*/
OPAL_DECLSPEC extern int opal_dstore_internal;
OPAL_DECLSPEC extern int opal_dstore_peer;
OPAL_DECLSPEC extern int opal_dstore_internal;
OPAL_DECLSPEC extern int opal_dstore_nonpeer;
@ -76,13 +78,6 @@ typedef int (*opal_dstore_base_API_store_fn_t)(int dstorehandle,
const opal_identifier_t *id,
opal_value_t *kv);
/*
* Commit data to the database - action depends on implementation within
* each active component
*/
typedef void (*opal_dstore_base_API_commit_fn_t)(int dstorehandle,
const opal_identifier_t *id);
/*
* Retrieve data
*
@ -112,7 +107,6 @@ typedef struct {
opal_dstore_base_API_open_fn_t open;
opal_dstore_base_API_close_fn_t close;
opal_dstore_base_API_store_fn_t store;
opal_dstore_base_API_commit_fn_t commit;
opal_dstore_base_API_fetch_fn_t fetch;
opal_dstore_base_API_remove_fn_t remove;
} opal_dstore_base_API_t;
@ -146,10 +140,6 @@ typedef int (*opal_dstore_base_module_store_fn_t)(struct opal_dstore_base_module
const opal_identifier_t *id,
opal_value_t *kv);
/* commit data */
typedef void (*opal_dstore_base_module_commit_fn_t)(struct opal_dstore_base_module_t *mod,
const opal_identifier_t *id);
/* fetch data from the module */
typedef int (*opal_dstore_base_module_fetch_fn_t)(struct opal_dstore_base_module_t *mod,
const opal_identifier_t *id,
@ -168,7 +158,6 @@ typedef struct {
opal_dstore_base_module_init_fn_t init;
opal_dstore_base_module_finalize_fn_t finalize;
opal_dstore_base_module_store_fn_t store;
opal_dstore_base_module_commit_fn_t commit;
opal_dstore_base_module_fetch_fn_t fetch;
opal_dstore_base_module_remove_fn_t remove;
} opal_dstore_base_module_t;
@ -176,11 +165,6 @@ typedef struct {
/*
* the component data structure
*/
/* function to determine if this component is available for use.
* Note that we do not use the standard component open
* function as we do not want/need return of a module.
*/
typedef bool (*mca_dstore_base_component_avail_fn_t)(void);
/* create and return a datastore module */
typedef opal_dstore_base_module_t* (*mca_dstore_base_component_create_hdl_fn_t)(void);
@ -191,8 +175,6 @@ typedef void (*mca_dstore_base_component_finalize_fn_t)(void);
typedef struct {
mca_base_component_t base_version;
mca_base_component_data_t base_data;
int priority;
mca_dstore_base_component_avail_fn_t available;
mca_dstore_base_component_create_hdl_fn_t create_handle;
mca_dstore_base_component_finalize_fn_t finalize;
} opal_dstore_base_component_t;


@ -20,17 +20,47 @@
#include "opal/types.h"
#include "opal/dss/dss_types.h"
#include "opal/mca/pmix/pmix.h"
BEGIN_C_DECLS
/* some values are provided by an external entity such
* as the resource manager. These values enter the
* system via the PMIx interface at startup, but are
* not explicitly retrieved by processes. Instead, procs
* access them after RTE-init has stored them. For ease-of-use,
* we define equivalent dstore names here. PMIx attributes
* not listed here should be directly accessed via the
* OPAL pmix framework */
#define OPAL_DSTORE_CPUSET PMIX_CPUSET
#define OPAL_DSTORE_CREDENTIAL PMIX_CREDENTIAL
#define OPAL_DSTORE_TMPDIR PMIX_TMPDIR
#define OPAL_DSTORE_JOBID PMIX_JOBID
#define OPAL_DSTORE_APPNUM PMIX_APPNUM
#define OPAL_DSTORE_RANK PMIX_RANK
#define OPAL_DSTORE_GLOBAL_RANK PMIX_GLOBAL_RANK
#define OPAL_DSTORE_LOCALRANK PMIX_LOCAL_RANK
#define OPAL_DSTORE_NODERANK PMIX_NODE_RANK
#define OPAL_DSTORE_LOCALLDR PMIX_LOCALLDR
#define OPAL_DSTORE_APPLDR PMIX_APPLDR
#define OPAL_DSTORE_LOCAL_PEERS PMIX_LOCAL_PEERS
#define OPAL_DSTORE_UNIV_SIZE PMIX_UNIV_SIZE
#define OPAL_DSTORE_JOB_SIZE PMIX_JOB_SIZE
#define OPAL_DSTORE_LOCAL_SIZE PMIX_LOCAL_SIZE
#define OPAL_DSTORE_NODE_SIZE PMIX_NODE_SIZE
#define OPAL_DSTORE_MAX_PROCS PMIX_MAX_PROCS
#define OPAL_DSTORE_NPROC_OFFSET PMIX_NPROC_OFFSET
/* some OPAL-appropriate key definitions */
#define OPAL_DSTORE_LOCALITY "opal.locality"
#define OPAL_DSTORE_CPUSET "opal.cpuset"
#define OPAL_DSTORE_CREDENTIAL "opal.cred"
#define OPAL_DSTORE_JOB_SDIR "opal.job.session.dir"
#define OPAL_DSTORE_MY_SDIR "opal.my.session.dir"
#define OPAL_DSTORE_LOCALRANK "opal.local.rank"
#define OPAL_DSTORE_LOCALLDR "opal.local.ldr"
#define OPAL_DSTORE_LOCALITY "opal.locality" // (uint16_t) relative locality of a peer
/* proc-specific scratch dirs */
#define OPAL_DSTORE_JOB_SDIR "opal.job.session.dir" // (char*) job-level session dir
#define OPAL_DSTORE_MY_SDIR "opal.my.session.dir" // (char*) session dir for this proc
#define OPAL_DSTORE_URI "opal.uri" // (char*) uri of specified proc
#define OPAL_DSTORE_HOSTNAME "opal.hostname" // (char*) hostname of specified proc
#define OPAL_DSTORE_ARCH "opal.arch" // (uint32_t) arch for specified proc
#define OPAL_DSTORE_HOSTID "opal.hostid" // (uint32_t) hostid of specified proc
#define OPAL_DSTORE_NODEID "opal.nodeid" // (uint32_t) nodeid of specified proc
END_C_DECLS
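
To illustrate how these keys are consumed (a sketch under the assumption that the peer's opal_identifier_t is already known and that the RTE has stored the PMIx-provided values at init): any layer can fetch them by the OPAL_DSTORE_* name. The include set is indicative.

    #include "opal/constants.h"
    #include "opal/class/opal_list.h"
    #include "opal/dss/dss.h"
    #include "opal/mca/dstore/dstore.h"
    #include "opal/mca/dstore/dstore_types.h"

    /* hypothetical lookup of a peer's hostname */
    static char* example_lookup_hostname(opal_identifier_t peer)
    {
        opal_list_t vals;
        opal_value_t *kv;
        char *host = NULL;

        OBJ_CONSTRUCT(&vals, opal_list_t);
        if (OPAL_SUCCESS == opal_dstore.fetch(opal_dstore_internal, &peer,
                                              OPAL_DSTORE_HOSTNAME, &vals)) {
            kv = (opal_value_t*)opal_list_get_first(&vals);
            if (OPAL_SUCCESS != opal_value_unload(kv, (void**)&host, OPAL_STRING)) {
                host = NULL;
            }
        }
        OPAL_LIST_DESTRUCT(&vals);
        return host;
    }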


@ -26,6 +26,7 @@
#include "opal/dss/dss_types.h"
#include "opal/util/error.h"
#include "opal/util/output.h"
#include "opal/util/proc.h"
#include "opal/util/show_help.h"
#include "opal/mca/dstore/base/base.h"
@ -48,7 +49,6 @@ mca_dstore_hash_module_t opal_dstore_hash_module = {
init,
finalize,
store,
NULL,
fetch,
remove_data
}
@ -112,14 +112,15 @@ static int store(struct opal_dstore_base_module_t *imod,
memcpy(&id, uid, sizeof(opal_identifier_t));
opal_output_verbose(1, opal_dstore_base_framework.framework_output,
"dstore:hash:store storing data for proc %" PRIu64 "", id);
"%s dstore:hash:store storing data for proc %s",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), OPAL_NAME_PRINT(id));
/* lookup the proc data object for this proc */
if (NULL == (proc_data = opal_dstore_base_lookup_proc(&mod->hash_data, id))) {
/* unrecoverable error */
OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output,
"dstore:hash:store: storing data for proc %" PRIu64 " unrecoverably failed",
id));
"%s dstore:hash:store: storing data for proc %s unrecoverably failed",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), OPAL_NAME_PRINT(id)));
return OPAL_ERR_OUT_OF_RESOURCE;
}
@ -130,9 +131,10 @@ static int store(struct opal_dstore_base_module_t *imod,
#if OPAL_ENABLE_DEBUG
char *_data_type = opal_dss.lookup_data_type(val->type);
OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output,
"dstore:hash:store: %s key %s[%s] for proc %" PRIu64 "",
"%s dstore:hash:store: %s key %s[%s] for proc %s",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
(NULL == kv ? "storing" : "updating"),
val->key, _data_type, id));
val->key, _data_type, OPAL_NAME_PRINT(id)));
free (_data_type);
#endif
@ -166,13 +168,16 @@ static int fetch(struct opal_dstore_base_module_t *imod,
memcpy(&id, uid, sizeof(opal_identifier_t));
OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output,
"dstore:hash:fetch: searching for key %s on proc %" PRIu64 "",
(NULL == key) ? "NULL" : key, id));
"%s dstore:hash:fetch: searching for key %s on proc %s",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
(NULL == key) ? "NULL" : key, OPAL_NAME_PRINT(id)));
/* lookup the proc data object for this proc */
if (NULL == (proc_data = opal_dstore_base_lookup_proc(&mod->hash_data, id))) {
OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output,
"dstore_hash:fetch data for proc %" PRIu64 " not found", id));
"%s dstore_hash:fetch data for proc %s not found",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
OPAL_NAME_PRINT(id)));
return OPAL_ERR_NOT_FOUND;
}
@ -185,8 +190,11 @@ static int fetch(struct opal_dstore_base_module_t *imod,
return rc;
}
OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output,
"dstore:hash:fetch: adding data for key %s on proc %" PRIu64 "",
(NULL == kv->key) ? "NULL" : kv->key, id));
"%s dstore:hash:fetch: adding data for key %s on proc %s",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
(NULL == kv->key) ? "NULL" : kv->key,
OPAL_NAME_PRINT(id)));
/* add it to the output list */
opal_list_append(kvs, &knew->super);
}
@ -196,8 +204,10 @@ static int fetch(struct opal_dstore_base_module_t *imod,
/* find the value */
if (NULL == (kv = opal_dstore_base_lookup_keyval(proc_data, key))) {
OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output,
"dstore_hash:fetch key %s for proc %" PRIu64 " not found",
(NULL == key) ? "NULL" : key, id));
"%s dstore_hash:fetch key %s for proc %s not found",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
(NULL == key) ? "NULL" : key,
OPAL_NAME_PRINT(id)));
return OPAL_ERR_NOT_FOUND;
}


@ -25,9 +25,8 @@
#include "opal/mca/dstore/base/base.h"
#include "dstore_hash.h"
static int dstore_hash_component_register(void);
static bool component_avail(void);
static opal_dstore_base_module_t *component_create(void);
static int dstore_hash_query(mca_base_module_t **module, int *priority);
/*
* Instantiate the public struct with all of our public information
@ -46,39 +45,25 @@ opal_dstore_base_component_t mca_dstore_hash_component = {
/* Component open and close functions */
NULL,
NULL,
NULL,
dstore_hash_component_register
dstore_hash_query,
NULL
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
1,
component_avail,
component_create,
NULL
};
static int dstore_hash_component_register(void)
static int dstore_hash_query(mca_base_module_t **module, int *priority)
{
mca_base_component_t *c = &mca_dstore_hash_component.base_version;
mca_dstore_hash_component.priority = 1;
(void) mca_base_component_var_register(c, "priority",
"Priority dictating order in which components will be considered",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_dstore_hash_component.priority);
/* we are always available, but only as storage */
*priority = 80;
*module = NULL;
return OPAL_SUCCESS;
}
static bool component_avail(void)
{
/* we are always available */
return true;
}
static opal_dstore_base_module_t *component_create(void)
{
mca_dstore_hash_module_t *mod;


@ -1,39 +0,0 @@
#
# Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources = \
dstore_pmi.h \
dstore_pmi_component.c \
dstore_pmi.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_opal_dstore_pmi_DSO
component_noinst =
component_install = mca_dstore_pmi.la
else
component_noinst = libmca_dstore_pmi.la
component_install =
endif
mcacomponentdir = $(opallibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_dstore_pmi_la_SOURCES = $(sources)
mca_dstore_pmi_la_CPPFLAGS = $(dstore_pmi_CPPFLAGS)
mca_dstore_pmi_la_LDFLAGS = -module -avoid-version $(dstore_pmi_LDFLAGS)
mca_dstore_pmi_la_LIBADD = $(dstore_pmi_LIBS) \
$(OPAL_TOP_BUILDDIR)/opal/mca/common/pmi/libmca_common_pmi.la
noinst_LTLIBRARIES = $(component_noinst)
libmca_dstore_pmi_la_SOURCES =$(sources)
libmca_dstore_pmi_la_CPPFLAGS = $(dstore_pmi_CPPFLAGS)
libmca_dstore_pmi_la_LDFLAGS = -module -avoid-version $(dstore_pmi_LDFLAGS)
libmca_dstore_pmi_la_LIBADD = $(dstore_pmi_LIBS)


@ -1,28 +0,0 @@
# -*- shell-script -*-
#
# Copyright (c) 2012-2013 Los Alamos National Security, LLC.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_dstore_pmi_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_opal_dstore_pmi_CONFIG], [
AC_CONFIG_FILES([opal/mca/dstore/pmi/Makefile])
OPAL_CHECK_PMI([dstore_pmi], [dstore_pmi_good=1], [dstore_pmi_good=0])
# Evaluate succeed / fail
AS_IF([test "$dstore_pmi_good" = 1],
[$1],
[$2])
# set build flags to use in makefile
AC_SUBST([dstore_pmi_CPPFLAGS])
AC_SUBST([dstore_pmi_LDFLAGS])
AC_SUBST([dstore_pmi_LIBS])
])


@ -1,717 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include "opal_config.h"
#include "opal/constants.h"
#include <time.h>
#include <string.h>
#include "opal/mca/common/pmi/common_pmi.h"
#include <regex.h>
#include "opal_stdint.h"
#include "opal/class/opal_pointer_array.h"
#include "opal/dss/dss_types.h"
#include "opal/util/argv.h"
#include "opal/util/error.h"
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "opal/mca/dstore/base/base.h"
#include "dstore_pmi.h"
#define OPAL_PMI_PAD 10
static void finalize(struct opal_dstore_base_module_t *imod);
static int store(struct opal_dstore_base_module_t *imod,
const opal_identifier_t *proc,
opal_value_t *kv);
static void commit(struct opal_dstore_base_module_t *mod,
const opal_identifier_t *id);
static int fetch(struct opal_dstore_base_module_t *imod,
const opal_identifier_t *proc,
const char *key,
opal_list_t *kvs);
static int remove_data(struct opal_dstore_base_module_t *imod,
const opal_identifier_t *proc, const char *key);
mca_dstore_pmi_module_t opal_dstore_pmi_module = {
{
NULL,
finalize,
store,
commit,
fetch,
remove_data
}
};
static char *pmi_encode(const void *val, size_t vallen);
static uint8_t* pmi_decode(const char *data, size_t *retlen);
static char* setup_key(mca_dstore_pmi_module_t *mod,
opal_identifier_t name, const char *key);
/* Local variables */
/* Because Cray uses PMI2 extensions for some, but not all,
* PMI functions, we define a set of wrappers for those
* common functions we will use
*/
static inline int kvs_put(mca_dstore_pmi_module_t *mod,
const char *key, const char *value)
{
return mca_common_pmi_put(mod->pmi_kvs_name, key, value);
}
static inline int kvs_get(mca_dstore_pmi_module_t *mod,
const char *key, char *value, int valuelen)
{
return mca_common_pmi_get(mod->pmi_kvs_name, key, value, valuelen);
}
static void finalize(struct opal_dstore_base_module_t *imod)
{
mca_dstore_pmi_module_t *mod;
opal_dstore_proc_data_t *proc_data;
uint64_t key;
char *node;
mod = (mca_dstore_pmi_module_t*)imod;
if (NULL != mod->pmi_kvs_name) {
free(mod->pmi_kvs_name);
mod->pmi_kvs_name = NULL;
}
/* to assist in getting a clean valgrind, cycle thru the hash table
* and release all data stored in it
*/
if (OPAL_SUCCESS == opal_hash_table_get_first_key_uint64(&mod->hash_data, &key,
(void**)&proc_data,
(void**)&node)) {
if (NULL != proc_data) {
OBJ_RELEASE(proc_data);
}
while (OPAL_SUCCESS == opal_hash_table_get_next_key_uint64(&mod->hash_data, &key,
(void**)&proc_data,
node, (void**)&node)) {
if (NULL != proc_data) {
OBJ_RELEASE(proc_data);
}
}
}
OBJ_DESTRUCT(&mod->hash_data);
}
static int pmi_commit_packed(mca_dstore_pmi_module_t *mod,
opal_identifier_t proc) {
char *pmikey = NULL, *tmp;
char tmp_key[32], save;
char *encoded_data;
int rc, left;
if (mod->pmi_packed_data_off == 0) {
/* nothing to write */
return OPAL_SUCCESS;
}
if (NULL == (encoded_data = pmi_encode(mod->pmi_packed_data, mod->pmi_packed_data_off))) {
OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
return OPAL_ERR_OUT_OF_RESOURCE;
}
for (left = strlen (encoded_data), tmp = encoded_data ; left ; ) {
size_t value_size = mod->pmi_vallen_max > left ? left : mod->pmi_vallen_max - 1;
sprintf (tmp_key, "key%d", mod->pmi_pack_key);
if (NULL == (pmikey = setup_key(mod, proc, tmp_key))) {
OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
rc = OPAL_ERR_BAD_PARAM;
break;
}
/* only write value_size bytes */
save = tmp[value_size];
tmp[value_size] = '\0';
rc = kvs_put(mod, pmikey, tmp);
free(pmikey);
if (OPAL_SUCCESS != rc) {
break;
}
tmp[value_size] = save;
tmp += value_size;
left -= value_size;
mod->pmi_pack_key ++;
rc = OPAL_SUCCESS;
}
if (encoded_data) {
free(encoded_data);
}
mod->pmi_packed_data_off = 0;
free(mod->pmi_packed_data);
mod->pmi_packed_data = NULL;
return rc;
}
static int pmi_store_encoded(mca_dstore_pmi_module_t *mod,
const char *key, const void *data,
opal_data_type_t type)
{
opal_byte_object_t *bo;
size_t data_len = 0;
size_t needed;
switch (type) {
case OPAL_STRING:
data_len = data ? strlen (data) + 1 : 0;
break;
case OPAL_INT:
case OPAL_UINT:
data_len = sizeof (int);
break;
case OPAL_INT16:
case OPAL_UINT16:
data_len = sizeof (int16_t);
break;
case OPAL_INT32:
case OPAL_UINT32:
data_len = sizeof (int32_t);
break;
case OPAL_INT64:
case OPAL_UINT64:
data_len = sizeof (int64_t);
break;
case OPAL_BYTE_OBJECT:
bo = (opal_byte_object_t *) data;
data = bo->bytes;
data_len = bo->size;
}
needed = 10 + data_len + strlen (key);
if (NULL == mod->pmi_packed_data) {
mod->pmi_packed_data = calloc (needed, 1);
} else {
/* grow the region */
mod->pmi_packed_data = realloc (mod->pmi_packed_data, mod->pmi_packed_data_off + needed);
}
/* special length meaning NULL */
if (NULL == data) {
data_len = 0xffff;
}
/* serialize the opal datatype */
mod->pmi_packed_data_off += sprintf (mod->pmi_packed_data + mod->pmi_packed_data_off,
"%s%c%02x%c%04x%c", key, '\0', type, '\0',
(int) data_len, '\0');
if (NULL != data) {
memmove (mod->pmi_packed_data + mod->pmi_packed_data_off, data, data_len);
mod->pmi_packed_data_off += data_len;
}
return OPAL_SUCCESS;
}
static int pmi_get_packed(mca_dstore_pmi_module_t *mod,
opal_identifier_t proc,
char **packed_data, size_t *len)
{
char *tmp_encoded = NULL, *pmikey, *pmi_tmp;
int remote_key, size;
size_t bytes_read;
int rc;
/* set default */
*packed_data = NULL;
*len = 0;
pmi_tmp = calloc (mod->pmi_vallen_max, 1);
if (NULL == pmi_tmp) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
/* read all of the packed data from this proc */
for (remote_key = 0, bytes_read = 0 ; ; ++remote_key) {
char tmp_key[32];
sprintf (tmp_key, "key%d", remote_key);
if (NULL == (pmikey = setup_key(mod, proc, tmp_key))) {
rc = OPAL_ERR_OUT_OF_RESOURCE;
OPAL_ERROR_LOG(rc);
return rc;
}
OPAL_OUTPUT_VERBOSE((10, opal_dstore_base_framework.framework_output,
"GETTING KEY %s", pmikey));
rc = kvs_get(mod, pmikey, pmi_tmp, mod->pmi_vallen_max);
free (pmikey);
if (OPAL_SUCCESS != rc) {
break;
}
size = strlen (pmi_tmp);
if (NULL == tmp_encoded) {
tmp_encoded = malloc (size + 1);
} else {
tmp_encoded = realloc (tmp_encoded, bytes_read + size + 1);
}
strcpy (tmp_encoded + bytes_read, pmi_tmp);
bytes_read += size;
/* is the string terminator present? */
if ('-' == tmp_encoded[bytes_read-1]) {
break;
}
}
free (pmi_tmp);
OPAL_OUTPUT_VERBOSE((10, opal_dstore_base_framework.framework_output,
"Read data %s\n",
(NULL == tmp_encoded) ? "NULL" : tmp_encoded));
if (NULL != tmp_encoded) {
*packed_data = (char *) pmi_decode (tmp_encoded, len);
free (tmp_encoded);
if (NULL == *packed_data) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
}
return OPAL_SUCCESS;
}
static void cache_keys_locally(mca_dstore_pmi_module_t *mod,
opal_identifier_t id,
opal_dstore_proc_data_t *proc_data)
{
char *tmp, *tmp2, *tmp3, *tmp_val;
opal_data_type_t stored_type;
size_t len, offset;
int rc, size;
opal_value_t *kv;
OPAL_OUTPUT_VERBOSE((1, opal_dstore_base_framework.framework_output,
"dstore:pmi:fetch get all keys for proc %" PRIu64 " in KVS %s",
id, mod->pmi_kvs_name));
rc = pmi_get_packed(mod, id, &tmp_val, &len);
if (OPAL_SUCCESS != rc) {
return;
}
/* search for each key in the decoded data */
for (offset = 0 ; offset < len && '\0' != tmp_val[offset] ; ) {
/* type */
tmp = tmp_val + offset + strlen (tmp_val + offset) + 1;
/* size */
tmp2 = tmp + strlen (tmp) + 1;
/* data */
tmp3 = tmp2 + strlen (tmp2) + 1;
stored_type = (opal_data_type_t) strtol (tmp, NULL, 16);
size = strtol (tmp2, NULL, 16);
/* cache value locally so we don't have to look it up via pmi again */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(tmp_val + offset);
kv->type = stored_type;
opal_list_append(&proc_data->data, &kv->super);
switch (stored_type) {
case OPAL_BYTE:
kv->data.byte = *tmp3;
break;
case OPAL_STRING:
if (NULL != tmp3) {
kv->data.string = strdup(tmp3);
} else {
kv->data.string = NULL;
}
break;
case OPAL_PID:
kv->data.pid = strtoul(tmp3, NULL, 10);
break;
case OPAL_INT:
kv->data.integer = strtol(tmp3, NULL, 10);
break;
case OPAL_INT8:
kv->data.int8 = strtol(tmp3, NULL, 10);
break;
case OPAL_INT16:
kv->data.int16 = strtol(tmp3, NULL, 10);
break;
case OPAL_INT32:
kv->data.int32 = strtol(tmp3, NULL, 10);
break;
case OPAL_INT64:
kv->data.int64 = strtol(tmp3, NULL, 10);
break;
case OPAL_UINT:
kv->data.uint = strtoul(tmp3, NULL, 10);
break;
case OPAL_UINT8:
kv->data.uint8 = strtoul(tmp3, NULL, 10);
break;
case OPAL_UINT16:
kv->data.uint16 = strtoul(tmp3, NULL, 10);
break;
case OPAL_UINT32:
kv->data.uint32 = strtoul(tmp3, NULL, 10);
break;
case OPAL_UINT64:
kv->data.uint64 = strtoul(tmp3, NULL, 10);
break;
case OPAL_BYTE_OBJECT:
if (size == 0xffff) {
kv->data.bo.bytes = NULL;
kv->data.bo.size = 0;
} else {
kv->data.bo.bytes = malloc(size);
memcpy(kv->data.bo.bytes, tmp3, size);
kv->data.bo.size = size;
}
break;
default:
opal_output(0, "UNSUPPORTED TYPE %d", stored_type);
return;
}
/* keep going and cache everything locally */
offset = (size_t) (tmp3 - tmp_val) + size;
}
proc_data->loaded = true;
free (tmp_val);
}
static int store(struct opal_dstore_base_module_t *imod,
const opal_identifier_t *uid,
opal_value_t *val)
{
int rc;
opal_value_t *kv;
opal_dstore_proc_data_t *proc_data;
opal_identifier_t id;
mca_dstore_pmi_module_t *mod;
mod = (mca_dstore_pmi_module_t*)imod;
/* to protect alignment, copy the data across */
memcpy(&id, uid, sizeof(opal_identifier_t));
OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output,
"dstore:pmi:store: storing %s for proc %" PRIu64 "",
val->key, id));
/* lookup the proc data object for this proc */
if (NULL == (proc_data = opal_dstore_base_lookup_proc(&mod->hash_data, id))) {
/* unrecoverable error */
OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output,
"dstore:pmi:store: storing data for proc %" PRIu64 " unrecoverably failed",
id));
return OPAL_ERR_OUT_OF_RESOURCE;
}
if (proc_data->loaded) {
return OPAL_SUCCESS;
}
/* add it to our PMI payload */
if (OPAL_SUCCESS != (rc = pmi_store_encoded(mod, val->key, (void*)&val->data, val->type))) {
OPAL_ERROR_LOG(rc);
return rc;
}
/* retain a local copy */
kv = opal_dstore_base_lookup_keyval(proc_data, val->key);
OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output,
"dstore:hash:store: %s key %s[%s] for proc %" PRIu64 "",
(NULL == kv ? "storing" : "updating"),
val->key, opal_dss.lookup_data_type(val->type), id));
if (NULL != kv) {
opal_list_remove_item(&proc_data->data, &kv->super);
OBJ_RELEASE(kv);
}
/* create the copy */
if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&kv, val, OPAL_VALUE))) {
OPAL_ERROR_LOG(rc);
return rc;
}
opal_list_append(&proc_data->data, &kv->super);
return OPAL_SUCCESS;
}
static void commit(struct opal_dstore_base_module_t *imod,
const opal_identifier_t *uid)
{
mca_dstore_pmi_module_t *mod;
opal_identifier_t id;
mod = (mca_dstore_pmi_module_t*)imod;
/* to protect alignment, copy the identifier across */
memcpy(&id, uid, sizeof(opal_identifier_t));
/* commit the packed data to PMI */
pmi_commit_packed(mod, id);
int rc = mca_common_pmi_commit(mod->pmi_kvs_name);
if( OPAL_SUCCESS != rc ){
// TODO: What we do here? failure exit?
}
}
static int fetch(struct opal_dstore_base_module_t *imod,
const opal_identifier_t *uid,
const char *key, opal_list_t *kvs)
{
opal_dstore_proc_data_t *proc_data;
mca_dstore_pmi_module_t *mod;
opal_identifier_t id;
int rc;
opal_value_t *kv, *knew;
mod = (mca_dstore_pmi_module_t*)imod;
/* to protect alignment, copy the identifier across */
memcpy(&id, uid, sizeof(opal_identifier_t));
/* get the hash entry for this proc */
if (NULL == (proc_data = opal_dstore_base_lookup_proc(&mod->hash_data, id))) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
if (!proc_data->loaded) {
/* new proc - go get its data */
cache_keys_locally(mod, id, proc_data);
}
/* all keys will be available internally now. so
* retrieve the data from our hash table
*/
/* if the key is NULL, that we want everything */
if (NULL == key) {
OPAL_LIST_FOREACH(kv, &proc_data->data, opal_value_t) {
/* copy the value */
if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&knew, kv, OPAL_VALUE))) {
OPAL_ERROR_LOG(rc);
return rc;
}
/* add it to the output list */
opal_list_append(kvs, &knew->super);
}
return OPAL_SUCCESS;
}
/* find the value */
if (NULL == (kv = opal_dstore_base_lookup_keyval(proc_data, key))) {
OPAL_OUTPUT_VERBOSE((5, opal_dstore_base_framework.framework_output,
"dstore_pmi:fetch key %s for proc %" PRIu64 " not found",
(NULL == key) ? "NULL" : key, id));
return OPAL_ERR_NOT_FOUND;
}
/* create the copy */
if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&knew, kv, OPAL_VALUE))) {
OPAL_ERROR_LOG(rc);
return rc;
}
/* add it to the output list */
opal_list_append(kvs, &knew->super);
return OPAL_SUCCESS;
}
static int remove_data(struct opal_dstore_base_module_t *imod,
const opal_identifier_t *uid, const char *key)
{
opal_value_t *kv;
opal_identifier_t id;
mca_dstore_pmi_module_t *mod;
opal_dstore_proc_data_t *proc_data;
mod = (mca_dstore_pmi_module_t*)imod;
/* to protect alignment, copy the identifier across */
memcpy(&id, uid, sizeof(opal_identifier_t));
/* lookup the specified proc */
if (NULL == (proc_data = opal_dstore_base_lookup_proc(&mod->hash_data, id))) {
/* no data for this proc */
return OPAL_SUCCESS;
}
/* if key is NULL, remove all data for this proc */
if (NULL == key) {
while (NULL != (kv = (opal_value_t *) opal_list_remove_first(&proc_data->data))) {
OBJ_RELEASE(kv);
}
/* remove the proc_data object itself from the jtable */
opal_hash_table_remove_value_uint64(&mod->hash_data, id);
/* cleanup */
OBJ_RELEASE(proc_data);
return OPAL_SUCCESS;
}
/* remove this item */
OPAL_LIST_FOREACH(kv, &proc_data->data, opal_value_t) {
if (0 == strcmp(key, kv->key)) {
opal_list_remove_item(&proc_data->data, &kv->super);
OBJ_RELEASE(kv);
break;
}
}
return OPAL_SUCCESS;
}
static char* setup_key(mca_dstore_pmi_module_t *mod,
opal_identifier_t name, const char *key)
{
char *pmi_kvs_key;
if (mod->pmi_keylen_max <= asprintf(&pmi_kvs_key, "%" PRIu64 "-%s",
name, key)) {
free(pmi_kvs_key);
return NULL;
}
return pmi_kvs_key;
}
/* base64 encoding with illegal (to Cray PMI) characters removed ('=' is replaced by ' ') */
static inline unsigned char pmi_base64_encsym (unsigned char value) {
assert (value < 64);
if (value < 26) {
return 'A' + value;
} else if (value < 52) {
return 'a' + (value - 26);
} else if (value < 62) {
return '0' + (value - 52);
}
return (62 == value) ? '+' : '/';
}
static inline unsigned char pmi_base64_decsym (unsigned char value) {
if ('+' == value) {
return 62;
} else if ('/' == value) {
return 63;
} else if (' ' == value) {
return 64;
} else if (value <= '9') {
return (value - '0') + 52;
} else if (value <= 'Z') {
return (value - 'A');
} else if (value <= 'z') {
return (value - 'a') + 26;
}
return 64;
}
static inline void pmi_base64_encode_block (const unsigned char in[3], char out[4], int len) {
out[0] = pmi_base64_encsym (in[0] >> 2);
out[1] = pmi_base64_encsym (((in[0] & 0x03) << 4) | ((in[1] & 0xf0) >> 4));
/* Cray PMI doesn't allow = in PMI attributes so pad with spaces */
out[2] = 1 < len ? pmi_base64_encsym(((in[1] & 0x0f) << 2) | ((in[2] & 0xc0) >> 6)) : ' ';
out[3] = 2 < len ? pmi_base64_encsym(in[2] & 0x3f) : ' ';
}
static inline int pmi_base64_decode_block (const char in[4], unsigned char out[3]) {
char in_dec[4];
in_dec[0] = pmi_base64_decsym (in[0]);
in_dec[1] = pmi_base64_decsym (in[1]);
in_dec[2] = pmi_base64_decsym (in[2]);
in_dec[3] = pmi_base64_decsym (in[3]);
out[0] = in_dec[0] << 2 | in_dec[1] >> 4;
if (64 == in_dec[2]) {
return 1;
}
out[1] = in_dec[1] << 4 | in_dec[2] >> 2;
if (64 == in_dec[3]) {
return 2;
}
out[2] = ((in_dec[2] << 6) & 0xc0) | in_dec[3];
return 3;
}
/* PMI only supports strings. For now, do a simple base64. */
static char *pmi_encode(const void *val, size_t vallen) {
char *outdata, *tmp;
size_t i;
outdata = calloc (((2 + vallen) * 4) / 3 + 2, 1);
if (NULL == outdata) {
return NULL;
}
for (i = 0, tmp = outdata ; i < vallen ; i += 3, tmp += 4) {
pmi_base64_encode_block((unsigned char *) val + i, tmp, vallen - i);
}
/* mark the end of the pmi string */
tmp[0] = (unsigned char)'-';
tmp[1] = (unsigned char)'\0';
return outdata;
}
static uint8_t *pmi_decode (const char *data, size_t *retlen) {
size_t input_len = (strlen (data) - 1) / 4;
unsigned char *ret;
int out_len;
size_t i;
/* default */
*retlen = 0;
ret = calloc (1, 3 * input_len + 1);
if (NULL == ret) {
return ret;
}
for (i = 0, out_len = 0 ; i < input_len ; i++, data += 4) {
out_len += pmi_base64_decode_block(data, ret + 3 * i);
}
ret[out_len] = '\0';
*retlen = out_len;
return ret;
}


@ -1,37 +0,0 @@
/*
* Copyright (c) 2012 Los Alamos National Security, Inc. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OPAL_DSTORE_PMI_H
#define OPAL_DSTORE_PMI_H
#include "opal/util/error.h"
#include "opal/mca/dstore/dstore.h"
BEGIN_C_DECLS
OPAL_MODULE_DECLSPEC extern opal_dstore_base_component_t mca_dstore_pmi_component;
typedef struct {
opal_dstore_base_module_t api;
char *pmi_kvs_name;
int pmi_vallen_max;
int pmi_keylen_max;
char *pmi_packed_data;
int pmi_pack_key;
int pmi_packed_data_off;
opal_hash_table_t hash_data;
} mca_dstore_pmi_module_t;
OPAL_MODULE_DECLSPEC extern mca_dstore_pmi_module_t opal_dstore_pmi_module;
END_C_DECLS
#endif /* OPAL_DSTORE_PMI_H */


@ -1,150 +0,0 @@
/*
* Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "opal/constants.h"
#include "opal/mca/common/pmi/common_pmi.h"
#include "opal/mca/base/base.h"
#include "opal/mca/dstore/dstore.h"
#include "opal/mca/dstore/base/base.h"
#include "opal/runtime/opal_params.h"
#include "dstore_pmi.h"
static int dstore_pmi_component_register(void);
static bool component_avail(void);
static opal_dstore_base_module_t *component_create(void);
static void component_finalize(void);
static int setup_pmi(void);
/* local storage */
static char *pmi_kvs_name = NULL;
static int pmi_vallen_max = -1;
static int pmi_keylen_max = -1;
/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it
*/
opal_dstore_base_component_t mca_dstore_pmi_component = {
{
OPAL_DSTORE_BASE_VERSION_2_0_0,
/* Component name and version */
"pmi",
OPAL_MAJOR_VERSION,
OPAL_MINOR_VERSION,
OPAL_RELEASE_VERSION,
/* Component open and close functions */
NULL,
NULL,
NULL,
dstore_pmi_component_register
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
80,
component_avail,
component_create,
component_finalize
};
static int dstore_pmi_component_register(void)
{
mca_base_component_t *c = &mca_dstore_pmi_component.base_version;
mca_dstore_pmi_component.priority = 80;
(void) mca_base_component_var_register(c, "priority",
"Priority dictating order in which components will be considered",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_dstore_pmi_component.priority);
return OPAL_SUCCESS;
}
static bool component_avail(void)
{
/* only use PMI if available - the ESS pmi module
* will force our selection if we are direct-launched,
* and the orted will turn us "off" if indirectly launched
*/
int rc = mca_common_pmi_init(opal_pmi_version);
if ( OPAL_SUCCESS == rc && OPAL_SUCCESS == setup_pmi()) {
return true;
}
/* if not, then we are not available */
return false;
}
static void component_finalize(void)
{
mca_common_pmi_finalize();
if (NULL != pmi_kvs_name) {
free(pmi_kvs_name);
}
}
static opal_dstore_base_module_t *component_create(void)
{
mca_dstore_pmi_module_t *mod;
mod = (mca_dstore_pmi_module_t*)malloc(sizeof(mca_dstore_pmi_module_t));
if (NULL == mod) {
OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
return NULL;
}
/* copy the APIs across */
memcpy(mod, &opal_dstore_pmi_module.api, sizeof(opal_dstore_base_module_t));
/* copy the global values */
mod->pmi_kvs_name = strdup(pmi_kvs_name);
mod->pmi_vallen_max = pmi_vallen_max;
mod->pmi_keylen_max = pmi_keylen_max;
/* init the other values */
mod->pmi_packed_data = NULL;
mod->pmi_pack_key = 0;
mod->pmi_packed_data_off = 0;
OBJ_CONSTRUCT(&mod->hash_data, opal_hash_table_t);
opal_hash_table_init(&mod->hash_data, 256);
return (opal_dstore_base_module_t*)mod;
}
static int setup_pmi(void)
{
int max_length, rc;
pmi_vallen_max = mca_common_pmi_vallen();
max_length = mca_common_pmi_kvslen();
pmi_kvs_name = (char*)malloc(max_length);
if (NULL == pmi_kvs_name) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
rc = mca_common_pmi_kvsname(pmi_kvs_name, max_length);
if( OPAL_SUCCESS != rc ){
OPAL_OUTPUT_VERBOSE((1, opal_dstore_base_framework.framework_output,
"dstore:pmi:pmi_setup failed %s with error %s",
"mca_common_pmi_jobname",
opal_errmgr_base_pmi_error(rc)));
return rc;
}
pmi_keylen_max = mca_common_pmi_keylen();
return OPAL_SUCCESS;
}


@ -296,6 +296,13 @@ static int opal_hwloc_base_close(void)
hwloc_bitmap_free(opal_hwloc_my_cpuset);
opal_hwloc_my_cpuset = NULL;
}
/* destroy the topology */
if (NULL != opal_hwloc_topology) {
opal_hwloc_base_free_topology(opal_hwloc_topology);
opal_hwloc_topology = NULL;
}
}
#endif

opal/mca/pmix/Makefile.am (new file, 32 added lines)

@ -0,0 +1,32 @@
#
# Copyright (c) 2014 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
AM_CPPFLAGS = $(LTDLINCL) $(pmix_CPPFLAGS)
# main library setup
noinst_LTLIBRARIES = libmca_pmix.la
libmca_pmix_la_SOURCES =
# pkgdata setup
dist_opaldata_DATA =
# local files
headers = pmix.h
libmca_pmix_la_SOURCES += $(headers)
# Conditionally install the header files
if WANT_INSTALL_HEADERS
opaldir = $(opalincludedir)/$(subdir)
nobase_opal_HEADERS = $(headers)
endif
include base/Makefile.am
distclean-local:
rm -f base/static-components.h

opal/mca/pmix/base/Makefile.am (new file, 21 added lines)

@ -0,0 +1,21 @@
#
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2012-2013 Los Alamos National Security, Inc. All rights reserved.
# Copyright (c) 2014 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
dist_opaldata_DATA += base/help-pmix-base.txt
headers += \
base/base.h \
base/pmix_base_fns.h
libmca_pmix_la_SOURCES += \
base/pmix_base_frame.c \
base/pmix_base_select.c \
base/pmix_base_fns.c

opal/mca/pmix/base/base.h (new file, 41 added lines)

@ -0,0 +1,41 @@
/*
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/** @file:
*/
#ifndef MCA_PMI_BASE_H
#define MCA_PMI_BASE_H
#include "opal_config.h"
#include "opal/types.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/mca_base_framework.h"
#include "opal/mca/pmix/pmix.h"
BEGIN_C_DECLS
OPAL_DECLSPEC extern mca_base_framework_t opal_pmix_base_framework;
/**
* Select a pmix module
*/
OPAL_DECLSPEC int opal_pmix_base_select(void);
OPAL_DECLSPEC extern bool opal_pmix_base_allow_delayed_server;
OPAL_DECLSPEC void opal_pmix_base_register_handler(opal_pmix_errhandler_fn_t err);
OPAL_DECLSPEC void opal_pmix_base_deregister_handler(void);
OPAL_DECLSPEC void opal_pmix_base_errhandler(int error);
END_C_DECLS
#endif
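
The error-handler hooks above are plain callback registration. A minimal sketch of wiring one up (the handler body and function names prefixed "example"/"my_" are illustrative assumptions; the callback shape matches the (int error) invocation used in the base functions):

    #include "opal/util/output.h"
    #include "opal/mca/pmix/base/base.h"

    /* illustrative handler matching the (int error) callback shape */
    static void my_pmix_errhandler(int error)
    {
        opal_output(0, "example: PMIx layer reported error %d", error);
    }

    static void example_setup(void)
    {
        opal_pmix_base_register_handler(my_pmix_errhandler);
    }

    static void example_teardown(void)
    {
        opal_pmix_base_deregister_handler();
    }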

opal/mca/pmix/base/help-pmix-base.txt (new file, 19 added lines)

@ -0,0 +1,19 @@
-*- text -*-
#
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
#
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English general help file for the OPAL PMIx base framework.
#
[errmgr-hnp:unknown-job-error]
An error has occurred in an unknown job. This generally should not happen
except due to an internal OPAL error.
Job state: %s
This information should probably be reported to the OMPI developers.

opal/mca/pmix/base/pmix_base_fns.c (new file, 508 added lines)

@ -0,0 +1,508 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include "opal_config.h"
#include "opal/constants.h"
#include <regex.h>
#include <time.h>
#include <string.h>
#include "opal_stdint.h"
#include "opal/class/opal_pointer_array.h"
#include "opal/util/argv.h"
#include "opal/util/output.h"
#include "opal/util/proc.h"
#include "opal/util/show_help.h"
#include "opal/mca/pmix/base/base.h"
#include "opal/mca/pmix/base/pmix_base_fns.h"
#define OPAL_PMI_PAD 10
static opal_pmix_errhandler_fn_t errhandler = NULL;
void opal_pmix_base_register_handler(opal_pmix_errhandler_fn_t err)
{
errhandler = err;
}
void opal_pmix_base_errhandler(int error)
{
if (NULL != errhandler) {
errhandler(error);
}
}
void opal_pmix_base_deregister_handler(void)
{
errhandler = NULL;
}
static char* setup_key(const opal_process_name_t* name, const char *key, int pmix_keylen_max);
static char *pmi_encode(const void *val, size_t vallen);
static uint8_t *pmi_decode (const char *data, size_t *retlen);
int opal_pmix_base_store_encoded(const char *key, const void *data,
opal_data_type_t type, char** buffer, int* length)
{
opal_byte_object_t *bo;
size_t data_len = 0;
size_t needed;
int pmi_packed_data_off = *length;
char* pmi_packed_data = *buffer;
switch (type) {
case OPAL_STRING:
{
char *ptr = *(char **)data;
data_len = ptr ? strlen(ptr) + 1 : 0;
data = ptr;
break;
}
case OPAL_INT:
case OPAL_UINT:
data_len = sizeof (int);
break;
case OPAL_INT16:
case OPAL_UINT16:
data_len = sizeof (int16_t);
break;
case OPAL_INT32:
case OPAL_UINT32:
data_len = sizeof (int32_t);
break;
case OPAL_INT64:
case OPAL_UINT64:
data_len = sizeof (int64_t);
break;
case OPAL_BYTE_OBJECT:
bo = (opal_byte_object_t *) data;
data = bo->bytes;
data_len = bo->size;
}
needed = 10 + data_len + strlen (key);
if (NULL == pmi_packed_data) {
pmi_packed_data = calloc (needed, 1);
} else {
/* grow the region */
pmi_packed_data = realloc (pmi_packed_data, pmi_packed_data_off + needed);
}
/* special length meaning NULL */
if (NULL == data) {
data_len = 0xffff;
}
/* serialize the opal datatype */
pmi_packed_data_off += sprintf (pmi_packed_data + pmi_packed_data_off,
"%s%c%02x%c%04x%c", key, '\0', type, '\0',
(int) data_len, '\0');
if (NULL != data) {
memmove (pmi_packed_data + pmi_packed_data_off, data, data_len);
pmi_packed_data_off += data_len;
}
*length = pmi_packed_data_off;
*buffer = pmi_packed_data;
return OPAL_SUCCESS;
}
int opal_pmix_base_commit_packed( char* buffer_to_put, int data_to_put,
int vallen, int* pack_key, kvs_put_fn fn)
{
int rc, left;
char *pmikey = NULL, *tmp;
char tmp_key[32], save;
char *encoded_data;
int pkey;
pkey = *pack_key;
if (NULL == (encoded_data = pmi_encode(buffer_to_put, data_to_put))) {
OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
return OPAL_ERR_OUT_OF_RESOURCE;
}
for (left = strlen (encoded_data), tmp = encoded_data ; left ; ) {
size_t value_size = vallen > left ? left : vallen - 1;
sprintf (tmp_key, "key%d", *pack_key);
if (NULL == (pmikey = setup_key(&OPAL_PROC_MY_NAME, tmp_key, vallen))) {
OPAL_ERROR_LOG(OPAL_ERR_BAD_PARAM);
rc = OPAL_ERR_BAD_PARAM;
break;
}
/* only write value_size bytes */
save = tmp[value_size];
tmp[value_size] = '\0';
rc = fn(pmikey, tmp);
if (OPAL_SUCCESS != rc) {
*pack_key = pkey;
return rc;
}
free(pmikey);
if (OPAL_SUCCESS != rc) {
break;
}
tmp[value_size] = save;
tmp += value_size;
left -= value_size;
pkey++;
rc = OPAL_SUCCESS;
}
if (encoded_data) {
free(encoded_data);
}
*pack_key = pkey;
return OPAL_SUCCESS;
}
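
To show how a component is expected to drive these helpers, here is a hedged sketch: the put/get callbacks are empty stand-ins for whatever the underlying PMI library provides, and the 256-byte value limit is an assumed figure, not a value from this commit.

    #include <stdlib.h>
    #include <stdint.h>
    #include "opal/constants.h"
    #include "opal/mca/pmix/base/pmix_base_fns.h"

    /* stand-in callbacks with the kvs_put_fn / kvs_get_fn shapes; a real
     * component would forward these to its PMI library */
    static int example_kvs_put(const char key[], const char value[])
    {
        (void)key; (void)value;
        return OPAL_SUCCESS;
    }
    static int example_kvs_get(const char key[], char value[], int maxvalue)
    {
        (void)key; (void)value; (void)maxvalue;
        return OPAL_SUCCESS;
    }

    static int example_commit(void)
    {
        char *buffer = NULL;
        int length = 0, pack_key = 0, rc;
        uint32_t rank = 3;
        int vallen_max = 256;   /* assumed PMI value-length limit */

        /* pack one key into the local buffer, then push it out in
         * vallen_max-sized "keyN" chunks via the put callback */
        opal_pmix_base_store_encoded("example.rank", &rank, OPAL_UINT32,
                                     &buffer, &length);
        rc = opal_pmix_base_commit_packed(buffer, length, vallen_max,
                                          &pack_key, example_kvs_put);
        free(buffer);
        (void)example_kvs_get;
        return rc;
    }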
int opal_pmix_base_get_packed(const opal_identifier_t* proc, char **packed_data,
size_t *len, int vallen, kvs_get_fn fn)
{
char *tmp_encoded = NULL, *pmikey, *pmi_tmp;
int remote_key, size;
size_t bytes_read;
int rc = OPAL_ERR_NOT_FOUND;
/* set default */
*packed_data = NULL;
*len = 0;
pmi_tmp = calloc (vallen, 1);
if (NULL == pmi_tmp) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
/* read all of the packed data from this proc */
for (remote_key = 0, bytes_read = 0 ; ; ++remote_key) {
char tmp_key[32];
sprintf (tmp_key, "key%d", remote_key);
if (NULL == (pmikey = setup_key(proc, tmp_key, vallen))) {
rc = OPAL_ERR_OUT_OF_RESOURCE;
OPAL_ERROR_LOG(rc);
return rc;
}
OPAL_OUTPUT_VERBOSE((10, opal_pmix_base_framework.framework_output,
"GETTING KEY %s", pmikey));
rc = fn(pmikey, pmi_tmp, vallen);
free (pmikey);
if (OPAL_SUCCESS != rc) {
break;
}
size = strlen (pmi_tmp);
if (NULL == tmp_encoded) {
tmp_encoded = malloc (size + 1);
} else {
tmp_encoded = realloc (tmp_encoded, bytes_read + size + 1);
}
strcpy (tmp_encoded + bytes_read, pmi_tmp);
bytes_read += size;
/* is the string terminator present? */
if ('-' == tmp_encoded[bytes_read-1]) {
break;
}
}
free (pmi_tmp);
OPAL_OUTPUT_VERBOSE((10, opal_pmix_base_framework.framework_output,
"Read data %s\n",
(NULL == tmp_encoded) ? "NULL" : tmp_encoded));
if (NULL != tmp_encoded) {
*packed_data = (char *) pmi_decode (tmp_encoded, len);
free (tmp_encoded);
if (NULL == *packed_data) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
}
return rc;
}
int opal_pmix_base_cache_keys_locally(const opal_identifier_t* id, const char* key,
opal_value_t **out_kv, char* kvs_name,
int vallen, kvs_get_fn fn)
{
char *tmp, *tmp2, *tmp3, *tmp_val;
opal_data_type_t stored_type;
size_t len, offset;
int rc, size;
opal_value_t *kv, *knew;
*out_kv = NULL;
opal_list_t values;
/* first try to fetch data from data storage */
OBJ_CONSTRUCT(&values, opal_list_t);
rc = opal_dstore.fetch(opal_dstore_internal, id, key, &values);
if (OPAL_SUCCESS == rc) {
kv = (opal_value_t*)opal_list_get_first(&values);
/* create the copy */
if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&knew, kv, OPAL_VALUE))) {
OPAL_ERROR_LOG(rc);
} else {
*out_kv = knew;
}
OPAL_LIST_DESTRUCT(&values);
return rc;
}
OPAL_LIST_DESTRUCT(&values);
OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output,
"pmix: get all keys for proc %" PRIu64 " in KVS %s",
*id, kvs_name));
rc = opal_pmix_base_get_packed(id, &tmp_val, &len, vallen, fn);
if (OPAL_SUCCESS != rc) {
return rc;
}
/* search for each key in the decoded data */
for (offset = 0 ; offset < len && '\0' != tmp_val[offset] ; ) {
/* type */
tmp = tmp_val + offset + strlen (tmp_val + offset) + 1;
/* size */
tmp2 = tmp + strlen (tmp) + 1;
/* data */
tmp3 = tmp2 + strlen (tmp2) + 1;
stored_type = (opal_data_type_t) strtol (tmp, NULL, 16);
size = strtol (tmp2, NULL, 16);
/* cache value locally so we don't have to look it up via pmi again */
kv = OBJ_NEW(opal_value_t);
kv->key = strdup(tmp_val + offset);
kv->type = stored_type;
switch (stored_type) {
case OPAL_BYTE:
kv->data.byte = *tmp3;
break;
case OPAL_STRING:
if (NULL != tmp3) {
kv->data.string = strdup(tmp3);
} else {
kv->data.string = NULL;
}
break;
case OPAL_PID:
kv->data.pid = strtoul(tmp3, NULL, 10);
break;
case OPAL_INT:
kv->data.integer = strtol(tmp3, NULL, 10);
break;
case OPAL_INT8:
kv->data.int8 = strtol(tmp3, NULL, 10);
break;
case OPAL_INT16:
kv->data.int16 = strtol(tmp3, NULL, 10);
break;
case OPAL_INT32:
kv->data.int32 = strtol(tmp3, NULL, 10);
break;
case OPAL_INT64:
kv->data.int64 = strtol(tmp3, NULL, 10);
break;
case OPAL_UINT:
kv->data.uint = strtoul(tmp3, NULL, 10);
break;
case OPAL_UINT8:
kv->data.uint8 = strtoul(tmp3, NULL, 10);
break;
case OPAL_UINT16:
kv->data.uint16 = strtoul(tmp3, NULL, 10);
break;
case OPAL_UINT32:
kv->data.uint32 = strtoul(tmp3, NULL, 10);
break;
case OPAL_UINT64:
kv->data.uint64 = strtoul(tmp3, NULL, 10);
break;
case OPAL_BYTE_OBJECT:
if (size == 0xffff) {
kv->data.bo.bytes = NULL;
kv->data.bo.size = 0;
} else {
kv->data.bo.bytes = malloc(size);
memcpy(kv->data.bo.bytes, tmp3, size);
kv->data.bo.size = size;
}
break;
default:
opal_output(0, "UNSUPPORTED TYPE %d", stored_type);
return OPAL_ERROR;
}
/* store data in local hash table */
if (OPAL_SUCCESS != (rc = opal_dstore.store(opal_dstore_internal, id, kv))) {
OPAL_ERROR_LOG(rc);
}
/* keep going and cache everything locally */
offset = (size_t) (tmp3 - tmp_val) + size;
if (0 == strcmp(kv->key, key)) {
/* create the copy */
if (OPAL_SUCCESS != (rc = opal_dss.copy((void**)&knew, kv, OPAL_VALUE))) {
OPAL_ERROR_LOG(rc);
} else {
*out_kv = knew;
}
}
}
free (tmp_val);
return rc;
}
static char* setup_key(const opal_process_name_t* name, const char *key, int pmix_keylen_max)
{
char *pmi_kvs_key;
if (pmix_keylen_max <= asprintf(&pmi_kvs_key, "%" PRIu64 "-%s",
*name, key)) {
free(pmi_kvs_key);
return NULL;
}
return pmi_kvs_key;
}
/* base64 encoding with illegal (to Cray PMI) characters removed ('=' is replaced by ' ') */
static inline unsigned char pmi_base64_encsym (unsigned char value) {
assert (value < 64);
if (value < 26) {
return 'A' + value;
} else if (value < 52) {
return 'a' + (value - 26);
} else if (value < 62) {
return '0' + (value - 52);
}
return (62 == value) ? '+' : '/';
}
static inline unsigned char pmi_base64_decsym (unsigned char value) {
if ('+' == value) {
return 62;
} else if ('/' == value) {
return 63;
} else if (' ' == value) {
return 64;
} else if (value <= '9') {
return (value - '0') + 52;
} else if (value <= 'Z') {
return (value - 'A');
} else if (value <= 'z') {
return (value - 'a') + 26;
}
return 64;
}
static inline void pmi_base64_encode_block (const unsigned char in[3], char out[4], int len) {
out[0] = pmi_base64_encsym (in[0] >> 2);
out[1] = pmi_base64_encsym (((in[0] & 0x03) << 4) | ((in[1] & 0xf0) >> 4));
/* Cray PMI doesn't allow = in PMI attributes so pad with spaces */
out[2] = 1 < len ? pmi_base64_encsym(((in[1] & 0x0f) << 2) | ((in[2] & 0xc0) >> 6)) : ' ';
out[3] = 2 < len ? pmi_base64_encsym(in[2] & 0x3f) : ' ';
}
static inline int pmi_base64_decode_block (const char in[4], unsigned char out[3]) {
char in_dec[4];
in_dec[0] = pmi_base64_decsym (in[0]);
in_dec[1] = pmi_base64_decsym (in[1]);
in_dec[2] = pmi_base64_decsym (in[2]);
in_dec[3] = pmi_base64_decsym (in[3]);
out[0] = in_dec[0] << 2 | in_dec[1] >> 4;
if (64 == in_dec[2]) {
return 1;
}
out[1] = in_dec[1] << 4 | in_dec[2] >> 2;
if (64 == in_dec[3]) {
return 2;
}
out[2] = ((in_dec[2] << 6) & 0xc0) | in_dec[3];
return 3;
}
/* PMI only supports strings. For now, do a simple base64. */
static char *pmi_encode(const void *val, size_t vallen)
{
char *outdata, *tmp;
size_t i;
outdata = calloc (((2 + vallen) * 4) / 3 + 2, 1);
if (NULL == outdata) {
return NULL;
}
for (i = 0, tmp = outdata ; i < vallen ; i += 3, tmp += 4) {
pmi_base64_encode_block((unsigned char *) val + i, tmp, vallen - i);
}
/* mark the end of the pmi string */
tmp[0] = (unsigned char)'-';
tmp[1] = (unsigned char)'\0';
return outdata;
}
static uint8_t *pmi_decode (const char *data, size_t *retlen)
{
size_t input_len = (strlen (data) - 1) / 4;
unsigned char *ret;
int out_len;
size_t i;
/* default */
*retlen = 0;
ret = calloc (1, 3 * input_len + 1);
if (NULL == ret) {
return ret;
}
for (i = 0, out_len = 0 ; i < input_len ; i++, data += 4) {
out_len += pmi_base64_decode_block(data, ret + 3 * i);
}
ret[out_len] = '\0';
*retlen = out_len;
return ret;
}

opal/mca/pmix/base/pmix_base_fns.h (new file, 34 added lines)

@ -0,0 +1,34 @@
/*
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_PMIX_BASE_FNS_H
#define MCA_PMIX_BASE_FNS_H
#include "opal_config.h"
#include "opal/util/error.h"
#include "opal/dss/dss_types.h"
BEGIN_C_DECLS
typedef int (*kvs_put_fn)(const char key[], const char value[]);
typedef int (*kvs_get_fn)(const char key[], char value [], int maxvalue);
OPAL_DECLSPEC int opal_pmix_base_store_encoded(const char *key, const void *data,
opal_data_type_t type, char** buffer, int* length);
OPAL_DECLSPEC int opal_pmix_base_commit_packed( char* buffer_to_put, int data_to_put,
int vallen, int* pack_key, kvs_put_fn fn);
OPAL_DECLSPEC int opal_pmix_base_cache_keys_locally(const opal_identifier_t* id, const char* key,
opal_value_t **out_kv, char* kvs_name, int vallen, kvs_get_fn fn);
OPAL_DECLSPEC int opal_pmix_base_get_packed(const opal_identifier_t* proc, char **packed_data,
size_t *len, int vallen, kvs_get_fn fn);
END_C_DECLS
#endif

opal/mca/pmix/base/pmix_base_frame.c (new file, 65 added lines)

@ -0,0 +1,65 @@
/*
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "opal/constants.h"
#include "opal/mca/mca.h"
#include "opal/util/output.h"
#include "opal/mca/base/base.h"
#include "opal/mca/pmix/base/base.h"
/*
* The following file was created by configure. It contains extern
* components and the definition of an array of pointers to each
* module's public mca_base_module_t struct.
*/
#include "opal/mca/pmix/base/static-components.h"
opal_pmix_base_module_t opal_pmix;
bool opal_pmix_use_collective = false;
bool opal_pmix_base_allow_delayed_server = false;
static int opal_pmix_base_frame_register(mca_base_register_flag_t flags)
{
opal_pmix_use_collective = false;
(void)mca_base_var_register("opal", "pmix", "base", "direct_modex",
"Default to direct modex (default: true)",
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&opal_pmix_use_collective);
return OPAL_SUCCESS;
}
static int opal_pmix_base_frame_close(void)
{
return mca_base_framework_components_close(&opal_pmix_base_framework, NULL);
}
static int opal_pmix_base_frame_open(mca_base_open_flag_t flags)
{
/* Open up all available components */
return mca_base_framework_components_open(&opal_pmix_base_framework, flags);
}
MCA_BASE_FRAMEWORK_DECLARE(opal, pmix, "OPAL PMI Client Framework",
opal_pmix_base_frame_register,
opal_pmix_base_frame_open,
opal_pmix_base_frame_close,
mca_pmix_base_static_components, 0);
OBJ_CLASS_INSTANCE(pmix_info_t,
opal_list_item_t,
NULL, NULL);

opal/mca/pmix/base/pmix_base_select.c (new file, 55 added lines)

@ -0,0 +1,55 @@
/*
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "opal/constants.h"
#include "opal/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/mca/pmix/base/base.h"
/*
* Globals
*/
int opal_pmix_base_select(void)
{
int ret, exit_status = OPAL_SUCCESS;
opal_pmix_base_component_t *best_component = NULL;
opal_pmix_base_module_t *best_module = NULL;
/*
* Select the best component
*/
if( OPAL_SUCCESS != mca_base_select("pmix", opal_pmix_base_framework.framework_output,
&opal_pmix_base_framework.framework_components,
(mca_base_module_t **) &best_module,
(mca_base_component_t **) &best_component) ) {
/* notify caller that no available component found */
return OPAL_ERR_NOT_FOUND;
}
/* Save the winner */
opal_pmix = *best_module;
/* Initialize the winner */
if (OPAL_SUCCESS != (ret = opal_pmix.init()) ) {
/* connection not available is okay - just means
* that a server hasn't already been defined */
if (OPAL_ERR_SERVER_NOT_AVAIL == ret) {
exit_status = OPAL_SUCCESS;
} else {
exit_status = ret;
}
}
return exit_status;
}
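
For context, the expected calling sequence is: open the framework (running the register/open hooks from pmix_base_frame.c), then select and initialize the best component, after which calls go through the global opal_pmix module. A minimal sketch, assuming it is invoked from the normal OPAL/ORTE init path (which is outside this diff):

    #include "opal/constants.h"
    #include "opal/mca/base/base.h"
    #include "opal/mca/pmix/base/base.h"

    static int open_and_select_pmix(void)
    {
        int rc;

        /* open the framework - this registers the MCA params and opens
         * whichever components were built */
        if (OPAL_SUCCESS != (rc = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
            return rc;
        }
        /* pick the highest-priority component and initialize it; a missing
         * server is tolerated inside opal_pmix_base_select() itself */
        return opal_pmix_base_select();
    }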

opal/mca/pmix/configure.m4 (new file, 28 lines added)

@@ -0,0 +1,28 @@
dnl -*- shell-script -*-
dnl
dnl Copyright (c) 2014 Intel, Inc. All rights reserved.
dnl $COPYRIGHT$
dnl
dnl Additional copyrights may follow
dnl
dnl $HEADER$
dnl
AC_DEFUN([MCA_opal_pmix_CONFIG],[
# configure all the components first
MCA_CONFIGURE_FRAMEWORK($1, $2, 1)
# Get the CPPFLAGS for the PMI headers
AC_MSG_CHECKING([for PMI headers])
OPAL_CHECK_PMI([pmix], [opal_pmix_happy=1], [opal_pmix_happy=0])
OPAL_CHECK_CRAY_PMI([pmix], [opal_pmix_cray_happy=1], [opal_pmix_cray_happy=0])
AS_IF([test $opal_pmix_happy = 1 -o $opal_pmix_cray_happy = 1],
[AC_MSG_RESULT([yes])],
[AC_MSG_RESULT([no])])
# add the required make directives - we only care about the CPPFLAGS
AC_MSG_CHECKING([for PMIX CPPFLAGS])
AC_SUBST([pmix_CPPFLAGS])
AC_MSG_RESULT([$pmix_CPPFLAGS])
])dnl

opal/mca/pmix/cray/Makefile.am (new file, 39 lines added)

@@ -0,0 +1,39 @@
#
# Copyright (c) 2014 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources = \
pmix_cray.h \
pmix_cray_component.c \
pmix_cray_pmap_parser.c \
pmix_cray.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_opal_pmix_cray_DSO
component_noinst =
component_install = mca_pmix_cray.la
else
component_noinst = libmca_pmix_cray.la
component_install =
endif
mcacomponentdir = $(opallibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_pmix_cray_la_SOURCES = $(sources)
mca_pmix_cray_la_CPPFLAGS = $(pmix_cray_CPPFLAGS)
mca_pmix_cray_la_LDFLAGS = -module -avoid-version $(pmix_cray_LDFLAGS)
mca_pmix_cray_la_LIBADD = $(pmix_cray_LIBS)
noinst_LTLIBRARIES = $(component_noinst)
libmca_pmix_cray_la_SOURCES =$(sources)
libmca_pmix_cray_la_CPPFLAGS = $(pmix_cray_CPPFLAGS)
libmca_pmix_cray_la_LDFLAGS = -module -avoid-version $(pmix_cray_LDFLAGS)
libmca_pmix_cray_la_LIBADD = $(pmix_cray_LIBS)

opal/mca/pmix/cray/configure.m4 (new file, 31 lines added)

@@ -0,0 +1,31 @@
# -*- shell-script -*-
#
# Copyright (c) 2014 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_pmix_cray_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_opal_pmix_cray_CONFIG], [
AC_CONFIG_FILES([opal/mca/pmix/cray/Makefile])
#
# check specifically for Cray PMI here
#
OPAL_CHECK_CRAY_PMI([pmix_cray], [pmix_cray_good=1], [pmix_cray_good=0])
# Evaluate succeed / fail
AS_IF([test "$pmix_cray_good" = 1],
[$1],
[$2])
# set build flags to use in makefile
AC_SUBST([pmix_cray_CPPFLAGS])
AC_SUBST([pmix_cray_LDFLAGS])
AC_SUBST([pmix_cray_LIBS])
])

opal/mca/pmix/cray/pmix_cray.c (new file, 711 lines added)

@@ -0,0 +1,711 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All
* rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "opal/constants.h"
#include "opal/types.h"
#include "opal_stdint.h"
#include "opal/mca/hwloc/base/base.h"
#include "opal/util/output.h"
#include "opal/util/proc.h"
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include <string.h>
#include <pmi.h>
#include <pmi2.h>
#include "opal/mca/pmix/base/base.h"
#include "pmix_cray.h"
typedef struct {
uint32_t jid;
uint32_t vid;
} pmix_pname_t;
static int cray_init(void);
static int cray_fini(void);
static bool cray_initialized(void);
static int cray_abort(int flag, const char msg[]);
static int cray_spawn(int count, const char * cmds[],
int argcs[], const char ** argvs[],
const int maxprocs[],
opal_list_t *info_keyval_vector,
opal_list_t *preput_keyval_vector,
char jobId[], int jobIdSize,
int errors[]);
static int cray_job_connect(const char jobId[]);
static int cray_job_disconnect(const char jobId[]);
static int cray_put(opal_pmix_scope_t scope, opal_value_t *kv);
static int cray_fence(opal_process_name_t *procs, size_t nprocs);
static int cray_get(const opal_identifier_t *id,
const char *key,
opal_value_t **kv);
static int cray_publish(const char service_name[],
opal_list_t *info,
const char port[]);
static int cray_lookup(const char service_name[],
opal_list_t *info,
char port[], int portLen);
static int cray_unpublish(const char service_name[],
opal_list_t *info);
static bool cray_get_attr(const char *attr, opal_value_t **kv);
static int kvs_get(const char key[], char value [], int maxvalue);
#if 0
static int cray_get_jobid(char jobId[], int jobIdSize);
static int cray_get_rank(int *rank);
static int cray_get_size(opal_pmix_scope_t scope, int *size);
static int cray_get_appnum(int *appnum);
static int cray_local_info(int vpid, int **ranks_ret,
int *procs_ret, char **error);
#endif
const opal_pmix_base_module_t opal_pmix_cray_module = {
cray_init,
cray_fini,
cray_initialized,
cray_abort,
cray_fence,
NULL,
cray_put,
cray_get,
NULL,
cray_publish,
cray_lookup,
cray_unpublish,
cray_get_attr,
NULL,
cray_spawn,
cray_job_connect,
cray_job_disconnect,
NULL,
NULL
};
// usage accounting
static int pmix_init_count = 0;
// PMI constant values:
static int pmix_kvslen_max = 0;
static int pmix_keylen_max = 0;
static int pmix_vallen_max = 0;
// Job environment description
static int pmix_size = 0;
static int pmix_rank = 0;
static int pmix_lrank = 0;
static int pmix_nrank = 0;
static int pmix_nlranks = 0;
static int pmix_appnum = 0;
static int pmix_usize = 0;
static char *pmix_kvs_name = NULL;
static int *pmix_lranks = NULL;
static pmix_pname_t pmix_pname;
static uint32_t pmix_jobid = -1;
static char* pmix_packed_data = NULL;
static int pmix_packed_data_offset = 0;
static int pmix_pack_key = 0;
static bool pmix_got_modex_data = false;
static char* pmix_error(int pmix_err);
#define OPAL_PMI_ERROR(pmi_err, pmi_func) \
do { \
opal_output(0, "%s [%s:%d:%s]: %s\n", \
pmi_func, __FILE__, __LINE__, __func__, \
pmix_error(pmi_err)); \
} while(0);
static int cray_init(void)
{
int i, spawned, size, rank, appnum, my_node;
int rc, ret = OPAL_ERROR;
char *pmapping = NULL;
char buf[PMI2_MAX_ATTRVALUE];
int found;
uint32_t jobfam;
/* if PMI2 has already been initialized, there is nothing more to do */
if ( PMI2_Initialized () ) {
return OPAL_SUCCESS;
}
size = -1;
rank = -1;
appnum = -1;
if (PMI_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) {
opal_show_help("help-pmix-base.txt", "pmix2-init-failed", true, rc);
return OPAL_ERROR;
}
if( size < 0 || rank < 0 ){
opal_show_help("help-pmix-base.txt", "pmix2-init-returned-bad-values", true);
goto err_exit;
}
pmix_size = size;
pmix_rank = rank;
pmix_appnum = appnum;
pmix_vallen_max = PMI2_MAX_VALLEN;
pmix_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility?
pmix_keylen_max = PMI2_MAX_KEYLEN;
rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found);
if( PMI_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI_Get_universe_size");
goto err_exit;
}
pmix_usize = atoi(buf);
pmix_kvs_name = (char*)malloc(pmix_kvslen_max);
if( pmix_kvs_name == NULL ){
PMI2_Finalize();
ret = OPAL_ERR_OUT_OF_RESOURCE;
goto err_exit;
}
rc = PMI2_Job_GetId(pmix_kvs_name, pmix_kvslen_max);
if( PMI_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
goto err_exit;
}
rc = sscanf(pmix_kvs_name,"kvs_%u",&jobfam);
if (rc != 1) {
OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
rc = OPAL_ERROR;
goto err_exit;
}
pmix_jobid = jobfam << 16;
/* store our name in the opal_proc_t so that
* debug messages will make sense - an upper
* layer will eventually overwrite it, but that
* won't do any harm */
pmix_pname.jid = pmix_jobid;
pmix_pname.vid = pmix_rank;
opal_proc_set_name((opal_process_name_t*)&pmix_pname);
opal_output_verbose(10, opal_pmix_base_framework.framework_output,
"%s pmix:cray: assigned tmp name %d %d pmix_kvs_name %s",
OPAL_NAME_PRINT(*(opal_process_name_t*)&pmix_pname),pmix_pname.jid,pmix_pname.vid,pmix_kvs_name);
pmapping = (char*)malloc(PMI2_MAX_VALLEN);
if( pmapping == NULL ){
rc = OPAL_ERR_OUT_OF_RESOURCE;
OPAL_ERROR_LOG(rc);
return rc;
}
rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found);
if( !found || PMI_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr");
return OPAL_ERROR;
}
pmix_lranks = pmix_cray_parse_pmap(pmapping, pmix_rank, &my_node, &pmix_nlranks);
if (NULL == pmix_lranks) {
rc = OPAL_ERR_OUT_OF_RESOURCE;
OPAL_ERROR_LOG(rc);
return rc;
}
free(pmapping);
/* find ourselves */
for (i=0; i < pmix_nlranks; i++) {
if (pmix_rank == pmix_lranks[i]) {
pmix_lrank = i;
pmix_nrank = my_node;
break;
}
}
return OPAL_SUCCESS;
err_exit:
PMI2_Finalize();
return ret;
}
static int cray_fini(void) {
if (0 == pmix_init_count) {
return OPAL_SUCCESS;
}
if (0 == --pmix_init_count) {
PMI2_Finalize();
}
if (NULL != pmix_kvs_name) {
free(pmix_kvs_name);
pmix_kvs_name = NULL;
}
if (NULL != pmix_lranks) {
free(pmix_lranks);
}
return OPAL_SUCCESS;
}
static bool cray_initialized(void)
{
if (0 < pmix_init_count) {
return true;
}
return false;
}
static int cray_abort(int status, const char *msg)
{
return PMI2_Abort(status, msg);
}
static int cray_spawn(int count, const char * cmds[],
int argcs[], const char ** argvs[],
const int maxprocs[],
opal_list_t *info_keyval_vector,
opal_list_t *preput_keyval_vector,
char jobId[], int jobIdSize,
int errors[])
{
return OPAL_ERR_NOT_IMPLEMENTED;
}
#if 0
static int cray_get_jobid(char jobId[], int jobIdSize)
{
return PMI2_Job_GetId(jobId,jobIdSize);
}
static int cray_get_rank(int *rank)
{
*rank = pmix_rank;
return OPAL_SUCCESS;
}
static int cray_get_size(opal_pmix_scope_t scope, int *size)
{
*size = pmix_size;
return OPAL_SUCCESS;
}
static int cray_get_appnum(int *appnum)
{
*appnum = pmix_appnum;
return OPAL_SUCCESS;
}
#endif
static int cray_job_connect(const char jobId[])
{
return OPAL_ERR_NOT_IMPLEMENTED;
}
static int cray_job_disconnect(const char jobId[])
{
return OPAL_ERR_NOT_IMPLEMENTED;
}
static int kvs_put(const char key[], const char value[])
{
int rc;
opal_output_verbose(10, opal_pmix_base_framework.framework_output,
"%s pmix:cray kvs_put key %s value %s",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), key, value);
rc = PMI2_KVS_Put(key, value);
if( PMI_SUCCESS != rc ){
OPAL_PMI_ERROR(rc, "PMI2_KVS_Put");
return OPAL_ERROR;
}
return OPAL_SUCCESS;
}
static int cray_put(opal_pmix_scope_t scope,
opal_value_t *kv)
{
int rc;
char* buffer_to_put;
int rem_offset = 0;
int data_to_put = 0;
opal_output_verbose(10, opal_pmix_base_framework.framework_output,
"%s pmix:cray cray_put my name is %ld\n",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), opal_proc_local_get()->proc_name);
if (OPAL_SUCCESS != (rc = opal_pmix_base_store_encoded (kv->key, (void*)&kv->data, kv->type,
&pmix_packed_data, &pmix_packed_data_offset))) {
OPAL_ERROR_LOG(rc);
return rc;
}
if (pmix_packed_data_offset == 0) {
/* nothing to write */
return OPAL_SUCCESS;
}
if (pmix_packed_data_offset < pmix_vallen_max) {
/* this meta-key is still being filled,
* nothing to put yet
*/
return OPAL_SUCCESS;
}
/* encode only full filled meta keys */
rem_offset = pmix_packed_data_offset % pmix_vallen_max;
data_to_put = pmix_packed_data_offset - rem_offset;
buffer_to_put = (char*)malloc(data_to_put);
memcpy(buffer_to_put, pmix_packed_data, data_to_put);
opal_pmix_base_commit_packed (buffer_to_put, data_to_put, pmix_vallen_max, &pmix_pack_key, kvs_put);
free(buffer_to_put);
pmix_packed_data_offset = rem_offset;
if (0 == pmix_packed_data_offset) {
free(pmix_packed_data);
pmix_packed_data = NULL;
} else {
memmove (pmix_packed_data, pmix_packed_data + data_to_put, pmix_packed_data_offset);
pmix_packed_data = realloc (pmix_packed_data, pmix_packed_data_offset);
}
return rc;
}
static int cray_fence(opal_process_name_t *procs, size_t nprocs)
{
int rc;
int32_t i;
opal_value_t *kp, kvn;
opal_hwloc_locality_t locality;
opal_output_verbose(10, opal_pmix_base_framework.framework_output,
"%s pmix:cray called fence",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
/* check if there is partially filled meta key and put them */
if (0 != pmix_packed_data_offset && NULL != pmix_packed_data) {
opal_pmix_base_commit_packed(pmix_packed_data, pmix_packed_data_offset, pmix_vallen_max, &pmix_pack_key, kvs_put);
pmix_packed_data_offset = 0;
free(pmix_packed_data);
pmix_packed_data = NULL;
}
if (PMI_SUCCESS != (rc = PMI2_KVS_Fence())) {
OPAL_PMI_ERROR(rc, "PMI2_KVS_Fence");
return OPAL_ERROR;
}
opal_output_verbose(10, opal_pmix_base_framework.framework_output,
"%s pmix:cray kvs_fence complete",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
/* get the modex data from each local process and set the
* localities to avoid having the MPI layer fetch data
* for every process in the job */
if (!pmix_got_modex_data) {
pmix_got_modex_data = true;
/* we only need to set locality for each local rank as "not found"
* equates to "non-local" */
for (i=0; i < pmix_nlranks; i++) {
pmix_pname.vid = i;
rc = opal_pmix_base_cache_keys_locally((opal_identifier_t*)&pmix_pname, OPAL_DSTORE_CPUSET,
&kp, pmix_kvs_name, pmix_vallen_max, kvs_get);
if (OPAL_SUCCESS != rc) {
OPAL_ERROR_LOG(rc);
return rc;
}
#if OPAL_HAVE_HWLOC
if (NULL == kp || NULL == kp->data.string) {
/* if we share a node, but we don't know anything more, then
* mark us as on the node as this is all we know
*/
locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
} else {
/* determine relative location on our node */
locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
opal_process_info.cpuset,
kp->data.string);
}
if (NULL != kp) {
OBJ_RELEASE(kp);
}
#else
/* all we know is we share a node */
locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
#endif
OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output,
"%s pmix:s2 proc %s locality %s",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
OPAL_NAME_PRINT(*(opal_identifier_t*)&pmix_pname),
opal_hwloc_base_print_locality(locality)));
OBJ_CONSTRUCT(&kvn, opal_value_t);
kvn.key = strdup(OPAL_DSTORE_LOCALITY);
kvn.type = OPAL_UINT16;
kvn.data.uint16 = locality;
(void)opal_dstore.store(opal_dstore_internal, (opal_identifier_t*)&pmix_pname, &kvn);
OBJ_DESTRUCT(&kvn);
}
}
return OPAL_SUCCESS;
}
#if 0
static int cray_fence(opal_process_name_t *procs, size_t nprocs)
{
int rc;
/* check if there is partially filled meta key and put them */
if (0 != pmix_packed_data_offset && NULL != pmix_packed_data) {
opal_pmix_base_commit_packed(pmix_packed_data, pmix_packed_data_offset, pmix_vallen_max, &pmix_pack_key, kvs_put);
pmix_packed_data_offset = 0;
free(pmix_packed_data);
pmix_packed_data = NULL;
}
if (PMI_SUCCESS != (rc = PMI2_KVS_Fence())) {
OPAL_PMI_ERROR(rc, "PMI2_KVS_Fence");
return OPAL_ERROR;
}
return OPAL_SUCCESS;
}
#endif
static int kvs_get(const char key[], char value [], int maxvalue)
{
int rc;
int len;
rc = PMI2_KVS_Get(pmix_kvs_name, PMI2_ID_NULL, key, value, maxvalue, &len);
if( PMI_SUCCESS != rc ){
OPAL_PMI_ERROR(rc, "PMI2_KVS_Get");
return OPAL_ERROR;
}
return OPAL_SUCCESS;
}
static int cray_get(const opal_identifier_t *id, const char *key, opal_value_t **kv)
{
int rc;
rc = opal_pmix_base_cache_keys_locally(id, key, kv, pmix_kvs_name, pmix_vallen_max, kvs_get);
if (NULL == *kv) {
return OPAL_ERROR;
}
return rc;
}
static int cray_publish(const char service_name[],
opal_list_t *info,
const char port[])
{
int rc;
if (PMI_SUCCESS != (rc = PMI2_Nameserv_publish(service_name, NULL, port))) {
OPAL_PMI_ERROR(rc, "PMI2_Nameserv_publish");
return OPAL_ERROR;
}
return OPAL_SUCCESS;
}
static int cray_lookup(const char service_name[],
opal_list_t *info,
char port[], int portLen)
{
int rc;
if (PMI_SUCCESS != (rc = PMI2_Nameserv_lookup(service_name, NULL, port, portLen))) {
OPAL_PMI_ERROR(rc, "PMI2_Nameserv_lookup");
return OPAL_ERROR;
}
return OPAL_SUCCESS;
}
static int cray_unpublish(const char service_name[],
opal_list_t *info)
{
int rc;
if (PMI_SUCCESS != (rc = PMI2_Nameserv_unpublish(service_name, NULL))) {
OPAL_PMI_ERROR(rc, "PMI2_Nameserv_unpublish");
return OPAL_ERROR;
}
return OPAL_SUCCESS;
}
static bool cray_get_attr(const char *attr, opal_value_t **kv)
{
int rc, i;
opal_value_t *kp;
if (0 == strcmp(PMIX_JOBID, attr)) {
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = pmix_jobid;
*kv = kp;
return true;
}
if (0 == strcmp(PMIX_RANK, attr)) {
rc = PMI_Get_rank(&i);
if( PMI_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI_Get_rank");
return false;
}
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = i;
*kv = kp;
return true;
}
if (0 == strcmp(PMIX_UNIV_SIZE, attr)) {
rc = PMI_Get_universe_size(&i);
if( PMI_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI_Get_universe_size");
return false;
}
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = i;
*kv = kp;
return true;
}
if (0 == strcmp(PMIX_JOB_SIZE, attr)) {
rc = PMI_Get_size(&i);
if( PMI_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI_Get_size");
return false;
}
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = i;
*kv = kp;
return true;
}
if (0 == strcmp(PMIX_APPNUM, attr)) {
rc = PMI_Get_appnum(&i);
if( PMI_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI_Get_appnum");
return false;
}
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = i;
*kv = kp;
return true;
}
if (0 == strcmp(PMIX_LOCAL_RANK, attr)) {
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = pmix_lrank;
*kv = kp;
return true;
}
if (0 == strcmp(PMIX_NODE_RANK, attr)) {
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = pmix_nrank;
*kv = kp;
return true;
}
return false;
}
#if 0
static int cray_local_info(int vpid, int **ranks_ret,
int *procs_ret, char **error)
{
int *ranks;
int procs = -1;
int rc;
char *pmapping = (char*)malloc(PMI2_MAX_VALLEN);
if( pmapping == NULL ){
*error = "mca_common_pmix_local_info: could not get memory for PMIv2 process mapping";
return OPAL_ERR_OUT_OF_RESOURCE;
}
int found;
int my_node;
rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found);
if( !found || PMI_SUCCESS != rc ) {
/* can't check PMI_SUCCESS as some folks (i.e., Cray) don't define it */
OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr");
*error = "mca_common_pmix_local_info: could not get PMI_process_mapping";
return OPAL_ERROR;
}
ranks = pmix_cray_parse_pmap(pmapping, vpid, &my_node, &procs);
if (NULL == ranks) {
*error = "mca_common_pmix_local_info: could not get memory for PMIv2 local ranks";
return OPAL_ERR_OUT_OF_RESOURCE;
}
free(pmapping);
*ranks_ret = ranks;
*procs_ret = procs;
return OPAL_SUCCESS;
}
#endif
static char* pmix_error(int pmix_err)
{
char * err_msg;
switch(pmix_err) {
case PMI_FAIL: err_msg = "Operation failed"; break;
case PMI_ERR_INIT: err_msg = "PMI is not initialized"; break;
case PMI_ERR_NOMEM: err_msg = "Input buffer not large enough"; break;
case PMI_ERR_INVALID_ARG: err_msg = "Invalid argument"; break;
case PMI_ERR_INVALID_KEY: err_msg = "Invalid key argument"; break;
case PMI_ERR_INVALID_KEY_LENGTH: err_msg = "Invalid key length argument"; break;
case PMI_ERR_INVALID_VAL: err_msg = "Invalid value argument"; break;
case PMI_ERR_INVALID_VAL_LENGTH: err_msg = "Invalid value length argument"; break;
case PMI_ERR_INVALID_LENGTH: err_msg = "Invalid length argument"; break;
case PMI_ERR_INVALID_NUM_ARGS: err_msg = "Invalid number of arguments"; break;
case PMI_ERR_INVALID_ARGS: err_msg = "Invalid args argument"; break;
case PMI_ERR_INVALID_NUM_PARSED: err_msg = "Invalid num_parsed length argument"; break;
case PMI_ERR_INVALID_KEYVALP: err_msg = "Invalid keyvalp argument"; break;
case PMI_ERR_INVALID_SIZE: err_msg = "Invalid size argument"; break;
#if defined(PMI_ERR_INVALID_KVS)
/* pmi.h calls this a valid return code but mpich doesn't define it (slurm does). */
case PMI_ERR_INVALID_KVS: err_msg = "Invalid kvs argument"; break;
#endif
case PMI_SUCCESS: err_msg = "Success"; break;
default: err_msg = "Unknown error";
}
return err_msg;
}

opal/mca/pmix/cray/pmix_cray.h (new file, 32 lines added)

@@ -0,0 +1,32 @@
/*
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_PMIX_CRAY_H
#define MCA_PMIX_CRAY_H
#include "opal_config.h"
#include "opal/mca/mca.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/mca/pmix/base/pmix_base_fns.h"
#include "pmix_cray_pmap_parser.h"
BEGIN_C_DECLS
/*
* Globally exported variable
*/
OPAL_DECLSPEC extern const opal_pmix_base_component_t mca_pmix_cray_component;
OPAL_DECLSPEC extern const opal_pmix_base_module_t opal_pmix_cray_module;
END_C_DECLS
#endif /* MCA_PMIX_CRAY_H */

opal/mca/pmix/cray/pmix_cray_component.c (new file, 99 lines added)

@@ -0,0 +1,99 @@
/*
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* These symbols are in a file by themselves to provide nice linker
* semantics. Since linkers generally pull in symbols by object
* files, keeping these symbols as the only symbols in this file
* prevents utility programs such as "ompi_info" from having to import
* entire components just to query their version and parameters.
*/
#include "opal_config.h"
#include "opal/constants.h"
#include "opal/mca/pmix/pmix.h"
#include "pmix_cray.h"
#include <sys/syscall.h>
/*
* Public string showing the pmix cray component version number
*/
const char *opal_pmix_cray_component_version_string =
"OPAL cray pmix MCA component version " OPAL_VERSION;
/*
* Local function
*/
static int pmix_cray_component_query(mca_base_module_t **module, int *priority);
/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it
*/
const opal_pmix_base_component_t mca_pmix_cray_component = {
/* First, the mca_component_t struct containing meta information
about the component itself */
{
/* Indicate that we are a pmix v2.0.0 component (which also
implies a specific MCA version) */
OPAL_PMIX_BASE_VERSION_2_0_0,
/* Component name and version */
"cray",
OPAL_MAJOR_VERSION,
OPAL_MINOR_VERSION,
OPAL_RELEASE_VERSION,
/* Component open and close functions */
NULL,
NULL,
pmix_cray_component_query,
NULL
},
/* Next the MCA v1.0.0 component meta data */
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
};
static int pmix_cray_component_query(mca_base_module_t **module, int *priority)
{
int rc;
const char proc_job_file[]="/proc/job";
FILE *fd = NULL;
char string[128]; /* just need to get a little output */
char *tmp = NULL;
/* disqualify ourselves if not running in a Cray PAGG container */
fd = fopen(proc_job_file, "r");
if (fd == NULL) {
*priority = 0;
*module = NULL;
rc = OPAL_ERROR;
} else {
tmp = fgets(string, sizeof(string), fd);
if (tmp) { /* okay we're in a PAGG container, got non-null output from job device */
*priority = 90;
*module = (mca_base_module_t *)&opal_pmix_cray_module;
rc = OPAL_SUCCESS;
} else { /* could not read the job device - disqualify ourselves */
*priority = 0;
*module = NULL;
rc = OPAL_ERROR;
}
fclose(fd);
}
return rc;
}

opal/mca/pmix/cray/pmix_cray_pmap_parser.c (new file, 243 lines added)

@@ -0,0 +1,243 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
*
* Copyright (c) 2013 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include "pmix_cray_pmap_parser.h"
#ifdef STANDALONE_TEST
#define WANT_PMI2_SUPPORT 1
#endif
/**
pmi2 process mapping is returned as a
comma separated list of tuples:
ex: (vector,(0,4,4),(0,4,1))
slurm cyclic distro of 4 ranks over 2 nodes:
(vector,(0,2,1),(0,2,1))
slurm block distro of 4 ranks over 2 nodes:
(vector,(0,2,2))
Format of each tuple is (base, H, L), where
H is number of nodes spawned by tuple,
L is number of ranks per node,
base is offset from node 0.
Tuple can be visualized as a rectangle on two
dimensional (Hosts, Local Ranks) plane:
------------------------------------ Hosts ->
| H
| +--------+
|<- base -->| |
| | | L
| +--------+
Local Ranks
V
Note that ranks increase by column. Tuple (0,2,3) looks like:
0 3
1 4
2 5
*/
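/*
   Worked example (editor's illustration, not in the original source):
   for the map "(vector,(0,4,4),(0,1,2),(1,3,1))" the tuples expand to
       (0,4,4): global ranks 0-15, four per node on nodes 0-3
       (0,1,2): global ranks 16-17 on node 0
       (1,3,1): global ranks 18-20, one per node on nodes 1-3
   so rank 3 sits on node 0 with local ranks {0,1,2,3,16,17}, and rank 10
   sits on node 2 with local ranks {8,9,10,11,19} - the same cases that the
   STANDALONE_TEST block at the bottom of this file exercises.
 */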
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
static int find_my_node(char *map, int me)
{
int abs_rank;
int base, H, L;
char *p;
p = map;
abs_rank = 0;
while (NULL != (p = strstr(p+1, ",("))) {
if (3 != sscanf(p, ",(%d,%d,%d)", &base, &H, &L)) {
return -1;
}
if (me >= abs_rank && me < abs_rank + H*L) {
/* found my rectangle, compute node */
return base + (me - abs_rank)/L;
}
abs_rank += H*L;
}
return -1;
}
static int *find_lrs(char *map, int my_node, int *nlrs)
{
int abs_rank;
int base, H, L;
char *p;
int *lrs;
int max_lr;
int i;
p = map;
abs_rank = 0;
*nlrs = 0;
max_lr = 16;
lrs = malloc(max_lr * sizeof(int));
while (NULL != (p = strstr(p+1, ",("))) {
if (3 != sscanf(p, ",(%d,%d,%d)", &base, &H, &L)) {
free(lrs);
return NULL;
}
if (base <= my_node && my_node < base + H) {
if (*nlrs + L >= max_lr) {
int *tmp = realloc(lrs, (max_lr + L) * sizeof(int));
if (NULL == tmp) {
/* realloc failed - free the original buffer rather than the NULL result */
*nlrs = 0;
free(lrs);
return NULL;
}
lrs = tmp;
max_lr += L;
}
/* skip (my_node - base) columns of L elems,
* numbers in my column are local to me
*/
for (i = 0; i < L; i++) {
lrs[*nlrs] = (my_node - base) * L + i + abs_rank;
(*nlrs) ++;
}
}
abs_rank += H*L;
}
if (0 == *nlrs) {
free(lrs);
lrs = 0;
}
return lrs;
}
/**
* @param pmap process map as returned by PMI_process_mapping
* attribute
* @param my_rank
* @param node set to my node id
* @param nlrs set to the number of local ranks returned
*
* @return array that contains ranks local to my_rank or NULL
* on failure. Array must be freed by the caller.
*/
int *pmix_cray_parse_pmap(char *pmap, int my_rank,
int *node, int *nlrs)
{
char *p;
p = strstr(pmap, "(vector");
if (NULL == p) {
return NULL;
}
*node = find_my_node(p, my_rank);
if (0 > *node) {
return NULL;
}
return find_lrs(p, *node, nlrs);
}
#ifdef STANDALONE_TEST
#include <assert.h>
static void dump_lrs(int *lrs, int me, int node, int n)
{
int i;
printf("Total %d ranks/node, node %d me %d\n", n, node, me);
for (i = 0; i < n; i++) {
printf("%d ", lrs[i]);
}
printf("\n");
free(lrs);
}
int main(int argc, char **argv)
{
int me, n, node;
int *lrs;
char *pmap;
int a1[] = {0, 1};
int a2[] = {2, 3};
int a3[] = {0, 2};
int a4[] = {1, 3};
int a5[] = {0,1,3,2,16,17};
int a6[] = {8,9,10,11,19};
if (argc == 3) {
me = atoi(argv[1]);
lrs = pmix_cray_parse_pmap(argv[2], me, &node, &n);
if (NULL == lrs) {
printf("can not parse pmap\n");
exit(1);
}
dump_lrs(lrs, me, node, n);
exit(0);
}
/* built in cases */
pmap = "(vector,(0,2,2))";
me = 1;
lrs = pmix_cray_parse_pmap(pmap, me, &node, &n);
assert(lrs);
assert(n == 2);
assert(memcmp(lrs, a1, 2) == 0);
free(lrs);
pmap = "(vector,(0,2,2))";
me = 2;
lrs = pmix_cray_parse_pmap(pmap, me, &node, &n);
assert(lrs);
assert(n == 2);
assert(memcmp(lrs, a2, 2) == 0);
free(lrs);
/* cyclic distro which skips node 0 */
pmap = "(vector,(1,2,1),(1,2,1))";
me = 0;
lrs = pmix_cray_parse_pmap(pmap, me, &node, &n);
assert(lrs);
assert(n == 2);
assert(memcmp(lrs, a3, n) == 0);
free(lrs);
pmap = "(vector,(1,2,1),(1,2,1))";
me = 3;
lrs = pmix_cray_parse_pmap(pmap, me, &node, &n);
assert(lrs);
assert(n == 2);
assert(memcmp(lrs, a4, n) == 0);
free(lrs);
pmap = "(vector,(0,4,4),(0,1,2),(1,3,1))";
me = 3;
lrs = pmix_cray_parse_pmap(pmap, me, &node, &n);
assert(lrs);
assert(n == 6);
assert(memcmp(lrs, a5, n) == 0);
free(lrs);
pmap = "(vector,(0,4,4),(0,1,2),(1,3,1))";
me = 10;
lrs = pmix_cray_parse_pmap(pmap, me, &node, &n);
assert(lrs);
assert(n == 5);
assert(memcmp(lrs, a6, n) == 0);
free(lrs);
return 0;
}
#endif

opal/mca/pmix/cray/pmix_cray_pmap_parser.h (new file, 18 lines added)

@@ -0,0 +1,18 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
*
* Copyright (c) 2013 Mellanox Technologies, Inc.
* All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
* Additional copyrights may follow
*
* $HEADER$
*
*/
#ifndef PMIX_CRAY_PMAP_PARSER_H
#define PMIX_CRAY_PMAP_PARSER_H
int *pmix_cray_parse_pmap(char *pmap, int my_rank,
int *node, int *nlrs);
#endif

opal/mca/pmix/native/Makefile.am (new file, 36 lines added)

@@ -0,0 +1,36 @@
#
# Copyright (c) 2014 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources = \
pmix_native.h \
pmix_native_component.c \
pmix_native.c \
usock.c \
usock_sendrecv.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_opal_pmix_native_DSO
component_noinst =
component_install = mca_pmix_native.la
else
component_noinst = libmca_pmix_native.la
component_install =
endif
mcacomponentdir = $(opallibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_pmix_native_la_SOURCES = $(sources)
mca_pmix_native_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_pmix_native_la_SOURCES =$(sources)
libmca_pmix_native_la_LDFLAGS = -module -avoid-version

opal/mca/pmix/native/configure.m4 (new file, 42 lines added)

@@ -0,0 +1,42 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2011-2013 Los Alamos National Security, LLC.
# All rights reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2013 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_pmix_native_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_opal_pmix_native_CONFIG],[
AC_CONFIG_FILES([opal/mca/pmix/native/Makefile])
# check for sockaddr_un (a good sign we have Unix domain sockets)
AC_CHECK_TYPES([struct sockaddr_un],
[pmix_native_happy="yes"],
[pmix_native_happy="no"],
[AC_INCLUDES_DEFAULT
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#ifdef HAVE_SYS_UN_H
#include <sys/un.h>
#endif])
AS_IF([test "$pmix_native_happy" = "yes"], [$1], [$2])
])dnl

opal/mca/pmix/native/pmix_native.c (new file, 1137 lines added)

File diff not shown because it is too large

opal/mca/pmix/native/pmix_native.h (new file, 223 lines added)

@@ -0,0 +1,223 @@
/*
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_PMIX_NATIVE_H
#define MCA_PMIX_NATIVE_H
#include "opal_config.h"
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#ifdef HAVE_SYS_UN_H
#include <sys/un.h>
#endif
#include "opal/mca/mca.h"
#include "opal/mca/event/event.h"
#include "opal/util/proc.h"
#include "opal/mca/pmix/base/base.h"
BEGIN_C_DECLS
/**
* the state of the connection to the server
*/
typedef enum {
PMIX_USOCK_UNCONNECTED,
PMIX_USOCK_CLOSED,
PMIX_USOCK_RESOLVE,
PMIX_USOCK_CONNECTING,
PMIX_USOCK_CONNECT_ACK,
PMIX_USOCK_CONNECTED,
PMIX_USOCK_FAILED,
PMIX_USOCK_ACCEPTING
} pmix_usock_state_t;
/* define a command type for communicating to the
* pmix server */
typedef uint8_t pmix_cmd_t;
#define PMIX_CMD_T OPAL_UINT8
/* define some commands */
#define PMIX_ABORT_CMD 1
#define PMIX_FENCE_CMD 2
#define PMIX_FENCENB_CMD 3
#define PMIX_PUT_CMD 4
#define PMIX_GET_CMD 5
#define PMIX_GETNB_CMD 6
#define PMIX_FINALIZE_CMD 7
#define PMIX_GETATTR_CMD 8
/* define some message types */
#define PMIX_USOCK_IDENT 1
#define PMIX_USOCK_USER 2
/* internally used cbfunc */
typedef void (*pmix_usock_cbfunc_t)(opal_buffer_t *buf, void *cbdata);
/* header for messages */
typedef struct {
opal_identifier_t id;
uint8_t type;
uint32_t tag;
size_t nbytes;
} pmix_usock_hdr_t;
/* usock structure for sending a message */
typedef struct {
opal_list_item_t super;
opal_event_t ev;
pmix_usock_hdr_t hdr;
char *data;
bool hdr_sent;
char *sdptr;
size_t sdbytes;
} pmix_usock_send_t;
OBJ_CLASS_DECLARATION(pmix_usock_send_t);
/* usock structure for recving a message */
typedef struct {
opal_list_item_t super;
opal_event_t ev;
pmix_usock_hdr_t hdr;
char *data;
bool hdr_recvd;
char *rdptr;
size_t rdbytes;
} pmix_usock_recv_t;
OBJ_CLASS_DECLARATION(pmix_usock_recv_t);
/* usock struct for posting send/recv request */
typedef struct {
opal_object_t super;
opal_event_t ev;
opal_buffer_t *bfr;
pmix_usock_cbfunc_t cbfunc;
void *cbdata;
} pmix_usock_sr_t;
OBJ_CLASS_DECLARATION(pmix_usock_sr_t);
/* usock structure for tracking posted recvs */
typedef struct {
opal_list_item_t super;
opal_event_t ev;
uint32_t tag;
pmix_usock_cbfunc_t cbfunc;
void *cbdata;
} pmix_usock_posted_recv_t;
OBJ_CLASS_DECLARATION(pmix_usock_posted_recv_t);
/* usock struct for tracking ops */
typedef struct {
opal_object_t super;
opal_event_t ev;
volatile bool active;
opal_buffer_t data;
opal_pmix_cbfunc_t cbfunc;
void *cbdata;
} pmix_cb_t;
OBJ_CLASS_DECLARATION(pmix_cb_t);
typedef struct {
opal_pmix_base_component_t super;
opal_buffer_t *cache_local;
opal_buffer_t *cache_remote;
opal_buffer_t *cache_global;
opal_event_base_t *evbase;
opal_identifier_t id;
opal_identifier_t server;
char *uri;
struct sockaddr_un address;
int sd;
int max_retries;
int retries; // number of times we have tried to connect to this address
pmix_usock_state_t state;
opal_event_t op_event; // used for connecting and operations other than read/write
uint32_t tag; // current tag
opal_event_t send_event; // registration with event thread for send events
bool send_ev_active;
opal_event_t recv_event; // registration with event thread for recv events
bool recv_ev_active;
opal_event_t timer_event; // timer for retrying connection failures
bool timer_ev_active;
opal_list_t send_queue; // list of pmix_usock_sent_t to be sent
pmix_usock_send_t *send_msg; // current send in progress
pmix_usock_recv_t *recv_msg; // current recv in progress
opal_list_t posted_recvs; // list of pmix_usock_posted_recv_t
} opal_pmix_native_component_t;
OPAL_DECLSPEC extern opal_pmix_native_component_t mca_pmix_native_component;
OPAL_DECLSPEC extern const opal_pmix_base_module_t opal_pmix_native_module;
/* module-level shared functions */
OPAL_MODULE_DECLSPEC void pmix_usock_process_msg(int fd, short flags, void *cbdata);
OPAL_MODULE_DECLSPEC void pmix_usock_send_recv(int fd, short args, void *cbdata);
OPAL_MODULE_DECLSPEC void pmix_usock_send_handler(int sd, short flags, void *cbdata);
OPAL_MODULE_DECLSPEC void pmix_usock_recv_handler(int sd, short flags, void *cbdata);
OPAL_MODULE_DECLSPEC char* pmix_usock_state_print(pmix_usock_state_t state);
OPAL_MODULE_DECLSPEC void pmix_usock_dump(const char* msg);
OPAL_MODULE_DECLSPEC int usock_send_connect_ack(void);
/* internal convenience macros */
#define PMIX_ACTIVATE_SEND_RECV(b, cb, d) \
do { \
pmix_usock_sr_t *ms; \
opal_output_verbose(5, opal_pmix_base_framework.framework_output, \
"%s [%s:%d] post send to server", \
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
__FILE__, __LINE__); \
ms = OBJ_NEW(pmix_usock_sr_t); \
ms->bfr = (b); \
ms->cbfunc = (cb); \
ms->cbdata = (d); \
opal_event_set(mca_pmix_native_component.evbase, &((ms)->ev), -1, \
OPAL_EV_WRITE, pmix_usock_send_recv, (ms)); \
opal_event_set_priority(&((ms)->ev), OPAL_EV_MSG_LO_PRI); \
opal_event_active(&((ms)->ev), OPAL_EV_WRITE, 1); \
} while(0);
#define PMIX_ACTIVATE_POST_MSG(ms) \
do { \
opal_output_verbose(5, opal_pmix_base_framework.framework_output, \
"%s [%s:%d] post msg", \
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \
__FILE__, __LINE__); \
opal_event_set(mca_pmix_native_component.evbase, &ms->ev, -1, \
OPAL_EV_WRITE, \
pmix_usock_process_msg, ms); \
opal_event_set_priority(&ms->ev, OPAL_EV_MSG_LO_PRI); \
opal_event_active(&ms->ev, OPAL_EV_WRITE, 1); \
} while(0);
#define CLOSE_THE_SOCKET(socket) \
do { \
shutdown(socket, 2); \
close(socket); \
/* notify the error handler */ \
opal_pmix_base_errhandler(OPAL_ERR_COMM_FAILURE); \
} while(0)
#define PMIX_WAIT_FOR_COMPLETION(a) \
do { \
while ((a)) { \
usleep(10); \
} \
} while (0);
END_C_DECLS
#endif /* MCA_PMIX_NATIVE_H */
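
Because the diff of pmix_native.c itself is suppressed above for size, here is an illustrative sketch (the names example_fence and example_cbfunc are hypothetical) of how the module is expected to use the types and macros from this header to make a blocking request to the server: pack a command, post the send with a reply callback, then spin until the event thread fires the callback.

    #include "opal/dss/dss.h"
    #include "pmix_native.h"

    /* reply callback - matches the pmix_usock_cbfunc_t signature */
    static void example_cbfunc(opal_buffer_t *buf, void *cbdata)
    {
        pmix_cb_t *cb = (pmix_cb_t*)cbdata;
        /* a real callback would unpack the server's reply from buf here */
        cb->active = false;
    }

    static int example_fence(void)
    {
        opal_buffer_t *msg;
        pmix_cb_t *cb;
        pmix_cmd_t cmd = PMIX_FENCE_CMD;
        int rc;

        /* pack the command telling the server what we want */
        msg = OBJ_NEW(opal_buffer_t);
        if (OPAL_SUCCESS != (rc = opal_dss.pack(msg, &cmd, 1, PMIX_CMD_T))) {
            OBJ_RELEASE(msg);
            return rc;
        }

        /* post the send plus the matching posted recv, then wait for the
         * event thread to run the callback */
        cb = OBJ_NEW(pmix_cb_t);
        cb->active = true;
        PMIX_ACTIVATE_SEND_RECV(msg, example_cbfunc, cb);
        PMIX_WAIT_FOR_COMPLETION(cb->active);

        OBJ_RELEASE(cb);
        return OPAL_SUCCESS;
    }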

opal/mca/pmix/native/pmix_native_component.c (new file, 132 lines added)

@@ -0,0 +1,132 @@
/*
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* These symbols are in a file by themselves to provide nice linker
* semantics. Since linkers generally pull in symbols by object
* files, keeping these symbols as the only symbols in this file
* prevents utility programs such as "ompi_info" from having to import
* entire components just to query their version and parameters.
*/
#include "opal_config.h"
#include "opal/constants.h"
#include "opal/util/proc.h"
#include "opal/mca/pmix/pmix.h"
#include "pmix_native.h"
/*
* Public string showing the pmix native component version number
*/
const char *opal_pmix_native_component_version_string =
"OPAL native pmix MCA component version " OPAL_VERSION;
/*
* Local function
*/
static int pmix_native_open(void);
static int pmix_native_close(void);
static int pmix_native_component_query(mca_base_module_t **module, int *priority);
/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it
*/
opal_pmix_native_component_t mca_pmix_native_component = {
{
/* First, the mca_component_t struct containing meta information
about the component itself */
{
/* Indicate that we are a pmix v2.0.0 component (which also
implies a specific MCA version) */
OPAL_PMIX_BASE_VERSION_2_0_0,
/* Component name and version */
"native",
OPAL_MAJOR_VERSION,
OPAL_MINOR_VERSION,
OPAL_RELEASE_VERSION,
/* Component open and close functions */
pmix_native_open,
pmix_native_close,
pmix_native_component_query,
NULL
},
/* Next the MCA v1.0.0 component meta data */
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
}
};
static int pmix_native_open(void)
{
/* construct the component fields */
mca_pmix_native_component.uri = NULL;
mca_pmix_native_component.id = 0;
mca_pmix_native_component.cache_local = NULL;
mca_pmix_native_component.cache_remote = NULL;
mca_pmix_native_component.cache_global = NULL;
mca_pmix_native_component.sd = -1;
mca_pmix_native_component.state = PMIX_USOCK_UNCONNECTED;
mca_pmix_native_component.tag = 0;
OBJ_CONSTRUCT(&mca_pmix_native_component.send_queue, opal_list_t);
OBJ_CONSTRUCT(&mca_pmix_native_component.posted_recvs, opal_list_t);
mca_pmix_native_component.send_msg = NULL;
mca_pmix_native_component.recv_msg = NULL;
mca_pmix_native_component.send_ev_active = false;
mca_pmix_native_component.recv_ev_active = false;
mca_pmix_native_component.timer_ev_active = false;
return OPAL_SUCCESS;
}
static int pmix_native_close(void)
{
if (NULL != mca_pmix_native_component.uri) {
free(mca_pmix_native_component.uri);
}
OPAL_LIST_DESTRUCT(&mca_pmix_native_component.send_queue);
OPAL_LIST_DESTRUCT(&mca_pmix_native_component.posted_recvs);
return OPAL_SUCCESS;
}
static int pmix_native_component_query(mca_base_module_t **module, int *priority)
{
char *t, *id;
/* see if a PMIx server is present */
if (NULL == (t = getenv("PMIX_SERVER_URI")) ||
NULL == (id = getenv("PMIX_ID"))) {
/* we still have to be considered because this might
* be a singleton, and even a singleton requires some
* degree of support. So set us at a very low priority
* so the other components can be selected it they
* are in a better position to run */
*priority = 1;
mca_pmix_native_component.uri = NULL;
} else {
/* if PMIx is present, then we need to use it */
mca_pmix_native_component.uri = strdup(t);
mca_pmix_native_component.id = strtoul(id, NULL, 10);
opal_proc_set_name((opal_process_name_t*)&mca_pmix_native_component.id);
*priority = 100;
}
*module = (mca_base_module_t *)&opal_pmix_native_module;
return OPAL_SUCCESS;
}

opal/mca/pmix/native/usock.c (new file, 482 lines added)

@@ -0,0 +1,482 @@
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
*/
#include "opal_config.h"
#include "opal/types.h"
#include <fcntl.h>
#ifdef HAVE_SYS_UIO_H
#include <sys/uio.h>
#endif
#ifdef HAVE_NET_UIO_H
#include <net/uio.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#include "opal_stdint.h"
#include "opal/opal_socket_errno.h"
#include "opal/dss/dss.h"
#include "opal/mca/dstore/dstore.h"
#include "opal/mca/sec/sec.h"
#include "opal/runtime/opal.h"
#include "opal/util/show_help.h"
#include "opal/util/error.h"
#include "opal/util/output.h"
#include "opal/util/proc.h"
#include "opal/mca/pmix/base/base.h"
#include "pmix_native.h"
static int usock_send_blocking(char *ptr, size_t size);
static void pmix_usock_try_connect(int fd, short args, void *cbdata);
static int usock_create_socket(void);
/* State machine for internal operations */
typedef struct {
opal_object_t super;
opal_event_t ev;
} pmix_usock_op_t;
static OBJ_CLASS_INSTANCE(pmix_usock_op_t,
opal_object_t,
NULL, NULL);
#define PMIX_ACTIVATE_USOCK_STATE(cbfunc) \
do { \
pmix_usock_op_t *op; \
op = OBJ_NEW(pmix_usock_op_t); \
opal_event_set(mca_pmix_native_component.evbase, &op->ev, -1, \
OPAL_EV_WRITE, (cbfunc), op); \
opal_event_set_priority(&op->ev, OPAL_EV_MSG_LO_PRI); \
opal_event_active(&op->ev, OPAL_EV_WRITE, 1); \
} while(0);
void pmix_usock_send_recv(int fd, short args, void *cbdata)
{
pmix_usock_sr_t *ms = (pmix_usock_sr_t*)cbdata;
pmix_usock_posted_recv_t *req;
pmix_usock_send_t *snd;
uint32_t tag = UINT32_MAX;
if (NULL != ms->cbfunc) {
/* if a callback msg is expected, setup a recv for it */
req = OBJ_NEW(pmix_usock_posted_recv_t);
/* take the next tag in the sequence */
if (UINT32_MAX == mca_pmix_native_component.tag) {
mca_pmix_native_component.tag = 0;
}
req->tag = mca_pmix_native_component.tag++;
tag = req->tag;
req->cbfunc = ms->cbfunc;
req->cbdata = ms->cbdata;
opal_output_verbose(5, opal_pmix_base_framework.framework_output,
"%s posting recv on tag %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), req->tag);
/* add it to the list of recvs - we cannot have unexpected messages
* in this subsystem as the server never sends us something that
* we didn't previously request */
opal_list_append(&mca_pmix_native_component.posted_recvs, &req->super);
}
snd = OBJ_NEW(pmix_usock_send_t);
snd->hdr.id = mca_pmix_native_component.id;
snd->hdr.type = PMIX_USOCK_USER;
snd->hdr.tag = tag;
snd->hdr.nbytes = ms->bfr->bytes_used;
snd->data = ms->bfr->base_ptr;
/* always start with the header */
snd->sdptr = (char*)&snd->hdr;
snd->sdbytes = sizeof(pmix_usock_hdr_t);
/* add the msg to the send queue if we are already connected*/
if (PMIX_USOCK_CONNECTED == mca_pmix_native_component.state) {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s usock:send_nb: already connected to server - queueing for send",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
/* if there is no message on-deck, put this one there */
if (NULL == mca_pmix_native_component.send_msg) {
mca_pmix_native_component.send_msg = snd;
} else {
/* add it to the queue */
opal_list_append(&mca_pmix_native_component.send_queue, &snd->super);
}
/* ensure the send event is active */
if (!mca_pmix_native_component.send_ev_active) {
opal_event_add(&mca_pmix_native_component.send_event, 0);
mca_pmix_native_component.send_ev_active = true;
}
return;
}
/* add the message to the queue for sending after the
* connection is formed
*/
opal_list_append(&mca_pmix_native_component.send_queue, &snd->super);
if (PMIX_USOCK_CONNECTING != mca_pmix_native_component.state &&
PMIX_USOCK_CONNECT_ACK != mca_pmix_native_component.state) {
/* we have to initiate the connection - again, we do not
* want to block while the connection is created.
* So throw us into an event that will create
* the connection via a mini-state-machine :-)
*/
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s usock:send_nb: initiating connection to server",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
mca_pmix_native_component.state = PMIX_USOCK_CONNECTING;
PMIX_ACTIVATE_USOCK_STATE(pmix_usock_try_connect);
}
}
void pmix_usock_process_msg(int fd, short flags, void *cbdata)
{
pmix_usock_recv_t *msg = (pmix_usock_recv_t*)cbdata;
pmix_usock_posted_recv_t *rcv;
opal_buffer_t buf;
OPAL_OUTPUT_VERBOSE((5, opal_pmix_base_framework.framework_output,
"%s message received %d bytes for tag %u",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
(int)msg->hdr.nbytes, msg->hdr.tag));
/* see if we have a waiting recv for this message */
OPAL_LIST_FOREACH(rcv, &mca_pmix_native_component.posted_recvs, pmix_usock_posted_recv_t) {
opal_output_verbose(5, opal_pmix_base_framework.framework_output,
"%s checking msg on tag %u for tag %u",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
msg->hdr.tag, rcv->tag);
if (msg->hdr.tag == rcv->tag) {
if (NULL != rcv->cbfunc) {
/* construct and load the buffer */
OBJ_CONSTRUCT(&buf, opal_buffer_t);
opal_dss.load(&buf, msg->data, msg->hdr.nbytes);
msg->data = NULL; // protect the data region
if (NULL != rcv->cbfunc) {
rcv->cbfunc(&buf, rcv->cbdata);
}
OBJ_DESTRUCT(&buf); // free's the msg data
/* also done with the recv */
opal_list_remove_item(&mca_pmix_native_component.posted_recvs, &rcv->super);
OBJ_RELEASE(rcv);
OBJ_RELEASE(msg);
return;
}
}
}
/* we get here if no matching recv was found - this is an error */
opal_output(0, "%s UNEXPECTED MESSAGE",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
OBJ_RELEASE(msg);
}
static int usock_create_socket(void)
{
int flags;
if (mca_pmix_native_component.sd > 0) {
return OPAL_SUCCESS;
}
OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output,
"%s pmix:usock:peer creating socket to server",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME)));
mca_pmix_native_component.sd = socket(PF_UNIX, SOCK_STREAM, 0);
if (mca_pmix_native_component.sd < 0) {
opal_output(0, "%s usock_peer_create_socket: socket() failed: %s (%d)\n",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
strerror(opal_socket_errno),
opal_socket_errno);
return OPAL_ERR_UNREACH;
}
/* setup the socket as non-blocking */
if ((flags = fcntl(mca_pmix_native_component.sd, F_GETFL, 0)) < 0) {
opal_output(0, "%s usock_peer_connect: fcntl(F_GETFL) failed: %s (%d)\n",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
strerror(opal_socket_errno),
opal_socket_errno);
} else {
flags |= O_NONBLOCK;
if(fcntl(mca_pmix_native_component.sd, F_SETFL, flags) < 0)
opal_output(0, "%s usock_peer_connect: fcntl(F_SETFL) failed: %s (%d)\n",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
strerror(opal_socket_errno),
opal_socket_errno);
}
/* setup event callbacks */
opal_event_set(mca_pmix_native_component.evbase,
&mca_pmix_native_component.recv_event,
mca_pmix_native_component.sd,
OPAL_EV_READ|OPAL_EV_PERSIST,
pmix_usock_recv_handler, NULL);
opal_event_set_priority(&mca_pmix_native_component.recv_event, OPAL_EV_MSG_LO_PRI);
mca_pmix_native_component.recv_ev_active = false;
opal_event_set(mca_pmix_native_component.evbase,
&mca_pmix_native_component.send_event,
mca_pmix_native_component.sd,
OPAL_EV_WRITE|OPAL_EV_PERSIST,
pmix_usock_send_handler, NULL);
opal_event_set_priority(&mca_pmix_native_component.send_event, OPAL_EV_MSG_LO_PRI);
mca_pmix_native_component.send_ev_active = false;
return OPAL_SUCCESS;
}
/*
* Try connecting to a peer
*/
static void pmix_usock_try_connect(int fd, short args, void *cbdata)
{
int rc;
opal_socklen_t addrlen = 0;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s usock_peer_try_connect: attempting to connect to server",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
if (OPAL_SUCCESS != usock_create_socket()) {
return;
}
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s usock_peer_try_connect: attempting to connect to server on socket %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
mca_pmix_native_component.sd);
addrlen = sizeof(struct sockaddr_un);
retry_connect:
mca_pmix_native_component.retries++;
if (connect(mca_pmix_native_component.sd, (struct sockaddr *) &mca_pmix_native_component.address, addrlen) < 0) {
/* non-blocking so wait for completion */
if (opal_socket_errno == EINPROGRESS || opal_socket_errno == EWOULDBLOCK) {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s waiting for connect completion to server - activating send event",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
/* just ensure the send_event is active */
if (!mca_pmix_native_component.send_ev_active) {
opal_event_add(&mca_pmix_native_component.send_event, 0);
mca_pmix_native_component.send_ev_active = true;
}
return;
}
/* Some kernels (Linux 2.6) will automatically software
abort a connection that was ECONNREFUSED on the last
attempt, without even trying to establish the
connection. Handle that case in a semi-rational
way by trying twice before giving up */
if (ECONNABORTED == opal_socket_errno) {
if (mca_pmix_native_component.retries < mca_pmix_native_component.max_retries) {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s connection to server aborted by OS - retrying",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
goto retry_connect;
} else {
/* We were unsuccessful in establishing this connection, and are
* not likely to suddenly become successful,
*/
mca_pmix_native_component.state = PMIX_USOCK_FAILED;
CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
return;
}
}
}
/* connection succeeded */
mca_pmix_native_component.retries = 0;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s sock_peer_try_connect: Connection across to server succeeded",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
/* setup our recv to catch the return ack call */
if (!mca_pmix_native_component.recv_ev_active) {
opal_event_add(&mca_pmix_native_component.recv_event, 0);
mca_pmix_native_component.recv_ev_active = true;
}
/* send our globally unique process identifier to the server */
if (OPAL_SUCCESS == (rc = usock_send_connect_ack())) {
mca_pmix_native_component.state = PMIX_USOCK_CONNECT_ACK;
} else {
opal_output(0,
"%s usock_peer_try_connect: "
"usock_send_connect_ack to server failed: %s (%d)",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
opal_strerror(rc), rc);
mca_pmix_native_component.state = PMIX_USOCK_FAILED;
CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
return;
}
}
int usock_send_connect_ack(void)
{
char *msg;
pmix_usock_hdr_t hdr;
int rc;
size_t sdsize;
opal_sec_cred_t *cred;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s SEND CONNECT ACK",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
/* setup the header */
hdr.id = OPAL_PROC_MY_NAME;
hdr.tag = UINT32_MAX;
hdr.type = PMIX_USOCK_IDENT;
/* get our security credential */
if (OPAL_SUCCESS != (rc = opal_sec.get_my_credential(opal_dstore_internal, &OPAL_PROC_MY_NAME, &cred))) {
return rc;
}
/* set the number of bytes to be read beyond the header */
hdr.nbytes = strlen(opal_version_string) + 1 + cred->size;
/* create a space for our message */
sdsize = (sizeof(hdr) + strlen(opal_version_string) + 1 + cred->size);
if (NULL == (msg = (char*)malloc(sdsize))) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
memset(msg, 0, sdsize);
/* load the message */
memcpy(msg, &hdr, sizeof(hdr));
memcpy(msg+sizeof(hdr), opal_version_string, strlen(opal_version_string));
memcpy(msg+sizeof(hdr)+strlen(opal_version_string)+1, cred->credential, cred->size);
if (OPAL_SUCCESS != usock_send_blocking(msg, sdsize)) {
free(msg);
return OPAL_ERR_UNREACH;
}
free(msg);
return OPAL_SUCCESS;
}
/*
* A blocking send on a non-blocking socket. Used to send the small amount of connection
* information that identifies the peers endpoint.
*/
static int usock_send_blocking(char *ptr, size_t size)
{
size_t cnt = 0;
int retval;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s send blocking of %"PRIsize_t" bytes to socket %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
size, mca_pmix_native_component.sd);
while (cnt < size) {
retval = send(mca_pmix_native_component.sd, (char*)ptr+cnt, size-cnt, 0);
if (retval < 0) {
if (opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
opal_output(0, "%s usock_peer_send_blocking: send() to socket %d failed: %s (%d)\n",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
mca_pmix_native_component.sd,
strerror(opal_socket_errno),
opal_socket_errno);
mca_pmix_native_component.state = PMIX_USOCK_FAILED;
CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
return OPAL_ERR_UNREACH;
}
continue;
}
cnt += retval;
}
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s blocking send complete to socket %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
mca_pmix_native_component.sd);
return OPAL_SUCCESS;
}
/*
* Routine for debugging to print the connection state and socket options
*/
void pmix_usock_dump(const char* msg)
{
char buff[255];
int nodelay,flags;
if ((flags = fcntl(mca_pmix_native_component.sd, F_GETFL, 0)) < 0) {
opal_output(0, "%s usock_peer_dump: fcntl(F_GETFL) failed: %s (%d)\n",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
strerror(opal_socket_errno),
opal_socket_errno);
}
#if defined(USOCK_NODELAY)
optlen = sizeof(nodelay);
if (getsockopt(mca_pmix_native_component.sd, IPPROTO_USOCK, USOCK_NODELAY, (char *)&nodelay, &optlen) < 0) {
opal_output(0, "%s usock_peer_dump: USOCK_NODELAY option: %s (%d)\n",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
strerror(opal_socket_errno),
opal_socket_errno);
}
#else
nodelay = 0;
#endif
snprintf(buff, sizeof(buff), "%s %s: nodelay %d flags %08x\n",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
msg, nodelay, flags);
opal_output(0, "%s", buff);
}
char* pmix_usock_state_print(pmix_usock_state_t state)
{
switch (state) {
case PMIX_USOCK_UNCONNECTED:
return "UNCONNECTED";
case PMIX_USOCK_CLOSED:
return "CLOSED";
case PMIX_USOCK_RESOLVE:
return "RESOLVE";
case PMIX_USOCK_CONNECTING:
return "CONNECTING";
case PMIX_USOCK_CONNECT_ACK:
return "ACK";
case PMIX_USOCK_CONNECTED:
return "CONNECTED";
case PMIX_USOCK_FAILED:
return "FAILED";
default:
return "UNKNOWN";
}
}

opal/mca/pmix/native/usock_sendrecv.c (new file, 717 lines added)

@@ -0,0 +1,717 @@
/*
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2011 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
* All rights reserved.
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <fcntl.h>
#ifdef HAVE_SYS_UIO_H
#include <sys/uio.h>
#endif
#ifdef HAVE_NET_UIO_H
#include <net/uio.h>
#endif
#ifdef HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
#ifdef HAVE_ARPA_INET_H
#include <arpa/inet.h>
#endif
#ifdef HAVE_NETINET_TCP_H
#include <netinet/tcp.h>
#endif
#include "opal_stdint.h"
#include "opal/types.h"
#include "opal/runtime/opal.h"
#include "opal/opal_socket_errno.h"
#include "opal/mca/backtrace/backtrace.h"
#include "opal/util/output.h"
#include "opal/util/net.h"
#include "opal/util/error.h"
#include "opal/class/opal_hash_table.h"
#include "opal/mca/event/event.h"
#include "opal/mca/sec/sec.h"
#include "opal/mca/pmix/base/base.h"
#include "pmix_native.h"
static void usock_complete_connect(void);
static int usock_recv_connect_ack(void);
static int send_bytes(pmix_usock_send_t *msg)
{
int rc;
while (0 < msg->sdbytes) {
rc = write(mca_pmix_native_component.sd, msg->sdptr, msg->sdbytes);
if (rc < 0) {
if (opal_socket_errno == EINTR) {
continue;
} else if (opal_socket_errno == EAGAIN) {
/* tell the caller to keep this message on active,
* but let the event lib cycle so other messages
* can progress while this socket is busy
*/
return OPAL_ERR_RESOURCE_BUSY;
} else if (opal_socket_errno == EWOULDBLOCK) {
/* tell the caller to keep this message on active,
* but let the event lib cycle so other messages
* can progress while this socket is busy
*/
return OPAL_ERR_WOULD_BLOCK;
}
/* we hit an error and cannot progress this message */
opal_output(0, "%s pmix_usock_msg_send_bytes: write failed: %s (%d) [sd = %d]",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
strerror(opal_socket_errno),
opal_socket_errno,
mca_pmix_native_component.sd);
return OPAL_ERR_COMM_FAILURE;
}
/* update location */
msg->sdbytes -= rc;
msg->sdptr += rc;
}
/* we sent the full data block */
return OPAL_SUCCESS;
}
/*
* A file descriptor is available/ready for send. Check the state
* of the socket and take the appropriate action.
*/
void pmix_usock_send_handler(int sd, short flags, void *cbdata)
{
pmix_usock_send_t *msg = mca_pmix_native_component.send_msg;
int rc;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s usock:send_handler called to send to server",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
switch (mca_pmix_native_component.state) {
case PMIX_USOCK_CONNECTING:
case PMIX_USOCK_CLOSED:
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"usock:send_handler %s",
pmix_usock_state_print(mca_pmix_native_component.state));
usock_complete_connect();
/* de-activate the send event until the connection
* handshake completes
*/
if (mca_pmix_native_component.send_ev_active) {
opal_event_del(&mca_pmix_native_component.send_event);
mca_pmix_native_component.send_ev_active = false;
}
break;
case PMIX_USOCK_CONNECTED:
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s usock:send_handler SENDING TO SERVER",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
if (NULL != msg) {
if (!msg->hdr_sent) {
if (OPAL_SUCCESS == (rc = send_bytes(msg))) {
/* header is completely sent */
msg->hdr_sent = true;
/* setup to send the data */
if (NULL == msg->data) {
/* this was a zero-byte msg - nothing more to do */
OBJ_RELEASE(msg);
mca_pmix_native_component.send_msg = NULL;
goto next;
} else {
/* send the data as a single block */
msg->sdptr = msg->data;
msg->sdbytes = msg->hdr.nbytes;
}
/* fall thru and let the send progress */
} else if (OPAL_ERR_RESOURCE_BUSY == rc ||
OPAL_ERR_WOULD_BLOCK == rc) {
/* exit this event and let the event lib progress */
return;
} else {
// report the error
opal_output(0, "%s pmix_usock_peer_send_handler: unable to send message ON SOCKET %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
mca_pmix_native_component.sd);
opal_event_del(&mca_pmix_native_component.send_event);
mca_pmix_native_component.send_ev_active = false;
OBJ_RELEASE(msg);
mca_pmix_native_component.send_msg = NULL;
return;
}
}
if (msg->hdr_sent) {
if (OPAL_SUCCESS == (rc = send_bytes(msg))) {
// message is complete
OBJ_RELEASE(msg);
mca_pmix_native_component.send_msg = NULL;
goto next;
} else if (OPAL_ERR_RESOURCE_BUSY == rc ||
OPAL_ERR_WOULD_BLOCK == rc) {
/* exit this event and let the event lib progress */
return;
} else {
// report the error
opal_output(0, "%s pmix_usock_peer_send_handler: unable to send message ON SOCKET %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
mca_pmix_native_component.sd);
opal_event_del(&mca_pmix_native_component.send_event);
mca_pmix_native_component.send_ev_active = false;
OBJ_RELEASE(msg);
mca_pmix_native_component.send_msg = NULL;
return;
}
}
next:
/* if current message completed - progress any pending sends by
* moving the next in the queue into the "on-deck" position. Note
* that this doesn't mean we send the message right now - we will
* wait for another send_event to fire before doing so. This gives
* us a chance to service any pending recvs.
*/
mca_pmix_native_component.send_msg = (pmix_usock_send_t*)
opal_list_remove_first(&mca_pmix_native_component.send_queue);
}
/* if nothing else to do unregister for send event notifications */
if (NULL == mca_pmix_native_component.send_msg &&
mca_pmix_native_component.send_ev_active) {
opal_event_del(&mca_pmix_native_component.send_event);
mca_pmix_native_component.send_ev_active = false;
}
break;
default:
opal_output(0, "%s pmix_usock_peer_send_handler: invalid connection state (%d) on socket %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
mca_pmix_native_component.state, mca_pmix_native_component.sd);
if (mca_pmix_native_component.send_ev_active) {
opal_event_del(&mca_pmix_native_component.send_event);
mca_pmix_native_component.send_ev_active = false;
}
break;
}
}
static int read_bytes(pmix_usock_recv_t* recv)
{
int rc;
/* read until all bytes recvd or error */
while (0 < recv->rdbytes) {
rc = read(mca_pmix_native_component.sd, recv->rdptr, recv->rdbytes);
if (rc < 0) {
if(opal_socket_errno == EINTR) {
continue;
} else if (opal_socket_errno == EAGAIN) {
/* tell the caller to keep this message on active,
* but let the event lib cycle so other messages
* can progress while this socket is busy
*/
return OPAL_ERR_RESOURCE_BUSY;
} else if (opal_socket_errno == EWOULDBLOCK) {
/* tell the caller to keep this message on active,
* but let the event lib cycle so other messages
* can progress while this socket is busy
*/
return OPAL_ERR_WOULD_BLOCK;
}
/* we hit an error and cannot progress this message - report
             * the error and let the caller know
* to abort this message
*/
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix_usock_msg_recv: readv failed: %s (%d)",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
strerror(opal_socket_errno),
opal_socket_errno);
return OPAL_ERR_COMM_FAILURE;
} else if (rc == 0) {
/* the remote peer closed the connection - report that condition
* and let the caller know
*/
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix_usock_msg_recv: peer closed connection",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
/* stop all events */
if (mca_pmix_native_component.recv_ev_active) {
opal_event_del(&mca_pmix_native_component.recv_event);
mca_pmix_native_component.recv_ev_active = false;
}
if (mca_pmix_native_component.timer_ev_active) {
opal_event_del(&mca_pmix_native_component.timer_event);
mca_pmix_native_component.timer_ev_active = false;
}
if (mca_pmix_native_component.send_ev_active) {
opal_event_del(&mca_pmix_native_component.send_event);
mca_pmix_native_component.send_ev_active = false;
}
if (NULL != mca_pmix_native_component.recv_msg) {
OBJ_RELEASE(mca_pmix_native_component.recv_msg);
mca_pmix_native_component.recv_msg = NULL;
}
CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
return OPAL_ERR_WOULD_BLOCK;
}
/* we were able to read something, so adjust counters and location */
recv->rdbytes -= rc;
recv->rdptr += rc;
}
/* we read the full data block */
return OPAL_SUCCESS;
}
/*
* Dispatch to the appropriate action routine based on the state
* of the connection with the peer.
*/
void pmix_usock_recv_handler(int sd, short flags, void *cbdata)
{
int rc;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s usock:recv:handler called",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
switch (mca_pmix_native_component.state) {
case PMIX_USOCK_CONNECT_ACK:
if (OPAL_SUCCESS == (rc = usock_recv_connect_ack())) {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s usock:recv:handler starting send/recv events",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
/* we connected! Start the send/recv events */
if (!mca_pmix_native_component.recv_ev_active) {
opal_event_add(&mca_pmix_native_component.recv_event, 0);
mca_pmix_native_component.recv_ev_active = true;
}
if (mca_pmix_native_component.timer_ev_active) {
opal_event_del(&mca_pmix_native_component.timer_event);
mca_pmix_native_component.timer_ev_active = false;
}
/* if there is a message waiting to be sent, queue it */
if (NULL == mca_pmix_native_component.send_msg) {
mca_pmix_native_component.send_msg = (pmix_usock_send_t*)opal_list_remove_first(&mca_pmix_native_component.send_queue);
}
if (NULL != mca_pmix_native_component.send_msg && !mca_pmix_native_component.send_ev_active) {
opal_event_add(&mca_pmix_native_component.send_event, 0);
mca_pmix_native_component.send_ev_active = true;
}
/* update our state */
mca_pmix_native_component.state = PMIX_USOCK_CONNECTED;
} else {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s UNABLE TO COMPLETE CONNECT ACK WITH SERVER",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
opal_event_del(&mca_pmix_native_component.recv_event);
mca_pmix_native_component.recv_ev_active = false;
return;
}
break;
case PMIX_USOCK_CONNECTED:
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s usock:recv:handler CONNECTED",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
/* allocate a new message and setup for recv */
if (NULL == mca_pmix_native_component.recv_msg) {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s usock:recv:handler allocate new recv msg",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
mca_pmix_native_component.recv_msg = OBJ_NEW(pmix_usock_recv_t);
if (NULL == mca_pmix_native_component.recv_msg) {
opal_output(0, "%s usock_recv_handler: unable to allocate recv message\n",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
return;
}
/* start by reading the header */
mca_pmix_native_component.recv_msg->rdptr = (char*)&mca_pmix_native_component.recv_msg->hdr;
mca_pmix_native_component.recv_msg->rdbytes = sizeof(pmix_usock_hdr_t);
}
/* if the header hasn't been completely read, read it */
if (!mca_pmix_native_component.recv_msg->hdr_recvd) {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"usock:recv:handler read hdr");
if (OPAL_SUCCESS == (rc = read_bytes(mca_pmix_native_component.recv_msg))) {
/* completed reading the header */
mca_pmix_native_component.recv_msg->hdr_recvd = true;
/* if this is a zero-byte message, then we are done */
if (0 == mca_pmix_native_component.recv_msg->hdr.nbytes) {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s RECVD ZERO-BYTE MESSAGE FROM SERVER for tag %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
mca_pmix_native_component.recv_msg->hdr.tag);
mca_pmix_native_component.recv_msg->data = NULL; // make sure
mca_pmix_native_component.recv_msg->rdptr = NULL;
mca_pmix_native_component.recv_msg->rdbytes = 0;
} else {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s usock:recv:handler allocate data region of size %lu",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
(unsigned long)mca_pmix_native_component.recv_msg->hdr.nbytes);
/* allocate the data region */
mca_pmix_native_component.recv_msg->data = (char*)malloc(mca_pmix_native_component.recv_msg->hdr.nbytes);
/* point to it */
mca_pmix_native_component.recv_msg->rdptr = mca_pmix_native_component.recv_msg->data;
mca_pmix_native_component.recv_msg->rdbytes = mca_pmix_native_component.recv_msg->hdr.nbytes;
}
/* fall thru and attempt to read the data */
} else if (OPAL_ERR_RESOURCE_BUSY == rc ||
OPAL_ERR_WOULD_BLOCK == rc) {
/* exit this event and let the event lib progress */
return;
} else {
/* close the connection */
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s usock:recv:handler error reading bytes - closing connection",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
return;
}
}
if (mca_pmix_native_component.recv_msg->hdr_recvd) {
/* continue to read the data block - we start from
* wherever we left off, which could be at the
* beginning or somewhere in the message
*/
if (OPAL_SUCCESS == (rc = read_bytes(mca_pmix_native_component.recv_msg))) {
/* we recvd all of the message */
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s RECVD COMPLETE MESSAGE FROM SERVER OF %d BYTES FOR TAG %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
(int)mca_pmix_native_component.recv_msg->hdr.nbytes,
mca_pmix_native_component.recv_msg->hdr.tag);
/* post it for delivery */
PMIX_ACTIVATE_POST_MSG(mca_pmix_native_component.recv_msg);
mca_pmix_native_component.recv_msg = NULL;
            } else if (OPAL_ERR_RESOURCE_BUSY == rc ||
OPAL_ERR_WOULD_BLOCK == rc) {
/* exit this event and let the event lib progress */
return;
} else {
// report the error
opal_output(0, "%s usock_peer_recv_handler: unable to recv message",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
/* turn off the recv event */
opal_event_del(&mca_pmix_native_component.recv_event);
mca_pmix_native_component.recv_ev_active = false;
CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
return;
}
        }
        break;
default:
opal_output(0, "%s usock_peer_recv_handler: invalid socket state(%d)",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
mca_pmix_native_component.state);
CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
break;
}
}
/*
* A blocking recv on a non-blocking socket. Used to receive the small amount of connection
* information that identifies the peers endpoint.
*/
static bool usock_recv_blocking(char *data, size_t size)
{
size_t cnt = 0;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s waiting for connect ack from server",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
while (cnt < size) {
int retval = recv(mca_pmix_native_component.sd, (char *)data+cnt, size-cnt, 0);
/* remote closed connection */
if (retval == 0) {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s usock_recv_blocking: server closed connection: state %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
mca_pmix_native_component.state);
mca_pmix_native_component.state = PMIX_USOCK_CLOSED;
CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
return false;
}
/* socket is non-blocking so handle errors */
if (retval < 0) {
if (opal_socket_errno != EINTR &&
opal_socket_errno != EAGAIN &&
opal_socket_errno != EWOULDBLOCK) {
if (mca_pmix_native_component.state == PMIX_USOCK_CONNECT_ACK) {
/* If we overflow the listen backlog, it's
possible that even though we finished the three
way handshake, the remote host was unable to
transition the connection from half connected
(received the initial SYN) to fully connected
(in the listen backlog). We likely won't see
the failure until we try to receive, due to
timing and the like. The first thing we'll get
in that case is a RST packet, which receive
will turn into a connection reset by peer
errno. In that case, leave the socket in
                       CONNECT_ACK and propagate the error up to
recv_connect_ack, who will try to establish the
connection again */
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s connect ack received error %s from server",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
strerror(opal_socket_errno));
return false;
} else {
opal_output(0,
"%s usock_recv_blocking: "
"recv() failed for server: %s (%d)\n",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
strerror(opal_socket_errno),
opal_socket_errno);
mca_pmix_native_component.state = PMIX_USOCK_FAILED;
CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
return false;
}
}
continue;
}
cnt += retval;
}
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s connect ack received from server",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
return true;
}
/*
* Receive the peers globally unique process identification from a newly
* connected socket and verify the expected response. If so, move the
* socket to a connected state.
*/
static int usock_recv_connect_ack(void)
{
char *msg;
char *version;
int rc;
opal_sec_cred_t creds;
pmix_usock_hdr_t hdr;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s RECV CONNECT ACK FROM SERVER ON SOCKET %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
mca_pmix_native_component.sd);
/* ensure all is zero'd */
memset(&hdr, 0, sizeof(pmix_usock_hdr_t));
if (usock_recv_blocking((char*)&hdr, sizeof(pmix_usock_hdr_t))) {
/* If the state is CONNECT_ACK, then we were waiting for
* the connection to be ack'd
*/
if (mca_pmix_native_component.state != PMIX_USOCK_CONNECT_ACK) {
/* handshake broke down - abort this connection */
opal_output(0, "%s RECV CONNECT BAD HANDSHAKE FROM SERVER ON SOCKET %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
mca_pmix_native_component.sd);
mca_pmix_native_component.state = PMIX_USOCK_FAILED;
CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
return OPAL_ERR_UNREACH;
}
} else {
/* unable to complete the recv */
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s unable to complete recv of connect-ack from server ON SOCKET %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
mca_pmix_native_component.sd);
return OPAL_ERR_UNREACH;
}
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s connect-ack recvd from server",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
    /* compare the server's name to the expected value */
if (hdr.id != mca_pmix_native_component.server) {
opal_output(0, "usock_peer_recv_connect_ack: "
"%s received unexpected process identifier %"PRIu64" from server: expected %"PRIu64"",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
hdr.id, mca_pmix_native_component.server);
mca_pmix_native_component.state = PMIX_USOCK_FAILED;
CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
return OPAL_ERR_UNREACH;
}
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s connect-ack header from server is okay",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
/* get the authentication and version payload */
if (NULL == (msg = (char*)malloc(hdr.nbytes))) {
mca_pmix_native_component.state = PMIX_USOCK_FAILED;
CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
return OPAL_ERR_OUT_OF_RESOURCE;
}
if (!usock_recv_blocking(msg, hdr.nbytes)) {
/* unable to complete the recv */
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s unable to complete recv of connect-ack from server ON SOCKET %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
mca_pmix_native_component.sd);
free(msg);
return OPAL_ERR_UNREACH;
}
/* check that this is from a matching version */
version = (char*)(msg);
if (0 != strcmp(version, opal_version_string)) {
opal_output(0, "usock_peer_recv_connect_ack: "
"%s received different version from server: %s instead of %s",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
version, opal_version_string);
mca_pmix_native_component.state = PMIX_USOCK_FAILED;
CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
free(msg);
return OPAL_ERR_UNREACH;
}
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s connect-ack version from server matches ours",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
/* check security token */
creds.credential = (char*)(msg + strlen(version) + 1);
creds.size = hdr.nbytes - strlen(version) - 1;
if (OPAL_SUCCESS != (rc = opal_sec.authenticate(&creds))) {
OPAL_ERROR_LOG(rc);
}
free(msg);
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s connect-ack from server authenticated",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
/* connected */
mca_pmix_native_component.state = PMIX_USOCK_CONNECTED;
/* initiate send of first message on queue */
if (NULL == mca_pmix_native_component.send_msg) {
mca_pmix_native_component.send_msg = (pmix_usock_send_t*)
opal_list_remove_first(&mca_pmix_native_component.send_queue);
}
if (NULL != mca_pmix_native_component.send_msg && !mca_pmix_native_component.send_ev_active) {
opal_event_add(&mca_pmix_native_component.send_event, 0);
mca_pmix_native_component.send_ev_active = true;
}
if (2 <= opal_output_get_verbosity(opal_pmix_base_framework.framework_output)) {
pmix_usock_dump("connected");
}
return OPAL_SUCCESS;
}
/*
 * Check the status of the connection. If the connection failed, we will retry
* later. Otherwise, send this process' identifier to the server on the
* newly connected socket.
*/
static void usock_complete_connect(void)
{
int so_error = 0;
opal_socklen_t so_length = sizeof(so_error);
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s usock:complete_connect called for server on socket %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
mca_pmix_native_component.sd);
/* check connect completion status */
if (getsockopt(mca_pmix_native_component.sd, SOL_SOCKET, SO_ERROR, (char *)&so_error, &so_length) < 0) {
opal_output(0, "%s usock_peer_complete_connect: getsockopt() to server failed: %s (%d)\n",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
strerror(opal_socket_errno),
opal_socket_errno);
mca_pmix_native_component.state = PMIX_USOCK_FAILED;
CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
return;
}
if (so_error == EINPROGRESS) {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s usock:send:handler still in progress",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
return;
} else if (so_error == ECONNREFUSED || so_error == ETIMEDOUT) {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s usock_peer_complete_connect: connection to server failed: %s (%d)",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
strerror(so_error),
so_error);
mca_pmix_native_component.state = PMIX_USOCK_FAILED;
CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
return;
} else if (so_error != 0) {
/* No need to worry about the return code here - we return regardless
at this point, and if an error did occur a message has already been
printed for the user */
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s usock_peer_complete_connect: "
"connection to server failed with error %d",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
so_error);
mca_pmix_native_component.state = PMIX_USOCK_FAILED;
CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
return;
}
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s usock_peer_complete_connect: sending ack to server",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
if (usock_send_connect_ack() == OPAL_SUCCESS) {
mca_pmix_native_component.state = PMIX_USOCK_CONNECT_ACK;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s usock_peer_complete_connect: setting read event on connection to server",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
if (!mca_pmix_native_component.recv_ev_active) {
opal_event_add(&mca_pmix_native_component.recv_event, 0);
mca_pmix_native_component.recv_ev_active = true;
}
} else {
opal_output(0, "%s usock_complete_connect: unable to send connect ack to server",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
mca_pmix_native_component.state = PMIX_USOCK_FAILED;
CLOSE_THE_SOCKET(mca_pmix_native_component.sd);
}
}
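For readers following the protocol: the handlers above frame every exchange as a fixed-size header followed by a single payload block. The sender writes the header, then hdr.nbytes of data; the receiver reads the header first, allocates hdr.nbytes, and then reads the payload. The standalone sketch below is not part of the commit - it only illustrates that two-phase framing over a local socketpair. The demo_hdr_t fields mirror the members the handlers reference (id, tag, nbytes); the real pmix_usock_hdr_t is defined in pmix_native.h and is not shown here, so the exact layout is an assumption.

/* framing_demo.c - illustrative sketch of header-then-payload framing.
 * Error checks are kept minimal for brevity. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <unistd.h>

typedef struct {
    uint64_t id;      /* sender identifier */
    uint32_t tag;     /* matches the recv posted for this message */
    uint32_t nbytes;  /* number of payload bytes that follow the header */
} demo_hdr_t;

/* loop until the full block is written - the real send_bytes() does the
 * same, but returns "busy" codes to the event loop instead of blocking */
static int write_all(int sd, const char *ptr, size_t size)
{
    while (size > 0) {
        ssize_t rc = write(sd, ptr, size);
        if (rc < 0) {
            return -1;
        }
        ptr += rc;
        size -= (size_t)rc;
    }
    return 0;
}

static int read_all(int sd, char *ptr, size_t size)
{
    while (size > 0) {
        ssize_t rc = read(sd, ptr, size);
        if (rc <= 0) {
            return -1;   /* error or peer closed the connection */
        }
        ptr += rc;
        size -= (size_t)rc;
    }
    return 0;
}

int main(void)
{
    int sv[2];
    const char payload[] = "modex-blob";
    demo_hdr_t hdr = { .id = 42, .tag = 1, .nbytes = sizeof(payload) };

    if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0) {
        return 1;
    }

    /* sender: header first, then the payload as a single block */
    write_all(sv[0], (const char*)&hdr, sizeof(hdr));
    write_all(sv[0], payload, hdr.nbytes);

    /* receiver: read the fixed-size header, then allocate and read exactly
     * hdr.nbytes - the same two-phase pattern as the recv handler above */
    demo_hdr_t in;
    if (0 != read_all(sv[1], (char*)&in, sizeof(in))) {
        return 1;
    }
    char *data = malloc(in.nbytes);
    if (NULL == data || 0 != read_all(sv[1], data, in.nbytes)) {
        return 1;
    }

    printf("tag %u: %s\n", (unsigned)in.tag, data);
    free(data);
    close(sv[0]);
    close(sv[1]);
    return 0;
}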

opal/mca/pmix/pmix.h (Normal file, 426 additions)
@@ -0,0 +1,426 @@
/*
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OPAL_PMIX_H
#define OPAL_PMIX_H
#include "opal_config.h"
#include "opal/types.h"
#include "opal/mca/mca.h"
#include "opal/mca/event/event.h"
#include "opal/dss/dss.h"
#include "opal/runtime/opal.h"
#include "opal/mca/dstore/dstore.h"
#include "opal/dss/dss.h"
#include "opal/util/error.h"
#include "opal/util/proc.h"
BEGIN_C_DECLS
/* define some maximum sizes */
#define PMIX_MAX_VALLEN 1024
#define PMIX_MAX_INFO_KEY 255
#define PMIX_MAX_INFO_VAL 1024
/* define an INFO object corresponding to
* the MPI_Info structure */
typedef struct {
opal_list_item_t super;
char key[PMIX_MAX_INFO_KEY];
char value[PMIX_MAX_INFO_VAL];
} pmix_info_t;
OBJ_CLASS_DECLARATION(pmix_info_t);
/* define a scope for data "put" by PMI per the following:
*
 * PMIX_LOCAL - the data is intended only for other application
 *              processes on the same node. Data marked in this way
 *              will not be included in data packages sent to remote requestors
 * PMIX_REMOTE - the data is intended solely for application processes on
 *               remote nodes. Data marked in this way will not be shared with
 *               other processes on the same node
 * PMIX_GLOBAL - the data is to be shared with all other requesting processes,
 *               regardless of location
*/
typedef uint8_t opal_pmix_scope_t;
#define PMIX_SCOPE_T OPAL_UINT8
#define PMIX_SCOPE_UNDEF 0
#define PMIX_INTERNAL 1 // data used internally only
#define PMIX_LOCAL 2 // share to procs also on this node
#define PMIX_REMOTE 3 // share with procs not on this node
#define PMIX_GLOBAL 4 // share with all procs (local + remote)
/* callback function for non-blocking operations */
typedef void (*opal_pmix_cbfunc_t)(int status, opal_value_t *kv, void *cbdata);
/* flags to indicate if the modex value being pushed into
* the PMIx server comes from an element that is ready to
* support async modex operations, or from one that requires
* synchronous modex (i.e., blocking modex operation) */
#define PMIX_SYNC_REQD true
#define PMIX_ASYNC_RDY false
/* define a set of "standard" PMIx attributes that can
* be queried. Implementations (and users) are free to extend as
* desired, so the get_attr functions need to be capable
* of handling the "not found" condition. Note that these
* are attributes of the system and the job as opposed to
* values the application (or underlying MPI library)
* might choose to expose - i.e., they are values provided
* by the resource manager as opposed to the application */
#define PMIX_ATTR_UNDEF NULL
#define PMIX_CPUSET "pmix.cpuset" // (char*) hwloc bitmap applied to proc upon launch
#define PMIX_CREDENTIAL "pmix.cred" // (opal_byte_object*) security credential assigned to proc
/* scratch directory locations for use by applications */
#define PMIX_TMPDIR "pmix.tmpdir" // (char*) top-level tmp dir assigned to session
/* information about relative ranks as assigned */
#define PMIX_JOBID "pmix.jobid" // (char*) jobid assigned by scheduler
#define PMIX_APPNUM "pmix.appnum" // (uint32_t) app number within the job
#define PMIX_RANK "pmix.rank" // (uint32_t) process rank within the job
#define PMIX_GLOBAL_RANK "pmix.grank" // (uint32_t) rank spanning across all jobs in this session
#define PMIX_APP_RANK "pmix.apprank" // (uint32_t) rank within this app
#define PMIX_NPROC_OFFSET "pmix.offset" // (uint32_t) starting global rank of this job
#define PMIX_LOCAL_RANK "pmix.lrank" // (uint16_t) rank on this node within this job
#define PMIX_NODE_RANK "pmix.nrank" // (uint16_t) rank on this node spanning all jobs
#define PMIX_LOCALLDR "pmix.lldr" // (uint64_t) opal_identifier of lowest rank on this node within this job
#define PMIX_APPLDR "pmix.aldr" // (uint32_t) lowest rank in this app within this job
/* proc location-related info */
#define PMIX_PROC_MAP "pmix.map" // (byte_object) packed map of proc locations within this job
#define PMIX_LOCAL_PEERS "pmix.lpeers" // (char*) comma-delimited string of ranks on this node within this job
/* size info */
#define PMIX_UNIV_SIZE "pmix.univ.size" // (uint32_t) #procs in this namespace
#define PMIX_JOB_SIZE "pmix.job.size" // (uint32_t) #procs in this job
#define PMIX_LOCAL_SIZE "pmix.local.size" // (uint32_t) #procs in this job on this node
#define PMIX_NODE_SIZE "pmix.node.size" // (uint32_t) #procs across all jobs on this node
#define PMIX_MAX_PROCS "pmix.max.size" // (uint32_t) max #procs for this job
/* topology info */
#define PMIX_NET_TOPO "pmix.ntopo" // (byte_object) network topology
#define PMIX_LOCAL_TOPO "pmix.ltopo" // (hwloc topo) local node topology
/**
* Provide a simplified macro for sending data via modex
 * to other processes. The macro requires six arguments:
*
* r - the integer return status from the modex op
* f - whether this modex requires sync or is async ready
* sc - the PMIX scope of the data
* s - the key to tag the data being posted
* d - the data object being posted
* sz - the number of bytes in the data object
*/
#define OPAL_MODEX_SEND_STRING(r, f, sc, s, d, sz) \
do { \
opal_value_t kv; \
if (PMIX_SYNC_REQD == (f)) { \
opal_pmix_use_collective = true; \
} \
OBJ_CONSTRUCT(&kv, opal_value_t); \
kv.key = (s); \
kv.type = OPAL_BYTE_OBJECT; \
kv.data.bo.bytes = (uint8_t*)(d); \
kv.data.bo.size = (sz); \
if (OPAL_SUCCESS != ((r) = opal_pmix.put(sc, &kv))) { \
OPAL_ERROR_LOG((r)); \
} \
kv.data.bo.bytes = NULL; /* protect the data */ \
kv.key = NULL; /* protect the key */ \
OBJ_DESTRUCT(&kv); \
} while(0);
/**
* Provide a simplified macro for sending data via modex
 * to other processes. The macro requires six arguments:
*
* r - the integer return status from the modex op
* f - whether this modex requires sync or is async ready
* sc - the PMIX scope of the data
* s - the MCA component that is posting the data
* d - the data object being posted
* sz - the number of bytes in the data object
*/
#define OPAL_MODEX_SEND(r, f, sc, s, d, sz) \
do { \
char *key; \
if (PMIX_SYNC_REQD == (f)) { \
opal_pmix_use_collective = true; \
} \
key = mca_base_component_to_string((s)); \
OPAL_MODEX_SEND_STRING((r), (f), (sc), key, (d), (sz)); \
free(key); \
} while(0);
/**
* Provide a simplified macro for retrieving modex data
* from another process:
*
* r - the integer return status from the modex op (int)
* s - string key (char*)
* p - pointer to the opal_proc_t of the proc that posted
* the data (opal_proc_t*)
* d - pointer to a location wherein the data object
 * is to be returned
* t - the expected data type
*/
#define OPAL_MODEX_RECV_VALUE(r, s, p, d, t) \
do { \
opal_value_t *kv; \
if (OPAL_SUCCESS != ((r) = opal_pmix.get(&(p)->proc_name, \
(s), &kv))) { \
OPAL_ERROR_LOG((r)); \
} else { \
(r) = opal_value_unload(kv, (void**)(d), (t)); \
OBJ_RELEASE(kv); \
} \
} while(0);
/**
* Provide a simplified macro for retrieving modex data
* from another process:
*
* r - the integer return status from the modex op (int)
* s - string key (char*)
* p - pointer to the opal_proc_t of the proc that posted
* the data (opal_proc_t*)
* d - pointer to a location wherein the data object
 * is to be returned (char**)
* sz - pointer to a location wherein the number of bytes
* in the data object can be returned (size_t)
*/
#define OPAL_MODEX_RECV_STRING(r, s, p, d, sz) \
do { \
opal_value_t *kv; \
if (OPAL_SUCCESS != ((r) = opal_pmix.get(&(p)->proc_name, \
(s), &kv))) { \
OPAL_ERROR_LOG((r)); \
} else { \
*(d) = kv->data.bo.bytes; \
*(sz) = kv->data.bo.size; \
kv->data.bo.bytes = NULL; /* protect the data */ \
OBJ_RELEASE(kv); \
} \
} while(0);
/**
* Provide a simplified macro for retrieving modex data
* from another process:
*
* r - the integer return status from the modex op (int)
* s - the MCA component that posted the data (mca_base_component_t*)
* p - pointer to the opal_proc_t of the proc that posted
* the data (opal_proc_t*)
* d - pointer to a location wherein the data object
 * is to be returned (char**)
* sz - pointer to a location wherein the number of bytes
* in the data object can be returned (size_t)
*/
#define OPAL_MODEX_RECV(r, s, p, d, sz) \
do { \
char *key; \
key = mca_base_component_to_string((s)); \
if (NULL == key) { \
OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); \
(r) = OPAL_ERR_OUT_OF_RESOURCE; \
} else { \
OPAL_MODEX_RECV_STRING((r), key, (p), (d), (sz)); \
free(key); \
} \
} while(0);
/**
* Provide a simplified macro for calling the fence function
* that takes into account directives and availability of
* non-blocking operations
*/
#define OPAL_FENCE(p, s, cf, cd) \
do { \
if (opal_pmix_use_collective || NULL == opal_pmix.fence_nb) { \
opal_pmix.fence((p), (s)); \
} else { \
opal_pmix.fence_nb((p), (s), (cf), (cd)); \
} \
} while(0);
/* callback handler for errors */
typedef void (*opal_pmix_errhandler_fn_t)(int error);
/**** DEFINE THE PUBLIC API'S ****
**** NOTE THAT WE DO NOT HAVE A 1:1 MAPPING OF APIs ****
**** HERE TO THOSE CURRENTLY DEFINED BY PMI AS WE ****
**** DON'T USE SOME OF THOSE FUNCTIONS AND THIS ISN'T ****
**** A GENERAL LIBRARY ****/
/***** APIs CURRENTLY USED IN THE OMPI/ORTE CODE BASE ****/
/* NOTE: calls to these APIs must be thread-protected as there
* currently is NO internal thread safety. */
/* Init */
typedef int (*opal_pmix_base_module_init_fn_t)(void);
/* Finalize */
typedef int (*opal_pmix_base_module_fini_fn_t)(void);
/* Initialized */
typedef bool (*opal_pmix_base_module_initialized_fn_t)(void);
/* Abort */
typedef int (*opal_pmix_base_module_abort_fn_t)(int flag, const char msg[]);
/* Fence - note that this call is required to commit any
* data "put" to the system since the last call to "fence"
* prior to (or as part of) executing the barrier. Serves both PMI2
* and PMI1 "barrier" purposes */
typedef int (*opal_pmix_base_module_fence_fn_t)(opal_process_name_t *procs, size_t nprocs);
/* Fence_nb - not included in the current PMI standard. This is a non-blocking
* version of the standard "fence" call. All subsequent "get" calls will block
* pending completion of this operation. Non-blocking "get" calls will still
* complete as data becomes available */
typedef int (*opal_pmix_base_module_fence_nb_fn_t)(opal_process_name_t *procs, size_t nprocs,
opal_pmix_cbfunc_t cbfunc, void *cbdata);
/* Put - note that this API has been modified from the current PMI standard to
* reflect the proposed PMIx extensions. */
typedef int (*opal_pmix_base_module_put_fn_t)(opal_pmix_scope_t scope,
opal_value_t *kv);
/* Get - note that this API has been modified from the current PMI standard to
* reflect the proposed PMIx extensions, and to include the process identifier so
* we can form the PMI key within the active component instead of sprinkling that
* code all over the code base. */
typedef int (*opal_pmix_base_module_get_fn_t)(const opal_identifier_t *id,
const char *key,
opal_value_t **kv);
/* Get_nb - not included in the current PMI standard. This is a non-blocking
* version of the standard "get" call. Retrieved value will be provided as
* opal_value_t object in the callback. We include the process identifier so
* we can form the PMI key within the active component instead of sprinkling that
* code all over the code base. */
typedef void (*opal_pmix_base_module_get_nb_fn_t)(const opal_identifier_t *id,
const char *key,
opal_pmix_cbfunc_t cbfunc,
void *cbdata);
/* Publish - the "info" parameter
* consists of a list of pmix_info_t objects */
typedef int (*opal_pmix_base_module_publish_fn_t)(const char service_name[],
opal_list_t *info,
const char port[]);
/* Lookup - the "info" parameter
* consists of a list of pmix_info_t objects */
typedef int (*opal_pmix_base_module_lookup_fn_t)(const char service_name[],
opal_list_t *info,
char port[], int portLen);
/* Unpublish - the "info" parameter
* consists of a list of pmix_info_t objects */
typedef int (*opal_pmix_base_module_unpublish_fn_t)(const char service_name[],
opal_list_t *info);
/* Get attribute
* Query the server for the specified attribute, returning it in the
* provided opal_value_t. The function will return "true" if the attribute
* is found, and "false" if not.
* Attributes are provided by the PMIx server, so there is no corresponding
* "put" function. */
typedef bool (*opal_pmix_base_module_get_attr_fn_t)(const char *attr, opal_value_t **kv);
/* Get attribute (non-blocking)
 * Query the server for the specified attribute.
* Attributes are provided by the PMIx server, so there is no corresponding "put"
* function. The call will be executed as non-blocking, returning immediately,
* with data resulting from the call returned in the callback function. A returned
* NULL opal_value_t* indicates that the attribute was not found. The returned
* pointer is "owned" by the PMIx module and must not be released by the
* callback function */
typedef int (*opal_pmix_base_module_get_attr_nb_fn_t)(const char *attr,
opal_pmix_cbfunc_t cbfunc,
void *cbdata);
/**** APIs NOT CURRENTLY USED IN THE OMPI/ORTE CODE BASE, BUT THAT ****
**** MAY BE IMPLEMENTED IN THE NEAR FUTURE. COMPONENTS ARE FREE TO ****
**** JUST HAVE THEM RETURN "OPAL_ERR_NOT_IMPLEMENTED" ****/
/* PMI2_Job_Spawn */
typedef int (*opal_pmix_base_module_spawn_fn_t)(int count, const char * cmds[],
int argcs[], const char ** argvs[],
const int maxprocs[],
opal_list_t *info_keyval_vector,
opal_list_t *preput_keyval_vector,
char jobId[], int jobIdSize,
int errors[]);
/* PMI2_Job_Connect */
typedef int (*opal_pmix_base_module_job_connect_fn_t)(const char jobId[]);
/* PMI2_Job_Disconnect */
typedef int (*opal_pmix_base_module_job_disconnect_fn_t)(const char jobId[]);
/* register an errhandler to report loss of connection to the server */
typedef void (*opal_pmix_base_module_register_fn_t)(opal_pmix_errhandler_fn_t errhandler);
/* deregister the errhandler */
typedef void (*opal_pmix_base_module_deregister_fn_t)(void);
/*
* the standard public API data structure
*/
typedef struct {
/* currently used APIs */
opal_pmix_base_module_init_fn_t init;
opal_pmix_base_module_fini_fn_t finalize;
opal_pmix_base_module_initialized_fn_t initialized;
opal_pmix_base_module_abort_fn_t abort;
opal_pmix_base_module_fence_fn_t fence;
opal_pmix_base_module_fence_nb_fn_t fence_nb;
opal_pmix_base_module_put_fn_t put;
opal_pmix_base_module_get_fn_t get;
opal_pmix_base_module_get_nb_fn_t get_nb;
opal_pmix_base_module_publish_fn_t publish;
opal_pmix_base_module_lookup_fn_t lookup;
opal_pmix_base_module_unpublish_fn_t unpublish;
opal_pmix_base_module_get_attr_fn_t get_attr;
opal_pmix_base_module_get_attr_nb_fn_t get_attr_nb;
/* currently unused APIs */
opal_pmix_base_module_spawn_fn_t spawn;
opal_pmix_base_module_job_connect_fn_t job_connect;
opal_pmix_base_module_job_disconnect_fn_t job_disconnect;
/* register the errhandler */
opal_pmix_base_module_register_fn_t register_errhandler;
opal_pmix_base_module_deregister_fn_t deregister_errhandler;
} opal_pmix_base_module_t;
typedef struct {
mca_base_component_t base_version;
mca_base_component_data_t base_data;
int priority;
} opal_pmix_base_component_t;
/*
* Macro for use in components that are of type pmix
*/
#define OPAL_PMIX_BASE_VERSION_2_0_0 \
MCA_BASE_VERSION_2_0_0, \
"pmix", 2, 0, 0
/* Global structure for accessing store functions */
OPAL_DECLSPEC extern opal_pmix_base_module_t opal_pmix; /* holds base function pointers */
/* flag to indicate collective vs direct fence operations */
OPAL_DECLSPEC extern bool opal_pmix_use_collective;
END_C_DECLS
#endif
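Taken together, the macros and module APIs above replace the former modex calls. The sketch below is illustrative only and not part of the commit: it shows how a component might query a job attribute, post a blob under a key, fence, and then pull a peer's blob. It assumes the normal OPAL build tree with an initialized pmix component; the key name, blob contents, peer argument, and the NULL/0 fence arguments (read here as "all procs in the job") are assumptions.

/* modex_usage.c - usage sketch under the assumptions stated above */
#include "opal_config.h"
#include <stdlib.h>
#include "opal/constants.h"
#include "opal/util/proc.h"
#include "opal/mca/pmix/pmix.h"

static int exchange_endpoint_info(opal_proc_t *peer)
{
    int rc;
    char ep_key[] = "my.endpoint";      /* hypothetical modex key */
    uint8_t blob[16] = {0};             /* hypothetical endpoint description */
    uint8_t *remote = NULL;
    size_t rsize = 0;
    opal_value_t *kv = NULL;

    /* job-level attributes come straight from the active component -
     * no modex exchange is involved */
    if (opal_pmix.get_attr(PMIX_RANK, &kv) && NULL != kv) {
        OBJ_RELEASE(kv);
    }

    /* post our data for all procs; marked async-ready so a non-blocking
     * fence may be used if the active component provides one */
    OPAL_MODEX_SEND_STRING(rc, PMIX_ASYNC_RDY, PMIX_GLOBAL,
                           ep_key, blob, sizeof(blob));
    if (OPAL_SUCCESS != rc) {
        return rc;
    }

    /* execute the exchange; OPAL_FENCE() would select the non-blocking
     * variant when available, but the blocking form is simplest to show */
    opal_pmix.fence(NULL, 0);

    /* retrieve the peer's value - the returned bytes belong to the caller */
    OPAL_MODEX_RECV_STRING(rc, ep_key, peer, &remote, &rsize);
    if (OPAL_SUCCESS == rc) {
        free(remote);
    }
    return rc;
}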

opal/mca/pmix/s1/Makefile.am (Normal file, 38 additions)
@@ -0,0 +1,38 @@
#
# Copyright (c) 2014 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources = \
pmix_s1.h \
pmix_s1_component.c \
pmix_s1.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_opal_pmix_s1_DSO
component_noinst =
component_install = mca_pmix_s1.la
else
component_noinst = libmca_pmix_s1.la
component_install =
endif
mcacomponentdir = $(opallibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_pmix_s1_la_SOURCES = $(sources)
mca_pmix_s1_la_CPPFLAGS = $(pmix_s1_CPPFLAGS)
mca_pmix_s1_la_LDFLAGS = -module -avoid-version $(pmix_s1_LDFLAGS)
mca_pmix_s1_la_LIBADD = $(pmix_s1_LIBS)
noinst_LTLIBRARIES = $(component_noinst)
libmca_pmix_s1_la_SOURCES =$(sources)
libmca_pmix_s1_la_CPPFLAGS = $(pmix_s1_CPPFLAGS)
libmca_pmix_s1_la_LDFLAGS = -module -avoid-version $(pmix_s1_LDFLAGS)
libmca_pmix_s1_la_LIBADD = $(pmix_s1_LIBS)

opal/mca/pmix/s1/configure.m4 (Normal file, 28 additions)
@@ -0,0 +1,28 @@
# -*- shell-script -*-
#
# Copyright (c) 2014 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_pmix_s1_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_opal_pmix_s1_CONFIG], [
AC_CONFIG_FILES([opal/mca/pmix/s1/Makefile])
AC_REQUIRE([OPAL_CHECK_UGNI])
OPAL_CHECK_PMI([pmix_s1], [pmix_s1_good=1], [pmix_s1_good=0])
# Evaluate succeed / fail
AS_IF([test "$pmix_s1_good" = 1 -a "$opal_check_ugni_happy" = "no"],
[$1],
[$2])
# set build flags to use in makefile
AC_SUBST([pmix_s1_CPPFLAGS])
AC_SUBST([pmix_s1_LDFLAGS])
AC_SUBST([pmix_s1_LIBS])
])

opal/mca/pmix/s1/pmix_s1.c (Normal file, 662 additions)
@@ -0,0 +1,662 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "opal/constants.h"
#include "opal/types.h"
#include "opal_stdint.h"
#include "opal/mca/hwloc/base/base.h"
#include "opal/util/output.h"
#include "opal/util/proc.h"
#include "opal/util/show_help.h"
#include <string.h>
#include <pmi.h>
#include "opal/mca/pmix/base/base.h"
#include "pmix_s1.h"
static int s1_init(void);
static int s1_fini(void);
static bool s1_initialized(void);
static int s1_abort(int flag, const char msg[]);
static int s1_fence(opal_process_name_t *procs, size_t nprocs);
static int s1_put(opal_pmix_scope_t scope,
opal_value_t *kv);
static int s1_get(const opal_identifier_t *id,
const char *key,
opal_value_t **kv);
static int s1_publish(const char service_name[],
opal_list_t *info,
const char port[]);
static int s1_lookup(const char service_name[],
opal_list_t *info,
char port[], int portLen);
static int s1_unpublish(const char service_name[],
opal_list_t *info);
static bool s1_get_attr(const char *attr, opal_value_t **kv);
static int s1_spawn(int count, const char * cmds[],
int argcs[], const char ** argvs[],
const int maxprocs[],
opal_list_t *info_keyval_vector,
opal_list_t *preput_keyval_vector,
char jobId[], int jobIdSize,
int errors[]);
static int s1_job_connect(const char jobId[]);
static int s1_job_disconnect(const char jobId[]);
const opal_pmix_base_module_t opal_pmix_s1_module = {
    s1_init,
    s1_fini,
    s1_initialized,
    s1_abort,
    s1_fence,
    NULL,               /* fence_nb - no non-blocking fence in PMI-1 */
    s1_put,
    s1_get,
    NULL,               /* get_nb - no non-blocking get in PMI-1 */
    s1_publish,
    s1_lookup,
    s1_unpublish,
    s1_get_attr,
    NULL,               /* get_attr_nb */
    s1_spawn,
    s1_job_connect,
    s1_job_disconnect,
    NULL,               /* register_errhandler */
    NULL                /* deregister_errhandler */
};
// usage accounting
static int pmix_init_count = 0;
// PMI constant values:
static int pmix_kvslen_max = 0;
static int pmix_keylen_max = 0;
static int pmix_vallen_max = 0;
// Job environment description
static char *pmix_kvs_name = NULL;
static bool s1_committed = false;
static char* pmix_packed_data = NULL;
static int pmix_packed_data_offset = 0;
static int pmix_pack_key = 0;
static uint32_t s1_jobid;
static int s1_rank;
static uint16_t s1_lrank;
static uint16_t s1_nrank;
static int s1_usize;
static int s1_jsize;
static int s1_appnum;
static int s1_nlranks;
static int *s1_lranks=NULL;
static struct {
uint32_t jid;
uint32_t vid;
} s1_pname;
static bool got_modex_data = false;
static char* pmix_error(int pmix_err);
#define OPAL_PMI_ERROR(pmi_err, pmi_func) \
do { \
opal_output(0, "%s [%s:%d:%s]: %s\n", \
pmi_func, __FILE__, __LINE__, __func__, \
pmix_error(pmi_err)); \
} while(0);
static int kvs_get(const char key[], char value [], int maxvalue)
{
int rc;
rc = PMI_KVS_Get(pmix_kvs_name, key, value, maxvalue);
if( PMI_SUCCESS != rc ){
OPAL_PMI_ERROR(rc, "PMI_KVS_Get");
return OPAL_ERROR;
}
return OPAL_SUCCESS;
}
static int kvs_put(const char key[], const char value[])
{
int rc;
rc = PMI_KVS_Put(pmix_kvs_name, key, value);
if( PMI_SUCCESS != rc ){
OPAL_PMI_ERROR(rc, "PMI_KVS_Put");
return OPAL_ERROR;
}
return rc;
}
static int s1_init(void)
{
PMI_BOOL initialized;
int spawned;
int rc, ret = OPAL_ERROR;
int i;
char *pmix_id, *tmp;
uint32_t jobfam, stepid;
if (PMI_SUCCESS != (rc = PMI_Initialized(&initialized))) {
OPAL_PMI_ERROR(rc, "PMI_Initialized");
return OPAL_ERROR;
}
if( PMI_TRUE != initialized && PMI_SUCCESS != (rc = PMI_Init(&spawned)) ) {
OPAL_PMI_ERROR(rc, "PMI_Init");
return OPAL_ERROR;
}
// Initialize space demands
rc = PMI_KVS_Get_value_length_max(&pmix_vallen_max);
if( PMI_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI_KVS_Get_value_length_max");
goto err_exit;
}
rc = PMI_KVS_Get_name_length_max(&pmix_kvslen_max);
if (PMI_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI_KVS_Get_name_length_max");
goto err_exit;
}
rc = PMI_KVS_Get_key_length_max(&pmix_keylen_max);
if( PMI_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI_KVS_Get_key_length_max");
goto err_exit;
}
// Initialize job environment information
pmix_id = (char*)malloc(pmix_vallen_max);
if( pmix_id == NULL ){
ret = OPAL_ERR_OUT_OF_RESOURCE;
goto err_exit;
}
/* Get domain id */
if (PMI_SUCCESS != (rc = PMI_Get_kvs_domain_id(pmix_id, pmix_vallen_max))) {
free(pmix_id);
goto err_exit;
}
/* Slurm PMI provides the job id as an integer followed
* by a '.', followed by essentially a stepid. The first integer
* defines an overall job number. The second integer is the number of
* individual jobs we have run within that allocation. So we translate
* this as the overall job number equating to our job family, and
* the individual number equating to our local jobid
*/
jobfam = strtoul(pmix_id, &tmp, 10);
if (NULL == tmp) {
/* hmmm - no '.', so let's just use zero */
stepid = 0;
} else {
tmp++; /* step over the '.' */
stepid = strtoul(tmp, NULL, 10);
}
/* now build the jobid */
s1_jobid = (jobfam << 16) | stepid;
free(pmix_id);
/* get our rank */
ret = PMI_Get_rank(&s1_rank);
if( PMI_SUCCESS != ret ) {
OPAL_PMI_ERROR(ret, "PMI_Get_rank");
goto err_exit;
}
/* store our name in the opal_proc_t so that
* debug messages will make sense - an upper
* layer will eventually overwrite it, but that
* won't do any harm */
s1_pname.jid = s1_jobid;
s1_pname.vid = s1_rank;
opal_proc_set_name((opal_process_name_t*)&s1_pname);
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:s1: assigned tmp name",
OPAL_NAME_PRINT(*(opal_process_name_t*)&s1_pname));
pmix_kvs_name = (char*)malloc(pmix_kvslen_max);
if( pmix_kvs_name == NULL ){
ret = OPAL_ERR_OUT_OF_RESOURCE;
goto err_exit;
}
rc = PMI_KVS_Get_my_name(pmix_kvs_name, pmix_kvslen_max);
if( PMI_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI_KVS_Get_my_name");
goto err_exit;
}
/* get our local proc info to find our local rank */
if (PMI_SUCCESS != (rc = PMI_Get_clique_size(&s1_nlranks))) {
OPAL_PMI_ERROR(rc, "PMI_Get_clique_size");
return rc;
}
/* now get the specific ranks */
s1_lranks = (int*)calloc(s1_nlranks, sizeof(int));
if (NULL == s1_lranks) {
rc = OPAL_ERR_OUT_OF_RESOURCE;
OPAL_ERROR_LOG(rc);
return rc;
}
if (PMI_SUCCESS != (rc = PMI_Get_clique_ranks(s1_lranks, s1_nlranks))) {
OPAL_PMI_ERROR(rc, "PMI_Get_clique_ranks");
free(s1_lranks);
return rc;
}
/* find ourselves */
for (i=0; i < s1_nlranks; i++) {
if (s1_rank == s1_lranks[i]) {
s1_lrank = i;
s1_nrank = i;
break;
}
}
/* get universe size */
ret = PMI_Get_universe_size(&s1_usize);
if (PMI_SUCCESS != ret) {
OPAL_PMI_ERROR(ret, "PMI_Get_universe_size");
goto err_exit;
}
/* get job size */
ret = PMI_Get_size(&s1_jsize);
if (PMI_SUCCESS != ret) {
OPAL_PMI_ERROR(ret, "PMI_Get_size");
goto err_exit;
}
/* get appnum */
ret = PMI_Get_appnum(&s1_appnum);
if (PMI_SUCCESS != ret) {
OPAL_PMI_ERROR(ret, "PMI_Get_appnum");
goto err_exit;
}
    /* track that we have initialized so s1_initialized/s1_fini behave correctly */
    ++pmix_init_count;
    return OPAL_SUCCESS;
err_exit:
PMI_Finalize();
return ret;
}
static int s1_fini(void) {
if (0 == pmix_init_count) {
return OPAL_SUCCESS;
}
if (0 == --pmix_init_count) {
PMI_Finalize ();
}
if (NULL != s1_lranks) {
free(s1_lranks);
}
return OPAL_SUCCESS;
}
static bool s1_initialized(void)
{
if (0 < pmix_init_count) {
return true;
}
return false;
}
static int s1_abort(int flag, const char msg[])
{
PMI_Abort(flag, msg);
return OPAL_SUCCESS;
}
static int s1_spawn(int count, const char * cmds[],
int argcs[], const char ** argvs[],
const int maxprocs[],
opal_list_t *info_keyval_vector,
opal_list_t *preput_keyval_vector,
char jobId[], int jobIdSize,
int errors[])
{
/*
int rc;
size_t preput_vector_size;
const int info_keyval_sizes[1];
info_keyval_sizes[0] = (int)opal_list_get_size(info_keyval_vector);
//FIXME what's the size of array of lists?
preput_vector_size = opal_list_get_size(preput_keyval_vector);
rc = PMI_Spawn_multiple(count, cmds, argcs, argvs, maxprocs, info_keyval_sizes, info_keyval_vector, (int)preput_vector_size, preput_keyval_vector);
if( PMI_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI_Spawn_multiple");
return OPAL_ERROR;
}*/
return OPAL_ERR_NOT_IMPLEMENTED;
}
static int s1_put(opal_pmix_scope_t scope,
opal_value_t *kv)
{
int rc;
char* buffer_to_put;
int rem_offset = 0;
int data_to_put = 0;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:s1 put for key %s",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key);
if (OPAL_SUCCESS != (rc = opal_pmix_base_store_encoded (kv->key, (void*)&kv->data, kv->type, &pmix_packed_data, &pmix_packed_data_offset))) {
OPAL_ERROR_LOG(rc);
return rc;
}
if (pmix_packed_data_offset == 0) {
/* nothing to write */
return OPAL_SUCCESS;
}
if (pmix_packed_data_offset < pmix_vallen_max) {
/* this meta-key is still being filled,
* nothing to put yet
*/
return OPAL_SUCCESS;
}
    /* encode only fully filled meta keys */
rem_offset = pmix_packed_data_offset % pmix_vallen_max;
data_to_put = pmix_packed_data_offset - rem_offset;
buffer_to_put = (char*)malloc(data_to_put);
memcpy(buffer_to_put, pmix_packed_data, data_to_put);
opal_pmix_base_commit_packed (buffer_to_put, data_to_put, pmix_vallen_max, &pmix_pack_key, kvs_put);
free(buffer_to_put);
pmix_packed_data_offset = rem_offset;
if (0 == pmix_packed_data_offset) {
free(pmix_packed_data);
pmix_packed_data = NULL;
} else {
memmove (pmix_packed_data, pmix_packed_data + data_to_put, pmix_packed_data_offset);
pmix_packed_data = realloc (pmix_packed_data, pmix_packed_data_offset);
}
s1_committed = false;
return rc;
}
static int s1_fence(opal_process_name_t *procs, size_t nprocs)
{
int rc;
int32_t i;
opal_value_t *kp, kvn;
opal_hwloc_locality_t locality;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:s1 called fence",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
    /* check if there is a partially filled meta key and, if so, put it */
if (0 != pmix_packed_data_offset && NULL != pmix_packed_data) {
opal_pmix_base_commit_packed(pmix_packed_data, pmix_packed_data_offset, pmix_vallen_max, &pmix_pack_key, kvs_put);
pmix_packed_data_offset = 0;
free(pmix_packed_data);
pmix_packed_data = NULL;
}
/* if we haven't already done it, ensure we have committed our values */
if (!s1_committed) {
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:s1 committing values",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
if (PMI_SUCCESS != (rc = PMI_KVS_Commit(pmix_kvs_name))) {
OPAL_PMI_ERROR(rc, "PMI_KVS_Commit");
return OPAL_ERROR;
}
}
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:s1 performing barrier",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
/* use the PMI barrier function */
if (PMI_SUCCESS != (rc = PMI_Barrier())) {
OPAL_PMI_ERROR(rc, "PMI_Barrier");
return OPAL_ERROR;
}
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:s1 barrier complete",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
/* get the modex data from each local process and set the
* localities to avoid having the MPI layer fetch data
* for every process in the job */
if (!got_modex_data) {
got_modex_data = true;
/* we only need to set locality for each local rank as "not found"
* equates to "non-local" */
for (i=0; i < s1_nlranks; i++) {
            s1_pname.vid = s1_lranks[i];  /* global rank of this local peer */
rc = opal_pmix_base_cache_keys_locally((opal_identifier_t*)&s1_pname, OPAL_DSTORE_CPUSET,
&kp, pmix_kvs_name, pmix_vallen_max, kvs_get);
if (OPAL_SUCCESS != rc) {
OPAL_ERROR_LOG(rc);
return rc;
}
#if OPAL_HAVE_HWLOC
if (NULL == kp || NULL == kp->data.string) {
/* if we share a node, but we don't know anything more, then
* mark us as on the node as this is all we know
*/
locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
} else {
/* determine relative location on our node */
locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
opal_process_info.cpuset,
kp->data.string);
}
if (NULL != kp) {
OBJ_RELEASE(kp);
}
#else
/* all we know is we share a node */
locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
#endif
OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output,
"%s pmix:s1 proc %s locality %s",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
OPAL_NAME_PRINT(*(opal_identifier_t*)&s1_pname),
opal_hwloc_base_print_locality(locality)));
OBJ_CONSTRUCT(&kvn, opal_value_t);
kvn.key = strdup(OPAL_DSTORE_LOCALITY);
kvn.type = OPAL_UINT16;
kvn.data.uint16 = locality;
(void)opal_dstore.store(opal_dstore_internal, (opal_identifier_t*)&s1_pname, &kvn);
OBJ_DESTRUCT(&kvn);
}
}
return OPAL_SUCCESS;
}
static int s1_get(const opal_identifier_t *id,
const char *key,
opal_value_t **kv)
{
int rc;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:s1 called get for key %s",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), key);
rc = opal_pmix_base_cache_keys_locally(id, key, kv, pmix_kvs_name, pmix_vallen_max, kvs_get);
if (NULL == *kv) {
return OPAL_ERROR;
}
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:s1 got key %s",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), key);
return rc;
}
static int s1_publish(const char service_name[],
opal_list_t *info,
const char port[])
{
int rc;
if (PMI_SUCCESS != (rc = PMI_Publish_name(service_name, port))) {
OPAL_PMI_ERROR(rc, "PMI_Publish_name");
return OPAL_ERROR;
}
return OPAL_SUCCESS;
}
static int s1_lookup(const char service_name[],
opal_list_t *info,
char port[], int portLen)
{
int rc;
    // Do we need to allocate memory for "port" here? Otherwise the call may
    // not succeed. Also note that Slurm's PMI-1 library does not implement
    // this function, so the lookup may simply fail under Slurm.
if (PMI_SUCCESS != (rc = PMI_Lookup_name(service_name, port))) {
OPAL_PMI_ERROR(rc, "PMI_Lookup_name");
return OPAL_ERROR;
}
return OPAL_SUCCESS;
}
static int s1_unpublish(const char service_name[],
opal_list_t *info)
{
int rc;
if (PMI_SUCCESS != (rc = PMI_Unpublish_name(service_name))) {
OPAL_PMI_ERROR(rc, "PMI2_Nameserv_unpublish");
return OPAL_ERROR;
}
return OPAL_SUCCESS;
}
static bool s1_get_attr(const char *attr, opal_value_t **kv)
{
opal_value_t *kp;
if (0 == strcmp(PMIX_JOBID, attr)) {
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = s1_jobid;
*kv = kp;
return true;
}
if (0 == strcmp(PMIX_RANK, attr)) {
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = s1_rank;
*kv = kp;
return true;
}
if (0 == strcmp(PMIX_UNIV_SIZE, attr)) {
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = s1_usize;
*kv = kp;
return true;
}
if (0 == strcmp(PMIX_JOB_SIZE, attr)) {
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = s1_jsize;
*kv = kp;
return true;
}
if (0 == strcmp(PMIX_APPNUM, attr)) {
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = s1_appnum;
*kv = kp;
return true;
}
if (0 == strcmp(PMIX_LOCAL_RANK, attr)) {
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = s1_lrank;
*kv = kp;
return true;
}
if (0 == strcmp(PMIX_NODE_RANK, attr)) {
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = s1_nrank;
*kv = kp;
return true;
}
return false;
}
static int s1_get_attr_nb(const char *attr,
opal_pmix_cbfunc_t cbfunc,
void *cbdata)
{
return OPAL_ERR_NOT_SUPPORTED;
}
static int s1_job_connect(const char jobId[])
{
return OPAL_ERR_NOT_SUPPORTED;
}
static int s1_job_disconnect(const char jobId[])
{
return OPAL_ERR_NOT_SUPPORTED;
}
static char* pmix_error(int pmix_err)
{
char * err_msg;
switch(pmix_err) {
case PMI_FAIL: err_msg = "Operation failed"; break;
case PMI_ERR_INIT: err_msg = "PMI is not initialized"; break;
case PMI_ERR_NOMEM: err_msg = "Input buffer not large enough"; break;
case PMI_ERR_INVALID_ARG: err_msg = "Invalid argument"; break;
case PMI_ERR_INVALID_KEY: err_msg = "Invalid key argument"; break;
case PMI_ERR_INVALID_KEY_LENGTH: err_msg = "Invalid key length argument"; break;
case PMI_ERR_INVALID_VAL: err_msg = "Invalid value argument"; break;
case PMI_ERR_INVALID_VAL_LENGTH: err_msg = "Invalid value length argument"; break;
case PMI_ERR_INVALID_LENGTH: err_msg = "Invalid length argument"; break;
case PMI_ERR_INVALID_NUM_ARGS: err_msg = "Invalid number of arguments"; break;
case PMI_ERR_INVALID_ARGS: err_msg = "Invalid args argument"; break;
case PMI_ERR_INVALID_NUM_PARSED: err_msg = "Invalid num_parsed length argument"; break;
case PMI_ERR_INVALID_KEYVALP: err_msg = "Invalid keyvalp argument"; break;
case PMI_ERR_INVALID_SIZE: err_msg = "Invalid size argument"; break;
#if defined(PMI_ERR_INVALID_KVS)
/* pmix.h calls this a valid return code but mpich doesn't define it (slurm does). */
case PMI_ERR_INVALID_KVS: err_msg = "Invalid kvs argument"; break;
#endif
case PMI_SUCCESS: err_msg = "Success"; break;
default: err_msg = "Unkown error";
}
return err_msg;
}
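The jobid handling in s1_init() above packs the Slurm-provided "<jobid>.<stepid>" string into a single 32-bit identifier: the overall job number goes into the upper 16 bits (the "job family") and the step number into the lower 16 bits. The standalone sketch below is not part of the commit; the input string, the printed layout, and the helper name are illustrative only.

/* jobid_demo.c - standalone sketch of the Slurm jobid translation */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static uint32_t slurm_to_jobid(const char *domain_id)
{
    char *tmp = NULL;
    uint32_t jobfam = (uint32_t)strtoul(domain_id, &tmp, 10);
    uint32_t stepid = 0;

    if (NULL != tmp && '.' == *tmp) {
        stepid = (uint32_t)strtoul(tmp + 1, NULL, 10);  /* step over the '.' */
    }
    /* job family in the upper 16 bits, step number in the lower 16 bits */
    return (jobfam << 16) | stepid;
}

int main(void)
{
    /* e.g., Slurm job 1234, step 7 */
    uint32_t jobid = slurm_to_jobid("1234.7");
    printf("0x%08x (family %u, step %u)\n",
           (unsigned)jobid, (unsigned)(jobid >> 16), (unsigned)(jobid & 0xffff));
    return 0;
}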

opal/mca/pmix/s1/pmix_s1.h (Normal file, 31 additions)
@@ -0,0 +1,31 @@
/*
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_PMIX_S1_H
#define MCA_PMIX_S1_H
#include "opal_config.h"
#include "opal/mca/mca.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/mca/pmix/base/pmix_base_fns.h"
BEGIN_C_DECLS
/*
* Globally exported variable
*/
OPAL_DECLSPEC extern opal_pmix_base_component_t mca_pmix_s1_component;
OPAL_DECLSPEC extern const opal_pmix_base_module_t opal_pmix_s1_module;
END_C_DECLS
#endif /* MCA_PMIX_S1_H */

opal/mca/pmix/s1/pmix_s1_component.c  (new file, 105 lines)
@@ -0,0 +1,105 @@
/*
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* These symbols are in a file by themselves to provide nice linker
* semantics. Since linkers generally pull in symbols by object
* files, keeping these symbols as the only symbols in this file
* prevents utility programs such as "ompi_info" from having to import
* entire components just to query their version and parameters.
*/
#include "opal_config.h"
#include "opal/constants.h"
#include "opal/mca/pmix/pmix.h"
#include "pmix_s1.h"
/*
* Public string showing the pmix s1 component version number
*/
const char *opal_pmix_s1_component_version_string =
"OPAL s1 pmix MCA component version " OPAL_VERSION;
/*
* Local function
*/
static int pmix_s1_component_query(mca_base_module_t **module, int *priority);
static int pmix_s1_component_register(void);
/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it
*/
opal_pmix_base_component_t mca_pmix_s1_component = {
/* First, the mca_component_t struct containing meta information
about the component itself */
{
/* Indicate that we are a pmix v1.1.0 component (which also
implies a specific MCA version) */
OPAL_PMIX_BASE_VERSION_2_0_0,
/* Component name and version */
"s1",
OPAL_MAJOR_VERSION,
OPAL_MINOR_VERSION,
OPAL_RELEASE_VERSION,
/* Component open and close functions */
NULL,
NULL,
pmix_s1_component_query,
pmix_s1_component_register
},
/* Next the MCA v1.0.0 component meta data */
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
10 /*component priority */
};
static int pmix_s1_component_register(void)
{
int ret;
mca_base_component_t *component = &mca_pmix_s1_component.base_version;
mca_pmix_s1_component.priority = 10;
ret = mca_base_component_var_register(component, "priority",
"Priority of the pmix s1 component (default: 10)",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&mca_pmix_s1_component.priority);
if (0 > ret) {
return ret;
}
return OPAL_SUCCESS;
}
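/* Note (assuming standard MCA parameter naming; not stated in this commit):
 * the "priority" variable registered above is exposed as "pmix_s1_priority"
 * and can be overridden at run time, e.g.
 *   mpirun --mca pmix_s1_priority 30 ...
 * to raise s1 above the s2 component (whose default priority is 20) */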
static int pmix_s1_component_query(mca_base_module_t **module, int *priority)
{
/* disqualify ourselves if we are not under slurm */
if (NULL == getenv("SLURM_JOBID")) {
*priority = 0;
*module = NULL;
return OPAL_ERROR;
}
/* we can be considered, but set our priority by default
* to be less than s2 */
*priority = mca_pmix_s1_component.priority;
*module = (mca_base_module_t *)&opal_pmix_s1_module;
return OPAL_SUCCESS;
}

opal/mca/pmix/s2/Makefile.am  (new file, 40 lines)
@@ -0,0 +1,40 @@
#
# Copyright (c) 2014 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
sources = \
pmix_s2.h \
pmix_s2_component.c \
pmix_s2.c \
pmi2_pmap_parser.h \
pmi2_pmap_parser.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_opal_pmix_s2_DSO
component_noinst =
component_install = mca_pmix_s2.la
else
component_noinst = libmca_pmix_s2.la
component_install =
endif
mcacomponentdir = $(opallibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_pmix_s2_la_SOURCES = $(sources)
mca_pmix_s2_la_CPPFLAGS = $(pmix_s2_CPPFLAGS)
mca_pmix_s2_la_LDFLAGS = -module -avoid-version $(pmix_s2_LDFLAGS)
mca_pmix_s2_la_LIBADD = $(pmix_s2_LIBS)
noinst_LTLIBRARIES = $(component_noinst)
libmca_pmix_s2_la_SOURCES =$(sources)
libmca_pmix_s2_la_CPPFLAGS = $(pmix_s2_CPPFLAGS)
libmca_pmix_s2_la_LDFLAGS = -module -avoid-version $(pmix_s2_LDFLAGS)
libmca_pmix_s2_la_LIBADD = $(pmix_s2_LIBS)

opal/mca/pmix/s2/configure.m4  (new file, 29 lines)
@@ -0,0 +1,29 @@
# -*- shell-script -*-
#
# Copyright (c) 2014 Intel, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_pmix_s2_CONFIG([action-if-found], [action-if-not-found])
# -----------------------------------------------------------
AC_DEFUN([MCA_opal_pmix_s2_CONFIG], [
AC_CONFIG_FILES([opal/mca/pmix/s2/Makefile])
AC_REQUIRE([OPAL_CHECK_UGNI])
OPAL_CHECK_PMI([pmix_s2], [pmix_s2_good=1], [pmix_s2_good=0])
# Evaluate succeed / fail
AS_IF([test "$pmix_s2_good" = 1 -a "$opal_have_pmi2" = 1 -a "$opal_check_ugni_happy" = "no"],
[$1],
[$2])
# set build flags to use in makefile
AC_SUBST([pmix_s2_CPPFLAGS])
AC_SUBST([pmix_s2_LDFLAGS])
AC_SUBST([pmix_s2_LIBS])
])

opal/mca/pmix/s2/pmix_s2.c  (new file, 621 lines)
@@ -0,0 +1,621 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2007 The Trustees of Indiana University.
* All rights reserved.
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All
* rights reserved.
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "opal/constants.h"
#include "opal/types.h"
#include "opal_stdint.h"
#include "opal/mca/hwloc/base/base.h"
#include "opal/util/output.h"
#include "opal/util/proc.h"
#include "opal/util/show_help.h"
#include "pmi2_pmap_parser.h"
#include <string.h>
#include <pmi.h>
#include <pmi2.h>
#include "opal/mca/pmix/base/base.h"
#include "pmix_s2.h"
static int s2_init(void);
static int s2_fini(void);
static bool s2_initialized(void);
static int s2_abort(int flag, const char msg[]);
static int s2_spawn(int count, const char * cmds[],
int argcs[], const char ** argvs[],
const int maxprocs[],
opal_list_t *info_keyval_vector,
opal_list_t *preput_keyval_vector,
char jobId[], int jobIdSize,
int errors[]);
static int s2_put(opal_pmix_scope_t scope,
opal_value_t *kv);
static int s2_fence(opal_process_name_t *procs, size_t nprocs);
static int s2_get(const opal_identifier_t *id,
const char *key,
opal_value_t **kv);
static int s2_publish(const char service_name[],
opal_list_t *info,
const char port[]);
static int s2_lookup(const char service_name[],
opal_list_t *info,
char port[], int portLen);
static int s2_unpublish(const char service_name[],
opal_list_t *info);
static bool s2_get_attr(const char *attr, opal_value_t **kv);
static int s2_job_connect(const char jobId[]);
static int s2_job_disconnect(const char jobId[]);
const opal_pmix_base_module_t opal_pmix_s2_module = {
s2_init,
s2_fini,
s2_initialized,
s2_abort,
s2_fence,
NULL,
s2_put,
s2_get,
NULL,
s2_publish,
s2_lookup,
s2_unpublish,
s2_get_attr,
NULL,
s2_spawn,
s2_job_connect,
s2_job_disconnect,
NULL,
NULL
};
// usage accounting
static int pmix_init_count = 0;
// PMI constant values:
static int pmix_kvslen_max = 0;
static int pmix_keylen_max = 0;
static int pmix_vallen_max = 0;
// Job environment description
static char *pmix_kvs_name = NULL;
static char* pmix_packed_data = NULL;
static int pmix_packed_data_offset = 0;
static int pmix_pack_key = 0;
static uint32_t s2_jobid;
static int s2_rank;
static uint16_t s2_lrank;
static uint16_t s2_nrank;
static int s2_usize;
static int s2_jsize;
static int s2_appnum;
static int s2_nlranks;
static int *s2_lranks=NULL;
static struct {
uint32_t jid;
uint32_t vid;
} s2_pname;
static bool got_modex_data = false;
static char* pmix_error(int pmix_err);
#define OPAL_PMI_ERROR(pmi_err, pmi_func) \
do { \
opal_output(0, "%s [%s:%d:%s]: %s\n", \
pmi_func, __FILE__, __LINE__, __func__, \
pmix_error(pmi_err)); \
} while(0)
static int kvs_put(const char key[], const char value[])
{
int rc;
rc = PMI2_KVS_Put(key, value);
if( PMI2_SUCCESS != rc ){
OPAL_PMI_ERROR(rc, "PMI2_KVS_Put");
return OPAL_ERROR;
}
return OPAL_SUCCESS;
}
static int kvs_get(const char key[], char value [], int maxvalue)
{
int rc;
int len;
rc = PMI2_KVS_Get(pmix_kvs_name, PMI2_ID_NULL, key, value, maxvalue, &len);
if( PMI2_SUCCESS != rc || len < 0){
OPAL_PMI_ERROR(rc, "PMI2_KVS_Get");
return OPAL_ERROR;
}
return OPAL_SUCCESS;
}
static int s2_init(void)
{
int spawned, size, rank, appnum;
int rc, ret = OPAL_ERROR;
char buf[16];
int found;
int my_node;
char *tmp;
uint32_t jobfam, stepid;
int i;
/* if PMI2 is already initialized, there is nothing more to do;
 * otherwise, if we can't start it up, we can't be used */
if ( PMI2_Initialized () ) {
return OPAL_SUCCESS;
}
size = -1;
rank = -1;
appnum = -1;
if (PMI2_SUCCESS != (rc = PMI2_Init(&spawned, &size, &rank, &appnum))) {
opal_show_help("help-pmix-base.txt", "pmix2-init-failed", true, rc);
return OPAL_ERROR;
}
if( size < 0 || rank < 0 ){
opal_show_help("help-pmix-base.txt", "pmix2-init-returned-bad-values", true);
goto err_exit;
}
s2_jsize = size;
s2_rank = rank;
s2_appnum = appnum;
pmix_vallen_max = PMI2_MAX_VALLEN;
pmix_kvslen_max = PMI2_MAX_VALLEN; // FIX ME: What to put here for versatility?
pmix_keylen_max = PMI2_MAX_KEYLEN;
rc = PMI2_Info_GetJobAttr("universeSize", buf, 16, &found);
if( PMI2_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI_Get_universe_size");
goto err_exit;
}
s2_usize = atoi(buf);
pmix_kvs_name = (char*)malloc(pmix_kvslen_max);
if( pmix_kvs_name == NULL ){
PMI2_Finalize();
ret = OPAL_ERR_OUT_OF_RESOURCE;
goto err_exit;
}
rc = PMI2_Job_GetId(pmix_kvs_name, pmix_kvslen_max);
if( PMI2_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI2_Job_GetId");
goto err_exit;
}
/* Slurm PMI provides the job id as an integer followed
* by a '.', followed by essentially a stepid. The first integer
* defines an overall job number. The second integer is the number of
* individual jobs we have run within that allocation. So we translate
* this as the overall job number equating to our job family, and
* the individual number equating to our local jobid
*/
jobfam = strtoul(pmix_kvs_name, &tmp, 10);
if (NULL == tmp || '.' != *tmp) {
/* hmmm - no '.', so let's just use zero */
stepid = 0;
} else {
tmp++; /* step over the '.' */
stepid = strtoul(tmp, NULL, 10);
}
/* now build the jobid */
s2_jobid = (jobfam << 16) | stepid;
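/* worked example (hypothetical values): a kvs name of "1234.7" gives
 * jobfam = 1234 and stepid = 7, so s2_jobid = (1234 << 16) | 7 = 0x04d20007 */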
/* store our name in the opal_proc_t so that
* debug messages will make sense - an upper
* layer will eventually overwrite it, but that
* won't do any harm */
s2_pname.jid = s2_jobid;
s2_pname.vid = s2_rank;
opal_proc_set_name((opal_process_name_t*)&s2_pname);
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:s2: assigned tmp name",
OPAL_NAME_PRINT(*(opal_process_name_t*)&s2_pname));
char *pmapping = (char*)malloc(PMI2_MAX_VALLEN);
if( pmapping == NULL ){
rc = OPAL_ERR_OUT_OF_RESOURCE;
OPAL_ERROR_LOG(rc);
return rc;
}
rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found);
if( !found || PMI2_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc,"PMI2_Info_GetJobAttr");
return OPAL_ERROR;
}
s2_lranks = mca_common_pmi2_parse_pmap(pmapping, s2_pname.vid, &my_node, &s2_nlranks);
if (NULL == s2_lranks) {
rc = OPAL_ERR_OUT_OF_RESOURCE;
OPAL_ERROR_LOG(rc);
return rc;
}
free(pmapping);
/* find ourselves */
for (i=0; i < s2_nlranks; i++) {
if (s2_rank == s2_lranks[i]) {
s2_lrank = i;
s2_nrank = my_node;
break;
}
}
return OPAL_SUCCESS;
err_exit:
PMI2_Finalize();
return ret;
}
static int s2_fini(void) {
if (0 == pmix_init_count) {
return OPAL_SUCCESS;
}
if (0 == --pmix_init_count) {
PMI2_Finalize();
}
if (NULL != pmix_kvs_name) {
free(pmix_kvs_name);
pmix_kvs_name = NULL;
}
if (NULL != s2_lranks) {
free(s2_lranks);
}
return OPAL_SUCCESS;
}
static bool s2_initialized(void)
{
if (0 < pmix_init_count) {
return true;
}
return false;
}
static int s2_abort(int flag, const char msg[])
{
PMI2_Abort(flag, msg);
return OPAL_SUCCESS;
}
static int s2_spawn(int count, const char * cmds[],
int argcs[], const char ** argvs[],
const int maxprocs[],
opal_list_t *info_keyval_vector,
opal_list_t *preput_keyval_vector,
char jobId[], int jobIdSize,
int errors[])
{
/*
int rc;
size_t preput_vector_size;
const int info_keyval_sizes[1];
info_keyval_sizes[0] = (int)opal_list_get_size(info_keyval_vector);
//FIXME what's the size of array of lists?
preput_vector_size = opal_list_get_size(preput_keyval_vector);
rc = PMI2_Job_Spawn(count, cmds, argcs, argvs, maxprocs, info_keyval_sizes, info_keyval_vector, (int)preput_vector_size, preput_keyval_vector, jobId, jobIdSize, errors);
if( PMI2_SUCCESS != rc ) {
OPAL_PMI_ERROR(rc, "PMI2_Job_Spawn");
return OPAL_ERROR;
}*/
return OPAL_ERR_NOT_IMPLEMENTED;
}
static int s2_job_connect(const char jobId[])
{
int rc;
PMI2_Connect_comm_t *conn;
/*FIXME should change function prototype to add void* conn */
rc = PMI2_Job_Connect(jobId, conn);
if( PMI2_SUCCESS != rc ){
OPAL_PMI_ERROR(rc, "PMI2_Job_Connect");
return OPAL_ERROR;
}
return OPAL_SUCCESS;
}
static int s2_job_disconnect(const char jobId[])
{
int rc;
rc = PMI2_Job_Disconnect(jobId);
if( PMI2_SUCCESS != rc ){
OPAL_PMI_ERROR(rc, "PMI2_Job_Disconnect");
return OPAL_ERROR;
}
return OPAL_SUCCESS;
}
static int s2_put(opal_pmix_scope_t scope,
opal_value_t *kv)
{
int rc;
char* buffer_to_put;
int rem_offset = 0;
int data_to_put = 0;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:s2 put for key %s",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), kv->key);
if (OPAL_SUCCESS != (rc = opal_pmix_base_store_encoded (kv->key, (void*)&kv->data, kv->type, &pmix_packed_data, &pmix_packed_data_offset))) {
OPAL_ERROR_LOG(rc);
return rc;
}
if (pmix_packed_data_offset == 0) {
/* nothing to write */
return OPAL_SUCCESS;
}
if (pmix_packed_data_offset < pmix_vallen_max) {
/* this meta-key is still being filled,
* nothing to put yet
*/
return OPAL_SUCCESS;
}
/* encode only full filled meta keys */
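/* illustrative arithmetic (hypothetical sizes): with pmix_vallen_max = 1024 and
 * pmix_packed_data_offset = 2500, rem_offset = 452 and data_to_put = 2048; the
 * 2048-byte prefix is committed now and the 452-byte tail is held back, to be
 * flushed by a later put or by the partial-commit path in s2_fence */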
rem_offset = pmix_packed_data_offset % pmix_vallen_max;
data_to_put = pmix_packed_data_offset - rem_offset;
buffer_to_put = (char*)malloc(data_to_put);
memcpy(buffer_to_put, pmix_packed_data, data_to_put);
opal_pmix_base_commit_packed (buffer_to_put, data_to_put, pmix_vallen_max, &pmix_pack_key, kvs_put);
free(buffer_to_put);
pmix_packed_data_offset = rem_offset;
if (0 == pmix_packed_data_offset) {
free(pmix_packed_data);
pmix_packed_data = NULL;
} else {
memmove (pmix_packed_data, pmix_packed_data + data_to_put, pmix_packed_data_offset);
pmix_packed_data = realloc (pmix_packed_data, pmix_packed_data_offset);
}
return rc;
}
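/* illustrative caller sketch (hypothetical key and payload; the PMIX_GLOBAL
 * scope name is assumed from opal_pmix_scope_t and is not shown in this hunk):
 *   opal_value_t kv;
 *   OBJ_CONSTRUCT(&kv, opal_value_t);
 *   kv.key = strdup("my-modex-blob");
 *   kv.type = OPAL_STRING;
 *   kv.data.string = strdup("payload");
 *   s2_put(PMIX_GLOBAL, &kv);
 *   OBJ_DESTRUCT(&kv);
 */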
static int s2_fence(opal_process_name_t *procs, size_t nprocs)
{
int rc;
int32_t i;
opal_value_t *kp, kvn;
opal_hwloc_locality_t locality;
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:s2 called fence",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
/* check if there is partially filled meta key and put them */
if (0 != pmix_packed_data_offset && NULL != pmix_packed_data) {
opal_pmix_base_commit_packed(pmix_packed_data, pmix_packed_data_offset, pmix_vallen_max, &pmix_pack_key, kvs_put);
pmix_packed_data_offset = 0;
free(pmix_packed_data);
pmix_packed_data = NULL;
}
if (PMI2_SUCCESS != (rc = PMI2_KVS_Fence())) {
OPAL_PMI_ERROR(rc, "PMI2_KVS_Fence");
return OPAL_ERROR;
}
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
"%s pmix:s2 kvs_fence complete",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
/* get the modex data from each local process and set the
* localities to avoid having the MPI layer fetch data
* for every process in the job */
if (!got_modex_data) {
got_modex_data = true;
/* we only need to set locality for each local rank as "not found"
* equates to "non-local" */
for (i=0; i < s2_nlranks; i++) {
s2_pname.vid = i;
rc = opal_pmix_base_cache_keys_locally((opal_identifier_t*)&s2_pname, OPAL_DSTORE_CPUSET,
&kp, pmix_kvs_name, pmix_vallen_max, kvs_get);
if (OPAL_SUCCESS != rc) {
OPAL_ERROR_LOG(rc);
return rc;
}
#if OPAL_HAVE_HWLOC
if (NULL == kp || NULL == kp->data.string) {
/* if we share a node, but we don't know anything more, then
* mark us as on the node as this is all we know
*/
locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
} else {
/* determine relative location on our node */
locality = opal_hwloc_base_get_relative_locality(opal_hwloc_topology,
opal_process_info.cpuset,
kp->data.string);
}
if (NULL != kp) {
OBJ_RELEASE(kp);
}
#else
/* all we know is we share a node */
locality = OPAL_PROC_ON_CLUSTER | OPAL_PROC_ON_CU | OPAL_PROC_ON_NODE;
#endif
OPAL_OUTPUT_VERBOSE((1, opal_pmix_base_framework.framework_output,
"%s pmix:s2 proc %s locality %s",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
OPAL_NAME_PRINT(*(opal_identifier_t*)&s2_pname),
opal_hwloc_base_print_locality(locality)));
OBJ_CONSTRUCT(&kvn, opal_value_t);
kvn.key = strdup(OPAL_DSTORE_LOCALITY);
kvn.type = OPAL_UINT16;
kvn.data.uint16 = locality;
(void)opal_dstore.store(opal_dstore_internal, (opal_identifier_t*)&s2_pname, &kvn);
OBJ_DESTRUCT(&kvn);
}
}
return OPAL_SUCCESS;
}
static int s2_get(const opal_identifier_t *id,
const char *key,
opal_value_t **kv)
{
int rc;
rc = opal_pmix_base_cache_keys_locally(id, key, kv, pmix_kvs_name, pmix_vallen_max, kvs_get);
if (NULL == *kv) {
return OPAL_ERROR;
}
return rc;
}
static int s2_publish(const char service_name[],
opal_list_t *info,
const char port[])
{
int rc;
if (PMI2_SUCCESS != (rc = PMI2_Nameserv_publish(service_name, NULL, port))) {
OPAL_PMI_ERROR(rc, "PMI2_Nameserv_publish");
return OPAL_ERROR;
}
return OPAL_SUCCESS;
}
static int s2_lookup(const char service_name[],
opal_list_t *info,
char port[], int portLen)
{
int rc;
if (PMI2_SUCCESS != (rc = PMI2_Nameserv_lookup(service_name, NULL, port, portLen))) {
OPAL_PMI_ERROR(rc, "PMI2_Nameserv_lookup");
return OPAL_ERROR;
}
return OPAL_SUCCESS;
}
static int s2_unpublish(const char service_name[],
opal_list_t *info)
{
int rc;
if (PMI2_SUCCESS != (rc = PMI2_Nameserv_unpublish(service_name, NULL))) {
OPAL_PMI_ERROR(rc, "PMI2_Nameserv_unpublish");
return OPAL_ERROR;
}
return OPAL_SUCCESS;
}
static bool s2_get_attr(const char *attr, opal_value_t **kv)
{
opal_value_t *kp;
if (0 == strcmp(PMIX_JOBID, attr)) {
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = s2_jobid;
*kv = kp;
return true;
}
if (0 == strcmp(PMIX_RANK, attr)) {
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = s2_rank;
*kv = kp;
return true;
}
if (0 == strcmp(PMIX_UNIV_SIZE, attr)) {
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = s2_usize;
*kv = kp;
return true;
}
if (0 == strcmp(PMIX_JOB_SIZE, attr)) {
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = s2_jsize;
*kv = kp;
return true;
}
if (0 == strcmp(PMIX_APPNUM, attr)) {
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = s2_appnum;
*kv = kp;
return true;
}
if (0 == strcmp(PMIX_LOCAL_RANK, attr)) {
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = s2_lrank;
*kv = kp;
return true;
}
if (0 == strcmp(PMIX_NODE_RANK, attr)) {
kp = OBJ_NEW(opal_value_t);
kp->key = strdup(attr);
kp->type = OPAL_UINT32;
kp->data.uint32 = s2_nrank;
*kv = kp;
return true;
}
return false;
}
static char* pmix_error(int pmix_err)
{
char * err_msg;
switch(pmix_err) {
case PMI2_FAIL: err_msg = "Operation failed"; break;
case PMI2_ERR_INIT: err_msg = "PMI is not initialized"; break;
case PMI2_ERR_NOMEM: err_msg = "Input buffer not large enough"; break;
case PMI2_ERR_INVALID_ARG: err_msg = "Invalid argument"; break;
case PMI2_ERR_INVALID_KEY: err_msg = "Invalid key argument"; break;
case PMI2_ERR_INVALID_KEY_LENGTH: err_msg = "Invalid key length argument"; break;
case PMI2_ERR_INVALID_VAL: err_msg = "Invalid value argument"; break;
case PMI2_ERR_INVALID_VAL_LENGTH: err_msg = "Invalid value length argument"; break;
case PMI2_ERR_INVALID_LENGTH: err_msg = "Invalid length argument"; break;
case PMI2_ERR_INVALID_NUM_ARGS: err_msg = "Invalid number of arguments"; break;
case PMI2_ERR_INVALID_ARGS: err_msg = "Invalid args argument"; break;
case PMI2_ERR_INVALID_NUM_PARSED: err_msg = "Invalid num_parsed length argument"; break;
case PMI2_ERR_INVALID_KEYVALP: err_msg = "Invalid keyvalp argument"; break;
case PMI2_ERR_INVALID_SIZE: err_msg = "Invalid size argument"; break;
case PMI2_SUCCESS: err_msg = "Success"; break;
default: err_msg = "Unkown error";
}
return err_msg;
}

opal/mca/pmix/s2/pmix_s2.h  (new file, 31 lines)
@@ -0,0 +1,31 @@
/*
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_PMIX_S2_H
#define MCA_PMIX_S2_H
#include "opal_config.h"
#include "opal/mca/mca.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/mca/pmix/base/pmix_base_fns.h"
BEGIN_C_DECLS
/*
* Globally exported variable
*/
OPAL_DECLSPEC extern opal_pmix_base_component_t mca_pmix_s2_component;
OPAL_DECLSPEC extern const opal_pmix_base_module_t opal_pmix_s2_module;
END_C_DECLS
#endif /* MCA_PMIX_S2_H */

opal/mca/pmix/s2/pmix_s2_component.c  (new file, 106 lines)
@@ -0,0 +1,106 @@
/*
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* These symbols are in a file by themselves to provide nice linker
* semantics. Since linkers generally pull in symbols by object
* files, keeping these symbols as the only symbols in this file
* prevents utility programs such as "ompi_info" from having to import
* entire components just to query their version and parameters.
*/
#include "opal_config.h"
#include "opal/constants.h"
#include "opal/mca/pmix/pmix.h"
#include "pmix_s2.h"
/*
* Public string showing the pmix s2 component version number
*/
const char *opal_pmix_s2_component_version_string =
"OPAL s2 pmix MCA component version " OPAL_VERSION;
/*
* Local function
*/
static int pmix_s2_component_query(mca_base_module_t **module, int *priority);
static int pmix_s2_component_register(void);
/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it
*/
opal_pmix_base_component_t mca_pmix_s2_component = {
/* First, the mca_component_t struct containing meta information
about the component itself */
{
/* Indicate that we are a pmix v1.1.0 component (which also
implies a specific MCA version) */
OPAL_PMIX_BASE_VERSION_2_0_0,
/* Component name and version */
"s2",
OPAL_MAJOR_VERSION,
OPAL_MINOR_VERSION,
OPAL_RELEASE_VERSION,
/* Component open and close functions */
NULL,
NULL,
pmix_s2_component_query,
pmix_s2_component_register
},
/* Next the MCA v1.0.0 component meta data */
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
20 /* component priority */
};
static int pmix_s2_component_register(void)
{
int ret;
mca_base_component_t *component = &(mca_pmix_s2_component.base_version);
ret = mca_base_component_var_register(component, "priority",
"Priority of the pmix s2 component (default: 20)",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
&mca_pmix_s2_component.priority);
if (0 > ret) {
return ret;
}
return OPAL_SUCCESS;
}
static int pmix_s2_component_query(mca_base_module_t **module, int *priority)
{
/* disqualify ourselves if we are not under slurm, or
 * if the job was not launched with --mpi=pmi2 (no PMI_FD in the environment) */
if (NULL == getenv("SLURM_JOBID") ||
NULL == getenv("PMI_FD")) {
*priority = 0;
*module = NULL;
return OPAL_ERROR;
}
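/* illustrative launch (assumption about Slurm behavior, not part of this commit):
 * "srun --mpi=pmi2 ./app" exports both SLURM_JOBID and PMI_FD, which allows
 * this component to be selected */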
/* we can be considered */
*priority = mca_pmix_s2_component.priority;
*module = (mca_base_module_t *)&opal_pmix_s2_module;
return OPAL_SUCCESS;
}

(modified file, path not shown)
@@ -11,6 +11,7 @@
* All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2010-2012 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
@@ -24,6 +25,7 @@
#define OPAL_H
#include "opal_config.h"
#include "opal/types.h"
BEGIN_C_DECLS

(modified file, path not shown)
@@ -235,6 +235,12 @@ opal_err2str(int errnum, const char **errmsg)
case OPAL_ERR_AUTHENTICATION_FAILED:
retval = "Authentication failed";
break;
case OPAL_ERR_COMM_FAILURE:
retval = "Comm failure";
break;
case OPAL_ERR_SERVER_NOT_AVAIL:
retval = "Server not available";
break;
default:
retval = NULL;
}
@@ -616,5 +622,3 @@ void opal_warn_fork(void)
}
#endif
}

(modified file, path not shown)
@@ -14,6 +14,7 @@
#include "opal/util/proc.h"
#include "opal/util/arch.h"
#include "opal/mca/dstore/dstore.h"
#include "opal/mca/pmix/pmix.h"
opal_process_info_t opal_process_info = {
.nodename = "not yet named",
@@ -29,7 +30,7 @@ opal_process_info_t opal_process_info = {
static opal_proc_t opal_local_proc = {
{ .opal_list_next = NULL,
.opal_list_prev = NULL},
0x1122334455667788,
OPAL_NAME_INVALID,
0,
0,
NULL,
@@ -88,6 +89,15 @@ int opal_proc_local_set(opal_proc_t* proc)
return OPAL_SUCCESS;
}
/* this function is used to temporarily set the local
* name while OPAL and upper layers are initializing,
* thus allowing debug messages to be more easily
* understood */
void opal_proc_set_name(opal_process_name_t *name)
{
opal_local_proc.proc_name = *name;
}
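/* usage note (taken from elsewhere in this commit): the pmix components call
 * this during init, e.g. opal_proc_set_name((opal_process_name_t*)&s2_pname)
 * in pmix_s2.c, so that OPAL_NAME_PRINT output is meaningful before the RTE
 * overwrites the name with the real one */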
/**
* The following functions are surrogates for the RTE functionality, and are not supposed
* to be called. Instead, the corresponding function pointer should be set by the upper layer
@@ -110,112 +120,3 @@ char* (*opal_process_name_print)(const opal_process_name_t) = opal_process_name_
uint32_t (*opal_process_name_vpid)(const opal_process_name_t) = opal_process_name_vpid_should_never_be_called;
uint32_t (*opal_process_name_jobid)(const opal_process_name_t) = opal_process_name_vpid_should_never_be_called;
static int
opal_modex_send_internal(const mca_base_component_t *source_component,
const void *data, size_t size)
{
int rc;
char *key;
opal_byte_object_t bo;
opal_value_t kv;
const opal_proc_t *proc = opal_proc_local_get();
key = mca_base_component_to_string(source_component);
if (NULL == key) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
bo.bytes = (uint8_t *)data;
bo.size = size;
/* the store API makes a copy of the provided data */
OBJ_CONSTRUCT(&kv, opal_value_t);
kv.key = key;
if (OPAL_SUCCESS != (rc = opal_value_load(&kv, (void*)&bo, OPAL_BYTE_OBJECT))) {
OBJ_DESTRUCT(&kv);
free(key);
return rc;
}
/* MPI connection data is to be shared with ALL other processes */
rc = opal_dstore.store(opal_dstore_peer, (opal_identifier_t*)&proc->proc_name, &kv);
OBJ_DESTRUCT(&kv);
return rc;
}
static int
opal_modex_recv_internal(const mca_base_component_t *component,
const opal_proc_t *proc,
void **buffer, size_t *size)
{
int rc;
opal_list_t myvals;
opal_value_t *kv;
char *key;
opal_byte_object_t *boptr;
key = mca_base_component_to_string(component);
if (NULL == key) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
OPAL_OUTPUT_VERBOSE((2, 0, "%s fetch data from %s for %s",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
OPAL_NAME_PRINT(proc->proc_name), key));
OBJ_CONSTRUCT(&myvals, opal_list_t);
/* the peer dstore contains our own data that will be shared
* with our peers - the nonpeer dstore contains data we received
* that would only be shared with nonpeer procs
*/
if (OPAL_SUCCESS != (rc = opal_dstore.fetch(opal_dstore_nonpeer,
(opal_identifier_t*)(&proc->proc_name),
key, &myvals))) {
/* see if we can find it in the internal dstore */
OPAL_OUTPUT_VERBOSE((2, 0, "%s searching nonpeer dstore for %s",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), key));
if (OPAL_SUCCESS != (rc = opal_dstore.fetch(opal_dstore_internal,
(opal_identifier_t*)(&proc->proc_name),
key, &myvals))) {
/* try one last place - the peer dstore in case it got stuck there for some reason */
OPAL_OUTPUT_VERBOSE((2, 0, "%s searching internal dstore for %s",
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), key));
if (OPAL_SUCCESS != (rc = opal_dstore.fetch(opal_dstore_peer,
(opal_identifier_t*)(&proc->proc_name),
key, &myvals))) {
OPAL_LIST_DESTRUCT(&myvals);
free(key);
return rc;
}
}
}
/* only one value should have been returned */
kv = (opal_value_t*)opal_list_get_first(&myvals);
if (NULL == kv) {
free(key);
return OPAL_ERROR;
}
opal_value_unload(kv, (void**)&boptr, OPAL_BYTE_OBJECT);
OPAL_LIST_DESTRUCT(&myvals);
if (OPAL_SUCCESS == rc) {
/* xfer the data - it was allocated in the call */
*buffer = (void*)boptr->bytes;
*size = boptr->size;
/* we no longer require the struct itself since all we
* wanted was the data inside it
*/
free(boptr);
}
free(key);
return OPAL_SUCCESS;
}
int
(*opal_modex_send)(const mca_base_component_t *source_component,
const void *data, size_t size) = opal_modex_send_internal;
int
(*opal_modex_recv)(const mca_base_component_t *component,
const opal_proc_t *proc,
void **buffer, size_t *size) = opal_modex_recv_internal;

(modified file, path not shown)
@@ -3,6 +3,7 @@
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2013 Inria. All rights reserved.
* Copyright (c) 2014 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
@@ -18,6 +19,7 @@
#include "opal_config.h"
#include "opal/class/opal_list.h"
#include "opal/mca/hwloc/hwloc.h"
#include "opal/types.h"
#include "opal/dss/dss.h"
#if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
@@ -87,6 +89,7 @@ OPAL_DECLSPEC extern opal_process_info_t opal_process_info;
OPAL_DECLSPEC extern opal_proc_t* opal_proc_local_get(void);
OPAL_DECLSPEC extern int opal_proc_local_set(opal_proc_t* proc);
OPAL_DECLSPEC extern void opal_proc_set_name(opal_process_name_t *name);
/**
* Compare two processor name and return an integer greater than,
@@ -103,16 +106,6 @@ OPAL_DECLSPEC extern uint32_t (*opal_process_name_jobid)(const opal_process_name
#define OPAL_NAME_PRINT(OPAL_PN) opal_process_name_print(OPAL_PN)
#define OPAL_PROC_MY_NAME (opal_proc_local_get()->proc_name)
#define OPAL_PROC_MY_HOSTNAME (opal_proc_local_get()->proc_hostname)
/**
* Access to the modex.
*/
OPAL_DECLSPEC extern int
(*opal_modex_send)(const mca_base_component_t *source_component,
const void *data, size_t size);
OPAL_DECLSPEC extern int
(*opal_modex_recv)(const mca_base_component_t *component,
const opal_proc_t *proc,
void **buffer, size_t *size);
#define OPAL_NAME_INVALID 0xffffffffffffffff
#endif /* OPAL_PROC_H */

(modified file, path not shown)
@@ -202,7 +202,8 @@ OPAL_DECLSPEC OBJ_CLASS_DECLARATION(orte_attribute_t);
/* Attribute */
#define ORTE_ATTRIBUTE (OPAL_DSS_ID_DYNAMIC + 21)
/* Grpcomm signature */
#define ORTE_SIGNATURE (OPAL_DSS_ID_DYNAMIC + 22)
/* provide a boundary for others to use */
#define ORTE_DSS_ID_DYNAMIC (OPAL_DSS_ID_DYNAMIC + 50)

Some files were not shown because too many files changed in this diff.