Merge pull request #897 from rhc54/topic/oob
Remove the last involvement of the OOB system from the MPI layer
Этот коммит содержится в:
Коммит
5597416fe0
@ -58,26 +58,8 @@
|
||||
# parameters available and their default values.
|
||||
#
|
||||
|
||||
#default hostfile
|
||||
#orte_default_hostfile = /home/common/hosts
|
||||
#ras_slurm_enable_dyn_alloc = 1
|
||||
#ras_slurm_config_file = /home/common/slurm/conf/slurm.conf
|
||||
|
||||
# Basic behavior to smooth startup
|
||||
mca_base_component_show_load_errors = 1
|
||||
mpi_param_check = 0
|
||||
orte_abort_timeout = 10
|
||||
hwloc_base_mem_bind_failure_action = silent
|
||||
|
||||
## Protect the shared file systems
|
||||
|
||||
## Add the interface for out-of-band communication
|
||||
## and set it up
|
||||
oob_tcp_peer_retries = 120
|
||||
#oob_tcp_connect_timeout=600
|
||||
|
||||
## Define the MPI interconnects
|
||||
btl = sm,tcp,self
|
||||
|
||||
## Setup shared memory
|
||||
btl_sm_free_list_max = 768
|
||||
|
@ -60,19 +60,6 @@
|
||||
|
||||
# Basic behavior to smooth startup
|
||||
mca_base_component_show_load_errors = 1
|
||||
mpi_param_check = 0
|
||||
orte_abort_timeout = 10
|
||||
hwloc_base_mem_bind_failure_action = silent
|
||||
|
||||
## Protect the shared file systems
|
||||
|
||||
## Add the interface for out-of-band communication
|
||||
## and set it up
|
||||
oob_tcp_peer_retries = 120
|
||||
#oob_tcp_connect_timeout=600
|
||||
|
||||
## Define the MPI interconnects
|
||||
btl = sm,tcp,self
|
||||
|
||||
## Setup shared memory
|
||||
btl_sm_free_list_max = 768
|
||||
|
@ -60,19 +60,5 @@
|
||||
|
||||
# Basic behavior to smooth startup
|
||||
mca_base_component_show_load_errors = 1
|
||||
mpi_param_check = 0
|
||||
orte_abort_timeout = 10
|
||||
hwloc_base_mem_bind_failure_action = silent
|
||||
|
||||
## Protect the shared file systems
|
||||
|
||||
## Add the interface for out-of-band communication
|
||||
## and set it up
|
||||
oob_tcp_peer_retries = 120
|
||||
#oob_tcp_connect_timeout=600
|
||||
|
||||
## Define the MPI interconnects
|
||||
btl = sm,tcp,self
|
||||
|
||||
## Setup shared memory
|
||||
btl_sm_free_list_max = 768
|
||||
|
@ -17,7 +17,7 @@
|
||||
* Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2012 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -30,6 +30,7 @@
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
@ -58,7 +59,7 @@ typedef int ompi_comm_cid_allredfct (int *inbuf, int* outbuf,
|
||||
ompi_communicator_t *comm,
|
||||
ompi_communicator_t *bridgecomm,
|
||||
void* lleader, void* rleader,
|
||||
int send_first );
|
||||
int send_first, char *tag, int iter );
|
||||
|
||||
static int ompi_comm_allreduce_intra (int *inbuf, int* outbuf,
|
||||
int count, struct ompi_op_t *op,
|
||||
@ -66,7 +67,7 @@ static int ompi_comm_allreduce_intra (int *inbuf, int* outbuf,
|
||||
ompi_communicator_t *bridgecomm,
|
||||
void* local_leader,
|
||||
void* remote_ledaer,
|
||||
int send_first );
|
||||
int send_first, char *tag, int iter );
|
||||
|
||||
static int ompi_comm_allreduce_inter (int *inbuf, int *outbuf,
|
||||
int count, struct ompi_op_t *op,
|
||||
@ -74,7 +75,7 @@ static int ompi_comm_allreduce_inter (int *inbuf, int *outbuf,
|
||||
ompi_communicator_t *bridgecomm,
|
||||
void* local_leader,
|
||||
void* remote_leader,
|
||||
int send_first );
|
||||
int send_first, char *tag, int iter );
|
||||
|
||||
static int ompi_comm_allreduce_intra_bridge(int *inbuf, int* outbuf,
|
||||
int count, struct ompi_op_t *op,
|
||||
@ -82,15 +83,15 @@ static int ompi_comm_allreduce_intra_bridge(int *inbuf, int* outbuf,
|
||||
ompi_communicator_t *bridgecomm,
|
||||
void* local_leader,
|
||||
void* remote_leader,
|
||||
int send_first);
|
||||
int send_first, char *tag, int iter);
|
||||
|
||||
static int ompi_comm_allreduce_intra_oob (int *inbuf, int* outbuf,
|
||||
int count, struct ompi_op_t *op,
|
||||
ompi_communicator_t *intercomm,
|
||||
ompi_communicator_t *bridgecomm,
|
||||
void* local_leader,
|
||||
void* remote_leader,
|
||||
int send_first );
|
||||
static int ompi_comm_allreduce_intra_pmix (int *inbuf, int* outbuf,
|
||||
int count, struct ompi_op_t *op,
|
||||
ompi_communicator_t *intercomm,
|
||||
ompi_communicator_t *bridgecomm,
|
||||
void* local_leader,
|
||||
void* remote_leader,
|
||||
int send_first, char *tag, int iter );
|
||||
|
||||
static int ompi_comm_allreduce_group (int *inbuf, int* outbuf,
|
||||
int count, struct ompi_op_t *op,
|
||||
@ -98,7 +99,7 @@ static int ompi_comm_allreduce_group (int *inbuf, int* outbuf,
|
||||
ompi_communicator_t *bridgecomm,
|
||||
void* local_leader,
|
||||
void* remote_leader,
|
||||
int send_first);
|
||||
int send_first, char *tag, int iter);
|
||||
|
||||
/* non-blocking intracommunicator allreduce */
|
||||
static int ompi_comm_allreduce_intra_nb (int *inbuf, int *outbuf,
|
||||
@ -158,7 +159,7 @@ int ompi_comm_nextcid ( ompi_communicator_t* newcomm,
|
||||
int response, glresponse=0;
|
||||
int start;
|
||||
unsigned int i;
|
||||
|
||||
int iter=0;
|
||||
ompi_comm_cid_allredfct* allredfnct;
|
||||
|
||||
/**
|
||||
@ -177,8 +178,8 @@ int ompi_comm_nextcid ( ompi_communicator_t* newcomm,
|
||||
case OMPI_COMM_CID_INTRA_BRIDGE:
|
||||
allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra_bridge;
|
||||
break;
|
||||
case OMPI_COMM_CID_INTRA_OOB:
|
||||
allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra_oob;
|
||||
case OMPI_COMM_CID_INTRA_PMIX:
|
||||
allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra_pmix;
|
||||
break;
|
||||
case OMPI_COMM_CID_GROUP:
|
||||
allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_group;
|
||||
@ -218,7 +219,8 @@ int ompi_comm_nextcid ( ompi_communicator_t* newcomm,
|
||||
}
|
||||
|
||||
ret = (allredfnct)(&nextlocal_cid, &nextcid, 1, MPI_MAX, comm, bridgecomm,
|
||||
local_leader, remote_leader, send_first );
|
||||
local_leader, remote_leader, send_first, "nextcid", iter );
|
||||
++iter;
|
||||
if( OMPI_SUCCESS != ret ) {
|
||||
opal_pointer_array_set_item(&ompi_mpi_communicators, nextlocal_cid, NULL);
|
||||
goto release_and_return;
|
||||
@ -251,7 +253,8 @@ int ompi_comm_nextcid ( ompi_communicator_t* newcomm,
|
||||
}
|
||||
|
||||
ret = (allredfnct)(&response, &glresponse, 1, MPI_MIN, comm, bridgecomm,
|
||||
local_leader, remote_leader, send_first );
|
||||
local_leader, remote_leader, send_first, "nextcid", iter );
|
||||
++iter;
|
||||
if( OMPI_SUCCESS != ret ) {
|
||||
opal_pointer_array_set_item(&ompi_mpi_communicators, nextcid, NULL);
|
||||
goto release_and_return;
|
||||
@ -614,8 +617,8 @@ int ompi_comm_activate ( ompi_communicator_t** newcomm,
|
||||
case OMPI_COMM_CID_INTRA_BRIDGE:
|
||||
allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra_bridge;
|
||||
break;
|
||||
case OMPI_COMM_CID_INTRA_OOB:
|
||||
allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra_oob;
|
||||
case OMPI_COMM_CID_INTRA_PMIX:
|
||||
allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_intra_pmix;
|
||||
break;
|
||||
case OMPI_COMM_CID_GROUP:
|
||||
allredfnct=(ompi_comm_cid_allredfct*)ompi_comm_allreduce_group;
|
||||
@ -636,7 +639,7 @@ int ompi_comm_activate ( ompi_communicator_t** newcomm,
|
||||
|
||||
|
||||
ret = (allredfnct)(&ok, &gok, 1, MPI_MIN, comm, bridgecomm,
|
||||
local_leader, remote_leader, send_first );
|
||||
local_leader, remote_leader, send_first, "activate", 0 );
|
||||
if( OMPI_SUCCESS != ret ) {
|
||||
goto bail_on_error;
|
||||
}
|
||||
@ -870,7 +873,7 @@ static int ompi_comm_allreduce_intra ( int *inbuf, int *outbuf,
|
||||
ompi_communicator_t *bridgecomm,
|
||||
void* local_leader,
|
||||
void* remote_leader,
|
||||
int send_first )
|
||||
int send_first, char *tag, int iter )
|
||||
{
|
||||
return comm->c_coll.coll_allreduce ( inbuf, outbuf, count, MPI_INT, op, comm,
|
||||
comm->c_coll.coll_allreduce_module );
|
||||
@ -899,7 +902,7 @@ static int ompi_comm_allreduce_inter ( int *inbuf, int *outbuf,
|
||||
ompi_communicator_t *bridgecomm,
|
||||
void* local_leader,
|
||||
void* remote_leader,
|
||||
int send_first )
|
||||
int send_first, char *tag, int iter )
|
||||
{
|
||||
int local_rank, rsize;
|
||||
int rc;
|
||||
@ -1204,7 +1207,7 @@ static int ompi_comm_allreduce_intra_bridge (int *inbuf, int *outbuf,
|
||||
ompi_communicator_t *comm,
|
||||
ompi_communicator_t *bcomm,
|
||||
void* lleader, void* rleader,
|
||||
int send_first )
|
||||
int send_first, char *tag, int iter )
|
||||
{
|
||||
int *tmpbuf=NULL;
|
||||
int local_rank;
|
||||
@ -1291,46 +1294,30 @@ static int ompi_comm_allreduce_intra_bridge (int *inbuf, int *outbuf,
|
||||
return (rc);
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
opal_buffer_t buf;
|
||||
bool active;
|
||||
} comm_cid_return_t;
|
||||
|
||||
static void comm_cid_recv(int status,
|
||||
ompi_process_name_t* peer,
|
||||
opal_buffer_t* buffer,
|
||||
ompi_rml_tag_t tag,
|
||||
void* cbdata)
|
||||
{
|
||||
comm_cid_return_t *rcid = (comm_cid_return_t*)cbdata;
|
||||
|
||||
opal_dss.copy_payload(&rcid->buf, buffer);
|
||||
rcid->active = false;
|
||||
}
|
||||
|
||||
/* Arguments not used in this implementation:
|
||||
* - bridgecomm
|
||||
*
|
||||
* lleader is the local rank of root in comm
|
||||
* rleader is the OOB contact information of the
|
||||
* root processes in the other world.
|
||||
* rleader is the port_string
|
||||
*/
|
||||
static int ompi_comm_allreduce_intra_oob (int *inbuf, int *outbuf,
|
||||
int count, struct ompi_op_t *op,
|
||||
ompi_communicator_t *comm,
|
||||
ompi_communicator_t *bridgecomm,
|
||||
void* lleader, void* rleader,
|
||||
int send_first )
|
||||
static int ompi_comm_allreduce_intra_pmix (int *inbuf, int *outbuf,
|
||||
int count, struct ompi_op_t *op,
|
||||
ompi_communicator_t *comm,
|
||||
ompi_communicator_t *bridgecomm,
|
||||
void* lleader, void* rleader,
|
||||
int send_first, char *tag, int iter )
|
||||
{
|
||||
int *tmpbuf=NULL;
|
||||
int rc;
|
||||
int local_leader, local_rank;
|
||||
ompi_process_name_t *remote_leader=NULL;
|
||||
char *port_string;
|
||||
opal_value_t info;
|
||||
opal_pmix_pdata_t pdat;
|
||||
opal_buffer_t sbuf;
|
||||
int32_t size_count;
|
||||
comm_cid_return_t rcid;
|
||||
|
||||
local_leader = (*((int*)lleader));
|
||||
remote_leader = (ompi_process_name_t*)rleader;
|
||||
port_string = (char*)rleader;
|
||||
size_count = count;
|
||||
|
||||
local_rank = ompi_comm_rank ( comm );
|
||||
@ -1348,50 +1335,48 @@ static int ompi_comm_allreduce_intra_oob (int *inbuf, int *outbuf,
|
||||
}
|
||||
|
||||
if (local_rank == local_leader ) {
|
||||
opal_buffer_t *sbuf;
|
||||
OBJ_CONSTRUCT(&sbuf, opal_buffer_t);
|
||||
|
||||
sbuf = OBJ_NEW(opal_buffer_t);
|
||||
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(sbuf, tmpbuf, (int32_t)count, OPAL_INT))) {
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(&sbuf, tmpbuf, (int32_t)count, OPAL_INT))) {
|
||||
goto exit;
|
||||
}
|
||||
OBJ_CONSTRUCT(&info, opal_value_t);
|
||||
OBJ_CONSTRUCT(&pdat, opal_pmix_pdata_t);
|
||||
|
||||
if ( send_first ) {
|
||||
if (0 > (rc = ompi_rte_send_buffer_nb(remote_leader, sbuf,
|
||||
OMPI_RML_TAG_COMM_CID_INTRA,
|
||||
ompi_rte_send_cbfunc, NULL))) {
|
||||
goto exit;
|
||||
}
|
||||
OBJ_CONSTRUCT(&rcid.buf, opal_buffer_t);
|
||||
rcid.active = true;
|
||||
ompi_rte_recv_buffer_nb(remote_leader, OMPI_RML_TAG_COMM_CID_INTRA,
|
||||
OMPI_RML_NON_PERSISTENT, comm_cid_recv, &rcid);
|
||||
while (rcid.active) {
|
||||
opal_progress();
|
||||
}
|
||||
}
|
||||
else {
|
||||
OBJ_CONSTRUCT(&rcid.buf, opal_buffer_t);
|
||||
rcid.active = true;
|
||||
ompi_rte_recv_buffer_nb(remote_leader, OMPI_RML_TAG_COMM_CID_INTRA,
|
||||
OMPI_RML_NON_PERSISTENT, comm_cid_recv, &rcid);
|
||||
while (rcid.active) {
|
||||
opal_progress();
|
||||
}
|
||||
if (0 > (rc = ompi_rte_send_buffer_nb(remote_leader, sbuf,
|
||||
OMPI_RML_TAG_COMM_CID_INTRA,
|
||||
ompi_rte_send_cbfunc, NULL))) {
|
||||
goto exit;
|
||||
}
|
||||
info.type = OPAL_BYTE_OBJECT;
|
||||
pdat.value.type = OPAL_BYTE_OBJECT;
|
||||
|
||||
opal_dss.unload(&sbuf, (void**)&info.data.bo.bytes, &info.data.bo.size);
|
||||
OBJ_DESTRUCT(&sbuf);
|
||||
|
||||
if (send_first) {
|
||||
(void)asprintf(&info.key, "%s:%s:send:%d", port_string, tag, iter);
|
||||
(void)asprintf(&pdat.value.key, "%s:%s:recv:%d", port_string, tag, iter);
|
||||
} else {
|
||||
(void)asprintf(&info.key, "%s:%s:recv:%d", port_string, tag, iter);
|
||||
(void)asprintf(&pdat.value.key, "%s:%s:send:%d", port_string, tag, iter);
|
||||
}
|
||||
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.unpack(&rcid.buf, outbuf, &size_count, OPAL_INT))) {
|
||||
OPAL_PMIX_EXCHANGE(rc, &info, &pdat, 60);
|
||||
OBJ_DESTRUCT(&info);
|
||||
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
OBJ_DESTRUCT(&pdat);
|
||||
goto exit;
|
||||
}
|
||||
OBJ_DESTRUCT(&rcid.buf);
|
||||
OBJ_CONSTRUCT(&sbuf, opal_buffer_t);
|
||||
opal_dss.load(&sbuf, pdat.value.data.bo.bytes, pdat.value.data.bo.size);
|
||||
pdat.value.data.bo.bytes = NULL;
|
||||
pdat.value.data.bo.size = 0;
|
||||
OBJ_DESTRUCT(&pdat);
|
||||
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.unpack(&sbuf, outbuf, &size_count, OPAL_INT))) {
|
||||
OBJ_DESTRUCT(&sbuf);
|
||||
goto exit;
|
||||
}
|
||||
OBJ_DESTRUCT(&sbuf);
|
||||
count = (int)size_count;
|
||||
|
||||
ompi_op_reduce (op, tmpbuf, outbuf, count, MPI_INT);
|
||||
ompi_op_reduce (op, tmpbuf, outbuf, count, MPI_INT);
|
||||
}
|
||||
|
||||
rc = comm->c_coll.coll_bcast (outbuf, count, MPI_INT,
|
||||
@ -1412,7 +1397,7 @@ static int ompi_comm_allreduce_group (int *inbuf, int* outbuf,
|
||||
ompi_communicator_t *newcomm,
|
||||
void* local_leader,
|
||||
void* remote_leader,
|
||||
int send_first)
|
||||
int send_first, char *intag, int iter)
|
||||
{
|
||||
ompi_group_t *group = newcomm->c_local_group;
|
||||
int peers_group[3], peers_comm[3];
|
||||
|
@ -17,7 +17,7 @@
|
||||
* Copyright (c) 2011-2013 Universite Bordeaux 1
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -93,7 +93,7 @@ OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_communicator_t);
|
||||
#define OMPI_COMM_CID_INTRA 0x00000020
|
||||
#define OMPI_COMM_CID_INTER 0x00000040
|
||||
#define OMPI_COMM_CID_INTRA_BRIDGE 0x00000080
|
||||
#define OMPI_COMM_CID_INTRA_OOB 0x00000100
|
||||
#define OMPI_COMM_CID_INTRA_PMIX 0x00000100
|
||||
#define OMPI_COMM_CID_GROUP 0x00000200
|
||||
|
||||
/**
|
||||
@ -497,8 +497,8 @@ ompi_communicator_t* ompi_comm_allocate (int local_group_size,
|
||||
* a bridge comm. local_leader
|
||||
* and remote leader are in this
|
||||
* case an int (rank in bridge-comm).
|
||||
* OMPI_COMM_CID_INTRA_OOB: 2 intracomms, leaders talk
|
||||
* through OOB. lleader and rleader
|
||||
* OMPI_COMM_CID_INTRA_PMIX: 2 intracomms, leaders talk
|
||||
* through PMIx. lleader and rleader
|
||||
* are the required contact information.
|
||||
* @param send_first: to avoid a potential deadlock for
|
||||
* the OOB version.
|
||||
|
201
ompi/dpm/dpm.c
201
ompi/dpm/dpm.c
@ -72,27 +72,6 @@ static OBJ_CLASS_INSTANCE(ompi_dpm_proct_caddy_t,
|
||||
opal_list_item_t,
|
||||
NULL, NULL);
|
||||
|
||||
struct lookup_caddy_t {
|
||||
volatile bool active;
|
||||
int status;
|
||||
opal_pmix_pdata_t *pdat;
|
||||
};
|
||||
|
||||
static void lookup_cbfunc(int status, opal_list_t *data, void *cbdata)
|
||||
{
|
||||
struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata;
|
||||
cd->status = status;
|
||||
if (OPAL_SUCCESS == status && NULL != data) {
|
||||
opal_pmix_pdata_t *p = (opal_pmix_pdata_t*)opal_list_get_first(data);
|
||||
if (NULL != p && OPAL_STRING == p->value.type &&
|
||||
NULL != p->value.data.string) {
|
||||
cd->pdat->value.data.string = strdup(p->value.data.string);
|
||||
}
|
||||
}
|
||||
cd->active = false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Init the module
|
||||
*/
|
||||
@ -113,12 +92,12 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
||||
ompi_communicator_t **newcomm)
|
||||
{
|
||||
int k, size, rsize, rank, rc, rportlen=0;
|
||||
char **members = NULL, *nstring, *rport=NULL, **pkeys=NULL;
|
||||
char **members = NULL, *nstring, *rport=NULL;
|
||||
bool dense, isnew;
|
||||
opal_process_name_t pname;
|
||||
opal_list_t ilist, mlist, rlist;
|
||||
opal_value_t *info;
|
||||
opal_pmix_pdata_t *pdat;
|
||||
opal_value_t info;
|
||||
opal_pmix_pdata_t pdat;
|
||||
opal_namelist_t *nm;
|
||||
opal_jobid_t jobid;
|
||||
|
||||
@ -126,7 +105,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
||||
ompi_proc_t *proc;
|
||||
ompi_group_t *group=comm->c_local_group;
|
||||
ompi_proc_t **proc_list=NULL, **new_proc_list = NULL;
|
||||
int32_t i,j;
|
||||
int32_t i;
|
||||
ompi_group_t *new_group_pointer;
|
||||
ompi_dpm_proct_caddy_t *cd;
|
||||
|
||||
@ -200,106 +179,35 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
||||
}
|
||||
|
||||
if (rank == root) {
|
||||
/* the root for each side publishes their list of participants */
|
||||
OBJ_CONSTRUCT(&ilist, opal_list_t);
|
||||
/* put my name at the front of the list of members - my
|
||||
* name will therefore be on the list twice, but the
|
||||
* other side's root needs to know the root from this side */
|
||||
rc = opal_convert_process_name_to_string(&nstring, OMPI_PROC_MY_NAME);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
opal_argv_prepend_nosize(&members, nstring);
|
||||
free(nstring);
|
||||
info = OBJ_NEW(opal_value_t);
|
||||
/* the roots for each side exchange their list of participants */
|
||||
OBJ_CONSTRUCT(&info, opal_value_t);
|
||||
OBJ_CONSTRUCT(&pdat, opal_pmix_pdata_t);
|
||||
if (send_first) {
|
||||
(void)asprintf(&info->key, "%s:connect", port_string);
|
||||
(void)asprintf(&info.key, "%s:connect", port_string);
|
||||
(void)asprintf(&pdat.value.key, "%s:accept", port_string);
|
||||
} else {
|
||||
(void)asprintf(&info->key, "%s:accept", port_string);
|
||||
(void)asprintf(&info.key, "%s:accept", port_string);
|
||||
(void)asprintf(&pdat.value.key, "%s:connect", port_string);
|
||||
}
|
||||
info->type = OPAL_STRING;
|
||||
info->data.string = opal_argv_join(members, ':');
|
||||
opal_list_append(&ilist, &info->super);
|
||||
/* also save the key for later */
|
||||
opal_argv_append_nosize(&pkeys, info->key);
|
||||
/* publish them with "session" scope */
|
||||
rc = opal_pmix.publish(&ilist);
|
||||
OPAL_LIST_DESTRUCT(&ilist);
|
||||
info.type = OPAL_STRING;
|
||||
info.data.string = opal_argv_join(members, ':');
|
||||
pdat.value.type = OPAL_STRING;
|
||||
|
||||
OPAL_PMIX_EXCHANGE(rc, &info, &pdat, 60);
|
||||
OBJ_DESTRUCT(&info);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
opal_argv_free(members);
|
||||
opal_argv_free(pkeys);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
/* lookup the other side's info - if a non-blocking form
|
||||
* of lookup isn't available, then we use the blocking
|
||||
* form and trust that the underlying system will WAIT
|
||||
* until the other side publishes its data */
|
||||
OBJ_CONSTRUCT(&ilist, opal_list_t);
|
||||
pdat = OBJ_NEW(opal_pmix_pdata_t);
|
||||
if (send_first) {
|
||||
(void)asprintf(&pdat->value.key, "%s:accept", port_string);
|
||||
} else {
|
||||
(void)asprintf(&pdat->value.key, "%s:connect", port_string);
|
||||
}
|
||||
opal_list_append(&ilist, &pdat->super);
|
||||
OBJ_CONSTRUCT(&mlist, opal_list_t);
|
||||
/* if a non-blocking version of lookup isn't
|
||||
* available, then use the blocking version */
|
||||
if (NULL == opal_pmix.lookup_nb) {
|
||||
rc = opal_pmix.lookup(&ilist, &mlist);
|
||||
OPAL_LIST_DESTRUCT(&mlist);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
OMPI_ERROR_LOG(rc);
|
||||
OPAL_LIST_DESTRUCT(&ilist);
|
||||
opal_argv_free(members);
|
||||
goto exit;
|
||||
}
|
||||
} else {
|
||||
char **keys = NULL;
|
||||
struct lookup_caddy_t caddy;
|
||||
opal_argv_append_nosize(&keys, pdat->value.key);
|
||||
caddy.active = true;
|
||||
caddy.pdat = pdat;
|
||||
/* tell it to wait for the data to arrive */
|
||||
info = OBJ_NEW(opal_value_t);
|
||||
info->key = strdup(OPAL_PMIX_WAIT);
|
||||
info->type = OPAL_BOOL;
|
||||
info->data.flag = true;
|
||||
opal_list_append(&mlist, &info->super);
|
||||
/* give it a decent timeout as we don't know when
|
||||
* the other side may call connect - it doesn't
|
||||
* have to be simultaneous */
|
||||
info = OBJ_NEW(opal_value_t);
|
||||
info->key = strdup(OPAL_PMIX_TIMEOUT);
|
||||
info->type = OPAL_INT;
|
||||
info->data.integer = 60;
|
||||
opal_list_append(&mlist, &info->super);
|
||||
rc = opal_pmix.lookup_nb(keys, &mlist, lookup_cbfunc, &caddy);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
OPAL_LIST_DESTRUCT(&ilist);
|
||||
OPAL_LIST_DESTRUCT(&mlist);
|
||||
opal_argv_free(keys);
|
||||
opal_argv_free(members);
|
||||
goto exit;
|
||||
}
|
||||
OMPI_WAIT_FOR_COMPLETION(caddy.active);
|
||||
opal_argv_free(keys);
|
||||
OPAL_LIST_DESTRUCT(&mlist);
|
||||
if (OPAL_SUCCESS != caddy.status) {
|
||||
OMPI_ERROR_LOG(caddy.status);
|
||||
OPAL_LIST_DESTRUCT(&ilist);
|
||||
opal_argv_free(members);
|
||||
goto exit;
|
||||
}
|
||||
OBJ_DESTRUCT(&pdat);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* save the result */
|
||||
rport = strdup(pdat->value.data.string); // need this later
|
||||
rport = strdup(pdat.value.data.string); // need this later
|
||||
rportlen = strlen(rport) + 1; // retain the NULL terminator
|
||||
OPAL_LIST_DESTRUCT(&ilist);
|
||||
OBJ_DESTRUCT(&pdat);
|
||||
}
|
||||
|
||||
/* if we aren't in a comm_spawn, the non-root members won't have
|
||||
* a port_string - so let's make sure everyone knows the other
|
||||
* the port_string - so let's make sure everyone knows the other
|
||||
* side's participants */
|
||||
|
||||
/* bcast the list-length to all processes in the local comm */
|
||||
@ -327,15 +235,9 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
||||
}
|
||||
|
||||
/* initiate a list of participants for the connect,
|
||||
* starting with our own members, remembering to
|
||||
* skip the first member if we are the root rank */
|
||||
if (rank == root) {
|
||||
j = 1;
|
||||
} else {
|
||||
j = 0;
|
||||
}
|
||||
* starting with our own members */
|
||||
OBJ_CONSTRUCT(&mlist, opal_list_t);
|
||||
for (i=j; NULL != members[i]; i++) {
|
||||
for (i=0; NULL != members[i]; i++) {
|
||||
nm = OBJ_NEW(opal_namelist_t);
|
||||
if (OPAL_SUCCESS != (rc = opal_convert_string_to_process_name(&nm->name, members[i]))) {
|
||||
OMPI_ERROR_LOG(rc);
|
||||
@ -373,33 +275,18 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
||||
opal_argv_free(members);
|
||||
members = NULL;
|
||||
|
||||
/* the pdat object will contain a colon-delimited list
|
||||
/* rport contains a colon-delimited list
|
||||
* of process names for the remote procs - convert it
|
||||
* into an argv array */
|
||||
members = opal_argv_split(rport, ':');
|
||||
free(rport);
|
||||
|
||||
/* the first entry is the root for the remote side */
|
||||
if (OPAL_SUCCESS != (rc = opal_convert_string_to_process_name(&pname, members[0]))) {
|
||||
OMPI_ERROR_LOG(rc);
|
||||
opal_argv_free(members);
|
||||
goto exit;
|
||||
}
|
||||
/* check the name - it should never be a wildcard, so
|
||||
* this is just checking for an error */
|
||||
if (OPAL_VPID_WILDCARD == pname.vpid) {
|
||||
OMPI_ERROR_LOG(OMPI_ERR_BAD_PARAM);
|
||||
opal_argv_free(members);
|
||||
rc = OMPI_ERR_BAD_PARAM;
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* add the list of remote procs to our list, and
|
||||
* keep a list of them for later */
|
||||
OBJ_CONSTRUCT(&ilist, opal_list_t);
|
||||
OBJ_CONSTRUCT(&rlist, opal_list_t);
|
||||
|
||||
for (i=1; NULL != members[i]; i++) {
|
||||
for (i=0; NULL != members[i]; i++) {
|
||||
nm = OBJ_NEW(opal_namelist_t);
|
||||
if (OPAL_SUCCESS != (rc = opal_convert_string_to_process_name(&nm->name, members[i]))) {
|
||||
OMPI_ERROR_LOG(rc);
|
||||
@ -452,7 +339,7 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
||||
opal_list_append(&ilist, &cd->super);
|
||||
}
|
||||
/* either way, add to the remote list */
|
||||
cd = OBJ_NEW(ompi_dpm_proct_caddy_t);
|
||||
cd = OBJ_NEW(ompi_dpm_proct_caddy_t);
|
||||
cd->p = proc;
|
||||
opal_list_append(&rlist, &cd->super);
|
||||
}
|
||||
@ -550,25 +437,25 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
||||
new_group_pointer = MPI_GROUP_NULL;
|
||||
|
||||
/* allocate comm_cid */
|
||||
rc = ompi_comm_nextcid ( newcomp, /* new communicator */
|
||||
comm, /* old communicator */
|
||||
NULL, /* bridge comm */
|
||||
&root, /* local leader */
|
||||
&pname, /* remote leader */
|
||||
OMPI_COMM_CID_INTRA_OOB, /* mode */
|
||||
send_first ); /* send or recv first */
|
||||
rc = ompi_comm_nextcid ( newcomp, /* new communicator */
|
||||
comm, /* old communicator */
|
||||
NULL, /* bridge comm */
|
||||
&root, /* local leader */
|
||||
(void*)port_string, /* rendezvous point */
|
||||
OMPI_COMM_CID_INTRA_PMIX, /* mode */
|
||||
send_first ); /* send or recv first */
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* activate comm and init coll-component */
|
||||
rc = ompi_comm_activate ( &newcomp, /* new communicator */
|
||||
comm, /* old communicator */
|
||||
NULL, /* bridge comm */
|
||||
&root, /* local leader */
|
||||
&pname, /* remote leader */
|
||||
OMPI_COMM_CID_INTRA_OOB, /* mode */
|
||||
send_first ); /* send or recv first */
|
||||
rc = ompi_comm_activate ( &newcomp, /* new communicator */
|
||||
comm, /* old communicator */
|
||||
NULL, /* bridge comm */
|
||||
&root, /* local leader */
|
||||
(void*)port_string, /* rendezvous point */
|
||||
OMPI_COMM_CID_INTRA_PMIX, /* mode */
|
||||
send_first ); /* send or recv first */
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
goto exit;
|
||||
}
|
||||
@ -579,10 +466,6 @@ int ompi_dpm_connect_accept(ompi_communicator_t *comm, int root,
|
||||
*/
|
||||
|
||||
exit:
|
||||
if (NULL != pkeys) {
|
||||
opal_pmix.unpublish(pkeys, NULL);
|
||||
opal_argv_free(pkeys);
|
||||
}
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
if (MPI_COMM_NULL != newcomp && NULL != newcomp) {
|
||||
OBJ_RETAIN(newcomp);
|
||||
|
@ -96,27 +96,6 @@ typedef orte_error_t ompi_rte_error_report_t;
|
||||
#define ompi_rte_finalize() orte_finalize()
|
||||
OMPI_DECLSPEC void ompi_rte_wait_for_debugger(void);
|
||||
|
||||
#define OMPI_DB_HOSTNAME ORTE_DB_HOSTNAME
|
||||
#define OMPI_DB_LOCALITY ORTE_DB_LOCALITY
|
||||
#define OMPI_DB_GLOBAL_RANK ORTE_DB_GLOBAL_RANK
|
||||
|
||||
/* Communications */
|
||||
typedef orte_rml_tag_t ompi_rml_tag_t;
|
||||
#define ompi_rte_send_buffer_nb(a, b, c, d, e) orte_rml.send_buffer_nb(a, b, c, d, e)
|
||||
#define ompi_rte_recv_buffer_nb(a, b, c, d, e) orte_rml.recv_buffer_nb(a, b, c, d, e)
|
||||
#define ompi_rte_recv_cancel(a, b) orte_rml.recv_cancel(a, b)
|
||||
#define ompi_rte_parse_uris(a, b, c) orte_rml_base_parse_uris(a, b, c)
|
||||
#define ompi_rte_send_cbfunc orte_rml_send_callback
|
||||
|
||||
/* Communication tags */
|
||||
/* carry over the INVALID def */
|
||||
#define OMPI_RML_TAG_INVALID ORTE_RML_TAG_INVALID
|
||||
/* define a starting point to avoid conflicts */
|
||||
#define OMPI_RML_TAG_BASE ORTE_RML_TAG_MAX
|
||||
|
||||
#define OMPI_RML_PERSISTENT ORTE_RML_PERSISTENT
|
||||
#define OMPI_RML_NON_PERSISTENT ORTE_RML_NON_PERSISTENT
|
||||
|
||||
typedef struct {
|
||||
ompi_rte_component_t super;
|
||||
opal_mutex_t lock;
|
||||
@ -138,8 +117,6 @@ static inline orte_process_name_t * OMPI_CAST_RTE_NAME(opal_process_name_t * nam
|
||||
}
|
||||
#endif
|
||||
|
||||
#define ompi_direct_modex_cutoff orte_direct_modex_cutoff
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* MCA_OMPI_RTE_ORTE_H */
|
||||
|
@ -196,39 +196,6 @@ END_C_DECLS
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/* Each RTE is required to define a DB key for identifying the node
|
||||
* upon which a process resides, and for providing this information
|
||||
* for each process
|
||||
*
|
||||
* #define OMPI_RTE_NODE_ID
|
||||
*/
|
||||
|
||||
/* Communication tags */
|
||||
#define OMPI_RML_TAG_UDAPL OMPI_RML_TAG_BASE+1
|
||||
#define OMPI_RML_TAG_OPENIB OMPI_RML_TAG_BASE+2
|
||||
#define OMPI_RML_TAG_XOPENIB OMPI_RML_TAG_BASE+3
|
||||
#define OMPI_RML_TAG_COMM_CID_INTRA OMPI_RML_TAG_BASE+4
|
||||
#define OMPI_RML_TAG_XOOB OMPI_RML_TAG_BASE+5
|
||||
#define OMPI_RML_TAG_SM_BACK_FILE_CREATED OMPI_RML_TAG_BASE+6
|
||||
#define OMPI_CRCP_COORD_BOOKMARK_TAG OMPI_RML_TAG_BASE+7
|
||||
#define OMPI_COMM_JOIN_TAG OMPI_RML_TAG_BASE+8
|
||||
|
||||
/* support for shared memory collectives */
|
||||
#define OMPI_RML_TAG_COLL_SM2_BACK_FILE_CREATED OMPI_RML_TAG_BASE+9
|
||||
/* common sm component query result index */
|
||||
#define OMPI_RML_TAG_COMMON_SM_COMP_INDEX OMPI_RML_TAG_BASE+10
|
||||
|
||||
/* OFACM RML TAGs */
|
||||
#define OMPI_RML_TAG_OFACM OMPI_RML_TAG_BASE+11
|
||||
#define OMPI_RML_TAG_XOFACM OMPI_RML_TAG_BASE+12
|
||||
|
||||
#define OMPI_RML_PCONNECT_TAG OMPI_RML_TAG_BASE+13
|
||||
|
||||
#define OMPI_RML_TAG_USNIC_CONNECTIVITY OMPI_RML_TAG_BASE+14
|
||||
#define OMPI_RML_TAG_USNIC_CONNECTIVITY_REPLY OMPI_RML_TAG_BASE+15
|
||||
|
||||
#define OMPI_RML_TAG_DYNAMIC OMPI_RML_TAG_BASE+200
|
||||
|
||||
/*
|
||||
* MCA Framework
|
||||
*/
|
||||
|
@ -37,7 +37,9 @@ OPAL_DECLSPEC void opal_pmix_base_deregister_handler(void);
|
||||
OPAL_DECLSPEC void opal_pmix_base_errhandler(int status,
|
||||
opal_list_t *procs,
|
||||
opal_list_t *info);
|
||||
|
||||
OPAL_DECLSPEC int opal_pmix_base_exchange(opal_value_t *info,
|
||||
opal_pmix_pdata_t *pdat,
|
||||
int timeout);
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
@ -35,6 +35,7 @@
|
||||
|
||||
#define OPAL_PMI_PAD 10
|
||||
|
||||
/******** ERRHANDLER SUPPORT ********/
|
||||
static opal_pmix_errhandler_fn_t errhandler = NULL;
|
||||
|
||||
void opal_pmix_base_register_handler(opal_pmix_errhandler_fn_t err)
|
||||
@ -56,6 +57,130 @@ void opal_pmix_base_deregister_handler(void)
|
||||
errhandler = NULL;
|
||||
}
|
||||
|
||||
struct lookup_caddy_t {
|
||||
volatile bool active;
|
||||
int status;
|
||||
opal_pmix_pdata_t *pdat;
|
||||
};
|
||||
|
||||
/******** DATA EXCHANGE ********/
|
||||
static void lookup_cbfunc(int status, opal_list_t *data, void *cbdata)
|
||||
{
|
||||
struct lookup_caddy_t *cd = (struct lookup_caddy_t*)cbdata;
|
||||
cd->status = status;
|
||||
if (OPAL_SUCCESS == status && NULL != data) {
|
||||
opal_pmix_pdata_t *p = (opal_pmix_pdata_t*)opal_list_get_first(data);
|
||||
if (NULL != p && p->value.type == cd->pdat->value.type) {
|
||||
(void)opal_value_xfer(&cd->pdat->value, &p->value);
|
||||
}
|
||||
cd->pdat->proc = p->proc;
|
||||
}
|
||||
cd->active = false;
|
||||
}
|
||||
|
||||
int opal_pmix_base_exchange(opal_value_t *indat,
|
||||
opal_pmix_pdata_t *outdat,
|
||||
int timeout)
|
||||
{
|
||||
int rc;
|
||||
opal_list_t ilist, mlist;
|
||||
opal_value_t *info;
|
||||
opal_pmix_pdata_t *pdat;
|
||||
struct lookup_caddy_t caddy;
|
||||
char **keys;
|
||||
|
||||
/* protect the incoming value */
|
||||
opal_dss.copy((void**)&info, indat, OPAL_VALUE);
|
||||
OBJ_CONSTRUCT(&ilist, opal_list_t);
|
||||
opal_list_append(&ilist, &info->super);
|
||||
/* tell the server to delete upon read */
|
||||
info = OBJ_NEW(opal_value_t);
|
||||
info->key = strdup(OPAL_PMIX_PERSISTENCE);
|
||||
info->type = OPAL_INT;
|
||||
info->data.integer = OPAL_PMIX_PERSIST_FIRST_READ;
|
||||
opal_list_append(&ilist, &info->super);
|
||||
|
||||
/* publish it with "session" scope */
|
||||
rc = opal_pmix.publish(&ilist);
|
||||
OPAL_LIST_DESTRUCT(&ilist);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
OPAL_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* lookup the other side's info - if a non-blocking form
|
||||
* of lookup isn't available, then we use the blocking
|
||||
* form and trust that the underlying system will WAIT
|
||||
* until the other side publishes its data */
|
||||
OBJ_CONSTRUCT(&ilist, opal_list_t);
|
||||
pdat = OBJ_NEW(opal_pmix_pdata_t);
|
||||
pdat->value.key = strdup(outdat->value.key);
|
||||
pdat->value.type = outdat->value.type;
|
||||
opal_list_append(&ilist, &pdat->super);
|
||||
/* setup the constraints */
|
||||
OBJ_CONSTRUCT(&mlist, opal_list_t);
|
||||
/* tell it to wait for the data to arrive */
|
||||
info = OBJ_NEW(opal_value_t);
|
||||
info->key = strdup(OPAL_PMIX_WAIT);
|
||||
info->type = OPAL_BOOL;
|
||||
info->data.flag = true;
|
||||
opal_list_append(&mlist, &info->super);
|
||||
if (0 < timeout) {
|
||||
/* give it a decent timeout as we don't know when
|
||||
* the other side will publish - it doesn't
|
||||
* have to be simultaneous */
|
||||
info = OBJ_NEW(opal_value_t);
|
||||
info->key = strdup(OPAL_PMIX_TIMEOUT);
|
||||
info->type = OPAL_INT;
|
||||
info->data.integer = timeout;
|
||||
opal_list_append(&mlist, &info->super);
|
||||
}
|
||||
|
||||
/* if a non-blocking version of lookup isn't
|
||||
* available, then use the blocking version */
|
||||
if (NULL == opal_pmix.lookup_nb) {
|
||||
rc = opal_pmix.lookup(&ilist, &mlist);
|
||||
OPAL_LIST_DESTRUCT(&mlist);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
OPAL_ERROR_LOG(rc);
|
||||
OPAL_LIST_DESTRUCT(&ilist);
|
||||
return rc;
|
||||
}
|
||||
} else {
|
||||
caddy.active = true;
|
||||
caddy.pdat = pdat;
|
||||
keys = NULL;
|
||||
opal_argv_append_nosize(&keys, pdat->value.key);
|
||||
rc = opal_pmix.lookup_nb(keys, &mlist, lookup_cbfunc, &caddy);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
OPAL_ERROR_LOG(rc);
|
||||
OPAL_LIST_DESTRUCT(&ilist);
|
||||
OPAL_LIST_DESTRUCT(&mlist);
|
||||
opal_argv_free(keys);
|
||||
return rc;
|
||||
}
|
||||
while (caddy.active) {
|
||||
usleep(10);
|
||||
}
|
||||
opal_argv_free(keys);
|
||||
OPAL_LIST_DESTRUCT(&mlist);
|
||||
if (OPAL_SUCCESS != caddy.status) {
|
||||
OPAL_ERROR_LOG(caddy.status);
|
||||
OPAL_LIST_DESTRUCT(&ilist);
|
||||
return caddy.status;
|
||||
}
|
||||
}
|
||||
|
||||
/* pass back the result */
|
||||
outdat->proc = pdat->proc;
|
||||
rc = opal_value_xfer(&outdat->value, &pdat->value);
|
||||
OPAL_LIST_DESTRUCT(&ilist);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/******** DATA CONSOLIDATION ********/
|
||||
|
||||
static char* setup_key(const opal_process_name_t* name, const char *key, int pmix_keylen_max);
|
||||
static char *pmi_encode(const void *val, size_t vallen);
|
||||
static uint8_t *pmi_decode (const char *data, size_t *retlen);
|
||||
|
@ -36,6 +36,9 @@ BEGIN_C_DECLS
|
||||
/* provide access to the framework verbose output without
|
||||
* exposing the entire base */
|
||||
extern int opal_pmix_verbose_output;
|
||||
extern int opal_pmix_base_exchange(opal_value_t *info,
|
||||
opal_pmix_pdata_t *pdat,
|
||||
int timeout);
|
||||
|
||||
/**
|
||||
* Provide a simplified macro for sending data via modex
|
||||
@ -249,12 +252,24 @@ extern int opal_pmix_verbose_output;
|
||||
opal_pmix.fence((p), (s)); \
|
||||
} while(0);
|
||||
|
||||
/**
 * Convenience wrapper around opal_pmix_base_exchange(), which swaps
 * data values between two procs using the PMIx Publish/Lookup APIs.
 *
 * r - lvalue that receives the return code of the exchange
 * i - pointer to the value being published (its ->key is logged)
 * p - pointer to the pdata being looked up (its ->value.key is logged)
 * t - timeout forwarded unchanged to opal_pmix_base_exchange()
 *
 * A verbose-level-1 trace with the calling file/line is emitted before
 * the exchange is attempted.
 *
 * NOTE: the expansion already ends with a semicolon, so avoid using
 * this macro as the unbraced body of an if that has an else branch.
 */
#define OPAL_PMIX_EXCHANGE(r, i, p, t)                              \
    do {                                                            \
        OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output,           \
                             "%s[%s:%d] EXCHANGE %s WITH %s",       \
                             OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),    \
                             __FILE__, __LINE__,                    \
                             (i)->key, (p)->value.key));            \
        (r) = opal_pmix_base_exchange((i), (p), (t));               \
    } while(0);
|
||||
|
||||
|
||||
/* callback handler for errors */
|
||||
typedef void (*opal_pmix_errhandler_fn_t)(int error);
|
||||
|
||||
/* NOTE: calls to these APIs must be thread-protected as there
|
||||
* currently is NO internal thread safety. */
|
||||
|
||||
|
||||
/************************************************************
|
||||
* CLIENT APIs *
|
||||
|
@ -262,6 +262,7 @@ void pmix1_value_load(pmix_value_t *v,
|
||||
switch(kv->type) {
|
||||
case OPAL_UNDEF:
|
||||
v->type = PMIX_UNDEF;
|
||||
opal_output(0, "TYPE WAS UNDEF");
|
||||
break;
|
||||
case OPAL_BOOL:
|
||||
v->type = PMIX_BOOL;
|
||||
|
@ -105,7 +105,7 @@ BEGIN_C_DECLS
|
||||
#define OPAL_PMIX_NOTIFY_COMPLETION "pmix.notecomp" // (bool) notify parent process upon termination of child job
|
||||
#define OPAL_PMIX_RANGE "pmix.range" // (int) opal_pmix_data_range_t value for calls to publish/lookup/unpublish
|
||||
#define OPAL_PMIX_PERSISTENCE "pmix.persist" // (int) opal_pmix_persistence_t value for calls to publish
|
||||
#define OPAL_PMIX_OPTIONAL "pmix.optional" // (bool) look only in the immediate data store for the requested value - do
|
||||
#define OPAL_PMIX_OPTIONAL "pmix.optional" // (bool) look only in the immediate data store for the requested value - do
|
||||
// not request data from the server if not found
|
||||
|
||||
/* attribute used by host server to pass data to the server convenience library - the
|
||||
@ -171,6 +171,7 @@ typedef enum {
|
||||
* consistent order with the PMIx distro */
|
||||
typedef enum {
|
||||
OPAL_PMIX_PERSIST_INDEF = 0, // retain until specifically deleted
|
||||
OPAL_PMIX_PERSIST_FIRST_READ, // delete upon first access
|
||||
OPAL_PMIX_PERSIST_PROC, // retain until publishing process terminates
|
||||
OPAL_PMIX_PERSIST_APP, // retain until application terminates
|
||||
OPAL_PMIX_PERSIST_SESSION // retain until session/allocation terminates
|
||||
|
@ -48,14 +48,9 @@
|
||||
#include "opal/runtime/opal_cr.h"
|
||||
#include "opal/runtime/opal_progress_threads.h"
|
||||
|
||||
#include "orte/mca/rml/base/base.h"
|
||||
#include "orte/mca/routed/base/base.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/dfs/base/base.h"
|
||||
#include "orte/mca/grpcomm/base/base.h"
|
||||
#include "orte/mca/oob/base/base.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/qos/base/base.h"
|
||||
#include "orte/mca/odls/odls_types.h"
|
||||
#include "orte/mca/filem/base/base.h"
|
||||
#include "orte/mca/errmgr/base/base.h"
|
||||
@ -181,84 +176,14 @@ int orte_ess_base_app_setup(bool db_restrict_local)
|
||||
}
|
||||
OBJ_DESTRUCT(&kv);
|
||||
}
|
||||
/* Setup the communication infrastructure */
|
||||
/*
|
||||
* OOB Layer
|
||||
*/
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_oob_base_open";
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = orte_oob_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_oob_base_select";
|
||||
goto error;
|
||||
}
|
||||
/* Runtime Messaging Layer */
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_rml_base_open";
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = orte_rml_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_rml_base_select";
|
||||
goto error;
|
||||
}
|
||||
/* Messaging QoS Layer */
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_qos_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_qos_base_open";
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = orte_qos_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_qos_base_select";
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* setup the errmgr */
|
||||
if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_errmgr_base_select";
|
||||
goto error;
|
||||
}
|
||||
/* Routed system */
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_routed_base_open";
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = orte_routed_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_routed_base_select";
|
||||
goto error;
|
||||
}
|
||||
/*
|
||||
* Group communications
|
||||
*/
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_grpcomm_base_framework, 0))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_grpcomm_base_open";
|
||||
goto error;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = orte_grpcomm_base_select())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_grpcomm_base_select";
|
||||
goto error;
|
||||
}
|
||||
/* enable communication via the rml */
|
||||
if (ORTE_SUCCESS != (ret = orte_rml.enable_comm())) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_rml.enable_comm";
|
||||
goto error;
|
||||
}
|
||||
/* setup the routed info */
|
||||
if (ORTE_SUCCESS != (ret = orte_routed.init_routes(ORTE_PROC_MY_NAME->jobid, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
error = "orte_routed.init_routes";
|
||||
goto error;
|
||||
}
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
/*
|
||||
* Setup the SnapC
|
||||
@ -335,13 +260,7 @@ int orte_ess_base_app_finalize(void)
|
||||
(void) mca_base_framework_close(&orte_filem_base_framework);
|
||||
(void) mca_base_framework_close(&orte_errmgr_base_framework);
|
||||
|
||||
/* now can close the rml and its friendly group comm */
|
||||
(void) mca_base_framework_close(&orte_grpcomm_base_framework);
|
||||
(void) mca_base_framework_close(&orte_dfs_base_framework);
|
||||
(void) mca_base_framework_close(&orte_routed_base_framework);
|
||||
|
||||
(void) mca_base_framework_close(&orte_rml_base_framework);
|
||||
(void) mca_base_framework_close(&orte_oob_base_framework);
|
||||
(void) mca_base_framework_close(&orte_state_base_framework);
|
||||
|
||||
orte_session_dir_finalize(ORTE_PROC_MY_NAME);
|
||||
@ -396,7 +315,7 @@ void orte_ess_base_app_abort(int status, bool report)
|
||||
* the message if routing is enabled as this indicates we
|
||||
* have someone to send to
|
||||
*/
|
||||
if (report && orte_routing_is_enabled && orte_create_session_dirs) {
|
||||
if (report && orte_create_session_dirs) {
|
||||
myfile = opal_os_path(false, orte_process_info.proc_session_dir, "aborted", NULL);
|
||||
fd = open(myfile, O_CREAT, S_IRUSR);
|
||||
close(fd);
|
||||
|
@ -47,8 +47,6 @@
|
||||
#include "opal/mca/pmix/base/base.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/grpcomm/grpcomm.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
@ -85,7 +83,6 @@ static int rte_init(void)
|
||||
char *envar, *ev1, *ev2;
|
||||
uint64_t unique_key[2];
|
||||
char *string_key;
|
||||
char *rmluri;
|
||||
opal_value_t *kv;
|
||||
char *val;
|
||||
int u32, *u32ptr;
|
||||
@ -355,16 +352,6 @@ static int rte_init(void)
|
||||
|
||||
/*** PUSH DATA FOR OTHERS TO FIND ***/
|
||||
|
||||
/* push our RML URI in case others need to talk directly to us */
|
||||
rmluri = orte_rml.get_contact_info();
|
||||
/* push it out for others to use */
|
||||
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_PROC_URI, rmluri, OPAL_STRING);
|
||||
if (ORTE_SUCCESS != ret) {
|
||||
error = "pmix put uri";
|
||||
goto error;
|
||||
}
|
||||
free(rmluri);
|
||||
|
||||
/* push our hostname so others can find us, if they need to */
|
||||
OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_HOSTNAME, orte_process_info.nodename, OPAL_STRING);
|
||||
if (ORTE_SUCCESS != ret) {
|
||||
|
@ -1,56 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2012-2013 Los Alamos National Security, LLC.
|
||||
# All rights reserved
|
||||
# Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
sources = \
|
||||
oob_usock_component.h \
|
||||
oob_usock.h \
|
||||
oob_usock_component.c \
|
||||
oob_usock_connection.h \
|
||||
oob_usock_sendrecv.h \
|
||||
oob_usock_hdr.h \
|
||||
oob_usock_peer.h \
|
||||
oob_usock_ping.h \
|
||||
oob_usock.c \
|
||||
oob_usock_connection.c \
|
||||
oob_usock_sendrecv.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_orte_oob_usock_DSO
|
||||
component_noinst =
|
||||
component_install = mca_oob_usock.la
|
||||
else
|
||||
component_noinst = libmca_oob_usock.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(ortelibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_oob_usock_la_SOURCES = $(sources)
|
||||
mca_oob_usock_la_LDFLAGS = -module -avoid-version
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_oob_usock_la_SOURCES = $(sources)
|
||||
libmca_oob_usock_la_LDFLAGS = -module -avoid-version
|
||||
|
@ -1,42 +0,0 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2011-2013 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2013 Intel, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_oob_usock_CONFIG([action-if-found], [action-if-not-found])
|
||||
# -----------------------------------------------------------
|
||||
AC_DEFUN([MCA_orte_oob_usock_CONFIG],[
|
||||
AC_CONFIG_FILES([orte/mca/oob/usock/Makefile])
|
||||
|
||||
# check for sockaddr_un (a good sign we have Unix domain sockets)
|
||||
AC_CHECK_TYPES([struct sockaddr_un],
|
||||
[oob_usock_happy="yes"],
|
||||
[oob_usock_happy="no"],
|
||||
[AC_INCLUDES_DEFAULT
|
||||
#ifdef HAVE_SYS_SOCKET_H
|
||||
#include <sys/socket.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_UN_H
|
||||
#include <sys/un.h>
|
||||
#endif])
|
||||
|
||||
AS_IF([test "$oob_usock_happy" = "yes"], [$1], [$2])
|
||||
])dnl
|
@ -1,70 +0,0 @@
|
||||
# -*- text -*-
|
||||
#
|
||||
# Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
[static-and-dynamic]
|
||||
Both static and dynamic port ranges were specified for the
|
||||
out-of-band (OOB) communication subsystem:
|
||||
|
||||
Static ports: %s
|
||||
Dynamic ports: %s
|
||||
|
||||
Only one can be specified. Please choose either static or
|
||||
dynamic ports and try again.
|
||||
#
|
||||
[include-exclude]
|
||||
Both TCP interface include and exclude lists were specified:
|
||||
|
||||
Include: %s
|
||||
Exclude: %s
|
||||
|
||||
Only one of these can be given.
|
||||
#
|
||||
[not-parseable]
|
||||
The specified network is not parseable. Since we cannot determine
|
||||
your desired intent, we cannot establish a TCP socket for out-of-band
|
||||
communications and will therefore abort. Please correct the network
|
||||
specification and retry.
|
||||
#
|
||||
[no-included-found]
|
||||
None of the networks specified to be included for out-of-band communications
|
||||
could be found:
|
||||
|
||||
Value given: %s
|
||||
|
||||
Please revise the specification and try again.
|
||||
#
|
||||
[excluded-all]
|
||||
The specified list of networks to be excluded for out-of-band communications
|
||||
resulted in no networks being available:
|
||||
|
||||
Value given: %s
|
||||
|
||||
Please revise the specification and try again.
|
||||
#
|
||||
[no-interfaces-avail]
|
||||
No network interfaces were found for out-of-band communications. We require
|
||||
at least one available network for TCP-based messaging.
|
||||
#
|
||||
[invalid if_inexclude]
|
||||
WARNING: An invalid value was given for oob_tcp_if_%s. This
|
||||
value will be ignored.
|
||||
|
||||
Local host: %s
|
||||
Value: %s
|
||||
Message: %s
|
||||
#
|
@ -1,473 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2015 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/types.h"
|
||||
#include "opal/types.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#include <fcntl.h>
|
||||
#ifdef HAVE_NETINET_IN_H
|
||||
#include <netinet/in.h>
|
||||
#endif
|
||||
#ifdef HAVE_ARPA_INET_H
|
||||
#include <arpa/inet.h>
|
||||
#endif
|
||||
#ifdef HAVE_NETDB_H
|
||||
#include <netdb.h>
|
||||
#endif
|
||||
#include <ctype.h>
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/opal_socket_errno.h"
|
||||
#include "opal/util/if.h"
|
||||
#include "opal/util/net.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/parse_options.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orte/mca/oob/usock/oob_usock.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_component.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_peer.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_connection.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_ping.h"
|
||||
|
||||
static void usock_init(void);
|
||||
static void usock_fini(void);
|
||||
static void accept_connection(const int accepted_fd,
|
||||
const struct sockaddr *addr);
|
||||
static void ping(const orte_process_name_t *proc);
|
||||
static void send_nb(orte_rml_send_t *msg);
|
||||
static void ft_event(int state);
|
||||
|
||||
mca_oob_usock_module_t mca_oob_usock_module = {
|
||||
{
|
||||
usock_init,
|
||||
usock_fini,
|
||||
accept_connection,
|
||||
ping,
|
||||
send_nb,
|
||||
ft_event
|
||||
}
|
||||
};
|
||||
|
||||
/*
|
||||
* Local utility functions
|
||||
*/
|
||||
static void recv_handler(int sd, short flags, void* user);
|
||||
/*
 * Body of the module-level progress thread: keeps cycling the module's
 * event base, one pass at a time, for as long as the module's
 * ev_active flag stays set. The obj argument is not used.
 */
static void* progress_thread_engine(opal_object_t *obj)
{
    opal_output_verbose(2, orte_oob_base_framework.framework_output,
                        "%s USOCK PROGRESS THREAD RUNNING",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    for (;;) {
        /* the flag is re-read before every pass through the loop */
        if (!mca_oob_usock_module.ev_active) {
            break;
        }
        opal_event_loop(mca_oob_usock_module.ev_base, OPAL_EVLOOP_ONCE);
    }

    return OPAL_THREAD_CANCELLED;
}
|
||||
|
||||
|
||||
/*
|
||||
* Initialize global variables used w/in this module.
|
||||
*/
|
||||
static void usock_init(void)
|
||||
{
|
||||
/* setup the module's state variables */
|
||||
OBJ_CONSTRUCT(&mca_oob_usock_module.peers, opal_hash_table_t);
|
||||
opal_hash_table_init(&mca_oob_usock_module.peers, 32);
|
||||
mca_oob_usock_module.ev_active = false;
|
||||
|
||||
if (orte_oob_base.use_module_threads) {
|
||||
/* if we are to use independent progress threads at
|
||||
* the module level, start it now
|
||||
*/
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s STARTING USOCK PROGRESS THREAD",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
mca_oob_usock_module.ev_base = opal_event_base_create();
|
||||
/* construct the thread object */
|
||||
OBJ_CONSTRUCT(&mca_oob_usock_module.progress_thread, opal_thread_t);
|
||||
/* fork off a thread to progress it */
|
||||
mca_oob_usock_module.progress_thread.t_run = progress_thread_engine;
|
||||
mca_oob_usock_module.progress_thread.t_arg = NULL;
|
||||
mca_oob_usock_module.ev_active = true;
|
||||
if (OPAL_SUCCESS != opal_thread_start(&mca_oob_usock_module.progress_thread)) {
|
||||
opal_output(0, "%s USOCK progress thread failed to start",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
}
|
||||
} else {
|
||||
mca_oob_usock_module.ev_base = orte_event_base;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Module cleanup.
|
||||
*/
|
||||
static void usock_fini(void)
|
||||
{
|
||||
/* cleanup all peers */
|
||||
OBJ_DESTRUCT(&mca_oob_usock_module.peers);
|
||||
|
||||
if (mca_oob_usock_module.ev_active) {
|
||||
/* if we used an independent progress thread at
|
||||
* the module level, stop it now
|
||||
*/
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s STOPPING USOCK PROGRESS THREAD",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
/* stop the progress thread */
|
||||
mca_oob_usock_module.ev_active = false;
|
||||
/* break the event loop */
|
||||
opal_event_base_loopexit(mca_oob_usock_module.ev_base);
|
||||
/* wait for thread to exit */
|
||||
opal_thread_join(&mca_oob_usock_module.progress_thread, NULL);
|
||||
OBJ_DESTRUCT(&mca_oob_usock_module.progress_thread);
|
||||
/* release the event base */
|
||||
opal_event_base_free(mca_oob_usock_module.ev_base);
|
||||
}
|
||||
}
|
||||
|
||||
/* Called by mca_oob_usock_accept() and connection_handler() on
|
||||
* a socket that has been accepted. This call finishes processing the
|
||||
* socket by registering for the OOB-level connection handshake. Used
|
||||
* in both the threaded and event listen modes.
|
||||
*/
|
||||
static void accept_connection(const int accepted_fd,
|
||||
const struct sockaddr *addr)
|
||||
{
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s accept_connection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* use a one-time event to wait for receipt of peer's
|
||||
* process ident message to complete this connection
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_ACCEPT_STATE(accepted_fd, addr, recv_handler);
|
||||
}
|
||||
|
||||
/* API functions */
|
||||
static void process_ping(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_ping_t *op = (mca_oob_usock_ping_t*)cbdata;
|
||||
mca_oob_usock_peer_t *peer;
|
||||
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s:[%s:%d] processing ping to peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(&op->peer));
|
||||
|
||||
/* do we know this peer? */
|
||||
if (NULL == (peer = mca_oob_usock_peer_lookup(&op->peer))) {
|
||||
/* push this back to the framework so another component can try */
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s:[%s:%d] hop %s unknown",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(&op->peer));
|
||||
#if 0
|
||||
ORTE_ACTIVATE_USOCK_MSG_ERROR(NULL, NULL, &op->peer, mca_oob_usock_component_hop_unknown);
|
||||
#endif
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* if we are already connected, there is nothing to do */
|
||||
if (MCA_OOB_USOCK_CONNECTED == peer->state) {
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s:[%s:%d] already connected to peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(&op->peer));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* if we are already connecting, there is nothing to do */
|
||||
if (MCA_OOB_USOCK_CONNECTING == peer->state &&
|
||||
MCA_OOB_USOCK_CONNECT_ACK == peer->state) {
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s:[%s:%d] already connecting to peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(&op->peer));
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* attempt the connection */
|
||||
peer->state = MCA_OOB_USOCK_CONNECTING;
|
||||
ORTE_ACTIVATE_USOCK_CONN_STATE(peer, mca_oob_usock_peer_try_connect);
|
||||
|
||||
cleanup:
|
||||
OBJ_RELEASE(op);
|
||||
}
|
||||
|
||||
static void ping(const orte_process_name_t *proc)
|
||||
{
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s:[%s:%d] pinging peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(proc));
|
||||
|
||||
/* push this into our event base for processing */
|
||||
ORTE_ACTIVATE_USOCK_PING(proc, process_ping);
|
||||
}
|
||||
|
||||
static void process_send(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_msg_op_t *op = (mca_oob_usock_msg_op_t*)cbdata;
|
||||
mca_oob_usock_peer_t *peer;
|
||||
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s:[%s:%d] processing send to peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
__FILE__, __LINE__,
|
||||
ORTE_NAME_PRINT(&op->msg->dst));
|
||||
|
||||
/* if I am an app, the only route is to my daemon, so
|
||||
* send the msg there
|
||||
*/
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
if (NULL == (peer = mca_oob_usock_peer_lookup(ORTE_PROC_MY_DAEMON))) {
|
||||
/* we don't know how to talk to our daemon,
|
||||
* which is strange since we already got here.
|
||||
* likely means we lost a race condition, so
|
||||
*
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_MSG_ERROR(NULL, op->msg,
|
||||
ORTE_PROC_MY_DAEMON,
|
||||
mca_oob_usock_component_cannot_send);
|
||||
goto cleanup;
|
||||
}
|
||||
} else if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
|
||||
/* if I am a daemon, the only way I should be given this
|
||||
* message to send is if the proc is local to me
|
||||
*/
|
||||
if (NULL == (peer = mca_oob_usock_peer_lookup(&op->msg->dst))) {
|
||||
/* we don't know how to talk to this proc,
|
||||
* so send this back up to the OOB base so it
|
||||
* can try another transport
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_MSG_ERROR(NULL, op->msg,
|
||||
&op->msg->dst,
|
||||
mca_oob_usock_component_cannot_send);
|
||||
goto cleanup;
|
||||
}
|
||||
} else {
|
||||
/* otherwise, this message can't be handled by me, so
|
||||
* notify the component of the mistake
|
||||
*/
|
||||
opal_output(0, "CAN'T BE HANDLED");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* add the msg to the target's send queue */
|
||||
if (MCA_OOB_USOCK_CONNECTED == peer->state) {
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s usock:send_nb: already connected to %s - queueing for send",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
MCA_OOB_USOCK_QUEUE_SEND(op->msg, peer);
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* add the message to the queue for sending after the
|
||||
* connection is formed
|
||||
*/
|
||||
MCA_OOB_USOCK_QUEUE_PENDING(op->msg, peer);
|
||||
|
||||
if (MCA_OOB_USOCK_CONNECTING != peer->state &&
|
||||
MCA_OOB_USOCK_CONNECT_ACK != peer->state) {
|
||||
/* we have to initiate the connection - again, we do not
|
||||
* want to block while the connection is created.
|
||||
* So throw us into an event that will create
|
||||
* the connection via a mini-state-machine :-)
|
||||
*/
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s usock:send_nb: initiating connection to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
peer->state = MCA_OOB_USOCK_CONNECTING;
|
||||
ORTE_ACTIVATE_USOCK_CONN_STATE(peer, mca_oob_usock_peer_try_connect);
|
||||
}
|
||||
|
||||
cleanup:
|
||||
OBJ_RELEASE(op);
|
||||
}
|
||||
|
||||
/*
 * Module non-blocking send entry point: log the request and queue the
 * message so that process_send handles it from the module's event base.
 *
 * msg - the RML send request; msg->dst names the destination
 */
static void send_nb(orte_rml_send_t *msg)
{
    opal_output_verbose(2,
                        orte_oob_base_framework.framework_output,
                        "%s usock:send_nb to peer %s",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&msg->dst));

    /* the actual work is done by process_send */
    ORTE_ACTIVATE_USOCK_POST_SEND(msg, process_send);
}
|
||||
|
||||
/*
|
||||
* Event callback when there is data available on the registered
|
||||
* socket to recv. This is called for the listen sockets to accept an
|
||||
* incoming connection, on new sockets trying to complete the software
|
||||
* connection process, and for probes. Data on an established
|
||||
* connection is handled elsewhere.
|
||||
*/
|
||||
static void recv_handler(int sd, short flags, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_conn_op_t *op = (mca_oob_usock_conn_op_t*)cbdata;
|
||||
mca_oob_usock_hdr_t hdr;
|
||||
mca_oob_usock_peer_t *peer;
|
||||
uint64_t *ui64;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler called",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* get the handshake */
|
||||
if (ORTE_SUCCESS != mca_oob_usock_peer_recv_connect_ack(NULL, sd, &hdr)) {
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* finish processing ident */
|
||||
if (MCA_OOB_USOCK_IDENT == hdr.type) {
|
||||
if (NULL == (peer = mca_oob_usock_peer_lookup(&hdr.origin))) {
|
||||
/* should never happen */
|
||||
goto cleanup;
|
||||
}
|
||||
/* set socket up to be non-blocking */
|
||||
if ((flags = fcntl(sd, F_GETFL, 0)) < 0) {
|
||||
opal_output(0, "%s mca_oob_usock_recv_connect: fcntl(F_GETFL) failed: %s (%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno);
|
||||
} else {
|
||||
flags |= O_NONBLOCK;
|
||||
if (fcntl(sd, F_SETFL, flags) < 0) {
|
||||
opal_output(0, "%s mca_oob_usock_recv_connect: fcntl(F_SETFL) failed: %s (%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), strerror(opal_socket_errno), opal_socket_errno);
|
||||
}
|
||||
}
|
||||
|
||||
/* is the peer instance willing to accept this connection */
|
||||
peer->sd = sd;
|
||||
if (mca_oob_usock_peer_accept(peer) == false) {
|
||||
if (OOB_USOCK_DEBUG_CONNECT <= opal_output_get_verbosity(orte_oob_base_framework.framework_output)) {
|
||||
opal_output(0, "%s-%s mca_oob_usock_recv_connect: "
|
||||
"rejected connection state %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
peer->state);
|
||||
}
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
ui64 = (uint64_t*)(&peer->name);
|
||||
opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, (*ui64), NULL);
|
||||
OBJ_RELEASE(peer);
|
||||
}
|
||||
}
|
||||
|
||||
cleanup:
|
||||
OBJ_RELEASE(op);
|
||||
}
|
||||
|
||||
/*
 * Fault-tolerance (checkpoint/restart) notification hook.
 *
 * Neither build variant takes any action at present.  When FT support is
 * compiled in, the per-state branches are kept as explicit hook points;
 * the code that used to sit in them was compiled out and referred to
 * module fields that no longer exist, so it has been dropped.
 *
 * state  the OPAL_CRS_* state being reported
 */
#if OPAL_ENABLE_FT_CR == 0
static void ft_event(int state)
{
    (void)state;
}

#else
static void ft_event(int state)
{
    if (OPAL_CRS_CHECKPOINT == state) {
        /* nothing to do: event processing is left running */
    } else if (OPAL_CRS_CONTINUE == state) {
        /* nothing to do */
    } else if (OPAL_CRS_RESTART == state) {
        /* nothing to do: cached peer state is not rebuilt here */
    } else if (OPAL_CRS_TERM == state) {
        /* nothing to do */
    } else {
        /* unrecognized state: ignored */
    }
}
#endif
|
@ -1,97 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_H_
|
||||
#define _MCA_OOB_USOCK_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "orte/types.h"
|
||||
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/class/opal_free_list.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
|
||||
#include "orte/mca/oob/oob.h"
|
||||
#include "orte/mca/oob/base/base.h"
|
||||
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/* define some debug levels */
|
||||
#define OOB_USOCK_DEBUG_FAIL 2
|
||||
#define OOB_USOCK_DEBUG_CONNECT 7
|
||||
|
||||
/* forward declare a couple of structures */
|
||||
struct mca_oob_usock_module_t;
|
||||
struct mca_oob_usock_msg_error_t;
|
||||
|
||||
/* Module definition */
|
||||
typedef void (*mca_oob_usock_module_init_fn_t)(void);
|
||||
typedef void (*mca_oob_usock_module_fini_fn_t)(void);
|
||||
typedef void (*mca_oob_usock_module_accept_connection_fn_t)(const int accepted_fd,
|
||||
const struct sockaddr *addr);
|
||||
typedef void (*mca_oob_usock_module_ping_fn_t)(const orte_process_name_t *proc);
|
||||
typedef void (*mca_oob_usock_module_send_nb_fn_t)(orte_rml_send_t *msg);
|
||||
typedef void (*mca_oob_usock_module_ft_event_fn_t)(int state);
|
||||
|
||||
typedef struct {
|
||||
mca_oob_usock_module_init_fn_t init;
|
||||
mca_oob_usock_module_fini_fn_t finalize;
|
||||
mca_oob_usock_module_accept_connection_fn_t accept_connection;
|
||||
mca_oob_usock_module_ping_fn_t ping;
|
||||
mca_oob_usock_module_send_nb_fn_t send_nb;
|
||||
mca_oob_usock_module_ft_event_fn_t ft_event;
|
||||
} mca_oob_usock_module_api_t;
|
||||
typedef struct {
|
||||
mca_oob_usock_module_api_t api;
|
||||
opal_event_base_t *ev_base; /* event base for the module progress thread */
|
||||
bool ev_active;
|
||||
opal_thread_t progress_thread;
|
||||
opal_hash_table_t peers; // peer connection info
|
||||
} mca_oob_usock_module_t;
|
||||
ORTE_MODULE_DECLSPEC extern mca_oob_usock_module_t mca_oob_usock_module;
|
||||
|
||||
/**
 * The state of the connection to a peer.
 *
 * The numeric values are spelled out; they match the implicit numbering
 * of the original declaration.
 */
typedef enum {
    MCA_OOB_USOCK_UNCONNECTED = 0,
    MCA_OOB_USOCK_CLOSED      = 1,
    MCA_OOB_USOCK_RESOLVE     = 2,
    MCA_OOB_USOCK_CONNECTING  = 3,
    MCA_OOB_USOCK_CONNECT_ACK = 4,
    MCA_OOB_USOCK_CONNECTED   = 5,
    MCA_OOB_USOCK_FAILED      = 6,
    MCA_OOB_USOCK_ACCEPTING   = 7
} mca_oob_usock_state_t;
|
||||
|
||||
/* module-level shared functions */
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_send_handler(int fd, short args, void *cbdata);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_recv_handler(int fd, short args, void *cbdata);
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* _MCA_OOB_USOCK_H_ */
|
||||
|
@ -1,593 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2015 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
* In windows, many of the socket functions return an EWOULDBLOCK
|
||||
* instead of things like EAGAIN, EINPROGRESS, etc. It has been
|
||||
* verified that this will not conflict with other error codes that
|
||||
* are returned by these functions under UNIX/Linux environments
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/types.h"
|
||||
#include "opal/types.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#include <fcntl.h>
|
||||
#ifdef HAVE_NETINET_IN_H
|
||||
#include <netinet/in.h>
|
||||
#endif
|
||||
#ifdef HAVE_ARPA_INET_H
|
||||
#include <arpa/inet.h>
|
||||
#endif
|
||||
#ifdef HAVE_NETDB_H
|
||||
#include <netdb.h>
|
||||
#endif
|
||||
#include <ctype.h>
|
||||
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/util/os_path.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/opal_socket_errno.h"
|
||||
#include "opal/util/if.h"
|
||||
#include "opal/util/net.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/state/state.h"
|
||||
#include "orte/util/listener.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/parse_options.h"
|
||||
#include "orte/util/session_dir.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
|
||||
#include "orte/mca/oob/usock/oob_usock.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_component.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_peer.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_connection.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_ping.h"
|
||||
/*
|
||||
* Local utility functions
|
||||
*/
|
||||
|
||||
static int usock_component_register(void);
|
||||
static int usock_component_open(void);
|
||||
static int usock_component_close(void);
|
||||
|
||||
static int component_available(void);
|
||||
static int component_startup(void);
|
||||
static void component_shutdown(void);
|
||||
static int component_send(orte_rml_send_t *msg);
|
||||
static char* component_get_addr(void);
|
||||
static int component_set_addr(orte_process_name_t *peer,
|
||||
char **uris);
|
||||
static bool component_is_reachable(orte_process_name_t *peer);
|
||||
|
||||
/*
|
||||
* Struct of function pointers and all that to let us be initialized
|
||||
*/
|
||||
mca_oob_usock_component_t mca_oob_usock_component = {
|
||||
{
|
||||
.oob_base = {
|
||||
MCA_OOB_BASE_VERSION_2_0_0,
|
||||
.mca_component_name = "usock",
|
||||
MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
|
||||
ORTE_RELEASE_VERSION),
|
||||
.mca_open_component = usock_component_open,
|
||||
.mca_close_component = usock_component_close,
|
||||
.mca_register_component_params = usock_component_register,
|
||||
},
|
||||
.oob_data = {
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
.priority = 100,
|
||||
.available = component_available,
|
||||
.startup = component_startup,
|
||||
.shutdown = component_shutdown,
|
||||
.send_nb = component_send,
|
||||
.get_addr = component_get_addr,
|
||||
.set_addr = component_set_addr,
|
||||
.is_reachable = component_is_reachable,
|
||||
},
|
||||
};
|
||||
|
||||
/*
|
||||
* Initialize global variables used w/in this module.
|
||||
*/
|
||||
static int usock_component_open(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Cleanup of global variables used by this module.
|
||||
*/
|
||||
static int usock_component_close(void)
|
||||
{
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int usock_component_register(void)
|
||||
{
|
||||
mca_base_component_t *component = &mca_oob_usock_component.super.oob_base;
|
||||
|
||||
/* register oob module parameters */
|
||||
mca_oob_usock_component.max_retries = 2;
|
||||
(void)mca_base_component_var_register(component, "peer_retries",
|
||||
"Number of times to try shutting down a connection before giving up",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_oob_usock_component.max_retries);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int component_available(void)
|
||||
{
|
||||
opal_output_verbose(5, orte_oob_base_framework.framework_output,
|
||||
"oob:usock: component_available called");
|
||||
|
||||
/* if session directories were forbidden, then we cannot be used */
|
||||
if (!orte_create_session_dirs ||
|
||||
NULL == orte_process_info.tmpdir_base ||
|
||||
NULL == orte_process_info.top_session_dir) {
|
||||
return ORTE_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
/* this component is not available to tools */
|
||||
if (ORTE_PROC_IS_TOOL) {
|
||||
return ORTE_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
if (NULL == orte_process_info.my_daemon_uri) {
|
||||
/* direct-launched apps cannot use it */
|
||||
return ORTE_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
/* apps launched by daemons *must* use it */
|
||||
return ORTE_ERR_FORCE_SELECT;
|
||||
}
|
||||
|
||||
/* otherwise, we are available */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handler for accepting connections from the event library
|
||||
*/
|
||||
static void connection_event_handler(int incoming_sd, short flags, void* cbdata)
|
||||
{
|
||||
orte_pending_connection_t *pending = (orte_pending_connection_t*)cbdata;
|
||||
int sd;
|
||||
|
||||
sd = pending->fd;
|
||||
pending->fd = -1;
|
||||
OBJ_RELEASE(pending);
|
||||
|
||||
/* process the connection */
|
||||
mca_oob_usock_module.api.accept_connection(sd, NULL);
|
||||
}
|
||||
|
||||
/* Start the module */
|
||||
static int component_startup(void)
|
||||
{
|
||||
int rc=ORTE_SUCCESS;
|
||||
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s USOCK STARTUP",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
/* setup the path to the daemon rendezvous point */
|
||||
memset(&mca_oob_usock_component.address, 0, sizeof(struct sockaddr_un));
|
||||
mca_oob_usock_component.address.sun_family = AF_UNIX;
|
||||
snprintf(mca_oob_usock_component.address.sun_path,
|
||||
sizeof(mca_oob_usock_component.address.sun_path)-1,
|
||||
"%s/%s/%s/0/%s", orte_process_info.tmpdir_base,
|
||||
orte_process_info.top_session_dir,
|
||||
ORTE_JOB_FAMILY_PRINT(ORTE_PROC_MY_NAME->jobid), "usock");
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"SUNPATH: %s", mca_oob_usock_component.address.sun_path);
|
||||
|
||||
/* if we are a daemon/HNP, register our listener */
|
||||
if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
|
||||
if (ORTE_SUCCESS != (rc = orte_register_listener((struct sockaddr*)&mca_oob_usock_component.address, sizeof(struct sockaddr_un),
|
||||
orte_event_base, connection_event_handler))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
} else {
|
||||
/* if the rendezvous point isn't there, then that's an error */
|
||||
/* if the rendezvous file doesn't exist, that's an error */
|
||||
if (0 != access(mca_oob_usock_component.address.sun_path, R_OK)) {
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"SUNPATH: %s NOT READABLE", mca_oob_usock_component.address.sun_path);
|
||||
return OPAL_ERR_NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
/* start the module */
|
||||
mca_oob_usock_module.api.init();
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void component_shutdown(void)
|
||||
{
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s USOCK SHUTDOWN",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
|
||||
/* delete the rendezvous file */
|
||||
unlink(mca_oob_usock_component.address.sun_path);
|
||||
}
|
||||
|
||||
/* shutdown the module */
|
||||
if (NULL != mca_oob_usock_module.api.finalize) {
|
||||
mca_oob_usock_module.api.finalize();
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Component-level send.
 *
 * A daemon/HNP only accepts messages whose destination has a proc object
 * flagged ORTE_PROC_FLAG_LOCAL; anything else yields
 * ORTE_ERR_TAKE_NEXT_OPTION.  Applications accept every destination.
 * Accepted messages go to the module's send_nb entry point and
 * ORTE_SUCCESS is returned.
 */
static int component_send(orte_rml_send_t *msg)
{
    orte_proc_t *target;

    opal_output_verbose(5, orte_oob_base_framework.framework_output,
                        "%s oob:usock:send_nb to peer %s:%d to channel=%d seq_num =%d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&msg->dst), msg->tag, msg->dst_channel, msg->seq_num);

    if (ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) {
        /* daemons can only reach local procs */
        if (NULL == (target = orte_get_proc_object(&msg->dst)) ||
            !ORTE_FLAG_TEST(target, ORTE_PROC_FLAG_LOCAL)) {
            return ORTE_ERR_TAKE_NEXT_OPTION;
        }
    }

    /* apps can reach anyone via this module as the daemon
     * will route the message to the final destination
     */
    mca_oob_usock_module.api.send_nb(msg);

    return ORTE_SUCCESS;
}
|
||||
|
||||
/*
 * Return this component's contribution to the process URI.
 *
 * The uri is not used to determine a peer's address (the path is known
 * via the session directory), but something must still be provided:
 * other places in ORTE use a NULL uri to indicate lack of a daemon.
 * That dependency may eventually be removed; for now the uri is never
 * NULL, even if this is the only active OOB transport.
 *
 * Returns the result of strdup("usock").
 */
static char* component_get_addr(void)
{
    return strdup("usock");
}
|
||||
|
||||
static int component_set_addr(orte_process_name_t *peer,
|
||||
char **uris)
|
||||
{
|
||||
orte_proc_t *proc;
|
||||
mca_oob_usock_peer_t *pr;
|
||||
uint64_t *ui64;
|
||||
|
||||
/* if I am an application, then everything is addressable
|
||||
* by me via my daemon
|
||||
*/
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
/* if this is my daemon, then take it - otherwise, ignore */
|
||||
if (ORTE_PROC_MY_DAEMON->jobid == peer->jobid &&
|
||||
ORTE_PROC_MY_DAEMON->vpid == peer->vpid) {
|
||||
ui64 = (uint64_t*)peer;
|
||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_usock_module.peers,
|
||||
(*ui64), (void**)&pr) || NULL == pr) {
|
||||
pr = OBJ_NEW(mca_oob_usock_peer_t);
|
||||
pr->name = *peer;
|
||||
opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, (*ui64), pr);
|
||||
}
|
||||
/* we have to initiate the connection because otherwise the
|
||||
* daemon has no way to communicate to us via this component
|
||||
* as the app doesn't have a listening port */
|
||||
pr->state = MCA_OOB_USOCK_CONNECTING;
|
||||
ORTE_ACTIVATE_USOCK_CONN_STATE(pr, mca_oob_usock_peer_try_connect);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
/* otherwise, indicate that we cannot reach this peer */
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
|
||||
/* if I am a daemon or HNP, I can only reach my
|
||||
* own local procs via this component
|
||||
*/
|
||||
if (ORTE_PROC_MY_NAME->jobid == peer->jobid) {
|
||||
/* another daemon */
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
if (NULL == (proc = orte_get_proc_object(peer)) ||
|
||||
!ORTE_FLAG_TEST(proc, ORTE_PROC_FLAG_LOCAL)) {
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
/* indicate that this peer is addressable by this component */
|
||||
ui64 = (uint64_t*)peer;
|
||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_usock_module.peers,
|
||||
(*ui64), (void**)&pr) || NULL == pr) {
|
||||
pr = OBJ_NEW(mca_oob_usock_peer_t);
|
||||
pr->name = *peer;
|
||||
opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, (*ui64), pr);
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
void mca_oob_usock_component_set_module(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_peer_op_t *pop = (mca_oob_usock_peer_op_t*)cbdata;
|
||||
uint64_t ui64;
|
||||
int rc;
|
||||
orte_oob_base_peer_t *bpr;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:set_module called for peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&pop->peer->name));
|
||||
|
||||
/* retrieve the peer's name */
|
||||
memcpy(&ui64, (char*)&(pop->peer->name), sizeof(uint64_t));
|
||||
|
||||
/* make sure the OOB knows that we are handling this peer - we
|
||||
* are in the same event base as the OOB base, so we can
|
||||
* directly access its storage
|
||||
*/
|
||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_oob_base.peers,
|
||||
ui64, (void**)&bpr) || NULL == bpr) {
|
||||
bpr = OBJ_NEW(orte_oob_base_peer_t);
|
||||
}
|
||||
opal_bitmap_set_bit(&bpr->addressable, mca_oob_usock_component.super.idx);
|
||||
bpr->component = &mca_oob_usock_component.super;
|
||||
if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers,
|
||||
ui64, bpr))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
OBJ_RELEASE(pop);
|
||||
}
|
||||
|
||||
void mca_oob_usock_component_lost_connection(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_peer_op_t *pop = (mca_oob_usock_peer_op_t*)cbdata;
|
||||
uint64_t ui64;
|
||||
int rc;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:lost connection called for peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&pop->peer->name));
|
||||
|
||||
/* retrieve the peer's name */
|
||||
memcpy(&ui64, (char*)&(pop->peer->name), sizeof(uint64_t));
|
||||
|
||||
/* mark the OOB's table that we can't reach it any more - for now, we don't
|
||||
* worry about shifting to another component. Eventually, we will want to push
|
||||
* this decision to the OOB so it can try other components and eventually error out
|
||||
*/
|
||||
if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers,
|
||||
ui64, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
/* activate the proc state - since an app only connects to its parent daemon,
|
||||
* and the daemon is *always* its lifeline, activate the lifeline lost state */
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
ORTE_ACTIVATE_PROC_STATE(&pop->peer->name, ORTE_PROC_STATE_LIFELINE_LOST);
|
||||
} else {
|
||||
/* we are the daemon end, so notify that the child's comm failed */
|
||||
ORTE_ACTIVATE_PROC_STATE(&pop->peer->name, ORTE_PROC_STATE_COMM_FAILED);
|
||||
}
|
||||
|
||||
OBJ_RELEASE(pop);
|
||||
}
|
||||
|
||||
void mca_oob_usock_component_cannot_send(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_msg_error_t *pop = (mca_oob_usock_msg_error_t*)cbdata;
|
||||
uint64_t ui64;
|
||||
int rc;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:unable to send to peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&pop->hop));
|
||||
|
||||
/* retrieve the peer's name */
|
||||
memcpy(&ui64, (char*)&(pop->hop), sizeof(uint64_t));
|
||||
|
||||
/* mark the OOB's table that we can't reach it any more - for now, we don't
|
||||
* worry about shifting to another component. Eventually, we will want to push
|
||||
* this decision to the OOB so it can try other components and eventually error out
|
||||
*/
|
||||
if (OPAL_SUCCESS != (rc = opal_hash_table_set_value_uint64(&orte_oob_base.peers,
|
||||
ui64, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
|
||||
/* have the OOB base try to send it again */
|
||||
ORTE_OOB_SEND(pop->rmsg);
|
||||
|
||||
OBJ_RELEASE(pop);
|
||||
}
|
||||
|
||||
void mca_oob_usock_component_failed_to_connect(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_peer_op_t *pop = (mca_oob_usock_peer_op_t*)cbdata;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:failed_to_connect called for peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&pop->peer->name));
|
||||
|
||||
/* if we are terminating, then don't do anything further */
|
||||
if (orte_orteds_term_ordered || orte_finalizing || orte_abnormal_term_ordered) {
|
||||
OBJ_RELEASE(pop);
|
||||
return;
|
||||
}
|
||||
|
||||
/* activate the proc state */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:failed_to_connect unable to reach peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&pop->peer->name));
|
||||
|
||||
/* since an app only connects to its parent daemon,
|
||||
* and the daemon is *always* its lifeline, activate the lifeline lost state */
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
ORTE_ACTIVATE_PROC_STATE(&pop->peer->name, ORTE_PROC_STATE_LIFELINE_LOST);
|
||||
} else {
|
||||
/* we are the daemon end, so notify that the child's comm failed */
|
||||
ORTE_ACTIVATE_PROC_STATE(&pop->peer->name, ORTE_PROC_STATE_COMM_FAILED);
|
||||
}
|
||||
OBJ_RELEASE(pop);
|
||||
}
|
||||
|
||||
/*
 * Report whether "peer" can be reached through this component.
 *
 * An application reaches everything (via its daemon).  A daemon/HNP
 * reaches only procs of other jobs that have a proc object flagged
 * ORTE_PROC_FLAG_LOCAL.
 */
static bool component_is_reachable(orte_process_name_t *peer)
{
    orte_proc_t *target;

    /* if I am an application, then everything is reachable
     * by me via my daemon
     */
    if (ORTE_PROC_IS_APP) {
        return true;
    }

    /* same job as me means another daemon - not via this component */
    if (ORTE_PROC_MY_NAME->jobid == peer->jobid) {
        return false;
    }

    /* otherwise reachable only when it is one of my own local procs */
    target = orte_get_proc_object(peer);
    if (NULL != target && ORTE_FLAG_TEST(target, ORTE_PROC_FLAG_LOCAL)) {
        return true;
    }
    return false;
}
|
||||
|
||||
/*
 * Return a printable name for a connection state.
 *
 * state  the connection state to describe
 *
 * Returns a pointer to a string literal (the caller must not modify or
 * free it); any value that is not a known state yields "UNKNOWN".
 */
char* mca_oob_usock_state_print(mca_oob_usock_state_t state)
{
    switch (state) {
    case MCA_OOB_USOCK_UNCONNECTED:
        return "UNCONNECTED";
    case MCA_OOB_USOCK_CLOSED:
        return "CLOSED";
    case MCA_OOB_USOCK_RESOLVE:
        return "RESOLVE";
    case MCA_OOB_USOCK_CONNECTING:
        return "CONNECTING";
    case MCA_OOB_USOCK_CONNECT_ACK:
        return "ACK";
    case MCA_OOB_USOCK_CONNECTED:
        return "CONNECTED";
    case MCA_OOB_USOCK_FAILED:
        return "FAILED";
    case MCA_OOB_USOCK_ACCEPTING:
        /* a declared state that previously fell through to "UNKNOWN" */
        return "ACCEPTING";
    default:
        return "UNKNOWN";
    }
}
|
||||
|
||||
|
||||
mca_oob_usock_peer_t* mca_oob_usock_peer_lookup(const orte_process_name_t *name)
|
||||
{
|
||||
mca_oob_usock_peer_t *peer;
|
||||
uint64_t ui64;
|
||||
|
||||
memcpy(&ui64, (char*)name, sizeof(uint64_t));
|
||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&mca_oob_usock_module.peers, ui64, (void**)&peer)) {
|
||||
return NULL;
|
||||
}
|
||||
return peer;
|
||||
}
|
||||
|
||||
/* OOB USOCK Class instances */
|
||||
|
||||
/*
 * Constructor for mca_oob_usock_peer_t: no socket, unconnected, no
 * retries, empty send queue, no message in flight, no active events.
 */
static void peer_cons(mca_oob_usock_peer_t *peer)
{
    /* connection bookkeeping */
    peer->sd = -1;
    peer->state = MCA_OOB_USOCK_UNCONNECTED;
    peer->retries = 0;
    peer->auth_method = NULL;

    /* message bookkeeping */
    OBJ_CONSTRUCT(&peer->send_queue, opal_list_t);
    peer->send_msg = NULL;
    peer->recv_msg = NULL;

    /* no events are armed yet */
    peer->send_ev_active = false;
    peer->recv_ev_active = false;
    peer->timer_ev_active = false;
}
|
||||
/*
 * Destructor for mca_oob_usock_peer_t: frees the auth method string,
 * closes the socket when one is held (sd >= 0) and destructs the send
 * queue.
 */
static void peer_des(mca_oob_usock_peer_t *peer)
{
    /* free(NULL) is a no-op, so no guard is needed */
    free(peer->auth_method);

    if (peer->sd >= 0) {
        CLOSE_THE_SOCKET(peer->sd);
    }

    OPAL_LIST_DESTRUCT(&peer->send_queue);
}
|
||||
/* peers are list items and carry their own constructor/destructor */
OBJ_CLASS_INSTANCE(mca_oob_usock_peer_t, opal_list_item_t, peer_cons, peer_des);

/* the operation/event carrier classes are plain objects without hooks */
OBJ_CLASS_INSTANCE(mca_oob_usock_peer_op_t, opal_object_t, NULL, NULL);

OBJ_CLASS_INSTANCE(mca_oob_usock_msg_op_t, opal_object_t, NULL, NULL);

OBJ_CLASS_INSTANCE(mca_oob_usock_conn_op_t, opal_object_t, NULL, NULL);

OBJ_CLASS_INSTANCE(mca_oob_usock_ping_t, opal_object_t, NULL, NULL);
|
||||
|
@ -1,64 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_COMPONENT_H_
|
||||
#define _MCA_OOB_USOCK_COMPONENT_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_SOCKET_H
|
||||
#include <sys/socket.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_UN_H
|
||||
#include <sys/un.h>
|
||||
#endif
|
||||
|
||||
#include "opal/class/opal_bitmap.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/class/opal_pointer_array.h"
|
||||
|
||||
#include "orte/mca/oob/oob.h"
|
||||
#include "oob_usock_peer.h"
|
||||
#include "oob_usock.h"
|
||||
|
||||
/**
|
||||
* OOB USOCK Component
|
||||
*/
|
||||
typedef struct {
|
||||
mca_oob_base_component_t super; /**< base OOB component */
|
||||
int max_retries; /**< max number of retries before declaring peer gone */
|
||||
struct sockaddr_un address; /**< address of our rendezvous point */
|
||||
} mca_oob_usock_component_t;
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern mca_oob_usock_component_t mca_oob_usock_component;
|
||||
|
||||
ORTE_MODULE_DECLSPEC char* mca_oob_usock_state_print(mca_oob_usock_state_t state);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_component_set_module(int fd, short args, void *cbdata);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_component_lost_connection(int fd, short args, void *cbdata);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_component_failed_to_connect(int fd, short args, void *cbdata);
|
||||
ORTE_MODULE_DECLSPEC mca_oob_usock_peer_t* mca_oob_usock_peer_lookup(const orte_process_name_t *name);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_component_cannot_send(int fd, short args, void *cbdata);
|
||||
|
||||
#endif /* _MCA_OOB_USOCK_COMPONENT_H_ */
|
@ -1,940 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <fcntl.h>
|
||||
#ifdef HAVE_SYS_UIO_H
|
||||
#include <sys/uio.h>
|
||||
#endif
|
||||
#ifdef HAVE_NET_UIO_H
|
||||
#include <net/uio.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#include "opal/opal_socket_errno.h"
|
||||
#ifdef HAVE_NETINET_IN_H
|
||||
#include <netinet/in.h>
|
||||
#endif
|
||||
#ifdef HAVE_ARPA_INET_H
|
||||
#include <arpa/inet.h>
|
||||
#endif
|
||||
#ifdef HAVE_NETINET_TCP_H
|
||||
#include <netinet/tcp.h>
|
||||
#endif
|
||||
|
||||
#include "opal/types.h"
|
||||
#include "opal_stdint.h"
|
||||
#include "opal/mca/backtrace/backtrace.h"
|
||||
#include "opal/mca/base/mca_base_var.h"
|
||||
#include "opal/mca/sec/sec.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/net.h"
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/util/fd.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/mca/state/state.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
|
||||
#include "oob_usock.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_component.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_peer.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_connection.h"
|
||||
|
||||
/* Helpers private to this file; their definitions follow below. */
static void usock_peer_event_init(mca_oob_usock_peer_t *peer);
static int usock_peer_send_connect_ack(mca_oob_usock_peer_t *peer);
static int usock_peer_send_blocking(mca_oob_usock_peer_t *peer, int sd,
                                    void *data, size_t size);
static bool usock_peer_recv_blocking(mca_oob_usock_peer_t *peer, int sd,
                                     void *data, size_t size);
static void usock_peer_connected(mca_oob_usock_peer_t *peer);
|
||||
|
||||
/*
 * Create the unix-domain stream socket used to talk to a peer.
 *
 * Does nothing if the peer already holds a socket (peer->sd > 0).
 * Otherwise the new descriptor is stored in peer->sd, marked
 * close-on-exec, registered with the peer's recv/send events and
 * switched to non-blocking mode.
 *
 * @param peer  peer to create the socket for
 * @return ORTE_SUCCESS on success (also when only the switch to
 *         non-blocking mode failed - that failure is logged but treated
 *         as non-fatal), ORTE_ERR_UNREACH if socket() failed, ORTE_ERROR
 *         if close-on-exec could not be set (the socket is closed and
 *         peer->sd reset to -1 in that case).
 */
static int usock_peer_create_socket(mca_oob_usock_peer_t* peer)
{
    int flags;
    int err;

    if (peer->sd > 0) {
        return ORTE_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((1, orte_oob_base_framework.framework_output,
                         "%s oob:usock:peer creating socket to %s",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                         ORTE_NAME_PRINT(&(peer->name))));
    peer->sd = socket(PF_UNIX, SOCK_STREAM, 0);

    if (peer->sd < 0) {
        /* capture errno before any other call can overwrite it */
        err = opal_socket_errno;
        opal_output(0, "%s-%s usock_peer_create_socket: socket() failed: %s (%d)\n",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&(peer->name)),
                    strerror(err),
                    err);
        return ORTE_ERR_UNREACH;
    }

    /* Set this fd to be close-on-exec so that subsequent children don't see it */
    if (opal_fd_set_cloexec(peer->sd) != OPAL_SUCCESS) {
        opal_output(0, "%s unable to set socket to CLOEXEC",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        close(peer->sd);
        peer->sd = -1;
        return ORTE_ERROR;
    }

    /* setup event callbacks */
    usock_peer_event_init(peer);

    /* setup the socket as non-blocking; peer->sd is known to be valid here */
    flags = fcntl(peer->sd, F_GETFL, 0);
    if (flags < 0) {
        err = opal_socket_errno;
        opal_output(0, "%s-%s usock_peer_connect: fcntl(F_GETFL) failed: %s (%d)\n",
                    ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                    ORTE_NAME_PRINT(&(peer->name)),
                    strerror(err),
                    err);
    } else {
        flags |= O_NONBLOCK;
        if (fcntl(peer->sd, F_SETFL, flags) < 0) {
            err = opal_socket_errno;
            opal_output(0, "%s-%s usock_peer_connect: fcntl(F_SETFL) failed: %s (%d)\n",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&(peer->name)),
                        strerror(err),
                        err);
        }
    }

    return ORTE_SUCCESS;
}
|
||||
|
||||
|
||||
/*
|
||||
* Try connecting to a peer
|
||||
*/
|
||||
void mca_oob_usock_peer_try_connect(int fd, short args, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_conn_op_t *op = (mca_oob_usock_conn_op_t*)cbdata;
|
||||
mca_oob_usock_peer_t *peer = op->peer;
|
||||
int rc;
|
||||
opal_socklen_t addrlen = 0;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s orte_usock_peer_try_connect: "
|
||||
"attempting to connect to proc %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
|
||||
rc = usock_peer_create_socket(peer);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
/* FIXME: we cannot create a USOCK socket - report
|
||||
* back to the component that this peer is
|
||||
* unreachable so it can remove the peer
|
||||
* from its list and report back to the base
|
||||
* NOTE: this could be a reconnect attempt,
|
||||
* so we also need to mark any queued messages
|
||||
* and return them as "unreachable"
|
||||
*/
|
||||
opal_output(0, "%s CANNOT CREATE SOCKET", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
ORTE_FORCED_TERMINATE(1);
|
||||
OBJ_RELEASE(op);
|
||||
return;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s orte_usock_peer_try_connect: "
|
||||
"attempting to connect to proc %s on socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)), peer->sd);
|
||||
|
||||
addrlen = sizeof(struct sockaddr_un);
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s orte_usock_peer_try_connect: "
|
||||
"attempting to connect to proc %s - %d retries",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
peer->retries);
|
||||
|
||||
retry_connect:
|
||||
peer->retries++;
|
||||
if (connect(peer->sd, (struct sockaddr *) &mca_oob_usock_component.address, addrlen) < 0) {
|
||||
/* non-blocking so wait for completion */
|
||||
if (opal_socket_errno == EINPROGRESS || opal_socket_errno == EWOULDBLOCK) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s waiting for connect completion to %s - activating send event",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
/* just ensure the send_event is active */
|
||||
if (!peer->send_ev_active) {
|
||||
opal_event_add(&peer->send_event, 0);
|
||||
peer->send_ev_active = true;
|
||||
}
|
||||
OBJ_RELEASE(op);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Some kernels (Linux 2.6) will automatically software
|
||||
abort a connection that was ECONNREFUSED on the last
|
||||
attempt, without even trying to establish the
|
||||
connection. Handle that case in a semi-rational
|
||||
way by trying twice before giving up */
|
||||
if (ECONNABORTED == opal_socket_errno) {
|
||||
if (peer->retries < mca_oob_usock_component.max_retries) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connection aborted by OS to %s - retrying",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
goto retry_connect;
|
||||
} else {
|
||||
/* We were unsuccessful in establishing this connection, and are
|
||||
* not likely to suddenly become successful,
|
||||
*/
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s orte_usock_peer_try_connect: "
|
||||
"Connection across unix domain socket to local proc %s failed",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
CLOSE_THE_SOCKET(peer->sd);
|
||||
/* let the USOCK component know that this module failed to make
|
||||
* the connection so it can try other modules, and/or fail back
|
||||
* to the OOB level so another component can try. This will activate
|
||||
* an event in the component event base, and so it will fire async
|
||||
* from us if we are in our own progress thread
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_CMP_OP(peer, mca_oob_usock_component_failed_to_connect);
|
||||
OBJ_RELEASE(op);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* connection succeeded */
|
||||
peer->retries = 0;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s orte_usock_peer_try_connect: "
|
||||
"Connection across to proc %s succeeded",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
/* setup our recv to catch the return ack call */
|
||||
if (!peer->recv_ev_active) {
|
||||
opal_event_add(&peer->recv_event, 0);
|
||||
peer->recv_ev_active = true;
|
||||
}
|
||||
|
||||
/* send our globally unique process identifier to the peer */
|
||||
if (ORTE_SUCCESS == (rc = usock_peer_send_connect_ack(peer))) {
|
||||
peer->state = MCA_OOB_USOCK_CONNECT_ACK;
|
||||
} else {
|
||||
opal_output(0,
|
||||
"%s orte_usock_peer_try_connect: "
|
||||
"usock_peer_send_connect_ack to proc %s failed: %s (%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
opal_strerror(rc), rc);
|
||||
ORTE_FORCED_TERMINATE(1);
|
||||
}
|
||||
|
||||
OBJ_RELEASE(op);
|
||||
}
|
||||
|
||||
/*
 * Send our connect-ack (identification handshake) to a peer.
 *
 * The message is one mca_oob_usock_hdr_t of type MCA_OOB_USOCK_IDENT
 * followed by the NUL-terminated ORTE version string and then our
 * security credential. hdr.nbytes counts everything after the header.
 *
 * @param peer  peer to send to; the message goes out on peer->sd
 * @return ORTE_SUCCESS, the error from opal_sec.get_my_credential,
 *         ORTE_ERR_OUT_OF_RESOURCE if the message buffer cannot be
 *         allocated, or ORTE_ERR_UNREACH if the send failed.
 */
static int usock_peer_send_connect_ack(mca_oob_usock_peer_t* peer)
{
    char *msg;
    mca_oob_usock_hdr_t hdr;
    int rc;
    size_t sdsize;
    size_t verlen;
    char *cred;
    size_t credsize;

    opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
                        "%s SEND CONNECT ACK", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    /* start from an all-zero header so no indeterminate bytes
     * (unset fields, padding) are copied onto the wire */
    memset(&hdr, 0, sizeof(hdr));

    /* send a handshake that includes our process identifier
     * to ensure we are talking to another OMPI process
     */
    hdr.origin = *ORTE_PROC_MY_NAME;
    hdr.dst = peer->name;
    hdr.type = MCA_OOB_USOCK_IDENT;
    hdr.tag = 0;
    hdr.channel = 0xffffffff;
    hdr.seq_num = 0;

    /* get our security credential */
    if (OPAL_SUCCESS != (rc = opal_sec.get_my_credential(peer->auth_method,
                                                         ORTE_PROC_MY_NAME, &cred, &credsize))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* length of the version string including its terminating NUL */
    verlen = strlen(orte_version_string) + 1;

    /* set the number of bytes to be read beyond the header */
    hdr.nbytes = verlen + credsize;

    /* create a zero-filled space for our message */
    sdsize = sizeof(hdr) + verlen + credsize;
    if (NULL == (msg = (char*)calloc(1, sdsize))) {
        /* we own the credential buffer - do not leak it */
        free(cred);
        return ORTE_ERR_OUT_OF_RESOURCE;
    }

    /* load the message: header, version string (with NUL), credential */
    memcpy(msg, &hdr, sizeof(hdr));
    memcpy(msg + sizeof(hdr), orte_version_string, verlen);
    memcpy(msg + sizeof(hdr) + verlen, cred, credsize);
    free(cred);

    rc = usock_peer_send_blocking(peer, peer->sd, msg, sdsize);
    free(msg);
    if (ORTE_SUCCESS != rc) {
        ORTE_ERROR_LOG(ORTE_ERR_UNREACH);
        return ORTE_ERR_UNREACH;
    }
    return ORTE_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* Initialize events to be used by the peer instance for USOCK select/poll callbacks.
|
||||
*/
|
||||
static void usock_peer_event_init(mca_oob_usock_peer_t* peer)
|
||||
{
|
||||
if (peer->sd >= 0) {
|
||||
opal_event_set(mca_oob_usock_module.ev_base,
|
||||
&peer->recv_event,
|
||||
peer->sd,
|
||||
OPAL_EV_READ|OPAL_EV_PERSIST,
|
||||
mca_oob_usock_recv_handler,
|
||||
peer);
|
||||
opal_event_set_priority(&peer->recv_event, ORTE_MSG_PRI);
|
||||
if (peer->recv_ev_active) {
|
||||
opal_event_del(&peer->recv_event);
|
||||
peer->recv_ev_active = false;
|
||||
}
|
||||
opal_event_set(mca_oob_usock_module.ev_base,
|
||||
&peer->send_event,
|
||||
peer->sd,
|
||||
OPAL_EV_WRITE|OPAL_EV_PERSIST,
|
||||
mca_oob_usock_send_handler,
|
||||
peer);
|
||||
opal_event_set_priority(&peer->send_event, ORTE_MSG_PRI);
|
||||
if (peer->send_ev_active) {
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check the status of the connection. If the connection failed, will retry
|
||||
* later. Otherwise, send this processes identifier to the peer on the
|
||||
* newly connected socket.
|
||||
*/
|
||||
void mca_oob_usock_peer_complete_connect(mca_oob_usock_peer_t *peer)
|
||||
{
|
||||
int so_error = 0;
|
||||
opal_socklen_t so_length = sizeof(so_error);
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:complete_connect called for peer %s on socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name), peer->sd);
|
||||
|
||||
/* check connect completion status */
|
||||
if (getsockopt(peer->sd, SOL_SOCKET, SO_ERROR, (char *)&so_error, &so_length) < 0) {
|
||||
opal_output(0, "%s usock_peer_complete_connect: getsockopt() to %s failed: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return;
|
||||
}
|
||||
|
||||
if (so_error == EINPROGRESS) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:send:handler still in progress",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
return;
|
||||
} else if (so_error == ECONNREFUSED || so_error == ETIMEDOUT) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s-%s usock_peer_complete_connect: connection failed: %s (%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
strerror(so_error),
|
||||
so_error);
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return;
|
||||
} else if (so_error != 0) {
|
||||
/* No need to worry about the return code here - we return regardless
|
||||
at this point, and if an error did occur a message has already been
|
||||
printed for the user */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s-%s usock_peer_complete_connect: "
|
||||
"connection failed with error %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)), so_error);
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock_peer_complete_connect: "
|
||||
"sending ack to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
|
||||
if (usock_peer_send_connect_ack(peer) == ORTE_SUCCESS) {
|
||||
peer->state = MCA_OOB_USOCK_CONNECT_ACK;
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock_peer_complete_connect: "
|
||||
"setting read event on connection to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
|
||||
if (!peer->recv_ev_active) {
|
||||
opal_event_add(&peer->recv_event, 0);
|
||||
peer->recv_ev_active = true;
|
||||
}
|
||||
} else {
|
||||
opal_output(0, "%s usock_peer_complete_connect: unable to send connect ack to %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* A blocking send on a non-blocking socket. Used to send the small amount of connection
|
||||
* information that identifies the peers endpoint.
|
||||
*/
|
||||
static int usock_peer_send_blocking(mca_oob_usock_peer_t* peer,
|
||||
int sd, void* data, size_t size)
|
||||
{
|
||||
unsigned char* ptr = (unsigned char*)data;
|
||||
size_t cnt = 0;
|
||||
int retval;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s send blocking of %"PRIsize_t" bytes to socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
size, sd);
|
||||
|
||||
while (cnt < size) {
|
||||
retval = send(sd, (char*)ptr+cnt, size-cnt, 0);
|
||||
if (retval < 0) {
|
||||
if (opal_socket_errno != EINTR && opal_socket_errno != EAGAIN && opal_socket_errno != EWOULDBLOCK) {
|
||||
opal_output(0, "%s usock_peer_send_blocking: send() to socket %d failed: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), sd,
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
cnt += retval;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s blocking send complete to socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), sd);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Receive the peers globally unique process identification from a newly
|
||||
* connected socket and verify the expected response. If so, move the
|
||||
* socket to a connected state.
|
||||
*/
|
||||
int mca_oob_usock_peer_recv_connect_ack(mca_oob_usock_peer_t* pr, int sd,
|
||||
mca_oob_usock_hdr_t *dhdr)
|
||||
{
|
||||
char *msg;
|
||||
char *version;
|
||||
int rc, cmpval;
|
||||
char *cred;
|
||||
size_t credsize;
|
||||
mca_oob_usock_peer_t *peer;
|
||||
mca_oob_usock_hdr_t hdr;
|
||||
uint64_t *ui64;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s RECV CONNECT ACK FROM %s ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == pr) ? "UNKNOWN" : ORTE_NAME_PRINT(&pr->name), sd);
|
||||
|
||||
peer = pr;
|
||||
/* ensure all is zero'd */
|
||||
memset(&hdr, 0, sizeof(mca_oob_usock_hdr_t));
|
||||
|
||||
if (usock_peer_recv_blocking(peer, sd, &hdr, sizeof(mca_oob_usock_hdr_t))) {
|
||||
if (NULL != peer) {
|
||||
/* If the peer state is CONNECT_ACK, then we were waiting for
|
||||
* the connection to be ack'd
|
||||
*/
|
||||
if (peer->state != MCA_OOB_USOCK_CONNECT_ACK) {
|
||||
/* handshake broke down - abort this connection */
|
||||
opal_output(0, "%s RECV CONNECT BAD HANDSHAKE FROM %s ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name), sd);
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* unable to complete the recv */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s unable to complete recv of connect-ack from %s ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&peer->name), sd);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
/* if the requestor wanted the header returned, then do so now */
|
||||
if (NULL != dhdr) {
|
||||
*dhdr = hdr;
|
||||
}
|
||||
|
||||
if (MCA_OOB_USOCK_PROBE == hdr.type) {
|
||||
/* send a header back */
|
||||
hdr.type = MCA_OOB_USOCK_PROBE;
|
||||
hdr.dst = hdr.origin;
|
||||
hdr.origin = *ORTE_PROC_MY_NAME;
|
||||
usock_peer_send_blocking(peer, sd, &hdr, sizeof(mca_oob_usock_hdr_t));
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
if (hdr.type != MCA_OOB_USOCK_IDENT) {
|
||||
opal_output(0, "usock_peer_recv_connect_ack: invalid header type: %d\n", hdr.type);
|
||||
if (NULL != peer) {
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
} else {
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
}
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connect-ack recvd from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&peer->name));
|
||||
|
||||
/* if we don't already have it, get the peer */
|
||||
if (NULL == peer) {
|
||||
peer = mca_oob_usock_peer_lookup(&hdr.origin);
|
||||
if (NULL == peer) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s mca_oob_usock_recv_connect: connection from new peer",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
peer = OBJ_NEW(mca_oob_usock_peer_t);
|
||||
peer->name = hdr.origin;
|
||||
peer->state = MCA_OOB_USOCK_ACCEPTING;
|
||||
peer->sd = sd;
|
||||
ui64 = (uint64_t*)(&peer->name);
|
||||
if (OPAL_SUCCESS != opal_hash_table_set_value_uint64(&mca_oob_usock_module.peers, (*ui64), peer)) {
|
||||
OBJ_RELEASE(peer);
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
} else {
|
||||
/* check for a race condition - if I was in the process of
|
||||
* creating a connection to the peer, or have already established
|
||||
* such a connection, then we need to reject this connection. We will
|
||||
* let the higher ranked process retry - if I'm the lower ranked
|
||||
* process, I'll simply defer until I receive the request
|
||||
*/
|
||||
if (MCA_OOB_USOCK_CONNECTED == peer->state ||
|
||||
MCA_OOB_USOCK_CONNECTING == peer->state ||
|
||||
MCA_OOB_USOCK_CONNECT_ACK == peer->state) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s SIMUL CONNECTION WITH %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&hdr.origin));
|
||||
if (peer->recv_ev_active) {
|
||||
opal_event_del(&peer->recv_event);
|
||||
peer->recv_ev_active = false;
|
||||
}
|
||||
if (peer->send_ev_active) {
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
}
|
||||
if (0 < peer->sd) {
|
||||
CLOSE_THE_SOCKET(peer->sd);
|
||||
peer->sd = -1;
|
||||
}
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
peer->retries = 0;
|
||||
cmpval = orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &hdr.origin, ORTE_PROC_MY_NAME);
|
||||
if (OPAL_VALUE1_GREATER == cmpval) {
|
||||
/* force the other end to retry the connection */
|
||||
peer->state = MCA_OOB_USOCK_UNCONNECTED;
|
||||
return ORTE_ERR_UNREACH;
|
||||
} else {
|
||||
/* retry the connection */
|
||||
peer->state = MCA_OOB_USOCK_CONNECTING;
|
||||
ORTE_ACTIVATE_USOCK_CONN_STATE(peer, mca_oob_usock_peer_try_connect);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* compare the peers name to the expected value */
|
||||
if (OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, &peer->name, &hdr.origin)) {
|
||||
opal_output(0, "%s usock_peer_recv_connect_ack: "
|
||||
"received unexpected process identifier %s from %s\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(hdr.origin)),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connect-ack header from %s is okay",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
|
||||
/* get the authentication and version payload */
|
||||
if (NULL == (msg = (char*)malloc(hdr.nbytes))) {
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return ORTE_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
if (!usock_peer_recv_blocking(peer, sd, msg, hdr.nbytes)) {
|
||||
/* unable to complete the recv */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s unable to complete recv of connect-ack from %s ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name), peer->sd);
|
||||
free(msg);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
|
||||
/* check that this is from a matching version */
|
||||
version = (char*)(msg);
|
||||
if (0 != strcmp(version, orte_version_string)) {
|
||||
opal_output(0, "%s usock_peer_recv_connect_ack: "
|
||||
"received different version from %s: %s instead of %s\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
version, orte_version_string);
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
free(msg);
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connect-ack version from %s matches ours",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
|
||||
/* check security token */
|
||||
cred = (char*)(msg + strlen(version) + 1);
|
||||
credsize = hdr.nbytes - strlen(version) - 1;
|
||||
if (OPAL_SUCCESS != (rc = opal_sec.authenticate(cred, credsize, &peer->auth_method))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
free(msg);
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connect-ack %s authenticated",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
|
||||
/* if the requestor wanted the header returned, then they
|
||||
* will complete their processing
|
||||
*/
|
||||
if (NULL != dhdr) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/* set the peer into the component and OOB-level peer tables to indicate
|
||||
* that we know this peer and we will be handling him
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_CMP_OP(peer, mca_oob_usock_component_set_module);
|
||||
|
||||
/* connected */
|
||||
usock_peer_connected(peer);
|
||||
if (OOB_USOCK_DEBUG_CONNECT <= opal_output_get_verbosity(orte_oob_base_framework.framework_output)) {
|
||||
mca_oob_usock_peer_dump(peer, "connected");
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Setup peer state to reflect that connection has been established,
|
||||
* and start any pending sends.
|
||||
*/
|
||||
static void usock_peer_connected(mca_oob_usock_peer_t* peer)
|
||||
{
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s-%s usock_peer_connected on socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)), peer->sd);
|
||||
|
||||
if (peer->timer_ev_active) {
|
||||
opal_event_del(&peer->timer_event);
|
||||
peer->timer_ev_active = false;
|
||||
}
|
||||
peer->state = MCA_OOB_USOCK_CONNECTED;
|
||||
|
||||
/* initiate send of first message on queue */
|
||||
if (NULL == peer->send_msg) {
|
||||
peer->send_msg = (mca_oob_usock_send_t*)
|
||||
opal_list_remove_first(&peer->send_queue);
|
||||
}
|
||||
if (NULL != peer->send_msg && !peer->send_ev_active) {
|
||||
opal_event_add(&peer->send_event, 0);
|
||||
peer->send_ev_active = true;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove any event registrations associated with the socket
|
||||
* and update the peer state to reflect the connection has
|
||||
* been closed.
|
||||
*/
|
||||
void mca_oob_usock_peer_close(mca_oob_usock_peer_t *peer)
|
||||
{
|
||||
mca_oob_usock_send_t *snd;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock_peer_close for %s sd %d state %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
peer->sd, mca_oob_usock_state_print(peer->state));
|
||||
|
||||
peer->state = MCA_OOB_USOCK_CLOSED;
|
||||
|
||||
/* release the socket */
|
||||
close(peer->sd);
|
||||
|
||||
/* inform the component-level that we have lost a connection so
|
||||
* it can decide what to do about it.
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_CMP_OP(peer, mca_oob_usock_component_lost_connection);
|
||||
|
||||
if (orte_orteds_term_ordered || orte_finalizing || orte_abnormal_term_ordered) {
|
||||
/* nothing more to do */
|
||||
return;
|
||||
}
|
||||
|
||||
/* FIXME: push any queued messages back onto the OOB for retry - note that
|
||||
* this must be done after the prior call to ensure that the component
|
||||
* processes the "lost connection" notice before the OOB begins to
|
||||
* handle these recycled messages. This prevents us from unintentionally
|
||||
* attempting to send the message again across the now-failed interface
|
||||
*/
|
||||
if (NULL != peer->send_msg) {
|
||||
}
|
||||
while (NULL != (snd = (mca_oob_usock_send_t*)opal_list_remove_first(&peer->send_queue))) {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* A blocking recv on a non-blocking socket. Used to receive the small amount of connection
|
||||
* information that identifies the peers endpoint.
|
||||
*/
|
||||
static bool usock_peer_recv_blocking(mca_oob_usock_peer_t* peer,
|
||||
int sd, void* data, size_t size)
|
||||
{
|
||||
unsigned char* ptr = (unsigned char*)data;
|
||||
size_t cnt = 0;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s waiting for connect ack from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)));
|
||||
|
||||
while (cnt < size) {
|
||||
int retval = recv(sd, (char *)ptr+cnt, size-cnt, 0);
|
||||
|
||||
/* remote closed connection */
|
||||
if (retval == 0) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s-%s usock_peer_recv_blocking: "
|
||||
"peer closed connection: peer state %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)),
|
||||
(NULL == peer) ? 0 : peer->state);
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* socket is non-blocking so handle errors */
|
||||
if (retval < 0) {
|
||||
if (opal_socket_errno != EINTR &&
|
||||
opal_socket_errno != EAGAIN &&
|
||||
opal_socket_errno != EWOULDBLOCK) {
|
||||
if (peer->state == MCA_OOB_USOCK_CONNECT_ACK) {
|
||||
/* If we overflow the listen backlog, it's
|
||||
possible that even though we finished the three
|
||||
way handshake, the remote host was unable to
|
||||
transition the connection from half connected
|
||||
(received the initial SYN) to fully connected
|
||||
(in the listen backlog). We likely won't see
|
||||
the failure until we try to receive, due to
|
||||
timing and the like. The first thing we'll get
|
||||
in that case is a RST packet, which receive
|
||||
will turn into a connection reset by peer
|
||||
errno. In that case, leave the socket in
|
||||
CONNECT_ACK and propogate the error up to
|
||||
recv_connect_ack, who will try to establish the
|
||||
connection again */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connect ack received error %s from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
strerror(opal_socket_errno),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)));
|
||||
return false;
|
||||
} else {
|
||||
opal_output(0,
|
||||
"%s usock_peer_recv_blocking: "
|
||||
"recv() failed for %s: %s (%d)\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)),
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
if (NULL != peer) {
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
} else {
|
||||
CLOSE_THE_SOCKET(sd);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
cnt += retval;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s connect ack received from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer) ? "UNKNOWN" : ORTE_NAME_PRINT(&(peer->name)));
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Routine for debugging to print the connection state and socket options
|
||||
*/
|
||||
void mca_oob_usock_peer_dump(mca_oob_usock_peer_t* peer, const char* msg)
|
||||
{
|
||||
char buff[255];
|
||||
int nodelay,flags;
|
||||
|
||||
if ((flags = fcntl(peer->sd, F_GETFL, 0)) < 0) {
|
||||
opal_output(0, "usock_peer_dump: fcntl(F_GETFL) failed: %s (%d)\n",
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
}
|
||||
#if defined(USOCK_NODELAY)
|
||||
optlen = sizeof(nodelay);
|
||||
if (getsockopt(peer->sd, IPPROTO_USOCK, USOCK_NODELAY, (char *)&nodelay, &optlen) < 0) {
|
||||
opal_output(0, "usock_peer_dump: USOCK_NODELAY option: %s (%d)\n",
|
||||
strerror(opal_socket_errno),
|
||||
opal_socket_errno);
|
||||
}
|
||||
#else
|
||||
nodelay = 0;
|
||||
#endif
|
||||
|
||||
snprintf(buff, sizeof(buff), "%s-%s %s: nodelay %d flags %08x\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
msg, nodelay, flags);
|
||||
opal_output(0, "%s", buff);
|
||||
}
|
||||
|
||||
/*
|
||||
* Accept incoming connection - if not already connected
|
||||
*/
|
||||
|
||||
bool mca_oob_usock_peer_accept(mca_oob_usock_peer_t* peer)
|
||||
{
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:peer_accept called for peer %s in state %s on socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name),
|
||||
mca_oob_usock_state_print(peer->state), peer->sd);
|
||||
|
||||
if (peer->state != MCA_OOB_USOCK_CONNECTED) {
|
||||
|
||||
usock_peer_event_init(peer);
|
||||
|
||||
if (usock_peer_send_connect_ack(peer) != ORTE_SUCCESS) {
|
||||
opal_output(0, "%s-%s usock_peer_accept: "
|
||||
"usock_peer_send_connect_ack failed\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
peer->state = MCA_OOB_USOCK_FAILED;
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* set the peer into the component and OOB-level peer tables to indicate
|
||||
* that we know this peer and we will be handling him
|
||||
*/
|
||||
ORTE_ACTIVATE_USOCK_CMP_OP(peer, mca_oob_usock_component_set_module);
|
||||
|
||||
usock_peer_connected(peer);
|
||||
if (!peer->recv_ev_active) {
|
||||
opal_event_add(&peer->recv_event, 0);
|
||||
peer->recv_ev_active = true;
|
||||
}
|
||||
/* if a message is waiting to be sent, ensure the send event is active */
|
||||
if (NULL != peer->send_msg && !peer->send_ev_active) {
|
||||
opal_event_add(&peer->send_event, 0);
|
||||
peer->send_ev_active = true;
|
||||
}
|
||||
if (OOB_USOCK_DEBUG_CONNECT <= opal_output_get_verbosity(orte_oob_base_framework.framework_output)) {
|
||||
mca_oob_usock_peer_dump(peer, "accepted");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:peer_accept ignored for peer %s in state %s on socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name),
|
||||
mca_oob_usock_state_print(peer->state), peer->sd);
|
||||
return false;
|
||||
}
|
@ -1,102 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_CONNECTION_H_
|
||||
#define _MCA_OOB_USOCK_CONNECTION_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_SOCKET_H
|
||||
#include <sys/socket.h>
|
||||
#endif
|
||||
|
||||
#include "oob_usock.h"
|
||||
#include "oob_usock_peer.h"
|
||||
|
||||
/* State machine for connection operations */
|
||||
typedef struct {
|
||||
opal_object_t super;
|
||||
mca_oob_usock_peer_t *peer;
|
||||
opal_event_t ev;
|
||||
} mca_oob_usock_conn_op_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_conn_op_t);
|
||||
|
||||
/*
 * Shut down both directions of a socket and then close its descriptor.
 * The argument is evaluated exactly once, so expressions with side
 * effects (e.g. an incremented index) are safe to pass.
 */
#define CLOSE_THE_SOCKET(socket)                    \
    do {                                            \
        int usock_close_sd_ = (socket);             \
        shutdown(usock_close_sd_, SHUT_RDWR);       \
        close(usock_close_sd_);                     \
    } while (0)
|
||||
|
||||
/*
 * Schedule a connection-state callback for peer `p`.
 *
 * Allocates a mca_oob_usock_conn_op_t, points it at `p`, and immediately
 * activates a write event on mca_oob_usock_module.ev_base so that
 * `cbfunc` is invoked with the op object as its callback data.
 * Note that `p` appears twice in the expansion (once for the debug
 * output, once for the assignment), so pass a plain pointer expression.
 *
 * The expansion deliberately has no trailing semicolon: the caller
 * supplies it, which keeps the macro usable inside if/else.
 */
#define ORTE_ACTIVATE_USOCK_CONN_STATE(p, cbfunc)                           \
    do {                                                                    \
        mca_oob_usock_conn_op_t *usock_conn_op_;                            \
        opal_output_verbose(5, orte_oob_base_framework.framework_output,    \
                            "%s:[%s:%d] connect to %s",                     \
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),             \
                            __FILE__, __LINE__,                             \
                            ORTE_NAME_PRINT((&(p)->name)));                 \
        usock_conn_op_ = OBJ_NEW(mca_oob_usock_conn_op_t);                  \
        usock_conn_op_->peer = (p);                                         \
        opal_event_set(mca_oob_usock_module.ev_base,                        \
                       &usock_conn_op_->ev, -1,                             \
                       OPAL_EV_WRITE, (cbfunc), usock_conn_op_);            \
        opal_event_set_priority(&usock_conn_op_->ev, ORTE_MSG_PRI);         \
        opal_event_active(&usock_conn_op_->ev, OPAL_EV_WRITE, 1);           \
    } while (0)
|
||||
|
||||
/*
 * Register a read event on descriptor `s` that invokes `cbfunc` with a
 * freshly allocated mca_oob_usock_conn_op_t as its callback data.
 *
 * The `a` argument is accepted but not referenced by the expansion, and
 * the op's `peer` field is not assigned here.
 *
 * The expansion deliberately has no trailing semicolon: the caller
 * supplies it, which keeps the macro usable inside if/else.
 */
#define ORTE_ACTIVATE_USOCK_ACCEPT_STATE(s, a, cbfunc)                      \
    do {                                                                    \
        mca_oob_usock_conn_op_t *usock_accept_op_;                          \
        usock_accept_op_ = OBJ_NEW(mca_oob_usock_conn_op_t);                \
        opal_event_set(mca_oob_usock_module.ev_base,                        \
                       &usock_accept_op_->ev, (s),                          \
                       OPAL_EV_READ, (cbfunc), usock_accept_op_);           \
        opal_event_set_priority(&usock_accept_op_->ev, ORTE_MSG_PRI);       \
        opal_event_add(&usock_accept_op_->ev, 0);                           \
    } while (0)
|
||||
|
||||
/*
 * Schedule a delayed connection retry for peer `p`.
 *
 * Allocates a mca_oob_usock_conn_op_t, points it at `p`, and arms a
 * timer event on mca_oob_usock_module.ev_base that invokes `cbfunc`
 * with the op object once the interval `tv` has elapsed.
 * Note that `p` appears twice in the expansion (once for the debug
 * output, once for the assignment), so pass a plain pointer expression.
 *
 * The expansion deliberately has no trailing semicolon: the caller
 * supplies it, which keeps the macro usable inside if/else.
 */
#define ORTE_RETRY_USOCK_CONN_STATE(p, cbfunc, tv)                          \
    do {                                                                    \
        mca_oob_usock_conn_op_t *usock_retry_op_;                           \
        opal_output_verbose(5, orte_oob_base_framework.framework_output,    \
                            "%s:[%s:%d] retry connect to %s",               \
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),             \
                            __FILE__, __LINE__,                             \
                            ORTE_NAME_PRINT((&(p)->name)));                 \
        usock_retry_op_ = OBJ_NEW(mca_oob_usock_conn_op_t);                 \
        usock_retry_op_->peer = (p);                                        \
        opal_event_evtimer_set(mca_oob_usock_module.ev_base,                \
                               &usock_retry_op_->ev,                        \
                               (cbfunc), usock_retry_op_);                  \
        opal_event_evtimer_add(&usock_retry_op_->ev, (tv));                 \
    } while (0)
|
||||
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_peer_try_connect(int fd, short args, void *cbdata);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_peer_dump(mca_oob_usock_peer_t* peer, const char* msg);
|
||||
ORTE_MODULE_DECLSPEC bool mca_oob_usock_peer_accept(mca_oob_usock_peer_t* peer);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_peer_complete_connect(mca_oob_usock_peer_t* peer);
|
||||
ORTE_MODULE_DECLSPEC int mca_oob_usock_peer_recv_connect_ack(mca_oob_usock_peer_t* peer,
|
||||
int sd, mca_oob_usock_hdr_t *hdr);
|
||||
ORTE_MODULE_DECLSPEC void mca_oob_usock_peer_close(mca_oob_usock_peer_t *peer);
|
||||
|
||||
#endif /* _MCA_OOB_USOCK_CONNECTION_H_ */
|
@ -1,59 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_HDR_H_
|
||||
#define _MCA_OOB_USOCK_HDR_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
/*
 * Message types carried in the usock header. The first three are
 * internal to this component and used for its own handshake
 * operations; the last marks a message that originated outside the
 * component.
 */
typedef enum {
    MCA_OOB_USOCK_IDENT = 0,
    MCA_OOB_USOCK_PROBE = 1,
    MCA_OOB_USOCK_PING  = 2,
    MCA_OOB_USOCK_USER  = 3
} mca_oob_usock_msg_type_t;
|
||||
|
||||
/* header for usock msgs */
|
||||
typedef struct {
|
||||
/* the original sender */
|
||||
orte_process_name_t origin;
|
||||
/* the intended final recipient */
|
||||
orte_process_name_t dst;
|
||||
/* type of message */
|
||||
mca_oob_usock_msg_type_t type;
|
||||
/* the rml tag where this message is headed */
|
||||
orte_rml_tag_t tag;
|
||||
/* the rml channel to which this message is headed */
|
||||
orte_rml_channel_num_t channel;
|
||||
/* msg seq number on the src channel */
|
||||
uint32_t seq_num;
|
||||
/* number of bytes in message */
|
||||
uint32_t nbytes;
|
||||
} mca_oob_usock_hdr_t;
|
||||
|
||||
#endif /* _MCA_OOB_USOCK_HDR_H_ */
|
@ -1,85 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_PEER_H_
|
||||
#define _MCA_OOB_USOCK_PEER_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "oob_usock.h"
|
||||
#include "oob_usock_sendrecv.h"
|
||||
|
||||
/* object for tracking peers */
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
/* although not required, there is enough debug
|
||||
* value that retaining the name makes sense
|
||||
*/
|
||||
orte_process_name_t name;
|
||||
char *auth_method; // how the peer authenticated themselves to use
|
||||
int sd;
|
||||
int retries; // number of times we have tried to connect to this address
|
||||
mca_oob_usock_state_t state;
|
||||
opal_event_t op_event; // used for connecting and operations other than read/write
|
||||
opal_event_t send_event; /**< registration with event thread for send events */
|
||||
bool send_ev_active;
|
||||
opal_event_t recv_event; /**< registration with event thread for recv events */
|
||||
bool recv_ev_active;
|
||||
opal_event_t timer_event; /**< timer for retrying connection failures */
|
||||
bool timer_ev_active;
|
||||
opal_list_t send_queue; /**< list of messages to send */
|
||||
mca_oob_usock_send_t *send_msg; /**< current send in progress */
|
||||
mca_oob_usock_recv_t *recv_msg; /**< current recv in progress */
|
||||
} mca_oob_usock_peer_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_peer_t);
|
||||
|
||||
typedef struct {
|
||||
opal_object_t super;
|
||||
opal_event_t ev;
|
||||
mca_oob_usock_peer_t *peer;
|
||||
} mca_oob_usock_peer_op_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_peer_op_t);
|
||||
|
||||
/*
 * Schedule `cbfunc` to run for peer `p`: allocates a
 * mca_oob_usock_peer_op_t, stores the peer in it, and immediately
 * activates a write event with the op object as callback data.
 *
 * NOTE(review): this macro uses `mca_usock_component.ev_base`, whereas
 * the other activation macros of this component use
 * `mca_oob_usock_module.ev_base` or `orte_event_base` - confirm that
 * the symbol exists before using this macro.
 *
 * The expansion deliberately has no trailing semicolon: the caller
 * supplies it, which keeps the macro usable inside if/else.
 */
#define ORTE_ACTIVATE_USOCK_PEER_OP(p, cbfunc)                              \
    do {                                                                    \
        mca_oob_usock_peer_op_t *usock_peer_op_;                            \
        usock_peer_op_ = OBJ_NEW(mca_oob_usock_peer_op_t);                  \
        usock_peer_op_->peer = (p);                                         \
        opal_event_set(mca_usock_component.ev_base,                         \
                       &usock_peer_op_->ev, -1,                             \
                       OPAL_EV_WRITE, (cbfunc), usock_peer_op_);            \
        opal_event_set_priority(&usock_peer_op_->ev, ORTE_MSG_PRI);         \
        opal_event_active(&usock_peer_op_->ev, OPAL_EV_WRITE, 1);           \
    } while (0)
|
||||
|
||||
/*
 * Schedule `cbfunc` to run for peer `p` on orte_event_base: allocates a
 * mca_oob_usock_peer_op_t, stores the peer in it, and immediately
 * activates a write event with the op object as callback data.
 *
 * The expansion deliberately has no trailing semicolon: the caller
 * supplies it, which keeps the macro usable inside if/else.
 */
#define ORTE_ACTIVATE_USOCK_CMP_OP(p, cbfunc)                               \
    do {                                                                    \
        mca_oob_usock_peer_op_t *usock_cmp_op_;                             \
        usock_cmp_op_ = OBJ_NEW(mca_oob_usock_peer_op_t);                   \
        usock_cmp_op_->peer = (p);                                          \
        opal_event_set(orte_event_base, &usock_cmp_op_->ev, -1,             \
                       OPAL_EV_WRITE, (cbfunc), usock_cmp_op_);             \
        opal_event_set_priority(&usock_cmp_op_->ev, ORTE_MSG_PRI);          \
        opal_event_active(&usock_cmp_op_->ev, OPAL_EV_WRITE, 1);            \
    } while (0)
|
||||
|
||||
|
||||
#endif /* _MCA_OOB_USOCK_PEER_H_ */
|
@ -1,52 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_PING_H_
|
||||
#define _MCA_OOB_USOCK_PING_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/mca/event/event.h"
|
||||
|
||||
#include "oob_usock.h"
|
||||
#include "oob_usock_sendrecv.h"
|
||||
|
||||
typedef struct {
|
||||
opal_object_t super;
|
||||
opal_event_t ev;
|
||||
orte_process_name_t peer;
|
||||
} mca_oob_usock_ping_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_ping_t);
|
||||
|
||||
/*
 * Schedule a ping of the process named by `p` (a pointer to a process
 * name): allocates a mca_oob_usock_ping_t, copies the jobid and vpid
 * into it, and immediately activates a write event on
 * mca_oob_usock_module.ev_base that invokes `cbfunc` with the ping
 * object as callback data.
 *
 * The macro-local variable uses a name that callers are unlikely to
 * have in scope, so an argument such as `&pop->peer` is not captured.
 * The expansion deliberately has no trailing semicolon: the caller
 * supplies it, which keeps the macro usable inside if/else.
 */
#define ORTE_ACTIVATE_USOCK_PING(p, cbfunc)                                 \
    do {                                                                    \
        mca_oob_usock_ping_t *usock_ping_op_;                               \
        usock_ping_op_ = OBJ_NEW(mca_oob_usock_ping_t);                     \
        usock_ping_op_->peer.jobid = (p)->jobid;                            \
        usock_ping_op_->peer.vpid = (p)->vpid;                              \
        opal_event_set(mca_oob_usock_module.ev_base,                        \
                       &usock_ping_op_->ev, -1,                             \
                       OPAL_EV_WRITE, (cbfunc), usock_ping_op_);            \
        opal_event_set_priority(&usock_ping_op_->ev, ORTE_MSG_PRI);         \
        opal_event_active(&usock_ping_op_->ev, OPAL_EV_WRITE, 1);           \
    } while (0)
|
||||
|
||||
#endif /* _MCA_OOB_USOCK_PING_H_ */
|
@ -1,631 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
* In windows, many of the socket functions return an EWOULDBLOCK
|
||||
* instead of things like EAGAIN, EINPROGRESS, etc. It has been
|
||||
* verified that this will not conflict with other error codes that
|
||||
* are returned by these functions under UNIX/Linux environments
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include <fcntl.h>
|
||||
#ifdef HAVE_SYS_UIO_H
|
||||
#include <sys/uio.h>
|
||||
#endif
|
||||
#ifdef HAVE_NET_UIO_H
|
||||
#include <net/uio.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#include "opal/opal_socket_errno.h"
|
||||
#ifdef HAVE_NETINET_IN_H
|
||||
#include <netinet/in.h>
|
||||
#endif
|
||||
#ifdef HAVE_ARPA_INET_H
|
||||
#include <arpa/inet.h>
|
||||
#endif
|
||||
#ifdef HAVE_NETINET_TCP_H
|
||||
#include <netinet/tcp.h>
|
||||
#endif
|
||||
|
||||
#include "opal_stdint.h"
|
||||
#include "opal/types.h"
|
||||
#include "opal/mca/backtrace/backtrace.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/net.h"
|
||||
#include "opal/util/error.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/ess/ess.h"
|
||||
#include "orte/mca/state/state.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
|
||||
#include "oob_usock.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_component.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_peer.h"
|
||||
#include "orte/mca/oob/usock/oob_usock_connection.h"
|
||||
|
||||
/*
 * Write the pending block (sdptr/sdbytes) of the peer's current send
 * message to the peer's socket.
 *
 * Returns ORTE_SUCCESS once the whole block has been written,
 * ORTE_ERR_RESOURCE_BUSY (EAGAIN) or ORTE_ERR_WOULD_BLOCK (EWOULDBLOCK)
 * when the socket cannot take more data right now - the message keeps
 * its position so the caller can retry on the next send event - and
 * ORTE_ERR_COMM_FAILURE on any other write error.
 */
static int send_bytes(mca_oob_usock_peer_t* peer)
{
    mca_oob_usock_send_t* msg = peer->send_msg;
    ssize_t rc;   /* write() returns ssize_t; an int could truncate it */
    int err;

    while (0 < msg->sdbytes) {
        rc = write(peer->sd, msg->sdptr, msg->sdbytes);
        if (rc < 0) {
            /* capture errno before any other call can overwrite it */
            err = opal_socket_errno;
            if (EINTR == err) {
                continue;
            } else if (EAGAIN == err) {
                /* tell the caller to keep this message on active,
                 * but let the event lib cycle so other messages
                 * can progress while this socket is busy
                 */
                return ORTE_ERR_RESOURCE_BUSY;
            } else if (EWOULDBLOCK == err) {
                /* same handling as above, reported with its own code */
                return ORTE_ERR_WOULD_BLOCK;
            }
            /* we hit an error and cannot progress this message */
            opal_output(0, "%s->%s mca_oob_usock_msg_send_bytes: write failed: %s (%d) [sd = %d]",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(&(peer->name)),
                        strerror(err),
                        err,
                        peer->sd);
            return ORTE_ERR_COMM_FAILURE;
        }
        /* update location */
        msg->sdbytes -= rc;
        msg->sdptr += rc;
    }
    /* we sent the full data block */
    return ORTE_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* A file descriptor is available/ready for send. Check the state
|
||||
* of the socket and take the appropriate action.
|
||||
*/
|
||||
void mca_oob_usock_send_handler(int sd, short flags, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_peer_t* peer = (mca_oob_usock_peer_t*)cbdata;
|
||||
mca_oob_usock_send_t* msg = peer->send_msg;
|
||||
int rc;
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:send_handler called to send to peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
|
||||
switch (peer->state) {
|
||||
case MCA_OOB_USOCK_CONNECTING:
|
||||
case MCA_OOB_USOCK_CLOSED:
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:send_handler %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
mca_oob_usock_state_print(peer->state));
|
||||
mca_oob_usock_peer_complete_connect(peer);
|
||||
/* de-activate the send event until the connection
|
||||
* handshake completes
|
||||
*/
|
||||
if (peer->send_ev_active) {
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
}
|
||||
break;
|
||||
case MCA_OOB_USOCK_CONNECTED:
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s usock:send_handler SENDING TO %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == peer->send_msg) ? "NULL" : ORTE_NAME_PRINT(&peer->name));
|
||||
if (NULL != msg) {
|
||||
/* if the header hasn't been completely sent, send it */
|
||||
if (!msg->hdr_sent) {
|
||||
if (ORTE_SUCCESS == (rc = send_bytes(peer))) {
|
||||
/* header is completely sent */
|
||||
msg->hdr_sent = true;
|
||||
/* setup to send the data */
|
||||
if (NULL == msg->msg) {
|
||||
/* this was a zero-byte msg - nothing more to do */
|
||||
OBJ_RELEASE(msg);
|
||||
peer->send_msg = NULL;
|
||||
goto next;
|
||||
} else if (NULL != msg->msg->buffer) {
|
||||
/* send the buffer data as a single block */
|
||||
msg->sdptr = msg->msg->buffer->base_ptr;
|
||||
msg->sdbytes = msg->msg->buffer->bytes_used;
|
||||
} else if (NULL != msg->msg->iov) {
|
||||
/* start with the first iovec */
|
||||
msg->sdptr = msg->msg->iov[0].iov_base;
|
||||
msg->sdbytes = msg->msg->iov[0].iov_len;
|
||||
msg->iovnum = 0;
|
||||
} else {
|
||||
msg->sdptr = msg->msg->data;
|
||||
msg->sdbytes = msg->msg->count;
|
||||
}
|
||||
/* fall thru and let the send progress */
|
||||
} else if (ORTE_ERR_RESOURCE_BUSY == rc ||
|
||||
ORTE_ERR_WOULD_BLOCK == rc) {
|
||||
/* exit this event and let the event lib progress */
|
||||
return;
|
||||
} else {
|
||||
// report the error
|
||||
opal_output(0, "%s-%s mca_oob_usock_peer_send_handler: unable to send header",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
msg->msg->status = rc;
|
||||
if( NULL == msg->msg->channel) {
|
||||
ORTE_RML_SEND_COMPLETE(msg->msg);
|
||||
}
|
||||
else {
|
||||
ORTE_QOS_SEND_COMPLETE(msg->msg);
|
||||
}
|
||||
OBJ_RELEASE(msg);
|
||||
peer->send_msg = NULL;
|
||||
goto next;
|
||||
}
|
||||
}
|
||||
/* progress the data transmission */
|
||||
if (msg->hdr_sent) {
|
||||
if (ORTE_SUCCESS == (rc = send_bytes(peer))) {
|
||||
/* this block is complete */
|
||||
if (NULL != msg->msg->buffer) {
|
||||
/* we are done - notify the RML */
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s MESSAGE SEND COMPLETE TO %s OF %d BYTES ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
msg->hdr.nbytes, peer->sd);
|
||||
msg->msg->status = ORTE_SUCCESS;
|
||||
if( NULL == msg->msg->channel) {
|
||||
ORTE_RML_SEND_COMPLETE(msg->msg);
|
||||
}
|
||||
else {
|
||||
ORTE_QOS_SEND_COMPLETE(msg->msg);
|
||||
}
|
||||
OBJ_RELEASE(msg);
|
||||
peer->send_msg = NULL;
|
||||
} else if (NULL != msg->msg->data) {
|
||||
/* this was a relay message - nothing more to do */
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s MESSAGE SEND COMPLETE TO %s OF %d BYTES ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
msg->hdr.nbytes, peer->sd);
|
||||
OBJ_RELEASE(msg);
|
||||
peer->send_msg = NULL;
|
||||
} else {
|
||||
/* rotate to the next iovec */
|
||||
msg->iovnum++;
|
||||
if (msg->iovnum < msg->msg->count) {
|
||||
msg->sdptr = msg->msg->iov[msg->iovnum].iov_base;
|
||||
msg->sdbytes = msg->msg->iov[msg->iovnum].iov_len;
|
||||
/* exit this event to give the event lib
|
||||
* a chance to progress any other pending
|
||||
* actions
|
||||
*/
|
||||
return;
|
||||
} else {
|
||||
/* this message is complete - notify the RML */
|
||||
opal_output_verbose(2, orte_oob_base_framework.framework_output,
|
||||
"%s MESSAGE SEND COMPLETE TO %s OF %d BYTES ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
msg->hdr.nbytes, peer->sd);
|
||||
msg->msg->status = ORTE_SUCCESS;
|
||||
if( NULL == msg->msg->channel) {
|
||||
ORTE_RML_SEND_COMPLETE(msg->msg);
|
||||
}
|
||||
else {
|
||||
ORTE_QOS_SEND_COMPLETE(msg->msg);
|
||||
}
|
||||
OBJ_RELEASE(msg);
|
||||
peer->send_msg = NULL;
|
||||
}
|
||||
}
|
||||
/* fall thru to queue the next message */
|
||||
} else if (ORTE_ERR_RESOURCE_BUSY == rc ||
|
||||
ORTE_ERR_WOULD_BLOCK == rc) {
|
||||
/* exit this event and let the event lib progress */
|
||||
return;
|
||||
} else {
|
||||
// report the error
|
||||
opal_output(0, "%s-%s mca_oob_usock_peer_send_handler: unable to send message ON SOCKET %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)), peer->sd);
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
msg->msg->status = rc;
|
||||
if( NULL == msg->msg->channel) {
|
||||
ORTE_RML_SEND_COMPLETE(msg->msg);
|
||||
}
|
||||
else {
|
||||
ORTE_QOS_SEND_COMPLETE(msg->msg);
|
||||
}
|
||||
OBJ_RELEASE(msg);
|
||||
peer->send_msg = NULL;
|
||||
ORTE_FORCED_TERMINATE(1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
next:
|
||||
/* if current message completed - progress any pending sends by
|
||||
* moving the next in the queue into the "on-deck" position. Note
|
||||
* that this doesn't mean we send the message right now - we will
|
||||
* wait for another send_event to fire before doing so. This gives
|
||||
* us a chance to service any pending recvs.
|
||||
*/
|
||||
peer->send_msg = (mca_oob_usock_send_t*)
|
||||
opal_list_remove_first(&peer->send_queue);
|
||||
}
|
||||
/* if nothing else to do unregister for send event notifications */
|
||||
if (NULL == peer->send_msg && peer->send_ev_active) {
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
opal_output(0, "%s-%s mca_oob_usock_peer_send_handler: invalid connection state (%d) on socket %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
peer->state, peer->sd);
|
||||
if (peer->send_ev_active) {
|
||||
opal_event_del(&peer->send_event);
|
||||
peer->send_ev_active = false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Read the pending block (rdptr/rdbytes) of the peer's current recv
 * message from the peer's socket.
 *
 * Returns ORTE_SUCCESS once the whole block has been read,
 * ORTE_ERR_RESOURCE_BUSY (EAGAIN) or ORTE_ERR_WOULD_BLOCK (EWOULDBLOCK)
 * when no more data is available right now, and ORTE_ERR_COMM_FAILURE
 * on any other read error (the peer is not closed in that case).
 *
 * When the remote side has closed the connection (read() returns 0),
 * all of the peer's events are stopped, the in-progress recv message is
 * released, the peer is closed, and ORTE_ERR_WOULD_BLOCK is returned.
 */
static int read_bytes(mca_oob_usock_peer_t* peer)
{
    ssize_t rc;   /* read() returns ssize_t; an int could truncate it */
    int err;

    /* read until all bytes recvd or error */
    while (0 < peer->recv_msg->rdbytes) {
        rc = read(peer->sd, peer->recv_msg->rdptr, peer->recv_msg->rdbytes);
        if (rc < 0) {
            /* capture errno before any other call can overwrite it */
            err = opal_socket_errno;
            if (EINTR == err) {
                continue;
            } else if (EAGAIN == err) {
                /* tell the caller to keep this message on active,
                 * but let the event lib cycle so other messages
                 * can progress while this socket is busy
                 */
                return ORTE_ERR_RESOURCE_BUSY;
            } else if (EWOULDBLOCK == err) {
                /* same handling as above, reported with its own code */
                return ORTE_ERR_WOULD_BLOCK;
            }
            /* we hit an error and cannot progress this message - report
             * the error and let the caller know to abort this message
             */
            opal_output_verbose(OOB_USOCK_DEBUG_FAIL, orte_oob_base_framework.framework_output,
                                "%s-%s mca_oob_usock_msg_recv: readv failed: %s (%d)",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&(peer->name)),
                                strerror(err),
                                err);
            return ORTE_ERR_COMM_FAILURE;
        } else if (0 == rc) {
            /* the remote peer closed the connection - report that condition
             * and let the caller know
             */
            opal_output_verbose(OOB_USOCK_DEBUG_FAIL, orte_oob_base_framework.framework_output,
                                "%s-%s mca_oob_usock_msg_recv: peer closed connection",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                ORTE_NAME_PRINT(&(peer->name)));
            /* stop all events */
            if (peer->recv_ev_active) {
                opal_event_del(&peer->recv_event);
                peer->recv_ev_active = false;
            }
            if (peer->timer_ev_active) {
                opal_event_del(&peer->timer_event);
                peer->timer_ev_active = false;
            }
            if (peer->send_ev_active) {
                opal_event_del(&peer->send_event);
                peer->send_ev_active = false;
            }
            /* drop the partially received message */
            if (NULL != peer->recv_msg) {
                OBJ_RELEASE(peer->recv_msg);
                peer->recv_msg = NULL;
            }
            mca_oob_usock_peer_close(peer);
            return ORTE_ERR_WOULD_BLOCK;
        }
        /* we were able to read something, so adjust counters and location */
        peer->recv_msg->rdbytes -= rc;
        peer->recv_msg->rdptr += rc;
    }

    /* we read the full data block */
    return ORTE_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* Dispatch to the appropriate action routine based on the state
|
||||
* of the connection with the peer.
|
||||
*/
|
||||
|
||||
void mca_oob_usock_recv_handler(int sd, short flags, void *cbdata)
|
||||
{
|
||||
mca_oob_usock_peer_t* peer = (mca_oob_usock_peer_t*)cbdata;
|
||||
int rc;
|
||||
orte_rml_send_t *snd;
|
||||
|
||||
if (orte_abnormal_term_ordered) {
|
||||
return;
|
||||
}
|
||||
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler called for peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
|
||||
switch (peer->state) {
|
||||
case MCA_OOB_USOCK_CONNECT_ACK:
|
||||
if (ORTE_SUCCESS == (rc = mca_oob_usock_peer_recv_connect_ack(peer, peer->sd, NULL))) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler starting send/recv events",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
/* we connected! Start the send/recv events */
|
||||
if (!peer->recv_ev_active) {
|
||||
opal_event_add(&peer->recv_event, 0);
|
||||
peer->recv_ev_active = true;
|
||||
}
|
||||
if (peer->timer_ev_active) {
|
||||
opal_event_del(&peer->timer_event);
|
||||
peer->timer_ev_active = false;
|
||||
}
|
||||
/* if there is a message waiting to be sent, queue it */
|
||||
if (NULL == peer->send_msg) {
|
||||
peer->send_msg = (mca_oob_usock_send_t*)opal_list_remove_first(&peer->send_queue);
|
||||
}
|
||||
if (NULL != peer->send_msg && !peer->send_ev_active) {
|
||||
opal_event_add(&peer->send_event, 0);
|
||||
peer->send_ev_active = true;
|
||||
}
|
||||
/* update our state */
|
||||
peer->state = MCA_OOB_USOCK_CONNECTED;
|
||||
} else {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s UNABLE TO COMPLETE CONNECT ACK WITH %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name));
|
||||
opal_event_del(&peer->recv_event);
|
||||
peer->recv_ev_active = false;
|
||||
ORTE_FORCED_TERMINATE(1);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
case MCA_OOB_USOCK_CONNECTED:
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler CONNECTED",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
/* allocate a new message and setup for recv */
|
||||
if (NULL == peer->recv_msg) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler allocate new recv msg",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
peer->recv_msg = OBJ_NEW(mca_oob_usock_recv_t);
|
||||
if (NULL == peer->recv_msg) {
|
||||
opal_output(0, "%s-%s mca_oob_usock_peer_recv_handler: unable to allocate recv message\n",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
return;
|
||||
}
|
||||
/* start by reading the header */
|
||||
peer->recv_msg->rdptr = (char*)&peer->recv_msg->hdr;
|
||||
peer->recv_msg->rdbytes = sizeof(mca_oob_usock_hdr_t);
|
||||
}
|
||||
/* if the header hasn't been completely read, read it */
|
||||
if (!peer->recv_msg->hdr_recvd) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler read hdr",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
if (ORTE_SUCCESS == (rc = read_bytes(peer))) {
|
||||
/* completed reading the header */
|
||||
peer->recv_msg->hdr_recvd = true;
|
||||
/* if this is a zero-byte message, then we are done */
|
||||
if (0 == peer->recv_msg->hdr.nbytes) {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s RECVD ZERO-BYTE MESSAGE FROM %s for tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->name), peer->recv_msg->hdr.tag);
|
||||
peer->recv_msg->data = NULL; // make sure
|
||||
peer->recv_msg->rdptr = NULL;
|
||||
peer->recv_msg->rdbytes = 0;
|
||||
} else {
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler allocate data region of size %lu",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (unsigned long)peer->recv_msg->hdr.nbytes);
|
||||
/* allocate the data region */
|
||||
peer->recv_msg->data = (char*)malloc(peer->recv_msg->hdr.nbytes);
|
||||
/* point to it */
|
||||
peer->recv_msg->rdptr = peer->recv_msg->data;
|
||||
peer->recv_msg->rdbytes = peer->recv_msg->hdr.nbytes;
|
||||
}
|
||||
/* fall thru and attempt to read the data */
|
||||
} else if (ORTE_ERR_RESOURCE_BUSY == rc ||
|
||||
ORTE_ERR_WOULD_BLOCK == rc) {
|
||||
/* exit this event and let the event lib progress */
|
||||
return;
|
||||
} else {
|
||||
/* close the connection */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s:usock:recv:handler error reading bytes - closing connection",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
mca_oob_usock_peer_close(peer);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (peer->recv_msg->hdr_recvd) {
|
||||
/* continue to read the data block - we start from
|
||||
* wherever we left off, which could be at the
|
||||
* beginning or somewhere in the message
|
||||
*/
|
||||
if (ORTE_SUCCESS == (rc = read_bytes(peer))) {
|
||||
/* we recvd all of the message */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s RECVD COMPLETE MESSAGE FROM %s OF %d BYTES FOR DEST %s TAG %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer->recv_msg->hdr.origin),
|
||||
(int)peer->recv_msg->hdr.nbytes,
|
||||
ORTE_NAME_PRINT(&peer->recv_msg->hdr.dst),
|
||||
peer->recv_msg->hdr.tag);
|
||||
/* am I the intended recipient? */
|
||||
if (peer->recv_msg->hdr.dst.jobid == ORTE_PROC_MY_NAME->jobid &&
|
||||
peer->recv_msg->hdr.dst.vpid == ORTE_PROC_MY_NAME->vpid) {
|
||||
/* yes - post it to the RML for delivery */
|
||||
opal_output_verbose(OOB_USOCK_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
"%s DELIVERING TO RML",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
ORTE_RML_POST_MESSAGE(&peer->recv_msg->hdr.origin, peer->recv_msg->hdr.tag,
|
||||
peer->recv_msg->hdr.channel, peer->recv_msg->hdr.seq_num,
|
||||
peer->recv_msg->data,
|
||||
peer->recv_msg->hdr.nbytes);
|
||||
OBJ_RELEASE(peer->recv_msg);
|
||||
} else {
|
||||
/* no - we don't route things, so we promote this
|
||||
* back to the OOB and let another transport move
|
||||
* it along. If we are a daemon and it is intended
|
||||
* for another of our local procs, it will just come
|
||||
* back to us and be handled then
|
||||
*/
|
||||
snd = OBJ_NEW(orte_rml_send_t);
|
||||
snd->dst = peer->recv_msg->hdr.dst;
|
||||
snd->origin = peer->recv_msg->hdr.origin;
|
||||
snd->tag = peer->recv_msg->hdr.tag;
|
||||
snd->data = peer->recv_msg->data;
|
||||
snd->dst_channel = peer->recv_msg->hdr.channel;
|
||||
snd->seq_num = peer->recv_msg->hdr.seq_num;
|
||||
snd->count = peer->recv_msg->hdr.nbytes;
|
||||
snd->cbfunc.iov = NULL;
|
||||
snd->cbdata = NULL;
|
||||
/* activate the OOB send state */
|
||||
ORTE_OOB_SEND(snd);
|
||||
/* protect the data */
|
||||
peer->recv_msg->data = NULL;
|
||||
/* cleanup */
|
||||
OBJ_RELEASE(peer->recv_msg);
|
||||
return;
|
||||
}
|
||||
} else if (ORTE_ERR_RESOURCE_BUSY == rc ||
|
||||
ORTE_ERR_WOULD_BLOCK == rc) {
|
||||
/* exit this event and let the event lib progress */
|
||||
return;
|
||||
} else {
|
||||
// report the error
|
||||
opal_output(0, "%s-%s mca_oob_usock_peer_recv_handler: unable to recv message",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)));
|
||||
/* turn off the recv event */
|
||||
opal_event_del(&peer->recv_event);
|
||||
peer->recv_ev_active = false;
|
||||
ORTE_FORCED_TERMINATE(1);
|
||||
return;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
opal_output(0, "%s-%s mca_oob_usock_peer_recv_handler: invalid socket state(%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&(peer->name)),
|
||||
peer->state);
|
||||
// mca_oob_usock_peer_close(peer);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Constructor: start every send object with no attached RML message,
 * no relay data, and an empty send cursor.
 */
static void snd_cons(mca_oob_usock_send_t *ptr)
{
    /* nothing attached yet */
    ptr->data = NULL;
    ptr->msg = NULL;

    /* nothing has gone out on the wire */
    ptr->sdbytes = 0;
    ptr->sdptr = NULL;
    ptr->iovnum = 0;
    ptr->hdr_sent = false;
}

/* Destructor: an attached RML msg is left alone because the RML owns
 * that memory. Relay data, on the other hand, belongs to us and is
 * released here (free() accepts NULL, so no guard is required).
 */
static void snd_des(mca_oob_usock_send_t *ptr)
{
    free(ptr->data);
}

OBJ_CLASS_INSTANCE(mca_oob_usock_send_t,
                   opal_list_item_t,
                   snd_cons, snd_des);
|
||||
|
||||
static void rcv_cons(mca_oob_usock_recv_t *ptr)
|
||||
{
|
||||
ptr->hdr_recvd = false;
|
||||
ptr->rdptr = NULL;
|
||||
ptr->rdbytes = 0;
|
||||
}
|
||||
OBJ_CLASS_INSTANCE(mca_oob_usock_recv_t,
|
||||
opal_list_item_t,
|
||||
rcv_cons, NULL);
|
||||
|
||||
static void err_cons(mca_oob_usock_msg_error_t *ptr)
|
||||
{
|
||||
ptr->rmsg = NULL;
|
||||
ptr->snd = NULL;
|
||||
}
|
||||
OBJ_CLASS_INSTANCE(mca_oob_usock_msg_error_t,
|
||||
opal_object_t,
|
||||
err_cons, NULL);
|
||||
|
@ -1,255 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _MCA_OOB_USOCK_SENDRECV_H_
|
||||
#define _MCA_OOB_USOCK_SENDRECV_H_
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
|
||||
#include "orte/mca/rml/base/base.h"
|
||||
|
||||
#include "oob_usock.h"
|
||||
#include "oob_usock_hdr.h"
|
||||
|
||||
/* usock structure for sending a message */
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
mca_oob_usock_hdr_t hdr;
|
||||
orte_rml_send_t *msg;
|
||||
char *data;
|
||||
bool hdr_sent;
|
||||
int iovnum;
|
||||
char *sdptr;
|
||||
size_t sdbytes;
|
||||
} mca_oob_usock_send_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_send_t);
|
||||
|
||||
/* usock structure for recving a message */
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
mca_oob_usock_hdr_t hdr;
|
||||
bool hdr_recvd;
|
||||
char *data;
|
||||
char *rdptr;
|
||||
size_t rdbytes;
|
||||
} mca_oob_usock_recv_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_recv_t);
|
||||
|
||||
/* Hand a message to a peer for transmission.
 *
 * If the peer already has a message on-deck, the new one is appended
 * to the peer's send queue; otherwise it becomes the on-deck message.
 *
 * When f is true the macro also gets things moving: a connected peer
 * has its send event armed (if it is not already), while a peer in any
 * other state is marked CONNECTING and a connection attempt is
 * activated. When f is false the message simply waits - e.g., until
 * the connection procedure completes.
 *
 * p => pointer to mca_oob_usock_peer_t
 * s => pointer to mca_oob_usock_send_t
 * f => true if send event is to be activated
 *
 * NOTE: arguments are evaluated more than once, and the expansion
 * already ends with a semicolon.
 */
#define MCA_OOB_USOCK_QUEUE_MSG(p, s, f)                                    \
    do {                                                                    \
        opal_output_verbose(5, orte_oob_base_framework.framework_output,    \
                            "%s:[%s:%d] queue msg to %s",                   \
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),             \
                            __FILE__, __LINE__,                             \
                            ORTE_NAME_PRINT(&((s)->hdr.dst)));              \
        if (NULL != (p)->send_msg) {                                        \
            /* the on-deck slot is taken - wait in the queue */             \
            opal_list_append(&(p)->send_queue, &(s)->super);                \
        } else {                                                            \
            /* nothing on-deck, so this message goes there */               \
            (p)->send_msg = (s);                                            \
        }                                                                   \
        if ((f)) {                                                          \
            if (MCA_OOB_USOCK_CONNECTED == (p)->state) {                    \
                /* connected - make sure the send event is armed */         \
                if (!(p)->send_ev_active) {                                 \
                    opal_event_add(&(p)->send_event, 0);                    \
                    (p)->send_ev_active = true;                             \
                }                                                           \
            } else {                                                        \
                /* not connected yet - start connecting */                  \
                (p)->state = MCA_OOB_USOCK_CONNECTING;                      \
                ORTE_ACTIVATE_USOCK_CONN_STATE((p),                         \
                                               mca_oob_usock_peer_try_connect); \
            }                                                               \
        }                                                                   \
    }while(0);
|
||||
|
||||
/* Wrap an RML message in a usock send object and queue it on a peer
 * with the send activated immediately (or a connection attempt started
 * if the peer is not yet connected).
 *
 * m - the RML message to be sent
 * p - the final recipient
 *
 * The byte count placed in the header comes from the message's buffer
 * when it has one, else from the sum of its iovec lengths, else from
 * its raw count.
 */
#define MCA_OOB_USOCK_QUEUE_SEND(m, p)                                      \
    do {                                                                    \
        mca_oob_usock_send_t *msg;                                          \
        int i;                                                              \
        opal_output_verbose(5, orte_oob_base_framework.framework_output,    \
                            "%s:[%s:%d] queue send to %s",                  \
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),             \
                            __FILE__, __LINE__,                             \
                            ORTE_NAME_PRINT(&((m)->dst)));                  \
        msg = OBJ_NEW(mca_oob_usock_send_t);                                \
        /* remember which RML message we are carrying */                    \
        msg->msg = (m);                                                     \
        /* fill in the header */                                            \
        msg->hdr.type = MCA_OOB_USOCK_USER;                                 \
        msg->hdr.origin = (m)->origin;                                      \
        msg->hdr.dst = (m)->dst;                                            \
        msg->hdr.tag = (m)->tag;                                            \
        msg->hdr.channel = (m)->dst_channel;                                \
        msg->hdr.seq_num = (m)->seq_num;                                    \
        /* work out the total number of bytes to be sent */                 \
        if (NULL == (m)->buffer && NULL == (m)->iov) {                      \
            msg->hdr.nbytes = (m)->count;                                   \
        } else if (NULL != (m)->buffer) {                                   \
            msg->hdr.nbytes = (m)->buffer->bytes_used;                      \
        } else {                                                            \
            msg->hdr.nbytes = 0;                                            \
            for (i=0; i < (m)->count; i++) {                                \
                msg->hdr.nbytes += (m)->iov[i].iov_len;                     \
            }                                                               \
        }                                                                   \
        /* the header is the first thing to go out */                       \
        msg->sdbytes = sizeof(mca_oob_usock_hdr_t);                         \
        msg->sdptr = (char*)&msg->hdr;                                      \
        /* add to the msg queue for this peer */                            \
        MCA_OOB_USOCK_QUEUE_MSG((p), msg, true);                            \
    }while(0);
|
||||
|
||||
/* Wrap an RML message in a usock send object and park it on a peer
 * WITHOUT activating the send - the message goes out once the
 * connection process completes.
 *
 * m - the RML message to be sent
 * p - the final recipient
 *
 * The byte count placed in the header comes from the message's buffer
 * when it has one, else from the sum of its iovec lengths, else from
 * its raw count.
 */
#define MCA_OOB_USOCK_QUEUE_PENDING(m, p)                                   \
    do {                                                                    \
        mca_oob_usock_send_t *msg;                                          \
        int i;                                                              \
        opal_output_verbose(5, orte_oob_base_framework.framework_output,    \
                            "%s:[%s:%d] queue pending to %s",               \
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),             \
                            __FILE__, __LINE__,                             \
                            ORTE_NAME_PRINT(&((m)->dst)));                  \
        msg = OBJ_NEW(mca_oob_usock_send_t);                                \
        /* remember which RML message we are carrying */                    \
        msg->msg = (m);                                                     \
        /* fill in the header */                                            \
        msg->hdr.type = MCA_OOB_USOCK_USER;                                 \
        msg->hdr.origin = (m)->origin;                                      \
        msg->hdr.dst = (m)->dst;                                            \
        msg->hdr.tag = (m)->tag;                                            \
        msg->hdr.channel = (m)->dst_channel;                                \
        msg->hdr.seq_num = (m)->seq_num;                                    \
        /* work out the total number of bytes to be sent */                 \
        if (NULL == (m)->buffer && NULL == (m)->iov) {                      \
            msg->hdr.nbytes = (m)->count;                                   \
        } else if (NULL != (m)->buffer) {                                   \
            msg->hdr.nbytes = (m)->buffer->bytes_used;                      \
        } else {                                                            \
            msg->hdr.nbytes = 0;                                            \
            for (i=0; i < (m)->count; i++) {                                \
                msg->hdr.nbytes += (m)->iov[i].iov_len;                     \
            }                                                               \
        }                                                                   \
        /* the header is the first thing to go out */                       \
        msg->sdbytes = sizeof(mca_oob_usock_hdr_t);                         \
        msg->sdptr = (char*)&msg->hdr;                                      \
        /* add to the msg queue for this peer */                            \
        MCA_OOB_USOCK_QUEUE_MSG((p), msg, false);                           \
    }while(0);
|
||||
|
||||
/* State machine for processing message */
|
||||
typedef struct {
|
||||
opal_object_t super;
|
||||
opal_event_t ev;
|
||||
orte_rml_send_t *msg;
|
||||
} mca_oob_usock_msg_op_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_msg_op_t);
|
||||
|
||||
/* Schedule cbfunc to run in the usock module's event base with a
 * freshly created mca_oob_usock_msg_op_t that refers to the given
 * RML message.
 *
 * ms     -> orte_rml_send_t to be processed
 * cbfunc -> event callback that receives the msg_op object
 */
#define ORTE_ACTIVATE_USOCK_POST_SEND(ms, cbfunc)                           \
    do {                                                                    \
        mca_oob_usock_msg_op_t *mop;                                        \
        opal_output_verbose(5, orte_oob_base_framework.framework_output,    \
                            "%s:[%s:%d] post send to %s",                   \
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),             \
                            __FILE__, __LINE__,                             \
                            ORTE_NAME_PRINT(&((ms)->dst)));                 \
        /* build the caddy that rides along with the event */               \
        mop = OBJ_NEW(mca_oob_usock_msg_op_t);                              \
        mop->msg = (ms);                                                    \
        /* no file descriptor is involved (-1): the event is made */        \
        /* active by hand right after it is configured */                   \
        opal_event_set(mca_oob_usock_module.ev_base, &mop->ev, -1,          \
                       OPAL_EV_WRITE, (cbfunc), mop);                       \
        opal_event_set_priority(&mop->ev, ORTE_MSG_PRI);                    \
        opal_event_active(&mop->ev, OPAL_EV_WRITE, 1);                      \
    } while(0);
|
||||
|
||||
typedef struct {
|
||||
opal_object_t super;
|
||||
opal_event_t ev;
|
||||
orte_rml_send_t *rmsg;
|
||||
mca_oob_usock_send_t *snd;
|
||||
orte_process_name_t hop;
|
||||
} mca_oob_usock_msg_error_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_usock_msg_error_t);
|
||||
|
||||
/* Report a delivery error back to the component for handling.
 *
 * A mca_oob_usock_msg_error_t is created, pointed at whichever failed
 * object was supplied (the usock send takes precedence over the RML
 * message), stamped with the next hop's name, and delivered to cbfunc
 * through an event in orte_event_base.
 *
 * s -> mca_oob_usock_send_t that failed (can be NULL)
 * r -> orte_rml_send_t that failed (can be NULL)
 * h -> process name for the next recipient
 * cbfunc -> function to handle the callback
 */
#define ORTE_ACTIVATE_USOCK_MSG_ERROR(s, r, h, cbfunc)                      \
    do {                                                                    \
        mca_oob_usock_msg_error_t *mop;                                     \
        opal_output_verbose(5, orte_oob_base_framework.framework_output,    \
                            "%s:[%s:%d] post msg error to %s",              \
                            ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),             \
                            __FILE__, __LINE__,                             \
                            ORTE_NAME_PRINT((h)));                          \
        mop = OBJ_NEW(mca_oob_usock_msg_error_t);                           \
        if (NULL == (s)) {                                                  \
            /* no usock send given - fall back to the RML message; */       \
            /* going through the field lets callers pass a literal NULL */  \
            if (NULL != (r)) {                                              \
                mop->rmsg = (r);                                            \
            }                                                               \
        } else {                                                            \
            mop->snd = (s);                                                 \
        }                                                                   \
        /* record who the message was headed to next */                     \
        mop->hop.vpid = (h)->vpid;                                          \
        mop->hop.jobid = (h)->jobid;                                        \
        /* no file descriptor is involved (-1): the event is made */        \
        /* active by hand right after it is configured */                   \
        opal_event_set(orte_event_base, &mop->ev, -1,                       \
                       OPAL_EV_WRITE, (cbfunc), mop);                       \
        opal_event_set_priority(&mop->ev, ORTE_MSG_PRI);                    \
        opal_event_active(&mop->ev, OPAL_EV_WRITE, 1);                      \
    } while(0);
|
||||
|
||||
#endif /* _MCA_OOB_USOCK_SENDRECV_H_ */
|
@ -1,7 +0,0 @@
|
||||
#
|
||||
# owner/status file
|
||||
# owner: institution that is responsible for this package
|
||||
# status: e.g. active, maintenance, unmaintained
|
||||
#
|
||||
owner: INTEL
|
||||
status: maintenance
|
@ -10,6 +10,7 @@
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2015 Intel, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -25,9 +26,8 @@ libmca_plm_la_SOURCES += \
|
||||
base/plm_base_frame.c \
|
||||
base/plm_base_select.c \
|
||||
base/plm_base_receive.c \
|
||||
base/plm_base_launch_support.c \
|
||||
base/plm_base_jobid.c \
|
||||
base/plm_base_proxy.c \
|
||||
base/plm_base_orted_cmds.c
|
||||
base/plm_base_launch_support.c \
|
||||
base/plm_base_jobid.c \
|
||||
base/plm_base_orted_cmds.c
|
||||
|
||||
dist_ortedata_DATA += base/help-plm-base.txt
|
||||
|
@ -10,6 +10,7 @@
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2013 Los Alamos National Security, LLC. All rights reserved.
|
||||
* Copyright (c) 2015 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -63,7 +64,6 @@ ORTE_DECLSPEC void orte_plm_base_mapping_complete(int fd, short args, void *cbda
|
||||
ORTE_DECLSPEC void orte_plm_base_launch_apps(int fd, short args, void *cbdata);
|
||||
ORTE_DECLSPEC void orte_plm_base_post_launch(int fd, short args, void *cbdata);
|
||||
ORTE_DECLSPEC void orte_plm_base_registered(int fd, short args, void *cbdata);
|
||||
ORTE_DECLSPEC int orte_plm_base_fork_hnp(void);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
|
@ -1,319 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
#include "orte/constants.h"
|
||||
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/util/path.h"
|
||||
#include "opal/mca/installdirs/installdirs.h"
|
||||
#include "opal/mca/pmix/base/base.h"
|
||||
#include "opal/util/argv.h"
|
||||
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/show_help.h"
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/mca/oob/base/base.h"
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/rml/rml_types.h"
|
||||
#include "orte/mca/rml/base/rml_contact.h"
|
||||
#include "orte/mca/routed/routed.h"
|
||||
#include "orte/mca/state/state.h"
|
||||
#include "orte/orted/pmix/pmix_server.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
|
||||
#include "orte/mca/plm/base/base.h"
|
||||
#include "orte/mca/plm/base/plm_private.h"
|
||||
|
||||
#define ORTE_URI_MSG_LGTH 256
|
||||
|
||||
static void set_handler_default(int sig)
|
||||
{
|
||||
struct sigaction act;
|
||||
|
||||
act.sa_handler = SIG_DFL;
|
||||
act.sa_flags = 0;
|
||||
sigemptyset(&act.sa_mask);
|
||||
|
||||
sigaction(sig, &act, (struct sigaction *)0);
|
||||
}
|
||||
|
||||
int orte_plm_base_fork_hnp(void)
|
||||
{
|
||||
int p[2], death_pipe[2];
|
||||
char *cmd;
|
||||
char **argv = NULL;
|
||||
int argc;
|
||||
char *param, *cptr, *pmix_uri;
|
||||
sigset_t sigs;
|
||||
int buffer_length, num_chars_read, chunk;
|
||||
char *orted_uri;
|
||||
int rc;
|
||||
orte_jobid_t jobid;
|
||||
|
||||
/* if we don't have any active OOB modules, then abort */
|
||||
if (0 == opal_list_get_size(&orte_oob_base.actives)) {
|
||||
orte_show_help("help-plm-base.txt", "no-oob", true);
|
||||
ORTE_FORCED_TERMINATE(ORTE_ERR_SILENT);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
|
||||
/* A pipe is used to communicate between the parent and child to
|
||||
indicate whether the exec ultimately succeeded or failed. The
|
||||
child sets the pipe to be close-on-exec; the child only ever
|
||||
writes anything to the pipe if there is an error (e.g.,
|
||||
executable not found, exec() fails, etc.). The parent does a
|
||||
blocking read on the pipe; if the pipe closed with no data,
|
||||
then the exec() succeeded. If the parent reads something from
|
||||
the pipe, then the child was letting us know that it failed.
|
||||
*/
|
||||
if (pipe(p) < 0) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES);
|
||||
return ORTE_ERR_SYS_LIMITS_PIPES;
|
||||
}
|
||||
|
||||
/* we also have to give the HNP a pipe it can watch to know when
|
||||
* we terminated. Since the HNP is going to be a child of us, it
|
||||
* can't just use waitpid to see when we leave - so it will watch
|
||||
* the pipe instead
|
||||
*/
|
||||
if (pipe(death_pipe) < 0) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_PIPES);
|
||||
return ORTE_ERR_SYS_LIMITS_PIPES;
|
||||
}
|
||||
|
||||
/* find the orted binary using the install_dirs support - this also
|
||||
* checks to ensure that we can see this executable and it *is* executable by us
|
||||
*/
|
||||
cmd = opal_path_access("orted", opal_install_dirs.bindir, X_OK);
|
||||
if (NULL == cmd) {
|
||||
/* guess we couldn't do it - best to abort */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_FILE_NOT_EXECUTABLE);
|
||||
close(p[0]);
|
||||
close(p[1]);
|
||||
return ORTE_ERR_FILE_NOT_EXECUTABLE;
|
||||
}
|
||||
|
||||
/* okay, setup an appropriate argv */
|
||||
opal_argv_append(&argc, &argv, "orted");
|
||||
|
||||
/* tell the daemon it is to be the HNP */
|
||||
opal_argv_append(&argc, &argv, "--hnp");
|
||||
|
||||
/* tell the daemon to get out of our process group */
|
||||
opal_argv_append(&argc, &argv, "--set-sid");
|
||||
|
||||
/* tell the daemon to report back its uri so we can connect to it */
|
||||
opal_argv_append(&argc, &argv, "--report-uri");
|
||||
asprintf(¶m, "%d", p[1]);
|
||||
opal_argv_append(&argc, &argv, param);
|
||||
free(param);
|
||||
|
||||
/* give the daemon a pipe it can watch to tell when we have died */
|
||||
opal_argv_append(&argc, &argv, "--singleton-died-pipe");
|
||||
asprintf(¶m, "%d", death_pipe[0]);
|
||||
opal_argv_append(&argc, &argv, param);
|
||||
free(param);
|
||||
|
||||
/* add any debug flags */
|
||||
if (orte_debug_flag) {
|
||||
opal_argv_append(&argc, &argv, "--debug");
|
||||
}
|
||||
|
||||
if (orte_debug_daemons_flag) {
|
||||
opal_argv_append(&argc, &argv, "--debug-daemons");
|
||||
}
|
||||
|
||||
if (orte_debug_daemons_file_flag) {
|
||||
if (!orte_debug_daemons_flag) {
|
||||
opal_argv_append(&argc, &argv, "--debug-daemons");
|
||||
}
|
||||
opal_argv_append(&argc, &argv, "--debug-daemons-file");
|
||||
}
|
||||
|
||||
/* indicate that it must use the novm state machine */
|
||||
opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID);
|
||||
opal_argv_append(&argc, &argv, "state_novm_select");
|
||||
opal_argv_append(&argc, &argv, "1");
|
||||
|
||||
/* pass it a jobid to match my job family */
|
||||
opal_argv_append(&argc, &argv, "-"OPAL_MCA_CMD_LINE_ID);
|
||||
opal_argv_append(&argc, &argv, "ess_base_jobid");
|
||||
jobid = ORTE_DAEMON_JOBID(ORTE_PROC_MY_NAME->jobid);
|
||||
if (ORTE_SUCCESS != (rc = orte_util_convert_jobid_to_string(¶m, jobid))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(cmd);
|
||||
return rc;
|
||||
}
|
||||
opal_argv_append(&argc, &argv, param);
|
||||
free(param);
|
||||
|
||||
/* Fork off the child */
|
||||
orte_process_info.hnp_pid = fork();
|
||||
if(orte_process_info.hnp_pid < 0) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_SYS_LIMITS_CHILDREN);
|
||||
close(p[0]);
|
||||
close(p[1]);
|
||||
close(death_pipe[0]);
|
||||
close(death_pipe[1]);
|
||||
free(cmd);
|
||||
opal_argv_free(argv);
|
||||
return ORTE_ERR_SYS_LIMITS_CHILDREN;
|
||||
}
|
||||
|
||||
if (orte_process_info.hnp_pid == 0) {
|
||||
close(p[0]);
|
||||
close(death_pipe[1]);
|
||||
/* I am the child - exec me */
|
||||
|
||||
/* Set signal handlers back to the default. Do this close
|
||||
to the execve() because the event library may (and likely
|
||||
will) reset them. If we don't do this, the event
|
||||
library may have left some set that, at least on some
|
||||
OS's, don't get reset via fork() or exec(). Hence, the
|
||||
orted could be unkillable (for example). */
|
||||
set_handler_default(SIGTERM);
|
||||
set_handler_default(SIGINT);
|
||||
set_handler_default(SIGHUP);
|
||||
set_handler_default(SIGPIPE);
|
||||
set_handler_default(SIGCHLD);
|
||||
|
||||
/* Unblock all signals, for many of the same reasons that
|
||||
we set the default handlers, above. This is noticable
|
||||
on Linux where the event library blocks SIGTERM, but we
|
||||
don't want that blocked by the orted (or, more
|
||||
specifically, we don't want it to be blocked by the
|
||||
orted and then inherited by the ORTE processes that it
|
||||
forks, making them unkillable by SIGTERM). */
|
||||
sigprocmask(0, 0, &sigs);
|
||||
sigprocmask(SIG_UNBLOCK, &sigs, 0);
|
||||
|
||||
execv(cmd, argv);
|
||||
|
||||
/* if I get here, the execv failed! */
|
||||
orte_show_help("help-ess-base.txt", "ess-base:execv-error",
|
||||
true, cmd, strerror(errno));
|
||||
exit(1);
|
||||
|
||||
} else {
|
||||
free(cmd);
|
||||
/* I am the parent - wait to hear something back and
|
||||
* report results
|
||||
*/
|
||||
close(p[1]); /* parent closes the write - orted will write its contact info to it*/
|
||||
close(death_pipe[0]); /* parent closes the death_pipe's read */
|
||||
opal_argv_free(argv);
|
||||
|
||||
/* setup the buffer to read the HNP's uri */
|
||||
buffer_length = ORTE_URI_MSG_LGTH;
|
||||
chunk = ORTE_URI_MSG_LGTH-1;
|
||||
num_chars_read = 0;
|
||||
orted_uri = (char*)malloc(buffer_length);
|
||||
|
||||
while (chunk == (rc = read(p[0], &orted_uri[num_chars_read], chunk))) {
|
||||
/* we read an entire buffer - better get more */
|
||||
num_chars_read += chunk;
|
||||
buffer_length += ORTE_URI_MSG_LGTH;
|
||||
orted_uri = realloc((void*)orted_uri, buffer_length);
|
||||
}
|
||||
num_chars_read += rc;
|
||||
|
||||
if (num_chars_read <= 0) {
|
||||
/* we didn't get anything back - this is bad */
|
||||
ORTE_ERROR_LOG(ORTE_ERR_HNP_COULD_NOT_START);
|
||||
free(orted_uri);
|
||||
return ORTE_ERR_HNP_COULD_NOT_START;
|
||||
}
|
||||
|
||||
/* parse the sysinfo from the returned info - must
|
||||
* start from the end of the string as the uri itself
|
||||
* can contain brackets */
|
||||
if (NULL == (param = strrchr(orted_uri, '['))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
|
||||
free(orted_uri);
|
||||
return ORTE_ERR_COMM_FAILURE;
|
||||
}
|
||||
*param = '\0'; /* terminate the uri string */
|
||||
++param; /* point to the start of the sysinfo */
|
||||
|
||||
/* find the end of the sysinfo */
|
||||
if (NULL == (cptr = strchr(param, ']'))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
|
||||
free(orted_uri);
|
||||
return ORTE_ERR_COMM_FAILURE;
|
||||
}
|
||||
*cptr = '\0'; /* terminate the sysinfo string */
|
||||
++cptr; /* point to the start of the pmix uri */
|
||||
|
||||
/* convert the sysinfo string */
|
||||
if (ORTE_SUCCESS != (rc = orte_util_convert_string_to_sysinfo(&orte_local_cpu_type,
|
||||
&orte_local_cpu_model, param))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(orted_uri);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* save the daemon uri - we will process it later */
|
||||
orte_process_info.my_daemon_uri = strdup(orted_uri);
|
||||
/* Set the contact info in the RML - this won't actually establish
|
||||
* the connection, but just tells the RML how to reach the daemon
|
||||
* if/when we attempt to send to it
|
||||
*/
|
||||
orte_rml.set_contact_info(orte_process_info.my_daemon_uri);
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri,
|
||||
ORTE_PROC_MY_DAEMON, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(orted_uri);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* likewise, since this is also the HNP, set that uri too */
|
||||
orte_process_info.my_hnp_uri = orted_uri;
|
||||
orte_rml.set_contact_info(orte_process_info.my_hnp_uri);
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
|
||||
ORTE_PROC_MY_HNP, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* push the pmix_uri into our environment - need to protect it */
|
||||
(void)asprintf(&pmix_uri, "PMIX_SERVER_URI=%s", cptr);
|
||||
putenv(pmix_uri);
|
||||
/* now re-init the pmix framework so we can connect when required */
|
||||
if (OPAL_SUCCESS != (rc = opal_pmix.init())) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* now call fence to push our own modex data into the
|
||||
* newly-launched HNP in case someone else needs it */
|
||||
if (OPAL_SUCCESS != (rc = opal_pmix.fence(NULL, 0))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/* all done - report success */
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
}
|
@ -162,6 +162,10 @@ int pmix_server_publish_fn(opal_process_name_t *proc,
|
||||
0 == strcmp(iptr->key, OPAL_PMIX_PERSISTENCE)) {
|
||||
continue;
|
||||
}
|
||||
opal_output_verbose(5, orte_pmix_server_globals.output,
|
||||
"%s publishing data %s of type %d from source %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), iptr->key, iptr->type,
|
||||
ORTE_NAME_PRINT(proc));
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.pack(&req->msg, &iptr, 1, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(req);
|
||||
@ -398,15 +402,15 @@ void pmix_server_keyval_client(int status, orte_process_name_t* sender,
|
||||
while (OPAL_SUCCESS == opal_dss.unpack(buffer, &source, &cnt, OPAL_NAME)) {
|
||||
pdata = OBJ_NEW(opal_pmix_pdata_t);
|
||||
pdata->proc = source;
|
||||
opal_output_verbose(5, orte_pmix_server_globals.output,
|
||||
"%s recvd lookup returned data from source %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&source));
|
||||
if (OPAL_SUCCESS != (rc = opal_dss.unpack(buffer, &iptr, &cnt, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(pdata);
|
||||
continue;
|
||||
}
|
||||
opal_output_verbose(5, orte_pmix_server_globals.output,
|
||||
"%s recvd lookup returned data %s of type %d from source %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), iptr->key, iptr->type,
|
||||
ORTE_NAME_PRINT(&source));
|
||||
if (OPAL_SUCCESS != (rc = opal_value_xfer(&pdata->value, iptr))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(pdata);
|
||||
|
@ -158,7 +158,7 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
opal_value_t *iptr, *inext;
|
||||
uint32_t ninfo, i;
|
||||
char **keys = NULL, *str;
|
||||
bool ret_packed = false, wait = false;
|
||||
bool ret_packed = false, wait = false, data_added;
|
||||
int room_number;
|
||||
uint32_t uid;
|
||||
opal_pmix_data_range_t range;
|
||||
@ -229,6 +229,10 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
data->uid = iptr->data.uint32;
|
||||
OBJ_RELEASE(iptr);
|
||||
} else {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
"%s data server: adding %s to data from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), iptr->key,
|
||||
ORTE_NAME_PRINT(&data->owner)));
|
||||
opal_list_append(&data->values, &iptr->super);
|
||||
}
|
||||
}
|
||||
@ -271,6 +275,10 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
ORTE_ERROR_LOG(rc);
|
||||
break;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
"%s data server: adding %s data from %s to response",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), iptr->key,
|
||||
ORTE_NAME_PRINT(&data->owner)));
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(reply, &iptr, 1, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
break;
|
||||
@ -294,9 +302,20 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
opal_list_remove_item(&pending, &req->super);
|
||||
OBJ_RELEASE(req);
|
||||
reply = NULL;
|
||||
/* if the persistence is "first_read", then delete this data */
|
||||
if (OPAL_PMIX_PERSIST_FIRST_READ == data->persistence) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
"%s NOT STORING DATA FROM %s AT INDEX %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&data->owner), data->index));
|
||||
opal_pointer_array_set_item(&orte_data_server_store, data->index, NULL);
|
||||
OBJ_RELEASE(data);
|
||||
goto release;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
release:
|
||||
/* tell the user it was wonderful... */
|
||||
ret = ORTE_SUCCESS;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(answer, &ret, 1, OPAL_INT))) {
|
||||
@ -367,8 +386,12 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
/* cycle across the provided keys */
|
||||
ret_packed = false;
|
||||
for (i=0; NULL != keys[i]; i++) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
"%s data server: looking for %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), keys[i]));
|
||||
/* cycle across the stored data, looking for a match */
|
||||
for (k=0; k < orte_data_server_store.size; k++) {
|
||||
data_added = false;
|
||||
data = (orte_data_object_t*)opal_pointer_array_get_item(&orte_data_server_store, k);
|
||||
if (NULL == data) {
|
||||
continue;
|
||||
@ -383,6 +406,10 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
}
|
||||
/* see if we have this key */
|
||||
OPAL_LIST_FOREACH(iptr, &data->values, opal_value_t) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
"%s COMPARING %s %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
keys[i], iptr->key));
|
||||
if (0 == strcmp(iptr->key, keys[i])) {
|
||||
/* found it - package it for return */
|
||||
if (!ret_packed) {
|
||||
@ -394,11 +421,16 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
}
|
||||
ret_packed = true;
|
||||
}
|
||||
data_added = true;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(answer, &data->owner, 1, OPAL_NAME))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
opal_argv_free(keys);
|
||||
goto SEND_ERROR;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
"%s data server: adding %s to data from %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), iptr->key,
|
||||
ORTE_NAME_PRINT(&data->owner)));
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(answer, &iptr, 1, OPAL_VALUE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
opal_argv_free(keys);
|
||||
@ -406,6 +438,14 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
}
|
||||
}
|
||||
}
|
||||
if (data_added && OPAL_PMIX_PERSIST_FIRST_READ == data->persistence) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
"%s REMOVING DATA FROM %s AT INDEX %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&data->owner), data->index));
|
||||
opal_pointer_array_set_item(&orte_data_server_store, data->index, NULL);
|
||||
OBJ_RELEASE(data);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!ret_packed) {
|
||||
@ -433,6 +473,7 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
opal_argv_free(keys);
|
||||
goto SEND_ERROR;
|
||||
}
|
||||
|
||||
opal_argv_free(keys);
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_debug_output,
|
||||
"%s data server:lookup: data found",
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user