We cannot use OFI to determine when daemons can finalize as we don't see the "sockets" go away. So always use the OOB for the mgmt conduit - this provides the necessary termination signal AND ensures that IOF and other mgmt messages go solely across TCP.
Clean up the way we look for matching OFI addresses by using the opal_net_samenetwork helper function. This now works for multi-network environments, but only when using the socket provider. Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
f038fe6427
Коммит
919d7fcf49
@ -136,9 +136,17 @@ static void hnp_abort(int error_code, char *fmt, ...)
|
||||
char *outmsg = NULL;
|
||||
orte_timer_t *timer;
|
||||
|
||||
/* only do this once */
|
||||
if (orte_abnormal_term_ordered) {
|
||||
return;
|
||||
}
|
||||
|
||||
/* ensure we exit with non-zero status */
|
||||
ORTE_UPDATE_EXIT_STATUS(error_code);
|
||||
|
||||
/* set the aborting flag */
|
||||
orte_abnormal_term_ordered = true;
|
||||
|
||||
/* If there was a message, construct it */
|
||||
va_start(arglist, fmt);
|
||||
if (NULL != fmt) {
|
||||
|
@ -202,9 +202,9 @@ OBJ_CLASS_DECLARATION(orte_self_send_xfer_t);
|
||||
do { \
|
||||
orte_rml_recv_t *msg; \
|
||||
opal_output_verbose(5, orte_rml_base_framework.framework_output, \
|
||||
"%s Message posted at %s:%d", \
|
||||
"%s Message posted at %s:%d for tag %d", \
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), \
|
||||
__FILE__, __LINE__); \
|
||||
__FILE__, __LINE__, (t)); \
|
||||
msg = OBJ_NEW(orte_rml_recv_t); \
|
||||
msg->sender.jobid = (p)->jobid; \
|
||||
msg->sender.vpid = (p)->vpid; \
|
||||
|
@ -12,6 +12,7 @@
|
||||
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/net.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/mca/backtrace/backtrace.h"
|
||||
#include "opal/mca/event/event.h"
|
||||
@ -85,6 +86,7 @@ orte_rml_ofi_module_t orte_rml_ofi = {
|
||||
/* Local variables */
|
||||
static bool init_done = false;
|
||||
static char *ofi_transports_supported = NULL;
|
||||
static bool ofi_desired = false;
|
||||
|
||||
static int
|
||||
rml_ofi_component_open(void)
|
||||
@ -98,6 +100,7 @@ rml_ofi_component_open(void)
|
||||
orte_rml_ofi.ofi_prov_open_num = 0;
|
||||
OBJ_CONSTRUCT(&orte_rml_ofi.peers, opal_hash_table_t);
|
||||
opal_hash_table_init(&orte_rml_ofi.peers, 128);
|
||||
OBJ_CONSTRUCT(&orte_rml_ofi.recv_msg_queue_list, opal_list_t);
|
||||
|
||||
for( uint8_t ofi_prov_id=0; ofi_prov_id < MAX_OFI_PROVIDERS ; ofi_prov_id++) {
|
||||
orte_rml_ofi.ofi_prov[ofi_prov_id].fabric = NULL;
|
||||
@ -116,6 +119,12 @@ rml_ofi_component_open(void)
|
||||
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output," from %s:%d rml_ofi_component_open()",__FILE__,__LINE__);
|
||||
|
||||
if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON) {
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
if (!ofi_desired) {
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
@ -218,7 +227,7 @@ rml_ofi_component_close(void)
|
||||
(void **)&value, &node);
|
||||
while (OPAL_SUCCESS == rc) {
|
||||
if (NULL != value) {
|
||||
OBJ_RELEASE(value);
|
||||
OBJ_RELEASE(value);
|
||||
}
|
||||
rc = opal_hash_table_get_next_key_uint64 (&orte_rml_ofi.peers, &key,
|
||||
(void **) &value, node, &node);
|
||||
@ -242,7 +251,16 @@ static int rml_ofi_component_register(void)
|
||||
OPAL_INFO_LVL_2,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&ofi_transports_supported);
|
||||
opal_output(0, "OFI TRANSPORTS %s", ofi_transports_supported);
|
||||
|
||||
|
||||
ofi_desired = false;
|
||||
mca_base_component_var_register(component, "desired",
|
||||
"Use OFI for coll conduit",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_2,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&ofi_desired);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
@ -982,7 +1000,6 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes)
|
||||
"%s - Entering rml_ofi_open_conduit()",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
|
||||
/* Open all ofi endpoints */
|
||||
if (!init_done) {
|
||||
rml_ofi_component_init();
|
||||
@ -1135,6 +1152,12 @@ static void ofi_set_contact_info (const char *uri)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Open all ofi endpoints */
|
||||
if (!init_done) {
|
||||
rml_ofi_component_init();
|
||||
init_done = true;
|
||||
}
|
||||
|
||||
uris = strdup(uri);
|
||||
process_uri(uris);
|
||||
free(uris);
|
||||
@ -1146,10 +1169,10 @@ static void process_uri( char *uri)
|
||||
orte_process_name_t peer;
|
||||
char *cptr, *ofiuri;
|
||||
char **uris=NULL;
|
||||
int rc, i=0, tot_reqd = 1, tot_found = 0;
|
||||
int rc, i=0, cur_ofi_prov;
|
||||
uint64_t ui64;
|
||||
orte_rml_ofi_peer_t *pr;
|
||||
struct sockaddr_in* ep_sockaddr;
|
||||
struct sockaddr_in *ep_sockaddr, *ep_sockaddr2;
|
||||
|
||||
/* find the first semi-colon in the string */
|
||||
cptr = strchr(uri, ';');
|
||||
@ -1176,14 +1199,7 @@ static void process_uri( char *uri)
|
||||
"%s:OFI set_contact_info peer %s is me",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer));
|
||||
//skip adding to hashtable for HNP
|
||||
if (!ORTE_PROC_IS_HNP) {
|
||||
return;
|
||||
} else {
|
||||
opal_output_verbose(15, orte_rml_base_framework.framework_output,
|
||||
"%s:OFI set_contact_info - HNP process so proceeding to add to hashtable",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* split the rest of the uri into component parts */
|
||||
@ -1191,12 +1207,13 @@ static void process_uri( char *uri)
|
||||
|
||||
/* get the peer object for this process */
|
||||
memcpy(&ui64, (char*)&peer, sizeof(uint64_t));
|
||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_rml_ofi.peers,
|
||||
ui64, (void**)&pr) ||
|
||||
pr = NULL;
|
||||
if (OPAL_SUCCESS != (rc = opal_hash_table_get_value_uint64(&orte_rml_ofi.peers,
|
||||
ui64, (void**)&pr)) ||
|
||||
NULL == pr) {
|
||||
pr = OBJ_NEW(orte_rml_ofi_peer_t);
|
||||
/* populate the peer object with the ofi addresses */
|
||||
for(i=0; NULL != uris[i] && tot_found < tot_reqd; i++) {
|
||||
for(i=0; NULL != uris[i]; i++) {
|
||||
ofiuri = strdup(uris[i]);
|
||||
if (NULL == ofiuri) {
|
||||
opal_output_verbose(2, orte_rml_base_framework.framework_output,
|
||||
@ -1211,35 +1228,43 @@ static void process_uri( char *uri)
|
||||
ep_sockaddr = malloc( sizeof ( struct sockaddr_in) );
|
||||
/* ofiuri for socket provider is of format - ofi-socket:<sin_family,sin_addr,sin_port> */
|
||||
convert_to_sockaddr(ofiuri, ep_sockaddr);
|
||||
pr->ofi_ep = (void *)ep_sockaddr;
|
||||
tot_found++;
|
||||
/* see if we have this subnet in our providers - we take
|
||||
* the first one that matches (other than loopback) */
|
||||
for( cur_ofi_prov=0; cur_ofi_prov < orte_rml_ofi.ofi_prov_open_num ; cur_ofi_prov++ ) {
|
||||
ep_sockaddr2 = (struct sockaddr_in*)orte_rml_ofi.ofi_prov[cur_ofi_prov].ep_name;
|
||||
if (opal_net_samenetwork((struct sockaddr*)ep_sockaddr, (struct sockaddr*)ep_sockaddr2, 24)) {
|
||||
pr->ofi_ep = (void *)ep_sockaddr;
|
||||
if (OPAL_SUCCESS !=
|
||||
(rc = opal_hash_table_set_value_uint64(&orte_rml_ofi.peers, ui64, (void*)pr))) {
|
||||
opal_output_verbose(15, orte_rml_base_framework.framework_output,
|
||||
"%s: ofi peer address insertion failed for peer %s ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer));
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
opal_output_verbose(15, orte_rml_base_framework.framework_output,
|
||||
"%s: ofi peer address inserted for peer %s ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer));
|
||||
opal_output_verbose(15, orte_rml_base_framework.framework_output,
|
||||
"%s: ofi sock address length = %zd ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
pr->ofi_ep_len);
|
||||
struct sockaddr_in* ep_sockaddr = (struct sockaddr_in*)pr->ofi_ep;
|
||||
opal_output_verbose(15,orte_rml_base_framework.framework_output,
|
||||
"%s OFI set_name() port = 0x%x, InternetAddr = %s ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ntohs(ep_sockaddr->sin_port),
|
||||
inet_ntoa(ep_sockaddr->sin_addr));
|
||||
opal_argv_free(uris);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
free( ofiuri);
|
||||
}
|
||||
/* if atleast one OFI address is known for peer insert it */
|
||||
if( 1 <= tot_found ) {
|
||||
if (OPAL_SUCCESS !=
|
||||
(rc = opal_hash_table_set_value_uint64(&orte_rml_ofi.peers, ui64, (void*)pr))) {
|
||||
opal_output_verbose(15, orte_rml_base_framework.framework_output,
|
||||
"%s: ofi peer address insertion failed for peer %s ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer));
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
opal_output_verbose(15, orte_rml_base_framework.framework_output,
|
||||
"%s: ofi peer address inserted for peer %s ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&peer));
|
||||
opal_output_verbose(15, orte_rml_base_framework.framework_output,
|
||||
"%s: ofi sock address length = %zd ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
pr->ofi_ep_len);
|
||||
struct sockaddr_in* ep_sockaddr = (struct sockaddr_in*)pr->ofi_ep;
|
||||
opal_output_verbose(15,orte_rml_base_framework.framework_output,
|
||||
"%s OFI set_name() port = 0x%x, InternetAddr = %s ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ntohs(ep_sockaddr->sin_port),inet_ntoa(ep_sockaddr->sin_addr));
|
||||
}
|
||||
}
|
||||
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s OFI end of set_contact_info()",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
@ -376,8 +376,6 @@ static void send_msg(int fd, short args, void *cbdata)
|
||||
uint32_t total_packets;
|
||||
fi_addr_t dest_fi_addr;
|
||||
orte_rml_send_t *snd;
|
||||
orte_rml_recv_t *rcv;
|
||||
orte_self_send_xfer_t *xfer;
|
||||
orte_rml_ofi_request_t* ofi_send_req = OBJ_NEW( orte_rml_ofi_request_t );
|
||||
uint8_t ofi_prov_id = req->ofi_prov_id;
|
||||
orte_rml_ofi_send_pkt_t* ofi_msg_pkt;
|
||||
@ -385,8 +383,6 @@ static void send_msg(int fd, short args, void *cbdata)
|
||||
orte_rml_ofi_peer_t* pr;
|
||||
uint64_t ui64;
|
||||
struct sockaddr_in* ep_sockaddr;
|
||||
int i, bytes;
|
||||
char *ptr;
|
||||
|
||||
snd = OBJ_NEW(orte_rml_send_t);
|
||||
snd->dst = *peer;
|
||||
@ -408,85 +404,59 @@ static void send_msg(int fd, short args, void *cbdata)
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
|
||||
|
||||
/* get the peer address by doing modex_receive */
|
||||
/* get the peer address from our internal hash table */
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s getting contact info for DAEMON peer %s from internal hash table",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer));
|
||||
memcpy(&ui64, (char*)peer, sizeof(uint64_t));
|
||||
if (OPAL_SUCCESS != (ret = opal_hash_table_get_value_uint64(&orte_rml_ofi.peers,
|
||||
ui64, (void**)&pr) || NULL == pr)) {
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s rml:ofi: Send failed to get peer OFI contact info ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
|
||||
ORTE_RML_SEND_COMPLETE(snd);
|
||||
//OBJ_RELEASE( ofi_send_req);
|
||||
return;
|
||||
}
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s rml:ofi: OFI peer contact info got from hash table",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
dest_ep_name = pr->ofi_ep;
|
||||
dest_ep_namelen = pr->ofi_ep_len;
|
||||
|
||||
//[Debug] printing additional info of IP
|
||||
switch ( orte_rml_ofi.ofi_prov[ofi_prov_id].fabric_info->addr_format)
|
||||
{
|
||||
case FI_SOCKADDR_IN :
|
||||
/* Address is of type sockaddr_in (IPv4) */
|
||||
/*[debug] - print the sockaddr - port and s_addr */
|
||||
ep_sockaddr = (struct sockaddr_in*)dest_ep_name;
|
||||
opal_output_verbose(1,orte_rml_base_framework.framework_output,
|
||||
"%s peer %s epnamelen is %lu, port = %d (or) 0x%x, InternetAddr = 0x%s ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ORTE_NAME_PRINT(peer),
|
||||
(unsigned long)orte_rml_ofi.ofi_prov[ofi_prov_id].epnamelen,ntohs(ep_sockaddr->sin_port),
|
||||
ntohs(ep_sockaddr->sin_port),inet_ntoa(ep_sockaddr->sin_addr));
|
||||
/*[end debug]*/
|
||||
break;
|
||||
}
|
||||
//[Debug] end debug
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s calling OPAL_MODEX_RECV_STRING ", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME) );
|
||||
if (ORTE_PROC_IS_APP ) {
|
||||
asprintf(&pmix_key,"%s%d",orte_rml_ofi.ofi_prov[ofi_prov_id].fabric_info->fabric_attr->prov_name,ofi_prov_id);
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s calling OPAL_MODEX_RECV_STRING for ORTE_PROC_APP peer - %s, key - %s ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer),pmix_key );
|
||||
OPAL_MODEX_RECV_STRING(ret, pmix_key, peer , (uint8_t **) &dest_ep_name, &dest_ep_namelen);
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output, "Returned from MODEX_RECV");
|
||||
opal_output_verbose(50, orte_rml_base_framework.framework_output,
|
||||
"%s Return value from OPAL_MODEX_RECV_STRING - %d, length returned - %lu",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ret, dest_ep_namelen);
|
||||
free(pmix_key);
|
||||
} else {
|
||||
"%s OPAL_MODEX_RECV succeded, %s peer ep name obtained. length=%lu",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), dest_ep_namelen);
|
||||
ret = fi_av_insert(orte_rml_ofi.ofi_prov[ofi_prov_id].av, dest_ep_name,1,&dest_fi_addr,0,NULL);
|
||||
if( ret != 1) {
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s calling OPAL_MODEX_RECV_STRING for DAEMON peer %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(peer));
|
||||
memcpy(&ui64, (char*)peer, sizeof(uint64_t));
|
||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64(&orte_rml_ofi.peers,
|
||||
ui64, (void**)&pr) || NULL == pr) {
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s rml:ofi: Send failed to get peer OFI contact info ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
return;
|
||||
}
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s rml:ofi: OFI peer contact info got from hash table",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
dest_ep_name = pr->ofi_ep;
|
||||
dest_ep_namelen = pr->ofi_ep_len;
|
||||
ret = OPAL_SUCCESS;
|
||||
}
|
||||
if ( OPAL_SUCCESS == ret) {
|
||||
//[Debug] printing additional info of IP
|
||||
switch ( orte_rml_ofi.ofi_prov[ofi_prov_id].fabric_info->addr_format)
|
||||
{
|
||||
case FI_SOCKADDR_IN :
|
||||
/* Address is of type sockaddr_in (IPv4) */
|
||||
/*[debug] - print the sockaddr - port and s_addr */
|
||||
ep_sockaddr = (struct sockaddr_in*)dest_ep_name;
|
||||
opal_output_verbose(1,orte_rml_base_framework.framework_output,
|
||||
"%s peer %s epnamelen is %d, port = %d (or) 0x%x, InternetAddr = 0x%s ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ORTE_NAME_PRINT(peer),
|
||||
orte_rml_ofi.ofi_prov[ofi_prov_id].epnamelen,ntohs(ep_sockaddr->sin_port),
|
||||
ntohs(ep_sockaddr->sin_port),inet_ntoa(ep_sockaddr->sin_addr));
|
||||
/*[end debug]*/
|
||||
break;
|
||||
}
|
||||
//[Debug] end debug
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s OPAL_MODEX_RECV succeded, %s peer ep name obtained. length=%lu",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), dest_ep_namelen);
|
||||
ret = fi_av_insert(orte_rml_ofi.ofi_prov[ofi_prov_id].av, dest_ep_name,1,&dest_fi_addr,0,NULL);
|
||||
if( ret != 1) {
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s fi_av_insert failed in send_msg() returned %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ret );
|
||||
/* call the send-callback fn with error and return, also return failure status */
|
||||
snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
|
||||
"%s fi_av_insert failed in send_msg() returned %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),ret );
|
||||
/* call the send-callback fn with error and return, also return failure status */
|
||||
snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
|
||||
|
||||
ORTE_RML_SEND_COMPLETE(snd);
|
||||
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
|
||||
opal_output_verbose(1, orte_rml_base_framework.framework_output,
|
||||
"%s OPAL_MODEX_RECV failed to obtain %s peer ep name ",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer));
|
||||
/* call the send-callback fn with error and return, also return failure status */
|
||||
snd->status = ORTE_ERR_ADDRESSEE_UNKNOWN;
|
||||
ORTE_RML_SEND_COMPLETE(snd);
|
||||
//OBJ_RELEASE( ofi_send_req);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
ofi_send_req->send = snd;
|
||||
ofi_send_req->completion_count = 1;
|
||||
|
||||
@ -625,7 +595,6 @@ int orte_rml_ofi_send_nb(struct orte_rml_base_module_t* mod,
|
||||
void* cbdata)
|
||||
{
|
||||
orte_rml_recv_t *rcv;
|
||||
orte_rml_send_t *snd;
|
||||
int bytes;
|
||||
orte_self_send_xfer_t *xfer;
|
||||
int i;
|
||||
@ -749,7 +718,6 @@ int orte_rml_ofi_send_buffer_nb(struct orte_rml_base_module_t *mod,
|
||||
void* cbdata)
|
||||
{
|
||||
orte_rml_recv_t *rcv;
|
||||
orte_rml_send_t *snd;
|
||||
orte_self_send_xfer_t *xfer;
|
||||
ofi_send_request_t *req;
|
||||
orte_rml_ofi_module_t *ofi_mod = (orte_rml_ofi_module_t*)mod;
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -207,7 +207,8 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes)
|
||||
NULL != comp_attrib) {
|
||||
comps = opal_argv_split(comp_attrib, ',');
|
||||
for (i=0; NULL != comps[i]; i++) {
|
||||
if (0 == strcasecmp(comps[i], "Ethernet")) {
|
||||
if (0 == strcasecmp(comps[i], "Ethernet") ||
|
||||
0 == strcasecmp(comps[i], "oob")) {
|
||||
/* we are a candidate */
|
||||
opal_argv_free(comps);
|
||||
md = make_module();
|
||||
@ -254,7 +255,14 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes)
|
||||
opal_argv_free(comps);
|
||||
free(comp_attrib);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* if they didn't specify a protocol or a transport, then we can be considered */
|
||||
if (!orte_get_attribute(attributes, ORTE_RML_TRANSPORT_TYPE, NULL, OPAL_STRING) ||
|
||||
!orte_get_attribute(attributes, ORTE_RML_PROTOCOL_TYPE, NULL, OPAL_STRING)) {
|
||||
md = make_module();
|
||||
md->routed = orte_routed.assign_module(NULL);
|
||||
return md;
|
||||
}
|
||||
|
||||
/* if we get here, we cannot handle it */
|
||||
|
@ -768,7 +768,7 @@ int orte_register_params(void)
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY, &orte_coll_transport);
|
||||
|
||||
orte_mgmt_transport = "oob,ethernet";
|
||||
orte_mgmt_transport = "oob";
|
||||
(void) mca_base_var_register("orte", "orte", "mgmt", "transports",
|
||||
"Comma-separated list of transports to use for ORTE management messages",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user