Clean up the conduit open code so that we return detectable errors when a conduit cannot be opened.
Signed-off-by: Ralph Castain <rhc@open-mpi.org>
Этот коммит содержится в:
родитель
e45a358bf0
Коммит
6b3bbd30c5
@ -11,7 +11,7 @@
|
|||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2014 Research Organization for Information Science
|
* Copyright (c) 2014 Research Organization for Information Science
|
||||||
* and Technology (RIST). All rights reserved.
|
* and Technology (RIST). All rights reserved.
|
||||||
* Copyright (c) 2015 Intel, Inc. All rights reserved.
|
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
* Additional copyrights may follow
|
* Additional copyrights may follow
|
||||||
@ -142,20 +142,12 @@ enum {
|
|||||||
ORTE_ERR_ALLOCATION_PENDING = (ORTE_ERR_BASE - 43),
|
ORTE_ERR_ALLOCATION_PENDING = (ORTE_ERR_BASE - 43),
|
||||||
ORTE_ERR_NO_PATH_TO_TARGET = (ORTE_ERR_BASE - 44),
|
ORTE_ERR_NO_PATH_TO_TARGET = (ORTE_ERR_BASE - 44),
|
||||||
ORTE_ERR_OP_IN_PROGRESS = (ORTE_ERR_BASE - 45),
|
ORTE_ERR_OP_IN_PROGRESS = (ORTE_ERR_BASE - 45),
|
||||||
ORTE_ERR_OPEN_CHANNEL_PEER_FAIL = (ORTE_ERR_BASE - 46),
|
ORTE_ERR_OPEN_CONDUIT_FAIL = (ORTE_ERR_BASE - 46),
|
||||||
ORTE_ERR_OPEN_CHANNEL_PEER_REJECT = (ORTE_ERR_BASE - 47),
|
ORTE_ERR_DUPLICATE_MSG = (ORTE_ERR_BASE - 47),
|
||||||
ORTE_ERR_QOS_TYPE_UNSUPPORTED = (ORTE_ERR_BASE - 48),
|
ORTE_ERR_OUT_OF_ORDER_MSG = (ORTE_ERR_BASE - 48),
|
||||||
ORTE_ERR_QOS_ACK_WINDOW_FULL = (ORTE_ERR_BASE - 49),
|
ORTE_ERR_FORCE_SELECT = (ORTE_ERR_BASE - 49),
|
||||||
ORTE_ERR_ACK_TIMEOUT_SENDER = (ORTE_ERR_BASE - 50),
|
ORTE_ERR_JOB_CANCELLED = (ORTE_ERR_BASE - 50),
|
||||||
ORTE_ERR_ACK_TIMEOUT_RECEIVER = (ORTE_ERR_BASE - 51),
|
ORTE_ERR_CONDUIT_SEND_FAIL = (ORTE_ERR_BASE - 51)
|
||||||
ORTE_ERR_LOST_MSG_IN_WINDOW = (ORTE_ERR_BASE - 52),
|
|
||||||
ORTE_ERR_CHANNEL_BUSY = (ORTE_ERR_BASE - 53),
|
|
||||||
ORTE_ERR_DUPLICATE_MSG = (ORTE_ERR_BASE - 54),
|
|
||||||
ORTE_ERR_OUT_OF_ORDER_MSG = (ORTE_ERR_BASE - 55),
|
|
||||||
ORTE_ERR_OPEN_CHANNEL_DUPLICATE = (ORTE_ERR_BASE - 56),
|
|
||||||
ORTE_ERR_FORCE_SELECT = (ORTE_ERR_BASE - 57),
|
|
||||||
ORTE_ERR_JOB_CANCELLED = (ORTE_ERR_BASE - 58),
|
|
||||||
ORTE_ERR_CONDUIT_SEND_FAIL = (ORTE_ERR_BASE - 59)
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#define ORTE_ERR_MAX (ORTE_ERR_BASE - 100)
|
#define ORTE_ERR_MAX (ORTE_ERR_BASE - 100)
|
||||||
@ -163,4 +155,3 @@ enum {
|
|||||||
END_C_DECLS
|
END_C_DECLS
|
||||||
|
|
||||||
#endif /* ORTE_CONSTANTS_H */
|
#endif /* ORTE_CONSTANTS_H */
|
||||||
|
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
* Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved.
|
* Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved.
|
||||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2013-2016 Intel, Inc. All rights reserved.
|
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||||
* and Technology (RIST). All rights reserved.
|
* and Technology (RIST). All rights reserved.
|
||||||
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||||
@ -223,13 +223,21 @@ int orte_ess_base_app_setup(bool db_restrict_local)
|
|||||||
OBJ_CONSTRUCT(&transports, opal_list_t);
|
OBJ_CONSTRUCT(&transports, opal_list_t);
|
||||||
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
|
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
|
||||||
ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING);
|
ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING);
|
||||||
orte_mgmt_conduit = orte_rml.open_conduit(&transports);
|
if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) {
|
||||||
|
ret = ORTE_ERR_OPEN_CONDUIT_FAIL;
|
||||||
|
error = "orte_rml_open_mgmt_conduit";
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
OPAL_LIST_DESTRUCT(&transports);
|
OPAL_LIST_DESTRUCT(&transports);
|
||||||
|
|
||||||
OBJ_CONSTRUCT(&transports, opal_list_t);
|
OBJ_CONSTRUCT(&transports, opal_list_t);
|
||||||
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
|
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
|
||||||
ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING);
|
ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING);
|
||||||
orte_coll_conduit = orte_rml.open_conduit(&transports);
|
if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) {
|
||||||
|
ret = ORTE_ERR_OPEN_CONDUIT_FAIL;
|
||||||
|
error = "orte_rml_open_coll_conduit";
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
OPAL_LIST_DESTRUCT(&transports);
|
OPAL_LIST_DESTRUCT(&transports);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -424,13 +424,21 @@ int orte_ess_base_orted_setup(void)
|
|||||||
OBJ_CONSTRUCT(&transports, opal_list_t);
|
OBJ_CONSTRUCT(&transports, opal_list_t);
|
||||||
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
|
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
|
||||||
ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING);
|
ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING);
|
||||||
orte_mgmt_conduit = orte_rml.open_conduit(&transports);
|
if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) {
|
||||||
|
ret = ORTE_ERR_OPEN_CONDUIT_FAIL;
|
||||||
|
error = "orte_rml_open_mgmt_conduit";
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
OPAL_LIST_DESTRUCT(&transports);
|
OPAL_LIST_DESTRUCT(&transports);
|
||||||
|
|
||||||
OBJ_CONSTRUCT(&transports, opal_list_t);
|
OBJ_CONSTRUCT(&transports, opal_list_t);
|
||||||
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
|
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
|
||||||
ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING);
|
ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING);
|
||||||
orte_coll_conduit = orte_rml.open_conduit(&transports);
|
if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) {
|
||||||
|
ret = ORTE_ERR_OPEN_CONDUIT_FAIL;
|
||||||
|
error = "orte_rml_open_coll_conduit";
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
OPAL_LIST_DESTRUCT(&transports);
|
OPAL_LIST_DESTRUCT(&transports);
|
||||||
|
|
||||||
/* add our contact info to our proc object */
|
/* add our contact info to our proc object */
|
||||||
|
@ -355,13 +355,21 @@ static int rte_init(void)
|
|||||||
OBJ_CONSTRUCT(&transports, opal_list_t);
|
OBJ_CONSTRUCT(&transports, opal_list_t);
|
||||||
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
|
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
|
||||||
ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING);
|
ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING);
|
||||||
orte_mgmt_conduit = orte_rml.open_conduit(&transports);
|
if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) {
|
||||||
|
ret = ORTE_ERR_OPEN_CONDUIT_FAIL;
|
||||||
|
error = "orte_rml_open_mgmt_conduit";
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
OPAL_LIST_DESTRUCT(&transports);
|
OPAL_LIST_DESTRUCT(&transports);
|
||||||
|
|
||||||
OBJ_CONSTRUCT(&transports, opal_list_t);
|
OBJ_CONSTRUCT(&transports, opal_list_t);
|
||||||
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
|
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
|
||||||
ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING);
|
ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING);
|
||||||
orte_coll_conduit = orte_rml.open_conduit(&transports);
|
if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) {
|
||||||
|
ret = ORTE_ERR_OPEN_CONDUIT_FAIL;
|
||||||
|
error = "orte_rml_open_coll_conduit";
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
OPAL_LIST_DESTRUCT(&transports);
|
OPAL_LIST_DESTRUCT(&transports);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -146,7 +146,7 @@ static int orte_rml_base_open(mca_base_open_flag_t flags)
|
|||||||
OBJ_CONSTRUCT(&orte_rml_base.posted_recvs, opal_list_t);
|
OBJ_CONSTRUCT(&orte_rml_base.posted_recvs, opal_list_t);
|
||||||
OBJ_CONSTRUCT(&orte_rml_base.unmatched_msgs, opal_list_t);
|
OBJ_CONSTRUCT(&orte_rml_base.unmatched_msgs, opal_list_t);
|
||||||
OBJ_CONSTRUCT(&orte_rml_base.conduits, opal_pointer_array_t);
|
OBJ_CONSTRUCT(&orte_rml_base.conduits, opal_pointer_array_t);
|
||||||
opal_pointer_array_init(&orte_rml_base.conduits,1,INT_MAX,1);
|
opal_pointer_array_init(&orte_rml_base.conduits,1,INT16_MAX,1);
|
||||||
|
|
||||||
/* Open up all available components */
|
/* Open up all available components */
|
||||||
return mca_base_framework_components_open(&orte_rml_base_framework, flags);
|
return mca_base_framework_components_open(&orte_rml_base_framework, flags);
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
||||||
* Copyright (c) 2014-2016 Intel Corporation. All rights reserved.
|
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||||
* Copyright (c) 2015-2017 Research Organization for Information Science
|
* Copyright (c) 2015-2017 Research Organization for Information Science
|
||||||
* and Technology (RIST). All rights reserved.
|
* and Technology (RIST). All rights reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
@ -82,10 +82,14 @@ orte_rml_conduit_t orte_rml_API_open_conduit(opal_list_t *attributes)
|
|||||||
if (NULL != ourmod) {
|
if (NULL != ourmod) {
|
||||||
/* we got an answer - store this conduit in our array */
|
/* we got an answer - store this conduit in our array */
|
||||||
rc = opal_pointer_array_add(&orte_rml_base.conduits, ourmod);
|
rc = opal_pointer_array_add(&orte_rml_base.conduits, ourmod);
|
||||||
|
if (rc < 0) {
|
||||||
|
return ORTE_RML_CONDUIT_INVALID;
|
||||||
|
}
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
/* we get here if nobody could support it */
|
/* we get here if nobody could support it */
|
||||||
return ORTE_ERR_NOT_SUPPORTED;
|
ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED);
|
||||||
|
return ORTE_RML_CONDUIT_INVALID;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -1006,10 +1006,8 @@ static orte_rml_base_module_t* open_conduit(opal_list_t *attributes)
|
|||||||
comps = opal_argv_split(comp_attrib, ',');
|
comps = opal_argv_split(comp_attrib, ',');
|
||||||
for (i=0; NULL != comps[i]; i++) {
|
for (i=0; NULL != comps[i]; i++) {
|
||||||
/* changing below to check for oob, as trying to use ofi for only mgmt conduit */
|
/* changing below to check for oob, as trying to use ofi for only mgmt conduit */
|
||||||
if (0 == strcmp(comps[i], "oob")) {
|
if (0 == strcasecmp(comps[i], "fabric") ||
|
||||||
/* changing below to check for fabric, as trying to use ofi for only coll conduit
|
0 == strcasecmp(comps[i], "ethernet")) {
|
||||||
if (0 == strcmp(comps[i], "fabric")) { */
|
|
||||||
/*if (0 == strcmp(comps[i], "ethernet")) { */
|
|
||||||
/* we are a candidate, */
|
/* we are a candidate, */
|
||||||
opal_output_verbose(20,orte_rml_base_framework.framework_output,
|
opal_output_verbose(20,orte_rml_base_framework.framework_output,
|
||||||
"%s - Forcibly returning ofi socket provider for ethernet transport request",
|
"%s - Forcibly returning ofi socket provider for ethernet transport request",
|
||||||
|
@ -198,6 +198,7 @@ typedef uint32_t orte_rml_tag_t;
|
|||||||
|
|
||||||
/* Conduit ID */
|
/* Conduit ID */
|
||||||
typedef uint16_t orte_rml_conduit_t;
|
typedef uint16_t orte_rml_conduit_t;
|
||||||
|
/* Sentinel conduit ID returned when a conduit could not be opened.
 * It is the largest uint16_t value so that it can never collide with a
 * real conduit ID: conduit IDs are indices into a pointer array that is
 * capped at INT16_MAX entries, whereas a small value such as 0xff (255)
 * is a perfectly valid index. */
#define ORTE_RML_CONDUIT_INVALID 0xffff
|
||||||
|
|
||||||
/* define an object for reporting transports */
|
/* define an object for reporting transports */
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
@ -195,39 +195,12 @@ int orte_err2str(int errnum, const char **errmsg)
|
|||||||
case ORTE_ERR_OP_IN_PROGRESS:
|
case ORTE_ERR_OP_IN_PROGRESS:
|
||||||
retval = "Operation in progress";
|
retval = "Operation in progress";
|
||||||
break;
|
break;
|
||||||
case ORTE_ERR_OPEN_CHANNEL_PEER_FAIL:
|
case ORTE_ERR_OPEN_CONDUIT_FAIL:
|
||||||
retval = "Open channel to peer failed";
|
retval = "Open messaging conduit failed";
|
||||||
break;
|
|
||||||
case ORTE_ERR_OPEN_CHANNEL_PEER_REJECT:
|
|
||||||
retval = "Open channel to peer was rejected";
|
|
||||||
break;
|
|
||||||
case ORTE_ERR_QOS_TYPE_UNSUPPORTED:
|
|
||||||
retval = "QoS type unsupported";
|
|
||||||
break;
|
|
||||||
case ORTE_ERR_QOS_ACK_WINDOW_FULL:
|
|
||||||
retval = "QoS ack window full";
|
|
||||||
break;
|
|
||||||
case ORTE_ERR_ACK_TIMEOUT_SENDER:
|
|
||||||
retval = "Send ack timed out";
|
|
||||||
break;
|
|
||||||
case ORTE_ERR_ACK_TIMEOUT_RECEIVER:
|
|
||||||
retval = "Recv ack timed out";
|
|
||||||
break;
|
|
||||||
case ORTE_ERR_LOST_MSG_IN_WINDOW:
|
|
||||||
retval = "Msg lost in window";
|
|
||||||
break;
|
|
||||||
case ORTE_ERR_CHANNEL_BUSY:
|
|
||||||
retval = "Channel busy";
|
|
||||||
break;
|
|
||||||
case ORTE_ERR_DUPLICATE_MSG:
|
|
||||||
retval = "Duplicate message";
|
|
||||||
break;
|
break;
|
||||||
case ORTE_ERR_OUT_OF_ORDER_MSG:
|
case ORTE_ERR_OUT_OF_ORDER_MSG:
|
||||||
retval = "Out of order message";
|
retval = "Out of order message";
|
||||||
break;
|
break;
|
||||||
case ORTE_ERR_OPEN_CHANNEL_DUPLICATE:
|
|
||||||
retval = "Duplicate channel open request";
|
|
||||||
break;
|
|
||||||
case ORTE_ERR_FORCE_SELECT:
|
case ORTE_ERR_FORCE_SELECT:
|
||||||
retval = "Force select";
|
retval = "Force select";
|
||||||
break;
|
break;
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user