Remove the remaining cruft from dual oob transport
* When we moved to allowing dual rml/oob transports, we added a bunch of stuff that is no longer needed. Remove it so as to simplify the messaging system.
* Fix the routed/radix component so it correctly returns the parent's vpid.

Signed-off-by: Ralph Castain <rhc@pmix.org>
This commit is contained in:
parent
b80210c36a
commit
e56ee1e06a
@ -9,7 +9,7 @@
|
||||
* Copyright (c) 2011 Oracle and/or all its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
@ -277,8 +277,7 @@ static void job_errors(int fd, short args, void *cbdata)
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid),
|
||||
ORTE_NAME_PRINT(&jdata->originator)));
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
&jdata->originator, answer,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(&jdata->originator, answer,
|
||||
ORTE_RML_TAG_LAUNCH_RESP,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
@ -358,7 +357,6 @@ static void proc_errors(int fd, short args, void *cbdata)
|
||||
orte_proc_state_t state = caddy->proc_state;
|
||||
int i;
|
||||
int32_t i32, *i32ptr;
|
||||
char *rtmod;
|
||||
|
||||
ORTE_ACQUIRE_OBJECT(caddy);
|
||||
|
||||
@ -381,7 +379,6 @@ static void proc_errors(int fd, short args, void *cbdata)
|
||||
goto cleanup;
|
||||
}
|
||||
pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid);
|
||||
rtmod = orte_rml.get_routed(orte_mgmt_conduit);
|
||||
|
||||
/* we MUST handle a communication failure before doing anything else
|
||||
* as it requires some special care to avoid normal termination issues
|
||||
@ -412,9 +409,9 @@ static void proc_errors(int fd, short args, void *cbdata)
|
||||
"%s Comm failure: daemons terminating - recording daemon %s as gone",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc)));
|
||||
/* remove from dependent routes, if it is one */
|
||||
orte_routed.route_lost(rtmod, proc);
|
||||
orte_routed.route_lost(proc);
|
||||
/* if all my routes and local children are gone, then terminate ourselves */
|
||||
if (0 == orte_routed.num_routes(rtmod)) {
|
||||
if (0 == orte_routed.num_routes()) {
|
||||
for (i=0; i < orte_local_children->size; i++) {
|
||||
if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) &&
|
||||
ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_ALIVE) && proct->state < ORTE_PROC_STATE_UNTERMINATED) {
|
||||
@ -435,7 +432,7 @@ static void proc_errors(int fd, short args, void *cbdata)
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output,
|
||||
"%s Comm failure: %d routes remain alive",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(int)orte_routed.num_routes(rtmod)));
|
||||
(int)orte_routed.num_routes()));
|
||||
}
|
||||
goto cleanup;
|
||||
}
|
||||
@ -493,7 +490,7 @@ static void proc_errors(int fd, short args, void *cbdata)
|
||||
}
|
||||
/* if all my routes and children are gone, then terminate
|
||||
ourselves nicely (i.e., this is a normal termination) */
|
||||
if (0 == orte_routed.num_routes(rtmod)) {
|
||||
if (0 == orte_routed.num_routes()) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output,
|
||||
"%s errmgr:default:hnp all routes gone - exiting",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
@ -718,7 +715,7 @@ static void proc_errors(int fd, short args, void *cbdata)
|
||||
default_hnp_abort(jdata);
|
||||
}
|
||||
/* remove from dependent routes, if it is one */
|
||||
orte_routed.route_lost(rtmod, proc);
|
||||
orte_routed.route_lost(proc);
|
||||
break;
|
||||
|
||||
case ORTE_PROC_STATE_UNABLE_TO_SEND_MSG:
|
||||
@ -841,7 +838,7 @@ static void default_hnp_abort(orte_job_t *jdata)
|
||||
i32ptr = &i32;
|
||||
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT, (void**)&i32ptr, OPAL_INT32)) {
|
||||
/* warn user */
|
||||
orte_show_help("help-errmgr-base.txt", "normal-termination-but", true,
|
||||
orte_show_help("help-errmgr-base.txt", "normal-termination-but", true,
|
||||
(1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "Primary" : "Child",
|
||||
(1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid),
|
||||
i32, (1 == i32) ? "process returned\na non-zero exit code" :
|
||||
|
@ -204,8 +204,7 @@ static void orted_abort(int error_code, char *fmt, ...)
|
||||
}
|
||||
|
||||
/* send it */
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, alert,
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
|
||||
ORTE_RML_TAG_PLM,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -303,8 +302,7 @@ static void job_errors(int fd, short args, void *cbdata)
|
||||
goto cleanup;
|
||||
}
|
||||
/* send it */
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, alert,
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
|
||||
ORTE_RML_TAG_PLM,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -321,7 +319,6 @@ static void proc_errors(int fd, short args, void *cbdata)
|
||||
orte_job_t *jdata;
|
||||
orte_process_name_t *proc = &caddy->name;
|
||||
orte_proc_state_t state = caddy->proc_state;
|
||||
char *rtmod;
|
||||
orte_proc_t *child, *ptr;
|
||||
opal_buffer_t *alert;
|
||||
orte_plm_cmd_flag_t cmd;
|
||||
@ -386,9 +383,6 @@ static void proc_errors(int fd, short args, void *cbdata)
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* get our management conduit's routed module name */
|
||||
rtmod = orte_rml.get_routed(orte_mgmt_conduit);
|
||||
|
||||
if (ORTE_PROC_STATE_COMM_FAILED == state) {
|
||||
/* if it is our own connection, ignore it */
|
||||
if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_NAME, proc)) {
|
||||
@ -444,7 +438,7 @@ static void proc_errors(int fd, short args, void *cbdata)
|
||||
}
|
||||
/* if all my routes and children are gone, then terminate
|
||||
ourselves nicely (i.e., this is a normal termination) */
|
||||
if (0 == orte_routed.num_routes(rtmod)) {
|
||||
if (0 == orte_routed.num_routes()) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output,
|
||||
"%s errmgr:default:orted all routes gone - exiting",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
@ -453,7 +447,7 @@ static void proc_errors(int fd, short args, void *cbdata)
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output,
|
||||
"%s errmgr:default:orted not exiting, num_routes() == %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(int)orte_routed.num_routes(rtmod)));
|
||||
(int)orte_routed.num_routes()));
|
||||
}
|
||||
}
|
||||
/* if not, then we can continue */
|
||||
@ -513,8 +507,7 @@ static void proc_errors(int fd, short args, void *cbdata)
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&child->name),
|
||||
jdata->num_local_procs));
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, alert,
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
|
||||
ORTE_RML_TAG_PLM,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -579,7 +572,7 @@ static void proc_errors(int fd, short args, void *cbdata)
|
||||
}
|
||||
/* if all my routes and children are gone, then terminate
|
||||
ourselves nicely (i.e., this is a normal termination) */
|
||||
if (0 == orte_routed.num_routes(rtmod)) {
|
||||
if (0 == orte_routed.num_routes()) {
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output,
|
||||
"%s errmgr:default:orted all routes gone - exiting",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
@ -621,8 +614,7 @@ static void proc_errors(int fd, short args, void *cbdata)
|
||||
ORTE_NAME_PRINT(&child->name),
|
||||
jdata->num_local_procs));
|
||||
/* send it */
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, alert,
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
|
||||
ORTE_RML_TAG_PLM,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -677,8 +669,7 @@ static void proc_errors(int fd, short args, void *cbdata)
|
||||
OBJ_RELEASE(jdata);
|
||||
|
||||
/* send it */
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, alert,
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
|
||||
ORTE_RML_TAG_PLM,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
@ -109,7 +109,6 @@ int orte_ess_base_orted_setup(void)
|
||||
hwloc_obj_t obj;
|
||||
unsigned i, j;
|
||||
orte_topology_t *t;
|
||||
opal_list_t transports;
|
||||
orte_ess_base_signal_t *sig;
|
||||
int idx;
|
||||
|
||||
@ -448,27 +447,6 @@ int orte_ess_base_orted_setup(void)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* get a conduit for our use - we never route IO over fabric */
|
||||
OBJ_CONSTRUCT(&transports, opal_list_t);
|
||||
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
|
||||
ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING);
|
||||
if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) {
|
||||
ret = ORTE_ERR_OPEN_CONDUIT_FAIL;
|
||||
error = "orte_rml_open_mgmt_conduit";
|
||||
goto error;
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&transports);
|
||||
|
||||
OBJ_CONSTRUCT(&transports, opal_list_t);
|
||||
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
|
||||
ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING);
|
||||
if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) {
|
||||
ret = ORTE_ERR_OPEN_CONDUIT_FAIL;
|
||||
error = "orte_rml_open_coll_conduit";
|
||||
goto error;
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&transports);
|
||||
|
||||
/*
|
||||
* Group communications
|
||||
*/
|
||||
@ -609,10 +587,6 @@ int orte_ess_base_orted_finalize(void)
|
||||
pmix_server_finalize();
|
||||
(void) mca_base_framework_close(&opal_pmix_base_framework);
|
||||
|
||||
/* release the conduits */
|
||||
orte_rml.close_conduit(orte_mgmt_conduit);
|
||||
orte_rml.close_conduit(orte_coll_conduit);
|
||||
|
||||
/* close frameworks */
|
||||
(void) mca_base_framework_close(&orte_filem_base_framework);
|
||||
(void) mca_base_framework_close(&orte_grpcomm_base_framework);
|
||||
@ -695,8 +669,7 @@ static void signal_forward_callback(int fd, short event, void *arg)
|
||||
}
|
||||
|
||||
/* send it to ourselves */
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_NAME, cmd,
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, cmd,
|
||||
ORTE_RML_TAG_DAEMON,
|
||||
NULL, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
@ -11,7 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Hochschule Esslingen. All rights reserved.
|
||||
*
|
||||
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||
@ -94,7 +94,6 @@ int orte_ess_base_tool_setup(opal_list_t *flags)
|
||||
{
|
||||
int ret;
|
||||
char *error = NULL;
|
||||
opal_list_t transports;
|
||||
opal_list_t info;
|
||||
opal_value_t *kv, *knext, val;
|
||||
opal_pmix_query_t *q;
|
||||
@ -222,13 +221,6 @@ int orte_ess_base_tool_setup(opal_list_t *flags)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* get a conduit for our use - we never route IO over fabric */
|
||||
OBJ_CONSTRUCT(&transports, opal_list_t);
|
||||
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
|
||||
ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING);
|
||||
orte_mgmt_conduit = orte_rml.open_conduit(&transports);
|
||||
OPAL_LIST_DESTRUCT(&transports);
|
||||
|
||||
/* we -may- need to know the name of the head
|
||||
* of our session directory tree, particularly the
|
||||
* tmp base where any other session directories on
|
||||
@ -269,7 +261,7 @@ int orte_ess_base_tool_setup(opal_list_t *flags)
|
||||
val.data.string = NULL;
|
||||
OBJ_DESTRUCT(&val);
|
||||
/* set the route to be direct */
|
||||
if (ORTE_SUCCESS != orte_routed.update_route(NULL, ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) {
|
||||
if (ORTE_SUCCESS != orte_routed.update_route(ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) {
|
||||
orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri);
|
||||
orte_finalize();
|
||||
exit(1);
|
||||
@ -277,7 +269,7 @@ int orte_ess_base_tool_setup(opal_list_t *flags)
|
||||
|
||||
/* connect to the HNP so we can recv forwarded output */
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, ORTE_PROC_MY_HNP,
|
||||
ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP,
|
||||
buf, ORTE_RML_TAG_WARMUP_CONNECTION,
|
||||
orte_rml_send_callback, NULL);
|
||||
if (ORTE_SUCCESS != ret) {
|
||||
@ -287,7 +279,7 @@ int orte_ess_base_tool_setup(opal_list_t *flags)
|
||||
}
|
||||
|
||||
/* set the target hnp as our lifeline so we will terminate if it exits */
|
||||
orte_routed.set_lifeline(NULL, ORTE_PROC_MY_HNP);
|
||||
orte_routed.set_lifeline(ORTE_PROC_MY_HNP);
|
||||
|
||||
/* setup the IOF */
|
||||
if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) {
|
||||
@ -317,8 +309,6 @@ int orte_ess_base_tool_finalize(void)
|
||||
{
|
||||
orte_wait_finalize();
|
||||
|
||||
orte_rml.close_conduit(orte_mgmt_conduit);
|
||||
|
||||
/* if I am a tool, then all I will have done is
|
||||
* a very small subset of orte_init - ensure that
|
||||
* I only back those elements out
|
||||
|
@ -141,7 +141,6 @@ static int rte_init(void)
|
||||
uint32_t h;
|
||||
int idx;
|
||||
orte_topology_t *t;
|
||||
opal_list_t transports;
|
||||
orte_ess_base_signal_t *sig;
|
||||
opal_value_t val;
|
||||
|
||||
@ -370,27 +369,6 @@ static int rte_init(void)
|
||||
goto error;
|
||||
}
|
||||
|
||||
/* get a conduit for our use - we never route IO over fabric */
|
||||
OBJ_CONSTRUCT(&transports, opal_list_t);
|
||||
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
|
||||
ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING);
|
||||
if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) {
|
||||
ret = ORTE_ERR_OPEN_CONDUIT_FAIL;
|
||||
error = "orte_rml_open_mgmt_conduit";
|
||||
goto error;
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&transports);
|
||||
|
||||
OBJ_CONSTRUCT(&transports, opal_list_t);
|
||||
orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE,
|
||||
ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING);
|
||||
if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) {
|
||||
ret = ORTE_ERR_OPEN_CONDUIT_FAIL;
|
||||
error = "orte_rml_open_coll_conduit";
|
||||
goto error;
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&transports);
|
||||
|
||||
/* it is now safe to start the pmix server */
|
||||
pmix_server_start();
|
||||
|
||||
@ -776,10 +754,6 @@ static int rte_finalize(void)
|
||||
fflush(stdout);
|
||||
fflush(stderr);
|
||||
|
||||
/* release the conduits */
|
||||
orte_rml.close_conduit(orte_mgmt_conduit);
|
||||
orte_rml.close_conduit(orte_coll_conduit);
|
||||
|
||||
(void) mca_base_framework_close(&orte_iof_base_framework);
|
||||
(void) mca_base_framework_close(&orte_rtc_base_framework);
|
||||
(void) mca_base_framework_close(&orte_odls_base_framework);
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -208,8 +208,7 @@ static void filem_base_process_get_proc_node_name_cmd(orte_process_name_t* sende
|
||||
return;
|
||||
}
|
||||
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
sender, answer,
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(sender, answer,
|
||||
ORTE_RML_TAG_FILEM_BASE_RESP,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -301,8 +300,7 @@ static void filem_base_process_get_remote_path_cmd(orte_process_name_t* sender,
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
sender, answer,
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(sender, answer,
|
||||
ORTE_RML_TAG_FILEM_BASE_RESP,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
@ -2,7 +2,7 @@
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved
|
||||
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -870,8 +870,7 @@ static void send_complete(char *file, int status)
|
||||
OBJ_RELEASE(buf);
|
||||
return;
|
||||
}
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, buf,
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf,
|
||||
ORTE_RML_TAG_FILEM_BASE_RESP,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
@ -231,7 +231,6 @@ orte_grpcomm_coll_t* orte_grpcomm_base_get_tracker(orte_grpcomm_signature_t *sig
|
||||
orte_namelist_t *nm;
|
||||
opal_list_t children;
|
||||
size_t n;
|
||||
char *routed;
|
||||
|
||||
/* search the existing tracker list to see if this already exists */
|
||||
OPAL_LIST_FOREACH(coll, &orte_grpcomm_base.ongoing, orte_grpcomm_coll_t) {
|
||||
@ -279,38 +278,30 @@ orte_grpcomm_coll_t* orte_grpcomm_base_get_tracker(orte_grpcomm_signature_t *sig
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* get the routed module for our conduit */
|
||||
routed = orte_rml.get_routed(orte_coll_conduit);
|
||||
if (NULL == routed) {
|
||||
/* this conduit is not routed, so we expect all daemons
|
||||
* to directly participate */
|
||||
coll->nexpected = coll->ndmns;
|
||||
} else {
|
||||
/* cycle thru the array of daemons and compare them to our
|
||||
* children in the routing tree, counting the ones that match
|
||||
* so we know how many daemons we should receive contributions from */
|
||||
OBJ_CONSTRUCT(&children, opal_list_t);
|
||||
orte_routed.get_routing_list(routed, &children);
|
||||
while (NULL != (nm = (orte_namelist_t*)opal_list_remove_first(&children))) {
|
||||
for (n=0; n < coll->ndmns; n++) {
|
||||
if (nm->name.vpid == coll->dmns[n]) {
|
||||
coll->nexpected++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
OBJ_RELEASE(nm);
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&children);
|
||||
|
||||
/* see if I am in the array of participants - note that I may
|
||||
* be in the rollup tree even though I'm not participating
|
||||
* in the collective itself */
|
||||
/* cycle thru the array of daemons and compare them to our
|
||||
* children in the routing tree, counting the ones that match
|
||||
* so we know how many daemons we should receive contributions from */
|
||||
OBJ_CONSTRUCT(&children, opal_list_t);
|
||||
orte_routed.get_routing_list(&children);
|
||||
while (NULL != (nm = (orte_namelist_t*)opal_list_remove_first(&children))) {
|
||||
for (n=0; n < coll->ndmns; n++) {
|
||||
if (coll->dmns[n] == ORTE_PROC_MY_NAME->vpid) {
|
||||
if (nm->name.vpid == coll->dmns[n]) {
|
||||
coll->nexpected++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
OBJ_RELEASE(nm);
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&children);
|
||||
|
||||
/* see if I am in the array of participants - note that I may
|
||||
* be in the rollup tree even though I'm not participating
|
||||
* in the collective itself */
|
||||
for (n=0; n < coll->ndmns; n++) {
|
||||
if (coll->dmns[n] == ORTE_PROC_MY_NAME->vpid) {
|
||||
coll->nexpected++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return coll;
|
||||
|
@ -112,8 +112,7 @@ static int xcast(orte_vpid_t *vpids,
|
||||
|
||||
/* send it to the HNP (could be myself) for relay */
|
||||
OBJ_RETAIN(buf); // we'll let the RML release it
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_coll_conduit,
|
||||
ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_XCAST,
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_XCAST,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(buf);
|
||||
@ -153,8 +152,7 @@ static int allgather(orte_grpcomm_coll_t *coll,
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* send the info to ourselves for tracking */
|
||||
rc = orte_rml.send_buffer_nb(orte_coll_conduit,
|
||||
ORTE_PROC_MY_NAME, relay,
|
||||
rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, relay,
|
||||
ORTE_RML_TAG_ALLGATHER_DIRECT,
|
||||
orte_rml_send_callback, NULL);
|
||||
return rc;
|
||||
@ -245,8 +243,7 @@ static void allgather_recv(int status, orte_process_name_t* sender,
|
||||
/* transfer the collected bucket */
|
||||
opal_dss.copy_payload(reply, &coll->bucket);
|
||||
/* send the info to our parent */
|
||||
rc = orte_rml.send_buffer_nb(orte_coll_conduit,
|
||||
ORTE_PROC_MY_PARENT, reply,
|
||||
rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_PARENT, reply,
|
||||
ORTE_RML_TAG_ALLGATHER_DIRECT,
|
||||
orte_rml_send_callback, NULL);
|
||||
}
|
||||
@ -271,7 +268,6 @@ static void xcast_recv(int status, orte_process_name_t* sender,
|
||||
opal_list_t coll;
|
||||
orte_grpcomm_signature_t *sig;
|
||||
orte_rml_tag_t tag;
|
||||
char *rtmod;
|
||||
size_t inlen, cmplen;
|
||||
uint8_t *packed_data, *cmpdata;
|
||||
int32_t nvals, i;
|
||||
@ -372,9 +368,6 @@ static void xcast_recv(int status, orte_process_name_t* sender,
|
||||
return;
|
||||
}
|
||||
|
||||
/* get our conduit's routed module name */
|
||||
rtmod = orte_rml.get_routed(orte_coll_conduit);
|
||||
|
||||
/* if this is headed for the daemon command processor,
|
||||
* then we first need to check for add_local_procs
|
||||
* as that command includes some needed wireup info */
|
||||
@ -424,7 +417,7 @@ static void xcast_recv(int status, orte_process_name_t* sender,
|
||||
/* update the routing plan - the HNP already did
|
||||
* it when it computed the VM, so don't waste time
|
||||
* re-doing it here */
|
||||
orte_routed.update_routing_plan(rtmod);
|
||||
orte_routed.update_routing_plan();
|
||||
}
|
||||
/* routing is now possible */
|
||||
orte_routed_base.routing_enabled = true;
|
||||
@ -523,7 +516,7 @@ static void xcast_recv(int status, orte_process_name_t* sender,
|
||||
relay:
|
||||
if (!orte_do_not_launch) {
|
||||
/* get the list of next recipients from the routed module */
|
||||
orte_routed.get_routing_list(rtmod, &coll);
|
||||
orte_routed.get_routing_list(&coll);
|
||||
|
||||
/* if list is empty, no relay is required */
|
||||
if (opal_list_is_empty(&coll)) {
|
||||
@ -569,8 +562,7 @@ static void xcast_recv(int status, orte_process_name_t* sender,
|
||||
ORTE_FORCED_TERMINATE(ORTE_ERR_UNREACH);
|
||||
continue;
|
||||
}
|
||||
if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(orte_coll_conduit,
|
||||
&nm->name, rly, ORTE_RML_TAG_XCAST,
|
||||
if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(&nm->name, rly, ORTE_RML_TAG_XCAST,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_RELEASE(rly);
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Los Alamos National Security, LLC
|
||||
* All rights reserved
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -109,9 +109,8 @@ int orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host,
|
||||
/* send the buffer to the host - this is either a daemon or
|
||||
* a tool that requested IOF
|
||||
*/
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
host, buf, ORTE_RML_TAG_IOF_PROXY,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(host, buf, ORTE_RML_TAG_IOF_PROXY,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -149,8 +149,7 @@ BEGIN_C_DECLS
|
||||
opal_dss.pack(buf, (b), 1, ORTE_NAME); \
|
||||
\
|
||||
/* send the buffer to the HNP */ \
|
||||
orte_rml.send_buffer_nb(orte_mgmt_conduit, \
|
||||
ORTE_PROC_MY_HNP, buf, \
|
||||
orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, \
|
||||
ORTE_RML_TAG_IOF_HNP, \
|
||||
orte_rml_send_callback, NULL); \
|
||||
} while(0);
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
@ -513,8 +513,7 @@ static int orted_output(const orte_process_name_t* peer,
|
||||
"%s iof:orted:output sending %d bytes to HNP",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)strlen(msg)+1));
|
||||
|
||||
orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
|
||||
orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
|
||||
orte_rml_send_callback, NULL);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -140,9 +140,8 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata)
|
||||
"%s iof:orted:read handler sending %d bytes to HNP",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes));
|
||||
|
||||
orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
|
||||
orte_rml_send_callback, NULL);
|
||||
orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
|
||||
orte_rml_send_callback, NULL);
|
||||
|
||||
/* re-add the event */
|
||||
ORTE_IOF_READ_ACTIVATE(rev);
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014-2016 Intel Corporation. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -72,9 +72,8 @@ void orte_iof_orted_send_xonxoff(orte_iof_tag_t tag)
|
||||
(ORTE_IOF_XON == tag) ? "xon" : "xoff"));
|
||||
|
||||
/* send the buffer to the HNP */
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
|
||||
send_cb, NULL))) {
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP,
|
||||
send_cb, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
}
|
||||
}
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -170,9 +170,8 @@ static int tool_pull(const orte_process_name_t* src_name,
|
||||
|
||||
/* send the buffer to the correct HNP */
|
||||
ORTE_HNP_NAME_FROM_JOB(&hnp, src_name->jobid);
|
||||
orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
&hnp, buf, ORTE_RML_TAG_IOF_HNP,
|
||||
send_cb, NULL);
|
||||
orte_rml.send_buffer_nb(&hnp, buf, ORTE_RML_TAG_IOF_HNP,
|
||||
send_cb, NULL);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -220,9 +219,8 @@ static int tool_close(const orte_process_name_t* src_name,
|
||||
|
||||
/* send the buffer to the correct HNP */
|
||||
ORTE_HNP_NAME_FROM_JOB(&hnp, src_name->jobid);
|
||||
orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
&hnp, buf, ORTE_RML_TAG_IOF_HNP,
|
||||
send_cb, NULL);
|
||||
orte_rml.send_buffer_nb(&hnp, buf, ORTE_RML_TAG_IOF_HNP,
|
||||
send_cb, NULL);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
@ -11,7 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2017-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2017-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -144,11 +144,6 @@ ORTE_DECLSPEC void orte_oob_base_send_nb(int fd, short args, void *cbdata);
|
||||
*/
|
||||
ORTE_DECLSPEC void orte_oob_base_get_addr(char **uri);
|
||||
|
||||
/* Get the available transports and their attributes */
|
||||
#define ORTE_OOB_GET_TRANSPORTS(u) orte_oob_base_get_transports(u)
|
||||
ORTE_DECLSPEC void orte_oob_base_get_transports(opal_list_t *transports);
|
||||
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
ORTE_DECLSPEC void orte_oob_base_ft_event(int fd, short args, void *cbdata);
|
||||
#endif
|
||||
|
@ -2,7 +2,7 @@
|
||||
/*
|
||||
* Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -107,7 +107,7 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata)
|
||||
OPAL_LIST_FOREACH(cli, &orte_oob_base.actives, mca_base_component_list_item_t) {
|
||||
component = (mca_oob_base_component_t*)cli->cli_component;
|
||||
if (NULL != component->is_reachable) {
|
||||
if (component->is_reachable(msg->routed, &msg->dst)) {
|
||||
if (component->is_reachable(&msg->dst)) {
|
||||
/* there is a way to reach this peer - record it
|
||||
* so we don't waste this time again
|
||||
*/
|
||||
@ -170,7 +170,7 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata)
|
||||
OPAL_LIST_FOREACH(cli, &orte_oob_base.actives, mca_base_component_list_item_t) {
|
||||
component = (mca_oob_base_component_t*)cli->cli_component;
|
||||
/* is this peer reachable via this component? */
|
||||
if (!component->is_reachable(msg->routed, &msg->dst)) {
|
||||
if (!component->is_reachable(&msg->dst)) {
|
||||
continue;
|
||||
}
|
||||
/* it is addressable, so attempt to send via that transport */
|
||||
@ -384,30 +384,6 @@ static void process_uri(char *uri)
|
||||
opal_argv_free(uris);
|
||||
}
|
||||
|
||||
/*
 * Gather transport pathway descriptors from the active OOB components.
 *
 * Walks orte_oob_base.actives and, for each component that provides a
 * query_transports entry point, calls it and appends any non-NULL
 * pathway it returns to the caller-supplied list.
 *
 * transports - list that receives the pathway items; it is only
 *              appended to here, never constructed or cleared.
 */
void orte_oob_base_get_transports(opal_list_t *transports)
|
||||
{
|
||||
mca_base_component_list_item_t *cli;
|
||||
mca_oob_base_component_t *component;
|
||||
orte_rml_pathway_t *p;
|
||||
|
||||
opal_output_verbose(5, orte_oob_base_framework.framework_output,
|
||||
"%s: get transports",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
|
||||
OPAL_LIST_FOREACH(cli, &orte_oob_base.actives, mca_base_component_list_item_t) {
|
||||
component = (mca_oob_base_component_t*)cli->cli_component;
|
||||
opal_output_verbose(5, orte_oob_base_framework.framework_output,
|
||||
"%s:get transports for component %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
component->oob_base.mca_component_name);
|
||||
/* query_transports is optional - components without it are skipped */
if (NULL != component->query_transports) {
|
||||
/* a NULL return means the component has nothing to report */
if (NULL != (p = component->query_transports())) {
|
||||
opal_list_append(transports, &p->super);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
void orte_oob_base_ft_event(int sd, short argc, void *cbdata)
|
||||
{
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2015-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -55,9 +55,8 @@ typedef int (*mca_oob_base_component_send_fn_t)(orte_rml_send_t *msg);
|
||||
typedef char* (*mca_oob_base_component_get_addr_fn_t)(void);
|
||||
typedef int (*mca_oob_base_component_set_addr_fn_t)(orte_process_name_t *peer,
|
||||
char **uris);
|
||||
typedef bool (*mca_oob_base_component_is_reachable_fn_t)(char *routed, orte_process_name_t *peer);
|
||||
typedef bool (*mca_oob_base_component_is_reachable_fn_t)(orte_process_name_t *peer);
|
||||
typedef void (*mca_oob_ping_callback_fn_t)(int status, void *cbdata);
|
||||
typedef orte_rml_pathway_t* (*mca_oob_base_component_query_transports_fn_t)(void);
|
||||
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
typedef int (*mca_oob_base_component_ft_event_fn_t)(int state);
|
||||
@ -75,7 +74,6 @@ typedef struct {
|
||||
mca_oob_base_component_get_addr_fn_t get_addr;
|
||||
mca_oob_base_component_set_addr_fn_t set_addr;
|
||||
mca_oob_base_component_is_reachable_fn_t is_reachable;
|
||||
mca_oob_base_component_query_transports_fn_t query_transports;
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
mca_oob_base_component_ft_event_fn_t ft_event;
|
||||
#endif
|
||||
|
@ -13,7 +13,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -180,7 +180,7 @@ static void send_nb(orte_rml_send_t *msg)
|
||||
|
||||
|
||||
/* do we have a route to this peer (could be direct)? */
|
||||
hop = orte_routed.get_route(msg->routed, &msg->dst);
|
||||
hop = orte_routed.get_route(&msg->dst);
|
||||
/* do we know this hop? */
|
||||
if (NULL == (peer = mca_oob_tcp_peer_lookup(&hop))) {
|
||||
/* push this back to the component so it can try
|
||||
|
@ -103,8 +103,7 @@ static int component_send(orte_rml_send_t *msg);
|
||||
static char* component_get_addr(void);
|
||||
static int component_set_addr(orte_process_name_t *peer,
|
||||
char **uris);
|
||||
static bool component_is_reachable(char *rtmod, orte_process_name_t *peer);
|
||||
static orte_rml_pathway_t* component_query_transports(void);
|
||||
static bool component_is_reachable(orte_process_name_t *peer);
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
static int component_ft_event(int state);
|
||||
#endif
|
||||
@ -135,7 +134,6 @@ mca_oob_tcp_component_t mca_oob_tcp_component = {
|
||||
.get_addr = component_get_addr,
|
||||
.set_addr = component_set_addr,
|
||||
.is_reachable = component_is_reachable,
|
||||
.query_transports = component_query_transports,
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
.ft_event = component_ft_event,
|
||||
#endif
|
||||
@ -627,37 +625,6 @@ static int component_available(void)
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Build the pathway descriptor advertised by the TCP OOB component.
 *
 * Returns NULL when both mca_oob_tcp_component.ipv4conns and
 * mca_oob_tcp_component.ipv6conns are NULL. Otherwise returns a newly
 * created orte_rml_pathway_t whose component string is "oob" and whose
 * attributes carry the transport type ("Ethernet"), the protocol type
 * ("TCP") and a qualifier string naming the available address families.
 *
 * NOTE(review): the OBJ_NEW and strdup results are used without a NULL
 * check - confirm whether allocation failure needs handling here.
 */
static orte_rml_pathway_t* component_query_transports(void)
|
||||
{
|
||||
orte_rml_pathway_t *p;
|
||||
char *qual;
|
||||
|
||||
/* if neither IPv4 nor IPv6 connections are available, then
|
||||
* we have nothing to support */
|
||||
if (NULL == mca_oob_tcp_component.ipv4conns &&
|
||||
NULL == mca_oob_tcp_component.ipv6conns) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* if we get here, then we support Ethernet and TCP */
|
||||
p = OBJ_NEW(orte_rml_pathway_t);
|
||||
p->component = strdup("oob");
|
||||
orte_set_attribute(&p->attributes, ORTE_RML_TRANSPORT_TYPE, ORTE_ATTR_LOCAL, "Ethernet", OPAL_STRING);
|
||||
orte_set_attribute(&p->attributes, ORTE_RML_PROTOCOL_TYPE, ORTE_ATTR_LOCAL, "TCP", OPAL_STRING);
|
||||
/* setup our qualifiers - we route communications, may have IPv4 and/or IPv6, etc. */
|
||||
if (NULL != mca_oob_tcp_component.ipv4conns &&
|
||||
NULL != mca_oob_tcp_component.ipv6conns) {
|
||||
qual = "routed=true:ipv4:ipv6";
|
||||
/* at least one list is non-NULL past the early return above, so a
 * NULL ipv6conns here implies IPv4 only */
} else if (NULL == mca_oob_tcp_component.ipv6conns) {
|
||||
qual = "routed=true:ipv4";
|
||||
} else {
|
||||
qual = "routed=true:ipv6";
|
||||
}
|
||||
orte_set_attribute(&p->attributes, ORTE_RML_QUALIFIER_ATTRIB, ORTE_ATTR_LOCAL, qual, OPAL_STRING);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Start all modules */
|
||||
static int component_startup(void)
|
||||
{
|
||||
@ -1008,12 +975,12 @@ static int component_set_addr(orte_process_name_t *peer,
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
|
||||
static bool component_is_reachable(char *rtmod, orte_process_name_t *peer)
|
||||
static bool component_is_reachable(orte_process_name_t *peer)
|
||||
{
|
||||
orte_process_name_t hop;
|
||||
|
||||
/* if we have a route to this peer, then we can reach it */
|
||||
hop = orte_routed.get_route(rtmod, peer);
|
||||
hop = orte_routed.get_route(peer);
|
||||
if (ORTE_JOBID_INVALID == hop.jobid ||
|
||||
ORTE_VPID_INVALID == hop.vpid) {
|
||||
opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output,
|
||||
@ -1102,7 +1069,7 @@ void mca_oob_tcp_component_lost_connection(int fd, short args, void *cbdata)
|
||||
|
||||
if (!orte_finalizing) {
|
||||
/* activate the proc state */
|
||||
if (ORTE_SUCCESS != orte_routed.route_lost(pop->rtmod, &pop->peer)) {
|
||||
if (ORTE_SUCCESS != orte_routed.route_lost(&pop->peer)) {
|
||||
ORTE_ACTIVATE_PROC_STATE(&pop->peer, ORTE_PROC_STATE_LIFELINE_LOST);
|
||||
} else {
|
||||
ORTE_ACTIVATE_PROC_STATE(&pop->peer, ORTE_PROC_STATE_COMM_FAILED);
|
||||
@ -1216,7 +1183,6 @@ void mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata)
|
||||
snd->count = mop->snd->hdr.nbytes;
|
||||
snd->cbfunc.iov = NULL;
|
||||
snd->cbdata = NULL;
|
||||
snd->routed = strdup(mop->snd->hdr.routed);
|
||||
/* activate the OOB send state */
|
||||
ORTE_OOB_SEND(snd);
|
||||
/* protect the data */
|
||||
@ -1416,15 +1382,11 @@ OBJ_CLASS_INSTANCE(mca_oob_tcp_addr_t,
|
||||
|
||||
/*
 * Constructor for mca_oob_tcp_peer_op_t: start with the rtmod, net and
 * port string pointers set to NULL.
 */
static void pop_cons(mca_oob_tcp_peer_op_t *pop)
|
||||
{
|
||||
pop->rtmod = NULL;
|
||||
pop->net = NULL;
|
||||
pop->port = NULL;
|
||||
}
|
||||
static void pop_des(mca_oob_tcp_peer_op_t *pop)
|
||||
{
|
||||
if (NULL != pop->rtmod) {
|
||||
free(pop->rtmod);
|
||||
}
|
||||
if (NULL != pop->net) {
|
||||
free(pop->net);
|
||||
}
|
||||
|
@ -13,7 +13,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
|
||||
@ -334,7 +334,7 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata)
|
||||
* an event in the component event base, and so it will fire async
|
||||
* from us if we are in our own progress thread
|
||||
*/
|
||||
ORTE_ACTIVATE_TCP_CMP_OP(peer, NULL, mca_oob_tcp_component_failed_to_connect);
|
||||
ORTE_ACTIVATE_TCP_CMP_OP(peer, mca_oob_tcp_component_failed_to_connect);
|
||||
/* FIXME: post any messages in the send queue back to the OOB
|
||||
* level for reassignment
|
||||
*/
|
||||
@ -937,7 +937,7 @@ int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* pr,
|
||||
/* set the peer into the component and OOB-level peer tables to indicate
|
||||
* that we know this peer and we will be handling him
|
||||
*/
|
||||
ORTE_ACTIVATE_TCP_CMP_OP(peer, NULL, mca_oob_tcp_component_set_module);
|
||||
ORTE_ACTIVATE_TCP_CMP_OP(peer, mca_oob_tcp_component_set_module);
|
||||
|
||||
/* connected */
|
||||
tcp_peer_connected(peer);
|
||||
@ -968,7 +968,7 @@ static void tcp_peer_connected(mca_oob_tcp_peer_t* peer)
|
||||
}
|
||||
|
||||
/* update the route */
|
||||
orte_routed.update_route(NULL, &peer->name, &peer->name);
|
||||
orte_routed.update_route(&peer->name, &peer->name);
|
||||
|
||||
/* initiate send of first message on queue */
|
||||
if (NULL == peer->send_msg) {
|
||||
@ -1027,7 +1027,7 @@ void mca_oob_tcp_peer_close(mca_oob_tcp_peer_t *peer)
|
||||
/* inform the component-level that we have lost a connection so
|
||||
* it can decide what to do about it.
|
||||
*/
|
||||
ORTE_ACTIVATE_TCP_CMP_OP(peer, NULL, mca_oob_tcp_component_lost_connection);
|
||||
ORTE_ACTIVATE_TCP_CMP_OP(peer, mca_oob_tcp_component_lost_connection);
|
||||
|
||||
if (orte_orteds_term_ordered || orte_finalizing || orte_abnormal_term_ordered) {
|
||||
/* nothing more to do */
|
||||
@ -1238,7 +1238,7 @@ bool mca_oob_tcp_peer_accept(mca_oob_tcp_peer_t* peer)
|
||||
/* set the peer into the component and OOB-level peer tables to indicate
|
||||
* that we know this peer and we will be handling him
|
||||
*/
|
||||
ORTE_ACTIVATE_TCP_CMP_OP(peer, NULL, mca_oob_tcp_component_set_module);
|
||||
ORTE_ACTIVATE_TCP_CMP_OP(peer, mca_oob_tcp_component_set_module);
|
||||
|
||||
tcp_peer_connected(peer);
|
||||
if (!peer->recv_ev_active) {
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -73,21 +73,15 @@ typedef struct {
|
||||
uint16_t af_family;
|
||||
char *net;
|
||||
char *port;
|
||||
char *rtmod;
|
||||
} mca_oob_tcp_peer_op_t;
|
||||
OBJ_CLASS_DECLARATION(mca_oob_tcp_peer_op_t);
|
||||
|
||||
#define ORTE_ACTIVATE_TCP_CMP_OP(p, r, cbfunc) \
|
||||
#define ORTE_ACTIVATE_TCP_CMP_OP(p, cbfunc) \
|
||||
do { \
|
||||
mca_oob_tcp_peer_op_t *pop; \
|
||||
char *proxy; \
|
||||
pop = OBJ_NEW(mca_oob_tcp_peer_op_t); \
|
||||
pop->peer.jobid = (p)->name.jobid; \
|
||||
pop->peer.vpid = (p)->name.vpid; \
|
||||
proxy = (r); \
|
||||
if (NULL != proxy) { \
|
||||
pop->rtmod = strdup(proxy); \
|
||||
} \
|
||||
ORTE_THREADSHIFT(pop, orte_oob_base.ev_base, \
|
||||
(cbfunc), ORTE_MSG_PRI); \
|
||||
} while(0);
|
||||
|
@ -13,7 +13,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -586,7 +586,6 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata)
|
||||
snd->data = peer->recv_msg->data;
|
||||
snd->seq_num = peer->recv_msg->hdr.seq_num;
|
||||
snd->count = peer->recv_msg->hdr.nbytes;
|
||||
snd->routed = strdup(peer->recv_msg->hdr.routed);
|
||||
snd->cbfunc.iov = NULL;
|
||||
snd->cbdata = NULL;
|
||||
/* activate the OOB send state */
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2006-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2018 Cisco Systems, Inc. All rights reserved
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -109,10 +109,6 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t);
|
||||
_s->hdr.type = MCA_OOB_TCP_USER; \
|
||||
_s->hdr.tag = (m)->tag; \
|
||||
_s->hdr.seq_num = (m)->seq_num; \
|
||||
if (NULL != (m)->routed) { \
|
||||
(void)opal_string_copy(_s->hdr.routed, (m)->routed, \
|
||||
ORTE_MAX_RTD_SIZE); \
|
||||
} \
|
||||
/* point to the actual message */ \
|
||||
_s->msg = (m); \
|
||||
/* set the total number of bytes to be sent */ \
|
||||
@ -157,10 +153,6 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t);
|
||||
_s->hdr.type = MCA_OOB_TCP_USER; \
|
||||
_s->hdr.tag = (m)->tag; \
|
||||
_s->hdr.seq_num = (m)->seq_num; \
|
||||
if (NULL != (m)->routed) { \
|
||||
(void)opal_string_copy(_s->hdr.routed, (m)->routed, \
|
||||
ORTE_MAX_RTD_SIZE); \
|
||||
} \
|
||||
/* point to the actual message */ \
|
||||
_s->msg = (m); \
|
||||
/* set the total number of bytes to be sent */ \
|
||||
|
@ -179,7 +179,7 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata)
|
||||
orte_ras_base_display_alloc();
|
||||
}
|
||||
/* ensure we update the routing plan */
|
||||
orte_routed.update_routing_plan(NULL);
|
||||
orte_routed.update_routing_plan();
|
||||
|
||||
/* prep the buffer */
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
@ -812,8 +812,7 @@ void orte_plm_base_post_launch(int fd, short args, void *cbdata)
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid),
|
||||
ORTE_NAME_PRINT(&jdata->originator)));
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
&jdata->originator, answer,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(&jdata->originator, answer,
|
||||
ORTE_RML_TAG_LAUNCH_RESP,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
@ -1346,8 +1345,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender,
|
||||
goto CLEANUP;
|
||||
}
|
||||
/* send it */
|
||||
orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
&dname, relay,
|
||||
orte_rml.send_buffer_nb(&dname, relay,
|
||||
ORTE_RML_TAG_DAEMON,
|
||||
orte_rml_send_callback, NULL);
|
||||
/* we will count this node as completed
|
||||
@ -2310,7 +2308,7 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
|
||||
|
||||
/* ensure all routing plans are up-to-date - we need this
|
||||
* so we know how to tree-spawn and/or xcast info */
|
||||
orte_routed.update_routing_plan(NULL);
|
||||
orte_routed.update_routing_plan();
|
||||
}
|
||||
|
||||
/* mark that the daemon job changed */
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -278,8 +278,7 @@ void orte_plm_base_recv(int status, orte_process_name_t* sender,
|
||||
}
|
||||
|
||||
/* send the response back to the sender */
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
sender, answer, ORTE_RML_TAG_LAUNCH_RESP,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_LAUNCH_RESP,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_RELEASE(answer);
|
||||
|
@ -14,7 +14,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2017 IBM Corporation. All rights reserved.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -265,7 +265,6 @@ static void rsh_wait_daemon(int sd, short flags, void *cbdata)
|
||||
orte_wait_tracker_t *t2 = (orte_wait_tracker_t*)cbdata;
|
||||
orte_plm_rsh_caddy_t *caddy=(orte_plm_rsh_caddy_t*)t2->cbdata;
|
||||
orte_proc_t *daemon = caddy->daemon;
|
||||
char *rtmod;
|
||||
|
||||
if (orte_orteds_term_ordered || orte_abnormal_term_ordered) {
|
||||
/* ignore any such report - it will occur if we left the
|
||||
@ -290,8 +289,7 @@ static void rsh_wait_daemon(int sd, short flags, void *cbdata)
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
opal_dss.pack(buf, &(daemon->name.vpid), 1, ORTE_VPID);
|
||||
opal_dss.pack(buf, &daemon->exit_code, 1, OPAL_INT);
|
||||
orte_rml.send_buffer_nb(orte_coll_conduit,
|
||||
ORTE_PROC_MY_HNP, buf,
|
||||
orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf,
|
||||
ORTE_RML_TAG_REPORT_REMOTE_LAUNCH,
|
||||
orte_rml_send_callback, NULL);
|
||||
/* note that this daemon failed */
|
||||
@ -312,8 +310,7 @@ static void rsh_wait_daemon(int sd, short flags, void *cbdata)
|
||||
/* remove it from the routing table to ensure num_routes
|
||||
* returns the correct value
|
||||
*/
|
||||
rtmod = orte_rml.get_routed(orte_coll_conduit);
|
||||
orte_routed.route_lost(rtmod, &daemon->name);
|
||||
orte_routed.route_lost(&daemon->name);
|
||||
/* report that the daemon has failed so we can exit */
|
||||
ORTE_ACTIVATE_PROC_STATE(&daemon->name, ORTE_PROC_STATE_FAILED_TO_START);
|
||||
}
|
||||
@ -797,7 +794,6 @@ static int remote_spawn(void)
|
||||
orte_job_t *daemons;
|
||||
opal_list_t coll;
|
||||
orte_namelist_t *child;
|
||||
char *rtmod;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
|
||||
"%s plm:rsh: remote spawn called",
|
||||
@ -816,9 +812,8 @@ static int remote_spawn(void)
|
||||
}
|
||||
|
||||
/* get the updated routing list */
|
||||
rtmod = orte_rml.get_routed(orte_coll_conduit);
|
||||
OBJ_CONSTRUCT(&coll, opal_list_t);
|
||||
orte_routed.get_routing_list(rtmod, &coll);
|
||||
orte_routed.get_routing_list(&coll);
|
||||
|
||||
/* if I have no children, just return */
|
||||
if (0 == opal_list_get_size(&coll)) {
|
||||
@ -913,8 +908,7 @@ cleanup:
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
opal_dss.pack(buf, &target.vpid, 1, ORTE_VPID);
|
||||
opal_dss.pack(buf, &rc, 1, OPAL_INT);
|
||||
orte_rml.send_buffer_nb(orte_coll_conduit,
|
||||
ORTE_PROC_MY_HNP, buf,
|
||||
orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf,
|
||||
ORTE_RML_TAG_REPORT_REMOTE_LAUNCH,
|
||||
orte_rml_send_callback, NULL);
|
||||
}
|
||||
@ -1040,7 +1034,6 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
char *username;
|
||||
int port, *portptr;
|
||||
orte_namelist_t *child;
|
||||
char *rtmod;
|
||||
|
||||
ORTE_ACQUIRE_OBJECT(state);
|
||||
|
||||
@ -1185,8 +1178,7 @@ static void launch_daemons(int fd, short args, void *cbdata)
|
||||
|
||||
/* get the updated routing list */
|
||||
OBJ_CONSTRUCT(&coll, opal_list_t);
|
||||
rtmod = orte_rml.get_routed(orte_coll_conduit);
|
||||
orte_routed.get_routing_list(rtmod, &coll);
|
||||
orte_routed.get_routing_list(&coll);
|
||||
}
|
||||
|
||||
/* setup the launch */
|
||||
|
@ -11,7 +11,7 @@
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
|
||||
# Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -26,5 +26,4 @@ headers += \
|
||||
libmca_rml_la_SOURCES += \
|
||||
base/rml_base_frame.c \
|
||||
base/rml_base_contact.c \
|
||||
base/rml_base_msg_handlers.c \
|
||||
base/rml_base_stubs.c
|
||||
base/rml_base_msg_handlers.c
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -65,18 +65,9 @@ ORTE_DECLSPEC int orte_rml_base_select(void);
|
||||
/*
|
||||
* globals that might be needed
|
||||
*/
|
||||
/* adding element to hold the active modules and components */
|
||||
/* List item recording one RML component together with its priority */
typedef struct {
|
||||
/* list linkage (opal_list_item_t base) */
opal_list_item_t super;
|
||||
/* component priority - presumably used to order the list; verify against the select code */
int pri;
|
||||
/* the component this entry refers to */
orte_rml_component_t *component;
|
||||
} orte_rml_base_active_t;
|
||||
OBJ_CLASS_DECLARATION(orte_rml_base_active_t);
|
||||
|
||||
/* a global struct containing framework-level values */
|
||||
typedef struct {
|
||||
opal_list_t actives; /* list to hold the active components */
|
||||
opal_pointer_array_t conduits; /* array to hold the open conduits */
|
||||
opal_list_t posted_recvs;
|
||||
opal_list_t unmatched_msgs;
|
||||
int max_retries;
|
||||
@ -114,8 +105,6 @@ typedef struct {
|
||||
* transfers
|
||||
*/
|
||||
char *data;
|
||||
/* routed module to be used */
|
||||
char *routed;
|
||||
} orte_rml_send_t;
|
||||
OBJ_CLASS_DECLARATION(orte_rml_send_t);
|
||||
|
||||
@ -232,52 +221,11 @@ OBJ_CLASS_DECLARATION(orte_self_send_xfer_t);
|
||||
OBJ_RELEASE(m); \
|
||||
}while(0);
|
||||
|
||||
#define ORTE_RML_INVALID_CHANNEL_NUM UINT32_MAX
|
||||
/* common implementations */
|
||||
ORTE_DECLSPEC void orte_rml_base_post_recv(int sd, short args, void *cbdata);
|
||||
ORTE_DECLSPEC void orte_rml_base_process_msg(int fd, short flags, void *cbdata);
|
||||
|
||||
|
||||
/* Stub API interfaces to cycle through active plugins */
|
||||
int orte_rml_API_ping(orte_rml_conduit_t conduit_id,
|
||||
const char* contact_info,
|
||||
const struct timeval* tv);
|
||||
|
||||
int orte_rml_API_send_nb(orte_rml_conduit_t conduit_id,
|
||||
orte_process_name_t* peer, struct iovec* msg,
|
||||
int count, orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc, void* cbdata);
|
||||
|
||||
int orte_rml_API_send_buffer_nb(orte_rml_conduit_t conduit_id,
|
||||
orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
void orte_rml_API_recv_nb(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag);
|
||||
|
||||
void orte_rml_API_purge(orte_process_name_t *peer);
|
||||
|
||||
int orte_rml_API_query_transports(opal_list_t *providers);
|
||||
|
||||
orte_rml_conduit_t orte_rml_API_open_conduit(opal_list_t *attributes);
|
||||
|
||||
void orte_rml_API_close_conduit(orte_rml_conduit_t id);
|
||||
|
||||
char* orte_rml_API_get_routed(orte_rml_conduit_t id);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* MCA_RML_BASE_H */
|
||||
|
@ -5,7 +5,7 @@
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -40,26 +40,10 @@
|
||||
|
||||
|
||||
/* Initialising stub fns in the global var used by other modules */
|
||||
orte_rml_base_API_t orte_rml = {
|
||||
.ping = orte_rml_API_ping,
|
||||
.send_nb = orte_rml_API_send_nb,
|
||||
.send_buffer_nb = orte_rml_API_send_buffer_nb,
|
||||
.recv_nb = orte_rml_API_recv_nb,
|
||||
.recv_buffer_nb = orte_rml_API_recv_buffer_nb,
|
||||
.recv_cancel = orte_rml_API_recv_cancel,
|
||||
.purge = orte_rml_API_purge,
|
||||
.query_transports = orte_rml_API_query_transports,
|
||||
.open_conduit = orte_rml_API_open_conduit,
|
||||
.close_conduit = orte_rml_API_close_conduit,
|
||||
.get_routed = orte_rml_API_get_routed
|
||||
};
|
||||
orte_rml_base_module_t orte_rml = {0};
|
||||
|
||||
orte_rml_base_t orte_rml_base = {{{0}}};
|
||||
|
||||
orte_rml_component_t *orte_rml_component = NULL;
|
||||
|
||||
static bool selected = false;
|
||||
|
||||
static int orte_rml_base_register(mca_base_register_flag_t flags)
|
||||
{
|
||||
orte_rml_base.max_retries = 3;
|
||||
@ -82,62 +66,9 @@ static int orte_rml_base_register(mca_base_register_flag_t flags)
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Destruct the RML posted-receives list and, when a lock is supplied,
 * signal the thread waiting on it.
 *
 * sd, args - unused here.
 * cbdata   - NULL, or a pointer to an orte_lock_t that is posted and
 *            woken once the list has been destructed.
 */
static void cleanup(int sd, short args, void *cbdata)
|
||||
{
|
||||
orte_lock_t *lk = (orte_lock_t*)cbdata;
|
||||
|
||||
/* NOTE(review): "active" is not declared in this function - presumably
 * the macro discards its argument or a definition exists elsewhere;
 * confirm, and consider passing lk instead */
ORTE_ACQUIRE_OBJECT(active);
|
||||
OPAL_LIST_DESTRUCT(&orte_rml_base.posted_recvs);
|
||||
/* a NULL lock means nobody is waiting on this call */
if (NULL != lk) {
|
||||
ORTE_POST_OBJECT(lk);
|
||||
ORTE_WAKEUP_THREAD(lk);
|
||||
}
|
||||
}
|
||||
|
||||
static int orte_rml_base_close(void)
|
||||
{
|
||||
orte_lock_t lock;
|
||||
int idx, total_conduits = opal_pointer_array_get_size(&orte_rml_base.conduits);
|
||||
orte_rml_base_module_t *mod;
|
||||
orte_rml_component_t *comp;
|
||||
|
||||
/* cycle thru the conduits opened and call each module's finalize */
|
||||
/* The components finalise/close() will be responsible for freeing the module pointers */
|
||||
for (idx = 0; idx < total_conduits ; idx++)
|
||||
{
|
||||
if( NULL != (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits,idx))) {
|
||||
/* close the conduit */
|
||||
comp = (orte_rml_component_t*)mod->component;
|
||||
if (NULL != comp && NULL != comp->close_conduit) {
|
||||
comp->close_conduit(mod);
|
||||
}
|
||||
free(mod);
|
||||
}
|
||||
|
||||
}
|
||||
OBJ_DESTRUCT(&orte_rml_base.conduits);
|
||||
|
||||
OPAL_LIST_DESTRUCT(&orte_rml_base.actives)
|
||||
|
||||
/* because the RML posted recvs list is in a separate
|
||||
* async thread for apps, we can't just destruct it here.
|
||||
* Instead, we push it into that event thread and destruct
|
||||
* it there */
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
opal_event_t ev;
|
||||
ORTE_CONSTRUCT_LOCK(&lock);
|
||||
opal_event_set(orte_event_base, &ev, -1,
|
||||
OPAL_EV_WRITE, cleanup, (void*)&lock);
|
||||
opal_event_set_priority(&ev, ORTE_ERROR_PRI);
|
||||
ORTE_POST_OBJECT(ev);
|
||||
opal_event_active(&ev, OPAL_EV_WRITE, 1);
|
||||
ORTE_WAIT_THREAD(&lock);
|
||||
ORTE_DESTRUCT_LOCK(&lock);
|
||||
} else {
|
||||
/* we can call the destruct directly */
|
||||
cleanup(0, 0, NULL);
|
||||
}
|
||||
|
||||
OPAL_LIST_DESTRUCT(&orte_rml_base.posted_recvs);
|
||||
return mca_base_framework_components_close(&orte_rml_base_framework, NULL);
|
||||
}
|
||||
|
||||
@ -145,11 +76,8 @@ static int orte_rml_base_open(mca_base_open_flag_t flags)
|
||||
{
|
||||
/* Initialize globals */
|
||||
/* construct object for holding the active plugin modules */
|
||||
OBJ_CONSTRUCT(&orte_rml_base.actives, opal_list_t);
|
||||
OBJ_CONSTRUCT(&orte_rml_base.posted_recvs, opal_list_t);
|
||||
OBJ_CONSTRUCT(&orte_rml_base.unmatched_msgs, opal_list_t);
|
||||
OBJ_CONSTRUCT(&orte_rml_base.conduits, opal_pointer_array_t);
|
||||
opal_pointer_array_init(&orte_rml_base.conduits,1,INT16_MAX,1);
|
||||
|
||||
/* Open up all available components */
|
||||
return mca_base_framework_components_open(&orte_rml_base_framework, flags);
|
||||
@ -159,61 +87,28 @@ MCA_BASE_FRAMEWORK_DECLARE(orte, rml, "ORTE Run-Time Messaging Layer",
|
||||
orte_rml_base_register, orte_rml_base_open, orte_rml_base_close,
|
||||
mca_rml_base_static_components, 0);
|
||||
|
||||
OBJ_CLASS_INSTANCE(orte_rml_base_active_t,
|
||||
opal_list_item_t,
|
||||
NULL, NULL);
|
||||
|
||||
/**
|
||||
* Function for ordering the component(plugin) by priority
|
||||
*/
|
||||
int orte_rml_base_select(void)
|
||||
{
|
||||
mca_base_component_list_item_t *cli=NULL;
|
||||
orte_rml_component_t *component=NULL;
|
||||
orte_rml_base_active_t *newmodule, *mod;
|
||||
bool inserted;
|
||||
orte_rml_component_t *best_component = NULL;
|
||||
orte_rml_base_module_t *best_module = NULL;
|
||||
|
||||
if (selected) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
selected = true;
|
||||
|
||||
OPAL_LIST_FOREACH(cli, &orte_rml_base_framework.framework_components, mca_base_component_list_item_t ) {
|
||||
component = (orte_rml_component_t*) cli->cli_component;
|
||||
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"orte_rml_base_select: Initializing %s component %s",
|
||||
component->base.mca_type_name,
|
||||
component->base.mca_component_name);
|
||||
|
||||
/* add to the list of available components */
|
||||
newmodule = OBJ_NEW(orte_rml_base_active_t);
|
||||
newmodule->pri = component->priority;
|
||||
newmodule->component = component;
|
||||
|
||||
/* maintain priority order */
|
||||
inserted = false;
|
||||
OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
if (newmodule->pri > mod->pri) {
|
||||
opal_list_insert_pos(&orte_rml_base.actives,
|
||||
(opal_list_item_t*)mod, &newmodule->super);
|
||||
inserted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!inserted) {
|
||||
/* must be lowest priority - add to end */
|
||||
opal_list_append(&orte_rml_base.actives, &newmodule->super);
|
||||
}
|
||||
/*
|
||||
* Select the best component
|
||||
*/
|
||||
if( OPAL_SUCCESS != mca_base_select("rml", orte_rml_base_framework.framework_output,
|
||||
&orte_rml_base_framework.framework_components,
|
||||
(mca_base_module_t **) &best_module,
|
||||
(mca_base_component_t **) &best_component, NULL) ) {
|
||||
/* This will only happen if no component was selected */
|
||||
/* If we didn't find one to select, that is an error */
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
if (4 < opal_output_get_verbosity(orte_rml_base_framework.framework_output)) {
|
||||
opal_output(0, "%s: Final rml priorities", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
/* show the prioritized list */
|
||||
OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) {
|
||||
opal_output(0, "\tComponent: %s Priority: %d", mod->component->base.mca_component_name, mod->pri);
|
||||
}
|
||||
}
|
||||
/* Save the winner */
|
||||
orte_rml = *best_module;
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -279,17 +174,10 @@ static void send_cons(orte_rml_send_t *ptr)
|
||||
ptr->buffer = NULL;
|
||||
ptr->data = NULL;
|
||||
ptr->seq_num = 0xFFFFFFFF;
|
||||
ptr->routed = NULL;
|
||||
}
|
||||
/*
 * Destructor for orte_rml_send_t: free the routed string if one is set.
 * No other field is released here.
 */
static void send_des(orte_rml_send_t *ptr)
|
||||
{
|
||||
if (NULL != ptr->routed) {
|
||||
free(ptr->routed);
|
||||
}
|
||||
}
|
||||
OBJ_CLASS_INSTANCE(orte_rml_send_t,
|
||||
opal_list_item_t,
|
||||
send_cons, send_des);
|
||||
send_cons, NULL);
|
||||
|
||||
|
||||
static void send_req_cons(orte_rml_send_request_t *ptr)
|
||||
@ -353,21 +241,3 @@ static void prq_des(orte_rml_recv_request_t *ptr)
|
||||
OBJ_CLASS_INSTANCE(orte_rml_recv_request_t,
|
||||
opal_object_t,
|
||||
prq_cons, prq_des);
|
||||
|
||||
/*
 * Constructor for orte_rml_pathway_t: no component name yet, and the
 * attributes and transports members are constructed as opal_list_t.
 */
static void pthcons(orte_rml_pathway_t *p)
|
||||
{
|
||||
p->component = NULL;
|
||||
OBJ_CONSTRUCT(&p->attributes, opal_list_t);
|
||||
OBJ_CONSTRUCT(&p->transports, opal_list_t);
|
||||
}
|
||||
static void pthdes(orte_rml_pathway_t *p)
|
||||
{
|
||||
if (NULL != p->component) {
|
||||
free(p->component);
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&p->attributes);
|
||||
OPAL_LIST_DESTRUCT(&p->transports);
|
||||
}
|
||||
OBJ_CLASS_INSTANCE(orte_rml_pathway_t,
|
||||
opal_list_item_t,
|
||||
pthcons, pthdes);
|
||||
|
@ -189,8 +189,7 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata)
|
||||
return;
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
&msg->sender, buffer,
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(&msg->sender, buffer,
|
||||
ORTE_RML_TAG_NODE_REGEX_REPORT,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
@ -1,333 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "orte_config.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/dss/dss.h"
|
||||
#include "orte/mca/mca.h"
|
||||
#include "opal/mca/base/mca_base_component_repository.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/output.h"
|
||||
|
||||
#include "orte/mca/rml/rml.h"
|
||||
#include "orte/mca/state/state.h"
|
||||
#include "orte/runtime/orte_wait.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
#include "orte/util/threads.h"
|
||||
|
||||
#include "orte/mca/rml/base/base.h"
|
||||
|
||||
/*
|
||||
* The stub API interface functions
|
||||
*/
|
||||
|
||||
/** Open a conduit - check if the ORTE_RML_INCLUDE_COMP attribute is provided, this is */
|
||||
/* a comma separated list of components, try to open the conduit in this order. */
|
||||
/* if the ORTE_RML_INCLUDE_COMP is not provided or this list was not able to open conduit */
|
||||
/* call the open_conduit() of the component in priority order to see if they can use the */
|
||||
/* attribute to open a conduit. */
|
||||
/* Note: The component takes care of checking for duplicate and returning the previously */
|
||||
/* opened module* in case of duplicates. Currently we are saving it in a new conduit_id */
|
||||
/* even if it is duplicate. [ToDo] compare the module* received from component to see if */
|
||||
/* already present in array and return the prev conduit_id instead of adding it again to array */
|
||||
/* @param[in] attributes The attributes is a list of opal_value_t of type OPAL_STRING */
|
||||
orte_rml_conduit_t orte_rml_API_open_conduit(opal_list_t *attributes)
{
    orte_rml_base_active_t *entry;
    orte_rml_component_t *component;
    orte_rml_base_module_t *chosen = NULL;
    int slot;

    opal_output_verbose(10,orte_rml_base_framework.framework_output,
                        "%s rml:base:open_conduit",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    /* sanity check - a request may not carry both an include and an
     * exclude list */
    if (orte_get_attribute(attributes, ORTE_RML_INCLUDE_COMP_ATTRIB, NULL, OPAL_STRING) &&
        orte_get_attribute(attributes, ORTE_RML_EXCLUDE_COMP_ATTRIB, NULL, OPAL_STRING)) {
        return ORTE_ERR_NOT_SUPPORTED;
    }

    /* walk the active components in list order; the first one that
     * hands back a module wins */
    OPAL_LIST_FOREACH(entry, &orte_rml_base.actives, orte_rml_base_active_t) {
        component = (orte_rml_component_t *)entry->component;
        if (NULL == component->open_conduit) {
            continue;
        }
        chosen = component->open_conduit(attributes);
        if (NULL != chosen) {
            opal_output_verbose(2, orte_rml_base_framework.framework_output,
                                "%s rml:base:open_conduit Component %s provided a conduit",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                                entry->component->base.mca_component_name);
            break;
        }
    }

    if (NULL == chosen) {
        /* nobody was able to satisfy the request */
        ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED);
        return ORTE_RML_CONDUIT_INVALID;
    }

    /* record the module in the conduit array - its index is the
     * conduit id given back to the caller */
    slot = opal_pointer_array_add(&orte_rml_base.conduits, chosen);
    if (slot < 0) {
        return ORTE_RML_CONDUIT_INVALID;
    }
    return slot;
}
|
||||
|
||||
|
||||
|
||||
/** Shutdown the communication system and clean up resources */
|
||||
void orte_rml_API_close_conduit(orte_rml_conduit_t id)
|
||||
{
|
||||
orte_rml_base_module_t *mod;
|
||||
orte_rml_component_t *comp;
|
||||
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:close_conduit(%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)id);
|
||||
|
||||
if( NULL != (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, id))) {
|
||||
comp = (orte_rml_component_t*)mod->component;
|
||||
if (NULL != comp && NULL != comp->close_conduit) {
|
||||
comp->close_conduit(mod);
|
||||
}
|
||||
opal_pointer_array_set_item(&orte_rml_base.conduits, id, NULL);
|
||||
free(mod);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** Ping process for connectivity check */
|
||||
int orte_rml_API_ping(orte_rml_conduit_t conduit_id,
|
||||
const char* contact_info,
|
||||
const struct timeval* tv)
|
||||
{
|
||||
int rc = ORTE_ERR_UNREACH;
|
||||
orte_rml_base_module_t *mod;
|
||||
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:ping(conduit-%d)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),conduit_id);
|
||||
|
||||
/* get the module */
|
||||
if (NULL == (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, conduit_id))) {
|
||||
return rc;
|
||||
}
|
||||
if (NULL == mod->ping) {
|
||||
return rc;
|
||||
}
|
||||
rc = mod->ping((struct orte_rml_base_module_t*)mod, contact_info, tv);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/** Send non-blocking iovec message through a specific conduit*/
|
||||
int orte_rml_API_send_nb(orte_rml_conduit_t conduit_id,
|
||||
orte_process_name_t* peer,
|
||||
struct iovec* msg,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
int rc = ORTE_ERR_UNREACH;
|
||||
orte_rml_base_module_t *mod;
|
||||
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:send_nb() to peer %s through conduit %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer),conduit_id);
|
||||
/* get the module */
|
||||
if (NULL == (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, conduit_id))) {
|
||||
return rc;
|
||||
}
|
||||
if (NULL == mod->send_nb) {
|
||||
return rc;
|
||||
}
|
||||
rc = mod->send_nb((struct orte_rml_base_module_t*)mod, peer, msg, count, tag, cbfunc, cbdata);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/** Send non-blocking buffer message */
|
||||
int orte_rml_API_send_buffer_nb(orte_rml_conduit_t conduit_id,
|
||||
orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
int rc = ORTE_ERR_UNREACH;
|
||||
orte_rml_base_module_t *mod;
|
||||
|
||||
opal_output_verbose(10,orte_rml_base_framework.framework_output,
|
||||
"%s rml:base:send_buffer_nb() to peer %s through conduit %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer),conduit_id);
|
||||
|
||||
/* get the module */
|
||||
if (NULL == (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, conduit_id))) {
|
||||
return rc;
|
||||
}
|
||||
if (NULL == mod->send_buffer_nb) {
|
||||
return rc;
|
||||
}
|
||||
rc = mod->send_buffer_nb((struct orte_rml_base_module_t*)mod, peer, buffer, tag, cbfunc, cbdata);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/** post a receive for an IOV message - this is done
|
||||
* strictly in the base, and so it does not go to a module */
|
||||
void orte_rml_API_recv_nb(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
orte_rml_recv_request_t *req;
|
||||
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s rml_recv_nb for peer %s tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
|
||||
/* push the request into the event base so we can add
|
||||
* the receive to our list of posted recvs */
|
||||
req = OBJ_NEW(orte_rml_recv_request_t);
|
||||
req->post->buffer_data = false;
|
||||
req->post->peer.jobid = peer->jobid;
|
||||
req->post->peer.vpid = peer->vpid;
|
||||
req->post->tag = tag;
|
||||
req->post->persistent = persistent;
|
||||
req->post->cbfunc.iov = cbfunc;
|
||||
req->post->cbdata = cbdata;
|
||||
ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI);
|
||||
}
|
||||
|
||||
/** Receive non-blocking buffer message */
|
||||
void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
orte_rml_recv_request_t *req;
|
||||
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s rml_recv_buffer_nb for peer %s tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
|
||||
/* push the request into the event base so we can add
|
||||
* the receive to our list of posted recvs */
|
||||
req = OBJ_NEW(orte_rml_recv_request_t);
|
||||
req->post->buffer_data = true;
|
||||
req->post->peer.jobid = peer->jobid;
|
||||
req->post->peer.vpid = peer->vpid;
|
||||
req->post->tag = tag;
|
||||
req->post->persistent = persistent;
|
||||
req->post->cbfunc.buffer = cbfunc;
|
||||
req->post->cbdata = cbdata;
|
||||
ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI);
|
||||
}
|
||||
|
||||
/** Cancel posted non-blocking receive */
|
||||
void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag)
|
||||
{
|
||||
orte_rml_recv_request_t *req;
|
||||
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s rml_recv_cancel for peer %s tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
|
||||
ORTE_ACQUIRE_OBJECT(orte_event_base_active);
|
||||
if (!orte_event_base_active) {
|
||||
/* no event will be processed any more, so simply return. */
|
||||
return;
|
||||
}
|
||||
|
||||
/* push the request into the event base so we can remove
|
||||
* the receive from our list of posted recvs */
|
||||
req = OBJ_NEW(orte_rml_recv_request_t);
|
||||
req->cancel = true;
|
||||
req->post->peer.jobid = peer->jobid;
|
||||
req->post->peer.vpid = peer->vpid;
|
||||
req->post->tag = tag;
|
||||
ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI);
|
||||
}
|
||||
|
||||
/** Purge information */
|
||||
void orte_rml_API_purge(orte_process_name_t *peer)
|
||||
{
|
||||
orte_rml_base_module_t *mod;
|
||||
int i;
|
||||
|
||||
for (i=0; i < orte_rml_base.conduits.size; i++) {
|
||||
/* get the module */
|
||||
if (NULL != (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, i))) {
|
||||
if (NULL != mod->purge) {
|
||||
mod->purge(peer);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Collect the pathway reported by each active component that has
 *  a query_transports entry point and append it to the caller's
 *  list. Always returns ORTE_SUCCESS */
int orte_rml_API_query_transports(opal_list_t *providers)
{
    orte_rml_base_active_t *entry;
    orte_rml_pathway_t *path;

    opal_output_verbose(10,orte_rml_base_framework.framework_output,
                        "%s rml:base:orte_rml_API_query_transports()",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    OPAL_LIST_FOREACH(entry, &orte_rml_base.actives, orte_rml_base_active_t) {
        if (NULL == entry->component->query_transports) {
            continue;
        }
        opal_output_verbose(10,orte_rml_base_framework.framework_output,
                            "\n calling module: %s->query_transports() \n",
                            entry->component->base.mca_component_name);
        path = entry->component->query_transports();
        if (NULL != path) {
            /* take a reference before placing it on the caller's list */
            OBJ_RETAIN(path);
            opal_list_append(providers, &path->super);
        }
    }

    return ORTE_SUCCESS;
}
|
||||
|
||||
/** Return the routed string stored on the module behind the given
 *  conduit id, or NULL when no module is registered under that id */
char* orte_rml_API_get_routed(orte_rml_conduit_t id)
{
    orte_rml_base_module_t *conduit;

    conduit = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, id);
    if (NULL == conduit) {
        return NULL;
    }
    return conduit->routed;
}
|
@ -14,7 +14,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -48,25 +48,19 @@ ORTE_MODULE_DECLSPEC extern orte_rml_component_t mca_rml_oob_component;
|
||||
|
||||
void orte_rml_oob_fini(struct orte_rml_base_module_t *mod);
|
||||
|
||||
int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
int orte_rml_oob_send_nb(orte_process_name_t* peer,
|
||||
struct iovec* msg,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer,
|
||||
opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
int orte_rml_oob_ping(struct orte_rml_base_module_t *mod,
|
||||
const char* uri,
|
||||
const struct timeval* tv);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
@ -13,7 +13,7 @@
|
||||
* Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -57,9 +57,8 @@
|
||||
|
||||
static int rml_oob_open(void);
|
||||
static int rml_oob_close(void);
|
||||
static orte_rml_base_module_t* open_conduit(opal_list_t *attributes);
|
||||
static orte_rml_pathway_t* query_transports(void);
|
||||
static void close_conduit(orte_rml_base_module_t *mod);
|
||||
static int component_query(mca_base_module_t **module, int *priority);
|
||||
|
||||
/**
|
||||
* component definition
|
||||
*/
|
||||
@ -75,220 +74,121 @@ orte_rml_component_t mca_rml_oob_component = {
|
||||
ORTE_RELEASE_VERSION),
|
||||
.mca_open_component = rml_oob_open,
|
||||
.mca_close_component = rml_oob_close,
|
||||
.mca_query_component = component_query,
|
||||
|
||||
},
|
||||
.data = {
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
.priority = 5,
|
||||
.open_conduit = open_conduit,
|
||||
.query_transports = query_transports,
|
||||
.close_conduit = close_conduit
|
||||
.priority = 5
|
||||
};
|
||||
|
||||
/* Local variables */
|
||||
static orte_rml_pathway_t pathway;
|
||||
static void recv_nb(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
orte_rml_recv_request_t *req;
|
||||
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s rml_recv_nb for peer %s tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
|
||||
/* push the request into the event base so we can add
|
||||
* the receive to our list of posted recvs */
|
||||
req = OBJ_NEW(orte_rml_recv_request_t);
|
||||
req->post->buffer_data = false;
|
||||
req->post->peer.jobid = peer->jobid;
|
||||
req->post->peer.vpid = peer->vpid;
|
||||
req->post->tag = tag;
|
||||
req->post->persistent = persistent;
|
||||
req->post->cbfunc.iov = cbfunc;
|
||||
req->post->cbdata = cbdata;
|
||||
ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI);
|
||||
}
|
||||
static void recv_buffer_nb(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata)
|
||||
{
|
||||
orte_rml_recv_request_t *req;
|
||||
|
||||
opal_output_verbose(10, orte_rml_base_framework.framework_output,
|
||||
"%s rml_recv_buffer_nb for peer %s tag %d",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(peer), tag);
|
||||
|
||||
/* push the request into the event base so we can add
|
||||
* the receive to our list of posted recvs */
|
||||
req = OBJ_NEW(orte_rml_recv_request_t);
|
||||
req->post->buffer_data = true;
|
||||
req->post->peer.jobid = peer->jobid;
|
||||
req->post->peer.vpid = peer->vpid;
|
||||
req->post->tag = tag;
|
||||
req->post->persistent = persistent;
|
||||
req->post->cbfunc.buffer = cbfunc;
|
||||
req->post->cbdata = cbdata;
|
||||
ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI);
|
||||
}
|
||||
/* Cancel a previously posted non-blocking receive for peer/tag */
static void recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag)
{
    orte_rml_recv_request_t *request;

    opal_output_verbose(10, orte_rml_base_framework.framework_output,
                        "%s rml_recv_cancel for peer %s tag %d",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                        ORTE_NAME_PRINT(peer), tag);

    ORTE_ACQUIRE_OBJECT(orte_event_base_active);
    if (!orte_event_base_active) {
        /* events are no longer being processed, so there is
         * nothing useful to queue */
        return;
    }

    /* build a cancel request and shift it into the event base,
     * where the matching receive is taken off the posted list */
    request = OBJ_NEW(orte_rml_recv_request_t);
    request->post->tag = tag;
    request->post->peer.jobid = peer->jobid;
    request->post->peer.vpid = peer->vpid;
    request->cancel = true;
    ORTE_THREADSHIFT(request, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI);
}
|
||||
static int oob_ping(const char* uri, const struct timeval* tv)
|
||||
{
|
||||
return ORTE_ERR_UNREACH;
|
||||
}
|
||||
|
||||
static orte_rml_base_module_t base_module = {
|
||||
.component = (struct orte_rml_component_t*)&mca_rml_oob_component,
|
||||
.ping = NULL,
|
||||
.ping = oob_ping,
|
||||
.send_nb = orte_rml_oob_send_nb,
|
||||
.send_buffer_nb = orte_rml_oob_send_buffer_nb,
|
||||
.recv_nb = recv_nb,
|
||||
.recv_buffer_nb = recv_buffer_nb,
|
||||
.recv_cancel = recv_cancel,
|
||||
.purge = NULL
|
||||
};
|
||||
|
||||
static int rml_oob_open(void)
|
||||
{
|
||||
/* ask our OOB transports for their info */
|
||||
OBJ_CONSTRUCT(&pathway, orte_rml_pathway_t);
|
||||
pathway.component = strdup("oob");
|
||||
ORTE_OOB_GET_TRANSPORTS(&pathway.transports);
|
||||
/* add any component attributes of our own */
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int rml_oob_close(void)
|
||||
{
|
||||
/* cleanup */
|
||||
OBJ_DESTRUCT(&pathway);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
static orte_rml_base_module_t* make_module(void)
|
||||
static int component_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
orte_rml_oob_module_t *mod;
|
||||
|
||||
/* create a new module */
|
||||
mod = (orte_rml_oob_module_t*)malloc(sizeof(orte_rml_oob_module_t));
|
||||
if (NULL == mod) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* copy the APIs over to it */
|
||||
memcpy(mod, &base_module, sizeof(base_module));
|
||||
|
||||
/* initialize its internal storage */
|
||||
OBJ_CONSTRUCT(&mod->queued_routing_messages, opal_list_t);
|
||||
mod->timer_event = NULL;
|
||||
mod->routed = NULL;
|
||||
|
||||
/* return the result */
|
||||
return (orte_rml_base_module_t*)mod;
|
||||
}
|
||||
|
||||
/* Return true when the comma-delimited string "csv" contains "name"
 * or (if non-NULL) "alt", compared case-insensitively. The argv
 * vector built for the search is always released before returning. */
static bool csv_names_match(const char *csv, const char *name, const char *alt)
{
    char **items;
    bool found = false;
    int i;

    items = opal_argv_split(csv, ',');
    if (NULL == items) {
        /* no vector came back - nothing to index, so nothing matches */
        return false;
    }
    for (i = 0; NULL != items[i]; i++) {
        if (0 == strcasecmp(items[i], name) ||
            (NULL != alt && 0 == strcasecmp(items[i], alt))) {
            found = true;
            break;
        }
    }
    opal_argv_free(items);
    return found;
}

/* Create a module and assign its routed module from the
 * ORTE_RML_ROUTED_ATTRIB attribute (which may be absent).
 * Returns NULL if the module could not be allocated. */
static orte_rml_base_module_t* make_routed_module(opal_list_t *attributes)
{
    orte_rml_base_module_t *md;
    char *routed = NULL;

    md = make_module();
    if (NULL == md) {
        return NULL;
    }
    orte_get_attribute(attributes, ORTE_RML_ROUTED_ATTRIB, (void**)&routed, OPAL_STRING);
    /* the routed system understands a NULL request, so no need to check
     * return status/value here */
    md->routed = orte_routed.assign_module(routed);
    free(routed);
    return md;
}

/* Decide whether the oob component can provide a conduit for the
 * given attribute list and, if so, return a freshly created module.
 *
 * Checks are applied in this order:
 *   1. include list present: we qualify only if it names "oob"
 *   2. exclude list present: we are out if it names "oob"
 *   3. transport type present: we qualify only for "Ethernet"/"oob"
 *   4. protocol type present: we qualify only for "TCP"
 *   5. transport or protocol left unspecified: we qualify
 *
 * @param[in] attributes  list of attributes describing the request
 * @return a new module, or NULL if we cannot (or may not) serve the
 *         request or the module could not be allocated
 *
 * Each attribute string fetched here is freed exactly once, and the
 * split vectors are released on every path.
 */
static orte_rml_base_module_t* open_conduit(opal_list_t *attributes)
{
    char *value;
    bool match;
    orte_rml_base_module_t *md;

    opal_output_verbose(20,orte_rml_base_framework.framework_output,
                        "%s - Entering rml_oob_open_conduit()",
                        ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));

    /* someone may require this specific component, so look for "oob" */
    value = NULL;
    if (orte_get_attribute(attributes, ORTE_RML_INCLUDE_COMP_ATTRIB, (void**)&value, OPAL_STRING) &&
        NULL != value) {
        /* they specified specific components - could be multiple */
        match = csv_names_match(value, "oob", NULL);
        free(value);
        return match ? make_routed_module(attributes) : NULL;
    }

    /* see if we were explicitly excluded */
    value = NULL;
    if (orte_get_attribute(attributes, ORTE_RML_EXCLUDE_COMP_ATTRIB, (void**)&value, OPAL_STRING) &&
        NULL != value) {
        match = csv_names_match(value, "oob", NULL);
        free(value);
        if (match) {
            /* we cannot be a candidate */
            return NULL;
        }
        /* not on the list - keep evaluating the remaining attributes */
    }

    /* a transport type was requested - we answer to "Ethernet" and "oob" */
    value = NULL;
    if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_TYPE, (void**)&value, OPAL_STRING) &&
        NULL != value) {
        match = csv_names_match(value, "Ethernet", "oob");
        free(value);
        return match ? make_routed_module(attributes) : NULL;
    }

    /* a protocol type was requested - we answer to "TCP" */
    value = NULL;
    if (orte_get_attribute(attributes, ORTE_RML_PROTOCOL_TYPE, (void**)&value, OPAL_STRING) &&
        NULL != value) {
        match = csv_names_match(value, "TCP", NULL);
        free(value);
        return match ? make_routed_module(attributes) : NULL;
    }

    /* if they didn't specify a protocol or a transport, then we can be considered */
    if (!orte_get_attribute(attributes, ORTE_RML_TRANSPORT_TYPE, NULL, OPAL_STRING) ||
        !orte_get_attribute(attributes, ORTE_RML_PROTOCOL_TYPE, NULL, OPAL_STRING)) {
        md = make_module();
        if (NULL == md) {
            return NULL;
        }
        md->routed = orte_routed.assign_module(NULL);
        return md;
    }

    /* if we get here, we cannot handle it */
    return NULL;
}
|
||||
|
||||
static orte_rml_pathway_t* query_transports(void)
|
||||
{
|
||||
/* if we have any available transports, make them available */
|
||||
if (0 < opal_list_get_size(&pathway.transports)) {
|
||||
return &pathway;
|
||||
}
|
||||
/* if not, then return NULL */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void close_conduit(orte_rml_base_module_t *md)
|
||||
{
|
||||
orte_rml_oob_module_t *mod = (orte_rml_oob_module_t*)md;
|
||||
|
||||
/* cleanup the list of messages */
|
||||
OBJ_DESTRUCT(&mod->queued_routing_messages);
|
||||
|
||||
/* clear the storage */
|
||||
if (NULL != mod->routed) {
|
||||
free(mod->routed);
|
||||
mod->routed = NULL;
|
||||
}
|
||||
|
||||
/* the rml_base_stub takes care of clearing the base receive
|
||||
* and free'ng the module */
|
||||
return;
|
||||
*priority = 50;
|
||||
*module = (mca_base_module_t *) &base_module;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -70,8 +70,7 @@ static void send_self_exe(int fd, short args, void* data)
|
||||
OBJ_RELEASE(xfer);
|
||||
}
|
||||
|
||||
int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
int orte_rml_oob_send_nb(orte_process_name_t* peer,
|
||||
struct iovec* iov,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
@ -170,7 +169,6 @@ int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod,
|
||||
snd->count = count;
|
||||
snd->cbfunc.iov = cbfunc;
|
||||
snd->cbdata = cbdata;
|
||||
snd->routed = strdup(mod->routed);
|
||||
|
||||
/* activate the OOB send state */
|
||||
ORTE_OOB_SEND(snd);
|
||||
@ -178,8 +176,7 @@ int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod,
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer,
|
||||
opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
@ -259,7 +256,6 @@ int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod,
|
||||
snd->buffer = buffer;
|
||||
snd->cbfunc.buffer = cbfunc;
|
||||
snd->cbdata = cbdata;
|
||||
snd->routed = strdup(mod->routed);
|
||||
|
||||
/* activate the OOB send state */
|
||||
ORTE_OOB_SEND(snd);
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
*
|
||||
@ -56,10 +56,6 @@ BEGIN_C_DECLS
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
||||
/* forward declare */
|
||||
struct orte_rml_base_module_t;
|
||||
struct orte_rml_component_t;
|
||||
|
||||
typedef struct {
|
||||
opal_object_t super;
|
||||
orte_process_name_t name;
|
||||
@ -173,8 +169,7 @@ typedef void (*orte_rml_exception_callback_t)(orte_process_name_t* peer,
|
||||
* from the local process
|
||||
* @retval ORTE_ERROR An unspecified error occurred during the update
|
||||
*/
|
||||
typedef int (*orte_rml_module_ping_fn_t)(struct orte_rml_base_module_t *mod,
|
||||
const char* contact_info,
|
||||
typedef int (*orte_rml_module_ping_fn_t)(const char* contact_info,
|
||||
const struct timeval* tv);
|
||||
|
||||
|
||||
@ -201,8 +196,7 @@ typedef int (*orte_rml_module_ping_fn_t)(struct orte_rml_base_module_t *mod,
|
||||
* receiving process is not available
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
typedef int (*orte_rml_module_send_nb_fn_t)(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
typedef int (*orte_rml_module_send_nb_fn_t)(orte_process_name_t* peer,
|
||||
struct iovec* msg,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
@ -232,8 +226,7 @@ typedef int (*orte_rml_module_send_nb_fn_t)(struct orte_rml_base_module_t *mod,
|
||||
* receiving process is not available
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
typedef int (*orte_rml_module_send_buffer_nb_fn_t)(struct orte_rml_base_module_t *mod,
|
||||
orte_process_name_t* peer,
|
||||
typedef int (*orte_rml_module_send_buffer_nb_fn_t)(orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
@ -247,6 +240,49 @@ typedef int (*orte_rml_module_send_buffer_nb_fn_t)(struct orte_rml_base_module_t
|
||||
typedef void (*orte_rml_module_purge_fn_t)(orte_process_name_t *peer);
|
||||
|
||||
|
||||
/**
|
||||
* Receive an iovec non-blocking message
|
||||
*
|
||||
* @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] persistent Boolean flag indicating whether or not this is a one-time recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*/
|
||||
typedef void (*orte_rml_module_recv_nb_fn_t)(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
|
||||
/**
|
||||
* Receive a buffer non-blocking message
|
||||
*
|
||||
* @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] persistent Boolean flag indicating whether or not this is a one-time recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*/
|
||||
typedef void (*orte_rml_module_recv_buffer_nb_fn_t)(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
/**
|
||||
* Cancel a posted non-blocking receive
|
||||
*
|
||||
* Attempt to cancel a posted non-blocking receive.
|
||||
*
|
||||
* @param[in] peer Peer process or ORTE_NAME_WILDCARD, exactly as passed
|
||||
* to the non-blocking receive call
|
||||
* @param[in] tag Posted receive tag
|
||||
*/
|
||||
typedef void (*orte_rml_module_recv_cancel_fn_t)(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag);
|
||||
|
||||
|
||||
/**
|
||||
* RML internal module interface - these will be implemented by all RML components
|
||||
@ -265,245 +301,21 @@ typedef struct orte_rml_base_module_t {
|
||||
/** Send non-blocking buffer message */
|
||||
orte_rml_module_send_buffer_nb_fn_t send_buffer_nb;
|
||||
|
||||
orte_rml_module_recv_nb_fn_t recv_nb;
|
||||
orte_rml_module_recv_buffer_nb_fn_t recv_buffer_nb;
|
||||
orte_rml_module_recv_cancel_fn_t recv_cancel;
|
||||
|
||||
/** Purge information */
|
||||
orte_rml_module_purge_fn_t purge;
|
||||
} orte_rml_base_module_t;
|
||||
|
||||
|
||||
/* ******************************************************************** */
|
||||
/* RML PUBLIC MODULE API DEFINITION */
|
||||
|
||||
/** Open conduit - call each component and see if they can provide a
|
||||
* conduit that can satisfy all these attributes - return the conduit id
|
||||
* (a negative value indicates error)
|
||||
*/
|
||||
typedef orte_rml_conduit_t (*orte_rml_API_open_conduit_fn_t)(opal_list_t *attributes);
|
||||
|
||||
/**
|
||||
* Close a conduit - allow the component to cleanup.
|
||||
*/
|
||||
typedef void (*orte_rml_API_close_conduit_fn_t)(orte_rml_conduit_t id);
|
||||
|
||||
/**
|
||||
* Query the library to provide all the supported interfaces/transport
|
||||
* providers in the current node/system.
|
||||
*
|
||||
* @param[out] List of providers and their attributes.
|
||||
*/
|
||||
typedef int (*orte_rml_API_query_transports_fn_t)(opal_list_t *transports);
|
||||
|
||||
/* query the routed module for a given conduit */
|
||||
typedef char* (*orte_rml_API_query_routed_fn_t)(orte_rml_conduit_t id);
|
||||
|
||||
/**
|
||||
* "Ping" another process to determine availability
|
||||
*
|
||||
* Ping another process to determine if it is available. This
|
||||
* function only verifies that the process is alive and will allow a
|
||||
* connection to the local process. It does *not* qualify as
|
||||
* establishing communication with the remote process, as required by
|
||||
* the note for set_contact_info().
|
||||
*
|
||||
* @param[in] contact_info The contact info string for the remote process
|
||||
* @param[in] tv Timeout after which the ping should be failed
|
||||
*
|
||||
* @retval ORTE_SUCCESS The process is available and will allow connections
|
||||
* from the local process
|
||||
* @retval ORTE_ERROR An unspecified error occurred during the update
|
||||
*/
|
||||
typedef int (*orte_rml_API_ping_fn_t)(orte_rml_conduit_t conduit_id,
|
||||
const char* contact_info,
|
||||
const struct timeval* tv);
|
||||
|
||||
|
||||
/**
|
||||
* Send an iovec non-blocking message
|
||||
*
|
||||
* Send an array of iovecs to the specified peer. The call
|
||||
* will return immediately, although the iovecs may not be modified
|
||||
* until the completion callback is triggered. The iovecs *may* be
|
||||
* passed to another call to send_nb before the completion callback is
|
||||
* triggered. The callback being triggered does not give any
|
||||
* indication of remote completion.
|
||||
*
|
||||
* @param[in] peer Name of receiving process
|
||||
* @param[in] msg Pointer to an array of iovecs to be sent
|
||||
* @param[in] count Number of iovecs in array
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*
|
||||
* @retval ORTE_SUCCESS The message was successfully started
|
||||
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
|
||||
* @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the
|
||||
* receiving process is not available
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
typedef int (*orte_rml_API_send_nb_fn_t)(orte_rml_conduit_t conduit_id,
|
||||
orte_process_name_t* peer,
|
||||
struct iovec* msg,
|
||||
int count,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
|
||||
/**
|
||||
* Send a buffer non-blocking message
|
||||
*
|
||||
* Send a buffer to the specified peer. The call
|
||||
* will return immediately, although the buffer may not be modified
|
||||
* until the completion callback is triggered. The buffer *may* be
|
||||
* passed to another call to send_nb before the completion callback is
|
||||
* triggered. The callback being triggered does not give any
|
||||
* indication of remote completion.
|
||||
*
|
||||
* @param[in] peer Name of receiving process
|
||||
* @param[in] buffer Pointer to buffer to be sent
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*
|
||||
* @retval ORTE_SUCCESS The message was successfully started
|
||||
* @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
|
||||
* @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the
|
||||
* receiving process is not available
|
||||
* @retval ORTE_ERROR An unspecified error occurred
|
||||
*/
|
||||
typedef int (*orte_rml_API_send_buffer_nb_fn_t)(orte_rml_conduit_t conduit_id,
|
||||
orte_process_name_t* peer,
|
||||
struct opal_buffer_t* buffer,
|
||||
orte_rml_tag_t tag,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
/**
|
||||
* Purge the RML/OOB of contact info and pending messages
|
||||
* to/from a specified process. Used when a process aborts
|
||||
* and is to be restarted
|
||||
*/
|
||||
typedef void (*orte_rml_API_purge_fn_t)(orte_process_name_t *peer);
|
||||
|
||||
/**
|
||||
* Receive an iovec non-blocking message
|
||||
*
|
||||
* @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] persistent Boolean flag indicating whether or not this is a one-time recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*/
|
||||
typedef void (*orte_rml_API_recv_nb_fn_t)(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
|
||||
/**
|
||||
* Receive a buffer non-blocking message
|
||||
*
|
||||
* @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
|
||||
* @param[in] tag User defined tag for matching send/recv
|
||||
* @param[in] persistent Boolean flag indicating whether or not this is a one-time recv
|
||||
* @param[in] cbfunc Callback function on message completion
|
||||
* @param[in] cbdata User data to provide during completion callback
|
||||
*/
|
||||
typedef void (*orte_rml_API_recv_buffer_nb_fn_t)(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag,
|
||||
bool persistent,
|
||||
orte_rml_buffer_callback_fn_t cbfunc,
|
||||
void* cbdata);
|
||||
|
||||
|
||||
/**
|
||||
* Cancel a posted non-blocking receive
|
||||
*
|
||||
* Attempt to cancel a posted non-blocking receive.
|
||||
*
|
||||
* @param[in] peer Peer process or ORTE_NAME_WILDCARD, exactly as passed
|
||||
* to the non-blocking receive call
|
||||
* @param[in] tag Posted receive tag
|
||||
*/
|
||||
typedef void (*orte_rml_API_recv_cancel_fn_t)(orte_process_name_t* peer,
|
||||
orte_rml_tag_t tag);
|
||||
|
||||
/**
|
||||
* RML API interface
|
||||
*/
|
||||
typedef struct {
|
||||
/** Open Conduit **/
|
||||
orte_rml_API_open_conduit_fn_t open_conduit;
|
||||
|
||||
/** Shutdown the conduit and clean up resources */
|
||||
orte_rml_API_close_conduit_fn_t close_conduit;
|
||||
|
||||
/** Ping process for connectivity check */
|
||||
orte_rml_API_ping_fn_t ping;
|
||||
|
||||
/** Send non-blocking iovec message */
|
||||
orte_rml_API_send_nb_fn_t send_nb;
|
||||
|
||||
/** Send non-blocking buffer message */
|
||||
orte_rml_API_send_buffer_nb_fn_t send_buffer_nb;
|
||||
|
||||
/** Receive non-blocking iovec message */
|
||||
orte_rml_API_recv_nb_fn_t recv_nb;
|
||||
|
||||
/** Receive non-blocking buffer message */
|
||||
orte_rml_API_recv_buffer_nb_fn_t recv_buffer_nb;
|
||||
|
||||
/** Cancel posted non-blocking receive */
|
||||
orte_rml_API_recv_cancel_fn_t recv_cancel;
|
||||
|
||||
/** Purge information */
|
||||
orte_rml_API_purge_fn_t purge;
|
||||
|
||||
/** Query information of transport in system */
|
||||
orte_rml_API_query_transports_fn_t query_transports;
|
||||
|
||||
/* get the routed module for a given conduit */
|
||||
orte_rml_API_query_routed_fn_t get_routed;
|
||||
} orte_rml_base_API_t;
|
||||
|
||||
/** Interface for RML communication */
|
||||
ORTE_DECLSPEC extern orte_rml_base_API_t orte_rml;
|
||||
ORTE_DECLSPEC extern orte_rml_base_module_t orte_rml;
|
||||
|
||||
/* ******************************************************************** */
|
||||
/* RML COMPONENT DEFINITION */
|
||||
|
||||
/**
|
||||
* RML open_conduit
|
||||
*
|
||||
* Create an instance (module) of the given RML component. Upon
|
||||
* returning, the module data structure should be fully populated and
|
||||
* all functions should be usable and will have the conduit information.
|
||||
*
|
||||
* @param[in] opal_list_t of all attributes requested for the conduit.
|
||||
* Each attribute will be key-value.
|
||||
* [TODO] put in examples of the key-value here.
|
||||
* @return Exactly one module created by the call to the component's
|
||||
* initialization function should be returned. The module structure
|
||||
* should be fully populated, and the priority should be set to a
|
||||
* reasonable value.
|
||||
*
|
||||
* @retval NULL An error occurred and initialization did not occur
|
||||
* @retval non-NULL The module was successfully initialized
|
||||
*/
|
||||
typedef orte_rml_base_module_t* (*orte_rml_component_open_conduit_fn_t)(opal_list_t *attributes);
|
||||
|
||||
/**
|
||||
* Query the library to provide all the supported interfaces/transport
|
||||
* providers in the current node/system.
|
||||
*
|
||||
*/
|
||||
typedef orte_rml_pathway_t* (*orte_rml_component_query_transports_fn_t)(void);
|
||||
|
||||
/** Close conduit - allow the specific component to
|
||||
* cleanup the module for this conduit
|
||||
*/
|
||||
typedef void (*orte_rml_module_close_conduit_fn_t)(orte_rml_base_module_t *mod);
|
||||
|
||||
/**
|
||||
* RML component interface
|
||||
*
|
||||
@ -518,10 +330,6 @@ typedef struct orte_rml_component_t {
|
||||
mca_base_component_data_t data;
|
||||
/* Component priority */
|
||||
int priority;
|
||||
/* Component interface functions */
|
||||
orte_rml_component_open_conduit_fn_t open_conduit;
|
||||
orte_rml_component_query_transports_fn_t query_transports;
|
||||
orte_rml_module_close_conduit_fn_t close_conduit;
|
||||
} orte_rml_component_t;
|
||||
|
||||
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -201,19 +201,6 @@ BEGIN_C_DECLS
|
||||
*/
|
||||
typedef uint32_t orte_rml_tag_t;
|
||||
|
||||
/* Conduit ID */
|
||||
typedef uint16_t orte_rml_conduit_t;
|
||||
#define ORTE_RML_CONDUIT_INVALID 0xff
|
||||
|
||||
/* define an object for reporting transports */
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
char *component;
|
||||
opal_list_t attributes;
|
||||
opal_list_t transports;
|
||||
} orte_rml_pathway_t;
|
||||
OBJ_CLASS_DECLARATION(orte_rml_pathway_t);
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
||||
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 2007-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -32,38 +32,11 @@ ORTE_DECLSPEC extern mca_base_framework_t orte_routed_base_framework;
|
||||
ORTE_DECLSPEC int orte_routed_base_select(void);
|
||||
|
||||
typedef struct {
|
||||
opal_list_item_t super;
|
||||
int pri;
|
||||
orte_routed_component_t *component;
|
||||
orte_routed_module_t *module;
|
||||
} orte_routed_base_active_t;
|
||||
OBJ_CLASS_DECLARATION(orte_routed_base_active_t);
|
||||
|
||||
typedef struct {
|
||||
opal_list_t actives;
|
||||
bool routing_enabled;
|
||||
} orte_routed_base_t;
|
||||
ORTE_DECLSPEC extern orte_routed_base_t orte_routed_base;
|
||||
|
||||
|
||||
/* base API wrapper functions */
|
||||
ORTE_DECLSPEC char* orte_routed_base_assign_module(char *modules);
|
||||
|
||||
ORTE_DECLSPEC int orte_routed_base_delete_route(char *module, orte_process_name_t *proc);
|
||||
ORTE_DECLSPEC int orte_routed_base_update_route(char *module, orte_process_name_t *target,
|
||||
orte_process_name_t *route);
|
||||
ORTE_DECLSPEC orte_process_name_t orte_routed_base_get_route(char *module,
|
||||
orte_process_name_t *target);
|
||||
ORTE_DECLSPEC int orte_routed_base_route_lost(char *module,
|
||||
const orte_process_name_t *route);
|
||||
ORTE_DECLSPEC bool orte_routed_base_route_is_defined(char *module,
|
||||
const orte_process_name_t *target);
|
||||
ORTE_DECLSPEC void orte_routed_base_update_routing_plan(char *module);
|
||||
ORTE_DECLSPEC void orte_routed_base_get_routing_list(char *module, opal_list_t *coll);
|
||||
ORTE_DECLSPEC int orte_routed_base_set_lifeline(char *module, orte_process_name_t *proc);
|
||||
ORTE_DECLSPEC size_t orte_routed_base_num_routes(char *module);
|
||||
ORTE_DECLSPEC int orte_routed_base_ft_event(char *module, int state);
|
||||
|
||||
/* specialized support functions */
|
||||
ORTE_DECLSPEC void orte_routed_base_xcast_routing(opal_list_t *coll,
|
||||
opal_list_t *my_children);
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -37,219 +37,6 @@
|
||||
|
||||
#include "orte/mca/routed/base/base.h"
|
||||
|
||||
char* orte_routed_base_assign_module(char *modules)
|
||||
{
|
||||
orte_routed_base_active_t *active;
|
||||
char **desired;
|
||||
int i;
|
||||
|
||||
/* the incoming param contains a comma-delimited, prioritized
|
||||
* list of desired routing modules. If it is NULL, then we
|
||||
* simply return the module at the top of our list */
|
||||
if (NULL == modules) {
|
||||
active = (orte_routed_base_active_t*)opal_list_get_first(&orte_routed_base.actives);
|
||||
return active->component->base_version.mca_component_name;
|
||||
}
|
||||
|
||||
/* otherwise, cycle thru the provided list of desired modules
|
||||
* and pick the highest priority one that matches */
|
||||
desired = opal_argv_split(modules, ',');
|
||||
for (i=0; NULL != desired[i]; i++) {
|
||||
OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) {
|
||||
if (0 == strcasecmp(desired[i], active->component->base_version.mca_component_name)) {
|
||||
opal_argv_free(desired);
|
||||
return active->component->base_version.mca_component_name;
|
||||
}
|
||||
}
|
||||
}
|
||||
opal_argv_free(desired);
|
||||
|
||||
/* get here if none match */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int orte_routed_base_delete_route(char *module, orte_process_name_t *proc)
|
||||
{
|
||||
orte_routed_base_active_t *active;
|
||||
int rc;
|
||||
|
||||
OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) {
|
||||
if (NULL == module ||
|
||||
0 == strcmp(module, active->component->base_version.mca_component_name)) {
|
||||
if (NULL != active->module->delete_route) {
|
||||
if (ORTE_SUCCESS != (rc = active->module->delete_route(proc))) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
int orte_routed_base_update_route(char *module, orte_process_name_t *target,
|
||||
orte_process_name_t *route)
|
||||
{
|
||||
orte_routed_base_active_t *active;
|
||||
int rc;
|
||||
|
||||
OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) {
|
||||
if (NULL == module ||
|
||||
0 == strcmp(module, active->component->base_version.mca_component_name)) {
|
||||
if (NULL != active->module->update_route) {
|
||||
if (ORTE_SUCCESS != (rc = active->module->update_route(target, route))) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
orte_process_name_t orte_routed_base_get_route(char *module, orte_process_name_t *target)
|
||||
{
|
||||
orte_routed_base_active_t *active;
|
||||
|
||||
/* a NULL module corresponds to direct */
|
||||
if (!orte_routed_base.routing_enabled || NULL == module) {
|
||||
return *target;
|
||||
}
|
||||
|
||||
OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) {
|
||||
if (0 == strcmp(module, active->component->base_version.mca_component_name)) {
|
||||
if (NULL != active->module->get_route) {
|
||||
return active->module->get_route(target);
|
||||
}
|
||||
return *ORTE_NAME_INVALID;
|
||||
}
|
||||
}
|
||||
return *ORTE_NAME_INVALID;
|
||||
}
|
||||
|
||||
int orte_routed_base_route_lost(char *module, const orte_process_name_t *route)
|
||||
{
|
||||
orte_routed_base_active_t *active;
|
||||
int rc;
|
||||
|
||||
OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) {
|
||||
if (NULL == module ||
|
||||
0 == strcmp(module, active->component->base_version.mca_component_name)) {
|
||||
if (NULL != active->module->route_lost) {
|
||||
if (ORTE_SUCCESS != (rc = active->module->route_lost(route))) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
bool orte_routed_base_route_is_defined(char *module, const orte_process_name_t *target)
|
||||
{
|
||||
orte_routed_base_active_t *active;
|
||||
|
||||
/* a NULL module corresponds to direct */
|
||||
if (NULL == module) {
|
||||
return true;
|
||||
}
|
||||
|
||||
OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) {
|
||||
if (0 == strcmp(module, active->component->base_version.mca_component_name)) {
|
||||
if (NULL != active->module->route_is_defined) {
|
||||
return active->module->route_is_defined(target);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* if we didn't find the specified module, or it doesn't have
|
||||
* the required API, then the route isn't defined */
|
||||
return false;
|
||||
}
|
||||
|
||||
void orte_routed_base_update_routing_plan(char *module)
|
||||
{
|
||||
orte_routed_base_active_t *active;
|
||||
|
||||
OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) {
|
||||
if (NULL == module ||
|
||||
0 == strcmp(module, active->component->base_version.mca_component_name)) {
|
||||
if (NULL != active->module->update_routing_plan) {
|
||||
active->module->update_routing_plan();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void orte_routed_base_get_routing_list(char *module, opal_list_t *coll)
|
||||
{
|
||||
orte_routed_base_active_t *active;
|
||||
|
||||
OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) {
|
||||
if (NULL == module ||
|
||||
0 == strcmp(module, active->component->base_version.mca_component_name)) {
|
||||
if (NULL != active->module->get_routing_list) {
|
||||
active->module->get_routing_list(coll);
|
||||
}
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
int orte_routed_base_set_lifeline(char *module, orte_process_name_t *proc)
|
||||
{
|
||||
orte_routed_base_active_t *active;
|
||||
int rc;
|
||||
|
||||
OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) {
|
||||
if (NULL == module ||
|
||||
0 == strcmp(module, active->component->base_version.mca_component_name)) {
|
||||
if (NULL != active->module->set_lifeline) {
|
||||
if (ORTE_SUCCESS != (rc = active->module->set_lifeline(proc))) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
size_t orte_routed_base_num_routes(char *module)
|
||||
{
|
||||
orte_routed_base_active_t *active;
|
||||
size_t rc = 0;
|
||||
|
||||
OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) {
|
||||
if (NULL == module ||
|
||||
0 == strcmp(module, active->component->base_version.mca_component_name)) {
|
||||
if (NULL != active->module->num_routes) {
|
||||
rc += active->module->num_routes();
|
||||
}
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
int orte_routed_base_ft_event(char *module, int state)
|
||||
{
|
||||
orte_routed_base_active_t *active;
|
||||
int rc;
|
||||
|
||||
OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) {
|
||||
if (NULL == module ||
|
||||
0 == strcmp(module, active->component->base_version.mca_component_name)) {
|
||||
if (NULL != active->module->ft_event) {
|
||||
if (ORTE_SUCCESS != (rc = active->module->ft_event(state))) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
void orte_routed_base_xcast_routing(opal_list_t *coll, opal_list_t *my_children)
|
||||
{
|
||||
orte_routed_tree_t *child;
|
||||
|
@ -10,7 +10,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -39,25 +39,11 @@
|
||||
* component's public mca_base_component_t struct. */
|
||||
#include "orte/mca/routed/base/static-components.h"
|
||||
|
||||
orte_routed_base_t orte_routed_base = {{{0}}};
|
||||
orte_routed_API_t orte_routed = {
|
||||
.assign_module = orte_routed_base_assign_module,
|
||||
.delete_route = orte_routed_base_delete_route,
|
||||
.update_route = orte_routed_base_update_route,
|
||||
.get_route = orte_routed_base_get_route,
|
||||
.route_lost = orte_routed_base_route_lost,
|
||||
.route_is_defined = orte_routed_base_route_is_defined,
|
||||
.set_lifeline = orte_routed_base_set_lifeline,
|
||||
.update_routing_plan = orte_routed_base_update_routing_plan,
|
||||
.get_routing_list = orte_routed_base_get_routing_list,
|
||||
.num_routes = orte_routed_base_num_routes,
|
||||
.ft_event = orte_routed_base_ft_event
|
||||
};
|
||||
orte_routed_base_t orte_routed_base = {0};
|
||||
orte_routed_module_t orte_routed = {0};
|
||||
|
||||
static int orte_routed_base_open(mca_base_open_flag_t flags)
|
||||
{
|
||||
/* setup our list of actives */
|
||||
OBJ_CONSTRUCT(&orte_routed_base.actives, opal_list_t);
|
||||
/* start with routing DISABLED */
|
||||
orte_routed_base.routing_enabled = false;
|
||||
|
||||
@ -67,14 +53,10 @@ static int orte_routed_base_open(mca_base_open_flag_t flags)
|
||||
|
||||
static int orte_routed_base_close(void)
|
||||
{
|
||||
orte_routed_base_active_t *active;
|
||||
|
||||
while (NULL != (active = (orte_routed_base_active_t *)opal_list_remove_first(&orte_routed_base.actives))) {
|
||||
active->module->finalize();
|
||||
OBJ_RELEASE(active);
|
||||
orte_routed_base.routing_enabled = false;
|
||||
if (NULL != orte_routed.finalize) {
|
||||
orte_routed.finalize();
|
||||
}
|
||||
OPAL_LIST_DESTRUCT(&orte_routed_base.actives);
|
||||
|
||||
return mca_base_framework_components_close(&orte_routed_base_framework, NULL);
|
||||
}
|
||||
|
||||
@ -82,69 +64,28 @@ MCA_BASE_FRAMEWORK_DECLARE(orte, routed, "ORTE Message Routing Subsystem", NULL,
|
||||
orte_routed_base_open, orte_routed_base_close,
|
||||
mca_routed_base_static_components, 0);
|
||||
|
||||
static bool selected = false;
|
||||
|
||||
int orte_routed_base_select(void)
|
||||
{
|
||||
mca_base_component_list_item_t *cli=NULL;
|
||||
orte_routed_component_t *component=NULL;
|
||||
orte_routed_base_active_t *newmodule, *mod;
|
||||
mca_base_module_t *module;
|
||||
bool inserted;
|
||||
int pri;
|
||||
orte_routed_component_t *best_component = NULL;
|
||||
orte_routed_module_t *best_module = NULL;
|
||||
|
||||
if (selected) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
selected = true;
|
||||
|
||||
OPAL_LIST_FOREACH(cli, &orte_routed_base_framework.framework_components, mca_base_component_list_item_t ) {
|
||||
component = (orte_routed_component_t*) cli->cli_component;
|
||||
|
||||
opal_output_verbose(10, orte_routed_base_framework.framework_output,
|
||||
"orte_routed_base_select: Initializing %s component %s",
|
||||
component->base_version.mca_type_name,
|
||||
component->base_version.mca_component_name);
|
||||
|
||||
if (ORTE_SUCCESS != component->base_version.mca_query_component(&module, &pri)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/* add to the list of available components */
|
||||
newmodule = OBJ_NEW(orte_routed_base_active_t);
|
||||
newmodule->pri = pri;
|
||||
newmodule->component = component;
|
||||
newmodule->module = (orte_routed_module_t*)module;
|
||||
|
||||
if (ORTE_SUCCESS != newmodule->module->initialize()) {
|
||||
OBJ_RELEASE(newmodule);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* maintain priority order */
|
||||
inserted = false;
|
||||
OPAL_LIST_FOREACH(mod, &orte_routed_base.actives, orte_routed_base_active_t) {
|
||||
if (newmodule->pri > mod->pri) {
|
||||
opal_list_insert_pos(&orte_routed_base.actives,
|
||||
(opal_list_item_t*)mod, &newmodule->super);
|
||||
inserted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!inserted) {
|
||||
/* must be lowest priority - add to end */
|
||||
opal_list_append(&orte_routed_base.actives, &newmodule->super);
|
||||
}
|
||||
/*
|
||||
* Select the best component
|
||||
*/
|
||||
if( OPAL_SUCCESS != mca_base_select("routed", orte_routed_base_framework.framework_output,
|
||||
&orte_routed_base_framework.framework_components,
|
||||
(mca_base_module_t **) &best_module,
|
||||
(mca_base_component_t **) &best_component, NULL) ) {
|
||||
/* This will only happen if no component was selected */
|
||||
/* If we didn't find one to select, that is an error */
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
if (4 < opal_output_get_verbosity(orte_routed_base_framework.framework_output)) {
|
||||
opal_output(0, "%s: Final routed priorities", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
/* show the prioritized list */
|
||||
OPAL_LIST_FOREACH(mod, &orte_routed_base.actives, orte_routed_base_active_t) {
|
||||
opal_output(0, "\tComponent: %s Priority: %d", mod->component->base_version.mca_component_name, mod->pri);
|
||||
}
|
||||
/* Save the winner */
|
||||
orte_routed = *best_module;
|
||||
if (NULL != orte_routed.initialize) {
|
||||
orte_routed.initialize();
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
@ -160,7 +101,3 @@ static void destruct(orte_routed_tree_t *rt)
|
||||
OBJ_CLASS_INSTANCE(orte_routed_tree_t,
|
||||
opal_list_item_t,
|
||||
construct, destruct);
|
||||
|
||||
OBJ_CLASS_INSTANCE(orte_routed_base_active_t,
|
||||
opal_list_item_t,
|
||||
NULL, NULL);
|
||||
|
@ -6,7 +6,7 @@
|
||||
* reserved.
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -223,12 +223,6 @@ static orte_process_name_t get_route(orte_process_name_t *target)
|
||||
goto found;
|
||||
}
|
||||
|
||||
/* if I am an application process, always route via my local daemon */
|
||||
if (ORTE_PROC_IS_APP) {
|
||||
ret = ORTE_PROC_MY_DAEMON;
|
||||
goto found;
|
||||
}
|
||||
|
||||
/* if I am a tool, the route is direct if target is in
|
||||
* my own job family, and to the target's HNP if not
|
||||
*/
|
||||
@ -264,7 +258,13 @@ static orte_process_name_t get_route(orte_process_name_t *target)
|
||||
}
|
||||
}
|
||||
|
||||
/* if the jobid is different than our own, then this the target
|
||||
/* if the target is our parent, then send it direct */
|
||||
if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_PARENT, target)) {
|
||||
ret = ORTE_PROC_MY_PARENT;
|
||||
goto found;
|
||||
}
|
||||
|
||||
/* if the jobid is different than our own, then this target
|
||||
* is a tool and we should go direct */
|
||||
if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
|
||||
ret = target;
|
||||
@ -273,10 +273,15 @@ static orte_process_name_t get_route(orte_process_name_t *target)
|
||||
|
||||
daemon.jobid = ORTE_PROC_MY_NAME->jobid;
|
||||
/* find out what daemon hosts this proc */
|
||||
if (ORTE_VPID_INVALID == (daemon.vpid = orte_get_proc_daemon_vpid(target))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
ret = ORTE_NAME_INVALID;
|
||||
goto found;
|
||||
if (ORTE_PROC_MY_NAME->jobid == target->jobid) {
|
||||
/* it's a daemon - no need to look it up */
|
||||
daemon.vpid = target->vpid;
|
||||
} else {
|
||||
if (ORTE_VPID_INVALID == (daemon.vpid = orte_get_proc_daemon_vpid(target))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
ret = ORTE_NAME_INVALID;
|
||||
goto found;
|
||||
}
|
||||
}
|
||||
|
||||
/* if the daemon is me, then send direct to the target! */
|
||||
|
@ -7,7 +7,7 @@
|
||||
* Copyright (c) 2004-2011 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2014-2016 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -221,47 +221,8 @@ typedef struct {
|
||||
orte_routed_module_ft_event_fn_t ft_event;
|
||||
} orte_routed_module_t;
|
||||
|
||||
/* define an equivalent set of API functions - these will be implemented
|
||||
* as "stubs" in the framework base */
|
||||
typedef char* (*orte_routed_API_assign_module_fn_t)(char *modules);
|
||||
|
||||
typedef int (*orte_routed_API_delete_route_fn_t)(char *module,
|
||||
orte_process_name_t *proc);
|
||||
typedef int (*orte_routed_API_update_route_fn_t)(char *module,
|
||||
orte_process_name_t *target,
|
||||
orte_process_name_t *route);
|
||||
typedef orte_process_name_t (*orte_routed_API_get_route_fn_t)(char *module,
|
||||
orte_process_name_t *target);
|
||||
typedef int (*orte_routed_API_route_lost_fn_t)(char *module,
|
||||
const orte_process_name_t *route);
|
||||
typedef bool (*orte_routed_API_route_is_defined_fn_t)(char *module,
|
||||
const orte_process_name_t *target);
|
||||
typedef void (*orte_routed_API_update_routing_plan_fn_t)(char *module);
|
||||
typedef void (*orte_routed_API_get_routing_list_fn_t)(char *module, opal_list_t *coll);
|
||||
typedef int (*orte_routed_API_set_lifeline_fn_t)(char *module, orte_process_name_t *proc);
|
||||
typedef size_t (*orte_routed_API_num_routes_fn_t)(char *module);
|
||||
typedef int (*orte_routed_API_ft_event_fn_t)(char *module, int state);
|
||||
|
||||
|
||||
typedef struct {
|
||||
/* API functions */
|
||||
orte_routed_API_assign_module_fn_t assign_module;
|
||||
orte_routed_API_delete_route_fn_t delete_route;
|
||||
orte_routed_API_update_route_fn_t update_route;
|
||||
orte_routed_API_get_route_fn_t get_route;
|
||||
orte_routed_API_route_lost_fn_t route_lost;
|
||||
orte_routed_API_route_is_defined_fn_t route_is_defined;
|
||||
orte_routed_API_set_lifeline_fn_t set_lifeline;
|
||||
/* fns for daemons */
|
||||
orte_routed_API_update_routing_plan_fn_t update_routing_plan;
|
||||
orte_routed_API_get_routing_list_fn_t get_routing_list;
|
||||
orte_routed_API_num_routes_fn_t num_routes;
|
||||
/* FT Notification */
|
||||
orte_routed_API_ft_event_fn_t ft_event;
|
||||
} orte_routed_API_t;
|
||||
|
||||
/* provide an interface to the routed framework stub functions */
|
||||
ORTE_DECLSPEC extern orte_routed_API_t orte_routed;
|
||||
ORTE_DECLSPEC extern orte_routed_module_t orte_routed;
|
||||
|
||||
/* ******************************************************************** */
|
||||
|
||||
|
@ -9,7 +9,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 Evergrid, Inc. All rights reserved.
|
||||
* Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -767,8 +767,7 @@ int orte_snapc_base_global_coord_ckpt_update_cmd(orte_process_name_t* peer,
|
||||
}
|
||||
}
|
||||
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
peer, loc_buffer,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(peer, loc_buffer,
|
||||
ORTE_RML_TAG_CKPT,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
opal_output(orte_snapc_base_framework.framework_output,
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -508,8 +508,7 @@ void orte_state_base_notify_data_server(orte_process_name_t *target)
|
||||
}
|
||||
|
||||
/* send the request to the server */
|
||||
rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
&orte_pmix_server_globals.server, buf,
|
||||
rc = orte_rml.send_buffer_nb(&orte_pmix_server_globals.server, buf,
|
||||
ORTE_RML_TAG_DATA_SERVER,
|
||||
orte_rml_send_callback, NULL);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
@ -617,8 +616,7 @@ static void _send_notification(int status,
|
||||
ORTE_ERROR_NAME(status),
|
||||
ORTE_NAME_PRINT(target),
|
||||
ORTE_NAME_PRINT(&daemon));
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
&daemon, buf,
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(&daemon, buf,
|
||||
ORTE_RML_TAG_NOTIFICATION,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -635,7 +633,6 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata)
|
||||
orte_job_t *jdata;
|
||||
orte_proc_t *pdata;
|
||||
int i;
|
||||
char *rtmod;
|
||||
orte_process_name_t parent, target;
|
||||
|
||||
ORTE_ACQUIRE_OBJECT(caddy);
|
||||
@ -648,9 +645,6 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata)
|
||||
ORTE_NAME_PRINT(proc),
|
||||
orte_proc_state_to_str(state));
|
||||
|
||||
/* get our "lifeline" routed module */
|
||||
rtmod = orte_rml.get_routed(orte_mgmt_conduit);
|
||||
|
||||
/* get the job object for this proc */
|
||||
if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
@ -722,7 +716,7 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata)
|
||||
* remain (might be some from another job)
|
||||
*/
|
||||
if (orte_orteds_term_ordered &&
|
||||
0 == orte_routed.num_routes(rtmod)) {
|
||||
0 == orte_routed.num_routes()) {
|
||||
for (i=0; i < orte_local_children->size; i++) {
|
||||
if (NULL != (pdata = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) &&
|
||||
ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_ALIVE)) {
|
||||
@ -783,7 +777,6 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata)
|
||||
int32_t i32, *i32ptr;
|
||||
uint32_t u32;
|
||||
void *nptr;
|
||||
char *rtmod;
|
||||
|
||||
ORTE_ACQUIRE_OBJECT(caddy);
|
||||
jdata = caddy->jdata;
|
||||
@ -793,10 +786,6 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata)
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid));
|
||||
|
||||
/* get our "lifeline" routed module */
|
||||
rtmod = orte_rml.get_routed(orte_mgmt_conduit);
|
||||
|
||||
|
||||
if (NULL == jdata || jdata->jobid == ORTE_PROC_MY_NAME->jobid) {
|
||||
/* just check to see if the daemons are complete */
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
|
||||
@ -864,7 +853,7 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata)
|
||||
*/
|
||||
CHECK_DAEMONS:
|
||||
if (jdata == NULL || jdata->jobid == ORTE_PROC_MY_NAME->jobid) {
|
||||
if (0 == orte_routed.num_routes(rtmod)) {
|
||||
if (0 == orte_routed.num_routes()) {
|
||||
/* orteds are done! */
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output,
|
||||
"%s orteds complete - exiting",
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -326,8 +326,7 @@ static void _send_notification(int status,
|
||||
ORTE_ERROR_NAME(status),
|
||||
ORTE_NAME_PRINT(target),
|
||||
ORTE_NAME_PRINT(&daemon));
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
&daemon, buf,
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(&daemon, buf,
|
||||
ORTE_RML_TAG_NOTIFICATION,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
@ -1,7 +1,7 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2017 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -239,8 +239,7 @@ static void track_jobs(int fd, short argc, void *cbdata)
|
||||
}
|
||||
|
||||
/* send it */
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, alert,
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
|
||||
ORTE_RML_TAG_PLM,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -262,7 +261,6 @@ static void track_procs(int fd, short argc, void *cbdata)
|
||||
opal_buffer_t *alert;
|
||||
int rc, i;
|
||||
orte_plm_cmd_flag_t cmd;
|
||||
char *rtmod;
|
||||
orte_std_cntr_t index;
|
||||
orte_job_map_t *map;
|
||||
orte_node_t *node;
|
||||
@ -333,8 +331,7 @@ static void track_procs(int fd, short argc, void *cbdata)
|
||||
}
|
||||
}
|
||||
/* send it */
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, alert,
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
|
||||
ORTE_RML_TAG_PLM,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -391,9 +388,8 @@ static void track_procs(int fd, short argc, void *cbdata)
|
||||
* gone, then terminate ourselves IF no local procs
|
||||
* remain (might be some from another job)
|
||||
*/
|
||||
rtmod = orte_rml.get_routed(orte_mgmt_conduit);
|
||||
if (orte_orteds_term_ordered &&
|
||||
0 == orte_routed.num_routes(rtmod)) {
|
||||
0 == orte_routed.num_routes()) {
|
||||
for (i=0; i < orte_local_children->size; i++) {
|
||||
if (NULL != (pdata = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) &&
|
||||
ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_ALIVE)) {
|
||||
@ -431,8 +427,7 @@ static void track_procs(int fd, short argc, void *cbdata)
|
||||
"%s state:orted: SENDING JOB LOCAL TERMINATION UPDATE FOR JOB %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(jdata->jobid)));
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, alert,
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert,
|
||||
ORTE_RML_TAG_PLM,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
@ -123,7 +123,6 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
char string[256], *string_ptr = string;
|
||||
float pss;
|
||||
opal_pstats_t pstat;
|
||||
char *rtmod;
|
||||
char *coprocessors;
|
||||
orte_job_map_t *map;
|
||||
int8_t flag;
|
||||
@ -382,8 +381,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
/* flag that orteds were ordered to terminate */
|
||||
orte_orteds_term_ordered = true;
|
||||
/* if all my routes and local children are gone, then terminate ourselves */
|
||||
rtmod = orte_rml.get_routed(orte_mgmt_conduit);
|
||||
if (0 == (ret = orte_routed.num_routes(rtmod))) {
|
||||
if (0 == (ret = orte_routed.num_routes())) {
|
||||
for (i=0; i < orte_local_children->size; i++) {
|
||||
if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) &&
|
||||
ORTE_FLAG_TEST(proct, ORTE_PROC_FLAG_ALIVE)) {
|
||||
@ -425,8 +423,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
orte_orteds_term_ordered = true;
|
||||
if (ORTE_PROC_IS_HNP) {
|
||||
/* if all my routes and local children are gone, then terminate ourselves */
|
||||
rtmod = orte_rml.get_routed(orte_mgmt_conduit);
|
||||
if (0 == orte_routed.num_routes(rtmod)) {
|
||||
if (0 == orte_routed.num_routes()) {
|
||||
for (i=0; i < orte_local_children->size; i++) {
|
||||
if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) &&
|
||||
ORTE_FLAG_TEST(proct, ORTE_PROC_FLAG_ALIVE)) {
|
||||
@ -526,8 +523,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
break;
|
||||
}
|
||||
/* send the buffer to our IOF */
|
||||
orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_NAME, iofbuf, ORTE_RML_TAG_IOF_HNP,
|
||||
orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, iofbuf, ORTE_RML_TAG_IOF_HNP,
|
||||
orte_rml_send_callback, NULL);
|
||||
}
|
||||
for (i=1; i < orte_node_pool->size; i++) {
|
||||
@ -718,8 +714,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
OBJ_DESTRUCT(&data);
|
||||
}
|
||||
/* send the data */
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
sender, answer, ORTE_RML_TAG_TOPOLOGY_REPORT,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOPOLOGY_REPORT,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_RELEASE(answer);
|
||||
@ -749,8 +744,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
goto CLEANUP;
|
||||
}
|
||||
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
sender, answer, ORTE_RML_TAG_TOOL,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_RELEASE(answer);
|
||||
@ -775,8 +769,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
OBJ_RELEASE(answer);
|
||||
goto CLEANUP;
|
||||
}
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
sender, answer, ORTE_RML_TAG_TOOL,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_RELEASE(answer);
|
||||
@ -845,8 +838,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
rc = opal_hash_table_get_next_key_uint32(orte_job_data, &u32, (void **)&jobdat, nptr, &nptr);
|
||||
}
|
||||
}
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
sender, answer, ORTE_RML_TAG_TOOL,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_RELEASE(answer);
|
||||
@ -872,8 +864,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
OBJ_RELEASE(answer);
|
||||
goto CLEANUP;
|
||||
}
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
sender, answer, ORTE_RML_TAG_TOOL,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_RELEASE(answer);
|
||||
@ -942,8 +933,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
}
|
||||
}
|
||||
/* send the info */
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
sender, answer, ORTE_RML_TAG_TOOL,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_RELEASE(answer);
|
||||
@ -969,8 +959,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
OBJ_RELEASE(answer);
|
||||
goto CLEANUP;
|
||||
}
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
sender, answer, ORTE_RML_TAG_TOOL,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_RELEASE(answer);
|
||||
@ -1088,8 +1077,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
}
|
||||
}
|
||||
/* send the info */
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
sender, answer, ORTE_RML_TAG_TOOL,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_RELEASE(answer);
|
||||
@ -1147,8 +1135,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
goto SEND_TOP_ANSWER;
|
||||
}
|
||||
/* the callback function will release relay_msg buffer */
|
||||
if (0 > orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
&proc2, relay_msg,
|
||||
if (0 > orte_rml.send_buffer_nb(&proc2, relay_msg,
|
||||
ORTE_RML_TAG_DAEMON,
|
||||
orte_rml_send_callback, NULL)) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
|
||||
@ -1199,8 +1186,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
goto SEND_TOP_ANSWER;
|
||||
}
|
||||
/* the callback function will release relay_msg buffer */
|
||||
if (0 > orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
&proc2, relay_msg,
|
||||
if (0 > orte_rml.send_buffer_nb(&proc2, relay_msg,
|
||||
ORTE_RML_TAG_DAEMON,
|
||||
orte_rml_send_callback, NULL)) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE);
|
||||
@ -1264,8 +1250,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
ret = ORTE_ERR_COMM_FAILURE;
|
||||
break;
|
||||
}
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
return_addr, answer, ORTE_RML_TAG_TOOL,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(return_addr, answer, ORTE_RML_TAG_TOOL,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_RELEASE(answer);
|
||||
@ -1343,8 +1328,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
free(gstack_exec);
|
||||
}
|
||||
/* always send our response */
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, answer,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, answer,
|
||||
ORTE_RML_TAG_STACK_TRACE,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
@ -1382,8 +1366,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
||||
}
|
||||
opal_dss.pack(answer, &pss, 1, OPAL_FLOAT);
|
||||
/* send it back */
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, answer,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, answer,
|
||||
ORTE_RML_TAG_MEMPROFILE,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
|
@ -712,19 +712,19 @@ int orte_daemon(int argc, char *argv[])
|
||||
/* tell the routed module that we have a path
|
||||
* back to the HNP
|
||||
*/
|
||||
if (ORTE_SUCCESS != (ret = orte_routed.update_route(NULL, ORTE_PROC_MY_HNP, ORTE_PROC_MY_PARENT))) {
|
||||
if (ORTE_SUCCESS != (ret = orte_routed.update_route(ORTE_PROC_MY_HNP, ORTE_PROC_MY_PARENT))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
goto DONE;
|
||||
}
|
||||
/* and a path to our parent */
|
||||
if (ORTE_SUCCESS != (ret = orte_routed.update_route(NULL, ORTE_PROC_MY_PARENT, ORTE_PROC_MY_PARENT))) {
|
||||
if (ORTE_SUCCESS != (ret = orte_routed.update_route(ORTE_PROC_MY_PARENT, ORTE_PROC_MY_PARENT))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
goto DONE;
|
||||
}
|
||||
/* set the lifeline to point to our parent so that we
|
||||
* can handle the situation if that lifeline goes away
|
||||
*/
|
||||
if (ORTE_SUCCESS != (ret = orte_routed.set_lifeline(NULL, ORTE_PROC_MY_PARENT))) {
|
||||
if (ORTE_SUCCESS != (ret = orte_routed.set_lifeline(ORTE_PROC_MY_PARENT))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
goto DONE;
|
||||
}
|
||||
@ -754,8 +754,7 @@ int orte_daemon(int argc, char *argv[])
|
||||
node_regex_waiting = true;
|
||||
orte_rml.recv_buffer_nb(ORTE_PROC_MY_PARENT, ORTE_RML_TAG_NODE_REGEX_REPORT,
|
||||
ORTE_RML_PERSISTENT, node_regex_report, &node_regex_waiting);
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_PARENT, buffer,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_PARENT, buffer,
|
||||
ORTE_RML_TAG_WARMUP_CONNECTION,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
@ -955,8 +954,7 @@ int orte_daemon(int argc, char *argv[])
|
||||
}
|
||||
|
||||
/* send it to the designated target */
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
&target, buffer,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(&target, buffer,
|
||||
ORTE_RML_TAG_ORTED_CALLBACK,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
@ -1140,19 +1138,16 @@ static void rollup(int status, orte_process_name_t* sender,
|
||||
}
|
||||
|
||||
static void report_orted() {
|
||||
char *rtmod;
|
||||
int nreqd, ret;
|
||||
|
||||
/* get the number of children */
|
||||
rtmod = orte_rml.get_routed(orte_mgmt_conduit);
|
||||
nreqd = orte_routed.num_routes(rtmod) + 1;
|
||||
nreqd = orte_routed.num_routes() + 1;
|
||||
if (nreqd == ncollected && NULL != mybucket && !node_regex_waiting) {
|
||||
/* add the collection of our children's buckets to ours */
|
||||
opal_dss.copy_payload(mybucket, bucket);
|
||||
OBJ_RELEASE(bucket);
|
||||
/* relay this on to our parent */
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_PARENT, mybucket,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_PARENT, mybucket,
|
||||
ORTE_RML_TAG_ORTED_CALLBACK,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
@ -1175,7 +1170,7 @@ static void node_regex_report(int status, orte_process_name_t* sender,
|
||||
|
||||
/* update the routing tree so any tree spawn operation
|
||||
* properly gets the number of children underneath us */
|
||||
orte_routed.update_routing_plan(NULL);
|
||||
orte_routed.update_routing_plan();
|
||||
|
||||
*active = false;
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
* Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2018 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||
@ -575,14 +575,14 @@ int orte_submit_init(int argc, char *argv[],
|
||||
OBJ_DESTRUCT(&val);
|
||||
|
||||
/* set the route to be direct */
|
||||
if (ORTE_SUCCESS != orte_routed.update_route(NULL, ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) {
|
||||
if (ORTE_SUCCESS != orte_routed.update_route(ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) {
|
||||
orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri);
|
||||
orte_finalize();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* set the target hnp as our lifeline so we will terminate if it exits */
|
||||
orte_routed.set_lifeline(NULL, ORTE_PROC_MY_HNP);
|
||||
orte_routed.set_lifeline(ORTE_PROC_MY_HNP);
|
||||
|
||||
/* setup to listen for HNP response to my commands */
|
||||
orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_NOTIFY_COMPLETE,
|
||||
@ -700,8 +700,7 @@ int orte_submit_cancel(int index) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON,
|
||||
rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON,
|
||||
orte_rml_send_callback, NULL);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -724,8 +723,7 @@ int orte_submit_halt(void)
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, req,
|
||||
rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, req,
|
||||
ORTE_RML_TAG_DAEMON,
|
||||
orte_rml_send_callback, NULL);
|
||||
if (ORTE_SUCCESS != rc) {
|
||||
@ -1146,8 +1144,7 @@ int orte_submit_job(char *argv[], int *index,
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON,
|
||||
orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON,
|
||||
orte_rml_send_callback, NULL);
|
||||
|
||||
/* Inform the caller of the tracker index if they passed a index pointer */
|
||||
@ -3378,8 +3375,7 @@ void orte_profile_wakeup(int sd, short args, void *cbdata)
|
||||
for (i=0; i < nreports; i++) {
|
||||
OBJ_RETAIN(buffer);
|
||||
name.vpid = i;
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
&name, buffer,
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(&name, buffer,
|
||||
ORTE_RML_TAG_DAEMON,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
@ -13,7 +13,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
@ -391,8 +391,7 @@ static void send_error(int status, opal_process_name_t *idreq,
|
||||
}
|
||||
|
||||
/* send the response */
|
||||
orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
remote, reply,
|
||||
orte_rml.send_buffer_nb(remote, reply,
|
||||
ORTE_RML_TAG_DIRECT_MODEX_RESP,
|
||||
orte_rml_send_callback, NULL);
|
||||
return;
|
||||
@ -435,8 +434,7 @@ static void _mdxresp(int sd, short args, void *cbdata)
|
||||
opal_dss.copy_payload(reply, &req->msg);
|
||||
|
||||
/* send the response */
|
||||
orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
&req->proxy, reply,
|
||||
orte_rml.send_buffer_nb(&req->proxy, reply,
|
||||
ORTE_RML_TAG_DIRECT_MODEX_RESP,
|
||||
orte_rml_send_callback, NULL);
|
||||
|
||||
|
@ -13,7 +13,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
@ -143,8 +143,7 @@ static void spawn(int sd, short args, void *cbdata)
|
||||
}
|
||||
|
||||
/* send it to the HNP for processing - might be myself! */
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, buf,
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf,
|
||||
ORTE_RML_TAG_PLM,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
@ -13,7 +13,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2017 Research Organization for Information Science
|
||||
@ -261,8 +261,7 @@ static void dmodex_req(int sd, short args, void *cbdata)
|
||||
}
|
||||
|
||||
/* send it to the host daemon */
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
&dmn->name, buf, ORTE_RML_TAG_DIRECT_MODEX,
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(&dmn->name, buf, ORTE_RML_TAG_DIRECT_MODEX,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
opal_hotel_checkout(&orte_pmix_server_globals.reqs, req->room_num);
|
||||
|
@ -13,7 +13,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2017 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
@ -1080,8 +1080,7 @@ void pmix_server_log_fn(opal_process_name_t *requestor,
|
||||
buf = OBJ_NEW(opal_buffer_t);
|
||||
opal_dss.load(buf, val->data.bo.bytes, val->data.bo.size);
|
||||
val->data.bo.bytes = NULL;
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, buf,
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf,
|
||||
ORTE_RML_TAG_SHOW_HELP,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
@ -13,7 +13,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2013-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
@ -132,9 +132,9 @@ static int init_server(void)
|
||||
struct timeval timeout;
|
||||
timeout.tv_sec = orte_pmix_server_globals.timeout;
|
||||
timeout.tv_usec = 0;
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.ping(orte_mgmt_conduit, server, &timeout))) {
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.ping(server, &timeout))) {
|
||||
/* try it one more time */
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.ping(orte_mgmt_conduit, server, &timeout))) {
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.ping(server, &timeout))) {
|
||||
/* okay give up */
|
||||
orte_show_help("help-orterun.txt", "orterun:server-not-found", true,
|
||||
orte_basename, server,
|
||||
@ -205,8 +205,7 @@ static void execute(int sd, short args, void *cbdata)
|
||||
}
|
||||
|
||||
/* send the request to the target */
|
||||
rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
target, xfer,
|
||||
rc = orte_rml.send_buffer_nb(target, xfer,
|
||||
ORTE_RML_TAG_DATA_SERVER,
|
||||
orte_rml_send_callback, NULL);
|
||||
if (ORTE_SUCCESS == rc) {
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012-2016 Los Alamos National Security, LLC.
|
||||
* All rights reserved
|
||||
* Copyright (c) 2015-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -336,8 +336,7 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&req->requestor));
|
||||
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
&req->requestor, reply, ORTE_RML_TAG_DATA_CLIENT,
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(&req->requestor, reply, ORTE_RML_TAG_DATA_CLIENT,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(reply);
|
||||
@ -716,8 +715,7 @@ void orte_data_server(int status, orte_process_name_t* sender,
|
||||
}
|
||||
|
||||
SEND_ANSWER:
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
sender, answer, ORTE_RML_TAG_DATA_CLIENT,
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_DATA_CLIENT,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(answer);
|
||||
|
@ -72,10 +72,6 @@ char *orte_basename = NULL;
|
||||
bool orte_coprocessors_detected = false;
|
||||
opal_hash_table_t *orte_coprocessors = NULL;
|
||||
char *orte_topo_signature = NULL;
|
||||
char *orte_mgmt_transport = NULL;
|
||||
char *orte_coll_transport = NULL;
|
||||
int orte_mgmt_conduit = -1;
|
||||
int orte_coll_conduit = -1;
|
||||
bool orte_no_vm = false;
|
||||
char *orte_data_server_uri = NULL;
|
||||
|
||||
|
@ -74,11 +74,6 @@ ORTE_DECLSPEC extern bool orte_event_base_active; /* instantiated in orte/runtim
|
||||
ORTE_DECLSPEC extern bool orte_proc_is_bound; /* instantiated in orte/runtime/orte_init.c */
|
||||
ORTE_DECLSPEC extern int orte_progress_thread_debug; /* instantiated in orte/runtime/orte_init.c */
|
||||
|
||||
ORTE_DECLSPEC extern char *orte_mgmt_transport;
|
||||
ORTE_DECLSPEC extern char *orte_coll_transport;
|
||||
ORTE_DECLSPEC extern int orte_mgmt_conduit;
|
||||
ORTE_DECLSPEC extern int orte_coll_conduit;
|
||||
|
||||
/**
|
||||
* Global indicating where this process was bound to at launch (will
|
||||
* be NULL if !orte_proc_is_bound)
|
||||
|
@ -746,19 +746,6 @@ int orte_register_params(void)
|
||||
OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&orte_daemon_cores);
|
||||
|
||||
/* get the conduit params */
|
||||
orte_coll_transport = "fabric,ethernet";
|
||||
(void) mca_base_var_register("orte", "orte", "coll", "transports",
|
||||
"Comma-separated list of transports to use for ORTE collectives",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY, &orte_coll_transport);
|
||||
|
||||
orte_mgmt_transport = "oob";
|
||||
(void) mca_base_var_register("orte", "orte", "mgmt", "transports",
|
||||
"Comma-separated list of transports to use for ORTE management messages",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY, &orte_mgmt_transport);
|
||||
|
||||
/* Amount of time to wait for a stack trace to return from the daemons */
|
||||
orte_stack_trace_wait_timeout = 30;
|
||||
(void) mca_base_var_register ("orte", "orte", NULL, "timeout_for_stack_trace",
|
||||
|
@ -11,7 +11,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2010-2012 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -136,7 +136,7 @@ int orte_util_comm_connect_tool(char *uri)
|
||||
OBJ_DESTRUCT(&val);
|
||||
|
||||
/* set the route to be direct */
|
||||
if (ORTE_SUCCESS != (rc = orte_routed.update_route(NULL, &tool, &tool))) {
|
||||
if (ORTE_SUCCESS != (rc = orte_routed.update_route(&tool, &tool))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
@ -201,8 +201,7 @@ int orte_util_comm_report_event(orte_comm_event_t ev)
|
||||
opal_event_evtimer_add(quicktime, &tv);
|
||||
|
||||
/* do the send */
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
&tool, buf, ORTE_RML_TAG_TOOL, send_cbfunc, NULL))) {
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(&tool, buf, ORTE_RML_TAG_TOOL, send_cbfunc, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(buf);
|
||||
return rc;
|
||||
@ -292,8 +291,7 @@ int orte_util_comm_query_job_info(const orte_process_name_t *hnp, orte_jobid_t j
|
||||
opal_event_evtimer_add(quicktime, &tv);
|
||||
|
||||
/* do the send */
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
(orte_process_name_t*)hnp, cmd,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb((orte_process_name_t*)hnp, cmd,
|
||||
ORTE_RML_TAG_DAEMON, send_cbfunc, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_RELEASE(cmd);
|
||||
@ -401,8 +399,7 @@ int orte_util_comm_query_node_info(const orte_process_name_t *hnp, char *node,
|
||||
opal_event_evtimer_add(quicktime, &tv);
|
||||
|
||||
/* do the send */
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
(orte_process_name_t*)hnp, cmd,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb((orte_process_name_t*)hnp, cmd,
|
||||
ORTE_RML_TAG_DAEMON, send_cbfunc, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_RELEASE(cmd);
|
||||
@ -519,8 +516,7 @@ int orte_util_comm_query_proc_info(const orte_process_name_t *hnp, orte_jobid_t
|
||||
opal_event_evtimer_add(quicktime, &tv);
|
||||
|
||||
/* do the send */
|
||||
if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
(orte_process_name_t*)hnp, cmd, ORTE_RML_TAG_DAEMON,
|
||||
if (0 > (ret = orte_rml.send_buffer_nb((orte_process_name_t*)hnp, cmd, ORTE_RML_TAG_DAEMON,
|
||||
send_cbfunc, NULL))) {
|
||||
ORTE_ERROR_LOG(ret);
|
||||
OBJ_RELEASE(cmd);
|
||||
@ -669,8 +665,7 @@ int orte_util_comm_spawn_job(const orte_process_name_t *hnp, orte_job_t *jdata)
|
||||
ORTE_NAME_PRINT(hnp)));
|
||||
|
||||
/* tell the target HNP to launch the job */
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
(orte_process_name_t*)hnp, buf,
|
||||
if (0 > (rc = orte_rml.send_buffer_nb((orte_process_name_t*)hnp, buf,
|
||||
ORTE_RML_TAG_DAEMON,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -751,8 +746,7 @@ int orte_util_comm_terminate_job(const orte_process_name_t *hnp, orte_jobid_t jo
|
||||
ORTE_NAME_PRINT(hnp)));
|
||||
|
||||
/* tell the target HNP to terminate the job */
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
(orte_process_name_t*)hnp, buf,
|
||||
if (0 > (rc = orte_rml.send_buffer_nb((orte_process_name_t*)hnp, buf,
|
||||
ORTE_RML_TAG_DAEMON,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
@ -809,8 +803,7 @@ int orte_util_comm_halt_vm(const orte_process_name_t *hnp)
|
||||
}
|
||||
|
||||
/* send the order */
|
||||
if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
(orte_process_name_t*)hnp, buf,
|
||||
if (0 > (rc = orte_rml.send_buffer_nb((orte_process_name_t*)hnp, buf,
|
||||
ORTE_RML_TAG_DAEMON,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
@ -12,7 +12,7 @@
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2016-2017 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -161,7 +161,7 @@ int orte_read_hnp_contact_file(char *filename, orte_hnp_contact_t *hnp, bool con
|
||||
OBJ_DESTRUCT(&val);
|
||||
|
||||
/* set the route to be direct */
|
||||
if (ORTE_SUCCESS != (rc = orte_routed.update_route(NULL, &hnp->name, &hnp->name))) {
|
||||
if (ORTE_SUCCESS != (rc = orte_routed.update_route(&hnp->name, &hnp->name))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
free(hnp_uri);
|
||||
return rc;
|
||||
|
@ -399,29 +399,30 @@ int orte_util_decode_nidmap(opal_buffer_t *buf)
|
||||
} else {
|
||||
vpid = UINT32_MAX;
|
||||
}
|
||||
if (UINT32_MAX != vpid &&
|
||||
NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, vpid))) {
|
||||
proc = OBJ_NEW(orte_proc_t);
|
||||
proc->name.jobid = ORTE_PROC_MY_NAME->jobid;
|
||||
proc->name.vpid = vpid;
|
||||
proc->state = ORTE_PROC_STATE_RUNNING;
|
||||
ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_ALIVE);
|
||||
daemons->num_procs++;
|
||||
opal_pointer_array_set_item(daemons->procs, proc->name.vpid, proc);
|
||||
if (UINT32_MAX != vpid) {
|
||||
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, vpid))) {
|
||||
proc = OBJ_NEW(orte_proc_t);
|
||||
proc->name.jobid = ORTE_PROC_MY_NAME->jobid;
|
||||
proc->name.vpid = vpid;
|
||||
proc->state = ORTE_PROC_STATE_RUNNING;
|
||||
ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_ALIVE);
|
||||
daemons->num_procs++;
|
||||
opal_pointer_array_set_item(daemons->procs, proc->name.vpid, proc);
|
||||
}
|
||||
nd->index = proc->name.vpid;
|
||||
OBJ_RETAIN(nd);
|
||||
proc->node = nd;
|
||||
OBJ_RETAIN(proc);
|
||||
nd->daemon = proc;
|
||||
}
|
||||
nd->index = proc->name.vpid;
|
||||
OBJ_RETAIN(nd);
|
||||
proc->node = nd;
|
||||
OBJ_RETAIN(proc);
|
||||
nd->daemon = proc;
|
||||
}
|
||||
|
||||
/* update num procs */
|
||||
if (orte_process_info.num_procs != daemons->num_procs) {
|
||||
orte_process_info.num_procs = daemons->num_procs;
|
||||
/* need to update the routing plan */
|
||||
orte_routed.update_routing_plan(NULL);
|
||||
}
|
||||
/* need to update the routing plan */
|
||||
orte_routed.update_routing_plan();
|
||||
|
||||
if (orte_process_info.max_procs < orte_process_info.num_procs) {
|
||||
orte_process_info.max_procs = orte_process_info.num_procs;
|
||||
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2016-2018 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2016-2019 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -694,8 +694,7 @@ int orte_show_help_norender(const char *filename, const char *topic,
|
||||
/* if we are a daemon, then send it via RML to the HNP */
|
||||
if (ORTE_PROC_IS_DAEMON) {
|
||||
/* send it to the HNP */
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, buf,
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf,
|
||||
ORTE_RML_TAG_SHOW_HELP,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
OBJ_RELEASE(buf);
|
||||
@ -787,8 +786,7 @@ int orte_show_help_suppress(const char *filename, const char *topic)
|
||||
/* pack the flag that we DO NOT have a string */
|
||||
opal_dss.pack(buf, &have_output, 1, OPAL_INT8);
|
||||
/* send it to the HNP */
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit,
|
||||
ORTE_PROC_MY_HNP, buf,
|
||||
if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf,
|
||||
ORTE_RML_TAG_SHOW_HELP,
|
||||
orte_rml_send_callback, NULL))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user