diff --git a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c index c748d28067..8bf485293c 100644 --- a/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c +++ b/orte/mca/errmgr/default_hnp/errmgr_default_hnp.c @@ -9,7 +9,7 @@ * Copyright (c) 2011 Oracle and/or all its affiliates. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -277,8 +277,7 @@ static void job_errors(int fd, short args, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid), ORTE_NAME_PRINT(&jdata->originator))); - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &jdata->originator, answer, + if (0 > (ret = orte_rml.send_buffer_nb(&jdata->originator, answer, ORTE_RML_TAG_LAUNCH_RESP, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); @@ -358,7 +357,6 @@ static void proc_errors(int fd, short args, void *cbdata) orte_proc_state_t state = caddy->proc_state; int i; int32_t i32, *i32ptr; - char *rtmod; ORTE_ACQUIRE_OBJECT(caddy); @@ -381,7 +379,6 @@ static void proc_errors(int fd, short args, void *cbdata) goto cleanup; } pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->vpid); - rtmod = orte_rml.get_routed(orte_mgmt_conduit); /* we MUST handle a communication failure before doing anything else * as it requires some special care to avoid normal termination issues @@ -412,9 +409,9 @@ static void proc_errors(int fd, short args, void *cbdata) "%s Comm failure: daemons terminating - recording daemon %s as gone", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(proc))); /* remove from dependent routes, if it is one */ - orte_routed.route_lost(rtmod, proc); + orte_routed.route_lost(proc); /* if all my routes and local children are gone, then terminate ourselves */ - if (0 == orte_routed.num_routes(rtmod)) { + if (0 == orte_routed.num_routes()) { for (i=0; i < orte_local_children->size; i++) { if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) && ORTE_FLAG_TEST(pptr, ORTE_PROC_FLAG_ALIVE) && proct->state < ORTE_PROC_STATE_UNTERMINATED) { @@ -435,7 +432,7 @@ static void proc_errors(int fd, short args, void *cbdata) OPAL_OUTPUT_VERBOSE((5, orte_errmgr_base_framework.framework_output, "%s Comm failure: %d routes remain alive", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (int)orte_routed.num_routes(rtmod))); + (int)orte_routed.num_routes())); } goto cleanup; } @@ -493,7 +490,7 @@ static void proc_errors(int fd, short args, void *cbdata) } /* if all my routes and children are gone, then terminate ourselves nicely (i.e., this is a normal termination) */ - if (0 == orte_routed.num_routes(rtmod)) { + if (0 == orte_routed.num_routes()) { OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output, "%s errmgr:default:hnp all routes gone - exiting", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); @@ -718,7 +715,7 @@ static void proc_errors(int fd, short args, void *cbdata) default_hnp_abort(jdata); } /* remove from dependent routes, if it is one */ - orte_routed.route_lost(rtmod, proc); + orte_routed.route_lost(proc); break; case ORTE_PROC_STATE_UNABLE_TO_SEND_MSG: @@ -841,7 +838,7 @@ static void default_hnp_abort(orte_job_t *jdata) i32ptr = &i32; if (orte_get_attribute(&jdata->attributes, ORTE_JOB_NUM_NONZERO_EXIT, (void**)&i32ptr, OPAL_INT32)) { /* warn user */ - orte_show_help("help-errmgr-base.txt", "normal-termination-but", true, + orte_show_help("help-errmgr-base.txt", "normal-termination-but", true, (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "Primary" : "Child", (1 == ORTE_LOCAL_JOBID(jdata->jobid)) ? "" : ORTE_LOCAL_JOBID_PRINT(jdata->jobid), i32, (1 == i32) ? "process returned\na non-zero exit code" : diff --git a/orte/mca/errmgr/default_orted/errmgr_default_orted.c b/orte/mca/errmgr/default_orted/errmgr_default_orted.c index 5fe4ca1793..ff0fe38fdc 100644 --- a/orte/mca/errmgr/default_orted/errmgr_default_orted.c +++ b/orte/mca/errmgr/default_orted/errmgr_default_orted.c @@ -204,8 +204,7 @@ static void orted_abort(int error_code, char *fmt, ...) } /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, alert, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); @@ -303,8 +302,7 @@ static void job_errors(int fd, short args, void *cbdata) goto cleanup; } /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, alert, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); @@ -321,7 +319,6 @@ static void proc_errors(int fd, short args, void *cbdata) orte_job_t *jdata; orte_process_name_t *proc = &caddy->name; orte_proc_state_t state = caddy->proc_state; - char *rtmod; orte_proc_t *child, *ptr; opal_buffer_t *alert; orte_plm_cmd_flag_t cmd; @@ -386,9 +383,6 @@ static void proc_errors(int fd, short args, void *cbdata) goto cleanup; } - /* get our management conduit's routed module name */ - rtmod = orte_rml.get_routed(orte_mgmt_conduit); - if (ORTE_PROC_STATE_COMM_FAILED == state) { /* if it is our own connection, ignore it */ if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_NAME, proc)) { @@ -444,7 +438,7 @@ static void proc_errors(int fd, short args, void *cbdata) } /* if all my routes and children are gone, then terminate ourselves nicely (i.e., this is a normal termination) */ - if (0 == orte_routed.num_routes(rtmod)) { + if (0 == orte_routed.num_routes()) { OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output, "%s errmgr:default:orted all routes gone - exiting", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); @@ -453,7 +447,7 @@ static void proc_errors(int fd, short args, void *cbdata) OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output, "%s errmgr:default:orted not exiting, num_routes() == %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - (int)orte_routed.num_routes(rtmod))); + (int)orte_routed.num_routes())); } } /* if not, then we can continue */ @@ -513,8 +507,7 @@ static void proc_errors(int fd, short args, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&child->name), jdata->num_local_procs)); - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, alert, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); @@ -579,7 +572,7 @@ static void proc_errors(int fd, short args, void *cbdata) } /* if all my routes and children are gone, then terminate ourselves nicely (i.e., this is a normal termination) */ - if (0 == orte_routed.num_routes(rtmod)) { + if (0 == orte_routed.num_routes()) { OPAL_OUTPUT_VERBOSE((2, orte_errmgr_base_framework.framework_output, "%s errmgr:default:orted all routes gone - exiting", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); @@ -621,8 +614,7 @@ static void proc_errors(int fd, short args, void *cbdata) ORTE_NAME_PRINT(&child->name), jdata->num_local_procs)); /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, alert, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); @@ -677,8 +669,7 @@ static void proc_errors(int fd, short args, void *cbdata) OBJ_RELEASE(jdata); /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, alert, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/ess/base/ess_base_std_orted.c b/orte/mca/ess/base/ess_base_std_orted.c index 365fc871e5..d50c9bfd45 100644 --- a/orte/mca/ess/base/ess_base_std_orted.c +++ b/orte/mca/ess/base/ess_base_std_orted.c @@ -109,7 +109,6 @@ int orte_ess_base_orted_setup(void) hwloc_obj_t obj; unsigned i, j; orte_topology_t *t; - opal_list_t transports; orte_ess_base_signal_t *sig; int idx; @@ -448,27 +447,6 @@ int orte_ess_base_orted_setup(void) goto error; } - /* get a conduit for our use - we never route IO over fabric */ - OBJ_CONSTRUCT(&transports, opal_list_t); - orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, - ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING); - if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) { - ret = ORTE_ERR_OPEN_CONDUIT_FAIL; - error = "orte_rml_open_mgmt_conduit"; - goto error; - } - OPAL_LIST_DESTRUCT(&transports); - - OBJ_CONSTRUCT(&transports, opal_list_t); - orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, - ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING); - if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) { - ret = ORTE_ERR_OPEN_CONDUIT_FAIL; - error = "orte_rml_open_coll_conduit"; - goto error; - } - OPAL_LIST_DESTRUCT(&transports); - /* * Group communications */ @@ -609,10 +587,6 @@ int orte_ess_base_orted_finalize(void) pmix_server_finalize(); (void) mca_base_framework_close(&opal_pmix_base_framework); - /* release the conduits */ - orte_rml.close_conduit(orte_mgmt_conduit); - orte_rml.close_conduit(orte_coll_conduit); - /* close frameworks */ (void) mca_base_framework_close(&orte_filem_base_framework); (void) mca_base_framework_close(&orte_grpcomm_base_framework); @@ -695,8 +669,7 @@ static void signal_forward_callback(int fd, short event, void *arg) } /* send it to ourselves */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_NAME, cmd, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, cmd, ORTE_RML_TAG_DAEMON, NULL, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/ess/base/ess_base_std_tool.c b/orte/mca/ess/base/ess_base_std_tool.c index f3ca7baa3f..9f76890d23 100644 --- a/orte/mca/ess/base/ess_base_std_tool.c +++ b/orte/mca/ess/base/ess_base_std_tool.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014 Hochschule Esslingen. All rights reserved. * * Copyright (c) 2015 Cisco Systems, Inc. All rights reserved. @@ -94,7 +94,6 @@ int orte_ess_base_tool_setup(opal_list_t *flags) { int ret; char *error = NULL; - opal_list_t transports; opal_list_t info; opal_value_t *kv, *knext, val; opal_pmix_query_t *q; @@ -222,13 +221,6 @@ int orte_ess_base_tool_setup(opal_list_t *flags) goto error; } - /* get a conduit for our use - we never route IO over fabric */ - OBJ_CONSTRUCT(&transports, opal_list_t); - orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, - ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING); - orte_mgmt_conduit = orte_rml.open_conduit(&transports); - OPAL_LIST_DESTRUCT(&transports); - /* we -may- need to know the name of the head * of our session directory tree, particularly the * tmp base where any other session directories on @@ -269,7 +261,7 @@ int orte_ess_base_tool_setup(opal_list_t *flags) val.data.string = NULL; OBJ_DESTRUCT(&val); /* set the route to be direct */ - if (ORTE_SUCCESS != orte_routed.update_route(NULL, ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) { + if (ORTE_SUCCESS != orte_routed.update_route(ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) { orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri); orte_finalize(); exit(1); @@ -277,7 +269,7 @@ int orte_ess_base_tool_setup(opal_list_t *flags) /* connect to the HNP so we can recv forwarded output */ buf = OBJ_NEW(opal_buffer_t); - ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, ORTE_PROC_MY_HNP, + ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_WARMUP_CONNECTION, orte_rml_send_callback, NULL); if (ORTE_SUCCESS != ret) { @@ -287,7 +279,7 @@ int orte_ess_base_tool_setup(opal_list_t *flags) } /* set the target hnp as our lifeline so we will terminate if it exits */ - orte_routed.set_lifeline(NULL, ORTE_PROC_MY_HNP); + orte_routed.set_lifeline(ORTE_PROC_MY_HNP); /* setup the IOF */ if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) { @@ -317,8 +309,6 @@ int orte_ess_base_tool_finalize(void) { orte_wait_finalize(); - orte_rml.close_conduit(orte_mgmt_conduit); - /* if I am a tool, then all I will have done is * a very small subset of orte_init - ensure that * I only back those elements out diff --git a/orte/mca/ess/hnp/ess_hnp_module.c b/orte/mca/ess/hnp/ess_hnp_module.c index 6e56d69ff5..3706e07596 100644 --- a/orte/mca/ess/hnp/ess_hnp_module.c +++ b/orte/mca/ess/hnp/ess_hnp_module.c @@ -141,7 +141,6 @@ static int rte_init(void) uint32_t h; int idx; orte_topology_t *t; - opal_list_t transports; orte_ess_base_signal_t *sig; opal_value_t val; @@ -370,27 +369,6 @@ static int rte_init(void) goto error; } - /* get a conduit for our use - we never route IO over fabric */ - OBJ_CONSTRUCT(&transports, opal_list_t); - orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, - ORTE_ATTR_LOCAL, orte_mgmt_transport, OPAL_STRING); - if (ORTE_RML_CONDUIT_INVALID == (orte_mgmt_conduit = orte_rml.open_conduit(&transports))) { - ret = ORTE_ERR_OPEN_CONDUIT_FAIL; - error = "orte_rml_open_mgmt_conduit"; - goto error; - } - OPAL_LIST_DESTRUCT(&transports); - - OBJ_CONSTRUCT(&transports, opal_list_t); - orte_set_attribute(&transports, ORTE_RML_TRANSPORT_TYPE, - ORTE_ATTR_LOCAL, orte_coll_transport, OPAL_STRING); - if (ORTE_RML_CONDUIT_INVALID == (orte_coll_conduit = orte_rml.open_conduit(&transports))) { - ret = ORTE_ERR_OPEN_CONDUIT_FAIL; - error = "orte_rml_open_coll_conduit"; - goto error; - } - OPAL_LIST_DESTRUCT(&transports); - /* it is now safe to start the pmix server */ pmix_server_start(); @@ -776,10 +754,6 @@ static int rte_finalize(void) fflush(stdout); fflush(stderr); - /* release the conduits */ - orte_rml.close_conduit(orte_mgmt_conduit); - orte_rml.close_conduit(orte_coll_conduit); - (void) mca_base_framework_close(&orte_iof_base_framework); (void) mca_base_framework_close(&orte_rtc_base_framework); (void) mca_base_framework_close(&orte_odls_base_framework); diff --git a/orte/mca/filem/base/filem_base_receive.c b/orte/mca/filem/base/filem_base_receive.c index 30c958d3e2..6e9a6e7b82 100644 --- a/orte/mca/filem/base/filem_base_receive.c +++ b/orte/mca/filem/base/filem_base_receive.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -208,8 +208,7 @@ static void filem_base_process_get_proc_node_name_cmd(orte_process_name_t* sende return; } - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, + if (0 > (rc = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_FILEM_BASE_RESP, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); @@ -301,8 +300,7 @@ static void filem_base_process_get_remote_path_cmd(orte_process_name_t* sender, goto CLEANUP; } - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, + if (0 > (rc = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_FILEM_BASE_RESP, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/filem/raw/filem_raw_module.c b/orte/mca/filem/raw/filem_raw_module.c index 14359217d9..e499c3bc61 100644 --- a/orte/mca/filem/raw/filem_raw_module.c +++ b/orte/mca/filem/raw/filem_raw_module.c @@ -2,7 +2,7 @@ * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -870,8 +870,7 @@ static void send_complete(char *file, int status) OBJ_RELEASE(buf); return; } - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, buf, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_FILEM_BASE_RESP, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/grpcomm/base/grpcomm_base_stubs.c b/orte/mca/grpcomm/base/grpcomm_base_stubs.c index a27e8603e9..d410a399fd 100644 --- a/orte/mca/grpcomm/base/grpcomm_base_stubs.c +++ b/orte/mca/grpcomm/base/grpcomm_base_stubs.c @@ -231,7 +231,6 @@ orte_grpcomm_coll_t* orte_grpcomm_base_get_tracker(orte_grpcomm_signature_t *sig orte_namelist_t *nm; opal_list_t children; size_t n; - char *routed; /* search the existing tracker list to see if this already exists */ OPAL_LIST_FOREACH(coll, &orte_grpcomm_base.ongoing, orte_grpcomm_coll_t) { @@ -279,38 +278,30 @@ orte_grpcomm_coll_t* orte_grpcomm_base_get_tracker(orte_grpcomm_signature_t *sig return NULL; } - /* get the routed module for our conduit */ - routed = orte_rml.get_routed(orte_coll_conduit); - if (NULL == routed) { - /* this conduit is not routed, so we expect all daemons - * to directly participate */ - coll->nexpected = coll->ndmns; - } else { - /* cycle thru the array of daemons and compare them to our - * children in the routing tree, counting the ones that match - * so we know how many daemons we should receive contributions from */ - OBJ_CONSTRUCT(&children, opal_list_t); - orte_routed.get_routing_list(routed, &children); - while (NULL != (nm = (orte_namelist_t*)opal_list_remove_first(&children))) { - for (n=0; n < coll->ndmns; n++) { - if (nm->name.vpid == coll->dmns[n]) { - coll->nexpected++; - break; - } - } - OBJ_RELEASE(nm); - } - OPAL_LIST_DESTRUCT(&children); - - /* see if I am in the array of participants - note that I may - * be in the rollup tree even though I'm not participating - * in the collective itself */ + /* cycle thru the array of daemons and compare them to our + * children in the routing tree, counting the ones that match + * so we know how many daemons we should receive contributions from */ + OBJ_CONSTRUCT(&children, opal_list_t); + orte_routed.get_routing_list(&children); + while (NULL != (nm = (orte_namelist_t*)opal_list_remove_first(&children))) { for (n=0; n < coll->ndmns; n++) { - if (coll->dmns[n] == ORTE_PROC_MY_NAME->vpid) { + if (nm->name.vpid == coll->dmns[n]) { coll->nexpected++; break; } } + OBJ_RELEASE(nm); + } + OPAL_LIST_DESTRUCT(&children); + + /* see if I am in the array of participants - note that I may + * be in the rollup tree even though I'm not participating + * in the collective itself */ + for (n=0; n < coll->ndmns; n++) { + if (coll->dmns[n] == ORTE_PROC_MY_NAME->vpid) { + coll->nexpected++; + break; + } } return coll; diff --git a/orte/mca/grpcomm/direct/grpcomm_direct.c b/orte/mca/grpcomm/direct/grpcomm_direct.c index ce95319dbc..35779b2ed0 100644 --- a/orte/mca/grpcomm/direct/grpcomm_direct.c +++ b/orte/mca/grpcomm/direct/grpcomm_direct.c @@ -112,8 +112,7 @@ static int xcast(orte_vpid_t *vpids, /* send it to the HNP (could be myself) for relay */ OBJ_RETAIN(buf); // we'll let the RML release it - if (0 > (rc = orte_rml.send_buffer_nb(orte_coll_conduit, - ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_XCAST, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_XCAST, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buf); @@ -153,8 +152,7 @@ static int allgather(orte_grpcomm_coll_t *coll, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); /* send the info to ourselves for tracking */ - rc = orte_rml.send_buffer_nb(orte_coll_conduit, - ORTE_PROC_MY_NAME, relay, + rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, relay, ORTE_RML_TAG_ALLGATHER_DIRECT, orte_rml_send_callback, NULL); return rc; @@ -245,8 +243,7 @@ static void allgather_recv(int status, orte_process_name_t* sender, /* transfer the collected bucket */ opal_dss.copy_payload(reply, &coll->bucket); /* send the info to our parent */ - rc = orte_rml.send_buffer_nb(orte_coll_conduit, - ORTE_PROC_MY_PARENT, reply, + rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_PARENT, reply, ORTE_RML_TAG_ALLGATHER_DIRECT, orte_rml_send_callback, NULL); } @@ -271,7 +268,6 @@ static void xcast_recv(int status, orte_process_name_t* sender, opal_list_t coll; orte_grpcomm_signature_t *sig; orte_rml_tag_t tag; - char *rtmod; size_t inlen, cmplen; uint8_t *packed_data, *cmpdata; int32_t nvals, i; @@ -372,9 +368,6 @@ static void xcast_recv(int status, orte_process_name_t* sender, return; } - /* get our conduit's routed module name */ - rtmod = orte_rml.get_routed(orte_coll_conduit); - /* if this is headed for the daemon command processor, * then we first need to check for add_local_procs * as that command includes some needed wireup info */ @@ -424,7 +417,7 @@ static void xcast_recv(int status, orte_process_name_t* sender, /* update the routing plan - the HNP already did * it when it computed the VM, so don't waste time * re-doing it here */ - orte_routed.update_routing_plan(rtmod); + orte_routed.update_routing_plan(); } /* routing is now possible */ orte_routed_base.routing_enabled = true; @@ -523,7 +516,7 @@ static void xcast_recv(int status, orte_process_name_t* sender, relay: if (!orte_do_not_launch) { /* get the list of next recipients from the routed module */ - orte_routed.get_routing_list(rtmod, &coll); + orte_routed.get_routing_list(&coll); /* if list is empty, no relay is required */ if (opal_list_is_empty(&coll)) { @@ -569,8 +562,7 @@ static void xcast_recv(int status, orte_process_name_t* sender, ORTE_FORCED_TERMINATE(ORTE_ERR_UNREACH); continue; } - if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(orte_coll_conduit, - &nm->name, rly, ORTE_RML_TAG_XCAST, + if (ORTE_SUCCESS != (ret = orte_rml.send_buffer_nb(&nm->name, rly, ORTE_RML_TAG_XCAST, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(rly); diff --git a/orte/mca/iof/hnp/iof_hnp_send.c b/orte/mca/iof/hnp/iof_hnp_send.c index 89f9ff8761..9e2f202ea5 100644 --- a/orte/mca/iof/hnp/iof_hnp_send.c +++ b/orte/mca/iof/hnp/iof_hnp_send.c @@ -12,7 +12,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012 Los Alamos National Security, LLC * All rights reserved - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -109,9 +109,8 @@ int orte_iof_hnp_send_data_to_endpoint(orte_process_name_t *host, /* send the buffer to the host - this is either a daemon or * a tool that requested IOF */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - host, buf, ORTE_RML_TAG_IOF_PROXY, - orte_rml_send_callback, NULL))) { + if (0 > (rc = orte_rml.send_buffer_nb(host, buf, ORTE_RML_TAG_IOF_PROXY, + orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); return rc; } diff --git a/orte/mca/iof/iof.h b/orte/mca/iof/iof.h index 742eab42ca..30292863ed 100644 --- a/orte/mca/iof/iof.h +++ b/orte/mca/iof/iof.h @@ -13,7 +13,7 @@ * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -149,8 +149,7 @@ BEGIN_C_DECLS opal_dss.pack(buf, (b), 1, ORTE_NAME); \ \ /* send the buffer to the HNP */ \ - orte_rml.send_buffer_nb(orte_mgmt_conduit, \ - ORTE_PROC_MY_HNP, buf, \ + orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, \ ORTE_RML_TAG_IOF_HNP, \ orte_rml_send_callback, NULL); \ } while(0); diff --git a/orte/mca/iof/orted/iof_orted.c b/orte/mca/iof/orted/iof_orted.c index e06c3ce9ed..a3b84bcde2 100644 --- a/orte/mca/iof/orted/iof_orted.c +++ b/orte/mca/iof/orted/iof_orted.c @@ -12,7 +12,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Mellanox Technologies. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. @@ -513,8 +513,7 @@ static int orted_output(const orte_process_name_t* peer, "%s iof:orted:output sending %d bytes to HNP", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)strlen(msg)+1)); - orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP, + orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP, orte_rml_send_callback, NULL); return ORTE_SUCCESS; diff --git a/orte/mca/iof/orted/iof_orted_read.c b/orte/mca/iof/orted/iof_orted_read.c index d1e07898bb..c1f1e3a646 100644 --- a/orte/mca/iof/orted/iof_orted_read.c +++ b/orte/mca/iof/orted/iof_orted_read.c @@ -12,7 +12,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -140,9 +140,8 @@ void orte_iof_orted_read_handler(int fd, short event, void *cbdata) "%s iof:orted:read handler sending %d bytes to HNP", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), numbytes)); - orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP, - orte_rml_send_callback, NULL); + orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP, + orte_rml_send_callback, NULL); /* re-add the event */ ORTE_IOF_READ_ACTIVATE(rev); diff --git a/orte/mca/iof/orted/iof_orted_receive.c b/orte/mca/iof/orted/iof_orted_receive.c index 9fae3499fc..c49c437be8 100644 --- a/orte/mca/iof/orted/iof_orted_receive.c +++ b/orte/mca/iof/orted/iof_orted_receive.c @@ -12,7 +12,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2016 Intel Corporation. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -72,9 +72,8 @@ void orte_iof_orted_send_xonxoff(orte_iof_tag_t tag) (ORTE_IOF_XON == tag) ? "xon" : "xoff")); /* send the buffer to the HNP */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP, - send_cb, NULL))) { + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_IOF_HNP, + send_cb, NULL))) { ORTE_ERROR_LOG(rc); } } diff --git a/orte/mca/iof/tool/iof_tool.c b/orte/mca/iof/tool/iof_tool.c index 9ec085bc18..a9ebb86a80 100644 --- a/orte/mca/iof/tool/iof_tool.c +++ b/orte/mca/iof/tool/iof_tool.c @@ -12,7 +12,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -170,9 +170,8 @@ static int tool_pull(const orte_process_name_t* src_name, /* send the buffer to the correct HNP */ ORTE_HNP_NAME_FROM_JOB(&hnp, src_name->jobid); - orte_rml.send_buffer_nb(orte_mgmt_conduit, - &hnp, buf, ORTE_RML_TAG_IOF_HNP, - send_cb, NULL); + orte_rml.send_buffer_nb(&hnp, buf, ORTE_RML_TAG_IOF_HNP, + send_cb, NULL); return ORTE_SUCCESS; } @@ -220,9 +219,8 @@ static int tool_close(const orte_process_name_t* src_name, /* send the buffer to the correct HNP */ ORTE_HNP_NAME_FROM_JOB(&hnp, src_name->jobid); - orte_rml.send_buffer_nb(orte_mgmt_conduit, - &hnp, buf, ORTE_RML_TAG_IOF_HNP, - send_cb, NULL); + orte_rml.send_buffer_nb(&hnp, buf, ORTE_RML_TAG_IOF_HNP, + send_cb, NULL); return ORTE_SUCCESS; } diff --git a/orte/mca/oob/base/base.h b/orte/mca/oob/base/base.h index 830a05ad33..fb4ed1c0be 100644 --- a/orte/mca/oob/base/base.h +++ b/orte/mca/oob/base/base.h @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2017-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2017-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -144,11 +144,6 @@ ORTE_DECLSPEC void orte_oob_base_send_nb(int fd, short args, void *cbdata); */ ORTE_DECLSPEC void orte_oob_base_get_addr(char **uri); -/* Get the available transports and their attributes */ -#define ORTE_OOB_GET_TRANSPORTS(u) orte_oob_base_get_transports(u) -ORTE_DECLSPEC void orte_oob_base_get_transports(opal_list_t *transports); - - #if OPAL_ENABLE_FT_CR == 1 ORTE_DECLSPEC void orte_oob_base_ft_event(int fd, short args, void *cbdata); #endif diff --git a/orte/mca/oob/base/oob_base_stubs.c b/orte/mca/oob/base/oob_base_stubs.c index 03da3c815e..7ead5e847e 100644 --- a/orte/mca/oob/base/oob_base_stubs.c +++ b/orte/mca/oob/base/oob_base_stubs.c @@ -2,7 +2,7 @@ /* * Copyright (c) 2012-2014 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -107,7 +107,7 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata) OPAL_LIST_FOREACH(cli, &orte_oob_base.actives, mca_base_component_list_item_t) { component = (mca_oob_base_component_t*)cli->cli_component; if (NULL != component->is_reachable) { - if (component->is_reachable(msg->routed, &msg->dst)) { + if (component->is_reachable(&msg->dst)) { /* there is a way to reach this peer - record it * so we don't waste this time again */ @@ -170,7 +170,7 @@ void orte_oob_base_send_nb(int fd, short args, void *cbdata) OPAL_LIST_FOREACH(cli, &orte_oob_base.actives, mca_base_component_list_item_t) { component = (mca_oob_base_component_t*)cli->cli_component; /* is this peer reachable via this component? */ - if (!component->is_reachable(msg->routed, &msg->dst)) { + if (!component->is_reachable(&msg->dst)) { continue; } /* it is addressable, so attempt to send via that transport */ @@ -384,30 +384,6 @@ static void process_uri(char *uri) opal_argv_free(uris); } -void orte_oob_base_get_transports(opal_list_t *transports) -{ - mca_base_component_list_item_t *cli; - mca_oob_base_component_t *component; - orte_rml_pathway_t *p; - - opal_output_verbose(5, orte_oob_base_framework.framework_output, - "%s: get transports", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - OPAL_LIST_FOREACH(cli, &orte_oob_base.actives, mca_base_component_list_item_t) { - component = (mca_oob_base_component_t*)cli->cli_component; - opal_output_verbose(5, orte_oob_base_framework.framework_output, - "%s:get transports for component %s", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - component->oob_base.mca_component_name); - if (NULL != component->query_transports) { - if (NULL != (p = component->query_transports())) { - opal_list_append(transports, &p->super); - } - } - } -} - #if OPAL_ENABLE_FT_CR == 1 void orte_oob_base_ft_event(int sd, short argc, void *cbdata) { diff --git a/orte/mca/oob/oob.h b/orte/mca/oob/oob.h index ebb0540ea9..15650d2b39 100644 --- a/orte/mca/oob/oob.h +++ b/orte/mca/oob/oob.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2015-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -55,9 +55,8 @@ typedef int (*mca_oob_base_component_send_fn_t)(orte_rml_send_t *msg); typedef char* (*mca_oob_base_component_get_addr_fn_t)(void); typedef int (*mca_oob_base_component_set_addr_fn_t)(orte_process_name_t *peer, char **uris); -typedef bool (*mca_oob_base_component_is_reachable_fn_t)(char *routed, orte_process_name_t *peer); +typedef bool (*mca_oob_base_component_is_reachable_fn_t)(orte_process_name_t *peer); typedef void (*mca_oob_ping_callback_fn_t)(int status, void *cbdata); -typedef orte_rml_pathway_t* (*mca_oob_base_component_query_transports_fn_t)(void); #if OPAL_ENABLE_FT_CR == 1 typedef int (*mca_oob_base_component_ft_event_fn_t)(int state); @@ -75,7 +74,6 @@ typedef struct { mca_oob_base_component_get_addr_fn_t get_addr; mca_oob_base_component_set_addr_fn_t set_addr; mca_oob_base_component_is_reachable_fn_t is_reachable; - mca_oob_base_component_query_transports_fn_t query_transports; #if OPAL_ENABLE_FT_CR == 1 mca_oob_base_component_ft_event_fn_t ft_event; #endif diff --git a/orte/mca/oob/tcp/oob_tcp.c b/orte/mca/oob/tcp/oob_tcp.c index d5f5ce9c55..15f326d277 100644 --- a/orte/mca/oob/tcp/oob_tcp.c +++ b/orte/mca/oob/tcp/oob_tcp.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -180,7 +180,7 @@ static void send_nb(orte_rml_send_t *msg) /* do we have a route to this peer (could be direct)? */ - hop = orte_routed.get_route(msg->routed, &msg->dst); + hop = orte_routed.get_route(&msg->dst); /* do we know this hop? */ if (NULL == (peer = mca_oob_tcp_peer_lookup(&hop))) { /* push this back to the component so it can try diff --git a/orte/mca/oob/tcp/oob_tcp_component.c b/orte/mca/oob/tcp/oob_tcp_component.c index 85a17e01a6..cdc79cd9e9 100644 --- a/orte/mca/oob/tcp/oob_tcp_component.c +++ b/orte/mca/oob/tcp/oob_tcp_component.c @@ -103,8 +103,7 @@ static int component_send(orte_rml_send_t *msg); static char* component_get_addr(void); static int component_set_addr(orte_process_name_t *peer, char **uris); -static bool component_is_reachable(char *rtmod, orte_process_name_t *peer); -static orte_rml_pathway_t* component_query_transports(void); +static bool component_is_reachable(orte_process_name_t *peer); #if OPAL_ENABLE_FT_CR == 1 static int component_ft_event(int state); #endif @@ -135,7 +134,6 @@ mca_oob_tcp_component_t mca_oob_tcp_component = { .get_addr = component_get_addr, .set_addr = component_set_addr, .is_reachable = component_is_reachable, - .query_transports = component_query_transports, #if OPAL_ENABLE_FT_CR == 1 .ft_event = component_ft_event, #endif @@ -627,37 +625,6 @@ static int component_available(void) return ORTE_SUCCESS; } -static orte_rml_pathway_t* component_query_transports(void) -{ - orte_rml_pathway_t *p; - char *qual; - - /* if neither IPv4 or IPv6 connections are available, then - * we have nothing to support */ - if (NULL == mca_oob_tcp_component.ipv4conns && - NULL == mca_oob_tcp_component.ipv6conns) { - return NULL; - } - - /* if we get here, then we support Ethernet and TCP */ - p = OBJ_NEW(orte_rml_pathway_t); - p->component = strdup("oob"); - orte_set_attribute(&p->attributes, ORTE_RML_TRANSPORT_TYPE, ORTE_ATTR_LOCAL, "Ethernet", OPAL_STRING); - orte_set_attribute(&p->attributes, ORTE_RML_PROTOCOL_TYPE, ORTE_ATTR_LOCAL, "TCP", OPAL_STRING); - /* setup our qualifiers - we route communications, may have IPv4 and/or IPv6, etc. */ - if (NULL != mca_oob_tcp_component.ipv4conns && - NULL != mca_oob_tcp_component.ipv6conns) { - qual = "routed=true:ipv4:ipv6"; - } else if (NULL == mca_oob_tcp_component.ipv6conns) { - qual = "routed=true:ipv4"; - } else { - qual = "routed=true:ipv6"; - } - orte_set_attribute(&p->attributes, ORTE_RML_QUALIFIER_ATTRIB, ORTE_ATTR_LOCAL, qual, OPAL_STRING); - - return p; -} - /* Start all modules */ static int component_startup(void) { @@ -1008,12 +975,12 @@ static int component_set_addr(orte_process_name_t *peer, return ORTE_ERR_TAKE_NEXT_OPTION; } -static bool component_is_reachable(char *rtmod, orte_process_name_t *peer) +static bool component_is_reachable(orte_process_name_t *peer) { orte_process_name_t hop; /* if we have a route to this peer, then we can reach it */ - hop = orte_routed.get_route(rtmod, peer); + hop = orte_routed.get_route(peer); if (ORTE_JOBID_INVALID == hop.jobid || ORTE_VPID_INVALID == hop.vpid) { opal_output_verbose(OOB_TCP_DEBUG_CONNECT, orte_oob_base_framework.framework_output, @@ -1102,7 +1069,7 @@ void mca_oob_tcp_component_lost_connection(int fd, short args, void *cbdata) if (!orte_finalizing) { /* activate the proc state */ - if (ORTE_SUCCESS != orte_routed.route_lost(pop->rtmod, &pop->peer)) { + if (ORTE_SUCCESS != orte_routed.route_lost(&pop->peer)) { ORTE_ACTIVATE_PROC_STATE(&pop->peer, ORTE_PROC_STATE_LIFELINE_LOST); } else { ORTE_ACTIVATE_PROC_STATE(&pop->peer, ORTE_PROC_STATE_COMM_FAILED); @@ -1216,7 +1183,6 @@ void mca_oob_tcp_component_hop_unknown(int fd, short args, void *cbdata) snd->count = mop->snd->hdr.nbytes; snd->cbfunc.iov = NULL; snd->cbdata = NULL; - snd->routed = strdup(mop->snd->hdr.routed); /* activate the OOB send state */ ORTE_OOB_SEND(snd); /* protect the data */ @@ -1416,15 +1382,11 @@ OBJ_CLASS_INSTANCE(mca_oob_tcp_addr_t, static void pop_cons(mca_oob_tcp_peer_op_t *pop) { - pop->rtmod = NULL; pop->net = NULL; pop->port = NULL; } static void pop_des(mca_oob_tcp_peer_op_t *pop) { - if (NULL != pop->rtmod) { - free(pop->rtmod); - } if (NULL != pop->net) { free(pop->net); } diff --git a/orte/mca/oob/tcp/oob_tcp_connection.c b/orte/mca/oob/tcp/oob_tcp_connection.c index ff06ec8a97..819d2d77bf 100644 --- a/orte/mca/oob/tcp/oob_tcp_connection.c +++ b/orte/mca/oob/tcp/oob_tcp_connection.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2018 Cisco Systems, Inc. All rights reserved * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. @@ -334,7 +334,7 @@ void mca_oob_tcp_peer_try_connect(int fd, short args, void *cbdata) * an event in the component event base, and so it will fire async * from us if we are in our own progress thread */ - ORTE_ACTIVATE_TCP_CMP_OP(peer, NULL, mca_oob_tcp_component_failed_to_connect); + ORTE_ACTIVATE_TCP_CMP_OP(peer, mca_oob_tcp_component_failed_to_connect); /* FIXME: post any messages in the send queue back to the OOB * level for reassignment */ @@ -937,7 +937,7 @@ int mca_oob_tcp_peer_recv_connect_ack(mca_oob_tcp_peer_t* pr, /* set the peer into the component and OOB-level peer tables to indicate * that we know this peer and we will be handling him */ - ORTE_ACTIVATE_TCP_CMP_OP(peer, NULL, mca_oob_tcp_component_set_module); + ORTE_ACTIVATE_TCP_CMP_OP(peer, mca_oob_tcp_component_set_module); /* connected */ tcp_peer_connected(peer); @@ -968,7 +968,7 @@ static void tcp_peer_connected(mca_oob_tcp_peer_t* peer) } /* update the route */ - orte_routed.update_route(NULL, &peer->name, &peer->name); + orte_routed.update_route(&peer->name, &peer->name); /* initiate send of first message on queue */ if (NULL == peer->send_msg) { @@ -1027,7 +1027,7 @@ void mca_oob_tcp_peer_close(mca_oob_tcp_peer_t *peer) /* inform the component-level that we have lost a connection so * it can decide what to do about it. */ - ORTE_ACTIVATE_TCP_CMP_OP(peer, NULL, mca_oob_tcp_component_lost_connection); + ORTE_ACTIVATE_TCP_CMP_OP(peer, mca_oob_tcp_component_lost_connection); if (orte_orteds_term_ordered || orte_finalizing || orte_abnormal_term_ordered) { /* nothing more to do */ @@ -1238,7 +1238,7 @@ bool mca_oob_tcp_peer_accept(mca_oob_tcp_peer_t* peer) /* set the peer into the component and OOB-level peer tables to indicate * that we know this peer and we will be handling him */ - ORTE_ACTIVATE_TCP_CMP_OP(peer, NULL, mca_oob_tcp_component_set_module); + ORTE_ACTIVATE_TCP_CMP_OP(peer, mca_oob_tcp_component_set_module); tcp_peer_connected(peer); if (!peer->recv_ev_active) { diff --git a/orte/mca/oob/tcp/oob_tcp_peer.h b/orte/mca/oob/tcp/oob_tcp_peer.h index 8d04fd4438..9a175e084e 100644 --- a/orte/mca/oob/tcp/oob_tcp_peer.h +++ b/orte/mca/oob/tcp/oob_tcp_peer.h @@ -12,7 +12,7 @@ * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2011 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -73,21 +73,15 @@ typedef struct { uint16_t af_family; char *net; char *port; - char *rtmod; } mca_oob_tcp_peer_op_t; OBJ_CLASS_DECLARATION(mca_oob_tcp_peer_op_t); -#define ORTE_ACTIVATE_TCP_CMP_OP(p, r, cbfunc) \ +#define ORTE_ACTIVATE_TCP_CMP_OP(p, cbfunc) \ do { \ mca_oob_tcp_peer_op_t *pop; \ - char *proxy; \ pop = OBJ_NEW(mca_oob_tcp_peer_op_t); \ pop->peer.jobid = (p)->name.jobid; \ pop->peer.vpid = (p)->name.vpid; \ - proxy = (r); \ - if (NULL != proxy) { \ - pop->rtmod = strdup(proxy); \ - } \ ORTE_THREADSHIFT(pop, orte_oob_base.ev_base, \ (cbfunc), ORTE_MSG_PRI); \ } while(0); diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.c b/orte/mca/oob/tcp/oob_tcp_sendrecv.c index 6db0243ed5..c1ee0740f5 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.c +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -586,7 +586,6 @@ void mca_oob_tcp_recv_handler(int sd, short flags, void *cbdata) snd->data = peer->recv_msg->data; snd->seq_num = peer->recv_msg->hdr.seq_num; snd->count = peer->recv_msg->hdr.nbytes; - snd->routed = strdup(peer->recv_msg->hdr.routed); snd->cbfunc.iov = NULL; snd->cbdata = NULL; /* activate the OOB send state */ diff --git a/orte/mca/oob/tcp/oob_tcp_sendrecv.h b/orte/mca/oob/tcp/oob_tcp_sendrecv.h index e906c962a9..1ac1b570fc 100644 --- a/orte/mca/oob/tcp/oob_tcp_sendrecv.h +++ b/orte/mca/oob/tcp/oob_tcp_sendrecv.h @@ -12,7 +12,7 @@ * Copyright (c) 2006-2013 Los Alamos National Security, LLC. * All rights reserved. * Copyright (c) 2010-2018 Cisco Systems, Inc. All rights reserved - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -109,10 +109,6 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t); _s->hdr.type = MCA_OOB_TCP_USER; \ _s->hdr.tag = (m)->tag; \ _s->hdr.seq_num = (m)->seq_num; \ - if (NULL != (m)->routed) { \ - (void)opal_string_copy(_s->hdr.routed, (m)->routed, \ - ORTE_MAX_RTD_SIZE); \ - } \ /* point to the actual message */ \ _s->msg = (m); \ /* set the total number of bytes to be sent */ \ @@ -157,10 +153,6 @@ OBJ_CLASS_DECLARATION(mca_oob_tcp_recv_t); _s->hdr.type = MCA_OOB_TCP_USER; \ _s->hdr.tag = (m)->tag; \ _s->hdr.seq_num = (m)->seq_num; \ - if (NULL != (m)->routed) { \ - (void)opal_string_copy(_s->hdr.routed, (m)->routed, \ - ORTE_MAX_RTD_SIZE); \ - } \ /* point to the actual message */ \ _s->msg = (m); \ /* set the total number of bytes to be sent */ \ diff --git a/orte/mca/plm/base/plm_base_launch_support.c b/orte/mca/plm/base/plm_base_launch_support.c index 57f609bfc1..1d9da92aad 100644 --- a/orte/mca/plm/base/plm_base_launch_support.c +++ b/orte/mca/plm/base/plm_base_launch_support.c @@ -179,7 +179,7 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata) orte_ras_base_display_alloc(); } /* ensure we update the routing plan */ - orte_routed.update_routing_plan(NULL); + orte_routed.update_routing_plan(); /* prep the buffer */ OBJ_CONSTRUCT(&buf, opal_buffer_t); @@ -812,8 +812,7 @@ void orte_plm_base_post_launch(int fd, short args, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid), ORTE_NAME_PRINT(&jdata->originator))); - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &jdata->originator, answer, + if (0 > (ret = orte_rml.send_buffer_nb(&jdata->originator, answer, ORTE_RML_TAG_LAUNCH_RESP, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); @@ -1346,8 +1345,7 @@ void orte_plm_base_daemon_callback(int status, orte_process_name_t* sender, goto CLEANUP; } /* send it */ - orte_rml.send_buffer_nb(orte_mgmt_conduit, - &dname, relay, + orte_rml.send_buffer_nb(&dname, relay, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL); /* we will count this node as completed @@ -2310,7 +2308,7 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata) /* ensure all routing plans are up-to-date - we need this * so we know how to tree-spawn and/or xcast info */ - orte_routed.update_routing_plan(NULL); + orte_routed.update_routing_plan(); } /* mark that the daemon job changed */ diff --git a/orte/mca/plm/base/plm_base_receive.c b/orte/mca/plm/base/plm_base_receive.c index d89a6b9313..c95f203bc7 100644 --- a/orte/mca/plm/base/plm_base_receive.c +++ b/orte/mca/plm/base/plm_base_receive.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2011 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -278,8 +278,7 @@ void orte_plm_base_recv(int status, orte_process_name_t* sender, } /* send the response back to the sender */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, ORTE_RML_TAG_LAUNCH_RESP, + if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_LAUNCH_RESP, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); diff --git a/orte/mca/plm/rsh/plm_rsh_module.c b/orte/mca/plm/rsh/plm_rsh_module.c index d913dab6de..ffe0488ea4 100644 --- a/orte/mca/plm/rsh/plm_rsh_module.c +++ b/orte/mca/plm/rsh/plm_rsh_module.c @@ -14,7 +14,7 @@ * reserved. * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2011-2017 IBM Corporation. All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -265,7 +265,6 @@ static void rsh_wait_daemon(int sd, short flags, void *cbdata) orte_wait_tracker_t *t2 = (orte_wait_tracker_t*)cbdata; orte_plm_rsh_caddy_t *caddy=(orte_plm_rsh_caddy_t*)t2->cbdata; orte_proc_t *daemon = caddy->daemon; - char *rtmod; if (orte_orteds_term_ordered || orte_abnormal_term_ordered) { /* ignore any such report - it will occur if we left the @@ -290,8 +289,7 @@ static void rsh_wait_daemon(int sd, short flags, void *cbdata) buf = OBJ_NEW(opal_buffer_t); opal_dss.pack(buf, &(daemon->name.vpid), 1, ORTE_VPID); opal_dss.pack(buf, &daemon->exit_code, 1, OPAL_INT); - orte_rml.send_buffer_nb(orte_coll_conduit, - ORTE_PROC_MY_HNP, buf, + orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_REPORT_REMOTE_LAUNCH, orte_rml_send_callback, NULL); /* note that this daemon failed */ @@ -312,8 +310,7 @@ static void rsh_wait_daemon(int sd, short flags, void *cbdata) /* remove it from the routing table to ensure num_routes * returns the correct value */ - rtmod = orte_rml.get_routed(orte_coll_conduit); - orte_routed.route_lost(rtmod, &daemon->name); + orte_routed.route_lost(&daemon->name); /* report that the daemon has failed so we can exit */ ORTE_ACTIVATE_PROC_STATE(&daemon->name, ORTE_PROC_STATE_FAILED_TO_START); } @@ -797,7 +794,6 @@ static int remote_spawn(void) orte_job_t *daemons; opal_list_t coll; orte_namelist_t *child; - char *rtmod; OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output, "%s plm:rsh: remote spawn called", @@ -816,9 +812,8 @@ static int remote_spawn(void) } /* get the updated routing list */ - rtmod = orte_rml.get_routed(orte_coll_conduit); OBJ_CONSTRUCT(&coll, opal_list_t); - orte_routed.get_routing_list(rtmod, &coll); + orte_routed.get_routing_list(&coll); /* if I have no children, just return */ if (0 == opal_list_get_size(&coll)) { @@ -913,8 +908,7 @@ cleanup: buf = OBJ_NEW(opal_buffer_t); opal_dss.pack(buf, &target.vpid, 1, ORTE_VPID); opal_dss.pack(buf, &rc, 1, OPAL_INT); - orte_rml.send_buffer_nb(orte_coll_conduit, - ORTE_PROC_MY_HNP, buf, + orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_REPORT_REMOTE_LAUNCH, orte_rml_send_callback, NULL); } @@ -1040,7 +1034,6 @@ static void launch_daemons(int fd, short args, void *cbdata) char *username; int port, *portptr; orte_namelist_t *child; - char *rtmod; ORTE_ACQUIRE_OBJECT(state); @@ -1185,8 +1178,7 @@ static void launch_daemons(int fd, short args, void *cbdata) /* get the updated routing list */ OBJ_CONSTRUCT(&coll, opal_list_t); - rtmod = orte_rml.get_routed(orte_coll_conduit); - orte_routed.get_routing_list(rtmod, &coll); + orte_routed.get_routing_list(&coll); } /* setup the launch */ diff --git a/orte/mca/rml/base/Makefile.am b/orte/mca/rml/base/Makefile.am index 4bbaa83e60..d9a03c8f79 100644 --- a/orte/mca/rml/base/Makefile.am +++ b/orte/mca/rml/base/Makefile.am @@ -11,7 +11,7 @@ # All rights reserved. # Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights # reserved. -# Copyright (c) 2016-2017 Intel, Inc. All rights reserved. +# Copyright (c) 2016-2019 Intel, Inc. All rights reserved. # $COPYRIGHT$ # # Additional copyrights may follow @@ -26,5 +26,4 @@ headers += \ libmca_rml_la_SOURCES += \ base/rml_base_frame.c \ base/rml_base_contact.c \ - base/rml_base_msg_handlers.c \ - base/rml_base_stubs.c + base/rml_base_msg_handlers.c diff --git a/orte/mca/rml/base/base.h b/orte/mca/rml/base/base.h index 36a8046eea..6683c90876 100644 --- a/orte/mca/rml/base/base.h +++ b/orte/mca/rml/base/base.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007-2014 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -65,18 +65,9 @@ ORTE_DECLSPEC int orte_rml_base_select(void); /* * globals that might be needed */ -/* adding element to hold the active modules and components */ -typedef struct { - opal_list_item_t super; - int pri; - orte_rml_component_t *component; -} orte_rml_base_active_t; -OBJ_CLASS_DECLARATION(orte_rml_base_active_t); /* a global struct containing framework-level values */ typedef struct { - opal_list_t actives; /* list to hold the active components */ - opal_pointer_array_t conduits; /* array to hold the open conduits */ opal_list_t posted_recvs; opal_list_t unmatched_msgs; int max_retries; @@ -114,8 +105,6 @@ typedef struct { * transfers */ char *data; - /* routed module to be used */ - char *routed; } orte_rml_send_t; OBJ_CLASS_DECLARATION(orte_rml_send_t); @@ -232,52 +221,11 @@ OBJ_CLASS_DECLARATION(orte_self_send_xfer_t); OBJ_RELEASE(m); \ }while(0); -#define ORTE_RML_INVALID_CHANNEL_NUM UINT32_MAX /* common implementations */ ORTE_DECLSPEC void orte_rml_base_post_recv(int sd, short args, void *cbdata); ORTE_DECLSPEC void orte_rml_base_process_msg(int fd, short flags, void *cbdata); -/* Stub API interfaces to cycle through active plugins */ -int orte_rml_API_ping(orte_rml_conduit_t conduit_id, - const char* contact_info, - const struct timeval* tv); - -int orte_rml_API_send_nb(orte_rml_conduit_t conduit_id, - orte_process_name_t* peer, struct iovec* msg, - int count, orte_rml_tag_t tag, - orte_rml_callback_fn_t cbfunc, void* cbdata); - -int orte_rml_API_send_buffer_nb(orte_rml_conduit_t conduit_id, - orte_process_name_t* peer, - struct opal_buffer_t* buffer, - orte_rml_tag_t tag, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata); - -void orte_rml_API_recv_nb(orte_process_name_t* peer, - orte_rml_tag_t tag, - bool persistent, - orte_rml_callback_fn_t cbfunc, - void* cbdata); -void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer, - orte_rml_tag_t tag, - bool persistent, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata); - -void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag); - -void orte_rml_API_purge(orte_process_name_t *peer); - -int orte_rml_API_query_transports(opal_list_t *providers); - -orte_rml_conduit_t orte_rml_API_open_conduit(opal_list_t *attributes); - -void orte_rml_API_close_conduit(orte_rml_conduit_t id); - -char* orte_rml_API_get_routed(orte_rml_conduit_t id); - END_C_DECLS #endif /* MCA_RML_BASE_H */ diff --git a/orte/mca/rml/base/rml_base_frame.c b/orte/mca/rml/base/rml_base_frame.c index 157cbe3267..f13e1f70df 100644 --- a/orte/mca/rml/base/rml_base_frame.c +++ b/orte/mca/rml/base/rml_base_frame.c @@ -5,7 +5,7 @@ * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -40,26 +40,10 @@ /* Initialising stub fns in the global var used by other modules */ -orte_rml_base_API_t orte_rml = { - .ping = orte_rml_API_ping, - .send_nb = orte_rml_API_send_nb, - .send_buffer_nb = orte_rml_API_send_buffer_nb, - .recv_nb = orte_rml_API_recv_nb, - .recv_buffer_nb = orte_rml_API_recv_buffer_nb, - .recv_cancel = orte_rml_API_recv_cancel, - .purge = orte_rml_API_purge, - .query_transports = orte_rml_API_query_transports, - .open_conduit = orte_rml_API_open_conduit, - .close_conduit = orte_rml_API_close_conduit, - .get_routed = orte_rml_API_get_routed -}; +orte_rml_base_module_t orte_rml = {0}; orte_rml_base_t orte_rml_base = {{{0}}}; -orte_rml_component_t *orte_rml_component = NULL; - -static bool selected = false; - static int orte_rml_base_register(mca_base_register_flag_t flags) { orte_rml_base.max_retries = 3; @@ -82,62 +66,9 @@ static int orte_rml_base_register(mca_base_register_flag_t flags) return ORTE_SUCCESS; } -static void cleanup(int sd, short args, void *cbdata) -{ - orte_lock_t *lk = (orte_lock_t*)cbdata; - - ORTE_ACQUIRE_OBJECT(active); - OPAL_LIST_DESTRUCT(&orte_rml_base.posted_recvs); - if (NULL != lk) { - ORTE_POST_OBJECT(lk); - ORTE_WAKEUP_THREAD(lk); - } -} - static int orte_rml_base_close(void) { - orte_lock_t lock; - int idx, total_conduits = opal_pointer_array_get_size(&orte_rml_base.conduits); - orte_rml_base_module_t *mod; - orte_rml_component_t *comp; - - /* cycle thru the conduits opened and call each module's finalize */ - /* The components finalise/close() will be responsible for freeing the module pointers */ - for (idx = 0; idx < total_conduits ; idx++) - { - if( NULL != (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits,idx))) { - /* close the conduit */ - comp = (orte_rml_component_t*)mod->component; - if (NULL != comp && NULL != comp->close_conduit) { - comp->close_conduit(mod); - } - free(mod); - } - - } - OBJ_DESTRUCT(&orte_rml_base.conduits); - - OPAL_LIST_DESTRUCT(&orte_rml_base.actives) - - /* because the RML posted recvs list is in a separate - * async thread for apps, we can't just destruct it here. - * Instead, we push it into that event thread and destruct - * it there */ - if (ORTE_PROC_IS_APP) { - opal_event_t ev; - ORTE_CONSTRUCT_LOCK(&lock); - opal_event_set(orte_event_base, &ev, -1, - OPAL_EV_WRITE, cleanup, (void*)&lock); - opal_event_set_priority(&ev, ORTE_ERROR_PRI); - ORTE_POST_OBJECT(ev); - opal_event_active(&ev, OPAL_EV_WRITE, 1); - ORTE_WAIT_THREAD(&lock); - ORTE_DESTRUCT_LOCK(&lock); - } else { - /* we can call the destruct directly */ - cleanup(0, 0, NULL); - } - + OPAL_LIST_DESTRUCT(&orte_rml_base.posted_recvs); return mca_base_framework_components_close(&orte_rml_base_framework, NULL); } @@ -145,11 +76,8 @@ static int orte_rml_base_open(mca_base_open_flag_t flags) { /* Initialize globals */ /* construct object for holding the active plugin modules */ - OBJ_CONSTRUCT(&orte_rml_base.actives, opal_list_t); OBJ_CONSTRUCT(&orte_rml_base.posted_recvs, opal_list_t); OBJ_CONSTRUCT(&orte_rml_base.unmatched_msgs, opal_list_t); - OBJ_CONSTRUCT(&orte_rml_base.conduits, opal_pointer_array_t); - opal_pointer_array_init(&orte_rml_base.conduits,1,INT16_MAX,1); /* Open up all available components */ return mca_base_framework_components_open(&orte_rml_base_framework, flags); @@ -159,61 +87,28 @@ MCA_BASE_FRAMEWORK_DECLARE(orte, rml, "ORTE Run-Time Messaging Layer", orte_rml_base_register, orte_rml_base_open, orte_rml_base_close, mca_rml_base_static_components, 0); -OBJ_CLASS_INSTANCE(orte_rml_base_active_t, - opal_list_item_t, - NULL, NULL); - /** * Function for ordering the component(plugin) by priority */ int orte_rml_base_select(void) { - mca_base_component_list_item_t *cli=NULL; - orte_rml_component_t *component=NULL; - orte_rml_base_active_t *newmodule, *mod; - bool inserted; + orte_rml_component_t *best_component = NULL; + orte_rml_base_module_t *best_module = NULL; - if (selected) { - return ORTE_SUCCESS; - } - selected = true; - - OPAL_LIST_FOREACH(cli, &orte_rml_base_framework.framework_components, mca_base_component_list_item_t ) { - component = (orte_rml_component_t*) cli->cli_component; - - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "orte_rml_base_select: Initializing %s component %s", - component->base.mca_type_name, - component->base.mca_component_name); - - /* add to the list of available components */ - newmodule = OBJ_NEW(orte_rml_base_active_t); - newmodule->pri = component->priority; - newmodule->component = component; - - /* maintain priority order */ - inserted = false; - OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) { - if (newmodule->pri > mod->pri) { - opal_list_insert_pos(&orte_rml_base.actives, - (opal_list_item_t*)mod, &newmodule->super); - inserted = true; - break; - } - } - if (!inserted) { - /* must be lowest priority - add to end */ - opal_list_append(&orte_rml_base.actives, &newmodule->super); - } + /* + * Select the best component + */ + if( OPAL_SUCCESS != mca_base_select("rml", orte_rml_base_framework.framework_output, + &orte_rml_base_framework.framework_components, + (mca_base_module_t **) &best_module, + (mca_base_component_t **) &best_component, NULL) ) { + /* This will only happen if no component was selected */ + /* If we didn't find one to select, that is an error */ + return ORTE_ERROR; } - if (4 < opal_output_get_verbosity(orte_rml_base_framework.framework_output)) { - opal_output(0, "%s: Final rml priorities", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - /* show the prioritized list */ - OPAL_LIST_FOREACH(mod, &orte_rml_base.actives, orte_rml_base_active_t) { - opal_output(0, "\tComponent: %s Priority: %d", mod->component->base.mca_component_name, mod->pri); - } - } + /* Save the winner */ + orte_rml = *best_module; return ORTE_SUCCESS; } @@ -279,17 +174,10 @@ static void send_cons(orte_rml_send_t *ptr) ptr->buffer = NULL; ptr->data = NULL; ptr->seq_num = 0xFFFFFFFF; - ptr->routed = NULL; -} -static void send_des(orte_rml_send_t *ptr) -{ - if (NULL != ptr->routed) { - free(ptr->routed); - } } OBJ_CLASS_INSTANCE(orte_rml_send_t, opal_list_item_t, - send_cons, send_des); + send_cons, NULL); static void send_req_cons(orte_rml_send_request_t *ptr) @@ -353,21 +241,3 @@ static void prq_des(orte_rml_recv_request_t *ptr) OBJ_CLASS_INSTANCE(orte_rml_recv_request_t, opal_object_t, prq_cons, prq_des); - -static void pthcons(orte_rml_pathway_t *p) -{ - p->component = NULL; - OBJ_CONSTRUCT(&p->attributes, opal_list_t); - OBJ_CONSTRUCT(&p->transports, opal_list_t); -} -static void pthdes(orte_rml_pathway_t *p) -{ - if (NULL != p->component) { - free(p->component); - } - OPAL_LIST_DESTRUCT(&p->attributes); - OPAL_LIST_DESTRUCT(&p->transports); -} -OBJ_CLASS_INSTANCE(orte_rml_pathway_t, - opal_list_item_t, - pthcons, pthdes); diff --git a/orte/mca/rml/base/rml_base_msg_handlers.c b/orte/mca/rml/base/rml_base_msg_handlers.c index facc1e41c5..4a072d1757 100644 --- a/orte/mca/rml/base/rml_base_msg_handlers.c +++ b/orte/mca/rml/base/rml_base_msg_handlers.c @@ -189,8 +189,7 @@ void orte_rml_base_process_msg(int fd, short flags, void *cbdata) return; } - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &msg->sender, buffer, + if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(&msg->sender, buffer, ORTE_RML_TAG_NODE_REGEX_REPORT, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/rml/base/rml_base_stubs.c b/orte/mca/rml/base/rml_base_stubs.c deleted file mode 100644 index 00a8e8f0c5..0000000000 --- a/orte/mca/rml/base/rml_base_stubs.c +++ /dev/null @@ -1,333 +0,0 @@ -/* - * Copyright (c) 2004-2011 The University of Tennessee and The University - * of Tennessee Research Foundation. All rights - * reserved. - * Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights - * reserved. - * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. - * Copyright (c) 2015-2017 Research Organization for Information Science - * and Technology (RIST). All rights reserved. - * $COPYRIGHT$ - * - * Additional copyrights may follow - * - * $HEADER$ - */ - -#include "orte_config.h" - -#include - -#include "opal/class/opal_list.h" -#include "opal/dss/dss.h" -#include "orte/mca/mca.h" -#include "opal/mca/base/mca_base_component_repository.h" -#include "opal/util/argv.h" -#include "opal/util/output.h" - -#include "orte/mca/rml/rml.h" -#include "orte/mca/state/state.h" -#include "orte/runtime/orte_wait.h" -#include "orte/util/name_fns.h" -#include "orte/util/threads.h" - -#include "orte/mca/rml/base/base.h" - -/* - * The stub API interface functions - */ - -/** Open a conduit - check if the ORTE_RML_INCLUDE_COMP attribute is provided, this is */ -/* a comma seperated list of components, try to open the conduit in this order. */ -/* if the ORTE_RML_INCLUDE_COMP is not provided or this list was not able to open conduit */ -/* call the open_conduit() of the component in priority order to see if they can use the */ -/* attribute to open a conduit. */ -/* Note: The component takes care of checking for duplicate and returning the previously */ -/* opened module* in case of duplicates. Currently we are saving it in a new conduit_id */ -/* even if it is duplicate. [ToDo] compare the module* received from component to see if */ -/* already present in array and return the prev conduit_id instead of adding it again to array */ -/* @param[in] attributes The attributes is a list of opal_value_t of type OPAL_STRING */ -orte_rml_conduit_t orte_rml_API_open_conduit(opal_list_t *attributes) -{ - orte_rml_base_active_t *active; - orte_rml_component_t *comp; - orte_rml_base_module_t *mod, *ourmod=NULL; - int rc; - - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s rml:base:open_conduit", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - /* bozo check - you cannot specify both include and exclude */ - if (orte_get_attribute(attributes, ORTE_RML_INCLUDE_COMP_ATTRIB, NULL, OPAL_STRING) && - orte_get_attribute(attributes, ORTE_RML_EXCLUDE_COMP_ATTRIB, NULL, OPAL_STRING)) { - // orte_show_help(); - return ORTE_ERR_NOT_SUPPORTED; - } - - /* cycle thru the actives in priority order and let each one see if they can support this request */ - OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) { - comp = (orte_rml_component_t *)active->component; - if (NULL != comp->open_conduit) { - if (NULL != (mod = comp->open_conduit(attributes))) { - opal_output_verbose(2, orte_rml_base_framework.framework_output, - "%s rml:base:open_conduit Component %s provided a conduit", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - active->component->base.mca_component_name); - ourmod = mod; - break; - } - } - } - if (NULL != ourmod) { - /* we got an answer - store this conduit in our array */ - rc = opal_pointer_array_add(&orte_rml_base.conduits, ourmod); - if (rc < 0) { - return ORTE_RML_CONDUIT_INVALID; - } - return rc; - } - /* we get here if nobody could support it */ - ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED); - return ORTE_RML_CONDUIT_INVALID; -} - - - -/** Shutdown the communication system and clean up resources */ -void orte_rml_API_close_conduit(orte_rml_conduit_t id) -{ - orte_rml_base_module_t *mod; - orte_rml_component_t *comp; - - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s rml:base:close_conduit(%d)", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (int)id); - - if( NULL != (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, id))) { - comp = (orte_rml_component_t*)mod->component; - if (NULL != comp && NULL != comp->close_conduit) { - comp->close_conduit(mod); - } - opal_pointer_array_set_item(&orte_rml_base.conduits, id, NULL); - free(mod); - } -} - - - -/** Ping process for connectivity check */ -int orte_rml_API_ping(orte_rml_conduit_t conduit_id, - const char* contact_info, - const struct timeval* tv) -{ - int rc = ORTE_ERR_UNREACH; - orte_rml_base_module_t *mod; - - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s rml:base:ping(conduit-%d)", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),conduit_id); - - /* get the module */ - if (NULL == (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, conduit_id))) { - return rc; - } - if (NULL == mod->ping) { - return rc; - } - rc = mod->ping((struct orte_rml_base_module_t*)mod, contact_info, tv); - return rc; -} - - -/** Send non-blocking iovec message through a specific conduit*/ -int orte_rml_API_send_nb(orte_rml_conduit_t conduit_id, - orte_process_name_t* peer, - struct iovec* msg, - int count, - orte_rml_tag_t tag, - orte_rml_callback_fn_t cbfunc, - void* cbdata) -{ - int rc = ORTE_ERR_UNREACH; - orte_rml_base_module_t *mod; - - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s rml:base:send_nb() to peer %s through conduit %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer),conduit_id); - /* get the module */ - if (NULL == (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, conduit_id))) { - return rc; - } - if (NULL == mod->send_nb) { - return rc; - } - rc = mod->send_nb((struct orte_rml_base_module_t*)mod, peer, msg, count, tag, cbfunc, cbdata); - return rc; -} - -/** Send non-blocking buffer message */ -int orte_rml_API_send_buffer_nb(orte_rml_conduit_t conduit_id, - orte_process_name_t* peer, - struct opal_buffer_t* buffer, - orte_rml_tag_t tag, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata) -{ - int rc = ORTE_ERR_UNREACH; - orte_rml_base_module_t *mod; - - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s rml:base:send_buffer_nb() to peer %s through conduit %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer),conduit_id); - - /* get the module */ - if (NULL == (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, conduit_id))) { - return rc; - } - if (NULL == mod->send_buffer_nb) { - return rc; - } - rc = mod->send_buffer_nb((struct orte_rml_base_module_t*)mod, peer, buffer, tag, cbfunc, cbdata); - return rc; -} - -/** post a receive for an IOV message - this is done - * strictly in the base, and so it does not go to a module */ -void orte_rml_API_recv_nb(orte_process_name_t* peer, - orte_rml_tag_t tag, - bool persistent, - orte_rml_callback_fn_t cbfunc, - void* cbdata) -{ - orte_rml_recv_request_t *req; - - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s rml_recv_nb for peer %s tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer), tag); - - /* push the request into the event base so we can add - * the receive to our list of posted recvs */ - req = OBJ_NEW(orte_rml_recv_request_t); - req->post->buffer_data = false; - req->post->peer.jobid = peer->jobid; - req->post->peer.vpid = peer->vpid; - req->post->tag = tag; - req->post->persistent = persistent; - req->post->cbfunc.iov = cbfunc; - req->post->cbdata = cbdata; - ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI); -} - -/** Receive non-blocking buffer message */ -void orte_rml_API_recv_buffer_nb(orte_process_name_t* peer, - orte_rml_tag_t tag, - bool persistent, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata) -{ - orte_rml_recv_request_t *req; - - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s rml_recv_buffer_nb for peer %s tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer), tag); - - /* push the request into the event base so we can add - * the receive to our list of posted recvs */ - req = OBJ_NEW(orte_rml_recv_request_t); - req->post->buffer_data = true; - req->post->peer.jobid = peer->jobid; - req->post->peer.vpid = peer->vpid; - req->post->tag = tag; - req->post->persistent = persistent; - req->post->cbfunc.buffer = cbfunc; - req->post->cbdata = cbdata; - ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI); -} - -/** Cancel posted non-blocking receive */ -void orte_rml_API_recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag) -{ - orte_rml_recv_request_t *req; - - opal_output_verbose(10, orte_rml_base_framework.framework_output, - "%s rml_recv_cancel for peer %s tag %d", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), - ORTE_NAME_PRINT(peer), tag); - - ORTE_ACQUIRE_OBJECT(orte_event_base_active); - if (!orte_event_base_active) { - /* no event will be processed any more, so simply return. */ - return; - } - - /* push the request into the event base so we can remove - * the receive from our list of posted recvs */ - req = OBJ_NEW(orte_rml_recv_request_t); - req->cancel = true; - req->post->peer.jobid = peer->jobid; - req->post->peer.vpid = peer->vpid; - req->post->tag = tag; - ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI); -} - -/** Purge information */ -void orte_rml_API_purge(orte_process_name_t *peer) -{ - orte_rml_base_module_t *mod; - int i; - - for (i=0; i < orte_rml_base.conduits.size; i++) { - /* get the module */ - if (NULL != (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, i))) { - if (NULL != mod->purge) { - mod->purge(peer); - } - } - } -} - - -int orte_rml_API_query_transports(opal_list_t *providers) -{ - - orte_rml_base_active_t *active; - orte_rml_pathway_t *p; - - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "%s rml:base:orte_rml_API_query_transports()", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - /* cycle thru the actives */ - OPAL_LIST_FOREACH(active, &orte_rml_base.actives, orte_rml_base_active_t) { - if (NULL != active->component->query_transports) { - opal_output_verbose(10,orte_rml_base_framework.framework_output, - "\n calling module: %s->query_transports() \n", - active->component->base.mca_component_name); - if (NULL != (p = active->component->query_transports())) { - /* pass the results across */ - OBJ_RETAIN(p); - opal_list_append(providers, &p->super); - } - } - } - return ORTE_SUCCESS; - -} - -char* orte_rml_API_get_routed(orte_rml_conduit_t id) -{ - orte_rml_base_module_t *mod; - - /* get the module */ - if (NULL != (mod = (orte_rml_base_module_t*)opal_pointer_array_get_item(&orte_rml_base.conduits, id))) { - return mod->routed; - } - - return NULL; -} diff --git a/orte/mca/rml/oob/rml_oob.h b/orte/mca/rml/oob/rml_oob.h index 5958bf5d9e..90ca2285af 100644 --- a/orte/mca/rml/oob/rml_oob.h +++ b/orte/mca/rml/oob/rml_oob.h @@ -14,7 +14,7 @@ * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -48,25 +48,19 @@ ORTE_MODULE_DECLSPEC extern orte_rml_component_t mca_rml_oob_component; void orte_rml_oob_fini(struct orte_rml_base_module_t *mod); -int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod, - orte_process_name_t* peer, +int orte_rml_oob_send_nb(orte_process_name_t* peer, struct iovec* msg, int count, orte_rml_tag_t tag, orte_rml_callback_fn_t cbfunc, void* cbdata); -int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod, - orte_process_name_t* peer, +int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer, opal_buffer_t* buffer, orte_rml_tag_t tag, orte_rml_buffer_callback_fn_t cbfunc, void* cbdata); -int orte_rml_oob_ping(struct orte_rml_base_module_t *mod, - const char* uri, - const struct timeval* tv); - END_C_DECLS #endif diff --git a/orte/mca/rml/oob/rml_oob_component.c b/orte/mca/rml/oob/rml_oob_component.c index ee058d2d01..2e1bcf94ce 100644 --- a/orte/mca/rml/oob/rml_oob_component.c +++ b/orte/mca/rml/oob/rml_oob_component.c @@ -13,7 +13,7 @@ * Copyright (c) 2007-2015 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -57,9 +57,8 @@ static int rml_oob_open(void); static int rml_oob_close(void); -static orte_rml_base_module_t* open_conduit(opal_list_t *attributes); -static orte_rml_pathway_t* query_transports(void); -static void close_conduit(orte_rml_base_module_t *mod); +static int component_query(mca_base_module_t **module, int *priority); + /** * component definition */ @@ -75,220 +74,121 @@ orte_rml_component_t mca_rml_oob_component = { ORTE_RELEASE_VERSION), .mca_open_component = rml_oob_open, .mca_close_component = rml_oob_close, + .mca_query_component = component_query, }, .data = { /* The component is checkpoint ready */ MCA_BASE_METADATA_PARAM_CHECKPOINT }, - .priority = 5, - .open_conduit = open_conduit, - .query_transports = query_transports, - .close_conduit = close_conduit + .priority = 5 }; /* Local variables */ -static orte_rml_pathway_t pathway; +static void recv_nb(orte_process_name_t* peer, + orte_rml_tag_t tag, + bool persistent, + orte_rml_callback_fn_t cbfunc, + void* cbdata) +{ + orte_rml_recv_request_t *req; + + opal_output_verbose(10, orte_rml_base_framework.framework_output, + "%s rml_recv_nb for peer %s tag %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(peer), tag); + + /* push the request into the event base so we can add + * the receive to our list of posted recvs */ + req = OBJ_NEW(orte_rml_recv_request_t); + req->post->buffer_data = false; + req->post->peer.jobid = peer->jobid; + req->post->peer.vpid = peer->vpid; + req->post->tag = tag; + req->post->persistent = persistent; + req->post->cbfunc.iov = cbfunc; + req->post->cbdata = cbdata; + ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI); +} +static void recv_buffer_nb(orte_process_name_t* peer, + orte_rml_tag_t tag, + bool persistent, + orte_rml_buffer_callback_fn_t cbfunc, + void* cbdata) +{ + orte_rml_recv_request_t *req; + + opal_output_verbose(10, orte_rml_base_framework.framework_output, + "%s rml_recv_buffer_nb for peer %s tag %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(peer), tag); + + /* push the request into the event base so we can add + * the receive to our list of posted recvs */ + req = OBJ_NEW(orte_rml_recv_request_t); + req->post->buffer_data = true; + req->post->peer.jobid = peer->jobid; + req->post->peer.vpid = peer->vpid; + req->post->tag = tag; + req->post->persistent = persistent; + req->post->cbfunc.buffer = cbfunc; + req->post->cbdata = cbdata; + ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI); +} +static void recv_cancel(orte_process_name_t* peer, orte_rml_tag_t tag) +{ + orte_rml_recv_request_t *req; + + opal_output_verbose(10, orte_rml_base_framework.framework_output, + "%s rml_recv_cancel for peer %s tag %d", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), + ORTE_NAME_PRINT(peer), tag); + + ORTE_ACQUIRE_OBJECT(orte_event_base_active); + if (!orte_event_base_active) { + /* no event will be processed any more, so simply return. */ + return; + } + + /* push the request into the event base so we can remove + * the receive from our list of posted recvs */ + req = OBJ_NEW(orte_rml_recv_request_t); + req->cancel = true; + req->post->peer.jobid = peer->jobid; + req->post->peer.vpid = peer->vpid; + req->post->tag = tag; + ORTE_THREADSHIFT(req, orte_event_base, orte_rml_base_post_recv, ORTE_MSG_PRI); +} +static int oob_ping(const char* uri, const struct timeval* tv) +{ + return ORTE_ERR_UNREACH; +} + static orte_rml_base_module_t base_module = { .component = (struct orte_rml_component_t*)&mca_rml_oob_component, - .ping = NULL, + .ping = oob_ping, .send_nb = orte_rml_oob_send_nb, .send_buffer_nb = orte_rml_oob_send_buffer_nb, + .recv_nb = recv_nb, + .recv_buffer_nb = recv_buffer_nb, + .recv_cancel = recv_cancel, .purge = NULL }; static int rml_oob_open(void) { - /* ask our OOB transports for their info */ - OBJ_CONSTRUCT(&pathway, orte_rml_pathway_t); - pathway.component = strdup("oob"); - ORTE_OOB_GET_TRANSPORTS(&pathway.transports); - /* add any component attributes of our own */ - return ORTE_SUCCESS; } static int rml_oob_close(void) { - /* cleanup */ - OBJ_DESTRUCT(&pathway); - return ORTE_SUCCESS; } -static orte_rml_base_module_t* make_module(void) +static int component_query(mca_base_module_t **module, int *priority) { - orte_rml_oob_module_t *mod; - - /* create a new module */ - mod = (orte_rml_oob_module_t*)malloc(sizeof(orte_rml_oob_module_t)); - if (NULL == mod) { - return NULL; - } - - /* copy the APIs over to it */ - memcpy(mod, &base_module, sizeof(base_module)); - - /* initialize its internal storage */ - OBJ_CONSTRUCT(&mod->queued_routing_messages, opal_list_t); - mod->timer_event = NULL; - mod->routed = NULL; - - /* return the result */ - return (orte_rml_base_module_t*)mod; -} - -static orte_rml_base_module_t* open_conduit(opal_list_t *attributes) -{ - char *comp_attrib; - char **comps; - int i; - orte_rml_base_module_t *md; - - opal_output_verbose(20,orte_rml_base_framework.framework_output, - "%s - Entering rml_oob_open_conduit()", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - - /* someone may require this specific component, so look for "oob" */ - comp_attrib = NULL; - if (orte_get_attribute(attributes, ORTE_RML_INCLUDE_COMP_ATTRIB, (void**)&comp_attrib, OPAL_STRING) && - NULL != comp_attrib) { - /* they specified specific components - could be multiple */ - comps = opal_argv_split(comp_attrib, ','); - free(comp_attrib); - for (i=0; NULL != comps[i]; i++) { - if (0 == strcasecmp(comps[i], "oob")) { - /* we are a candidate */ - opal_argv_free(comps); - md = make_module(); - free(comp_attrib); - comp_attrib = NULL; - orte_get_attribute(attributes, ORTE_RML_ROUTED_ATTRIB, (void**)&comp_attrib, OPAL_STRING); - /* the routed system understands a NULL request, so no need to check - * return status/value here */ - md->routed = orte_routed.assign_module(comp_attrib); - if (NULL != comp_attrib) { - free(comp_attrib); - } - return md; - } - } - /* we are not a candidate */ - opal_argv_free(comps); - free(comp_attrib); - return NULL; - } - - comp_attrib = NULL; - if (orte_get_attribute(attributes, ORTE_RML_EXCLUDE_COMP_ATTRIB, (void**)&comp_attrib, OPAL_STRING) && - NULL != comp_attrib) { - /* see if we are on the list */ - comps = opal_argv_split(comp_attrib, ','); - free(comp_attrib); - for (i=0; NULL != comps[i]; i++) { - if (0 == strcasecmp(comps[i], "oob")) { - /* we cannot be a candidate */ - opal_argv_free(comps); - free(comp_attrib); - return NULL; - } - } - } - - /* Alternatively, check the attributes to see if we qualify - we only handle - * "Ethernet" and "TCP" */ - comp_attrib = NULL; - if (orte_get_attribute(attributes, ORTE_RML_TRANSPORT_TYPE, (void**)&comp_attrib, OPAL_STRING) && - NULL != comp_attrib) { - comps = opal_argv_split(comp_attrib, ','); - for (i=0; NULL != comps[i]; i++) { - if (0 == strcasecmp(comps[i], "Ethernet") || - 0 == strcasecmp(comps[i], "oob")) { - /* we are a candidate */ - opal_argv_free(comps); - md = make_module(); - free(comp_attrib); - comp_attrib = NULL; - orte_get_attribute(attributes, ORTE_RML_ROUTED_ATTRIB, (void**)&comp_attrib, OPAL_STRING); - /* the routed system understands a NULL request, so no need to check - * return status/value here */ - md->routed = orte_routed.assign_module(comp_attrib); - if (NULL != comp_attrib) { - free(comp_attrib); - } - return md; - } - } - /* we are not a candidate */ - opal_argv_free(comps); - free(comp_attrib); - return NULL; - } - - comp_attrib = NULL; - if (orte_get_attribute(attributes, ORTE_RML_PROTOCOL_TYPE, (void**)&comp_attrib, OPAL_STRING) && - NULL != comp_attrib) { - comps = opal_argv_split(comp_attrib, ','); - for (i=0; NULL != comps[i]; i++) { - if (0 == strcasecmp(comps[i], "TCP")) { - /* we are a candidate */ - opal_argv_free(comps); - md = make_module(); - free(comp_attrib); - comp_attrib = NULL; - orte_get_attribute(attributes, ORTE_RML_ROUTED_ATTRIB, (void**)&comp_attrib, OPAL_STRING); - /* the routed system understands a NULL request, so no need to check - * return status/value here */ - md->routed = orte_routed.assign_module(comp_attrib); - if (NULL != comp_attrib) { - free(comp_attrib); - } - return md; - } - } - /* we are not a candidate */ - opal_argv_free(comps); - free(comp_attrib); - return NULL; - } - - /* if they didn't specify a protocol or a transport, then we can be considered */ - if (!orte_get_attribute(attributes, ORTE_RML_TRANSPORT_TYPE, NULL, OPAL_STRING) || - !orte_get_attribute(attributes, ORTE_RML_PROTOCOL_TYPE, NULL, OPAL_STRING)) { - md = make_module(); - md->routed = orte_routed.assign_module(NULL); - return md; - } - - /* if we get here, we cannot handle it */ - return NULL; -} - -static orte_rml_pathway_t* query_transports(void) -{ - /* if we have any available transports, make them available */ - if (0 < opal_list_get_size(&pathway.transports)) { - return &pathway; - } - /* if not, then return NULL */ - return NULL; -} - -static void close_conduit(orte_rml_base_module_t *md) -{ - orte_rml_oob_module_t *mod = (orte_rml_oob_module_t*)md; - - /* cleanup the list of messages */ - OBJ_DESTRUCT(&mod->queued_routing_messages); - - /* clear the storage */ - if (NULL != mod->routed) { - free(mod->routed); - mod->routed = NULL; - } - - /* the rml_base_stub takes care of clearing the base receive - * and free'ng the module */ - return; + *priority = 50; + *module = (mca_base_module_t *) &base_module; + return ORTE_SUCCESS; } diff --git a/orte/mca/rml/oob/rml_oob_send.c b/orte/mca/rml/oob/rml_oob_send.c index 7e5330e944..577354f215 100644 --- a/orte/mca/rml/oob/rml_oob_send.c +++ b/orte/mca/rml/oob/rml_oob_send.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -70,8 +70,7 @@ static void send_self_exe(int fd, short args, void* data) OBJ_RELEASE(xfer); } -int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod, - orte_process_name_t* peer, +int orte_rml_oob_send_nb(orte_process_name_t* peer, struct iovec* iov, int count, orte_rml_tag_t tag, @@ -170,7 +169,6 @@ int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod, snd->count = count; snd->cbfunc.iov = cbfunc; snd->cbdata = cbdata; - snd->routed = strdup(mod->routed); /* activate the OOB send state */ ORTE_OOB_SEND(snd); @@ -178,8 +176,7 @@ int orte_rml_oob_send_nb(struct orte_rml_base_module_t *mod, return ORTE_SUCCESS; } -int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod, - orte_process_name_t* peer, +int orte_rml_oob_send_buffer_nb(orte_process_name_t* peer, opal_buffer_t* buffer, orte_rml_tag_t tag, orte_rml_buffer_callback_fn_t cbfunc, @@ -259,7 +256,6 @@ int orte_rml_oob_send_buffer_nb(struct orte_rml_base_module_t *mod, snd->buffer = buffer; snd->cbfunc.buffer = cbfunc; snd->cbdata = cbdata; - snd->routed = strdup(mod->routed); /* activate the OOB send state */ ORTE_OOB_SEND(snd); diff --git a/orte/mca/rml/rml.h b/orte/mca/rml/rml.h index 2eb1232652..9694add627 100644 --- a/orte/mca/rml/rml.h +++ b/orte/mca/rml/rml.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. * @@ -56,10 +56,6 @@ BEGIN_C_DECLS /* ******************************************************************** */ -/* forward declare */ -struct orte_rml_base_module_t; -struct orte_rml_component_t; - typedef struct { opal_object_t super; orte_process_name_t name; @@ -173,8 +169,7 @@ typedef void (*orte_rml_exception_callback_t)(orte_process_name_t* peer, * from the local process * @retval ORTE_ERROR An unspecified error occurred during the update */ -typedef int (*orte_rml_module_ping_fn_t)(struct orte_rml_base_module_t *mod, - const char* contact_info, +typedef int (*orte_rml_module_ping_fn_t)(const char* contact_info, const struct timeval* tv); @@ -201,8 +196,7 @@ typedef int (*orte_rml_module_ping_fn_t)(struct orte_rml_base_module_t *mod, * receiving process is not available * @retval ORTE_ERROR An unspecified error occurred */ -typedef int (*orte_rml_module_send_nb_fn_t)(struct orte_rml_base_module_t *mod, - orte_process_name_t* peer, +typedef int (*orte_rml_module_send_nb_fn_t)(orte_process_name_t* peer, struct iovec* msg, int count, orte_rml_tag_t tag, @@ -232,8 +226,7 @@ typedef int (*orte_rml_module_send_nb_fn_t)(struct orte_rml_base_module_t *mod, * receiving process is not available * @retval ORTE_ERROR An unspecified error occurred */ -typedef int (*orte_rml_module_send_buffer_nb_fn_t)(struct orte_rml_base_module_t *mod, - orte_process_name_t* peer, +typedef int (*orte_rml_module_send_buffer_nb_fn_t)(orte_process_name_t* peer, struct opal_buffer_t* buffer, orte_rml_tag_t tag, orte_rml_buffer_callback_fn_t cbfunc, @@ -247,6 +240,49 @@ typedef int (*orte_rml_module_send_buffer_nb_fn_t)(struct orte_rml_base_module_t typedef void (*orte_rml_module_purge_fn_t)(orte_process_name_t *peer); +/** + * Receive an iovec non-blocking message + * + * @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive + * @param[in] tag User defined tag for matching send/recv + * @param[in] persistent Boolean flag indicating whether or not this is a one-time recv + * @param[in] cbfunc Callback function on message comlpetion + * @param[in] cbdata User data to provide during completion callback + */ +typedef void (*orte_rml_module_recv_nb_fn_t)(orte_process_name_t* peer, + orte_rml_tag_t tag, + bool persistent, + orte_rml_callback_fn_t cbfunc, + void* cbdata); + + +/** + * Receive a buffer non-blocking message + * + * @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive + * @param[in] tag User defined tag for matching send/recv + * @param[in] persistent Boolean flag indicating whether or not this is a one-time recv + * @param[in] cbfunc Callback function on message comlpetion + * @param[in] cbdata User data to provide during completion callback + */ +typedef void (*orte_rml_module_recv_buffer_nb_fn_t)(orte_process_name_t* peer, + orte_rml_tag_t tag, + bool persistent, + orte_rml_buffer_callback_fn_t cbfunc, + void* cbdata); + +/** + * Cancel a posted non-blocking receive + * + * Attempt to cancel a posted non-blocking receive. + * + * @param[in] peer Peer process or ORTE_NAME_WILDCARD, exactly as passed + * to the non-blocking receive call + * @param[in] tag Posted receive tag + */ +typedef void (*orte_rml_module_recv_cancel_fn_t)(orte_process_name_t* peer, + orte_rml_tag_t tag); + /** * RML internal module interface - these will be implemented by all RML components @@ -265,245 +301,21 @@ typedef struct orte_rml_base_module_t { /** Send non-blocking buffer message */ orte_rml_module_send_buffer_nb_fn_t send_buffer_nb; + orte_rml_module_recv_nb_fn_t recv_nb; + orte_rml_module_recv_buffer_nb_fn_t recv_buffer_nb; + orte_rml_module_recv_cancel_fn_t recv_cancel; + /** Purge information */ orte_rml_module_purge_fn_t purge; } orte_rml_base_module_t; -/* ******************************************************************** */ -/* RML PUBLIC MODULE API DEFINITION */ - -/** Open conduit - call each component and see if they can provide a - * conduit that can satisfy all these attributes - return the conduit id - * (a negative value indicates error) - */ -typedef orte_rml_conduit_t (*orte_rml_API_open_conduit_fn_t)(opal_list_t *attributes); - -/** - * Close a conduit - allow the component to cleanup. - */ -typedef void (*orte_rml_API_close_conduit_fn_t)(orte_rml_conduit_t id); - -/** - * Query the library to provide all the supported interfaces/transport - * providers in the current node/system. - * - * @param[out] List of providers and their attributes. -*/ -typedef int (*orte_rml_API_query_transports_fn_t)(opal_list_t *transports); - -/* query the routed module for a given conduit */ -typedef char* (*orte_rml_API_query_routed_fn_t)(orte_rml_conduit_t id); - -/** - * "Ping" another process to determine availability - * - * Ping another process to determine if it is available. This - * function only verifies that the process is alive and will allow a - * connection to the local process. It does *not* qualify as - * establishing communication with the remote process, as required by - * the note for set_contact_info(). - * - * @param[in] contact_info The contact info string for the remote process - * @param[in] tv Timeout after which the ping should be failed - * - * @retval ORTE_SUCESS The process is available and will allow connections - * from the local process - * @retval ORTE_ERROR An unspecified error occurred during the update - */ -typedef int (*orte_rml_API_ping_fn_t)(orte_rml_conduit_t conduit_id, - const char* contact_info, - const struct timeval* tv); - - -/** - * Send an iovec non-blocking message - * - * Send an array of iovecs to the specified peer. The call - * will return immediately, although the iovecs may not be modified - * until the completion callback is triggered. The iovecs *may* be - * passed to another call to send_nb before the completion callback is - * triggered. The callback being triggered does not give any - * indication of remote completion. - * - * @param[in] peer Name of receiving process - * @param[in] msg Pointer to an array of iovecs to be sent - * @param[in] count Number of iovecs in array - * @param[in] tag User defined tag for matching send/recv - * @param[in] cbfunc Callback function on message comlpetion - * @param[in] cbdata User data to provide during completion callback - * - * @retval ORTE_SUCCESS The message was successfully started - * @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid - * @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the - * receiving process is not available - * @retval ORTE_ERROR An unspecified error occurred - */ -typedef int (*orte_rml_API_send_nb_fn_t)(orte_rml_conduit_t conduit_id, - orte_process_name_t* peer, - struct iovec* msg, - int count, - orte_rml_tag_t tag, - orte_rml_callback_fn_t cbfunc, - void* cbdata); - - -/** - * Send a buffer non-blocking message - * - * Send a buffer to the specified peer. The call - * will return immediately, although the buffer may not be modified - * until the completion callback is triggered. The buffer *may* be - * passed to another call to send_nb before the completion callback is - * triggered. The callback being triggered does not give any - * indication of remote completion. - * - * @param[in] peer Name of receiving process - * @param[in] buffer Pointer to buffer to be sent - * @param[in] tag User defined tag for matching send/recv - * @param[in] cbfunc Callback function on message comlpetion - * @param[in] cbdata User data to provide during completion callback - * - * @retval ORTE_SUCCESS The message was successfully started - * @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid - * @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the - * receiving process is not available - * @retval ORTE_ERROR An unspecified error occurred - */ -typedef int (*orte_rml_API_send_buffer_nb_fn_t)(orte_rml_conduit_t conduit_id, - orte_process_name_t* peer, - struct opal_buffer_t* buffer, - orte_rml_tag_t tag, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata); - -/** - * Purge the RML/OOB of contact info and pending messages - * to/from a specified process. Used when a process aborts - * and is to be restarted - */ -typedef void (*orte_rml_API_purge_fn_t)(orte_process_name_t *peer); - -/** - * Receive an iovec non-blocking message - * - * @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive - * @param[in] tag User defined tag for matching send/recv - * @param[in] persistent Boolean flag indicating whether or not this is a one-time recv - * @param[in] cbfunc Callback function on message comlpetion - * @param[in] cbdata User data to provide during completion callback - */ -typedef void (*orte_rml_API_recv_nb_fn_t)(orte_process_name_t* peer, - orte_rml_tag_t tag, - bool persistent, - orte_rml_callback_fn_t cbfunc, - void* cbdata); - - -/** - * Receive a buffer non-blocking message - * - * @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive - * @param[in] tag User defined tag for matching send/recv - * @param[in] persistent Boolean flag indicating whether or not this is a one-time recv - * @param[in] cbfunc Callback function on message comlpetion - * @param[in] cbdata User data to provide during completion callback - */ -typedef void (*orte_rml_API_recv_buffer_nb_fn_t)(orte_process_name_t* peer, - orte_rml_tag_t tag, - bool persistent, - orte_rml_buffer_callback_fn_t cbfunc, - void* cbdata); - - -/** - * Cancel a posted non-blocking receive - * - * Attempt to cancel a posted non-blocking receive. - * - * @param[in] peer Peer process or ORTE_NAME_WILDCARD, exactly as passed - * to the non-blocking receive call - * @param[in] tag Posted receive tag - */ -typedef void (*orte_rml_API_recv_cancel_fn_t)(orte_process_name_t* peer, - orte_rml_tag_t tag); - -/** - * RML API interface - */ -typedef struct { - /** Open Conduit **/ - orte_rml_API_open_conduit_fn_t open_conduit; - - /** Shutdown the conduit and clean up resources */ - orte_rml_API_close_conduit_fn_t close_conduit; - - /** Ping process for connectivity check */ - orte_rml_API_ping_fn_t ping; - - /** Send non-blocking iovec message */ - orte_rml_API_send_nb_fn_t send_nb; - - /** Send non-blocking buffer message */ - orte_rml_API_send_buffer_nb_fn_t send_buffer_nb; - - /** Receive non-blocking iovec message */ - orte_rml_API_recv_nb_fn_t recv_nb; - - /** Receive non-blocking buffer message */ - orte_rml_API_recv_buffer_nb_fn_t recv_buffer_nb; - - /** Cancel posted non-blocking receive */ - orte_rml_API_recv_cancel_fn_t recv_cancel; - - /** Purge information */ - orte_rml_API_purge_fn_t purge; - - /** Query information of transport in system */ - orte_rml_API_query_transports_fn_t query_transports; - - /* get the routed module for a given conduit */ - orte_rml_API_query_routed_fn_t get_routed; -} orte_rml_base_API_t; - /** Interface for RML communication */ -ORTE_DECLSPEC extern orte_rml_base_API_t orte_rml; +ORTE_DECLSPEC extern orte_rml_base_module_t orte_rml; /* ******************************************************************** */ /* RML COMPONENT DEFINITION */ -/** - * RML open_conduit - * - * Create an instance (module) of the given RML component. Upon - * returning, the module data structure should be fully populated and - * all functions should be usable and will have the conduit information. - * - * @param[in] opal_list_t of all attributes requested for the conduit. - * Each attribute will be key-value. - * [TODO] put in examples of the key-value here. - * @return Exactly one module created by the call to the component's - * initialization function should be returned. The module structure - * should be fully populated, and the priority should be set to a - * reasonable value. - * - * @retval NULL An error occurred and initialization did not occur - * @retval non-NULL The module was successfully initialized - */ -typedef orte_rml_base_module_t* (*orte_rml_component_open_conduit_fn_t)(opal_list_t *attributes); - -/** - * Query the library to provide all the supported interfaces/transport - * providers in the current node/system. - * - */ -typedef orte_rml_pathway_t* (*orte_rml_component_query_transports_fn_t)(void); - -/** Close conduit - allow the specific component to - * cleanup the module for this conduit - */ -typedef void (*orte_rml_module_close_conduit_fn_t)(orte_rml_base_module_t *mod); - /** * RML component interface * @@ -518,10 +330,6 @@ typedef struct orte_rml_component_t { mca_base_component_data_t data; /* Component priority */ int priority; - /* Component interface functions */ - orte_rml_component_open_conduit_fn_t open_conduit; - orte_rml_component_query_transports_fn_t query_transports; - orte_rml_module_close_conduit_fn_t close_conduit; } orte_rml_component_t; diff --git a/orte/mca/rml/rml_types.h b/orte/mca/rml/rml_types.h index 2acb03c1bb..5d14c0e8aa 100644 --- a/orte/mca/rml/rml_types.h +++ b/orte/mca/rml/rml_types.h @@ -12,7 +12,7 @@ * Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2009-2016 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -201,19 +201,6 @@ BEGIN_C_DECLS */ typedef uint32_t orte_rml_tag_t; -/* Conduit ID */ -typedef uint16_t orte_rml_conduit_t; -#define ORTE_RML_CONDUIT_INVALID 0xff - -/* define an object for reporting transports */ -typedef struct { - opal_list_item_t super; - char *component; - opal_list_t attributes; - opal_list_t transports; -} orte_rml_pathway_t; -OBJ_CLASS_DECLARATION(orte_rml_pathway_t); - /* ******************************************************************** */ diff --git a/orte/mca/routed/base/base.h b/orte/mca/routed/base/base.h index 1f2ce533da..632bb2bb4d 100644 --- a/orte/mca/routed/base/base.h +++ b/orte/mca/routed/base/base.h @@ -1,7 +1,7 @@ /* * Copyright (c) 2007-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -32,38 +32,11 @@ ORTE_DECLSPEC extern mca_base_framework_t orte_routed_base_framework; ORTE_DECLSPEC int orte_routed_base_select(void); typedef struct { - opal_list_item_t super; - int pri; - orte_routed_component_t *component; - orte_routed_module_t *module; -} orte_routed_base_active_t; -OBJ_CLASS_DECLARATION(orte_routed_base_active_t); - -typedef struct { - opal_list_t actives; bool routing_enabled; } orte_routed_base_t; ORTE_DECLSPEC extern orte_routed_base_t orte_routed_base; -/* base API wrapper functions */ -ORTE_DECLSPEC char* orte_routed_base_assign_module(char *modules); - -ORTE_DECLSPEC int orte_routed_base_delete_route(char *module, orte_process_name_t *proc); -ORTE_DECLSPEC int orte_routed_base_update_route(char *module, orte_process_name_t *target, - orte_process_name_t *route); -ORTE_DECLSPEC orte_process_name_t orte_routed_base_get_route(char *module, - orte_process_name_t *target); -ORTE_DECLSPEC int orte_routed_base_route_lost(char *module, - const orte_process_name_t *route); -ORTE_DECLSPEC bool orte_routed_base_route_is_defined(char *module, - const orte_process_name_t *target); -ORTE_DECLSPEC void orte_routed_base_update_routing_plan(char *module); -ORTE_DECLSPEC void orte_routed_base_get_routing_list(char *module, opal_list_t *coll); -ORTE_DECLSPEC int orte_routed_base_set_lifeline(char *module, orte_process_name_t *proc); -ORTE_DECLSPEC size_t orte_routed_base_num_routes(char *module); -ORTE_DECLSPEC int orte_routed_base_ft_event(char *module, int state); - /* specialized support functions */ ORTE_DECLSPEC void orte_routed_base_xcast_routing(opal_list_t *coll, opal_list_t *my_children); diff --git a/orte/mca/routed/base/routed_base_fns.c b/orte/mca/routed/base/routed_base_fns.c index 73d74580a4..99c949ba10 100644 --- a/orte/mca/routed/base/routed_base_fns.c +++ b/orte/mca/routed/base/routed_base_fns.c @@ -12,7 +12,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -37,219 +37,6 @@ #include "orte/mca/routed/base/base.h" -char* orte_routed_base_assign_module(char *modules) -{ - orte_routed_base_active_t *active; - char **desired; - int i; - - /* the incoming param contains a comma-delimited, prioritized - * list of desired routing modules. If it is NULL, then we - * simply return the module at the top of our list */ - if (NULL == modules) { - active = (orte_routed_base_active_t*)opal_list_get_first(&orte_routed_base.actives); - return active->component->base_version.mca_component_name; - } - - /* otherwise, cycle thru the provided list of desired modules - * and pick the highest priority one that matches */ - desired = opal_argv_split(modules, ','); - for (i=0; NULL != desired[i]; i++) { - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (0 == strcasecmp(desired[i], active->component->base_version.mca_component_name)) { - opal_argv_free(desired); - return active->component->base_version.mca_component_name; - } - } - } - opal_argv_free(desired); - - /* get here if none match */ - return NULL; -} - -int orte_routed_base_delete_route(char *module, orte_process_name_t *proc) -{ - orte_routed_base_active_t *active; - int rc; - - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (NULL == module || - 0 == strcmp(module, active->component->base_version.mca_component_name)) { - if (NULL != active->module->delete_route) { - if (ORTE_SUCCESS != (rc = active->module->delete_route(proc))) { - return rc; - } - } - } - } - return ORTE_SUCCESS; -} - -int orte_routed_base_update_route(char *module, orte_process_name_t *target, - orte_process_name_t *route) -{ - orte_routed_base_active_t *active; - int rc; - - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (NULL == module || - 0 == strcmp(module, active->component->base_version.mca_component_name)) { - if (NULL != active->module->update_route) { - if (ORTE_SUCCESS != (rc = active->module->update_route(target, route))) { - return rc; - } - } - } - } - return ORTE_SUCCESS; -} - -orte_process_name_t orte_routed_base_get_route(char *module, orte_process_name_t *target) -{ - orte_routed_base_active_t *active; - - /* a NULL module corresponds to direct */ - if (!orte_routed_base.routing_enabled || NULL == module) { - return *target; - } - - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (0 == strcmp(module, active->component->base_version.mca_component_name)) { - if (NULL != active->module->get_route) { - return active->module->get_route(target); - } - return *ORTE_NAME_INVALID; - } - } - return *ORTE_NAME_INVALID; -} - -int orte_routed_base_route_lost(char *module, const orte_process_name_t *route) -{ - orte_routed_base_active_t *active; - int rc; - - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (NULL == module || - 0 == strcmp(module, active->component->base_version.mca_component_name)) { - if (NULL != active->module->route_lost) { - if (ORTE_SUCCESS != (rc = active->module->route_lost(route))) { - return rc; - } - } - } - } - return ORTE_SUCCESS; -} - -bool orte_routed_base_route_is_defined(char *module, const orte_process_name_t *target) -{ - orte_routed_base_active_t *active; - - /* a NULL module corresponds to direct */ - if (NULL == module) { - return true; - } - - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (0 == strcmp(module, active->component->base_version.mca_component_name)) { - if (NULL != active->module->route_is_defined) { - return active->module->route_is_defined(target); - } - break; - } - } - - /* if we didn't find the specified module, or it doesn't have - * the required API, then the route isn't defined */ - return false; -} - -void orte_routed_base_update_routing_plan(char *module) -{ - orte_routed_base_active_t *active; - - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (NULL == module || - 0 == strcmp(module, active->component->base_version.mca_component_name)) { - if (NULL != active->module->update_routing_plan) { - active->module->update_routing_plan(); - } - } - } - - return; -} - -void orte_routed_base_get_routing_list(char *module, opal_list_t *coll) -{ - orte_routed_base_active_t *active; - - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (NULL == module || - 0 == strcmp(module, active->component->base_version.mca_component_name)) { - if (NULL != active->module->get_routing_list) { - active->module->get_routing_list(coll); - } - } - } - return; -} - -int orte_routed_base_set_lifeline(char *module, orte_process_name_t *proc) -{ - orte_routed_base_active_t *active; - int rc; - - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (NULL == module || - 0 == strcmp(module, active->component->base_version.mca_component_name)) { - if (NULL != active->module->set_lifeline) { - if (ORTE_SUCCESS != (rc = active->module->set_lifeline(proc))) { - return rc; - } - } - } - } - return ORTE_SUCCESS; -} - -size_t orte_routed_base_num_routes(char *module) -{ - orte_routed_base_active_t *active; - size_t rc = 0; - - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (NULL == module || - 0 == strcmp(module, active->component->base_version.mca_component_name)) { - if (NULL != active->module->num_routes) { - rc += active->module->num_routes(); - } - } - } - return rc; -} - -int orte_routed_base_ft_event(char *module, int state) -{ - orte_routed_base_active_t *active; - int rc; - - OPAL_LIST_FOREACH(active, &orte_routed_base.actives, orte_routed_base_active_t) { - if (NULL == module || - 0 == strcmp(module, active->component->base_version.mca_component_name)) { - if (NULL != active->module->ft_event) { - if (ORTE_SUCCESS != (rc = active->module->ft_event(state))) { - return rc; - } - } - } - } - return ORTE_SUCCESS; -} - - void orte_routed_base_xcast_routing(opal_list_t *coll, opal_list_t *my_children) { orte_routed_tree_t *child; diff --git a/orte/mca/routed/base/routed_base_frame.c b/orte/mca/routed/base/routed_base_frame.c index 5069697975..3c61600a4f 100644 --- a/orte/mca/routed/base/routed_base_frame.c +++ b/orte/mca/routed/base/routed_base_frame.c @@ -10,7 +10,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -39,25 +39,11 @@ * component's public mca_base_component_t struct. */ #include "orte/mca/routed/base/static-components.h" -orte_routed_base_t orte_routed_base = {{{0}}}; -orte_routed_API_t orte_routed = { - .assign_module = orte_routed_base_assign_module, - .delete_route = orte_routed_base_delete_route, - .update_route = orte_routed_base_update_route, - .get_route = orte_routed_base_get_route, - .route_lost = orte_routed_base_route_lost, - .route_is_defined = orte_routed_base_route_is_defined, - .set_lifeline = orte_routed_base_set_lifeline, - .update_routing_plan = orte_routed_base_update_routing_plan, - .get_routing_list = orte_routed_base_get_routing_list, - .num_routes = orte_routed_base_num_routes, - .ft_event = orte_routed_base_ft_event -}; +orte_routed_base_t orte_routed_base = {0}; +orte_routed_module_t orte_routed = {0}; static int orte_routed_base_open(mca_base_open_flag_t flags) { - /* setup our list of actives */ - OBJ_CONSTRUCT(&orte_routed_base.actives, opal_list_t); /* start with routing DISABLED */ orte_routed_base.routing_enabled = false; @@ -67,14 +53,10 @@ static int orte_routed_base_open(mca_base_open_flag_t flags) static int orte_routed_base_close(void) { - orte_routed_base_active_t *active; - - while (NULL != (active = (orte_routed_base_active_t *)opal_list_remove_first(&orte_routed_base.actives))) { - active->module->finalize(); - OBJ_RELEASE(active); + orte_routed_base.routing_enabled = false; + if (NULL != orte_routed.finalize) { + orte_routed.finalize(); } - OPAL_LIST_DESTRUCT(&orte_routed_base.actives); - return mca_base_framework_components_close(&orte_routed_base_framework, NULL); } @@ -82,69 +64,28 @@ MCA_BASE_FRAMEWORK_DECLARE(orte, routed, "ORTE Message Routing Subsystem", NULL, orte_routed_base_open, orte_routed_base_close, mca_routed_base_static_components, 0); -static bool selected = false; - int orte_routed_base_select(void) { - mca_base_component_list_item_t *cli=NULL; - orte_routed_component_t *component=NULL; - orte_routed_base_active_t *newmodule, *mod; - mca_base_module_t *module; - bool inserted; - int pri; + orte_routed_component_t *best_component = NULL; + orte_routed_module_t *best_module = NULL; - if (selected) { - return ORTE_SUCCESS; - } - selected = true; - - OPAL_LIST_FOREACH(cli, &orte_routed_base_framework.framework_components, mca_base_component_list_item_t ) { - component = (orte_routed_component_t*) cli->cli_component; - - opal_output_verbose(10, orte_routed_base_framework.framework_output, - "orte_routed_base_select: Initializing %s component %s", - component->base_version.mca_type_name, - component->base_version.mca_component_name); - - if (ORTE_SUCCESS != component->base_version.mca_query_component(&module, &pri)) { - continue; - } - - /* add to the list of available components */ - newmodule = OBJ_NEW(orte_routed_base_active_t); - newmodule->pri = pri; - newmodule->component = component; - newmodule->module = (orte_routed_module_t*)module; - - if (ORTE_SUCCESS != newmodule->module->initialize()) { - OBJ_RELEASE(newmodule); - continue; - } - - /* maintain priority order */ - inserted = false; - OPAL_LIST_FOREACH(mod, &orte_routed_base.actives, orte_routed_base_active_t) { - if (newmodule->pri > mod->pri) { - opal_list_insert_pos(&orte_routed_base.actives, - (opal_list_item_t*)mod, &newmodule->super); - inserted = true; - break; - } - } - if (!inserted) { - /* must be lowest priority - add to end */ - opal_list_append(&orte_routed_base.actives, &newmodule->super); - } + /* + * Select the best component + */ + if( OPAL_SUCCESS != mca_base_select("routed", orte_routed_base_framework.framework_output, + &orte_routed_base_framework.framework_components, + (mca_base_module_t **) &best_module, + (mca_base_component_t **) &best_component, NULL) ) { + /* This will only happen if no component was selected */ + /* If we didn't find one to select, that is an error */ + return ORTE_ERROR; } - if (4 < opal_output_get_verbosity(orte_routed_base_framework.framework_output)) { - opal_output(0, "%s: Final routed priorities", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)); - /* show the prioritized list */ - OPAL_LIST_FOREACH(mod, &orte_routed_base.actives, orte_routed_base_active_t) { - opal_output(0, "\tComponent: %s Priority: %d", mod->component->base_version.mca_component_name, mod->pri); - } + /* Save the winner */ + orte_routed = *best_module; + if (NULL != orte_routed.initialize) { + orte_routed.initialize(); } - return ORTE_SUCCESS; } @@ -160,7 +101,3 @@ static void destruct(orte_routed_tree_t *rt) OBJ_CLASS_INSTANCE(orte_routed_tree_t, opal_list_item_t, construct, destruct); - -OBJ_CLASS_INSTANCE(orte_routed_base_active_t, - opal_list_item_t, - NULL, NULL); diff --git a/orte/mca/routed/radix/routed_radix.c b/orte/mca/routed/radix/routed_radix.c index 670189014c..31c59372cf 100644 --- a/orte/mca/routed/radix/routed_radix.c +++ b/orte/mca/routed/radix/routed_radix.c @@ -6,7 +6,7 @@ * reserved. * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -223,12 +223,6 @@ static orte_process_name_t get_route(orte_process_name_t *target) goto found; } - /* if I am an application process, always route via my local daemon */ - if (ORTE_PROC_IS_APP) { - ret = ORTE_PROC_MY_DAEMON; - goto found; - } - /* if I am a tool, the route is direct if target is in * my own job family, and to the target's HNP if not */ @@ -264,7 +258,13 @@ static orte_process_name_t get_route(orte_process_name_t *target) } } - /* if the jobid is different than our own, then this the target + /* if the target is our parent, then send it direct */ + if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_PARENT, target)) { + ret = ORTE_PROC_MY_PARENT; + goto found; + } + + /* if the jobid is different than our own, then this target * is a tool and we should go direct */ if (ORTE_JOB_FAMILY(target->jobid) != ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) { ret = target; @@ -273,10 +273,15 @@ static orte_process_name_t get_route(orte_process_name_t *target) daemon.jobid = ORTE_PROC_MY_NAME->jobid; /* find out what daemon hosts this proc */ - if (ORTE_VPID_INVALID == (daemon.vpid = orte_get_proc_daemon_vpid(target))) { - ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); - ret = ORTE_NAME_INVALID; - goto found; + if (ORTE_PROC_MY_NAME->jobid == target->jobid) { + /* it's a daemon - no need to look it up */ + daemon.vpid = target->vpid; + } else { + if (ORTE_VPID_INVALID == (daemon.vpid = orte_get_proc_daemon_vpid(target))) { + ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); + ret = ORTE_NAME_INVALID; + goto found; + } } /* if the daemon is me, then send direct to the target! */ diff --git a/orte/mca/routed/routed.h b/orte/mca/routed/routed.h index cc8c800619..68aff352fa 100644 --- a/orte/mca/routed/routed.h +++ b/orte/mca/routed/routed.h @@ -7,7 +7,7 @@ * Copyright (c) 2004-2011 The University of Tennessee and The University * of Tennessee Research Foundation. All rights * reserved. - * Copyright (c) 2014-2016 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -221,47 +221,8 @@ typedef struct { orte_routed_module_ft_event_fn_t ft_event; } orte_routed_module_t; -/* define an equivalent set of API functions - these will be implemented - * as "stubs" in the framework base */ -typedef char* (*orte_routed_API_assign_module_fn_t)(char *modules); - -typedef int (*orte_routed_API_delete_route_fn_t)(char *module, - orte_process_name_t *proc); -typedef int (*orte_routed_API_update_route_fn_t)(char *module, - orte_process_name_t *target, - orte_process_name_t *route); -typedef orte_process_name_t (*orte_routed_API_get_route_fn_t)(char *module, - orte_process_name_t *target); -typedef int (*orte_routed_API_route_lost_fn_t)(char *module, - const orte_process_name_t *route); -typedef bool (*orte_routed_API_route_is_defined_fn_t)(char *module, - const orte_process_name_t *target); -typedef void (*orte_routed_API_update_routing_plan_fn_t)(char *module); -typedef void (*orte_routed_API_get_routing_list_fn_t)(char *module, opal_list_t *coll); -typedef int (*orte_routed_API_set_lifeline_fn_t)(char *module, orte_process_name_t *proc); -typedef size_t (*orte_routed_API_num_routes_fn_t)(char *module); -typedef int (*orte_routed_API_ft_event_fn_t)(char *module, int state); - - -typedef struct { - /* API functions */ - orte_routed_API_assign_module_fn_t assign_module; - orte_routed_API_delete_route_fn_t delete_route; - orte_routed_API_update_route_fn_t update_route; - orte_routed_API_get_route_fn_t get_route; - orte_routed_API_route_lost_fn_t route_lost; - orte_routed_API_route_is_defined_fn_t route_is_defined; - orte_routed_API_set_lifeline_fn_t set_lifeline; - /* fns for daemons */ - orte_routed_API_update_routing_plan_fn_t update_routing_plan; - orte_routed_API_get_routing_list_fn_t get_routing_list; - orte_routed_API_num_routes_fn_t num_routes; - /* FT Notification */ - orte_routed_API_ft_event_fn_t ft_event; -} orte_routed_API_t; - /* provide an interface to the routed framework stub functions */ -ORTE_DECLSPEC extern orte_routed_API_t orte_routed; +ORTE_DECLSPEC extern orte_routed_module_t orte_routed; /* ******************************************************************** */ diff --git a/orte/mca/snapc/base/snapc_base_fns.c b/orte/mca/snapc/base/snapc_base_fns.c index a7a0f98ec7..3217b6dd4e 100644 --- a/orte/mca/snapc/base/snapc_base_fns.c +++ b/orte/mca/snapc/base/snapc_base_fns.c @@ -9,7 +9,7 @@ * All rights reserved. * Copyright (c) 2007 Evergrid, Inc. All rights reserved. * Copyright (c) 2013 Cisco Systems, Inc. All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -767,8 +767,7 @@ int orte_snapc_base_global_coord_ckpt_update_cmd(orte_process_name_t* peer, } } - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - peer, loc_buffer, + if (0 > (ret = orte_rml.send_buffer_nb(peer, loc_buffer, ORTE_RML_TAG_CKPT, orte_rml_send_callback, NULL))) { opal_output(orte_snapc_base_framework.framework_output, diff --git a/orte/mca/state/base/state_base_fns.c b/orte/mca/state/base/state_base_fns.c index accda8edd3..3aa93c5c6c 100644 --- a/orte/mca/state/base/state_base_fns.c +++ b/orte/mca/state/base/state_base_fns.c @@ -1,6 +1,6 @@ /* * Copyright (c) 2011-2012 Los Alamos National Security, LLC. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -508,8 +508,7 @@ void orte_state_base_notify_data_server(orte_process_name_t *target) } /* send the request to the server */ - rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &orte_pmix_server_globals.server, buf, + rc = orte_rml.send_buffer_nb(&orte_pmix_server_globals.server, buf, ORTE_RML_TAG_DATA_SERVER, orte_rml_send_callback, NULL); if (ORTE_SUCCESS != rc) { @@ -617,8 +616,7 @@ static void _send_notification(int status, ORTE_ERROR_NAME(status), ORTE_NAME_PRINT(target), ORTE_NAME_PRINT(&daemon)); - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &daemon, buf, + if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(&daemon, buf, ORTE_RML_TAG_NOTIFICATION, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); @@ -635,7 +633,6 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata) orte_job_t *jdata; orte_proc_t *pdata; int i; - char *rtmod; orte_process_name_t parent, target; ORTE_ACQUIRE_OBJECT(caddy); @@ -648,9 +645,6 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata) ORTE_NAME_PRINT(proc), orte_proc_state_to_str(state)); - /* get our "lifeline" routed module */ - rtmod = orte_rml.get_routed(orte_mgmt_conduit); - /* get the job object for this proc */ if (NULL == (jdata = orte_get_job_data_object(proc->jobid))) { ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND); @@ -722,7 +716,7 @@ void orte_state_base_track_procs(int fd, short argc, void *cbdata) * remain (might be some from another job) */ if (orte_orteds_term_ordered && - 0 == orte_routed.num_routes(rtmod)) { + 0 == orte_routed.num_routes()) { for (i=0; i < orte_local_children->size; i++) { if (NULL != (pdata = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) && ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_ALIVE)) { @@ -783,7 +777,6 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata) int32_t i32, *i32ptr; uint32_t u32; void *nptr; - char *rtmod; ORTE_ACQUIRE_OBJECT(caddy); jdata = caddy->jdata; @@ -793,10 +786,6 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata) ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (NULL == jdata) ? "NULL" : ORTE_JOBID_PRINT(jdata->jobid)); - /* get our "lifeline" routed module */ - rtmod = orte_rml.get_routed(orte_mgmt_conduit); - - if (NULL == jdata || jdata->jobid == ORTE_PROC_MY_NAME->jobid) { /* just check to see if the daemons are complete */ OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, @@ -864,7 +853,7 @@ void orte_state_base_check_all_complete(int fd, short args, void *cbdata) */ CHECK_DAEMONS: if (jdata == NULL || jdata->jobid == ORTE_PROC_MY_NAME->jobid) { - if (0 == orte_routed.num_routes(rtmod)) { + if (0 == orte_routed.num_routes()) { /* orteds are done! */ OPAL_OUTPUT_VERBOSE((2, orte_state_base_framework.framework_output, "%s orteds complete - exiting", diff --git a/orte/mca/state/hnp/state_hnp.c b/orte/mca/state/hnp/state_hnp.c index 2d3e520042..4a8855a944 100644 --- a/orte/mca/state/hnp/state_hnp.c +++ b/orte/mca/state/hnp/state_hnp.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2011-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -326,8 +326,7 @@ static void _send_notification(int status, ORTE_ERROR_NAME(status), ORTE_NAME_PRINT(target), ORTE_NAME_PRINT(&daemon)); - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &daemon, buf, + if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(&daemon, buf, ORTE_RML_TAG_NOTIFICATION, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/mca/state/orted/state_orted.c b/orte/mca/state/orted/state_orted.c index dfd05fea31..8846c90a1f 100644 --- a/orte/mca/state/orted/state_orted.c +++ b/orte/mca/state/orted/state_orted.c @@ -1,7 +1,7 @@ /* * Copyright (c) 2011-2017 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -239,8 +239,7 @@ static void track_jobs(int fd, short argc, void *cbdata) } /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, alert, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); @@ -262,7 +261,6 @@ static void track_procs(int fd, short argc, void *cbdata) opal_buffer_t *alert; int rc, i; orte_plm_cmd_flag_t cmd; - char *rtmod; orte_std_cntr_t index; orte_job_map_t *map; orte_node_t *node; @@ -333,8 +331,7 @@ static void track_procs(int fd, short argc, void *cbdata) } } /* send it */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, alert, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); @@ -391,9 +388,8 @@ static void track_procs(int fd, short argc, void *cbdata) * gone, then terminate ourselves IF no local procs * remain (might be some from another job) */ - rtmod = orte_rml.get_routed(orte_mgmt_conduit); if (orte_orteds_term_ordered && - 0 == orte_routed.num_routes(rtmod)) { + 0 == orte_routed.num_routes()) { for (i=0; i < orte_local_children->size; i++) { if (NULL != (pdata = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) && ORTE_FLAG_TEST(pdata, ORTE_PROC_FLAG_ALIVE)) { @@ -431,8 +427,7 @@ static void track_procs(int fd, short argc, void *cbdata) "%s state:orted: SENDING JOB LOCAL TERMINATION UPDATE FOR JOB %s", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_JOBID_PRINT(jdata->jobid))); - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, alert, + if (0 > (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, alert, ORTE_RML_TAG_PLM, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/orted/orted_comm.c b/orte/orted/orted_comm.c index b07e86e6ab..803a3d94aa 100644 --- a/orte/orted/orted_comm.c +++ b/orte/orted/orted_comm.c @@ -123,7 +123,6 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, char string[256], *string_ptr = string; float pss; opal_pstats_t pstat; - char *rtmod; char *coprocessors; orte_job_map_t *map; int8_t flag; @@ -382,8 +381,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, /* flag that orteds were ordered to terminate */ orte_orteds_term_ordered = true; /* if all my routes and local children are gone, then terminate ourselves */ - rtmod = orte_rml.get_routed(orte_mgmt_conduit); - if (0 == (ret = orte_routed.num_routes(rtmod))) { + if (0 == (ret = orte_routed.num_routes())) { for (i=0; i < orte_local_children->size; i++) { if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) && ORTE_FLAG_TEST(proct, ORTE_PROC_FLAG_ALIVE)) { @@ -425,8 +423,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, orte_orteds_term_ordered = true; if (ORTE_PROC_IS_HNP) { /* if all my routes and local children are gone, then terminate ourselves */ - rtmod = orte_rml.get_routed(orte_mgmt_conduit); - if (0 == orte_routed.num_routes(rtmod)) { + if (0 == orte_routed.num_routes()) { for (i=0; i < orte_local_children->size; i++) { if (NULL != (proct = (orte_proc_t*)opal_pointer_array_get_item(orte_local_children, i)) && ORTE_FLAG_TEST(proct, ORTE_PROC_FLAG_ALIVE)) { @@ -526,8 +523,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, break; } /* send the buffer to our IOF */ - orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_NAME, iofbuf, ORTE_RML_TAG_IOF_HNP, + orte_rml.send_buffer_nb(ORTE_PROC_MY_NAME, iofbuf, ORTE_RML_TAG_IOF_HNP, orte_rml_send_callback, NULL); } for (i=1; i < orte_node_pool->size; i++) { @@ -718,8 +714,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, OBJ_DESTRUCT(&data); } /* send the data */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, ORTE_RML_TAG_TOPOLOGY_REPORT, + if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOPOLOGY_REPORT, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); @@ -749,8 +744,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, goto CLEANUP; } - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, ORTE_RML_TAG_TOOL, + if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); @@ -775,8 +769,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, OBJ_RELEASE(answer); goto CLEANUP; } - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, ORTE_RML_TAG_TOOL, + if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); @@ -845,8 +838,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, rc = opal_hash_table_get_next_key_uint32(orte_job_data, &u32, (void **)&jobdat, nptr, &nptr); } } - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, ORTE_RML_TAG_TOOL, + if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); @@ -872,8 +864,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, OBJ_RELEASE(answer); goto CLEANUP; } - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, ORTE_RML_TAG_TOOL, + if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); @@ -942,8 +933,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, } } /* send the info */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, ORTE_RML_TAG_TOOL, + if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); @@ -969,8 +959,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, OBJ_RELEASE(answer); goto CLEANUP; } - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, ORTE_RML_TAG_TOOL, + if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); @@ -1088,8 +1077,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, } } /* send the info */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, ORTE_RML_TAG_TOOL, + if (0 > (ret = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_TOOL, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); @@ -1147,8 +1135,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, goto SEND_TOP_ANSWER; } /* the callback function will release relay_msg buffer */ - if (0 > orte_rml.send_buffer_nb(orte_mgmt_conduit, - &proc2, relay_msg, + if (0 > orte_rml.send_buffer_nb(&proc2, relay_msg, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL)) { ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); @@ -1199,8 +1186,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, goto SEND_TOP_ANSWER; } /* the callback function will release relay_msg buffer */ - if (0 > orte_rml.send_buffer_nb(orte_mgmt_conduit, - &proc2, relay_msg, + if (0 > orte_rml.send_buffer_nb(&proc2, relay_msg, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL)) { ORTE_ERROR_LOG(ORTE_ERR_COMM_FAILURE); @@ -1264,8 +1250,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, ret = ORTE_ERR_COMM_FAILURE; break; } - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - return_addr, answer, ORTE_RML_TAG_TOOL, + if (0 > (ret = orte_rml.send_buffer_nb(return_addr, answer, ORTE_RML_TAG_TOOL, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(answer); @@ -1343,8 +1328,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, free(gstack_exec); } /* always send our response */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, answer, + if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, answer, ORTE_RML_TAG_STACK_TRACE, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); @@ -1382,8 +1366,7 @@ void orte_daemon_recv(int status, orte_process_name_t* sender, } opal_dss.pack(answer, &pss, 1, OPAL_FLOAT); /* send it back */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, answer, + if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, answer, ORTE_RML_TAG_MEMPROFILE, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); diff --git a/orte/orted/orted_main.c b/orte/orted/orted_main.c index 197828aa5d..e30fc1b09a 100644 --- a/orte/orted/orted_main.c +++ b/orte/orted/orted_main.c @@ -712,19 +712,19 @@ int orte_daemon(int argc, char *argv[]) /* tell the routed module that we have a path * back to the HNP */ - if (ORTE_SUCCESS != (ret = orte_routed.update_route(NULL, ORTE_PROC_MY_HNP, ORTE_PROC_MY_PARENT))) { + if (ORTE_SUCCESS != (ret = orte_routed.update_route(ORTE_PROC_MY_HNP, ORTE_PROC_MY_PARENT))) { ORTE_ERROR_LOG(ret); goto DONE; } /* and a path to our parent */ - if (ORTE_SUCCESS != (ret = orte_routed.update_route(NULL, ORTE_PROC_MY_PARENT, ORTE_PROC_MY_PARENT))) { + if (ORTE_SUCCESS != (ret = orte_routed.update_route(ORTE_PROC_MY_PARENT, ORTE_PROC_MY_PARENT))) { ORTE_ERROR_LOG(ret); goto DONE; } /* set the lifeline to point to our parent so that we * can handle the situation if that lifeline goes away */ - if (ORTE_SUCCESS != (ret = orte_routed.set_lifeline(NULL, ORTE_PROC_MY_PARENT))) { + if (ORTE_SUCCESS != (ret = orte_routed.set_lifeline(ORTE_PROC_MY_PARENT))) { ORTE_ERROR_LOG(ret); goto DONE; } @@ -754,8 +754,7 @@ int orte_daemon(int argc, char *argv[]) node_regex_waiting = true; orte_rml.recv_buffer_nb(ORTE_PROC_MY_PARENT, ORTE_RML_TAG_NODE_REGEX_REPORT, ORTE_RML_PERSISTENT, node_regex_report, &node_regex_waiting); - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_PARENT, buffer, + if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_PARENT, buffer, ORTE_RML_TAG_WARMUP_CONNECTION, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); @@ -955,8 +954,7 @@ int orte_daemon(int argc, char *argv[]) } /* send it to the designated target */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &target, buffer, + if (0 > (ret = orte_rml.send_buffer_nb(&target, buffer, ORTE_RML_TAG_ORTED_CALLBACK, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); @@ -1140,19 +1138,16 @@ static void rollup(int status, orte_process_name_t* sender, } static void report_orted() { - char *rtmod; int nreqd, ret; /* get the number of children */ - rtmod = orte_rml.get_routed(orte_mgmt_conduit); - nreqd = orte_routed.num_routes(rtmod) + 1; + nreqd = orte_routed.num_routes() + 1; if (nreqd == ncollected && NULL != mybucket && !node_regex_waiting) { /* add the collection of our children's buckets to ours */ opal_dss.copy_payload(mybucket, bucket); OBJ_RELEASE(bucket); /* relay this on to our parent */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_PARENT, mybucket, + if (0 > (ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_PARENT, mybucket, ORTE_RML_TAG_ORTED_CALLBACK, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(ret); @@ -1175,7 +1170,7 @@ static void node_regex_report(int status, orte_process_name_t* sender, /* update the routing tree so any tree spawn operation * properly gets the number of children underneath us */ - orte_routed.update_routing_plan(NULL); + orte_routed.update_routing_plan(); *active = false; diff --git a/orte/orted/orted_submit.c b/orte/orted/orted_submit.c index 3e74eb7206..022fcc89d7 100644 --- a/orte/orted/orted_submit.c +++ b/orte/orted/orted_submit.c @@ -14,7 +14,7 @@ * Copyright (c) 2007-2009 Sun Microsystems, Inc. All rights reserved. * Copyright (c) 2007-2017 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. @@ -575,14 +575,14 @@ int orte_submit_init(int argc, char *argv[], OBJ_DESTRUCT(&val); /* set the route to be direct */ - if (ORTE_SUCCESS != orte_routed.update_route(NULL, ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) { + if (ORTE_SUCCESS != orte_routed.update_route(ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) { orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri); orte_finalize(); exit(1); } /* set the target hnp as our lifeline so we will terminate if it exits */ - orte_routed.set_lifeline(NULL, ORTE_PROC_MY_HNP); + orte_routed.set_lifeline(ORTE_PROC_MY_HNP); /* setup to listen for HNP response to my commands */ orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_NOTIFY_COMPLETE, @@ -700,8 +700,7 @@ int orte_submit_cancel(int index) { ORTE_ERROR_LOG(rc); return rc; } - rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON, + rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL); if (ORTE_SUCCESS != rc) { ORTE_ERROR_LOG(rc); @@ -724,8 +723,7 @@ int orte_submit_halt(void) ORTE_ERROR_LOG(rc); return rc; } - rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, req, + rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL); if (ORTE_SUCCESS != rc) { @@ -1146,8 +1144,7 @@ int orte_submit_job(char *argv[], int *index, ORTE_ERROR_LOG(rc); return rc; } - orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON, + orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, req, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL); /* Inform the caller of the tracker index if they passed a index pointer */ @@ -3378,8 +3375,7 @@ void orte_profile_wakeup(int sd, short args, void *cbdata) for (i=0; i < nreports; i++) { OBJ_RETAIN(buffer); name.vpid = i; - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &name, buffer, + if (0 > (rc = orte_rml.send_buffer_nb(&name, buffer, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/orted/pmix/pmix_server.c b/orte/orted/pmix/pmix_server.c index ee4f7f4d45..b363442a0e 100644 --- a/orte/orted/pmix/pmix_server.c +++ b/orte/orted/pmix/pmix_server.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2012 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014-2015 Research Organization for Information Science @@ -391,8 +391,7 @@ static void send_error(int status, opal_process_name_t *idreq, } /* send the response */ - orte_rml.send_buffer_nb(orte_mgmt_conduit, - remote, reply, + orte_rml.send_buffer_nb(remote, reply, ORTE_RML_TAG_DIRECT_MODEX_RESP, orte_rml_send_callback, NULL); return; @@ -435,8 +434,7 @@ static void _mdxresp(int sd, short args, void *cbdata) opal_dss.copy_payload(reply, &req->msg); /* send the response */ - orte_rml.send_buffer_nb(orte_mgmt_conduit, - &req->proxy, reply, + orte_rml.send_buffer_nb(&req->proxy, reply, ORTE_RML_TAG_DIRECT_MODEX_RESP, orte_rml_send_callback, NULL); diff --git a/orte/orted/pmix/pmix_server_dyn.c b/orte/orted/pmix/pmix_server_dyn.c index 259d719132..e84178ee50 100644 --- a/orte/orted/pmix/pmix_server_dyn.c +++ b/orte/orted/pmix/pmix_server_dyn.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009-2017 Cisco Systems, Inc. All rights reserved * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science @@ -143,8 +143,7 @@ static void spawn(int sd, short args, void *cbdata) } /* send it to the HNP for processing - might be myself! */ - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, buf, + if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_PLM, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/orted/pmix/pmix_server_fence.c b/orte/orted/pmix/pmix_server_fence.c index fe0f942cd1..20c1849bbb 100644 --- a/orte/orted/pmix/pmix_server_fence.c +++ b/orte/orted/pmix/pmix_server_fence.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014-2017 Research Organization for Information Science @@ -261,8 +261,7 @@ static void dmodex_req(int sd, short args, void *cbdata) } /* send it to the host daemon */ - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &dmn->name, buf, ORTE_RML_TAG_DIRECT_MODEX, + if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(&dmn->name, buf, ORTE_RML_TAG_DIRECT_MODEX, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); opal_hotel_checkout(&orte_pmix_server_globals.reqs, req->room_num); diff --git a/orte/orted/pmix/pmix_server_gen.c b/orte/orted/pmix/pmix_server_gen.c index 648d69557a..4d8aba4d54 100644 --- a/orte/orted/pmix/pmix_server_gen.c +++ b/orte/orted/pmix/pmix_server_gen.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014-2017 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014 Research Organization for Information Science @@ -1080,8 +1080,7 @@ void pmix_server_log_fn(opal_process_name_t *requestor, buf = OBJ_NEW(opal_buffer_t); opal_dss.load(buf, val->data.bo.bytes, val->data.bo.size); val->data.bo.bytes = NULL; - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, buf, + if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_SHOW_HELP, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/orted/pmix/pmix_server_pub.c b/orte/orted/pmix/pmix_server_pub.c index db1b44e2a5..72c9a4c506 100644 --- a/orte/orted/pmix/pmix_server_pub.c +++ b/orte/orted/pmix/pmix_server_pub.c @@ -13,7 +13,7 @@ * All rights reserved. * Copyright (c) 2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2011 Oak Ridge National Labs. All rights reserved. - * Copyright (c) 2013-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2013-2019 Intel, Inc. All rights reserved. * Copyright (c) 2014 Mellanox Technologies, Inc. * All rights reserved. * Copyright (c) 2014-2016 Research Organization for Information Science @@ -132,9 +132,9 @@ static int init_server(void) struct timeval timeout; timeout.tv_sec = orte_pmix_server_globals.timeout; timeout.tv_usec = 0; - if (ORTE_SUCCESS != (rc = orte_rml.ping(orte_mgmt_conduit, server, &timeout))) { + if (ORTE_SUCCESS != (rc = orte_rml.ping(server, &timeout))) { /* try it one more time */ - if (ORTE_SUCCESS != (rc = orte_rml.ping(orte_mgmt_conduit, server, &timeout))) { + if (ORTE_SUCCESS != (rc = orte_rml.ping(server, &timeout))) { /* okay give up */ orte_show_help("help-orterun.txt", "orterun:server-not-found", true, orte_basename, server, @@ -205,8 +205,7 @@ static void execute(int sd, short args, void *cbdata) } /* send the request to the target */ - rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - target, xfer, + rc = orte_rml.send_buffer_nb(target, xfer, ORTE_RML_TAG_DATA_SERVER, orte_rml_send_callback, NULL); if (ORTE_SUCCESS == rc) { diff --git a/orte/runtime/orte_data_server.c b/orte/runtime/orte_data_server.c index 903e17c66a..d4d3cb5fca 100644 --- a/orte/runtime/orte_data_server.c +++ b/orte/runtime/orte_data_server.c @@ -12,7 +12,7 @@ * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2016 Los Alamos National Security, LLC. * All rights reserved - * Copyright (c) 2015-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2015-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science * and Technology (RIST). All rights reserved. * $COPYRIGHT$ @@ -336,8 +336,7 @@ void orte_data_server(int status, orte_process_name_t* sender, ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(&req->requestor)); - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &req->requestor, reply, ORTE_RML_TAG_DATA_CLIENT, + if (0 > (rc = orte_rml.send_buffer_nb(&req->requestor, reply, ORTE_RML_TAG_DATA_CLIENT, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(reply); @@ -716,8 +715,7 @@ void orte_data_server(int status, orte_process_name_t* sender, } SEND_ANSWER: - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - sender, answer, ORTE_RML_TAG_DATA_CLIENT, + if (0 > (rc = orte_rml.send_buffer_nb(sender, answer, ORTE_RML_TAG_DATA_CLIENT, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(answer); diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index 2413ee7e90..abd2bfb386 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -72,10 +72,6 @@ char *orte_basename = NULL; bool orte_coprocessors_detected = false; opal_hash_table_t *orte_coprocessors = NULL; char *orte_topo_signature = NULL; -char *orte_mgmt_transport = NULL; -char *orte_coll_transport = NULL; -int orte_mgmt_conduit = -1; -int orte_coll_conduit = -1; bool orte_no_vm = false; char *orte_data_server_uri = NULL; diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index 68c03842b1..9abe705a47 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -74,11 +74,6 @@ ORTE_DECLSPEC extern bool orte_event_base_active; /* instantiated in orte/runtim ORTE_DECLSPEC extern bool orte_proc_is_bound; /* instantiated in orte/runtime/orte_init.c */ ORTE_DECLSPEC extern int orte_progress_thread_debug; /* instantiated in orte/runtime/orte_init.c */ -ORTE_DECLSPEC extern char *orte_mgmt_transport; -ORTE_DECLSPEC extern char *orte_coll_transport; -ORTE_DECLSPEC extern int orte_mgmt_conduit; -ORTE_DECLSPEC extern int orte_coll_conduit; - /** * Global indicating where this process was bound to at launch (will * be NULL if !orte_proc_is_bound) diff --git a/orte/runtime/orte_mca_params.c b/orte/runtime/orte_mca_params.c index 0e4c10acd1..5b50ea0e8d 100644 --- a/orte/runtime/orte_mca_params.c +++ b/orte/runtime/orte_mca_params.c @@ -746,19 +746,6 @@ int orte_register_params(void) OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_READONLY, &orte_daemon_cores); - /* get the conduit params */ - orte_coll_transport = "fabric,ethernet"; - (void) mca_base_var_register("orte", "orte", "coll", "transports", - "Comma-separated list of transports to use for ORTE collectives", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &orte_coll_transport); - - orte_mgmt_transport = "oob"; - (void) mca_base_var_register("orte", "orte", "mgmt", "transports", - "Comma-separated list of transports to use for ORTE management messages", - MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9, - MCA_BASE_VAR_SCOPE_READONLY, &orte_mgmt_transport); - /* Amount of time to wait for a stack trace to return from the daemons */ orte_stack_trace_wait_timeout = 30; (void) mca_base_var_register ("orte", "orte", NULL, "timeout_for_stack_trace", diff --git a/orte/util/comm/comm.c b/orte/util/comm/comm.c index fdcbcc033e..6f2fba50aa 100644 --- a/orte/util/comm/comm.c +++ b/orte/util/comm/comm.c @@ -11,7 +11,7 @@ * All rights reserved. * Copyright (c) 2010-2012 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2014-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -136,7 +136,7 @@ int orte_util_comm_connect_tool(char *uri) OBJ_DESTRUCT(&val); /* set the route to be direct */ - if (ORTE_SUCCESS != (rc = orte_routed.update_route(NULL, &tool, &tool))) { + if (ORTE_SUCCESS != (rc = orte_routed.update_route(&tool, &tool))) { ORTE_ERROR_LOG(rc); return rc; } @@ -201,8 +201,7 @@ int orte_util_comm_report_event(orte_comm_event_t ev) opal_event_evtimer_add(quicktime, &tv); /* do the send */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - &tool, buf, ORTE_RML_TAG_TOOL, send_cbfunc, NULL))) { + if (0 > (rc = orte_rml.send_buffer_nb(&tool, buf, ORTE_RML_TAG_TOOL, send_cbfunc, NULL))) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(buf); return rc; @@ -292,8 +291,7 @@ int orte_util_comm_query_job_info(const orte_process_name_t *hnp, orte_jobid_t j opal_event_evtimer_add(quicktime, &tv); /* do the send */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - (orte_process_name_t*)hnp, cmd, + if (0 > (ret = orte_rml.send_buffer_nb((orte_process_name_t*)hnp, cmd, ORTE_RML_TAG_DAEMON, send_cbfunc, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(cmd); @@ -401,8 +399,7 @@ int orte_util_comm_query_node_info(const orte_process_name_t *hnp, char *node, opal_event_evtimer_add(quicktime, &tv); /* do the send */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - (orte_process_name_t*)hnp, cmd, + if (0 > (ret = orte_rml.send_buffer_nb((orte_process_name_t*)hnp, cmd, ORTE_RML_TAG_DAEMON, send_cbfunc, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(cmd); @@ -519,8 +516,7 @@ int orte_util_comm_query_proc_info(const orte_process_name_t *hnp, orte_jobid_t opal_event_evtimer_add(quicktime, &tv); /* do the send */ - if (0 > (ret = orte_rml.send_buffer_nb(orte_mgmt_conduit, - (orte_process_name_t*)hnp, cmd, ORTE_RML_TAG_DAEMON, + if (0 > (ret = orte_rml.send_buffer_nb((orte_process_name_t*)hnp, cmd, ORTE_RML_TAG_DAEMON, send_cbfunc, NULL))) { ORTE_ERROR_LOG(ret); OBJ_RELEASE(cmd); @@ -669,8 +665,7 @@ int orte_util_comm_spawn_job(const orte_process_name_t *hnp, orte_job_t *jdata) ORTE_NAME_PRINT(hnp))); /* tell the target HNP to launch the job */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - (orte_process_name_t*)hnp, buf, + if (0 > (rc = orte_rml.send_buffer_nb((orte_process_name_t*)hnp, buf, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); @@ -751,8 +746,7 @@ int orte_util_comm_terminate_job(const orte_process_name_t *hnp, orte_jobid_t jo ORTE_NAME_PRINT(hnp))); /* tell the target HNP to terminate the job */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - (orte_process_name_t*)hnp, buf, + if (0 > (rc = orte_rml.send_buffer_nb((orte_process_name_t*)hnp, buf, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); @@ -809,8 +803,7 @@ int orte_util_comm_halt_vm(const orte_process_name_t *hnp) } /* send the order */ - if (0 > (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - (orte_process_name_t*)hnp, buf, + if (0 > (rc = orte_rml.send_buffer_nb((orte_process_name_t*)hnp, buf, ORTE_RML_TAG_DAEMON, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc); diff --git a/orte/util/hnp_contact.c b/orte/util/hnp_contact.c index f7cf36f837..134dd6a2c8 100644 --- a/orte/util/hnp_contact.c +++ b/orte/util/hnp_contact.c @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016-2017 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -161,7 +161,7 @@ int orte_read_hnp_contact_file(char *filename, orte_hnp_contact_t *hnp, bool con OBJ_DESTRUCT(&val); /* set the route to be direct */ - if (ORTE_SUCCESS != (rc = orte_routed.update_route(NULL, &hnp->name, &hnp->name))) { + if (ORTE_SUCCESS != (rc = orte_routed.update_route(&hnp->name, &hnp->name))) { ORTE_ERROR_LOG(rc); free(hnp_uri); return rc; diff --git a/orte/util/nidmap.c b/orte/util/nidmap.c index 39feb9677b..744718fa21 100644 --- a/orte/util/nidmap.c +++ b/orte/util/nidmap.c @@ -399,29 +399,30 @@ int orte_util_decode_nidmap(opal_buffer_t *buf) } else { vpid = UINT32_MAX; } - if (UINT32_MAX != vpid && - NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, vpid))) { - proc = OBJ_NEW(orte_proc_t); - proc->name.jobid = ORTE_PROC_MY_NAME->jobid; - proc->name.vpid = vpid; - proc->state = ORTE_PROC_STATE_RUNNING; - ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_ALIVE); - daemons->num_procs++; - opal_pointer_array_set_item(daemons->procs, proc->name.vpid, proc); + if (UINT32_MAX != vpid) { + if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, vpid))) { + proc = OBJ_NEW(orte_proc_t); + proc->name.jobid = ORTE_PROC_MY_NAME->jobid; + proc->name.vpid = vpid; + proc->state = ORTE_PROC_STATE_RUNNING; + ORTE_FLAG_SET(proc, ORTE_PROC_FLAG_ALIVE); + daemons->num_procs++; + opal_pointer_array_set_item(daemons->procs, proc->name.vpid, proc); + } + nd->index = proc->name.vpid; + OBJ_RETAIN(nd); + proc->node = nd; + OBJ_RETAIN(proc); + nd->daemon = proc; } - nd->index = proc->name.vpid; - OBJ_RETAIN(nd); - proc->node = nd; - OBJ_RETAIN(proc); - nd->daemon = proc; } /* update num procs */ if (orte_process_info.num_procs != daemons->num_procs) { orte_process_info.num_procs = daemons->num_procs; - /* need to update the routing plan */ - orte_routed.update_routing_plan(NULL); } + /* need to update the routing plan */ + orte_routed.update_routing_plan(); if (orte_process_info.max_procs < orte_process_info.num_procs) { orte_process_info.max_procs = orte_process_info.num_procs; diff --git a/orte/util/show_help.c b/orte/util/show_help.c index 8ea1519d6b..f8078465c6 100644 --- a/orte/util/show_help.c +++ b/orte/util/show_help.c @@ -12,7 +12,7 @@ * Copyright (c) 2008-2018 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2012-2013 Los Alamos National Security, LLC. * All rights reserved. - * Copyright (c) 2016-2018 Intel, Inc. All rights reserved. + * Copyright (c) 2016-2019 Intel, Inc. All rights reserved. * Copyright (c) 2017 IBM Corporation. All rights reserved. * $COPYRIGHT$ * @@ -694,8 +694,7 @@ int orte_show_help_norender(const char *filename, const char *topic, /* if we are a daemon, then send it via RML to the HNP */ if (ORTE_PROC_IS_DAEMON) { /* send it to the HNP */ - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, buf, + if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_SHOW_HELP, orte_rml_send_callback, NULL))) { OBJ_RELEASE(buf); @@ -787,8 +786,7 @@ int orte_show_help_suppress(const char *filename, const char *topic) /* pack the flag that we DO NOT have a string */ opal_dss.pack(buf, &have_output, 1, OPAL_INT8); /* send it to the HNP */ - if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(orte_mgmt_conduit, - ORTE_PROC_MY_HNP, buf, + if (ORTE_SUCCESS != (rc = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, ORTE_RML_TAG_SHOW_HELP, orte_rml_send_callback, NULL))) { ORTE_ERROR_LOG(rc);