diff --git a/orte/mca/grpcomm/direct/grpcomm_direct.c b/orte/mca/grpcomm/direct/grpcomm_direct.c index b8df2e48be..da30940ee5 100644 --- a/orte/mca/grpcomm/direct/grpcomm_direct.c +++ b/orte/mca/grpcomm/direct/grpcomm_direct.c @@ -391,6 +391,8 @@ static void xcast_recv(int status, orte_process_name_t* sender, goto relay; } if (0 == flag) { + /* copy the remainder of the payload */ + opal_dss.copy_payload(relay, buffer); /* no - just return */ goto relay; } diff --git a/orte/mca/routed/direct/routed_direct.c b/orte/mca/routed/direct/routed_direct.c index 12a555f352..9a963726e6 100644 --- a/orte/mca/routed/direct/routed_direct.c +++ b/orte/mca/routed/direct/routed_direct.c @@ -117,8 +117,12 @@ static orte_process_name_t get_route(orte_process_name_t *target) goto found; } - /* all routes go direct */ - ret = target; + if (ORTE_PROC_IS_APP) { + ret = ORTE_PROC_MY_HNP; + } else { + /* all routes go direct */ + ret = target; + } found: OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output, @@ -217,39 +221,32 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat) /*** MUST BE A PROC ***/ - /* if ndat=NULL, then we are being called during orte_init */ - if (NULL == ndat) { - if (NULL != orte_process_info.my_daemon_uri) { - /* we are being launched by a daemon, so we need to - * register a sync with it to get our nidmap back - */ - /* Set the contact info in the RML - this won't actually establish - * the connection, but just tells the RML how to reach the daemon - * if/when we attempt to send to it - */ - orte_rml.set_contact_info(orte_process_info.my_daemon_uri); - /* extract the daemon's name so we can update the routing table */ - if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri, - ORTE_PROC_MY_DAEMON, NULL))) { - ORTE_ERROR_LOG(rc); - return rc; - } + if (NULL != orte_process_info.my_hnp_uri) { + /* set the contact info into the hash table */ + orte_rml.set_contact_info(orte_process_info.my_hnp_uri); + + /* extract the hnp name and store it */ + if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri, + ORTE_PROC_MY_HNP, NULL))) { + ORTE_ERROR_LOG(rc); + return rc; } - return ORTE_SUCCESS; } - + /* if ndat != NULL, then this is being invoked by the proc to * init a route to a specified process that is outside of our * job family. It really doesn't matter as everything must * go direct */ - OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output, - "%s routed_direct: init routes w/non-NULL data", - ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); - - if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(ndat))) { - ORTE_ERROR_LOG(rc); - return rc; + if (NULL != ndat) { + OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output, + "%s routed_direct: init routes w/non-NULL data", + ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); + + if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(ndat))) { + ORTE_ERROR_LOG(rc); + return rc; + } } return ORTE_SUCCESS; @@ -290,7 +287,7 @@ static void get_routing_list(opal_list_t *coll) /* if I am anything other than daemons and the HNP, this * is a meaningless command as I am not allowed to route */ - if (!ORTE_PROC_IS_DAEMON || !ORTE_PROC_IS_HNP) { + if (!ORTE_PROC_IS_DAEMON && !ORTE_PROC_IS_HNP) { return; } @@ -331,14 +328,14 @@ static void get_routing_list(opal_list_t *coll) static int get_wireup_info(opal_buffer_t *buf) { - opal_byte_object_t bo, *boptr; + int rc; - /* this is a meaningless command for a direct as I am not allowed to route */ - bo.bytes = NULL; - bo.size = 0; - boptr = &bo; - - opal_dss.pack(buf, &boptr, 1, OPAL_BYTE_OBJECT); + if (ORTE_PROC_IS_HNP) { + if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(ORTE_PROC_MY_NAME->jobid, buf))) { + ORTE_ERROR_LOG(rc); + } + return rc; + } return ORTE_SUCCESS; }