1
1

fixes for direct routed component under mpirun

Этот коммит содержится в:
Elena 2014-11-26 13:36:49 +02:00
родитель 8cae899a42
Коммит b17ea23ce0
2 изменённых файлов: 35 добавлений и 36 удалений

Просмотреть файл

@ -391,6 +391,8 @@ static void xcast_recv(int status, orte_process_name_t* sender,
goto relay; goto relay;
} }
if (0 == flag) { if (0 == flag) {
/* copy the remainder of the payload */
opal_dss.copy_payload(relay, buffer);
/* no - just return */ /* no - just return */
goto relay; goto relay;
} }

Просмотреть файл

@ -117,8 +117,12 @@ static orte_process_name_t get_route(orte_process_name_t *target)
goto found; goto found;
} }
/* all routes go direct */ if (ORTE_PROC_IS_APP) {
ret = target; ret = ORTE_PROC_MY_HNP;
} else {
/* all routes go direct */
ret = target;
}
found: found:
OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
@ -217,39 +221,32 @@ static int init_routes(orte_jobid_t job, opal_buffer_t *ndat)
/*** MUST BE A PROC ***/ /*** MUST BE A PROC ***/
/* if ndat=NULL, then we are being called during orte_init */ if (NULL != orte_process_info.my_hnp_uri) {
if (NULL == ndat) { /* set the contact info into the hash table */
if (NULL != orte_process_info.my_daemon_uri) { orte_rml.set_contact_info(orte_process_info.my_hnp_uri);
/* we are being launched by a daemon, so we need to
* register a sync with it to get our nidmap back /* extract the hnp name and store it */
*/ if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_hnp_uri,
/* Set the contact info in the RML - this won't actually establish ORTE_PROC_MY_HNP, NULL))) {
* the connection, but just tells the RML how to reach the daemon ORTE_ERROR_LOG(rc);
* if/when we attempt to send to it return rc;
*/
orte_rml.set_contact_info(orte_process_info.my_daemon_uri);
/* extract the daemon's name so we can update the routing table */
if (ORTE_SUCCESS != (rc = orte_rml_base_parse_uris(orte_process_info.my_daemon_uri,
ORTE_PROC_MY_DAEMON, NULL))) {
ORTE_ERROR_LOG(rc);
return rc;
}
} }
return ORTE_SUCCESS;
} }
/* if ndat != NULL, then this is being invoked by the proc to /* if ndat != NULL, then this is being invoked by the proc to
* init a route to a specified process that is outside of our * init a route to a specified process that is outside of our
* job family. It really doesn't matter as everything must * job family. It really doesn't matter as everything must
* go direct * go direct
*/ */
OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output, if (NULL != ndat) {
"%s routed_direct: init routes w/non-NULL data", OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); "%s routed_direct: init routes w/non-NULL data",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(ndat))) {
ORTE_ERROR_LOG(rc); if (ORTE_SUCCESS != (rc = orte_rml_base_update_contact_info(ndat))) {
return rc; ORTE_ERROR_LOG(rc);
return rc;
}
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;
@ -290,7 +287,7 @@ static void get_routing_list(opal_list_t *coll)
/* if I am anything other than daemons and the HNP, this /* if I am anything other than daemons and the HNP, this
* is a meaningless command as I am not allowed to route * is a meaningless command as I am not allowed to route
*/ */
if (!ORTE_PROC_IS_DAEMON || !ORTE_PROC_IS_HNP) { if (!ORTE_PROC_IS_DAEMON && !ORTE_PROC_IS_HNP) {
return; return;
} }
@ -331,14 +328,14 @@ static void get_routing_list(opal_list_t *coll)
static int get_wireup_info(opal_buffer_t *buf) static int get_wireup_info(opal_buffer_t *buf)
{ {
opal_byte_object_t bo, *boptr; int rc;
/* this is a meaningless command for a direct as I am not allowed to route */ if (ORTE_PROC_IS_HNP) {
bo.bytes = NULL; if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(ORTE_PROC_MY_NAME->jobid, buf))) {
bo.size = 0; ORTE_ERROR_LOG(rc);
boptr = &bo; }
return rc;
opal_dss.pack(buf, &boptr, 1, OPAL_BYTE_OBJECT); }
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }