Merge pull request #3702 from rhc54/topic/rf
Fix rank-file mapper launch by correctly setting up the remote map from the provided data
This commit is contained in:
Commit
a7741ab120
@ -219,10 +219,12 @@ static void _event_hdlr(int sd, short args, void *cbdata)
|
|||||||
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
opal_output_verbose(2, opal_pmix_base_framework.framework_output,
|
||||||
"%s _EVENT_HDLR CALLING EVHDLR",
|
"%s _EVENT_HDLR CALLING EVHDLR",
|
||||||
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
OPAL_NAME_PRINT(OPAL_PROC_MY_NAME));
|
||||||
event->handler(cd->status, &cd->pname,
|
if (NULL != event->handler) {
|
||||||
cd->info, &cd->results,
|
event->handler(cd->status, &cd->pname,
|
||||||
return_local_event_hdlr, (void*)cd);
|
cd->info, &cd->results,
|
||||||
return;
|
return_local_event_hdlr, (void*)cd);
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* if we didn't find a match, we still have to call their final callback */
|
/* if we didn't find a match, we still have to call their final callback */
|
||||||
|
@ -279,6 +279,7 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
|
|||||||
int rc;
|
int rc;
|
||||||
orte_std_cntr_t cnt;
|
orte_std_cntr_t cnt;
|
||||||
orte_job_t *jdata=NULL, *daemons;
|
orte_job_t *jdata=NULL, *daemons;
|
||||||
|
orte_node_t *node;
|
||||||
int32_t n, k;
|
int32_t n, k;
|
||||||
opal_buffer_t *bptr;
|
opal_buffer_t *bptr;
|
||||||
orte_proc_t *pptr, *dmn;
|
orte_proc_t *pptr, *dmn;
|
||||||
@ -436,7 +437,8 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
|
|||||||
/* not ready for use yet */
|
/* not ready for use yet */
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
|
if (!ORTE_PROC_IS_HNP &&
|
||||||
|
orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
|
||||||
/* the parser will have already made the connection, but the fully described
|
/* the parser will have already made the connection, but the fully described
|
||||||
* case won't have done it, so connect the proc to its node here */
|
* case won't have done it, so connect the proc to its node here */
|
||||||
opal_output_verbose(5, orte_odls_base_framework.framework_output,
|
opal_output_verbose(5, orte_odls_base_framework.framework_output,
|
||||||
@ -457,6 +459,17 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
|
|||||||
}
|
}
|
||||||
OBJ_RETAIN(dmn->node);
|
OBJ_RETAIN(dmn->node);
|
||||||
pptr->node = dmn->node;
|
pptr->node = dmn->node;
|
||||||
|
/* add the node to the job map, if needed */
|
||||||
|
if (!ORTE_FLAG_TEST(pptr->node, ORTE_NODE_FLAG_MAPPED)) {
|
||||||
|
OBJ_RETAIN(pptr->node);
|
||||||
|
opal_pointer_array_add(jdata->map->nodes, pptr->node);
|
||||||
|
jdata->map->num_nodes++;
|
||||||
|
ORTE_FLAG_SET(pptr->node, ORTE_NODE_FLAG_MAPPED);
|
||||||
|
}
|
||||||
|
/* add this proc to that node */
|
||||||
|
OBJ_RETAIN(pptr);
|
||||||
|
opal_pointer_array_add(pptr->node->procs, pptr);
|
||||||
|
pptr->node->num_procs++;
|
||||||
}
|
}
|
||||||
/* see if it belongs to us */
|
/* see if it belongs to us */
|
||||||
if (pptr->parent == ORTE_PROC_MY_NAME->vpid) {
|
if (pptr->parent == ORTE_PROC_MY_NAME->vpid) {
|
||||||
@ -485,6 +498,14 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
|
|||||||
ORTE_FLAG_SET(app, ORTE_APP_FLAG_USED_ON_NODE);
|
ORTE_FLAG_SET(app, ORTE_APP_FLAG_USED_ON_NODE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
|
||||||
|
/* reset the mapped flags */
|
||||||
|
for (n=0; n < jdata->map->nodes->size; n++) {
|
||||||
|
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, n))) {
|
||||||
|
ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_MAPPED);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
|
if (!orte_get_attribute(&jdata->attributes, ORTE_JOB_FULLY_DESCRIBED, NULL, OPAL_BOOL)) {
|
||||||
/* compute and save bindings of local children */
|
/* compute and save bindings of local children */
|
||||||
|
Loading…
x
Reference in new issue
Block a user