Ensure the nodes get added to the job map on the remote nodes, and add some debug output to the grpcomm daemon array construction.
This commit was SVN r32617.
Этот коммит содержится в:
родитель
842aaf6167
Коммит
b87b69e977
@ -205,7 +205,6 @@ orte_grpcomm_coll_t* orte_grpcomm_base_get_tracker(orte_grpcomm_signature_t *sig
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output,
|
||||
"%s grpcomm:base:returning existing collective",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
return coll;
|
||||
}
|
||||
}
|
||||
@ -250,6 +249,11 @@ static int create_dmns(orte_grpcomm_signature_t *sig,
|
||||
size_t nds;
|
||||
orte_vpid_t *dns;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output,
|
||||
"%s grpcomm:base:create_dmns called with %s signature",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(NULL == sig->signature) ? "NULL" : "NON-NULL"));
|
||||
|
||||
/* if the signature is NULL, then all daemons are participating */
|
||||
if (NULL == sig->signature) {
|
||||
*ndmns = orte_process_info.num_procs;
|
||||
@ -258,6 +262,10 @@ static int create_dmns(orte_grpcomm_signature_t *sig,
|
||||
}
|
||||
|
||||
if (ORTE_VPID_WILDCARD == sig->signature[0].vpid) {
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output,
|
||||
"%s grpcomm:base:create_dmns called for all procs in job %s",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_JOBID_PRINT(sig->signature[0].jobid)));
|
||||
/* all daemons hosting this jobid are participating */
|
||||
if (NULL == (jdata = orte_get_job_data_object(sig->signature[0].jobid))) {
|
||||
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
|
||||
@ -280,6 +288,10 @@ static int create_dmns(orte_grpcomm_signature_t *sig,
|
||||
free(dns);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
|
||||
"%s grpcomm:base:create_dmns adding daemon %s to array",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&node->daemon->name)));
|
||||
dns[nds++] = node->daemon->name.vpid;
|
||||
}
|
||||
} else {
|
||||
@ -324,6 +336,10 @@ static int create_dmns(orte_grpcomm_signature_t *sig,
|
||||
dns = (orte_vpid_t*)malloc(opal_list_get_size(&ds) * sizeof(orte_vpid_t));
|
||||
nds = 0;
|
||||
while (NULL != (nm = (orte_namelist_t*)opal_list_remove_first(&ds))) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
|
||||
"%s grpcomm:base:create_dmns adding daemon %s to array",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
ORTE_NAME_PRINT(&nm->name)));
|
||||
dns[nds++] = nm->name.vpid;
|
||||
OBJ_RELEASE(nm);
|
||||
}
|
||||
|
@ -189,10 +189,12 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
|
||||
int rc;
|
||||
orte_std_cntr_t cnt;
|
||||
orte_job_t *jdata=NULL, *daemons;
|
||||
int32_t n;
|
||||
int32_t n, k;
|
||||
orte_proc_t *pptr, *dmn;
|
||||
opal_buffer_t *bptr;
|
||||
orte_app_context_t *app;
|
||||
bool found;
|
||||
orte_node_t *node;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
|
||||
"%s odls:constructing child list",
|
||||
@ -312,9 +314,28 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
|
||||
}
|
||||
OBJ_RETAIN(dmn->node);
|
||||
pptr->node = dmn->node;
|
||||
/* add proc to node - note that num_procs for the
|
||||
* node was already correctly unpacked, so don't
|
||||
* increment it here */
|
||||
OBJ_RETAIN(pptr);
|
||||
opal_pointer_array_add(dmn->node->procs, pptr);
|
||||
dmn->node->num_procs++;
|
||||
|
||||
/* add the node to the map, if not already there */
|
||||
found = false;
|
||||
for (k=0; k < jdata->map->nodes->size; k++) {
|
||||
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, k))) {
|
||||
continue;
|
||||
}
|
||||
if (node->daemon == dmn) {
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
OBJ_RETAIN(dmn->node);
|
||||
opal_pointer_array_add(jdata->map->nodes, dmn->node);
|
||||
jdata->map->num_nodes++;
|
||||
}
|
||||
|
||||
/* see if it belongs to us */
|
||||
if (pptr->parent == ORTE_PROC_MY_NAME->vpid) {
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user