1
1

Ensure that nodes get added to the job map on the remote nodes; also add some debug output to the grpcomm daemon array construction.

This commit was SVN r32617.
Этот коммит содержится в:
Ralph Castain 2014-08-27 16:16:46 +00:00
родитель 842aaf6167
Коммит b87b69e977
2 изменённых файла: 40 добавлений и 3 удаления

Просмотреть файл

@ -205,7 +205,6 @@ orte_grpcomm_coll_t* orte_grpcomm_base_get_tracker(orte_grpcomm_signature_t *sig
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:base:returning existing collective",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
return coll;
}
}
@ -250,6 +249,11 @@ static int create_dmns(orte_grpcomm_signature_t *sig,
size_t nds;
orte_vpid_t *dns;
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:base:create_dmns called with %s signature",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(NULL == sig->signature) ? "NULL" : "NON-NULL"));
/* if the signature is NULL, then all daemons are participating */
if (NULL == sig->signature) {
*ndmns = orte_process_info.num_procs;
@ -258,6 +262,10 @@ static int create_dmns(orte_grpcomm_signature_t *sig,
}
if (ORTE_VPID_WILDCARD == sig->signature[0].vpid) {
OPAL_OUTPUT_VERBOSE((1, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:base:create_dmns called for all procs in job %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(sig->signature[0].jobid)));
/* all daemons hosting this jobid are participating */
if (NULL == (jdata = orte_get_job_data_object(sig->signature[0].jobid))) {
ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
@ -280,6 +288,10 @@ static int create_dmns(orte_grpcomm_signature_t *sig,
free(dns);
return ORTE_ERROR;
}
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:base:create_dmns adding daemon %s to array",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&node->daemon->name)));
dns[nds++] = node->daemon->name.vpid;
}
} else {
@ -324,6 +336,10 @@ static int create_dmns(orte_grpcomm_signature_t *sig,
dns = (orte_vpid_t*)malloc(opal_list_get_size(&ds) * sizeof(orte_vpid_t));
nds = 0;
while (NULL != (nm = (orte_namelist_t*)opal_list_remove_first(&ds))) {
OPAL_OUTPUT_VERBOSE((5, orte_grpcomm_base_framework.framework_output,
"%s grpcomm:base:create_dmns adding daemon %s to array",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&nm->name)));
dns[nds++] = nm->name.vpid;
OBJ_RELEASE(nm);
}

Просмотреть файл

@ -189,10 +189,12 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
int rc;
orte_std_cntr_t cnt;
orte_job_t *jdata=NULL, *daemons;
int32_t n;
int32_t n, k;
orte_proc_t *pptr, *dmn;
opal_buffer_t *bptr;
orte_app_context_t *app;
bool found;
orte_node_t *node;
OPAL_OUTPUT_VERBOSE((5, orte_odls_base_framework.framework_output,
"%s odls:constructing child list",
@ -312,9 +314,28 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *data,
}
OBJ_RETAIN(dmn->node);
pptr->node = dmn->node;
/* add proc to node - note that num_procs for the
* node was already correctly unpacked, so don't
* increment it here */
OBJ_RETAIN(pptr);
opal_pointer_array_add(dmn->node->procs, pptr);
dmn->node->num_procs++;
/* add the node to the map, if not already there */
found = false;
for (k=0; k < jdata->map->nodes->size; k++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, k))) {
continue;
}
if (node->daemon == dmn) {
found = true;
break;
}
}
if (!found) {
OBJ_RETAIN(dmn->node);
opal_pointer_array_add(jdata->map->nodes, dmn->node);
jdata->map->num_nodes++;
}
/* see if it belongs to us */
if (pptr->parent == ORTE_PROC_MY_NAME->vpid) {