We stripped the process info packing routine to minimize message size when sending the launch message, but tools still require all of that info, so modify the tool-HNP handshake to explicitly add the missing info.
Refs trac:3992. This commit was SVN r29989. The following Trac tickets were found above: Ticket 3992 --> https://svn.open-mpi.org/trac/ompi/ticket/3992
Этот коммит содержится в:
@ -758,7 +758,8 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
|||||||
orte_proc_t *proc;
|
orte_proc_t *proc;
|
||||||
orte_vpid_t vpid;
|
orte_vpid_t vpid;
|
||||||
int32_t i, num_procs;
|
int32_t i, num_procs;
|
||||||
|
char *nid;
|
||||||
|
|
||||||
/* setup the answer */
|
/* setup the answer */
|
||||||
answer = OBJ_NEW(opal_buffer_t);
|
answer = OBJ_NEW(opal_buffer_t);
|
||||||
|
|
||||||
@ -801,8 +802,25 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
|||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
goto CLEANUP;
|
goto CLEANUP;
|
||||||
}
|
}
|
||||||
|
/* the pid and nodename for this proc are no longer packed
|
||||||
|
* in the ORTE_PROC packing routines to save space for other
|
||||||
|
* uses, so we have to pack them separately
|
||||||
|
*/
|
||||||
|
if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &proc->pid, 1, OPAL_PID))) {
|
||||||
|
ORTE_ERROR_LOG(ret);
|
||||||
|
goto CLEANUP;
|
||||||
|
}
|
||||||
|
if (NULL == proc->node) {
|
||||||
|
nid = "UNKNOWN";
|
||||||
|
} else {
|
||||||
|
nid = proc->node->name;
|
||||||
|
}
|
||||||
|
if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &nid, 1, OPAL_STRING))) {
|
||||||
|
ORTE_ERROR_LOG(ret);
|
||||||
|
goto CLEANUP;
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/* count number of procs */
|
/* count number of procs */
|
||||||
@ -826,6 +844,23 @@ void orte_daemon_recv(int status, orte_process_name_t* sender,
|
|||||||
OBJ_RELEASE(answer);
|
OBJ_RELEASE(answer);
|
||||||
goto CLEANUP;
|
goto CLEANUP;
|
||||||
}
|
}
|
||||||
|
/* the pid and nodename for this proc are no longer packed
|
||||||
|
* in the ORTE_PROC packing routines to save space for other
|
||||||
|
* uses, so we have to pack them separately
|
||||||
|
*/
|
||||||
|
if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &proc->pid, 1, OPAL_PID))) {
|
||||||
|
ORTE_ERROR_LOG(ret);
|
||||||
|
goto CLEANUP;
|
||||||
|
}
|
||||||
|
if (NULL == proc->node) {
|
||||||
|
nid = "UNKNOWN";
|
||||||
|
} else {
|
||||||
|
nid = proc->node->name;
|
||||||
|
}
|
||||||
|
if (ORTE_SUCCESS != (ret = opal_dss.pack(answer, &nid, 1, OPAL_STRING))) {
|
||||||
|
ORTE_ERROR_LOG(ret);
|
||||||
|
goto CLEANUP;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -852,7 +852,7 @@ static int gather_nodes(orte_ps_mpirun_info_t *hnpinfo) {
|
|||||||
&hnpinfo->num_nodes, &hnpinfo->nodes))) {
|
&hnpinfo->num_nodes, &hnpinfo->nodes))) {
|
||||||
ORTE_ERROR_LOG(ret);
|
ORTE_ERROR_LOG(ret);
|
||||||
}
|
}
|
||||||
|
opal_output(0, "RECEIVED %d NODES", hnpinfo->num_nodes);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -560,6 +560,24 @@ int orte_util_comm_query_proc_info(const orte_process_name_t *hnp, orte_jobid_t
|
|||||||
free(proc_info);
|
free(proc_info);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
/* the pid and nodename for this proc are no longer packed
|
||||||
|
* in the ORTE_PROC packing routines to save space for other
|
||||||
|
* uses, so we have to unpack them separately
|
||||||
|
*/
|
||||||
|
cnt = 1;
|
||||||
|
if (ORTE_SUCCESS != (ret = opal_dss.unpack(&answer, &proc_info[n]->pid, &cnt, OPAL_PID))) {
|
||||||
|
ORTE_ERROR_LOG(ret);
|
||||||
|
OBJ_DESTRUCT(&answer);
|
||||||
|
free(proc_info);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
cnt = 1;
|
||||||
|
if (ORTE_SUCCESS != (ret = opal_dss.unpack(&answer, &proc_info[n]->nodename, &cnt, OPAL_STRING))) {
|
||||||
|
ORTE_ERROR_LOG(ret);
|
||||||
|
OBJ_DESTRUCT(&answer);
|
||||||
|
free(proc_info);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
*proc_info_array = proc_info;
|
*proc_info_array = proc_info;
|
||||||
*num_procs = (int)cnt_procs;
|
*num_procs = (int)cnt_procs;
|
||||||
|
Ссылка в новой задаче
Block a user