Fix unity routed component by adding xcast of proc data to the daemons. This enables daemons to complete the revised modex procedure by forwarding their collected modex info to the rank=0 proc.
This commit was SVN r18039.
Этот коммит содержится в:
родитель
5adaa88241
Коммит
e396b9ee9a
@ -217,8 +217,9 @@ int orte_grpcomm_base_allgather(opal_buffer_t *sbuf, opal_buffer_t *rbuf)
|
||||
*/
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((2, orte_grpcomm_base_output,
|
||||
"%s allgather collecting buffers",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
"%s allgather collecting buffers from %ld daemons",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
(long)orte_process_info.num_daemons));
|
||||
|
||||
/* setup the recv conditions */
|
||||
allgather_failed = false;
|
||||
|
@ -205,6 +205,7 @@ static int process_callback(orte_jobid_t job, opal_buffer_t *buffer)
|
||||
orte_std_cntr_t cnt;
|
||||
char *rml_uri;
|
||||
int rc;
|
||||
orte_rml_cmd_flag_t command=ORTE_RML_UPDATE_CMD;
|
||||
|
||||
/* lookup the job object */
|
||||
if (NULL == (jdata = orte_get_job_data_object(job))) {
|
||||
@ -265,6 +266,31 @@ static int process_callback(orte_jobid_t job, opal_buffer_t *buffer)
|
||||
jdata->state = ORTE_JOB_STATE_RUNNING;
|
||||
}
|
||||
|
||||
/* first update the daemons so they will know how to talk to the
|
||||
* procs - this is required for support of modex and barrier
|
||||
*/
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
/* pack an update command */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss.pack(&buf, &command, 1, ORTE_RML_CMD))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&buf);
|
||||
return rc;
|
||||
}
|
||||
/* pack the RML contact info for each proc */
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(jdata->jobid, &buf))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&buf);
|
||||
return rc;
|
||||
}
|
||||
/* send it to the daemons via xcast */
|
||||
if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(jdata->jobid, &buf, ORTE_RML_TAG_RML_INFO_UPDATE))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&buf);
|
||||
return rc;
|
||||
}
|
||||
OBJ_DESTRUCT(&buf);
|
||||
|
||||
/* now send to the procs so they release from their barrier */
|
||||
OBJ_CONSTRUCT(&buf, opal_buffer_t);
|
||||
/* pack the RML contact info for each proc */
|
||||
if (ORTE_SUCCESS != (rc = orte_rml_base_get_contact_info(jdata->jobid, &buf))) {
|
||||
@ -272,14 +298,12 @@ static int process_callback(orte_jobid_t job, opal_buffer_t *buffer)
|
||||
OBJ_DESTRUCT(&buf);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* send it to all procs via xcast */
|
||||
if (ORTE_SUCCESS != (rc = orte_grpcomm.xcast(jdata->jobid, &buf, ORTE_RML_TAG_INIT_ROUTES))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_DESTRUCT(&buf);
|
||||
return rc;
|
||||
}
|
||||
|
||||
OBJ_DESTRUCT(&buf);
|
||||
}
|
||||
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user