The modex code was unfortunately written solely for PMI1 when it was updated to minimize calls to PMI_get; this commit adds the required PMI2 code.
This commit was SVN r29084.
Этот коммит содержится в:
родитель
537e7380b1
Коммит
c71e760e6c
@ -184,6 +184,49 @@ static int modex(orte_grpcomm_collective_t *coll)
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
/* discover the local ranks */
|
||||
#if WANT_PMI2_SUPPORT
|
||||
{
|
||||
char *pmapping = (char*)malloc(PMI2_MAX_VALLEN);
|
||||
int found, sid, nodes, k;
|
||||
orte_vpid_t n;
|
||||
char *p;
|
||||
rc = PMI2_Info_GetJobAttr("PMI_process_mapping", pmapping, PMI2_MAX_VALLEN, &found);
|
||||
if (!found || PMI_SUCCESS != rc) { /* can't check PMI2_SUCCESS as some folks (i.e., Cray) don't define it */
|
||||
opal_output(0, "%s could not get PMI_process_mapping (PMI2_Info_GetJobAttr() failed)",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
|
||||
i = 0; n = 0; local_rank_count = 0;
|
||||
if (NULL != (p = strstr(pmapping, "(vector"))) {
|
||||
while (NULL != (p = strstr(p+1, ",("))) {
|
||||
if (3 == sscanf(p, ",(%d,%d,%d)", &sid, &nodes, &local_rank_count)) {
|
||||
for (k = 0; k < nodes; k++) {
|
||||
if ((ORTE_PROC_MY_NAME->vpid >= n) &&
|
||||
(ORTE_PROC_MY_NAME->vpid < (n + local_rank_count))) {
|
||||
break;
|
||||
}
|
||||
n += local_rank_count;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
free(pmapping);
|
||||
|
||||
if ((local_rank_count > 0) && (local_rank_count < (int)orte_process_info.num_procs)) {
|
||||
local_ranks = (int*)malloc(local_rank_count * sizeof(int));
|
||||
for (i=0; i < local_rank_count; i++) {
|
||||
local_ranks[i] = n + i;
|
||||
}
|
||||
}
|
||||
|
||||
if (NULL == local_ranks) {
|
||||
opal_output(0, "%s could not get PMI_process_mapping",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
}
|
||||
#else
|
||||
rc = PMI_Get_clique_size (&local_rank_count);
|
||||
if (PMI_SUCCESS != rc) {
|
||||
ORTE_ERROR_LOG(ORTE_ERROR);
|
||||
@ -201,6 +244,7 @@ static int modex(orte_grpcomm_collective_t *coll)
|
||||
ORTE_ERROR_LOG(ORTE_ERROR);
|
||||
return ORTE_ERROR;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/* our RTE data was constructed and pushed in the ESS pmi component */
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user