
Restore the prior default rank assignment scheme for round-robin mappers. Ensure that each app_context has sequential vpids.

This commit was SVN r22048.
This commit is contained in:
Ralph Castain 2009-10-02 03:16:18 +00:00
parent c8c3132605
commit dcab61ad83
5 changed files with 85 additions and 36 deletions
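For orientation, the numbering scheme the mappers return to here is straightforward: when a mapper begins placing an app_context, it notes how many procs the job already holds (jdata->num_procs) and hands out vpids sequentially from that value, so every app_context occupies one contiguous vpid block. The following is a minimal standalone sketch of that bookkeeping; it uses simplified stand-in structs rather than the real orte_job_t/orte_proc_t types and is an illustration only, not the ORTE code itself.

#include <stdio.h>

/* Simplified stand-ins for orte_job_t / orte_proc_t -- illustration only,
 * not the real ORTE data structures. */
typedef struct { int vpid; int app_idx; } proc_t;
typedef struct { int num_procs; } job_t;

/* Mirrors the restored scheme: vpids for one app_context start at the
 * number of procs the job already holds and increase by one per proc. */
static void map_app_context(job_t *jdata, int app_idx, int nprocs,
                            proc_t *procs, int *nmapped)
{
    int start = jdata->num_procs;          /* cf. start = jdata->num_procs; */
    for (int i = 0; i < nprocs; i++) {
        procs[*nmapped].app_idx = app_idx;
        procs[*nmapped].vpid = start++;    /* cf. proc->name.vpid = start++; */
        (*nmapped)++;
    }
    /* the real mappers update jdata->num_procs after each app_context is mapped */
    jdata->num_procs += nprocs;
}

int main(void)
{
    job_t jdata = { .num_procs = 0 };
    proc_t procs[5];
    int nmapped = 0;

    map_app_context(&jdata, 0, 3, procs, &nmapped);  /* app 0 -> vpids 0..2 */
    map_app_context(&jdata, 1, 2, procs, &nmapped);  /* app 1 -> vpids 3..4 */

    for (int i = 0; i < nmapped; i++)
        printf("app %d proc -> vpid %d\n", procs[i].app_idx, procs[i].vpid);
    return 0;
}

Running the sketch prints vpids 0-2 for the first app_context and 3-4 for the second, which is the contiguity the per-app_context calls to orte_rmaps_base_compute_vpids in the diff below preserve.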

View file

@@ -128,8 +128,10 @@ int orte_rmaps_base_map_byslot(orte_job_t *jdata, orte_app_context_t *app,
     int rc=ORTE_SUCCESS;
     int i;
     orte_node_t *node;
+    orte_proc_t *proc;
     opal_list_item_t *next;
     orte_vpid_t num_alloc = 0;
+    orte_vpid_t start;
     int num_procs_to_assign, num_possible_procs;
 
     /* This loop continues until all procs have been mapped or we run
@@ -140,6 +142,8 @@ int orte_rmaps_base_map_byslot(orte_job_t *jdata, orte_app_context_t *app,
        If we still have processes that haven't been mapped yet, then it's an
        "out of resources" error. */
+    start = jdata->num_procs;
+
     while ( num_alloc < num_procs) {
         /** see if any nodes remain unused and available. We need to do this check
          * each time since we may remove nodes from the list (as they become fully
@@ -211,10 +215,11 @@ int orte_rmaps_base_map_byslot(orte_job_t *jdata, orte_app_context_t *app,
         }
         for( i = 0; i < num_procs_to_assign; ++i) {
+            proc = NULL;
             if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node,
                                                                  jdata->map->cpus_per_rank, app->idx,
                                                                  node_list, jdata->map->oversubscribe,
-                                                                 true, NULL))) {
+                                                                 true, &proc))) {
                 /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
                  * really isn't an error - we just need to break from the loop
                  * since the node is fully used up. For now, just don't report
@@ -226,6 +231,9 @@ int orte_rmaps_base_map_byslot(orte_job_t *jdata, orte_app_context_t *app,
                 }
             }
+            /* assign the vpid */
+            proc->name.vpid = start++;
+
             /* Update the number of procs allocated */
             ++num_alloc;
@@ -265,7 +273,9 @@ int orte_rmaps_base_map_bynode(orte_job_t *jdata, orte_app_context_t *app,
     int rc = ORTE_SUCCESS;
     opal_list_item_t *next;
     orte_node_t *node;
+    orte_proc_t *proc;
     orte_vpid_t num_alloc=0;
+    orte_vpid_t start;
 
     /* This loop continues until all procs have been mapped or we run
        out of resources. We determine that we have "run out of
@@ -284,6 +294,8 @@ int orte_rmaps_base_map_bynode(orte_job_t *jdata, orte_app_context_t *app,
        list, oversubscription is automatically taken care of via this logic.
     */
+    start = jdata->num_procs;
+
     while (num_alloc < num_procs) {
         /** see if any nodes remain unused and available. We need to do this check
          * each time since we may remove nodes from the list (as they become fully
@@ -307,8 +319,9 @@ int orte_rmaps_base_map_bynode(orte_job_t *jdata, orte_app_context_t *app,
         /* Allocate a slot on this node */
         node = (orte_node_t*) cur_node_item;
+        proc = NULL;
         if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, jdata->map->cpus_per_rank, app->idx,
-                                                             node_list, jdata->map->oversubscribe, true, NULL))) {
+                                                             node_list, jdata->map->oversubscribe, true, &proc))) {
             /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
              * really isn't an error - we just need to break from the loop
             * since the node is fully used up. For now, just don't report
@@ -320,6 +333,10 @@ int orte_rmaps_base_map_bynode(orte_job_t *jdata, orte_app_context_t *app,
             }
         }
+        /* assign the vpid */
+        proc->name.vpid = start++;
+
+        /* Update the number of procs allocated */
         ++num_alloc;
 
         cur_node_item = next;

View file

@@ -387,7 +387,7 @@ int orte_rmaps_base_claim_slot(orte_job_t *jdata,
 int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
 {
     orte_job_map_t *map;
-    orte_vpid_t vpid, vpid_start=0;
+    orte_vpid_t vpid, vpid_start;
     int i, j;
     orte_node_t *node;
     orte_proc_t *proc;
@@ -395,28 +395,31 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
     map = jdata->map;
 
-    if (ORTE_MAPPING_BYUSER & map->policy) {
-        /* find the max vpid already assigned */
-        vpid_start = ORTE_VPID_MIN;
-        for (i=0; i < map->nodes->size; i++) {
-            if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
-                continue;
-            }
-            for (j=0; j < node->procs->size; j++) {
-                if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
-                    continue;
-                }
-                /* ignore procs from other jobs */
-                if (proc->name.jobid != jdata->jobid) {
-                    continue;
-                }
-                /* if the vpid is already defined, then update start */
-                if (ORTE_VPID_INVALID != proc->name.vpid &&
-                    vpid_start < proc->name.vpid) {
-                    vpid_start = proc->name.vpid;
-                }
-            }
-        }
-        /* we start one higher than the max found */
-        vpid_start++;
-    }
+    /* find the max vpid already assigned */
+    vpid_start = ORTE_VPID_MIN;
+    for (i=0; i < map->nodes->size; i++) {
+        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
+            continue;
+        }
+        for (j=0; j < node->procs->size; j++) {
+            if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
+                continue;
+            }
+            /* ignore procs from other jobs */
+            if (proc->name.jobid != jdata->jobid) {
+                continue;
+            }
+            /* if the vpid is already defined, then update start */
+            if (ORTE_VPID_INVALID != proc->name.vpid &&
+                vpid_start < proc->name.vpid) {
+                vpid_start = proc->name.vpid;
+            }
+        }
+    }
+    if (ORTE_VPID_MIN == vpid_start) {
+        /* start at zero */
+        vpid_start = 0;
+    } else {
+        /* we start one higher than the max found */
+        vpid_start++;
+    }

View file

@@ -68,12 +68,6 @@ static int switchyard(orte_job_t *jdata)
         return rc;
     }
 
-    /* compute vpids and add proc objects to the job */
-    if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
-        ORTE_ERROR_LOG(rc);
-        return rc;
-    }
-
     /* compute and save local ranks */
     if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
         ORTE_ERROR_LOG(rc);
@@ -166,6 +160,14 @@ static int npernode(orte_job_t *jdata)
                            "npernode", orte_rmaps_base.npernode);
            return ORTE_ERR_SILENT;
        }
+        /* compute vpids and add proc objects to the job - this has to be
+         * done after each app_context is mapped in order to keep the
+         * vpids contiguous within an app_context
+         */
+        if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
+            ORTE_ERROR_LOG(rc);
+            return rc;
+        }
    }
 
    jdata->num_procs = total_procs;
@@ -253,6 +255,14 @@ static int nperboard(orte_job_t *jdata)
                            "nperboard", orte_rmaps_base.nperboard);
            return ORTE_ERR_SILENT;
        }
+        /* compute vpids and add proc objects to the job - this has to be
+         * done after each app_context is mapped in order to keep the
+         * vpids contiguous within an app_context
+         */
+        if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
+            ORTE_ERROR_LOG(rc);
+            return rc;
+        }
    }
 
    jdata->num_procs = total_procs;
@@ -345,6 +355,14 @@ static int npersocket(orte_job_t *jdata)
                            "npersocket", orte_rmaps_base.npersocket);
            return ORTE_ERR_SILENT;
        }
+        /* compute vpids and add proc objects to the job - this has to be
+         * done after each app_context is mapped in order to keep the
+         * vpids contiguous within an app_context
+         */
+        if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
+            ORTE_ERROR_LOG(rc);
+            return rc;
+        }
    }
 
    jdata->num_procs = total_procs;
@@ -480,6 +498,14 @@ static int loadbalance(orte_job_t *jdata)
                            "number of nodes", num_nodes);
            return ORTE_ERR_SILENT;
        }
+        /* compute vpids and add proc objects to the job - this has to be
+         * done after each app_context is mapped in order to keep the
+         * vpids contiguous within an app_context
+         */
+        if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
+            ORTE_ERROR_LOG(rc);
+            return rc;
+        }
    }
    /* record the number of procs */
    jdata->num_procs = total_procs;

View file

@@ -478,6 +478,15 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
        }
 
    cleanup:
+        /* compute vpids and add proc objects to the job - this has to be
+         * done after each app_context is mapped in order to keep the
+         * vpids contiguous within an app_context
+         */
+        if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
+            ORTE_ERROR_LOG(rc);
+            return rc;
+        }
+
        /* track number of procs */
        jdata->num_procs += app->num_procs;
@@ -490,12 +499,6 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
        OBJ_DESTRUCT(&node_list);
    }
 
-    /* compute vpids and add proc objects to the job */
-    if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
-        ORTE_ERROR_LOG(rc);
-        return rc;
-    }
-
    /* compute and save local ranks */
    if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
        ORTE_ERROR_LOG(rc);

View file

@@ -92,9 +92,6 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
            app->num_procs = num_slots;
        }
 
-        /* track the total number of processes we mapped */
-        jdata->num_procs += app->num_procs;
-
        /* Make assignments */
        if (jdata->map->policy & ORTE_MAPPING_BYNODE) {
            rc = orte_rmaps_base_map_bynode(jdata, app, &node_list,
@@ -108,6 +105,9 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
            goto error;
        }
 
+        /* track the total number of processes we mapped */
+        jdata->num_procs += app->num_procs;
+
        /* cleanup the node list - it can differ from one app_context
         * to another, so we have to get it every time
         */