diff --git a/orte/mca/rmaps/base/rmaps_base_common_mappers.c b/orte/mca/rmaps/base/rmaps_base_common_mappers.c
index d7df9f125e..0e5d33c0df 100644
--- a/orte/mca/rmaps/base/rmaps_base_common_mappers.c
+++ b/orte/mca/rmaps/base/rmaps_base_common_mappers.c
@@ -128,8 +128,10 @@ int orte_rmaps_base_map_byslot(orte_job_t *jdata, orte_app_context_t *app,
     int rc=ORTE_SUCCESS;
     int i;
     orte_node_t *node;
+    orte_proc_t *proc;
     opal_list_item_t *next;
     orte_vpid_t num_alloc = 0;
+    orte_vpid_t start;
     int num_procs_to_assign, num_possible_procs;
 
     /* This loop continues until all procs have been mapped or we run
@@ -140,6 +142,8 @@ int orte_rmaps_base_map_byslot(orte_job_t *jdata, orte_app_context_t *app,
        If we still have processes that haven't been mapped yet, then
        it's an "out of resources" error. */
 
+    start = jdata->num_procs;
+
     while ( num_alloc < num_procs) {
         /** see if any nodes remain unused and available. We need to do this check
          * each time since we may remove nodes from the list (as they become fully
@@ -211,10 +215,11 @@ int orte_rmaps_base_map_byslot(orte_job_t *jdata, orte_app_context_t *app,
         }
 
         for( i = 0; i < num_procs_to_assign; ++i) {
+            proc = NULL;
            if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, jdata->map->cpus_per_rank, app->idx,
                                                                  node_list, jdata->map->oversubscribe,
-                                                                 true, NULL))) {
+                                                                 true, &proc))) {
                 /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
                  * really isn't an error - we just need to break from the loop
                  * since the node is fully used up. For now, just don't report
@@ -226,6 +231,9 @@ int orte_rmaps_base_map_byslot(orte_job_t *jdata, orte_app_context_t *app,
                 }
             }
 
+            /* assign the vpid */
+            proc->name.vpid = start++;
+
             /* Update the number of procs allocated */
             ++num_alloc;
 
@@ -265,7 +273,9 @@ int orte_rmaps_base_map_bynode(orte_job_t *jdata, orte_app_context_t *app,
     int rc = ORTE_SUCCESS;
     opal_list_item_t *next;
     orte_node_t *node;
+    orte_proc_t *proc;
     orte_vpid_t num_alloc=0;
+    orte_vpid_t start;
 
     /* This loop continues until all procs have been mapped or we run
        out of resources. We determine that we have "run out of
@@ -284,6 +294,8 @@ int orte_rmaps_base_map_bynode(orte_job_t *jdata, orte_app_context_t *app,
        list, oversubscription is automatically taken care of via this logic.
      */
 
+    start = jdata->num_procs;
+
     while (num_alloc < num_procs) {
         /** see if any nodes remain unused and available. We need to do this check
          * each time since we may remove nodes from the list (as they become fully
@@ -307,8 +319,9 @@ int orte_rmaps_base_map_bynode(orte_job_t *jdata, orte_app_context_t *app,
 
         /* Allocate a slot on this node */
         node = (orte_node_t*) cur_node_item;
+        proc = NULL;
         if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, jdata->map->cpus_per_rank, app->idx,
-                                                             node_list, jdata->map->oversubscribe, true, NULL))) {
+                                                             node_list, jdata->map->oversubscribe, true, &proc))) {
             /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
              * really isn't an error - we just need to break from the loop
              * since the node is fully used up. For now, just don't report
@@ -320,6 +333,10 @@ int orte_rmaps_base_map_bynode(orte_job_t *jdata, orte_app_context_t *app,
             }
         }
 
+        /* assign the vpid */
+        proc->name.vpid = start++;
+
+
         /* Update the number of procs allocated */
         ++num_alloc;
 
         cur_node_item = next;
diff --git a/orte/mca/rmaps/base/rmaps_base_support_fns.c b/orte/mca/rmaps/base/rmaps_base_support_fns.c
index 712400aa53..1d1b15b657 100644
--- a/orte/mca/rmaps/base/rmaps_base_support_fns.c
+++ b/orte/mca/rmaps/base/rmaps_base_support_fns.c
@@ -387,7 +387,7 @@ int orte_rmaps_base_claim_slot(orte_job_t *jdata,
 int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
 {
     orte_job_map_t *map;
-    orte_vpid_t vpid, vpid_start=0;
+    orte_vpid_t vpid, vpid_start;
     int i, j;
     orte_node_t *node;
     orte_proc_t *proc;
@@ -395,28 +395,31 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
 
     map = jdata->map;
 
-    if (ORTE_MAPPING_BYUSER & map->policy) {
-        /* find the max vpid already assigned */
-        vpid_start = ORTE_VPID_MIN;
-        for (i=0; i < map->nodes->size; i++) {
-            if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
+    /* find the max vpid already assigned */
+    vpid_start = ORTE_VPID_MIN;
+    for (i=0; i < map->nodes->size; i++) {
+        if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
+            continue;
+        }
+        for (j=0; j < node->procs->size; j++) {
+            if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
                 continue;
             }
-            for (j=0; j < node->procs->size; j++) {
-                if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
-                    continue;
-                }
-                /* ignore procs from other jobs */
-                if (proc->name.jobid != jdata->jobid) {
-                    continue;
-                }
-                /* if the vpid is already defined, then update start */
-                if (ORTE_VPID_INVALID != proc->name.vpid &&
-                    vpid_start < proc->name.vpid) {
-                    vpid_start = proc->name.vpid;
-                }
+            /* ignore procs from other jobs */
+            if (proc->name.jobid != jdata->jobid) {
+                continue;
+            }
+            /* if the vpid is already defined, then update start */
+            if (ORTE_VPID_INVALID != proc->name.vpid &&
+                vpid_start < proc->name.vpid) {
+                vpid_start = proc->name.vpid;
             }
         }
+    }
+    if (ORTE_VPID_MIN == vpid_start) {
+        /* start at zero */
+        vpid_start = 0;
+    } else {
         /* we start one higher than the max found */
         vpid_start++;
     }
diff --git a/orte/mca/rmaps/load_balance/rmaps_lb.c b/orte/mca/rmaps/load_balance/rmaps_lb.c
index c63aeca771..47461a93b8 100644
--- a/orte/mca/rmaps/load_balance/rmaps_lb.c
+++ b/orte/mca/rmaps/load_balance/rmaps_lb.c
@@ -68,12 +68,6 @@ static int switchyard(orte_job_t *jdata)
         return rc;
     }
 
-    /* compute vpids and add proc objects to the job */
-    if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
-        ORTE_ERROR_LOG(rc);
-        return rc;
-    }
-
     /* compute and save local ranks */
     if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
         ORTE_ERROR_LOG(rc);
@@ -166,6 +160,14 @@ static int npernode(orte_job_t *jdata)
                            "npernode", orte_rmaps_base.npernode);
             return ORTE_ERR_SILENT;
         }
+        /* compute vpids and add proc objects to the job - this has to be
+         * done after each app_context is mapped in order to keep the
+         * vpids contiguous within an app_context
+         */
+        if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
+            ORTE_ERROR_LOG(rc);
+            return rc;
+        }
     }
     jdata->num_procs = total_procs;
 
@@ -253,6 +255,14 @@ static int nperboard(orte_job_t *jdata)
                            "nperboard", orte_rmaps_base.nperboard);
             return ORTE_ERR_SILENT;
         }
+        /* compute vpids and add proc objects to the job - this has to be
+         * done after each app_context is mapped in order to keep the
+         * vpids contiguous within an app_context
+         */
+        if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
+            ORTE_ERROR_LOG(rc);
+            return rc;
+        }
     }
     jdata->num_procs = total_procs;
 
@@ -345,6 +355,14 @@ static int npersocket(orte_job_t *jdata)
                            "npersocket", orte_rmaps_base.npersocket);
             return ORTE_ERR_SILENT;
         }
+        /* compute vpids and add proc objects to the job - this has to be
+         * done after each app_context is mapped in order to keep the
+         * vpids contiguous within an app_context
+         */
+        if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
+            ORTE_ERROR_LOG(rc);
+            return rc;
+        }
     }
     jdata->num_procs = total_procs;
 
@@ -480,6 +498,14 @@ static int loadbalance(orte_job_t *jdata)
                            "number of nodes", num_nodes);
             return ORTE_ERR_SILENT;
         }
+        /* compute vpids and add proc objects to the job - this has to be
+         * done after each app_context is mapped in order to keep the
+         * vpids contiguous within an app_context
+         */
+        if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
+            ORTE_ERROR_LOG(rc);
+            return rc;
+        }
     }
     /* record the number of procs */
     jdata->num_procs = total_procs;
diff --git a/orte/mca/rmaps/resilient/rmaps_resilient.c b/orte/mca/rmaps/resilient/rmaps_resilient.c
index 0b2b32d086..fe5c06482c 100644
--- a/orte/mca/rmaps/resilient/rmaps_resilient.c
+++ b/orte/mca/rmaps/resilient/rmaps_resilient.c
@@ -478,6 +478,15 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
         }
 
     cleanup:
+        /* compute vpids and add proc objects to the job - this has to be
+         * done after each app_context is mapped in order to keep the
+         * vpids contiguous within an app_context
+         */
+        if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
+            ORTE_ERROR_LOG(rc);
+            return rc;
+        }
+
         /* track number of procs */
         jdata->num_procs += app->num_procs;
 
@@ -490,12 +499,6 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
         OBJ_DESTRUCT(&node_list);
     }
 
-    /* compute vpids and add proc objects to the job */
-    if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_vpids(jdata))) {
-        ORTE_ERROR_LOG(rc);
-        return rc;
-    }
-
     /* compute and save local ranks */
     if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
         ORTE_ERROR_LOG(rc);
diff --git a/orte/mca/rmaps/round_robin/rmaps_rr.c b/orte/mca/rmaps/round_robin/rmaps_rr.c
index 69b0e35215..e5dbb329df 100644
--- a/orte/mca/rmaps/round_robin/rmaps_rr.c
+++ b/orte/mca/rmaps/round_robin/rmaps_rr.c
@@ -92,9 +92,6 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
             app->num_procs = num_slots;
         }
 
-        /* track the total number of processes we mapped */
-        jdata->num_procs += app->num_procs;
-
         /* Make assignments */
         if (jdata->map->policy & ORTE_MAPPING_BYNODE) {
             rc = orte_rmaps_base_map_bynode(jdata, app, &node_list,
@@ -108,6 +105,9 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
             goto error;
         }
 
+        /* track the total number of processes we mapped */
+        jdata->num_procs += app->num_procs;
+
         /* cleanup the node list - it can differ from one app_context
          * to another, so we have to get it every time
         */
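
The common thread in this patch: the byslot/bynode mappers now assign each new proc's vpid inline, starting at jdata->num_procs for the current app_context, while the other mappers call orte_rmaps_base_compute_vpids() once per app_context instead of once per job. Either way the numbering rule is the same - find the highest vpid already assigned, then continue one past it, so each app_context's ranks form one contiguous block. Below is a minimal standalone sketch of that rule, not the actual ORTE code: proc_t, VPID_INVALID, and compute_vpids() are simplified stand-ins for orte_proc_t, ORTE_VPID_INVALID, and orte_rmaps_base_compute_vpids().

#include <stdio.h>
#include <stdint.h>

#define VPID_INVALID UINT32_MAX   /* stand-in for ORTE_VPID_INVALID */

typedef struct {
    uint32_t vpid;                /* stand-in for orte_proc_t's name.vpid */
} proc_t;

/* Assign contiguous vpids to every proc still marked invalid,
 * continuing one past the highest vpid already present - the
 * "find the max, then start one higher" rule the patch applies
 * after each app_context is mapped. */
static void compute_vpids(proc_t *procs, int nprocs)
{
    uint32_t next = 0;
    int i, found = 0;

    /* find the max vpid already assigned */
    for (i = 0; i < nprocs; i++) {
        if (VPID_INVALID != procs[i].vpid &&
            (!found || procs[i].vpid > next)) {
            next = procs[i].vpid;
            found = 1;
        }
    }
    /* start at zero if nothing was assigned yet,
     * else one higher than the max found */
    if (found) {
        next++;
    }

    /* number the still-unassigned procs in mapping order */
    for (i = 0; i < nprocs; i++) {
        if (VPID_INVALID == procs[i].vpid) {
            procs[i].vpid = next++;
        }
    }
}

int main(void)
{
    /* app_context 0 already holds ranks 0-2; app_context 1 adds three
     * unassigned procs, which must become ranks 3-5 */
    proc_t procs[6] = { {0}, {1}, {2},
                        {VPID_INVALID}, {VPID_INVALID}, {VPID_INVALID} };
    int i;

    compute_vpids(procs, 6);
    for (i = 0; i < 6; i++) {
        printf("proc %d -> vpid %u\n", i, procs[i].vpid);
    }
    return 0;
}

One deliberate difference from the patched orte_rmaps_base_compute_vpids(): the sketch tracks "nothing assigned yet" with an explicit found flag rather than comparing against the ORTE_VPID_MIN sentinel, which keeps the empty-job case unambiguous even when a legitimate vpid equals the sentinel value.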