Per the OMPI devel conference, which changed our default behaviors:

* default to bind-to core
* map-by slot if np <= 2
* map-by socket (balance across sockets on each node) if np > 2
* map-by <obj> will imply rank-by <obj> by default (leave default binding as above)

Fix a bug in the map-by <obj> mapper where we incorrectly compute the #procs to assign if the #slots > #procs.

cmr=v1.7.4:reviewer=jsquyres:subject=Update default binding and mapping values

This commit was SVN r29919.
Этот коммит содержится в:
родитель
efb32da1e0
Коммит
8b6d117541
@ -124,7 +124,7 @@ static int opal_hwloc_base_register(mca_base_register_flag_t flags)
|
|||||||
|
|
||||||
opal_hwloc_base_binding_policy = NULL;
|
opal_hwloc_base_binding_policy = NULL;
|
||||||
(void) mca_base_var_register("opal", "hwloc", "base", "binding_policy",
|
(void) mca_base_var_register("opal", "hwloc", "base", "binding_policy",
|
||||||
"Policy for binding processes [none (default) | hwthread | core | l1cache | l2cache | l3cache | socket | numa | board] (supported qualifiers: overload-allowed,if-supported)",
|
"Policy for binding processes [none | hwthread | core (default) | l1cache | l2cache | l3cache | socket | numa | board] (supported qualifiers: overload-allowed,if-supported)",
|
||||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
|
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
|
||||||
MCA_BASE_VAR_SCOPE_READONLY, &opal_hwloc_base_binding_policy);
|
MCA_BASE_VAR_SCOPE_READONLY, &opal_hwloc_base_binding_policy);
|
||||||
|
|
||||||
@ -189,7 +189,7 @@ static int opal_hwloc_base_open(mca_base_open_flag_t flags)
|
|||||||
|
|
||||||
/* binding specification */
|
/* binding specification */
|
||||||
if (NULL == opal_hwloc_base_binding_policy) {
|
if (NULL == opal_hwloc_base_binding_policy) {
|
||||||
opal_hwloc_binding_policy = OPAL_BIND_TO_NONE;
|
opal_hwloc_binding_policy = OPAL_BIND_TO_CORE;
|
||||||
/* mark that no binding policy was specified */
|
/* mark that no binding policy was specified */
|
||||||
opal_hwloc_binding_policy &= ~OPAL_BIND_GIVEN;
|
opal_hwloc_binding_policy &= ~OPAL_BIND_GIVEN;
|
||||||
} else if (0 == strncasecmp(opal_hwloc_base_binding_policy, "none", strlen("none"))) {
|
} else if (0 == strncasecmp(opal_hwloc_base_binding_policy, "none", strlen("none"))) {
|
||||||
|
@ -643,8 +643,7 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
|
|||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding) ||
|
if (OPAL_BIND_TO_NONE == OPAL_GET_BINDING_POLICY(jdata->map->binding)) {
|
||||||
OPAL_BIND_TO_NONE == OPAL_GET_BINDING_POLICY(jdata->map->binding)) {
|
|
||||||
/* no binding requested */
|
/* no binding requested */
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
}
|
}
|
||||||
|
@ -102,7 +102,7 @@ static int orte_rmaps_base_register(mca_base_register_flag_t flags)
|
|||||||
rmaps_base_mapping_policy = NULL;
|
rmaps_base_mapping_policy = NULL;
|
||||||
var_id = mca_base_var_register("orte", "rmaps", "base", "mapping_policy",
|
var_id = mca_base_var_register("orte", "rmaps", "base", "mapping_policy",
|
||||||
#if OPAL_HAVE_HWLOC
|
#if OPAL_HAVE_HWLOC
|
||||||
"Mapping Policy [slot (default) | hwthread | core | l1cache | l2cache | l3cache | socket | numa | board | node | seq | dist], with allowed modifiers :SPAN,OVERSUBSCRIBE,NOOVERSUBSCRIBE",
|
"Mapping Policy [slot (default:np<=2) | hwthread | core | l1cache | l2cache | l3cache | socket (default:np>2) | numa | board | node | seq | dist], with allowed modifiers :SPAN,OVERSUBSCRIBE,NOOVERSUBSCRIBE",
|
||||||
#else
|
#else
|
||||||
"Mapping Policy [slot (default) | node], with allowed modifiers :SPAN,OVERSUBSCRIBE,NOOVERSUBSCRIBE",
|
"Mapping Policy [slot (default) | node], with allowed modifiers :SPAN,OVERSUBSCRIBE,NOOVERSUBSCRIBE",
|
||||||
#endif
|
#endif
|
||||||
@ -117,7 +117,7 @@ static int orte_rmaps_base_register(mca_base_register_flag_t flags)
|
|||||||
rmaps_base_ranking_policy = NULL;
|
rmaps_base_ranking_policy = NULL;
|
||||||
(void) mca_base_var_register("orte", "rmaps", "base", "ranking_policy",
|
(void) mca_base_var_register("orte", "rmaps", "base", "ranking_policy",
|
||||||
#if OPAL_HAVE_HWLOC
|
#if OPAL_HAVE_HWLOC
|
||||||
"Ranking Policy [slot (default) | hwthread | core | l1cache | l2cache | l3cache | socket | numa | board | node], with modifier :SPAN or :FILL",
|
"Ranking Policy [slot (default:np<=2) | hwthread | core | l1cache | l2cache | l3cache | socket (default:np>2) | numa | board | node], with modifier :SPAN or :FILL",
|
||||||
#else
|
#else
|
||||||
"Ranking Policy [slot (default) | node]",
|
"Ranking Policy [slot (default) | node]",
|
||||||
#endif
|
#endif
|
||||||
@ -262,6 +262,7 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
|||||||
|
|
||||||
if (NULL == rmaps_base_mapping_policy) {
|
if (NULL == rmaps_base_mapping_policy) {
|
||||||
ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYSLOT);
|
ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYSLOT);
|
||||||
|
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SPAN);
|
||||||
ORTE_UNSET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
|
ORTE_UNSET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
|
||||||
} else {
|
} else {
|
||||||
ck = opal_argv_split(rmaps_base_mapping_policy, ':');
|
ck = opal_argv_split(rmaps_base_mapping_policy, ':');
|
||||||
@ -294,7 +295,7 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
|||||||
ck2 = opal_argv_split(ck[1], ',');
|
ck2 = opal_argv_split(ck[1], ',');
|
||||||
for (i=0; NULL != ck2[i]; i++) {
|
for (i=0; NULL != ck2[i]; i++) {
|
||||||
if (0 == strncasecmp(ck2[i], "span", strlen(ck2[i]))) {
|
if (0 == strncasecmp(ck2[i], "span", strlen(ck2[i]))) {
|
||||||
orte_rmaps_base.mapping |= ORTE_MAPPING_SPAN;
|
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SPAN);
|
||||||
} else if (0 == strncasecmp(ck2[i], "oversubscribe", strlen(ck2[i]))) {
|
} else if (0 == strncasecmp(ck2[i], "oversubscribe", strlen(ck2[i]))) {
|
||||||
if (ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
if (ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||||
/* error - cannot redefine the default mapping policy */
|
/* error - cannot redefine the default mapping policy */
|
||||||
@ -364,7 +365,33 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (NULL == rmaps_base_ranking_policy) {
|
if (NULL == rmaps_base_ranking_policy) {
|
||||||
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_SLOT);
|
/* check for map-by object directives - we set the
|
||||||
|
* ranking to match if one was given
|
||||||
|
*/
|
||||||
|
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||||
|
if (ORTE_MAPPING_BYCORE & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
|
||||||
|
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_CORE);
|
||||||
|
} else if (ORTE_MAPPING_BYNODE & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
|
||||||
|
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_NODE);
|
||||||
|
} else if (ORTE_MAPPING_BYL1CACHE & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
|
||||||
|
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_L1CACHE);
|
||||||
|
} else if (ORTE_MAPPING_BYL2CACHE & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
|
||||||
|
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_L2CACHE);
|
||||||
|
} else if (ORTE_MAPPING_BYL3CACHE & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
|
||||||
|
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_L3CACHE);
|
||||||
|
} else if (ORTE_MAPPING_BYSOCKET & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
|
||||||
|
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_SOCKET);
|
||||||
|
} else if (ORTE_MAPPING_BYNUMA & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
|
||||||
|
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_NUMA);
|
||||||
|
} else if (ORTE_MAPPING_BYBOARD & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
|
||||||
|
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_BOARD);
|
||||||
|
} else if (ORTE_MAPPING_BYHWTHREAD & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
|
||||||
|
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_HWTHREAD);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* if no map-by was given, default to by-slot */
|
||||||
|
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_SLOT);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
ck = opal_argv_split(rmaps_base_ranking_policy, ':');
|
ck = opal_argv_split(rmaps_base_ranking_policy, ':');
|
||||||
if (2 < opal_argv_count(ck)) {
|
if (2 < opal_argv_count(ck)) {
|
||||||
@ -461,26 +488,6 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
|||||||
ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
|
ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if OPAL_HAVE_HWLOC
|
|
||||||
if (!OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) {
|
|
||||||
/* if MAP BY DIST then we set binding policy to numa UNLESS the binding has
|
|
||||||
* already been set to something else
|
|
||||||
*/
|
|
||||||
if (ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) == ORTE_MAPPING_BYDIST) {
|
|
||||||
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_NUMA);
|
|
||||||
} else if (1 < orte_rmaps_base.cpus_per_rank) {
|
|
||||||
/* if the cpus/rank > 1, then we have to bind to cores UNLESS the binding has
|
|
||||||
* already been set to something else
|
|
||||||
*/
|
|
||||||
if (opal_hwloc_use_hwthreads_as_cpus) {
|
|
||||||
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_HWTHREAD);
|
|
||||||
} else {
|
|
||||||
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CORE);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Should we schedule on the local node or not? */
|
/* Should we schedule on the local node or not? */
|
||||||
if (rmaps_base_no_schedule_local) {
|
if (rmaps_base_no_schedule_local) {
|
||||||
orte_rmaps_base.mapping |= ORTE_MAPPING_NO_USE_LOCAL;
|
orte_rmaps_base.mapping |= ORTE_MAPPING_NO_USE_LOCAL;
|
||||||
|
@ -47,12 +47,14 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
|
|||||||
{
|
{
|
||||||
orte_job_t *jdata;
|
orte_job_t *jdata;
|
||||||
orte_job_map_t *map;
|
orte_job_map_t *map;
|
||||||
int rc;
|
int rc, i;
|
||||||
bool did_map;
|
bool did_map;
|
||||||
opal_list_item_t *item;
|
opal_list_item_t *item;
|
||||||
orte_rmaps_base_selected_module_t *mod;
|
orte_rmaps_base_selected_module_t *mod;
|
||||||
orte_job_t *parent;
|
orte_job_t *parent;
|
||||||
orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
|
orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
|
||||||
|
orte_vpid_t nprocs;
|
||||||
|
orte_app_context_t *app;
|
||||||
|
|
||||||
/* convenience */
|
/* convenience */
|
||||||
jdata = caddy->jdata;
|
jdata = caddy->jdata;
|
||||||
@ -87,9 +89,36 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
|
|||||||
OBJ_RELEASE(caddy);
|
OBJ_RELEASE(caddy);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
/* load it with the system defaults */
|
/* compute the number of procs */
|
||||||
map->mapping = orte_rmaps_base.mapping;
|
nprocs = 0;
|
||||||
map->ranking = orte_rmaps_base.ranking;
|
for (i=0; i < jdata->apps->size; i++) {
|
||||||
|
if (NULL != (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
|
||||||
|
nprocs += app->num_procs;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||||
|
"mca:rmaps: nprocs %s",
|
||||||
|
ORTE_VPID_PRINT(nprocs));
|
||||||
|
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||||
|
map->mapping = orte_rmaps_base.mapping;
|
||||||
|
} else {
|
||||||
|
/* default based on number of procs */
|
||||||
|
if (nprocs <= 2) {
|
||||||
|
ORTE_SET_MAPPING_POLICY(map->mapping, ORTE_MAPPING_BYSLOT);
|
||||||
|
} else {
|
||||||
|
ORTE_SET_MAPPING_POLICY(map->mapping, ORTE_MAPPING_BYSOCKET);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(orte_rmaps_base.ranking)) {
|
||||||
|
map->ranking = orte_rmaps_base.ranking;
|
||||||
|
} else {
|
||||||
|
/* default based on number of procs */
|
||||||
|
if (nprocs <= 2) {
|
||||||
|
ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
|
||||||
|
} else {
|
||||||
|
ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SOCKET);
|
||||||
|
}
|
||||||
|
}
|
||||||
#if OPAL_HAVE_HWLOC
|
#if OPAL_HAVE_HWLOC
|
||||||
map->binding = opal_hwloc_binding_policy;
|
map->binding = opal_hwloc_binding_policy;
|
||||||
#endif
|
#endif
|
||||||
@ -108,16 +137,33 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
|
|||||||
if (!jdata->map->display_map) {
|
if (!jdata->map->display_map) {
|
||||||
jdata->map->display_map = orte_rmaps_base.display_map;
|
jdata->map->display_map = orte_rmaps_base.display_map;
|
||||||
}
|
}
|
||||||
|
/* compute the number of procs */
|
||||||
|
nprocs = 0;
|
||||||
|
for (i=0; i < jdata->apps->size; i++) {
|
||||||
|
if (NULL != (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
|
||||||
|
nprocs += app->num_procs;
|
||||||
|
}
|
||||||
|
}
|
||||||
/* set the default mapping policy IFF it wasn't provided */
|
/* set the default mapping policy IFF it wasn't provided */
|
||||||
if (!ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) {
|
if (!ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) {
|
||||||
ORTE_SET_MAPPING_POLICY(jdata->map->mapping, orte_rmaps_base.mapping);
|
/* default based on number of procs */
|
||||||
|
if (nprocs <= 2) {
|
||||||
|
ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
|
||||||
|
} else {
|
||||||
|
ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSOCKET);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (!ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
|
if (!ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
|
||||||
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping));
|
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping));
|
||||||
}
|
}
|
||||||
/* ditto for rank and bind policies */
|
/* ditto for rank and bind policies */
|
||||||
if (!ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) {
|
if (!ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) {
|
||||||
ORTE_SET_RANKING_POLICY(jdata->map->ranking, orte_rmaps_base.ranking);
|
/* default based on number of procs */
|
||||||
|
if (nprocs <= 2) {
|
||||||
|
ORTE_SET_RANKING_POLICY(jdata->map->ranking, ORTE_RANK_BY_SLOT);
|
||||||
|
} else {
|
||||||
|
ORTE_SET_RANKING_POLICY(jdata->map->ranking, ORTE_RANK_BY_SOCKET);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#if OPAL_HAVE_HWLOC
|
#if OPAL_HAVE_HWLOC
|
||||||
if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {
|
||||||
|
@ -146,6 +146,7 @@ static int rank_span(orte_job_t *jdata,
|
|||||||
}
|
}
|
||||||
cnt++;
|
cnt++;
|
||||||
|
|
||||||
|
/* insert the proc into the jdata array - no harm if already there */
|
||||||
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
|
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
return rc;
|
return rc;
|
||||||
@ -253,6 +254,7 @@ static int rank_fill(orte_job_t *jdata,
|
|||||||
}
|
}
|
||||||
cnt++;
|
cnt++;
|
||||||
|
|
||||||
|
/* insert the proc into the jdata array - no harm if already there */
|
||||||
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
|
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
return rc;
|
return rc;
|
||||||
@ -275,7 +277,7 @@ static int rank_by(orte_job_t *jdata,
|
|||||||
unsigned cache_level)
|
unsigned cache_level)
|
||||||
{
|
{
|
||||||
hwloc_obj_t obj;
|
hwloc_obj_t obj;
|
||||||
int num_objs, i, j;
|
int num_objs, i, j, rc;
|
||||||
orte_vpid_t num_ranked=0;
|
orte_vpid_t num_ranked=0;
|
||||||
orte_node_t *node;
|
orte_node_t *node;
|
||||||
orte_proc_t *proc;
|
orte_proc_t *proc;
|
||||||
@ -378,7 +380,11 @@ static int rank_by(orte_job_t *jdata,
|
|||||||
cnt++;
|
cnt++;
|
||||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||||
"mca:rmaps:rank_by: assigned rank %s", ORTE_VPID_PRINT(proc->name.vpid));
|
"mca:rmaps:rank_by: assigned rank %s", ORTE_VPID_PRINT(proc->name.vpid));
|
||||||
|
/* insert the proc into the jdata array - no harm if already there */
|
||||||
|
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
|
||||||
|
ORTE_ERROR_LOG(rc);
|
||||||
|
return rc;
|
||||||
|
}
|
||||||
/* flag that one was mapped */
|
/* flag that one was mapped */
|
||||||
all_done = false;
|
all_done = false;
|
||||||
/* track where the highest vpid landed - this is our
|
/* track where the highest vpid landed - this is our
|
||||||
@ -453,10 +459,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
proc->name.vpid = vpid++;
|
proc->name.vpid = vpid++;
|
||||||
/* insert the proc into the jdata->procs array - can't already
|
/* insert the proc into the jdata array - no harm if already there */
|
||||||
* be there as the only way to this point in the code is for the
|
|
||||||
* vpid to have been INVALID
|
|
||||||
*/
|
|
||||||
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
|
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
|
||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
return rc;
|
return rc;
|
||||||
@ -510,15 +513,10 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata,
|
|||||||
*/
|
*/
|
||||||
jdata->bookmark = node;
|
jdata->bookmark = node;
|
||||||
}
|
}
|
||||||
/* some mappers require that we insert the proc into the jdata->procs
|
/* insert the proc into the jdata array - no harm if already there */
|
||||||
* array, while others will have already done it - so check and
|
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
|
||||||
* do the operation if required
|
ORTE_ERROR_LOG(rc);
|
||||||
*/
|
return rc;
|
||||||
if (NULL == opal_pointer_array_get_item(jdata->procs, proc->name.vpid)) {
|
|
||||||
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
|
|
||||||
ORTE_ERROR_LOG(rc);
|
|
||||||
return rc;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -465,8 +465,7 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
|
|||||||
* do more because we don't know how many total objects exist
|
* do more because we don't know how many total objects exist
|
||||||
* across all the nodes
|
* across all the nodes
|
||||||
*/
|
*/
|
||||||
nprocs = app->num_procs * orte_rmaps_base.cpus_per_rank;
|
if (num_slots < (app->num_procs * orte_rmaps_base.cpus_per_rank)) {
|
||||||
if (num_slots < nprocs) {
|
|
||||||
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
|
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
|
||||||
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
|
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
|
||||||
true, app->num_procs, app->app);
|
true, app->num_procs, app->app);
|
||||||
@ -528,12 +527,16 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
|
|||||||
num_procs_to_assign = extra_procs_to_assign;
|
num_procs_to_assign = extra_procs_to_assign;
|
||||||
} else {
|
} else {
|
||||||
num_procs_to_assign = (node->slots - node->slots_inuse)/orte_rmaps_base.cpus_per_rank + extra_procs_to_assign;
|
num_procs_to_assign = (node->slots - node->slots_inuse)/orte_rmaps_base.cpus_per_rank + extra_procs_to_assign;
|
||||||
|
if (app->num_procs < num_procs_to_assign) {
|
||||||
|
/* might have more slots than procs */
|
||||||
|
num_procs_to_assign = app->num_procs;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* get the number of objects of this type on this node */
|
/* get the number of objects of this type on this node */
|
||||||
nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology, target, cache_level, OPAL_HWLOC_AVAILABLE);
|
nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology, target, cache_level, OPAL_HWLOC_AVAILABLE);
|
||||||
opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
|
opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
|
||||||
"mca:rmaps:rr:byobj: found %d objs on node %s", nobjs, node->name);
|
"mca:rmaps:rr:byobj: nprocs-to-assign %d for %d objs on node %s", num_procs_to_assign, nobjs, node->name);
|
||||||
/* if there are no objects of this type, then report the error
|
/* if there are no objects of this type, then report the error
|
||||||
* and abort - this can happen, for example, on systems that
|
* and abort - this can happen, for example, on systems that
|
||||||
* don't report "sockets" as an independent object
|
* don't report "sockets" as an independent object
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user