1
1

Per the OMPI devel conference, change our default behaviors:

* default to bind-to core 
* map-by slot if np <= 2
* map-by socket (balance across sockets on each node) if np > 2
* map-by <obj> will imply rank-by <obj> by default (leave default binding as above) 

Fix a bug in the map-by <obj> mapper where we incorrectly compute the #procs to assign if the #slots > #procs

cmr=v1.7.4:reviewer=jsquyres:subject=Update default binding and mapping values

This commit was SVN r29919.
Этот коммит содержится в:
Ralph Castain 2013-12-15 17:25:54 +00:00
родитель efb32da1e0
Коммит 8b6d117541
6 изменённых файлов: 105 добавлений и 52 удалений

Просмотреть файл

@ -124,7 +124,7 @@ static int opal_hwloc_base_register(mca_base_register_flag_t flags)
opal_hwloc_base_binding_policy = NULL;
(void) mca_base_var_register("opal", "hwloc", "base", "binding_policy",
"Policy for binding processes [none (default) | hwthread | core | l1cache | l2cache | l3cache | socket | numa | board] (supported qualifiers: overload-allowed,if-supported)",
"Policy for binding processes [none | hwthread | core (default) | l1cache | l2cache | l3cache | socket | numa | board] (supported qualifiers: overload-allowed,if-supported)",
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY, &opal_hwloc_base_binding_policy);
@ -189,7 +189,7 @@ static int opal_hwloc_base_open(mca_base_open_flag_t flags)
/* binding specification */
if (NULL == opal_hwloc_base_binding_policy) {
opal_hwloc_binding_policy = OPAL_BIND_TO_NONE;
opal_hwloc_binding_policy = OPAL_BIND_TO_CORE;
/* mark that no binding policy was specified */
opal_hwloc_binding_policy &= ~OPAL_BIND_GIVEN;
} else if (0 == strncasecmp(opal_hwloc_base_binding_policy, "none", strlen("none"))) {

Просмотреть файл

@ -643,8 +643,7 @@ int orte_rmaps_base_compute_bindings(orte_job_t *jdata)
return rc;
}
if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding) ||
OPAL_BIND_TO_NONE == OPAL_GET_BINDING_POLICY(jdata->map->binding)) {
if (OPAL_BIND_TO_NONE == OPAL_GET_BINDING_POLICY(jdata->map->binding)) {
/* no binding requested */
return ORTE_SUCCESS;
}

Просмотреть файл

@ -102,7 +102,7 @@ static int orte_rmaps_base_register(mca_base_register_flag_t flags)
rmaps_base_mapping_policy = NULL;
var_id = mca_base_var_register("orte", "rmaps", "base", "mapping_policy",
#if OPAL_HAVE_HWLOC
"Mapping Policy [slot (default) | hwthread | core | l1cache | l2cache | l3cache | socket | numa | board | node | seq | dist], with allowed modifiers :SPAN,OVERSUBSCRIBE,NOOVERSUBSCRIBE",
"Mapping Policy [slot (default:np<=2) | hwthread | core | l1cache | l2cache | l3cache | socket (default:np>2) | numa | board | node | seq | dist], with allowed modifiers :SPAN,OVERSUBSCRIBE,NOOVERSUBSCRIBE",
#else
"Mapping Policy [slot (default) | node], with allowed modifiers :SPAN,OVERSUBSCRIBE,NOOVERSUBSCRIBE",
#endif
@ -117,7 +117,7 @@ static int orte_rmaps_base_register(mca_base_register_flag_t flags)
rmaps_base_ranking_policy = NULL;
(void) mca_base_var_register("orte", "rmaps", "base", "ranking_policy",
#if OPAL_HAVE_HWLOC
"Ranking Policy [slot (default) | hwthread | core | l1cache | l2cache | l3cache | socket | numa | board | node], with modifier :SPAN or :FILL",
"Ranking Policy [slot (default:np<=2) | hwthread | core | l1cache | l2cache | l3cache | socket (default:np>2) | numa | board | node], with modifier :SPAN or :FILL",
#else
"Ranking Policy [slot (default) | node]",
#endif
@ -262,6 +262,7 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
if (NULL == rmaps_base_mapping_policy) {
ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYSLOT);
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SPAN);
ORTE_UNSET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
} else {
ck = opal_argv_split(rmaps_base_mapping_policy, ':');
@ -294,7 +295,7 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
ck2 = opal_argv_split(ck[1], ',');
for (i=0; NULL != ck2[i]; i++) {
if (0 == strncasecmp(ck2[i], "span", strlen(ck2[i]))) {
orte_rmaps_base.mapping |= ORTE_MAPPING_SPAN;
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_SPAN);
} else if (0 == strncasecmp(ck2[i], "oversubscribe", strlen(ck2[i]))) {
if (ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
/* error - cannot redefine the default mapping policy */
@ -364,7 +365,33 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
}
if (NULL == rmaps_base_ranking_policy) {
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_SLOT);
/* check for map-by object directives - we set the
* ranking to match if one was given
*/
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
if (ORTE_MAPPING_BYCORE & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_CORE);
} else if (ORTE_MAPPING_BYNODE & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_NODE);
} else if (ORTE_MAPPING_BYL1CACHE & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_L1CACHE);
} else if (ORTE_MAPPING_BYL2CACHE & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_L2CACHE);
} else if (ORTE_MAPPING_BYL3CACHE & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_L3CACHE);
} else if (ORTE_MAPPING_BYSOCKET & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_SOCKET);
} else if (ORTE_MAPPING_BYNUMA & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_NUMA);
} else if (ORTE_MAPPING_BYBOARD & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_BOARD);
} else if (ORTE_MAPPING_BYHWTHREAD & ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping)) {
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_HWTHREAD);
}
} else {
/* if no map-by was given, default to by-slot */
ORTE_SET_RANKING_POLICY(orte_rmaps_base.ranking, ORTE_RANK_BY_SLOT);
}
} else {
ck = opal_argv_split(rmaps_base_ranking_policy, ':');
if (2 < opal_argv_count(ck)) {
@ -461,26 +488,6 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
ORTE_SET_RANKING_DIRECTIVE(orte_rmaps_base.ranking, ORTE_RANKING_GIVEN);
}
#if OPAL_HAVE_HWLOC
if (!OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) {
/* if MAP BY DIST then we set binding policy to numa UNLESS the binding has
* already been set to something else
*/
if (ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) == ORTE_MAPPING_BYDIST) {
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_NUMA);
} else if (1 < orte_rmaps_base.cpus_per_rank) {
/* if the cpus/rank > 1, then we have to bind to cores UNLESS the binding has
* already been set to something else
*/
if (opal_hwloc_use_hwthreads_as_cpus) {
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_HWTHREAD);
} else {
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CORE);
}
}
}
#endif
/* Should we schedule on the local node or not? */
if (rmaps_base_no_schedule_local) {
orte_rmaps_base.mapping |= ORTE_MAPPING_NO_USE_LOCAL;

Просмотреть файл

@ -47,12 +47,14 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
{
orte_job_t *jdata;
orte_job_map_t *map;
int rc;
int rc, i;
bool did_map;
opal_list_item_t *item;
orte_rmaps_base_selected_module_t *mod;
orte_job_t *parent;
orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
orte_vpid_t nprocs;
orte_app_context_t *app;
/* convenience */
jdata = caddy->jdata;
@ -87,9 +89,36 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
OBJ_RELEASE(caddy);
return;
}
/* load it with the system defaults */
map->mapping = orte_rmaps_base.mapping;
map->ranking = orte_rmaps_base.ranking;
/* compute the number of procs */
nprocs = 0;
for (i=0; i < jdata->apps->size; i++) {
if (NULL != (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
nprocs += app->num_procs;
}
}
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps: nprocs %s",
ORTE_VPID_PRINT(nprocs));
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
map->mapping = orte_rmaps_base.mapping;
} else {
/* default based on number of procs */
if (nprocs <= 2) {
ORTE_SET_MAPPING_POLICY(map->mapping, ORTE_MAPPING_BYSLOT);
} else {
ORTE_SET_MAPPING_POLICY(map->mapping, ORTE_MAPPING_BYSOCKET);
}
}
if (ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(orte_rmaps_base.ranking)) {
map->ranking = orte_rmaps_base.ranking;
} else {
/* default based on number of procs */
if (nprocs <= 2) {
ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
} else {
ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SOCKET);
}
}
#if OPAL_HAVE_HWLOC
map->binding = opal_hwloc_binding_policy;
#endif
@ -108,16 +137,33 @@ void orte_rmaps_base_map_job(int fd, short args, void *cbdata)
if (!jdata->map->display_map) {
jdata->map->display_map = orte_rmaps_base.display_map;
}
/* compute the number of procs */
nprocs = 0;
for (i=0; i < jdata->apps->size; i++) {
if (NULL != (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
nprocs += app->num_procs;
}
}
/* set the default mapping policy IFF it wasn't provided */
if (!ORTE_MAPPING_POLICY_IS_SET(jdata->map->mapping)) {
ORTE_SET_MAPPING_POLICY(jdata->map->mapping, orte_rmaps_base.mapping);
/* default based on number of procs */
if (nprocs <= 2) {
ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
} else {
ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSOCKET);
}
}
if (!ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
ORTE_SET_MAPPING_DIRECTIVE(jdata->map->mapping, ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping));
}
/* ditto for rank and bind policies */
if (!ORTE_RANKING_POLICY_IS_SET(jdata->map->ranking)) {
ORTE_SET_RANKING_POLICY(jdata->map->ranking, orte_rmaps_base.ranking);
/* default based on number of procs */
if (nprocs <= 2) {
ORTE_SET_RANKING_POLICY(jdata->map->ranking, ORTE_RANK_BY_SLOT);
} else {
ORTE_SET_RANKING_POLICY(jdata->map->ranking, ORTE_RANK_BY_SOCKET);
}
}
#if OPAL_HAVE_HWLOC
if (!OPAL_BINDING_POLICY_IS_SET(jdata->map->binding)) {

Просмотреть файл

@ -146,6 +146,7 @@ static int rank_span(orte_job_t *jdata,
}
cnt++;
/* insert the proc into the jdata array - no harm if already there */
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
ORTE_ERROR_LOG(rc);
return rc;
@ -253,6 +254,7 @@ static int rank_fill(orte_job_t *jdata,
}
cnt++;
/* insert the proc into the jdata array - no harm if already there */
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
ORTE_ERROR_LOG(rc);
return rc;
@ -275,7 +277,7 @@ static int rank_by(orte_job_t *jdata,
unsigned cache_level)
{
hwloc_obj_t obj;
int num_objs, i, j;
int num_objs, i, j, rc;
orte_vpid_t num_ranked=0;
orte_node_t *node;
orte_proc_t *proc;
@ -378,7 +380,11 @@ static int rank_by(orte_job_t *jdata,
cnt++;
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"mca:rmaps:rank_by: assigned rank %s", ORTE_VPID_PRINT(proc->name.vpid));
/* insert the proc into the jdata array - no harm if already there */
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* flag that one was mapped */
all_done = false;
/* track where the highest vpid landed - this is our
@ -453,10 +459,7 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata,
continue;
}
proc->name.vpid = vpid++;
/* insert the proc into the jdata->procs array - can't already
* be there as the only way to this point in the code is for the
* vpid to have been INVALID
*/
/* insert the proc into the jdata array - no harm if already there */
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
ORTE_ERROR_LOG(rc);
return rc;
@ -510,15 +513,10 @@ int orte_rmaps_base_compute_vpids(orte_job_t *jdata,
*/
jdata->bookmark = node;
}
/* some mappers require that we insert the proc into the jdata->procs
* array, while others will have already done it - so check and
* do the operation if required
*/
if (NULL == opal_pointer_array_get_item(jdata->procs, proc->name.vpid)) {
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* insert the proc into the jdata array - no harm if already there */
if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}

Просмотреть файл

@ -465,8 +465,7 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
* do more because we don't know how many total objects exist
* across all the nodes
*/
nprocs = app->num_procs * orte_rmaps_base.cpus_per_rank;
if (num_slots < nprocs) {
if (num_slots < (app->num_procs * orte_rmaps_base.cpus_per_rank)) {
if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
true, app->num_procs, app->app);
@ -528,12 +527,16 @@ int orte_rmaps_rr_byobj(orte_job_t *jdata,
num_procs_to_assign = extra_procs_to_assign;
} else {
num_procs_to_assign = (node->slots - node->slots_inuse)/orte_rmaps_base.cpus_per_rank + extra_procs_to_assign;
if (app->num_procs < num_procs_to_assign) {
/* might have more slots than procs */
num_procs_to_assign = app->num_procs;
}
}
/* get the number of objects of this type on this node */
nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology, target, cache_level, OPAL_HWLOC_AVAILABLE);
opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
"mca:rmaps:rr:byobj: found %d objs on node %s", nobjs, node->name);
"mca:rmaps:rr:byobj: nprocs-to-assign %d for %d objs on node %s", num_procs_to_assign, nobjs, node->name);
/* if there are no objects of this type, then report the error
* and abort - this can happen, for example, on systems that
* don't report "sockets" as an independent object