1
1

Make the cpus-per-proc equivalent a little more intuitive:

* allow users to specify just a modifier for map-by instead of requiring that they also specify a policy. Thus, we now accept --map-by :pe=3 as indicating that we should use the default mapping policy, but bind 3 cpus/proc.

* if users specify a number of pe's/proc but no policy, default to --map-by NUMA to ensure we have access to multiple cpus for the request. This won't guarantee we have access to enough cpus to meet the request, but gives us a chance. In addition, we know that binding a proc to multiple cpus will work best if those cpus are all in the same NUMA region, so this provides some degree of optimized behavior.

Per a request from Jeff, define "oversubscribe" for binding as a synonym for the "overload" modifier.

cmr=v1.8.2:reviewer=rhc

This commit was SVN r31967.
Этот коммит содержится в:
Ralph Castain 2014-06-08 20:26:59 +00:00
родитель 8db76e9c6f
Коммит 06dbfa3098
3 изменённых файлов: 91 добавлений и 26 удалений

Просмотреть файл

@ -520,38 +520,44 @@ int opal_hwloc_base_set_binding_policy(opal_binding_policy_t *policy, char *spec
if (1 < opal_argv_count(tmpvals)) {
quals = opal_argv_split(tmpvals[1], ',');
for (i=0; NULL != quals[i]; i++) {
if (0 == strcasecmp(quals[i], "if-supported")) {
if (0 == strncasecmp(quals[i], "if-supported", strlen(quals[i]))) {
tmp |= OPAL_BIND_IF_SUPPORTED;
} else if (0 == strcasecmp(quals[i], "overload-allowed")) {
} else if (0 == strncasecmp(quals[i], "overload-allowed", strlen(quals[i])) ||
0 == strncasecmp(quals[i], "oversubscribe-allowed", strlen(quals[i]))) {
tmp |= OPAL_BIND_ALLOW_OVERLOAD;
} else {
/* unknown option */
opal_output(0, "Unknown qualifier to orte_process_binding: %s", spec);
opal_output(0, "Unknown qualifier to binding policy: %s", spec);
return OPAL_ERR_BAD_PARAM;
}
}
opal_argv_free(quals);
}
if (0 == strcasecmp(tmpvals[0], "hwthread")) {
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_HWTHREAD);
} else if (0 == strcasecmp(tmpvals[0], "core")) {
if (NULL == tmpvals[0]) {
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_CORE);
} else if (0 == strcasecmp(tmpvals[0], "l1cache")) {
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_L1CACHE);
} else if (0 == strcasecmp(tmpvals[0], "l2cache")) {
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_L2CACHE);
} else if (0 == strcasecmp(tmpvals[0], "l3cache")) {
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_L3CACHE);
} else if (0 == strcasecmp(tmpvals[0], "socket")) {
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_SOCKET);
} else if (0 == strcasecmp(tmpvals[0], "numa")) {
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_NUMA);
} else if (0 == strcasecmp(tmpvals[0], "board")) {
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_BOARD);
tmp &= ~OPAL_BIND_GIVEN;
} else {
opal_show_help("help-opal-hwloc-base.txt", "invalid binding_policy", true, "binding", spec);
opal_argv_free(tmpvals);
return OPAL_ERR_BAD_PARAM;
if (0 == strcasecmp(tmpvals[0], "hwthread")) {
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_HWTHREAD);
} else if (0 == strcasecmp(tmpvals[0], "core")) {
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_CORE);
} else if (0 == strcasecmp(tmpvals[0], "l1cache")) {
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_L1CACHE);
} else if (0 == strcasecmp(tmpvals[0], "l2cache")) {
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_L2CACHE);
} else if (0 == strcasecmp(tmpvals[0], "l3cache")) {
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_L3CACHE);
} else if (0 == strcasecmp(tmpvals[0], "socket")) {
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_SOCKET);
} else if (0 == strcasecmp(tmpvals[0], "numa")) {
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_NUMA);
} else if (0 == strcasecmp(tmpvals[0], "board")) {
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_BOARD);
} else {
opal_show_help("help-opal-hwloc-base.txt", "invalid binding_policy", true, "binding", spec);
opal_argv_free(tmpvals);
return OPAL_ERR_BAD_PARAM;
}
}
opal_argv_free(tmpvals);
}

Просмотреть файл

@ -303,4 +303,22 @@ not set by the mapper code:
Please contact the OMPI developers for assistance. Meantime,
you will still be able to run your application without binding
by specifying "--bind-to none" on your command line.
#
[mapping-too-low-init]
A request for multiple cpus-per-proc was given, but a directive
was also given to map to an object level that cannot support that
directive.
Please specify a mapping level that has more than one cpu, or
else let us define a default mapping that will allow multiple
cpus-per-proc.
#
[unrecog-modifier]
A modifier was given to the --map-by directive that is not
recognized:
Modifier: %s
Please see "mpirun --help" for a description of supported
modifiers.

Просмотреть файл

@ -285,8 +285,8 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
if (1 < orte_rmaps_base.cpus_per_rank) {
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
"--cpus-per-proc, -cpus-per-proc, --cpus-per-rank, -cpus-per-rank",
"--map-by <obj>:PE=N",
"rmaps_base_cpus_per_proc", "rmaps_base_mapping_policy=<obj>:PE=N");
"--map-by <obj>:PE=N, default <obj>=NUMA",
"rmaps_base_cpus_per_proc", "rmaps_base_mapping_policy=<obj>:PE=N, default <obj>=NUMA");
}
if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&orte_rmaps_base.mapping,
@ -407,6 +407,20 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CORE);
}
}
/* we also need to ensure we are mapping to a high-enough level to have
* multiple cpus beneath it - by default, we'll go to the NUMA level */
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
if (ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) >= ORTE_MAPPING_BYCORE) {
orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low-init", true);
return ORTE_ERR_SILENT;
}
} else {
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"%s rmaps:base pe/rank set - setting mapping to BYNUMA",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYNUMA);
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
}
#endif
}
@ -565,6 +579,10 @@ static int check_modifiers(char *ck, orte_mapping_policy_t *tmp)
return ORTE_ERR_SILENT;
}
orte_rmaps_base.cpus_per_rank = strtol(ptr, NULL, 10);
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"%s rmaps:base setting pe/rank to %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
orte_rmaps_base.cpus_per_rank);
found = true;
} else if (0 == strncasecmp(ck2[i], "oversubscribe", strlen(ck2[i]))) {
ORTE_UNSET_MAPPING_DIRECTIVE(*tmp, ORTE_MAPPING_NO_OVERSUBSCRIBE);
@ -577,7 +595,8 @@ static int check_modifiers(char *ck, orte_mapping_policy_t *tmp)
} else {
/* unrecognized modifier */
opal_argv_free(ck2);
return ORTE_ERR_BAD_PARAM;
orte_show_help("help-orte-rmaps-base.txt", "unrecog-modifier", true, ck2[i]);
return ORTE_ERR_SILENT;
}
}
opal_argv_free(ck2);
@ -603,6 +622,11 @@ int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
tmp = 0;
*device = NULL;
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"%s rmaps:base set policy with %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(NULL == inspec) ? "NULL" : inspec);
if (NULL == inspec) {
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
} else {
@ -610,9 +634,28 @@ int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
/* see if a colon was included - if so, then we have a policy + modifier */
ck = strchr(spec, ':');
if (NULL != ck) {
/* if the colon is the first character of the string, then we
* just have modifiers on the default mapping policy */
if (ck == spec) {
ck++;
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"%s rmaps:base only modifiers %s provided - assuming bysocket mapping",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ck);
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
if (ORTE_ERR_SILENT == (rc = check_modifiers(ck, &tmp)) &&
ORTE_ERR_BAD_PARAM != rc) {
free(spec);
return ORTE_ERR_SILENT;
}
free(spec);
goto setpolicy;
}
/* split the string */
*ck = '\0';
ck++;
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
"%s rmaps:base policy %s modifiers %s provided",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), spec, ck);
/* if the policy is "dist", then we set the policy to that value
* and save the second argument as the device
*/
@ -721,9 +764,7 @@ int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
}
#if OPAL_HAVE_HWLOC
setpolicy:
#endif
*policy = tmp;
return ORTE_SUCCESS;