Make the cpus-per-proc equivalent a little more intuitive:
* Allow users to specify just a modifier for map-by instead of requiring that they also specify a policy. Thus, we now accept "--map-by :pe=3" as indicating that we should use the default mapping policy, but bind 3 cpus/proc.
* If users specify pe's/proc but no policy, default to "--map-by NUMA" to ensure we have access to multiple cpus for the request. This won't guarantee we have access to enough cpus to meet the request, but it gives us a chance. In addition, we know that binding a proc to multiple cpus will work best if those cpus are all in the same NUMA domain, so this provides some degree of optimized behavior.

Per a request from Jeff, define "oversubscribe" for binding as a synonym for the "overload" modifier.

cmr=v1.8.2:reviewer=rhc

This commit was SVN r31967.
Этот коммит содержится в:
родитель
8db76e9c6f
Коммит
06dbfa3098
@ -520,38 +520,44 @@ int opal_hwloc_base_set_binding_policy(opal_binding_policy_t *policy, char *spec
|
|||||||
if (1 < opal_argv_count(tmpvals)) {
|
if (1 < opal_argv_count(tmpvals)) {
|
||||||
quals = opal_argv_split(tmpvals[1], ',');
|
quals = opal_argv_split(tmpvals[1], ',');
|
||||||
for (i=0; NULL != quals[i]; i++) {
|
for (i=0; NULL != quals[i]; i++) {
|
||||||
if (0 == strcasecmp(quals[i], "if-supported")) {
|
if (0 == strncasecmp(quals[i], "if-supported", strlen(quals[i]))) {
|
||||||
tmp |= OPAL_BIND_IF_SUPPORTED;
|
tmp |= OPAL_BIND_IF_SUPPORTED;
|
||||||
} else if (0 == strcasecmp(quals[i], "overload-allowed")) {
|
} else if (0 == strncasecmp(quals[i], "overload-allowed", strlen(quals[i])) ||
|
||||||
|
0 == strncasecmp(quals[i], "oversubscribe-allowed", strlen(quals[i]))) {
|
||||||
tmp |= OPAL_BIND_ALLOW_OVERLOAD;
|
tmp |= OPAL_BIND_ALLOW_OVERLOAD;
|
||||||
} else {
|
} else {
|
||||||
/* unknown option */
|
/* unknown option */
|
||||||
opal_output(0, "Unknown qualifier to orte_process_binding: %s", spec);
|
opal_output(0, "Unknown qualifier to binding policy: %s", spec);
|
||||||
return OPAL_ERR_BAD_PARAM;
|
return OPAL_ERR_BAD_PARAM;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
opal_argv_free(quals);
|
opal_argv_free(quals);
|
||||||
}
|
}
|
||||||
if (0 == strcasecmp(tmpvals[0], "hwthread")) {
|
if (NULL == tmpvals[0]) {
|
||||||
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_HWTHREAD);
|
|
||||||
} else if (0 == strcasecmp(tmpvals[0], "core")) {
|
|
||||||
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_CORE);
|
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_CORE);
|
||||||
} else if (0 == strcasecmp(tmpvals[0], "l1cache")) {
|
tmp &= ~OPAL_BIND_GIVEN;
|
||||||
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_L1CACHE);
|
|
||||||
} else if (0 == strcasecmp(tmpvals[0], "l2cache")) {
|
|
||||||
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_L2CACHE);
|
|
||||||
} else if (0 == strcasecmp(tmpvals[0], "l3cache")) {
|
|
||||||
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_L3CACHE);
|
|
||||||
} else if (0 == strcasecmp(tmpvals[0], "socket")) {
|
|
||||||
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_SOCKET);
|
|
||||||
} else if (0 == strcasecmp(tmpvals[0], "numa")) {
|
|
||||||
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_NUMA);
|
|
||||||
} else if (0 == strcasecmp(tmpvals[0], "board")) {
|
|
||||||
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_BOARD);
|
|
||||||
} else {
|
} else {
|
||||||
opal_show_help("help-opal-hwloc-base.txt", "invalid binding_policy", true, "binding", spec);
|
if (0 == strcasecmp(tmpvals[0], "hwthread")) {
|
||||||
opal_argv_free(tmpvals);
|
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_HWTHREAD);
|
||||||
return OPAL_ERR_BAD_PARAM;
|
} else if (0 == strcasecmp(tmpvals[0], "core")) {
|
||||||
|
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_CORE);
|
||||||
|
} else if (0 == strcasecmp(tmpvals[0], "l1cache")) {
|
||||||
|
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_L1CACHE);
|
||||||
|
} else if (0 == strcasecmp(tmpvals[0], "l2cache")) {
|
||||||
|
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_L2CACHE);
|
||||||
|
} else if (0 == strcasecmp(tmpvals[0], "l3cache")) {
|
||||||
|
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_L3CACHE);
|
||||||
|
} else if (0 == strcasecmp(tmpvals[0], "socket")) {
|
||||||
|
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_SOCKET);
|
||||||
|
} else if (0 == strcasecmp(tmpvals[0], "numa")) {
|
||||||
|
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_NUMA);
|
||||||
|
} else if (0 == strcasecmp(tmpvals[0], "board")) {
|
||||||
|
OPAL_SET_BINDING_POLICY(tmp, OPAL_BIND_TO_BOARD);
|
||||||
|
} else {
|
||||||
|
opal_show_help("help-opal-hwloc-base.txt", "invalid binding_policy", true, "binding", spec);
|
||||||
|
opal_argv_free(tmpvals);
|
||||||
|
return OPAL_ERR_BAD_PARAM;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
opal_argv_free(tmpvals);
|
opal_argv_free(tmpvals);
|
||||||
}
|
}
|
||||||
|
@ -303,4 +303,22 @@ not set by the mapper code:
|
|||||||
Please contact the OMPI developers for assistance. Meantime,
|
Please contact the OMPI developers for assistance. Meantime,
|
||||||
you will still be able to run your application without binding
|
you will still be able to run your application without binding
|
||||||
by specifying "--bind-to none" on your command line.
|
by specifying "--bind-to none" on your command line.
|
||||||
|
#
|
||||||
|
[mapping-too-low-init]
|
||||||
|
A request for multiple cpus-per-proc was given, but a directive
|
||||||
|
was also given to map to an object level that cannot support that
|
||||||
|
directive.
|
||||||
|
|
||||||
|
Please specify a mapping level that has more than one cpu, or
|
||||||
|
else let us define a default mapping that will allow multiple
|
||||||
|
cpus-per-proc.
|
||||||
|
#
|
||||||
|
[unrecog-modifier]
|
||||||
|
A modifier was given to the --map-by directive that is not
|
||||||
|
recognized:
|
||||||
|
|
||||||
|
Modifier: %s
|
||||||
|
|
||||||
|
Please see "mpirun --help" for a description of supported
|
||||||
|
modifiers.
|
||||||
|
|
||||||
|
@ -285,8 +285,8 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
|||||||
if (1 < orte_rmaps_base.cpus_per_rank) {
|
if (1 < orte_rmaps_base.cpus_per_rank) {
|
||||||
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
|
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
|
||||||
"--cpus-per-proc, -cpus-per-proc, --cpus-per-rank, -cpus-per-rank",
|
"--cpus-per-proc, -cpus-per-proc, --cpus-per-rank, -cpus-per-rank",
|
||||||
"--map-by <obj>:PE=N",
|
"--map-by <obj>:PE=N, default <obj>=NUMA",
|
||||||
"rmaps_base_cpus_per_proc", "rmaps_base_mapping_policy=<obj>:PE=N");
|
"rmaps_base_cpus_per_proc", "rmaps_base_mapping_policy=<obj>:PE=N, default <obj>=NUMA");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&orte_rmaps_base.mapping,
|
if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&orte_rmaps_base.mapping,
|
||||||
@ -407,6 +407,20 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
|||||||
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CORE);
|
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_CORE);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/* we also need to ensure we are mapping to a high-enough level to have
|
||||||
|
* multiple cpus beneath it - by default, we'll go to the NUMA level */
|
||||||
|
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||||
|
if (ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) >= ORTE_MAPPING_BYCORE) {
|
||||||
|
orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low-init", true);
|
||||||
|
return ORTE_ERR_SILENT;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||||
|
"%s rmaps:base pe/rank set - setting mapping to BYNUMA",
|
||||||
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||||
|
ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYNUMA);
|
||||||
|
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -565,6 +579,10 @@ static int check_modifiers(char *ck, orte_mapping_policy_t *tmp)
|
|||||||
return ORTE_ERR_SILENT;
|
return ORTE_ERR_SILENT;
|
||||||
}
|
}
|
||||||
orte_rmaps_base.cpus_per_rank = strtol(ptr, NULL, 10);
|
orte_rmaps_base.cpus_per_rank = strtol(ptr, NULL, 10);
|
||||||
|
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||||
|
"%s rmaps:base setting pe/rank to %d",
|
||||||
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
|
orte_rmaps_base.cpus_per_rank);
|
||||||
found = true;
|
found = true;
|
||||||
} else if (0 == strncasecmp(ck2[i], "oversubscribe", strlen(ck2[i]))) {
|
} else if (0 == strncasecmp(ck2[i], "oversubscribe", strlen(ck2[i]))) {
|
||||||
ORTE_UNSET_MAPPING_DIRECTIVE(*tmp, ORTE_MAPPING_NO_OVERSUBSCRIBE);
|
ORTE_UNSET_MAPPING_DIRECTIVE(*tmp, ORTE_MAPPING_NO_OVERSUBSCRIBE);
|
||||||
@ -577,7 +595,8 @@ static int check_modifiers(char *ck, orte_mapping_policy_t *tmp)
|
|||||||
} else {
|
} else {
|
||||||
/* unrecognized modifier */
|
/* unrecognized modifier */
|
||||||
opal_argv_free(ck2);
|
opal_argv_free(ck2);
|
||||||
return ORTE_ERR_BAD_PARAM;
|
orte_show_help("help-orte-rmaps-base.txt", "unrecog-modifier", true, ck2[i]);
|
||||||
|
return ORTE_ERR_SILENT;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
opal_argv_free(ck2);
|
opal_argv_free(ck2);
|
||||||
@ -603,6 +622,11 @@ int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
|
|||||||
tmp = 0;
|
tmp = 0;
|
||||||
*device = NULL;
|
*device = NULL;
|
||||||
|
|
||||||
|
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||||
|
"%s rmaps:base set policy with %s",
|
||||||
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
|
(NULL == inspec) ? "NULL" : inspec);
|
||||||
|
|
||||||
if (NULL == inspec) {
|
if (NULL == inspec) {
|
||||||
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
|
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
|
||||||
} else {
|
} else {
|
||||||
@ -610,9 +634,28 @@ int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
|
|||||||
/* see if a colon was included - if so, then we have a policy + modifier */
|
/* see if a colon was included - if so, then we have a policy + modifier */
|
||||||
ck = strchr(spec, ':');
|
ck = strchr(spec, ':');
|
||||||
if (NULL != ck) {
|
if (NULL != ck) {
|
||||||
|
/* if the colon is the first character of the string, then we
|
||||||
|
* just have modifiers on the default mapping policy */
|
||||||
|
if (ck == spec) {
|
||||||
|
ck++;
|
||||||
|
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||||
|
"%s rmaps:base only modifiers %s provided - assuming bysocket mapping",
|
||||||
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ck);
|
||||||
|
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
|
||||||
|
if (ORTE_ERR_SILENT == (rc = check_modifiers(ck, &tmp)) &&
|
||||||
|
ORTE_ERR_BAD_PARAM != rc) {
|
||||||
|
free(spec);
|
||||||
|
return ORTE_ERR_SILENT;
|
||||||
|
}
|
||||||
|
free(spec);
|
||||||
|
goto setpolicy;
|
||||||
|
}
|
||||||
/* split the string */
|
/* split the string */
|
||||||
*ck = '\0';
|
*ck = '\0';
|
||||||
ck++;
|
ck++;
|
||||||
|
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||||
|
"%s rmaps:base policy %s modifiers %s provided",
|
||||||
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), spec, ck);
|
||||||
/* if the policy is "dist", then we set the policy to that value
|
/* if the policy is "dist", then we set the policy to that value
|
||||||
* and save the second argument as the device
|
* and save the second argument as the device
|
||||||
*/
|
*/
|
||||||
@ -721,9 +764,7 @@ int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
|
|||||||
ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
|
ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if OPAL_HAVE_HWLOC
|
|
||||||
setpolicy:
|
setpolicy:
|
||||||
#endif
|
|
||||||
*policy = tmp;
|
*policy = tmp;
|
||||||
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user