After a lot of pain, I've managed to resolve the problem of conflicting mapping directives caused by mismatched MCA params - i.e., where someone has one variant of an MCA param (e.g., rmaps_base_mapping_policy) in their default MCA param file, and then specifies another variant (e.g., --npernode) on the command line. I can't fully resolve the problem, as there is no way to know precisely what the user meant - we can only guess which param was really intended, since the MCA param system can't apply its normal precedence rules.

So...print a big "deprecated" warning for the old params and error out if a conflict is detected. I know that isn't what people really wanted, but it's the best we can do. If only the old-style param is given, then process it after the warning.

Extend the current map-by param to add support for ppr and cpus-per-proc, adding the latter to the list of allowed modifiers using "pe=n" for processing elements/proc. Thus, you can map-by socket:pe=2,oversubscribe to map by socket, binding 2 processing elements/process, with oversubscription allowed. Or you can map-by ppr:2:socket:pe=4 to map two processes to every socket in the allocation, binding each process to 4 processing elements.

For those wondering, a processing element is defined as a hwthread if --use-hwthreads-as-cpus is given, or else as a core.

Refs trac:4117

This commit was SVN r30620.

The following Trac tickets were found above:
Ticket 4117 --> https://svn.open-mpi.org/trac/ompi/ticket/4117
Этот коммит содержится в:
родитель
3a683419c5
Коммит
bc7cc09749
@ -12,6 +12,7 @@
|
||||
* Copyright (c) 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
@ -83,7 +84,6 @@ ORTE_DECLSPEC extern orte_rmaps_base_t orte_rmaps_base;
|
||||
ORTE_DECLSPEC extern bool orte_rmaps_base_pernode;
|
||||
ORTE_DECLSPEC extern int orte_rmaps_base_n_pernode;
|
||||
ORTE_DECLSPEC extern int orte_rmaps_base_n_persocket;
|
||||
ORTE_DECLSPEC extern char *orte_rmaps_base_pattern;
|
||||
|
||||
/**
|
||||
* Select an rmaps component / module
|
||||
|
@ -218,10 +218,10 @@ with the version of hwloc being used by OMPI.
|
||||
Please correct the problem and try again.
|
||||
#
|
||||
[deprecated]
|
||||
The following command line option and corresponding MCA parameter have
|
||||
The following command line options and corresponding MCA parameters have
|
||||
been deprecated and replaced as follows:
|
||||
|
||||
Command line option:
|
||||
Command line options:
|
||||
Deprecated: %s
|
||||
Replacement: %s
|
||||
|
||||
@ -259,3 +259,18 @@ have multiple cpus underneath it:
|
||||
Please specify a mapping level that is no lower than socket, or
|
||||
else let us define a default mapping that will allow multiple
|
||||
cpus-per-proc.
|
||||
#
|
||||
[unrecognized-modifier]
|
||||
The mapping request contains an unrecognized modifier:
|
||||
|
||||
Request: %s
|
||||
|
||||
Please check your request and try again.
|
||||
#
|
||||
[invalid-pattern]
|
||||
The mapping request contains a pattern that doesn't match
|
||||
the required syntax of #:object
|
||||
|
||||
Pattern: %s
|
||||
|
||||
Please check your request and try again.
|
||||
|
@ -52,7 +52,6 @@ orte_rmaps_base_t orte_rmaps_base;
|
||||
bool orte_rmaps_base_pernode = false;
|
||||
int orte_rmaps_base_n_pernode = 0;
|
||||
int orte_rmaps_base_n_persocket = 0;
|
||||
char *orte_rmaps_base_pattern = NULL;
|
||||
|
||||
/*
|
||||
* Local variables
|
||||
@ -74,38 +73,42 @@ static int orte_rmaps_base_register(mca_base_register_flag_t flags)
|
||||
int var_id;
|
||||
|
||||
orte_rmaps_base_pernode = false;
|
||||
(void) mca_base_var_register("orte", "rmaps", "base", "pernode",
|
||||
var_id = mca_base_var_register("orte", "rmaps", "base", "pernode",
|
||||
"Launch one ppn as directed",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&orte_rmaps_base_pernode);
|
||||
(void) mca_base_var_register_synonym(var_id, "orte", "rmaps", "ppr", "pernode", 0);
|
||||
|
||||
orte_rmaps_base_n_pernode = 0;
|
||||
(void) mca_base_var_register("orte", "rmaps", "base", "n_pernode",
|
||||
var_id = mca_base_var_register("orte", "rmaps", "base", "n_pernode",
|
||||
"Launch n procs/node", MCA_BASE_VAR_TYPE_INT,
|
||||
NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY, &orte_rmaps_base_n_pernode);
|
||||
(void) mca_base_var_register_synonym(var_id, "orte", "rmaps","ppr", "n_pernode", 0);
|
||||
|
||||
orte_rmaps_base_n_persocket = 0;
|
||||
(void) mca_base_var_register("orte", "rmaps", "base", "n_persocket",
|
||||
var_id = mca_base_var_register("orte", "rmaps", "base", "n_persocket",
|
||||
"Launch n procs/socket", MCA_BASE_VAR_TYPE_INT,
|
||||
NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY, &orte_rmaps_base_n_persocket);
|
||||
(void) mca_base_var_register_synonym(var_id, "orte", "rmaps","ppr", "n_persocket", 0);
|
||||
|
||||
orte_rmaps_base_pattern = NULL;
|
||||
(void) mca_base_var_register("orte", "rmaps", "base", "pattern",
|
||||
orte_rmaps_base.ppr = NULL;
|
||||
var_id = mca_base_var_register("orte", "rmaps", "base", "pattern",
|
||||
"Comma-separated list of number of processes on a given resource type [default: none]",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY, &orte_rmaps_base_pattern);
|
||||
MCA_BASE_VAR_SCOPE_READONLY, &orte_rmaps_base.ppr);
|
||||
(void) mca_base_var_register_synonym(var_id, "orte", "rmaps","ppr", "pattern", 0);
|
||||
|
||||
/* define default mapping policy */
|
||||
rmaps_base_mapping_policy = NULL;
|
||||
var_id = mca_base_var_register("orte", "rmaps", "base", "mapping_policy",
|
||||
#if OPAL_HAVE_HWLOC
|
||||
"Mapping Policy [slot | hwthread | core (default:np<=2) | l1cache | l2cache | l3cache | socket (default:np>2) | numa | board | node | seq | dist], with allowed modifiers :SPAN,OVERSUBSCRIBE,NOOVERSUBSCRIBE",
|
||||
"Mapping Policy [slot | hwthread | core (default:np<=2) | l1cache | l2cache | l3cache | socket (default:np>2) | numa | board | node | seq | dist | ppr], with allowed modifiers :PE=y,SPAN,OVERSUBSCRIBE,NOOVERSUBSCRIBE",
|
||||
#else
|
||||
"Mapping Policy [slot (default) | node], with allowed modifiers :SPAN,OVERSUBSCRIBE,NOOVERSUBSCRIBE",
|
||||
#endif
|
||||
@ -247,7 +250,6 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
||||
|
||||
/* init the globals */
|
||||
OBJ_CONSTRUCT(&orte_rmaps_base.selected_modules, opal_list_t);
|
||||
orte_rmaps_base.ppr = NULL;
|
||||
orte_rmaps_base.slot_list = NULL;
|
||||
orte_rmaps_base.mapping = 0;
|
||||
orte_rmaps_base.ranking = 0;
|
||||
@ -266,6 +268,20 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
||||
}
|
||||
#endif
|
||||
|
||||
/* check for a violation that has to be detected before we parse the mapping option */
|
||||
if (NULL != orte_rmaps_base.ppr) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
|
||||
"--ppr, -ppr", "--map-by ppr:<pattern>",
|
||||
"rmaps_base_pattern, rmaps_ppr_pattern",
|
||||
"rmaps_base_mapping_policy=ppr:<pattern>");
|
||||
/* if the mapping policy is NULL, then we can proceed */
|
||||
if (NULL == rmaps_base_mapping_policy) {
|
||||
asprintf(&rmaps_base_mapping_policy, "ppr:%s", orte_rmaps_base.ppr);
|
||||
} else {
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
}
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_set_mapping_policy(&orte_rmaps_base.mapping,
|
||||
&orte_rmaps_base.device,
|
||||
rmaps_base_mapping_policy))) {
|
||||
@ -280,7 +296,7 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
||||
|
||||
if (rmaps_base_bycore) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
|
||||
"--bycore", "--map-by core",
|
||||
"--bycore, -bycore", "--map-by core",
|
||||
"rmaps_base_bycore", "rmaps_base_mapping_policy=core");
|
||||
/* set mapping policy to bycore - error if something else already set */
|
||||
if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
|
||||
@ -306,7 +322,7 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
||||
|
||||
if (rmaps_base_byslot) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
|
||||
"--byslot", "--map-by slot",
|
||||
"--byslot, -byslot", "--map-by slot",
|
||||
"rmaps_base_byslot", "rmaps_base_mapping_policy=slot");
|
||||
/* set mapping policy to byslot - error if something else already set */
|
||||
if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
|
||||
@ -332,7 +348,7 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
||||
|
||||
if (rmaps_base_bynode) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
|
||||
"--bynode", "--map-by node",
|
||||
"--bynode, -bynode", "--map-by node",
|
||||
"rmaps_base_bynode", "rmaps_base_mapping_policy=node");
|
||||
/* set mapping policy to bynode - error if something else already set */
|
||||
if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
|
||||
@ -356,6 +372,10 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
||||
}
|
||||
|
||||
if (1 < orte_rmaps_base.cpus_per_rank) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
|
||||
"--cpus-per-proc, -cpus-per-proc, --cpus-per-rank, -cpus-per-rank",
|
||||
"--map-by <obj>:PE=N",
|
||||
"rmaps_base_cpus_per_proc", "rmaps_base_mapping_policy=<obj>:PE=N");
|
||||
/* check to see if we were told to map at too low a level */
|
||||
if ((ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) &&
|
||||
ORTE_GET_MAPPING_POLICY(orte_rmaps_base.mapping) > ORTE_MAPPING_BYSOCKET) {
|
||||
@ -394,6 +414,66 @@ static int orte_rmaps_base_open(mca_base_open_flag_t flags)
|
||||
#endif
|
||||
}
|
||||
|
||||
if (orte_rmaps_base_pernode) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
|
||||
"--pernode, -pernode", "--map-by node:PPR=1",
|
||||
"rmaps_base_pernode, rmaps_ppr_pernode",
|
||||
"rmaps_base_mapping_policy=node:PPR=1");
|
||||
/* there is no way to resolve this conflict, so if something else was
|
||||
* given, we have no choice but to error out
|
||||
*/
|
||||
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
|
||||
"bynode", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
/* ensure we set the mapping policy to ppr */
|
||||
ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_PPR);
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
|
||||
/* define the ppr */
|
||||
orte_rmaps_base.ppr = strdup("1:node");
|
||||
}
|
||||
|
||||
if (0 < orte_rmaps_base_n_pernode) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
|
||||
"--npernode, -npernode", "--map-by node:PPR=N",
|
||||
"rmaps_base_n_pernode, rmaps_ppr_n_pernode",
|
||||
"rmaps_base_mapping_policy=node:PPR=N");
|
||||
/* there is no way to resolve this conflict, so if something else was
|
||||
* given, we have no choice but to error out
|
||||
*/
|
||||
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
|
||||
"bynode", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
/* ensure we set the mapping policy to ppr */
|
||||
ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_PPR);
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
|
||||
/* define the ppr */
|
||||
asprintf(&orte_rmaps_base.ppr, "%d:node", orte_rmaps_base_n_pernode);
|
||||
}
|
||||
|
||||
if (0 < orte_rmaps_base_n_persocket) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "deprecated", true,
|
||||
"--npersocket, -npersocket", "--map-by socket:PPR=N",
|
||||
"rmaps_base_n_persocket, rmaps_ppr_n_persocket",
|
||||
"rmaps_base_mapping_policy=socket:PPR=N");
|
||||
/* there is no way to resolve this conflict, so if something else was
|
||||
* given, we have no choice but to error out
|
||||
*/
|
||||
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
|
||||
"bynode", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
/* ensure we set the mapping policy to ppr */
|
||||
ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_PPR);
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
|
||||
/* define the ppr */
|
||||
asprintf(&orte_rmaps_base.ppr, "%d:socket", orte_rmaps_base_n_persocket);
|
||||
}
|
||||
|
||||
/* Should we schedule on the local node or not? */
|
||||
if (rmaps_base_no_schedule_local) {
|
||||
orte_rmaps_base.mapping |= ORTE_MAPPING_NO_USE_LOCAL;
|
||||
@ -461,36 +541,82 @@ OBJ_CLASS_INSTANCE(orte_rmaps_base_selected_module_t,
|
||||
NULL, NULL);
|
||||
|
||||
|
||||
int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
|
||||
char **device, char *spec)
|
||||
static int check_modifiers(char *ck, orte_mapping_policy_t *tmp)
|
||||
{
|
||||
char **ck, **ck2;
|
||||
orte_mapping_policy_t tmp;
|
||||
char **ck2, *ptr;
|
||||
int i;
|
||||
bool found = false;
|
||||
|
||||
ck2 = opal_argv_split(ck, ',');
|
||||
for (i=0; NULL != ck2[i]; i++) {
|
||||
if (0 == strncasecmp(ck2[i], "span", strlen(ck2[i]))) {
|
||||
ORTE_SET_MAPPING_DIRECTIVE(*tmp, ORTE_MAPPING_SPAN);
|
||||
found = true;
|
||||
} else if (0 == strncasecmp(ck2[i], "pe", strlen("pe"))) {
|
||||
/* break this at the = sign to get the number */
|
||||
if (NULL == (ptr = strchr(ck2[i], '='))) {
|
||||
/* missing the value */
|
||||
orte_show_help("help-orte-rmaps-base.txt", "missing-value", true, "pe", ck2[i]);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
ptr++;
|
||||
if (NULL == ptr) {
|
||||
/* still missing the value */
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
orte_rmaps_base.cpus_per_rank = strtol(ptr, NULL, 10);
|
||||
found = true;
|
||||
} else if (0 == strncasecmp(ck2[i], "oversubscribe", strlen(ck2[i]))) {
|
||||
ORTE_UNSET_MAPPING_DIRECTIVE(*tmp, ORTE_MAPPING_NO_OVERSUBSCRIBE);
|
||||
ORTE_SET_MAPPING_DIRECTIVE(*tmp, ORTE_MAPPING_SUBSCRIBE_GIVEN);
|
||||
found = true;
|
||||
} else if (0 == strncasecmp(ck2[i], "nooversubscribe", strlen(ck2[i]))) {
|
||||
ORTE_SET_MAPPING_DIRECTIVE(*tmp, ORTE_MAPPING_NO_OVERSUBSCRIBE);
|
||||
ORTE_SET_MAPPING_DIRECTIVE(*tmp, ORTE_MAPPING_SUBSCRIBE_GIVEN);
|
||||
found = true;
|
||||
} else {
|
||||
/* unrecognized modifier */
|
||||
opal_argv_free(ck2);
|
||||
return ORTE_ERR_BAD_PARAM;
|
||||
}
|
||||
}
|
||||
opal_argv_free(ck2);
|
||||
if (found) {
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
|
||||
int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
|
||||
char **device, char *inspec)
|
||||
{
|
||||
char *ck, **ck2, *ptr;
|
||||
orte_mapping_policy_t tmp;
|
||||
int i, rc;
|
||||
size_t len;
|
||||
char *spec;
|
||||
|
||||
/* set defaults */
|
||||
tmp = 0;
|
||||
*device = NULL;
|
||||
|
||||
if (NULL == spec) {
|
||||
if (NULL == inspec) {
|
||||
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
|
||||
} else {
|
||||
ck = opal_argv_split(spec, ':');
|
||||
if (2 < opal_argv_count(ck)) {
|
||||
/* incorrect format */
|
||||
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "mapping", rmaps_base_mapping_policy);
|
||||
opal_argv_free(ck);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
if (2 == opal_argv_count(ck)) {
|
||||
spec = strdup(inspec); // protect the input string
|
||||
/* see if a colon was included - if so, then we have a policy + modifier */
|
||||
ck = strchr(spec, ':');
|
||||
if (NULL != ck) {
|
||||
/* split the string */
|
||||
*ck = '\0';
|
||||
ck++;
|
||||
/* if the policy is "dist", then we set the policy to that value
|
||||
* and save the second argument as the device
|
||||
*/
|
||||
#if OPAL_HAVE_HWLOC
|
||||
if (0 == strncasecmp(ck[0], "dist", strlen(ck[0]))) {
|
||||
if (0 == strncasecmp(spec, "dist", strlen(spec))) {
|
||||
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYDIST);
|
||||
ck2 = opal_argv_split(ck[1], ',');
|
||||
ck2 = opal_argv_split(ck, ',');
|
||||
if (ck2[0] != NULL) {
|
||||
*device = strdup(ck2[0]);
|
||||
for (i=1; NULL != ck2[i]; i++) {
|
||||
@ -502,63 +628,78 @@ int orte_rmaps_base_set_mapping_policy(orte_mapping_policy_t *policy,
|
||||
ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
|
||||
opal_argv_free(ck2);
|
||||
goto setpolicy;
|
||||
} else if (0 == strncasecmp(spec, "ppr", strlen(spec))) {
|
||||
/* we have to allow additional modifiers here - e.g., specifying
|
||||
* #pe's/proc or oversubscribe - so check for modifiers
|
||||
*/
|
||||
if (NULL == (ptr = strrchr(ck, ':'))) {
|
||||
/* this is an error - there had to be at least one
|
||||
* colon to delimit the number from the object type
|
||||
*/
|
||||
orte_show_help("help-orte-rmaps-base.txt", "invalid-pattern", true, inspec);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
ptr++; // move past the colon
|
||||
/* check the remaining string for modifiers - may be none, so
|
||||
* don't emit an error message if the modifier isn't recognized
|
||||
*/
|
||||
if (ORTE_ERR_SILENT == (rc = check_modifiers(ptr, &tmp)) &&
|
||||
ORTE_ERR_BAD_PARAM != rc) {
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
/* if we found something, then we need to adjust the string */
|
||||
if (ORTE_SUCCESS == rc) {
|
||||
ptr--;
|
||||
*ptr = '\0';
|
||||
}
|
||||
/* now get the pattern */
|
||||
orte_rmaps_base.ppr = strdup(ck);
|
||||
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_PPR);
|
||||
ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
|
||||
goto setpolicy;
|
||||
}
|
||||
#endif
|
||||
ck2 = opal_argv_split(ck[1], ',');
|
||||
for (i=0; NULL != ck2[i]; i++) {
|
||||
if (0 == strncasecmp(ck2[i], "span", strlen(ck2[i]))) {
|
||||
ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_SPAN);
|
||||
} else if (0 == strncasecmp(ck2[i], "oversubscribe", strlen(ck2[i]))) {
|
||||
if (ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(tmp)) {
|
||||
ORTE_UNSET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_NO_OVERSUBSCRIBE);
|
||||
ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_SUBSCRIBE_GIVEN);
|
||||
} else if (0 == strncasecmp(ck2[i], "nooversubscribe", strlen(ck2[i]))) {
|
||||
ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_NO_OVERSUBSCRIBE);
|
||||
ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_SUBSCRIBE_GIVEN);
|
||||
} else {
|
||||
/* unrecognized modifier */
|
||||
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-modifier", true, "mapping", ck2[i]);
|
||||
opal_argv_free(ck);
|
||||
opal_argv_free(ck2);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
if (ORTE_SUCCESS != (rc = check_modifiers(ck, &tmp)) &&
|
||||
ORTE_ERR_TAKE_NEXT_OPTION != rc) {
|
||||
if (ORTE_ERR_BAD_PARAM == rc) {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-modifier", true, inspec);
|
||||
}
|
||||
opal_argv_free(ck2);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
len = strlen(ck[0]);
|
||||
if (0 == strncasecmp(ck[0], "slot", len)) {
|
||||
len = strlen(spec);
|
||||
if (0 == strncasecmp(spec, "slot", len)) {
|
||||
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSLOT);
|
||||
} else if (0 == strncasecmp(ck[0], "node", len)) {
|
||||
} else if (0 == strncasecmp(spec, "node", len)) {
|
||||
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNODE);
|
||||
#if OPAL_HAVE_HWLOC
|
||||
} else if (0 == strncasecmp(ck[0], "core", len)) {
|
||||
} else if (0 == strncasecmp(spec, "core", len)) {
|
||||
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYCORE);
|
||||
} else if (0 == strncasecmp(ck[0], "l1cache", len)) {
|
||||
} else if (0 == strncasecmp(spec, "l1cache", len)) {
|
||||
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL1CACHE);
|
||||
} else if (0 == strncasecmp(ck[0], "l2cache", len)) {
|
||||
} else if (0 == strncasecmp(spec, "l2cache", len)) {
|
||||
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL2CACHE);
|
||||
} else if (0 == strncasecmp(ck[0], "l3cache", len)) {
|
||||
} else if (0 == strncasecmp(spec, "l3cache", len)) {
|
||||
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYL3CACHE);
|
||||
} else if (0 == strncasecmp(ck[0], "socket", len)) {
|
||||
} else if (0 == strncasecmp(spec, "socket", len)) {
|
||||
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYSOCKET);
|
||||
} else if (0 == strncasecmp(ck[0], "numa", len)) {
|
||||
} else if (0 == strncasecmp(spec, "numa", len)) {
|
||||
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYNUMA);
|
||||
} else if (0 == strncasecmp(ck[0], "board", len)) {
|
||||
} else if (0 == strncasecmp(spec, "board", len)) {
|
||||
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYBOARD);
|
||||
} else if (0 == strncasecmp(ck[0], "hwthread", len)) {
|
||||
} else if (0 == strncasecmp(spec, "hwthread", len)) {
|
||||
ORTE_SET_MAPPING_POLICY(tmp, ORTE_MAPPING_BYHWTHREAD);
|
||||
/* if we are mapping processes to individual hwthreads, then
|
||||
* we need to treat those hwthreads as separate cpus
|
||||
*/
|
||||
opal_hwloc_use_hwthreads_as_cpus = true;
|
||||
opal_hwloc_use_hwthreads_as_cpus = true;
|
||||
#endif
|
||||
} else {
|
||||
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "mapping", rmaps_base_mapping_policy);
|
||||
opal_argv_free(ck);
|
||||
orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy", true, "mapping", spec);
|
||||
free(spec);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
opal_argv_free(ck);
|
||||
free(spec);
|
||||
ORTE_SET_MAPPING_DIRECTIVE(tmp, ORTE_MAPPING_GIVEN);
|
||||
}
|
||||
|
||||
|
@ -59,7 +59,6 @@ orte_rmaps_base_component_t mca_rmaps_lama_component = {
|
||||
static int orte_rmaps_lama_register(void)
|
||||
{
|
||||
mca_base_component_t *c = &mca_rmaps_lama_component.base_version;
|
||||
int var_id;
|
||||
|
||||
/* JMS Artificially low for now */
|
||||
module_priority = 0;
|
||||
@ -105,19 +104,6 @@ static int orte_rmaps_lama_register(void)
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&rmaps_lama_cmd_ordering);
|
||||
|
||||
/* NTH: Moved from rmaps_lama_params.c */
|
||||
var_id = mca_base_var_find("orte", "rmaps", "base", "pernode");
|
||||
(void) mca_base_var_register_synonym(var_id, "orte", "rmaps", "lama", "pernode", 0);
|
||||
|
||||
var_id = mca_base_var_find("orte", "rmaps", "base", "n_pernode");
|
||||
(void) mca_base_var_register_synonym(var_id, "orte", "rmaps", "lama", "n_pernode", 0);
|
||||
|
||||
var_id = mca_base_var_find("orte", "rmaps", "base", "n_persocket");
|
||||
(void) mca_base_var_register_synonym(var_id, "orte", "rmaps", "lama", "n_persocket", 0);
|
||||
|
||||
var_id = mca_base_var_find("orte", "rmaps", "base", "pattern");
|
||||
(void) mca_base_var_register_synonym(var_id, "orte", "rmaps", "lama", "pattern", 0);
|
||||
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:lama: Priority %3d",
|
||||
module_priority);
|
||||
|
@ -215,44 +215,11 @@ int rmaps_lama_process_alias_params(orte_job_t *jdata)
|
||||
*/
|
||||
if( NULL == rmaps_lama_cmd_mppr ) {
|
||||
/*
|
||||
* Take what the user specified as the -ppr
|
||||
* The ppr is given in the map
|
||||
*/
|
||||
if( NULL != jdata->map->ppr) {
|
||||
rmaps_lama_cmd_mppr = rmaps_lama_covert_ppr(jdata->map->ppr);
|
||||
}
|
||||
/*
|
||||
* Otherwise look at the parameters registered for the ppn component
|
||||
*/
|
||||
else {
|
||||
/*
|
||||
* -pernode => -mppr 1:n
|
||||
*/
|
||||
if( NULL == rmaps_lama_cmd_mppr && orte_rmaps_base_pernode ) {
|
||||
rmaps_lama_cmd_mppr = strdup("1:n");
|
||||
}
|
||||
|
||||
/*
|
||||
* -npernode X => -mppr X:n
|
||||
*/
|
||||
if( NULL == rmaps_lama_cmd_mppr && orte_rmaps_base_n_pernode > 0) {
|
||||
asprintf(&rmaps_lama_cmd_mppr, "%d:n", orte_rmaps_base_n_pernode);
|
||||
}
|
||||
|
||||
/*
|
||||
* -npersocket X => -mppr X:s
|
||||
*/
|
||||
if( NULL == rmaps_lama_cmd_mppr && orte_rmaps_base_n_persocket > 0) {
|
||||
asprintf(&rmaps_lama_cmd_mppr, "%d:s", orte_rmaps_base_n_persocket);
|
||||
}
|
||||
|
||||
/*
|
||||
* -ppr => ~ -mppr
|
||||
*/
|
||||
if( NULL == rmaps_lama_cmd_mppr && NULL != orte_rmaps_base_pattern ) {
|
||||
jdata->map->ppr = strdup (orte_rmaps_base_pattern);
|
||||
rmaps_lama_cmd_mppr = rmaps_lama_covert_ppr(jdata->map->ppr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -110,7 +110,7 @@ static int ppr_mapper(orte_job_t *jdata)
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
if (NULL == jdata->map->ppr ||
|
||||
!(ORTE_MAPPING_PPR & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
|
||||
ORTE_MAPPING_PPR != ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
|
||||
/* not for us */
|
||||
opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
|
||||
"mca:rmaps:ppr: job %s not using ppr mapper",
|
||||
|
@ -21,10 +21,10 @@
|
||||
* Local functions
|
||||
*/
|
||||
|
||||
static int orte_rmaps_ppr_register(void);
|
||||
static int orte_rmaps_ppr_open(void);
|
||||
static int orte_rmaps_ppr_close(void);
|
||||
static int orte_rmaps_ppr_query(mca_base_module_t **module, int *priority);
|
||||
static int orte_rmaps_ppr_register(void);
|
||||
|
||||
orte_rmaps_base_component_t mca_rmaps_ppr_component = {
|
||||
{
|
||||
@ -45,113 +45,17 @@ orte_rmaps_base_component_t mca_rmaps_ppr_component = {
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* component register/open/close/init function
|
||||
*/
|
||||
static int orte_rmaps_ppr_register(void)
|
||||
{
|
||||
int var_id;
|
||||
|
||||
var_id = mca_base_var_find("orte", "rmaps", "base", "pernode");
|
||||
(void) mca_base_var_register_synonym(var_id, "orte", "rmaps", "ppr", "pernode", 0);
|
||||
|
||||
var_id = mca_base_var_find("orte", "rmaps", "base", "n_pernode");
|
||||
(void) mca_base_var_register_synonym(var_id, "orte", "rmaps","ppr", "n_pernode", 0);
|
||||
|
||||
#if OPAL_HAVE_HWLOC
|
||||
var_id = mca_base_var_find("orte", "rmaps", "base", "n_persocket");
|
||||
(void) mca_base_var_register_synonym(var_id, "orte", "rmaps","ppr", "n_persocket", 0);
|
||||
|
||||
var_id = mca_base_var_find("orte", "rmaps", "base", "pattern");
|
||||
(void) mca_base_var_register_synonym(var_id, "orte", "rmaps","ppr", "pattern", 0);
|
||||
#endif
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
static int my_priority;
|
||||
|
||||
static int orte_rmaps_ppr_open(void)
|
||||
{
|
||||
/* check for pernode, npernode, and npersocket directives - reqd for backward compatibility */
|
||||
if (orte_rmaps_base_pernode) {
|
||||
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||
/* if a non-default mapping is already specified, then we
|
||||
* have an error
|
||||
*/
|
||||
orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
|
||||
"PERNODE", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_CONFLICTED);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_PPR);
|
||||
ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYNODE);
|
||||
orte_rmaps_base.ppr = strdup("1:node");
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
|
||||
}
|
||||
|
||||
if (orte_rmaps_base_n_pernode) {
|
||||
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||
/* if a non-default mapping is already specified, then we
|
||||
* have an error
|
||||
*/
|
||||
orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
|
||||
"NPERNODE", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_CONFLICTED);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_PPR);
|
||||
ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYNODE);
|
||||
asprintf(&orte_rmaps_base.ppr, "%d:node", orte_rmaps_base_n_pernode);
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
|
||||
}
|
||||
|
||||
#if OPAL_HAVE_HWLOC
|
||||
if (orte_rmaps_base_n_persocket) {
|
||||
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||
/* if a non-default mapping is already specified, then we
|
||||
* have an error
|
||||
*/
|
||||
orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
|
||||
"NPERSOCKET", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_CONFLICTED);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_PPR);
|
||||
ORTE_SET_MAPPING_POLICY(orte_rmaps_base.mapping, ORTE_MAPPING_BYSOCKET);
|
||||
/* this implies binding to the sockets, unless otherwise directed */
|
||||
if (!OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy)) {
|
||||
OPAL_SET_BINDING_POLICY(opal_hwloc_binding_policy, OPAL_BIND_TO_SOCKET);
|
||||
}
|
||||
asprintf(&orte_rmaps_base.ppr, "%d:socket", orte_rmaps_base_n_persocket);
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
|
||||
}
|
||||
|
||||
if (NULL != orte_rmaps_base_pattern) {
|
||||
if (ORTE_MAPPING_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping)) {
|
||||
/* if a non-default mapping is already specified, then we
|
||||
* have an error
|
||||
*/
|
||||
orte_show_help("help-orte-rmaps-base.txt", "redefining-policy", true, "mapping",
|
||||
"PPR", orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_CONFLICTED);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_PPR);
|
||||
/* since we don't know what pattern was given, leave the policy undefined
|
||||
* for now - we will assign it when we analyze the pattern later
|
||||
*/
|
||||
orte_rmaps_base.ppr = orte_rmaps_base_pattern;
|
||||
ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_GIVEN);
|
||||
}
|
||||
#endif
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int orte_rmaps_ppr_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
*priority = 90;
|
||||
*priority = my_priority;
|
||||
*module = (mca_base_module_t *)&orte_rmaps_ppr_module;
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
@ -166,3 +70,14 @@ static int orte_rmaps_ppr_close(void)
|
||||
}
|
||||
|
||||
|
||||
static int orte_rmaps_ppr_register(void)
|
||||
{
|
||||
my_priority = 90;
|
||||
(void) mca_base_component_var_register(&mca_rmaps_ppr_component.base_version,
|
||||
"priority", "Priority of the ppr rmaps component",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY, &my_priority);
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
@ -89,7 +89,6 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_job_map_t);
|
||||
#define ORTE_MAPPING_NO_OVERSUBSCRIBE 0x0200
|
||||
#define ORTE_MAPPING_SUBSCRIBE_GIVEN 0x0400
|
||||
#define ORTE_MAPPING_SPAN 0x0800
|
||||
#define ORTE_MAPPING_PPR 0x1000
|
||||
/* an error flag */
|
||||
#define ORTE_MAPPING_CONFLICTED 0x2000
|
||||
#define ORTE_MAPPING_GIVEN 0x4000
|
||||
@ -120,6 +119,8 @@ ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_job_map_t);
|
||||
#define ORTE_MAPPING_STAGED 21
|
||||
/* rank file and other user-defined mapping */
|
||||
#define ORTE_MAPPING_BYUSER 22
|
||||
/* pattern-based mapping */
|
||||
#define ORTE_MAPPING_PPR 23
|
||||
/* macro to separate out the mapping policy
|
||||
* from the directives
|
||||
*/
|
||||
|
@ -170,16 +170,19 @@ error (without beginning execution of the application) otherwise.
|
||||
On each node, launch this many processes times the number of processor
|
||||
sockets on the node.
|
||||
The \fI-npersocket\fP option also turns on the \fI-bind-to-socket\fP option.
|
||||
(deprecated in favor of --map-by ppr:n:socket)
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -npernode\fR,\fP --npernode <#pernode>
|
||||
On each node, launch this many processes.
|
||||
(deprecated in favor of --map-by ppr:n:node)
|
||||
.
|
||||
.
|
||||
.TP
|
||||
.B -pernode\fR,\fP --pernode
|
||||
On each node, launch one process -- equivalent to \fI-npernode\fP 1.
|
||||
(deprecated in favor of --map-by ppr:1:node)
|
||||
.
|
||||
.
|
||||
.
|
||||
@ -191,7 +194,14 @@ To map processes:
|
||||
.TP
|
||||
.B --map-by <foo>
|
||||
Map to the specified object, defaults to \fIsocket\fP. Supported options
|
||||
include slot, hwthread, core, socket, numa, board, and node.
|
||||
include slot, hwthread, core, L1cache, L2cache, L3cache, socket, numa,
|
||||
board, node, sequential, distance, and ppr. Any object can include
|
||||
modifiers by adding a \fR:\fP and any combination of PE=n (bind n
|
||||
processing elements to each proc), SPAN (load
|
||||
balance the processes across the allocation), OVERSUBSCRIBE (allow
|
||||
more processes on a node than processing elements), and NOOVERSUBSCRIBE.
|
||||
This includes PPR, where the pattern would be terminated by another colon
|
||||
to separate it from the modifiers (e.g., \fI--map-by ppr:2:socket:pe=4\fP).
|
||||
.
|
||||
.TP
|
||||
.B -bycore\fR,\fP --bycore
|
||||
@ -247,10 +257,12 @@ include slot, hwthread, core, socket, numa, board, and none.
|
||||
.TP
|
||||
.B -cpus-per-proc\fR,\fP --cpus-per-proc <#perproc>
|
||||
Bind each process to the specified number of cpus.
|
||||
(deprecated in favor of --map-by <obj>:PE=n)
|
||||
.
|
||||
.TP
|
||||
.B -cpus-per-rank\fR,\fP --cpus-per-rank <#perrank>
|
||||
Alias for \fI-cpus-per-proc\fP.
|
||||
(deprecated in favor of --map-by <obj>:PE=n)
|
||||
.
|
||||
.TP
|
||||
.B -bind-to-core\fR,\fP --bind-to-core
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user