1
1
openmpi/orte/mca/rmaps/lama/rmaps_lama_component.c
Ralph Castain bc7cc09749 After a lot of pain, I've managed to resolve the problem of conflicting mapping directives caused by mismatched MCA params - i.e., where someone has one variant of an MCA param (e.g., rmaps_base_mapping_policy) in their default MCA param file, and then specifies another variant (e.g., --npernode) on the command line. I can't fully resolve the problem as there is no way to know precisely what the user meant - we can only guess which param was really intended since the MCA param system can't apply its normal precedence rules.

So...print a big "deprecated" warning for the old params and error out if a conflict is detected. I know that isn't what people really wanted, but it's the best we can do. If only the old style param is given, then process it after the warning.

Extend the current map-by param to add support for ppr and cpus-per-proc, adding the latter to the list of allowed modifiers using "pe=n" for processing elements/proc. Thus, you can map-by socket:pe=2,oversubscribe to map by socket, binding 2 processing elements/process, with oversubscription allowed. Or you can map-by ppr:2:socket:pe=4 to map two processes to every socket in the allocation, binding each process to 4 processing elements.

For those wondering, a processing element is defined as a hwthread if --use-hwthreads-as-cpus is given, or else as a core.

Refs trac:4117

This commit was SVN r30620.

The following Trac tickets were found above:
  Ticket 4117 --> https://svn.open-mpi.org/trac/ompi/ticket/4117
2014-02-07 21:25:40 +00:00

137 строки
5.5 KiB
C

/*
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
*
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "opal/mca/base/base.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/base/base.h"
#include "rmaps_lama.h"
/*
* Local functions
*
* Forward declarations of the two component callbacks that are installed in
* the mca_rmaps_lama_component descriptor defined later in this file.
*/
static int orte_rmaps_lama_register(void);
static int orte_rmaps_lama_query(mca_base_module_t **module, int *priority);
/*
* Backing storage for the "priority" MCA variable. It is set to 0 and
* registered in orte_rmaps_lama_register(), and handed back to the caller
* by orte_rmaps_lama_query().
*/
static int module_priority;
/*
* Backing storage for the string MCA variables "map", "bind", "mppr" and
* "ordering" (in that order). Each is reset to NULL and registered in
* orte_rmaps_lama_register(). They have external linkage.
*/
char * rmaps_lama_cmd_map = NULL;
char * rmaps_lama_cmd_bind = NULL;
char * rmaps_lama_cmd_mppr = NULL;
char * rmaps_lama_cmd_ordering = NULL;
/* Backing storage for the boolean "timing" MCA variable; defaults to false. */
bool rmaps_lama_timing_enabled = false;
/*
* Oversubscription flags. They are only defined here (both false) and are
* not registered as MCA variables or otherwise touched in this file.
* NOTE(review): presumably maintained by the LAMA mapper module - confirm.
*/
bool rmaps_lama_can_oversubscribe = false;
bool rmaps_lama_am_oversubscribing = false;
/*
* Public descriptor of the LAMA rmaps component.
*
* The initializer is positional. The first group supplies the framework
* version macro, the component name, the ORTE version numbers and the
* lifecycle callbacks (no open/close hooks; query and register are the
* static functions declared in this file). The second group supplies the
* component metadata.
* NOTE(review): the first group is presumably the base_version member that
* orte_rmaps_lama_register() takes the address of - confirm against the
* orte_rmaps_base_component_t definition.
*/
orte_rmaps_base_component_t mca_rmaps_lama_component = {
{
ORTE_RMAPS_BASE_VERSION_2_0_0,
"lama", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
NULL, /* component open */
NULL, /* component close */
orte_rmaps_lama_query, /* component query */
orte_rmaps_lama_register /* component register */
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
};
/*
 * Component "register" callback.
 *
 * Resets each LAMA tunable to its default and registers it as an MCA
 * variable on this component, then reports the resulting values on the
 * rmaps framework output stream at verbosity level 5.
 *
 * The return codes of the individual registrations are intentionally
 * discarded, so this function always returns ORTE_SUCCESS.
 */
static int orte_rmaps_lama_register(void)
{
    mca_base_component_t *component = &mca_rmaps_lama_component.base_version;
    const char *map_text;
    const char *bind_text;
    const char *mppr_text;
    const char *ordering_text;
    int stream;

    /* Component priority (JMS: artificially low for now) */
    module_priority = 0;
    (void) mca_base_component_var_register(component, "priority",
                                           "Priority of the LAMA rmaps component",
                                           MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &module_priority);

    /* Timing output is off unless requested */
    rmaps_lama_timing_enabled = false;
    (void) mca_base_component_var_register(component, "timing",
                                           "Enable timing information. [Default = disabled]",
                                           MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
                                           OPAL_INFO_LVL_9,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &rmaps_lama_timing_enabled);

    /* Process layout (map) specification */
    rmaps_lama_cmd_map = NULL;
    (void) mca_base_component_var_register(component, "map",
                                           "LAMA Map: Process layout iteration ordering (See documentation)",
                                           MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                           OPAL_INFO_LVL_5,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &rmaps_lama_cmd_map);

    /* Binding specification */
    rmaps_lama_cmd_bind = NULL;
    (void) mca_base_component_var_register(component, "bind",
                                           "LAMA Bind: Bind to the specified number of resources (See documentation)",
                                           MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                           OPAL_INFO_LVL_5,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &rmaps_lama_cmd_bind);

    /* MPPR specification */
    rmaps_lama_cmd_mppr = NULL;
    (void) mca_base_component_var_register(component, "mppr",
                                           "LAMA MPPR: Maximum number of the specified resources available (See documentation)",
                                           MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                           OPAL_INFO_LVL_5,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &rmaps_lama_cmd_mppr);

    /* Ordering specification */
    rmaps_lama_cmd_ordering = NULL;
    (void) mca_base_component_var_register(component, "ordering",
                                           "LAMA Ordering: Ordering (s) sequential, (n) natural - Default: n (See documentation)",
                                           MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
                                           OPAL_INFO_LVL_5,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &rmaps_lama_cmd_ordering);

    /* Unset string values are displayed as the literal text "NULL" */
    map_text      = (NULL != rmaps_lama_cmd_map)      ? rmaps_lama_cmd_map      : "NULL";
    bind_text     = (NULL != rmaps_lama_cmd_bind)     ? rmaps_lama_cmd_bind     : "NULL";
    mppr_text     = (NULL != rmaps_lama_cmd_mppr)     ? rmaps_lama_cmd_mppr     : "NULL";
    ordering_text = (NULL != rmaps_lama_cmd_ordering) ? rmaps_lama_cmd_ordering : "NULL";

    /* Report the effective settings */
    stream = orte_rmaps_base_framework.framework_output;
    opal_output_verbose(5, stream, "mca:rmaps:lama: Priority %3d", module_priority);
    opal_output_verbose(5, stream, "mca:rmaps:lama: Map : %s", map_text);
    opal_output_verbose(5, stream, "mca:rmaps:lama: Bind : %s", bind_text);
    opal_output_verbose(5, stream, "mca:rmaps:lama: MPPR : %s", mppr_text);
    opal_output_verbose(5, stream, "mca:rmaps:lama: Order : %s", ordering_text);

    return ORTE_SUCCESS;
}
/*
 * Component "query" callback.
 *
 * Hands back the LAMA mapper module together with the priority held in
 * module_priority. Always returns ORTE_SUCCESS.
 *
 * NOTE(review): the original comment here read "Only run on the HNP", but
 * no HNP check is performed in this function - confirm whether one is
 * needed or whether the restriction is enforced elsewhere.
 */
static int orte_rmaps_lama_query(mca_base_module_t **module, int *priority)
{
    *module = (mca_base_module_t *) &orte_rmaps_lama_module;
    *priority = module_priority;

    return ORTE_SUCCESS;
}