George raised some valid concerns about the extensibility of the revised rmaps framework. Address those by:
1. removing the enum of mapper values
2. changing the req_mapper and last_mapper fields to char* so they can hold the component name instead of a mapper flag
3. revising the selection logic in the mapper components to reflect the change. Components now look for their name in the req_mapper field, or check whether other criteria (e.g., npernode) are set that mandate their doing the mapping.

Several MCA params resided in the rmaps base for historical reasons - they have been in the base since at least the original 1.2 release (and perhaps earlier). However, George correctly pointed out that they really should reside in their respective components. Accordingly, move them to the components, but register synonyms to the old names to avoid breaking backward compatibility.

These revisions retain the current functionality of allowing comm_spawn'd jobs to use different mappers than the original job, and of allowing the errmgr to utilize the resilient mapper to recover processes regardless of how they were originally mapped.

Given the large number of possible combinations, I am sure that someone will find a corner-case combination of values and selection criteria that causes either no mapper to be selected, or one other than the intended one to be used. No one can test all the ways people will use this system, so I expect debugging to continue for a while.

The ability of comm_spawn'd jobs to exploit this functionality relies on changes to the orte_dpm component - this will be committed separately.

This commit was SVN r24520.
Этот коммит содержится в:
родитель
0867454a06
Коммит
1297acde13
@ -58,16 +58,8 @@ typedef struct {
|
||||
opal_list_t available_components;
|
||||
/* list of selected modules */
|
||||
opal_list_t selected_modules;
|
||||
/* desired default mapper */
|
||||
orte_rmaps_mapper_type_t default_mapper;
|
||||
/** whether or not we allow oversubscription of nodes */
|
||||
bool oversubscribe;
|
||||
/** number of ppn for n_per_node mode */
|
||||
int npernode;
|
||||
/* number of procs/board */
|
||||
int nperboard;
|
||||
/* number of procs/socket */
|
||||
int npersocket;
|
||||
/* cpus per rank */
|
||||
int cpus_per_rank;
|
||||
/* stride */
|
||||
@ -107,8 +99,6 @@ ORTE_DECLSPEC int orte_rmaps_base_get_vpid_range(orte_jobid_t jobid,
|
||||
ORTE_DECLSPEC int orte_rmaps_base_set_vpid_range(orte_jobid_t jobid,
|
||||
orte_vpid_t start, orte_vpid_t range);
|
||||
|
||||
ORTE_DECLSPEC char* orte_rmaps_base_print_mapper(orte_rmaps_mapper_type_t mapper);
|
||||
|
||||
/**
|
||||
* Close down the rmaps framework
|
||||
*/
|
||||
|
@ -72,25 +72,18 @@ int orte_rmaps_base_map_job(orte_job_t *jdata)
|
||||
}
|
||||
/* load it with the system defaults */
|
||||
map->policy = orte_default_mapping_policy;
|
||||
map->npernode = orte_rmaps_base.npernode;
|
||||
map->nperboard = orte_rmaps_base.nperboard;
|
||||
map->npersocket = orte_rmaps_base.npersocket;
|
||||
map->cpus_per_rank = orte_rmaps_base.cpus_per_rank;
|
||||
map->stride = orte_rmaps_base.stride;
|
||||
map->oversubscribe = orte_rmaps_base.oversubscribe;
|
||||
map->display_map = orte_rmaps_base.display_map;
|
||||
map->req_mapper = orte_rmaps_base.default_mapper;
|
||||
/* assign the map object to this job */
|
||||
jdata->map = map;
|
||||
} else {
|
||||
if (!jdata->map->display_map) {
|
||||
jdata->map->display_map = orte_rmaps_base.display_map;
|
||||
}
|
||||
if (ORTE_RMAPS_UNDEF == jdata->map->req_mapper) {
|
||||
jdata->map->req_mapper = orte_rmaps_base.default_mapper;
|
||||
}
|
||||
if (0 == jdata->map->policy) {
|
||||
jdata->map->policy = orte_default_mapping_policy;
|
||||
if (!ORTE_MAPPING_POLICY_IS_SET(jdata->map->policy)) {
|
||||
jdata->map->policy = jdata->map->policy | orte_default_mapping_policy;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -88,7 +88,6 @@ int orte_rmaps_base_open(void)
|
||||
|
||||
/* init the globals */
|
||||
OBJ_CONSTRUCT(&orte_rmaps_base.selected_modules, opal_list_t);
|
||||
orte_rmaps_base.default_mapper = ORTE_RMAPS_UNDEF;
|
||||
|
||||
/* Debugging / verbose output. Always have stream open, with
|
||||
verbose set by the mca open system... */
|
||||
@ -113,52 +112,6 @@ int orte_rmaps_base_open(void)
|
||||
ORTE_XSET_MAPPING_POLICY(ORTE_MAPPING_BYNODE);
|
||||
}
|
||||
|
||||
/* check for procs/xxx directives */
|
||||
param = mca_base_param_reg_int_name("rmaps", "base_pernode",
|
||||
"Launch one ppn as directed",
|
||||
false, false, (int)false, &value);
|
||||
if (value) {
|
||||
orte_rmaps_base.npernode = 1;
|
||||
orte_rmaps_base.default_mapper = ORTE_RMAPS_LOADBALANCE;
|
||||
}
|
||||
|
||||
/* #procs/node */
|
||||
param = mca_base_param_reg_int_name("rmaps", "base_n_pernode",
|
||||
"Launch n procs/node",
|
||||
false, false, -1, &value);
|
||||
if (0 < value) {
|
||||
orte_rmaps_base.npernode = value;
|
||||
orte_rmaps_base.default_mapper = ORTE_RMAPS_LOADBALANCE;
|
||||
}
|
||||
|
||||
/* #procs/board */
|
||||
param = mca_base_param_reg_int_name("rmaps", "base_n_perboard",
|
||||
"Launch n procs/board",
|
||||
false, false, -1, &orte_rmaps_base.nperboard);
|
||||
if (0 < orte_rmaps_base.nperboard) {
|
||||
ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX);
|
||||
orte_rmaps_base.default_mapper = ORTE_RMAPS_LOADBALANCE;
|
||||
}
|
||||
|
||||
/* #procs/socket */
|
||||
param = mca_base_param_reg_int_name("rmaps", "base_n_persocket",
|
||||
"Launch n procs/socket",
|
||||
false, false, -1, &orte_rmaps_base.npersocket);
|
||||
if (0 < orte_rmaps_base.npersocket) {
|
||||
ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX);
|
||||
/* force bind to socket if not overridden by user */
|
||||
ORTE_XSET_BINDING_POLICY(ORTE_BIND_TO_SOCKET);
|
||||
orte_rmaps_base.default_mapper = ORTE_RMAPS_LOADBALANCE;
|
||||
}
|
||||
|
||||
/* Do we want to loadbalance the job */
|
||||
param = mca_base_param_reg_int_name("rmaps", "base_loadbalance",
|
||||
"Balance total number of procs across all allocated nodes",
|
||||
false, false, (int)false, &value);
|
||||
if (value) {
|
||||
orte_rmaps_base.default_mapper = ORTE_RMAPS_LOADBALANCE;
|
||||
}
|
||||
|
||||
/* #cpus/rank to use */
|
||||
param = mca_base_param_reg_int_name("rmaps", "base_cpus_per_proc",
|
||||
"Number of cpus to use for each rank [1-2**15 (default=1)]",
|
||||
@ -192,17 +145,6 @@ int orte_rmaps_base_open(void)
|
||||
false, false, 1, &value);
|
||||
orte_rmaps_base.stride = value;
|
||||
|
||||
/* did the user provide a slot list? */
|
||||
param = mca_base_param_reg_string_name("rmaps", "base_slot_list",
|
||||
"List of processor IDs to bind MPI processes to (e.g., used in conjunction with rank files) [default=NULL]",
|
||||
false, false, NULL, &orte_rmaps_base.slot_list);
|
||||
/* ensure we flag mapping by user */
|
||||
if (NULL != orte_rmaps_base.slot_list ||
|
||||
NULL != orte_rankfile) {
|
||||
ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_BYUSER);
|
||||
orte_rmaps_base.default_mapper = ORTE_RMAPS_RF;
|
||||
}
|
||||
|
||||
/* Should we schedule on the local node or not? */
|
||||
mca_base_param_reg_int_name("rmaps", "base_no_schedule_local",
|
||||
"If false, allow scheduling MPI applications on the same node as mpirun (default). If true, do not schedule any MPI applications on the same node as mpirun",
|
||||
|
@ -890,23 +890,3 @@ int orte_rmaps_base_setup_virtual_machine(orte_job_t *jdata)
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
char* orte_rmaps_base_print_mapper(orte_rmaps_mapper_type_t mapper)
|
||||
{
|
||||
switch(mapper) {
|
||||
case ORTE_RMAPS_UNDEF:
|
||||
return "UNDEF";
|
||||
case ORTE_RMAPS_RR:
|
||||
return "ROUND_ROBIN";
|
||||
case ORTE_RMAPS_LOADBALANCE:
|
||||
return "LOADBALANCE";
|
||||
case ORTE_RMAPS_SEQ:
|
||||
return "SEQUENTIAL";
|
||||
case ORTE_RMAPS_RF:
|
||||
return "RANK_FILE";
|
||||
case ORTE_RMAPS_RESILIENT:
|
||||
return "RESILIENT";
|
||||
default:
|
||||
return "UNKNOWN";
|
||||
}
|
||||
}
|
||||
|
@ -54,6 +54,7 @@ static int loadbalance(orte_job_t *jdata);
|
||||
static int switchyard(orte_job_t *jdata)
|
||||
{
|
||||
int rc;
|
||||
mca_base_component_t *c = &mca_rmaps_load_balance_component.super.base_version;
|
||||
|
||||
/* only handle initial launch of loadbalanced
|
||||
* or NPERxxx jobs - allow restarting of failed apps
|
||||
@ -64,8 +65,8 @@ static int switchyard(orte_job_t *jdata)
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
if (ORTE_RMAPS_UNDEF != jdata->map->req_mapper &&
|
||||
ORTE_RMAPS_LOADBALANCE != jdata->map->req_mapper) {
|
||||
if (NULL != jdata->map->req_mapper &&
|
||||
0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) {
|
||||
/* a mapper has been specified, and it isn't me */
|
||||
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
||||
"mca:rmaps:lb: job %s not using loadbalance mapper",
|
||||
@ -78,13 +79,16 @@ static int switchyard(orte_job_t *jdata)
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
|
||||
/* flag that I did the mapping */
|
||||
jdata->map->last_mapper = ORTE_RMAPS_LOADBALANCE;
|
||||
jdata->map->last_mapper = strdup(c->mca_component_name);
|
||||
|
||||
if (0 < orte_rmaps_base.npernode) {
|
||||
if (0 < mca_rmaps_load_balance_component.npernode ||
|
||||
0 < jdata->map->npernode) {
|
||||
rc = npernode(jdata);
|
||||
} else if (0 < orte_rmaps_base.nperboard) {
|
||||
} else if (0 < mca_rmaps_load_balance_component.nperboard ||
|
||||
0 < jdata->map->nperboard) {
|
||||
rc = nperboard(jdata);
|
||||
} else if (0 < orte_rmaps_base.npersocket) {
|
||||
} else if (0 < mca_rmaps_load_balance_component.npersocket ||
|
||||
0 < jdata->map->npersocket) {
|
||||
rc = npersocket(jdata);
|
||||
} else {
|
||||
rc = loadbalance(jdata);
|
||||
@ -152,7 +156,7 @@ static int npernode(orte_job_t *jdata)
|
||||
while (NULL != (item = opal_list_remove_first(&node_list))) {
|
||||
node = (orte_node_t*)item;
|
||||
/* put the specified number of procs on each node */
|
||||
for (j=0; j < orte_rmaps_base.npernode && nprocs < np; j++) {
|
||||
for (j=0; j < mca_rmaps_load_balance_component.npernode && nprocs < np; j++) {
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node,
|
||||
jdata->map->cpus_per_rank, app->idx,
|
||||
&node_list, jdata->map->oversubscribe,
|
||||
@ -161,7 +165,7 @@ static int npernode(orte_job_t *jdata)
|
||||
* more procs to place, then that is an error
|
||||
*/
|
||||
if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc) ||
|
||||
j < orte_rmaps_base.npernode-1) {
|
||||
j < mca_rmaps_load_balance_component.npernode-1) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(node);
|
||||
goto error;
|
||||
@ -180,7 +184,7 @@ static int npernode(orte_job_t *jdata)
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:too-many-procs", true,
|
||||
app->app, app->num_procs,
|
||||
"number of nodes", num_nodes,
|
||||
"npernode", orte_rmaps_base.npernode);
|
||||
"npernode", mca_rmaps_load_balance_component.npernode);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
/* update the number of procs in the job */
|
||||
@ -244,7 +248,7 @@ static int nperboard(orte_job_t *jdata)
|
||||
/* loop through the number of boards in this node */
|
||||
for (k=0; k < node->boards && nprocs < np; k++) {
|
||||
/* put the specified number of procs on each board */
|
||||
for (j=0; j < orte_rmaps_base.nperboard && nprocs < np; j++) {
|
||||
for (j=0; j < mca_rmaps_load_balance_component.nperboard && nprocs < np; j++) {
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node,
|
||||
jdata->map->cpus_per_rank, app->idx,
|
||||
&node_list, jdata->map->oversubscribe,
|
||||
@ -253,7 +257,7 @@ static int nperboard(orte_job_t *jdata)
|
||||
* more procs to place, then that is an error
|
||||
*/
|
||||
if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc) ||
|
||||
j < orte_rmaps_base.nperboard-1) {
|
||||
j < mca_rmaps_load_balance_component.nperboard-1) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(node);
|
||||
goto error;
|
||||
@ -273,7 +277,7 @@ static int nperboard(orte_job_t *jdata)
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:too-many-procs", true,
|
||||
app->app, app->num_procs,
|
||||
"number of boards", num_boards,
|
||||
"nperboard", orte_rmaps_base.nperboard);
|
||||
"nperboard", mca_rmaps_load_balance_component.nperboard);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
/* update the number of procs in the job */
|
||||
@ -340,7 +344,7 @@ static int npersocket(orte_job_t *jdata)
|
||||
/* loop through the number of sockets/board */
|
||||
for (n=0; n < node->sockets_per_board && nprocs < np; n++) {
|
||||
/* put the specified number of procs on each socket */
|
||||
for (j=0; j < orte_rmaps_base.npersocket && nprocs < np; j++) {
|
||||
for (j=0; j < mca_rmaps_load_balance_component.npersocket && nprocs < np; j++) {
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node,
|
||||
jdata->map->cpus_per_rank, app->idx,
|
||||
&node_list, jdata->map->oversubscribe,
|
||||
@ -349,7 +353,7 @@ static int npersocket(orte_job_t *jdata)
|
||||
* more procs to place, then that is an error
|
||||
*/
|
||||
if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc) ||
|
||||
j < orte_rmaps_base.npersocket-1) {
|
||||
j < mca_rmaps_load_balance_component.npersocket-1) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
OBJ_RELEASE(node);
|
||||
goto error;
|
||||
@ -371,7 +375,7 @@ static int npersocket(orte_job_t *jdata)
|
||||
orte_show_help("help-orte-rmaps-base.txt", "rmaps:too-many-procs", true,
|
||||
app->app, app->num_procs,
|
||||
"number of sockets", num_sockets,
|
||||
"npersocket", orte_rmaps_base.npersocket);
|
||||
"npersocket", mca_rmaps_load_balance_component.npersocket);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
/* update the number of procs in the job */
|
||||
|
@ -28,7 +28,15 @@
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_rmaps_base_component_t mca_rmaps_load_balance_component;
|
||||
struct orte_rmaps_lb_component_t {
|
||||
orte_rmaps_base_component_t super;
|
||||
int npernode;
|
||||
int nperboard;
|
||||
int npersocket;
|
||||
};
|
||||
typedef struct orte_rmaps_lb_component_t orte_rmaps_lb_component_t;
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_rmaps_lb_component_t mca_rmaps_load_balance_component;
|
||||
extern orte_rmaps_base_module_t orte_rmaps_load_balance_module;
|
||||
|
||||
|
||||
|
@ -21,6 +21,7 @@
|
||||
|
||||
#include "opal/mca/base/base.h"
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "opal/mca/paffinity/paffinity.h"
|
||||
|
||||
#include "orte/mca/rmaps/base/base.h"
|
||||
#include "rmaps_lb.h"
|
||||
@ -35,21 +36,23 @@ static int orte_rmaps_lb_query(mca_base_module_t **module, int *priority);
|
||||
|
||||
static int my_priority;
|
||||
|
||||
orte_rmaps_base_component_t mca_rmaps_load_balance_component = {
|
||||
orte_rmaps_lb_component_t mca_rmaps_load_balance_component = {
|
||||
{
|
||||
ORTE_RMAPS_BASE_VERSION_2_0_0,
|
||||
{
|
||||
ORTE_RMAPS_BASE_VERSION_2_0_0,
|
||||
|
||||
"load_balance", /* MCA component name */
|
||||
ORTE_MAJOR_VERSION, /* MCA component major version */
|
||||
ORTE_MINOR_VERSION, /* MCA component minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA component release version */
|
||||
orte_rmaps_lb_open, /* component open */
|
||||
orte_rmaps_lb_close, /* component close */
|
||||
orte_rmaps_lb_query /* component query */
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
"load_balance", /* MCA component name */
|
||||
ORTE_MAJOR_VERSION, /* MCA component major version */
|
||||
ORTE_MINOR_VERSION, /* MCA component minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA component release version */
|
||||
orte_rmaps_lb_open, /* component open */
|
||||
orte_rmaps_lb_close, /* component close */
|
||||
orte_rmaps_lb_query /* component query */
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -59,19 +62,70 @@ orte_rmaps_base_component_t mca_rmaps_load_balance_component = {
|
||||
*/
|
||||
static int orte_rmaps_lb_open(void)
|
||||
{
|
||||
mca_base_component_t *c = &mca_rmaps_load_balance_component.base_version;
|
||||
mca_base_component_t *c = &mca_rmaps_load_balance_component.super.base_version;
|
||||
int value, tmp;
|
||||
|
||||
/* initialize */
|
||||
mca_rmaps_load_balance_component.npernode = 0;
|
||||
mca_rmaps_load_balance_component.nperboard = 0;
|
||||
mca_rmaps_load_balance_component.npersocket = 0;
|
||||
|
||||
mca_base_param_reg_int(c, "priority",
|
||||
"Priority of the loadbalance rmaps component",
|
||||
false, false, 80,
|
||||
&my_priority);
|
||||
|
||||
/* check for procs/xxx directives */
|
||||
tmp = mca_base_param_reg_int(c, "pernode",
|
||||
"Launch one ppn as directed",
|
||||
false, false, (int)false, NULL);
|
||||
mca_base_param_reg_syn_name(tmp, "rmaps", "base_pernode", false);
|
||||
mca_base_param_lookup_int(tmp, &value);
|
||||
if (value) {
|
||||
mca_rmaps_load_balance_component.npernode = 1;
|
||||
}
|
||||
|
||||
/* #procs/node */
|
||||
tmp = mca_base_param_reg_int(c, "n_pernode",
|
||||
"Launch n procs/node",
|
||||
false, false, mca_rmaps_load_balance_component.npernode, NULL);
|
||||
mca_base_param_reg_syn_name(tmp, "rmaps", "base_n_pernode", false);
|
||||
mca_base_param_lookup_int(tmp, &mca_rmaps_load_balance_component.npernode);
|
||||
|
||||
/* #procs/board */
|
||||
tmp = mca_base_param_reg_int(c, "n_perboard",
|
||||
"Launch n procs/board",
|
||||
false, false, -1, NULL);
|
||||
mca_base_param_reg_syn_name(tmp, "rmaps", "base_n_perboard", false);
|
||||
mca_base_param_lookup_int(tmp, &mca_rmaps_load_balance_component.nperboard);
|
||||
if (0 < mca_rmaps_load_balance_component.nperboard) {
|
||||
ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX);
|
||||
}
|
||||
|
||||
/* #procs/socket */
|
||||
tmp = mca_base_param_reg_int(c, "n_persocket",
|
||||
"Launch n procs/socket",
|
||||
false, false, -1, NULL);
|
||||
mca_base_param_reg_syn_name(tmp, "rmaps", "base_n_persocket", false);
|
||||
mca_base_param_lookup_int(tmp, &mca_rmaps_load_balance_component.npersocket);
|
||||
if (0 < mca_rmaps_load_balance_component.npersocket) {
|
||||
ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX);
|
||||
/* force bind to socket if not overridden by user */
|
||||
ORTE_XSET_BINDING_POLICY(ORTE_BIND_TO_SOCKET);
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int orte_rmaps_lb_query(mca_base_module_t **module, int *priority)
|
||||
{
|
||||
/* after rr */
|
||||
/* after rr, unless lb values are set */
|
||||
if (0 < mca_rmaps_load_balance_component.npernode ||
|
||||
0 < mca_rmaps_load_balance_component.nperboard ||
|
||||
0 < mca_rmaps_load_balance_component.npersocket) {
|
||||
my_priority = 10000;
|
||||
}
|
||||
*priority = my_priority;
|
||||
*module = (mca_base_module_t *)&orte_rmaps_load_balance_module;
|
||||
return ORTE_SUCCESS;
|
||||
|
@ -133,8 +133,8 @@ static int map_app_by_node(orte_app_context_t* app,
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
if (NULL != orte_rmaps_base.slot_list) {
|
||||
proc->slot_list = strdup(orte_rmaps_base.slot_list);
|
||||
if (NULL != mca_rmaps_rank_file_component.slot_list) {
|
||||
proc->slot_list = strdup(mca_rmaps_rank_file_component.slot_list);
|
||||
}
|
||||
++num_alloc;
|
||||
cur_node_item = next;
|
||||
@ -243,8 +243,8 @@ static int map_app_by_slot(orte_app_context_t* app,
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
if (NULL != orte_rmaps_base.slot_list) {
|
||||
proc->slot_list = strdup(orte_rmaps_base.slot_list);
|
||||
if (NULL != mca_rmaps_rank_file_component.slot_list) {
|
||||
proc->slot_list = strdup(mca_rmaps_rank_file_component.slot_list);
|
||||
}
|
||||
/* Update the rank */
|
||||
++num_alloc;
|
||||
@ -293,7 +293,8 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
|
||||
orte_std_cntr_t slots_per_node, relative_index, tmp_cnt;
|
||||
int rc;
|
||||
orte_proc_t *proc;
|
||||
|
||||
mca_base_component_t *c = &mca_rmaps_rank_file_component.super.base_version;
|
||||
|
||||
/* only handle initial launch of rf job */
|
||||
if (ORTE_JOB_STATE_INIT != jdata->state) {
|
||||
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
||||
@ -301,8 +302,8 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
if (ORTE_RMAPS_UNDEF != jdata->map->req_mapper &&
|
||||
ORTE_RMAPS_RF != jdata->map->req_mapper) {
|
||||
if (NULL != jdata->map->req_mapper &&
|
||||
0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) {
|
||||
/* a mapper has been specified, and it isn't me */
|
||||
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
||||
"mca:rmaps:rf: job %s not using rank_file mapper",
|
||||
@ -315,7 +316,7 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
|
||||
/* flag that I did the mapping */
|
||||
jdata->map->last_mapper = ORTE_RMAPS_RF;
|
||||
jdata->map->last_mapper = strdup(c->mca_component_name);
|
||||
|
||||
/* convenience def */
|
||||
map = jdata->map;
|
||||
|
@ -36,10 +36,13 @@
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/**
|
||||
* RMGR Component
|
||||
*/
|
||||
ORTE_MODULE_DECLSPEC extern orte_rmaps_base_component_t mca_rmaps_rank_file_component;
|
||||
struct orte_rmaps_rf_component_t {
|
||||
orte_rmaps_base_component_t super;
|
||||
char *slot_list;
|
||||
};
|
||||
typedef struct orte_rmaps_rf_component_t orte_rmaps_rf_component_t;
|
||||
|
||||
ORTE_MODULE_DECLSPEC extern orte_rmaps_rf_component_t mca_rmaps_rank_file_component;
|
||||
extern orte_rmaps_base_module_t orte_rmaps_rank_file_module;
|
||||
|
||||
|
||||
|
@ -45,24 +45,26 @@ static int orte_rmaps_rank_file_query(mca_base_module_t **module, int *priority)
|
||||
|
||||
static int my_priority;
|
||||
|
||||
orte_rmaps_base_component_t mca_rmaps_rank_file_component = {
|
||||
/* First, the mca_base_component_t struct containing meta
|
||||
information about the component itself */
|
||||
|
||||
orte_rmaps_rf_component_t mca_rmaps_rank_file_component = {
|
||||
{
|
||||
ORTE_RMAPS_BASE_VERSION_2_0_0,
|
||||
/* First, the mca_base_component_t struct containing meta
|
||||
information about the component itself */
|
||||
|
||||
"rank_file", /* MCA component name */
|
||||
ORTE_MAJOR_VERSION, /* MCA component major version */
|
||||
ORTE_MINOR_VERSION, /* MCA component minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA component release version */
|
||||
orte_rmaps_rank_file_open, /* component open */
|
||||
orte_rmaps_rank_file_close, /* component close */
|
||||
orte_rmaps_rank_file_query /* component query */
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
{
|
||||
ORTE_RMAPS_BASE_VERSION_2_0_0,
|
||||
|
||||
"rank_file", /* MCA component name */
|
||||
ORTE_MAJOR_VERSION, /* MCA component major version */
|
||||
ORTE_MINOR_VERSION, /* MCA component minor version */
|
||||
ORTE_RELEASE_VERSION, /* MCA component release version */
|
||||
orte_rmaps_rank_file_open, /* component open */
|
||||
orte_rmaps_rank_file_close, /* component close */
|
||||
orte_rmaps_rank_file_query /* component query */
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -72,17 +74,27 @@ orte_rmaps_base_component_t mca_rmaps_rank_file_component = {
|
||||
*/
|
||||
static int orte_rmaps_rank_file_open(void)
|
||||
{
|
||||
mca_base_component_t *c = &mca_rmaps_rank_file_component.base_version;
|
||||
mca_base_component_t *c = &mca_rmaps_rank_file_component.super.base_version;
|
||||
int tmp;
|
||||
|
||||
mca_base_param_reg_int(c, "priority",
|
||||
"Priority of the rank_file rmaps component",
|
||||
false, false, 0,
|
||||
&my_priority);
|
||||
|
||||
if (NULL != orte_rankfile ||
|
||||
NULL != orte_rmaps_base.slot_list) {
|
||||
/* did the user provide a slot list? */
|
||||
tmp = mca_base_param_reg_string(c, "slot_list",
|
||||
"List of processor IDs to bind MPI processes to (e.g., used in conjunction with rank files) [default=NULL]",
|
||||
false, false, NULL, NULL);
|
||||
mca_base_param_reg_syn_name(tmp, "rmaps", "base_slot_list", false);
|
||||
mca_base_param_lookup_string(tmp, &mca_rmaps_rank_file_component.slot_list);
|
||||
|
||||
/* ensure we flag mapping by user */
|
||||
if (NULL != mca_rmaps_rank_file_component.slot_list ||
|
||||
NULL != orte_rankfile) {
|
||||
ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_BYUSER);
|
||||
/* make us first */
|
||||
my_priority = 1000;
|
||||
my_priority = 10000;
|
||||
}
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
|
@ -68,10 +68,11 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
|
||||
opal_list_t node_list;
|
||||
orte_std_cntr_t num_slots;
|
||||
opal_list_item_t *item;
|
||||
mca_base_component_t *c = &mca_rmaps_resilient_component.super.base_version;
|
||||
|
||||
if (ORTE_JOB_STATE_INIT == jdata->state) {
|
||||
if (ORTE_RMAPS_UNDEF != jdata->map->req_mapper &&
|
||||
ORTE_RMAPS_RESILIENT != jdata->map->req_mapper) {
|
||||
if (NULL != jdata->map->req_mapper &&
|
||||
0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) {
|
||||
/* a mapper has been specified, and it isn't me */
|
||||
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
||||
"mca:rmaps:resilient: job %s not using loadbalance mapper",
|
||||
@ -97,7 +98,7 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
|
||||
/* flag that I did the mapping */
|
||||
jdata->map->last_mapper = ORTE_RMAPS_RESILIENT;
|
||||
jdata->map->last_mapper = strdup(c->mca_component_name);
|
||||
|
||||
/* have we already constructed the fault group list? */
|
||||
if (!made_ftgrps) {
|
||||
|
@ -33,17 +33,6 @@
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/* enumerate selectable mappers */
|
||||
enum orte_rmaps_mapper_type_t {
|
||||
ORTE_RMAPS_UNDEF,
|
||||
ORTE_RMAPS_RR,
|
||||
ORTE_RMAPS_LOADBALANCE,
|
||||
ORTE_RMAPS_SEQ,
|
||||
ORTE_RMAPS_RF,
|
||||
ORTE_RMAPS_RESILIENT
|
||||
};
|
||||
typedef enum orte_rmaps_mapper_type_t orte_rmaps_mapper_type_t;
|
||||
|
||||
/*
|
||||
* Structure that represents the mapping of a job to an
|
||||
* allocated set of resources.
|
||||
@ -51,8 +40,8 @@ typedef enum orte_rmaps_mapper_type_t orte_rmaps_mapper_type_t;
|
||||
struct orte_job_map_t {
|
||||
opal_object_t super;
|
||||
/* user-specified mapping params */
|
||||
orte_rmaps_mapper_type_t req_mapper; /* requested mapper */
|
||||
orte_rmaps_mapper_type_t last_mapper; /* last mapper used */
|
||||
char *req_mapper; /* requested mapper */
|
||||
char *last_mapper; /* last mapper used */
|
||||
orte_mapping_policy_t policy;
|
||||
int npernode;
|
||||
int nperboard;
|
||||
|
@ -53,7 +53,8 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
|
||||
orte_std_cntr_t num_nodes, num_slots;
|
||||
int rc;
|
||||
opal_list_item_t *cur_node_item;
|
||||
|
||||
mca_base_component_t *c = &mca_rmaps_round_robin_component.base_version;
|
||||
|
||||
/* this mapper can only handle initial launch
|
||||
* when rr mapping is desired - allow
|
||||
* restarting of failed apps
|
||||
@ -65,21 +66,30 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
|
||||
orte_job_state_to_str(jdata->state));
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
if (ORTE_RMAPS_UNDEF != jdata->map->req_mapper &&
|
||||
ORTE_RMAPS_RR != jdata->map->req_mapper) {
|
||||
if (NULL != jdata->map->req_mapper &&
|
||||
0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) {
|
||||
/* a mapper has been specified, and it isn't me */
|
||||
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
||||
"mca:rmaps:rr: job %s not using rr mapper",
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
if (0 < jdata->map->npernode ||
|
||||
0 < jdata->map->nperboard ||
|
||||
0 < jdata->map->npersocket) {
|
||||
/* I don't know how to do these - defer */
|
||||
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
||||
"mca:rmaps:rr: job %s not using rr mapper",
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
|
||||
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
||||
"mca:rmaps:rr: mapping job %s",
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
|
||||
/* flag that I did the mapping */
|
||||
jdata->map->last_mapper = ORTE_RMAPS_RR;
|
||||
jdata->map->last_mapper = strdup(c->mca_component_name);
|
||||
|
||||
/* start at the beginning... */
|
||||
jdata->num_procs = 0;
|
||||
|
@ -70,6 +70,7 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
|
||||
opal_list_t *default_node_list=NULL;
|
||||
opal_list_t *node_list=NULL;
|
||||
orte_proc_t *proc;
|
||||
mca_base_component_t *c = &mca_rmaps_seq_component.base_version;
|
||||
|
||||
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output,
|
||||
"%s rmaps:seq mapping job %s",
|
||||
@ -86,21 +87,30 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
if (ORTE_RMAPS_UNDEF != jdata->map->req_mapper &&
|
||||
ORTE_RMAPS_SEQ != jdata->map->req_mapper) {
|
||||
if (NULL != jdata->map->req_mapper &&
|
||||
0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) {
|
||||
/* a mapper has been specified, and it isn't me */
|
||||
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
||||
"mca:rmaps:seq: job %s not using sequential mapper",
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
if (0 < jdata->map->npernode ||
|
||||
0 < jdata->map->nperboard ||
|
||||
0 < jdata->map->npersocket) {
|
||||
/* I don't know how to do these - defer */
|
||||
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
||||
"mca:rmaps:seq: job %s not using seq mapper",
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
return ORTE_ERR_TAKE_NEXT_OPTION;
|
||||
}
|
||||
|
||||
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
||||
"mca:rmaps:seq: mapping job %s",
|
||||
ORTE_JOBID_PRINT(jdata->jobid));
|
||||
|
||||
/* flag that I did the mapping */
|
||||
jdata->map->last_mapper = ORTE_RMAPS_SEQ;
|
||||
jdata->map->last_mapper = strdup(c->mca_component_name);
|
||||
|
||||
/* conveniece def */
|
||||
map = jdata->map;
|
||||
|
@ -872,13 +872,7 @@ int orte_dt_pack_map(opal_buffer_t *buffer, const void *src,
|
||||
|
||||
for (i=0; i < num_vals; i++) {
|
||||
/* pack the requested mapper */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->req_mapper), 1, OPAL_INT32))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* pack the mapper used to generate it */
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->last_mapper), 1, OPAL_INT32))) {
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->req_mapper), 1, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
@ -654,8 +654,9 @@ int orte_dt_print_map(char **output, char *prefix, orte_job_map_t *src, opal_dat
|
||||
|
||||
if (orte_devel_level_output) {
|
||||
asprintf(&tmp, "\n%sMapper requested: %s\tLast mapper: %s\tMapping policy: %04x\n%s\tNpernode: %ld\tOversubscribe allowed: %s\tCPU Lists: %s",
|
||||
pfx2, orte_rmaps_base_print_mapper(src->req_mapper),
|
||||
orte_rmaps_base_print_mapper(src->last_mapper), src->policy, pfx2, (long)src->npernode,
|
||||
pfx2, (NULL == src->req_mapper) ? "NULL" : src->req_mapper,
|
||||
(NULL == src->last_mapper) ? "NULL" : src->last_mapper,
|
||||
src->policy, pfx2, (long)src->npernode,
|
||||
(src->oversubscribe) ? "TRUE" : "FALSE",
|
||||
(src->cpu_lists) ? "TRUE" : "FALSE");
|
||||
|
||||
|
@ -955,7 +955,7 @@ int orte_dt_unpack_map(opal_buffer_t *buffer, void *dest,
|
||||
/* unpack the requested mapper */
|
||||
n = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
|
||||
&(maps[i]->req_mapper), &n, OPAL_INT32))) {
|
||||
&(maps[i]->req_mapper), &n, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
@ -963,7 +963,7 @@ int orte_dt_unpack_map(opal_buffer_t *buffer, void *dest,
|
||||
/* unpack the mapper used */
|
||||
n = 1;
|
||||
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
|
||||
&(maps[i]->last_mapper), &n, OPAL_INT32))) {
|
||||
&(maps[i]->last_mapper), &n, OPAL_STRING))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
@ -1049,8 +1049,8 @@ OBJ_CLASS_INSTANCE(orte_jmap_t,
|
||||
|
||||
static void orte_job_map_construct(orte_job_map_t* map)
|
||||
{
|
||||
map->req_mapper = ORTE_RMAPS_UNDEF;
|
||||
map->last_mapper = ORTE_RMAPS_UNDEF;
|
||||
map->req_mapper = NULL;
|
||||
map->last_mapper = NULL;
|
||||
map->policy = 0;
|
||||
map->npernode = 0;
|
||||
map->nperboard = 0;
|
||||
@ -1074,6 +1074,12 @@ static void orte_job_map_destruct(orte_job_map_t* map)
|
||||
{
|
||||
orte_std_cntr_t i;
|
||||
|
||||
if (NULL != map->req_mapper) {
|
||||
free(map->req_mapper);
|
||||
}
|
||||
if (NULL != map->last_mapper) {
|
||||
free(map->last_mapper);
|
||||
}
|
||||
for (i=0; i < map->nodes->size; i++) {
|
||||
if (NULL != map->nodes->addr[i]) {
|
||||
OBJ_RELEASE(map->nodes->addr[i]);
|
||||
|
@ -330,6 +330,8 @@ typedef uint16_t orte_job_controls_t;
|
||||
#define ORTE_MAPPING_NO_USE_LOCAL 0x2000
|
||||
#define ORTE_MAPPING_NPERXXX 0x4000
|
||||
#define ORTE_MAPPING_BYUSER 0x8000
|
||||
/* check if policy is set */
|
||||
#define ORTE_MAPPING_POLICY_IS_SET(pol) (pol & 0xff00)
|
||||
/* nice macro for setting these */
|
||||
#define ORTE_SET_MAPPING_POLICY(pol) \
|
||||
orte_default_mapping_policy = (orte_default_mapping_policy & 0x00ff) | (pol);
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user