From 1297acde1346220445184f7c5240eff08f79c589 Mon Sep 17 00:00:00 2001
From: Ralph Castain
Date: Sat, 12 Mar 2011 05:30:09 +0000
Subject: [PATCH] George raised some valid concerns about the extensibility of the revised rmaps framework. Address those by:

1. removing the enum of mapper values

2. changing the req_mapper and last_mapper fields to char* so they can hold the component name instead of a mapper flag

3. revising the selection logic in the mapper components to reflect the change. Components now look for their name in the req_mapper field, or check whether other criteria (e.g., npernode) are set that mandate their doing the mapping.

Several MCA params resided in the rmaps base for historical reasons - they have been in the base since at least the original 1.2 release (and perhaps earlier). However, George correctly pointed out that they really should reside in their respective components. Accordingly, move them to the components, but register synonyms to the old names to avoid breaking backward compatibility.

These revisions retain the current functionality of allowing comm_spawn'd jobs to use different mappers than the original job, and of allowing the errmgr to utilize the resilient mapper to recover processes regardless of how they were originally mapped. Given the large number of possible combinations, I am sure that someone will find a corner-case combination of values and selection criteria that causes either no mapper to be selected, or a mapper other than the intended one to be used. No one can test all the ways people will use this system, so I expect debugging to continue for a while.

The ability of comm_spawn'd jobs to exploit this functionality relies on changes to the orte_dpm component - this will be committed separately.

This commit was SVN r24520.
--- orte/mca/rmaps/base/base.h | 10 --- orte/mca/rmaps/base/rmaps_base_map_job.c | 11 +-- orte/mca/rmaps/base/rmaps_base_open.c | 58 ------------- orte/mca/rmaps/base/rmaps_base_support_fns.c | 20 ----- orte/mca/rmaps/load_balance/rmaps_lb.c | 34 ++++---- orte/mca/rmaps/load_balance/rmaps_lb.h | 10 ++- .../rmaps/load_balance/rmaps_lb_component.c | 84 +++++++++++++++---- orte/mca/rmaps/rank_file/rmaps_rank_file.c | 17 ++-- orte/mca/rmaps/rank_file/rmaps_rank_file.h | 11 ++- .../rank_file/rmaps_rank_file_component.c | 52 +++++++----- orte/mca/rmaps/resilient/rmaps_resilient.c | 7 +- orte/mca/rmaps/rmaps_types.h | 15 +--- orte/mca/rmaps/round_robin/rmaps_rr.c | 18 +++- orte/mca/rmaps/seq/rmaps_seq.c | 16 +++- .../data_type_support/orte_dt_packing_fns.c | 8 +- .../data_type_support/orte_dt_print_fns.c | 5 +- .../data_type_support/orte_dt_unpacking_fns.c | 4 +- orte/runtime/orte_globals.c | 10 ++- orte/runtime/orte_globals.h | 2 + 19 files changed, 196 insertions(+), 196 deletions(-) diff --git a/orte/mca/rmaps/base/base.h b/orte/mca/rmaps/base/base.h index 186044d77f..e96b544560 100644 --- a/orte/mca/rmaps/base/base.h +++ b/orte/mca/rmaps/base/base.h @@ -58,16 +58,8 @@ typedef struct { opal_list_t available_components; /* list of selected modules */ opal_list_t selected_modules; - /* desired default mapper */ - orte_rmaps_mapper_type_t default_mapper; /** whether or not we allow oversubscription of nodes */ bool oversubscribe; - /** number of ppn for n_per_node mode */ - int npernode; - /* number of procs/board */ - int nperboard; - /* number of procs/socket */ - int npersocket; /* cpus per rank */ int cpus_per_rank; /* stride */ @@ -107,8 +99,6 @@ ORTE_DECLSPEC int orte_rmaps_base_get_vpid_range(orte_jobid_t jobid, ORTE_DECLSPEC int orte_rmaps_base_set_vpid_range(orte_jobid_t jobid, orte_vpid_t start, orte_vpid_t range); -ORTE_DECLSPEC char* orte_rmaps_base_print_mapper(orte_rmaps_mapper_type_t mapper); - /** * Close down the rmaps framework */ diff --git 
a/orte/mca/rmaps/base/rmaps_base_map_job.c b/orte/mca/rmaps/base/rmaps_base_map_job.c index f8cfe342f1..ffee0fd1f8 100644 --- a/orte/mca/rmaps/base/rmaps_base_map_job.c +++ b/orte/mca/rmaps/base/rmaps_base_map_job.c @@ -72,25 +72,18 @@ int orte_rmaps_base_map_job(orte_job_t *jdata) } /* load it with the system defaults */ map->policy = orte_default_mapping_policy; - map->npernode = orte_rmaps_base.npernode; - map->nperboard = orte_rmaps_base.nperboard; - map->npersocket = orte_rmaps_base.npersocket; map->cpus_per_rank = orte_rmaps_base.cpus_per_rank; map->stride = orte_rmaps_base.stride; map->oversubscribe = orte_rmaps_base.oversubscribe; map->display_map = orte_rmaps_base.display_map; - map->req_mapper = orte_rmaps_base.default_mapper; /* assign the map object to this job */ jdata->map = map; } else { if (!jdata->map->display_map) { jdata->map->display_map = orte_rmaps_base.display_map; } - if (ORTE_RMAPS_UNDEF == jdata->map->req_mapper) { - jdata->map->req_mapper = orte_rmaps_base.default_mapper; - } - if (0 == jdata->map->policy) { - jdata->map->policy = orte_default_mapping_policy; + if (!ORTE_MAPPING_POLICY_IS_SET(jdata->map->policy)) { + jdata->map->policy = jdata->map->policy | orte_default_mapping_policy; } } diff --git a/orte/mca/rmaps/base/rmaps_base_open.c b/orte/mca/rmaps/base/rmaps_base_open.c index b47af0e7de..4682cd1dae 100644 --- a/orte/mca/rmaps/base/rmaps_base_open.c +++ b/orte/mca/rmaps/base/rmaps_base_open.c @@ -88,7 +88,6 @@ int orte_rmaps_base_open(void) /* init the globals */ OBJ_CONSTRUCT(&orte_rmaps_base.selected_modules, opal_list_t); - orte_rmaps_base.default_mapper = ORTE_RMAPS_UNDEF; /* Debugging / verbose output. Always have stream open, with verbose set by the mca open system... 
*/ @@ -113,52 +112,6 @@ int orte_rmaps_base_open(void) ORTE_XSET_MAPPING_POLICY(ORTE_MAPPING_BYNODE); } - /* check for procs/xxx directives */ - param = mca_base_param_reg_int_name("rmaps", "base_pernode", - "Launch one ppn as directed", - false, false, (int)false, &value); - if (value) { - orte_rmaps_base.npernode = 1; - orte_rmaps_base.default_mapper = ORTE_RMAPS_LOADBALANCE; - } - - /* #procs/node */ - param = mca_base_param_reg_int_name("rmaps", "base_n_pernode", - "Launch n procs/node", - false, false, -1, &value); - if (0 < value) { - orte_rmaps_base.npernode = value; - orte_rmaps_base.default_mapper = ORTE_RMAPS_LOADBALANCE; - } - - /* #procs/board */ - param = mca_base_param_reg_int_name("rmaps", "base_n_perboard", - "Launch n procs/board", - false, false, -1, &orte_rmaps_base.nperboard); - if (0 < orte_rmaps_base.nperboard) { - ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX); - orte_rmaps_base.default_mapper = ORTE_RMAPS_LOADBALANCE; - } - - /* #procs/socket */ - param = mca_base_param_reg_int_name("rmaps", "base_n_persocket", - "Launch n procs/socket", - false, false, -1, &orte_rmaps_base.npersocket); - if (0 < orte_rmaps_base.npersocket) { - ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX); - /* force bind to socket if not overridden by user */ - ORTE_XSET_BINDING_POLICY(ORTE_BIND_TO_SOCKET); - orte_rmaps_base.default_mapper = ORTE_RMAPS_LOADBALANCE; - } - - /* Do we want to loadbalance the job */ - param = mca_base_param_reg_int_name("rmaps", "base_loadbalance", - "Balance total number of procs across all allocated nodes", - false, false, (int)false, &value); - if (value) { - orte_rmaps_base.default_mapper = ORTE_RMAPS_LOADBALANCE; - } - /* #cpus/rank to use */ param = mca_base_param_reg_int_name("rmaps", "base_cpus_per_proc", "Number of cpus to use for each rank [1-2**15 (default=1)]", @@ -192,17 +145,6 @@ int orte_rmaps_base_open(void) false, false, 1, &value); orte_rmaps_base.stride = value; - /* did the user provide a slot list? 
*/ - param = mca_base_param_reg_string_name("rmaps", "base_slot_list", - "List of processor IDs to bind MPI processes to (e.g., used in conjunction with rank files) [default=NULL]", - false, false, NULL, &orte_rmaps_base.slot_list); - /* ensure we flag mapping by user */ - if (NULL != orte_rmaps_base.slot_list || - NULL != orte_rankfile) { - ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_BYUSER); - orte_rmaps_base.default_mapper = ORTE_RMAPS_RF; - } - /* Should we schedule on the local node or not? */ mca_base_param_reg_int_name("rmaps", "base_no_schedule_local", "If false, allow scheduling MPI applications on the same node as mpirun (default). If true, do not schedule any MPI applications on the same node as mpirun", diff --git a/orte/mca/rmaps/base/rmaps_base_support_fns.c b/orte/mca/rmaps/base/rmaps_base_support_fns.c index 857cd10d73..b1ff7d0195 100644 --- a/orte/mca/rmaps/base/rmaps_base_support_fns.c +++ b/orte/mca/rmaps/base/rmaps_base_support_fns.c @@ -890,23 +890,3 @@ int orte_rmaps_base_setup_virtual_machine(orte_job_t *jdata) return ORTE_SUCCESS; } - -char* orte_rmaps_base_print_mapper(orte_rmaps_mapper_type_t mapper) -{ - switch(mapper) { - case ORTE_RMAPS_UNDEF: - return "UNDEF"; - case ORTE_RMAPS_RR: - return "ROUND_ROBIN"; - case ORTE_RMAPS_LOADBALANCE: - return "LOADBALANCE"; - case ORTE_RMAPS_SEQ: - return "SEQUENTIAL"; - case ORTE_RMAPS_RF: - return "RANK_FILE"; - case ORTE_RMAPS_RESILIENT: - return "RESILIENT"; - default: - return "UNKNOWN"; - } -} diff --git a/orte/mca/rmaps/load_balance/rmaps_lb.c b/orte/mca/rmaps/load_balance/rmaps_lb.c index 0b9e20cbc8..2afb5e4fdf 100644 --- a/orte/mca/rmaps/load_balance/rmaps_lb.c +++ b/orte/mca/rmaps/load_balance/rmaps_lb.c @@ -54,6 +54,7 @@ static int loadbalance(orte_job_t *jdata); static int switchyard(orte_job_t *jdata) { int rc; + mca_base_component_t *c = &mca_rmaps_load_balance_component.super.base_version; /* only handle initial launch of loadbalanced * or NPERxxx jobs - allow restarting of failed apps @@ 
-64,8 +65,8 @@ static int switchyard(orte_job_t *jdata) ORTE_JOBID_PRINT(jdata->jobid)); return ORTE_ERR_TAKE_NEXT_OPTION; } - if (ORTE_RMAPS_UNDEF != jdata->map->req_mapper && - ORTE_RMAPS_LOADBALANCE != jdata->map->req_mapper) { + if (NULL != jdata->map->req_mapper && + 0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) { /* a mapper has been specified, and it isn't me */ opal_output_verbose(5, orte_rmaps_base.rmaps_output, "mca:rmaps:lb: job %s not using loadbalance mapper", @@ -78,13 +79,16 @@ static int switchyard(orte_job_t *jdata) ORTE_JOBID_PRINT(jdata->jobid)); /* flag that I did the mapping */ - jdata->map->last_mapper = ORTE_RMAPS_LOADBALANCE; + jdata->map->last_mapper = strdup(c->mca_component_name); - if (0 < orte_rmaps_base.npernode) { + if (0 < mca_rmaps_load_balance_component.npernode || + 0 < jdata->map->npernode) { rc = npernode(jdata); - } else if (0 < orte_rmaps_base.nperboard) { + } else if (0 < mca_rmaps_load_balance_component.nperboard || + 0 < jdata->map->nperboard) { rc = nperboard(jdata); - } else if (0 < orte_rmaps_base.npersocket) { + } else if (0 < mca_rmaps_load_balance_component.npersocket || + 0 < jdata->map->npersocket) { rc = npersocket(jdata); } else { rc = loadbalance(jdata); @@ -152,7 +156,7 @@ static int npernode(orte_job_t *jdata) while (NULL != (item = opal_list_remove_first(&node_list))) { node = (orte_node_t*)item; /* put the specified number of procs on each node */ - for (j=0; j < orte_rmaps_base.npernode && nprocs < np; j++) { + for (j=0; j < mca_rmaps_load_balance_component.npernode && nprocs < np; j++) { if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, jdata->map->cpus_per_rank, app->idx, &node_list, jdata->map->oversubscribe, @@ -161,7 +165,7 @@ static int npernode(orte_job_t *jdata) * more procs to place, then that is an error */ if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc) || - j < orte_rmaps_base.npernode-1) { + j < mca_rmaps_load_balance_component.npernode-1) { 
ORTE_ERROR_LOG(rc); OBJ_RELEASE(node); goto error; @@ -180,7 +184,7 @@ static int npernode(orte_job_t *jdata) orte_show_help("help-orte-rmaps-base.txt", "rmaps:too-many-procs", true, app->app, app->num_procs, "number of nodes", num_nodes, - "npernode", orte_rmaps_base.npernode); + "npernode", mca_rmaps_load_balance_component.npernode); return ORTE_ERR_SILENT; } /* update the number of procs in the job */ @@ -244,7 +248,7 @@ static int nperboard(orte_job_t *jdata) /* loop through the number of boards in this node */ for (k=0; k < node->boards && nprocs < np; k++) { /* put the specified number of procs on each board */ - for (j=0; j < orte_rmaps_base.nperboard && nprocs < np; j++) { + for (j=0; j < mca_rmaps_load_balance_component.nperboard && nprocs < np; j++) { if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, jdata->map->cpus_per_rank, app->idx, &node_list, jdata->map->oversubscribe, @@ -253,7 +257,7 @@ static int nperboard(orte_job_t *jdata) * more procs to place, then that is an error */ if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc) || - j < orte_rmaps_base.nperboard-1) { + j < mca_rmaps_load_balance_component.nperboard-1) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(node); goto error; @@ -273,7 +277,7 @@ static int nperboard(orte_job_t *jdata) orte_show_help("help-orte-rmaps-base.txt", "rmaps:too-many-procs", true, app->app, app->num_procs, "number of boards", num_boards, - "nperboard", orte_rmaps_base.nperboard); + "nperboard", mca_rmaps_load_balance_component.nperboard); return ORTE_ERR_SILENT; } /* update the number of procs in the job */ @@ -340,7 +344,7 @@ static int npersocket(orte_job_t *jdata) /* loop through the number of sockets/board */ for (n=0; n < node->sockets_per_board && nprocs < np; n++) { /* put the specified number of procs on each socket */ - for (j=0; j < orte_rmaps_base.npersocket && nprocs < np; j++) { + for (j=0; j < mca_rmaps_load_balance_component.npersocket && nprocs < np; j++) { if (ORTE_SUCCESS != (rc = 
orte_rmaps_base_claim_slot(jdata, node, jdata->map->cpus_per_rank, app->idx, &node_list, jdata->map->oversubscribe, @@ -349,7 +353,7 @@ static int npersocket(orte_job_t *jdata) * more procs to place, then that is an error */ if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc) || - j < orte_rmaps_base.npersocket-1) { + j < mca_rmaps_load_balance_component.npersocket-1) { ORTE_ERROR_LOG(rc); OBJ_RELEASE(node); goto error; @@ -371,7 +375,7 @@ static int npersocket(orte_job_t *jdata) orte_show_help("help-orte-rmaps-base.txt", "rmaps:too-many-procs", true, app->app, app->num_procs, "number of sockets", num_sockets, - "npersocket", orte_rmaps_base.npersocket); + "npersocket", mca_rmaps_load_balance_component.npersocket); return ORTE_ERR_SILENT; } /* update the number of procs in the job */ diff --git a/orte/mca/rmaps/load_balance/rmaps_lb.h b/orte/mca/rmaps/load_balance/rmaps_lb.h index 1635ac87b1..cf998690d3 100644 --- a/orte/mca/rmaps/load_balance/rmaps_lb.h +++ b/orte/mca/rmaps/load_balance/rmaps_lb.h @@ -28,7 +28,15 @@ BEGIN_C_DECLS -ORTE_MODULE_DECLSPEC extern orte_rmaps_base_component_t mca_rmaps_load_balance_component; +struct orte_rmaps_lb_component_t { + orte_rmaps_base_component_t super; + int npernode; + int nperboard; + int npersocket; +}; +typedef struct orte_rmaps_lb_component_t orte_rmaps_lb_component_t; + +ORTE_MODULE_DECLSPEC extern orte_rmaps_lb_component_t mca_rmaps_load_balance_component; extern orte_rmaps_base_module_t orte_rmaps_load_balance_module; diff --git a/orte/mca/rmaps/load_balance/rmaps_lb_component.c b/orte/mca/rmaps/load_balance/rmaps_lb_component.c index dd1c3a5de9..6bfcd8a192 100644 --- a/orte/mca/rmaps/load_balance/rmaps_lb_component.c +++ b/orte/mca/rmaps/load_balance/rmaps_lb_component.c @@ -21,6 +21,7 @@ #include "opal/mca/base/base.h" #include "opal/mca/base/mca_base_param.h" +#include "opal/mca/paffinity/paffinity.h" #include "orte/mca/rmaps/base/base.h" #include "rmaps_lb.h" @@ -35,21 +36,23 @@ static int 
orte_rmaps_lb_query(mca_base_module_t **module, int *priority); static int my_priority; -orte_rmaps_base_component_t mca_rmaps_load_balance_component = { +orte_rmaps_lb_component_t mca_rmaps_load_balance_component = { { - ORTE_RMAPS_BASE_VERSION_2_0_0, + { + ORTE_RMAPS_BASE_VERSION_2_0_0, - "load_balance", /* MCA component name */ - ORTE_MAJOR_VERSION, /* MCA component major version */ - ORTE_MINOR_VERSION, /* MCA component minor version */ - ORTE_RELEASE_VERSION, /* MCA component release version */ - orte_rmaps_lb_open, /* component open */ - orte_rmaps_lb_close, /* component close */ - orte_rmaps_lb_query /* component query */ - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT + "load_balance", /* MCA component name */ + ORTE_MAJOR_VERSION, /* MCA component major version */ + ORTE_MINOR_VERSION, /* MCA component minor version */ + ORTE_RELEASE_VERSION, /* MCA component release version */ + orte_rmaps_lb_open, /* component open */ + orte_rmaps_lb_close, /* component close */ + orte_rmaps_lb_query /* component query */ + }, + { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + } } }; @@ -59,19 +62,70 @@ orte_rmaps_base_component_t mca_rmaps_load_balance_component = { */ static int orte_rmaps_lb_open(void) { - mca_base_component_t *c = &mca_rmaps_load_balance_component.base_version; + mca_base_component_t *c = &mca_rmaps_load_balance_component.super.base_version; + int value, tmp; + + /* initialize */ + mca_rmaps_load_balance_component.npernode = 0; + mca_rmaps_load_balance_component.nperboard = 0; + mca_rmaps_load_balance_component.npersocket = 0; mca_base_param_reg_int(c, "priority", "Priority of the loadbalance rmaps component", false, false, 80, &my_priority); + + /* check for procs/xxx directives */ + tmp = mca_base_param_reg_int(c, "pernode", + "Launch one ppn as directed", + false, false, (int)false, NULL); + mca_base_param_reg_syn_name(tmp, "rmaps", "base_pernode", false); + 
mca_base_param_lookup_int(tmp, &value); + if (value) { + mca_rmaps_load_balance_component.npernode = 1; + } + + /* #procs/node */ + tmp = mca_base_param_reg_int(c, "n_pernode", + "Launch n procs/node", + false, false, mca_rmaps_load_balance_component.npernode, NULL); + mca_base_param_reg_syn_name(tmp, "rmaps", "base_n_pernode", false); + mca_base_param_lookup_int(tmp, &mca_rmaps_load_balance_component.npernode); + + /* #procs/board */ + tmp = mca_base_param_reg_int(c, "n_perboard", + "Launch n procs/board", + false, false, -1, NULL); + mca_base_param_reg_syn_name(tmp, "rmaps", "base_n_perboard", false); + mca_base_param_lookup_int(tmp, &mca_rmaps_load_balance_component.nperboard); + if (0 < mca_rmaps_load_balance_component.nperboard) { + ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX); + } + + /* #procs/socket */ + tmp = mca_base_param_reg_int(c, "n_persocket", + "Launch n procs/socket", + false, false, -1, NULL); + mca_base_param_reg_syn_name(tmp, "rmaps", "base_n_persocket", false); + mca_base_param_lookup_int(tmp, &mca_rmaps_load_balance_component.npersocket); + if (0 < mca_rmaps_load_balance_component.npersocket) { + ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX); + /* force bind to socket if not overridden by user */ + ORTE_XSET_BINDING_POLICY(ORTE_BIND_TO_SOCKET); + } + return ORTE_SUCCESS; } static int orte_rmaps_lb_query(mca_base_module_t **module, int *priority) { - /* after rr */ + /* after rr, unless lb values are set */ + if (0 < mca_rmaps_load_balance_component.npernode || + 0 < mca_rmaps_load_balance_component.nperboard || + 0 < mca_rmaps_load_balance_component.npersocket) { + my_priority = 10000; + } *priority = my_priority; *module = (mca_base_module_t *)&orte_rmaps_load_balance_module; return ORTE_SUCCESS; diff --git a/orte/mca/rmaps/rank_file/rmaps_rank_file.c b/orte/mca/rmaps/rank_file/rmaps_rank_file.c index b5392a6329..6670ebb0b2 100644 --- a/orte/mca/rmaps/rank_file/rmaps_rank_file.c +++ b/orte/mca/rmaps/rank_file/rmaps_rank_file.c @@ -133,8 
+133,8 @@ static int map_app_by_node(orte_app_context_t* app, return rc; } } - if (NULL != orte_rmaps_base.slot_list) { - proc->slot_list = strdup(orte_rmaps_base.slot_list); + if (NULL != mca_rmaps_rank_file_component.slot_list) { + proc->slot_list = strdup(mca_rmaps_rank_file_component.slot_list); } ++num_alloc; cur_node_item = next; @@ -243,8 +243,8 @@ static int map_app_by_slot(orte_app_context_t* app, return rc; } } - if (NULL != orte_rmaps_base.slot_list) { - proc->slot_list = strdup(orte_rmaps_base.slot_list); + if (NULL != mca_rmaps_rank_file_component.slot_list) { + proc->slot_list = strdup(mca_rmaps_rank_file_component.slot_list); } /* Update the rank */ ++num_alloc; @@ -293,7 +293,8 @@ static int orte_rmaps_rf_map(orte_job_t *jdata) orte_std_cntr_t slots_per_node, relative_index, tmp_cnt; int rc; orte_proc_t *proc; - + mca_base_component_t *c = &mca_rmaps_rank_file_component.super.base_version; + /* only handle initial launch of rf job */ if (ORTE_JOB_STATE_INIT != jdata->state) { opal_output_verbose(5, orte_rmaps_base.rmaps_output, @@ -301,8 +302,8 @@ static int orte_rmaps_rf_map(orte_job_t *jdata) ORTE_JOBID_PRINT(jdata->jobid)); return ORTE_ERR_TAKE_NEXT_OPTION; } - if (ORTE_RMAPS_UNDEF != jdata->map->req_mapper && - ORTE_RMAPS_RF != jdata->map->req_mapper) { + if (NULL != jdata->map->req_mapper && + 0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) { /* a mapper has been specified, and it isn't me */ opal_output_verbose(5, orte_rmaps_base.rmaps_output, "mca:rmaps:rf: job %s not using rank_file mapper", @@ -315,7 +316,7 @@ static int orte_rmaps_rf_map(orte_job_t *jdata) ORTE_JOBID_PRINT(jdata->jobid)); /* flag that I did the mapping */ - jdata->map->last_mapper = ORTE_RMAPS_RF; + jdata->map->last_mapper = strdup(c->mca_component_name); /* convenience def */ map = jdata->map; diff --git a/orte/mca/rmaps/rank_file/rmaps_rank_file.h b/orte/mca/rmaps/rank_file/rmaps_rank_file.h index eda69e4691..9d786dc767 100644 --- 
a/orte/mca/rmaps/rank_file/rmaps_rank_file.h +++ b/orte/mca/rmaps/rank_file/rmaps_rank_file.h @@ -36,10 +36,13 @@ BEGIN_C_DECLS -/** - * RMGR Component - */ -ORTE_MODULE_DECLSPEC extern orte_rmaps_base_component_t mca_rmaps_rank_file_component; +struct orte_rmaps_rf_component_t { + orte_rmaps_base_component_t super; + char *slot_list; +}; +typedef struct orte_rmaps_rf_component_t orte_rmaps_rf_component_t; + +ORTE_MODULE_DECLSPEC extern orte_rmaps_rf_component_t mca_rmaps_rank_file_component; extern orte_rmaps_base_module_t orte_rmaps_rank_file_module; diff --git a/orte/mca/rmaps/rank_file/rmaps_rank_file_component.c b/orte/mca/rmaps/rank_file/rmaps_rank_file_component.c index 4adde92b6f..ad518fe957 100644 --- a/orte/mca/rmaps/rank_file/rmaps_rank_file_component.c +++ b/orte/mca/rmaps/rank_file/rmaps_rank_file_component.c @@ -45,24 +45,26 @@ static int orte_rmaps_rank_file_query(mca_base_module_t **module, int *priority) static int my_priority; -orte_rmaps_base_component_t mca_rmaps_rank_file_component = { - /* First, the mca_base_component_t struct containing meta - information about the component itself */ - +orte_rmaps_rf_component_t mca_rmaps_rank_file_component = { { - ORTE_RMAPS_BASE_VERSION_2_0_0, + /* First, the mca_base_component_t struct containing meta + information about the component itself */ - "rank_file", /* MCA component name */ - ORTE_MAJOR_VERSION, /* MCA component major version */ - ORTE_MINOR_VERSION, /* MCA component minor version */ - ORTE_RELEASE_VERSION, /* MCA component release version */ - orte_rmaps_rank_file_open, /* component open */ - orte_rmaps_rank_file_close, /* component close */ - orte_rmaps_rank_file_query /* component query */ - }, - { - /* The component is checkpoint ready */ - MCA_BASE_METADATA_PARAM_CHECKPOINT + { + ORTE_RMAPS_BASE_VERSION_2_0_0, + + "rank_file", /* MCA component name */ + ORTE_MAJOR_VERSION, /* MCA component major version */ + ORTE_MINOR_VERSION, /* MCA component minor version */ + ORTE_RELEASE_VERSION, /* 
MCA component release version */ + orte_rmaps_rank_file_open, /* component open */ + orte_rmaps_rank_file_close, /* component close */ + orte_rmaps_rank_file_query /* component query */ + }, + { + /* The component is checkpoint ready */ + MCA_BASE_METADATA_PARAM_CHECKPOINT + } } }; @@ -72,17 +74,27 @@ orte_rmaps_base_component_t mca_rmaps_rank_file_component = { */ static int orte_rmaps_rank_file_open(void) { - mca_base_component_t *c = &mca_rmaps_rank_file_component.base_version; + mca_base_component_t *c = &mca_rmaps_rank_file_component.super.base_version; + int tmp; mca_base_param_reg_int(c, "priority", "Priority of the rank_file rmaps component", false, false, 0, &my_priority); - if (NULL != orte_rankfile || - NULL != orte_rmaps_base.slot_list) { + /* did the user provide a slot list? */ + tmp = mca_base_param_reg_string(c, "slot_list", + "List of processor IDs to bind MPI processes to (e.g., used in conjunction with rank files) [default=NULL]", + false, false, NULL, NULL); + mca_base_param_reg_syn_name(tmp, "rmaps", "base_slot_list", false); + mca_base_param_lookup_string(tmp, &mca_rmaps_rank_file_component.slot_list); + + /* ensure we flag mapping by user */ + if (NULL != mca_rmaps_rank_file_component.slot_list || + NULL != orte_rankfile) { + ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_BYUSER); /* make us first */ - my_priority = 1000; + my_priority = 10000; } return ORTE_SUCCESS; diff --git a/orte/mca/rmaps/resilient/rmaps_resilient.c b/orte/mca/rmaps/resilient/rmaps_resilient.c index 3bb825845c..e1e0128551 100644 --- a/orte/mca/rmaps/resilient/rmaps_resilient.c +++ b/orte/mca/rmaps/resilient/rmaps_resilient.c @@ -68,10 +68,11 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata) opal_list_t node_list; orte_std_cntr_t num_slots; opal_list_item_t *item; + mca_base_component_t *c = &mca_rmaps_resilient_component.super.base_version; if (ORTE_JOB_STATE_INIT == jdata->state) { - if (ORTE_RMAPS_UNDEF != jdata->map->req_mapper && - ORTE_RMAPS_RESILIENT != 
jdata->map->req_mapper) { + if (NULL != jdata->map->req_mapper && + 0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) { /* a mapper has been specified, and it isn't me */ opal_output_verbose(5, orte_rmaps_base.rmaps_output, "mca:rmaps:resilient: job %s not using loadbalance mapper", @@ -97,7 +98,7 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata) ORTE_JOBID_PRINT(jdata->jobid)); /* flag that I did the mapping */ - jdata->map->last_mapper = ORTE_RMAPS_RESILIENT; + jdata->map->last_mapper = strdup(c->mca_component_name); /* have we already constructed the fault group list? */ if (!made_ftgrps) { diff --git a/orte/mca/rmaps/rmaps_types.h b/orte/mca/rmaps/rmaps_types.h index 69d1f1d86e..90120e346a 100644 --- a/orte/mca/rmaps/rmaps_types.h +++ b/orte/mca/rmaps/rmaps_types.h @@ -33,17 +33,6 @@ BEGIN_C_DECLS -/* enumerate selectable mappers */ -enum orte_rmaps_mapper_type_t { - ORTE_RMAPS_UNDEF, - ORTE_RMAPS_RR, - ORTE_RMAPS_LOADBALANCE, - ORTE_RMAPS_SEQ, - ORTE_RMAPS_RF, - ORTE_RMAPS_RESILIENT -}; -typedef enum orte_rmaps_mapper_type_t orte_rmaps_mapper_type_t; - /* * Structure that represents the mapping of a job to an * allocated set of resources. 
@@ -51,8 +40,8 @@ typedef enum orte_rmaps_mapper_type_t orte_rmaps_mapper_type_t; struct orte_job_map_t { opal_object_t super; /* user-specified mapping params */ - orte_rmaps_mapper_type_t req_mapper; /* requested mapper */ - orte_rmaps_mapper_type_t last_mapper; /* last mapper used */ + char *req_mapper; /* requested mapper */ + char *last_mapper; /* last mapper used */ orte_mapping_policy_t policy; int npernode; int nperboard; diff --git a/orte/mca/rmaps/round_robin/rmaps_rr.c b/orte/mca/rmaps/round_robin/rmaps_rr.c index 4f2e23f18c..3a68994570 100644 --- a/orte/mca/rmaps/round_robin/rmaps_rr.c +++ b/orte/mca/rmaps/round_robin/rmaps_rr.c @@ -53,7 +53,8 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) orte_std_cntr_t num_nodes, num_slots; int rc; opal_list_item_t *cur_node_item; - + mca_base_component_t *c = &mca_rmaps_round_robin_component.base_version; + /* this mapper can only handle initial launch * when rr mapping is desired - allow * restarting of failed apps @@ -65,21 +66,30 @@ static int orte_rmaps_rr_map(orte_job_t *jdata) orte_job_state_to_str(jdata->state)); return ORTE_ERR_TAKE_NEXT_OPTION; } - if (ORTE_RMAPS_UNDEF != jdata->map->req_mapper && - ORTE_RMAPS_RR != jdata->map->req_mapper) { + if (NULL != jdata->map->req_mapper && + 0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) { /* a mapper has been specified, and it isn't me */ opal_output_verbose(5, orte_rmaps_base.rmaps_output, "mca:rmaps:rr: job %s not using rr mapper", ORTE_JOBID_PRINT(jdata->jobid)); return ORTE_ERR_TAKE_NEXT_OPTION; } + if (0 < jdata->map->npernode || + 0 < jdata->map->nperboard || + 0 < jdata->map->npersocket) { + /* I don't know how to do these - defer */ + opal_output_verbose(5, orte_rmaps_base.rmaps_output, + "mca:rmaps:rr: job %s not using rr mapper", + ORTE_JOBID_PRINT(jdata->jobid)); + return ORTE_ERR_TAKE_NEXT_OPTION; + } opal_output_verbose(5, orte_rmaps_base.rmaps_output, "mca:rmaps:rr: mapping job %s", ORTE_JOBID_PRINT(jdata->jobid)); /* flag that I 
did the mapping */ - jdata->map->last_mapper = ORTE_RMAPS_RR; + jdata->map->last_mapper = strdup(c->mca_component_name); /* start at the beginning... */ jdata->num_procs = 0; diff --git a/orte/mca/rmaps/seq/rmaps_seq.c b/orte/mca/rmaps/seq/rmaps_seq.c index 75ffd40696..0462aa540e 100644 --- a/orte/mca/rmaps/seq/rmaps_seq.c +++ b/orte/mca/rmaps/seq/rmaps_seq.c @@ -70,6 +70,7 @@ static int orte_rmaps_seq_map(orte_job_t *jdata) opal_list_t *default_node_list=NULL; opal_list_t *node_list=NULL; orte_proc_t *proc; + mca_base_component_t *c = &mca_rmaps_seq_component.base_version; OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output, "%s rmaps:seq mapping job %s", @@ -86,21 +87,30 @@ static int orte_rmaps_seq_map(orte_job_t *jdata) ORTE_JOBID_PRINT(jdata->jobid)); return ORTE_ERR_TAKE_NEXT_OPTION; } - if (ORTE_RMAPS_UNDEF != jdata->map->req_mapper && - ORTE_RMAPS_SEQ != jdata->map->req_mapper) { + if (NULL != jdata->map->req_mapper && + 0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) { /* a mapper has been specified, and it isn't me */ opal_output_verbose(5, orte_rmaps_base.rmaps_output, "mca:rmaps:seq: job %s not using sequential mapper", ORTE_JOBID_PRINT(jdata->jobid)); return ORTE_ERR_TAKE_NEXT_OPTION; } + if (0 < jdata->map->npernode || + 0 < jdata->map->nperboard || + 0 < jdata->map->npersocket) { + /* I don't know how to do these - defer */ + opal_output_verbose(5, orte_rmaps_base.rmaps_output, + "mca:rmaps:seq: job %s not using seq mapper", + ORTE_JOBID_PRINT(jdata->jobid)); + return ORTE_ERR_TAKE_NEXT_OPTION; + } opal_output_verbose(5, orte_rmaps_base.rmaps_output, "mca:rmaps:seq: mapping job %s", ORTE_JOBID_PRINT(jdata->jobid)); /* flag that I did the mapping */ - jdata->map->last_mapper = ORTE_RMAPS_SEQ; + jdata->map->last_mapper = strdup(c->mca_component_name); /* conveniece def */ map = jdata->map; diff --git a/orte/runtime/data_type_support/orte_dt_packing_fns.c b/orte/runtime/data_type_support/orte_dt_packing_fns.c index 
0a05761f57..bcf3b8f629 100644 --- a/orte/runtime/data_type_support/orte_dt_packing_fns.c +++ b/orte/runtime/data_type_support/orte_dt_packing_fns.c @@ -872,13 +872,13 @@ int orte_dt_pack_map(opal_buffer_t *buffer, const void *src, for (i=0; i < num_vals; i++) { /* pack the requested mapper */ - if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->req_mapper), 1, OPAL_INT32))) { + if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->req_mapper), 1, OPAL_STRING))) { ORTE_ERROR_LOG(rc); return rc; } /* pack the mapper used to generate it */ - if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->last_mapper), 1, OPAL_INT32))) { + if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->last_mapper), 1, OPAL_STRING))) { ORTE_ERROR_LOG(rc); return rc; } diff --git a/orte/runtime/data_type_support/orte_dt_print_fns.c b/orte/runtime/data_type_support/orte_dt_print_fns.c index 47580b7d99..52851ed43b 100644 --- a/orte/runtime/data_type_support/orte_dt_print_fns.c +++ b/orte/runtime/data_type_support/orte_dt_print_fns.c @@ -654,8 +654,9 @@ int orte_dt_print_map(char **output, char *prefix, orte_job_map_t *src, opal_dat if (orte_devel_level_output) { asprintf(&tmp, "\n%sMapper requested: %s\tLast mapper: %s\tMapping policy: %04x\n%s\tNpernode: %ld\tOversubscribe allowed: %s\tCPU Lists: %s", - pfx2, orte_rmaps_base_print_mapper(src->req_mapper), - orte_rmaps_base_print_mapper(src->last_mapper), src->policy, pfx2, (long)src->npernode, + pfx2, (NULL == src->req_mapper) ? "NULL" : src->req_mapper, + (NULL == src->last_mapper) ? "NULL" : src->last_mapper, + src->policy, pfx2, (long)src->npernode, (src->oversubscribe) ? "TRUE" : "FALSE", (src->cpu_lists) ?
"TRUE" : "FALSE"); diff --git a/orte/runtime/data_type_support/orte_dt_unpacking_fns.c b/orte/runtime/data_type_support/orte_dt_unpacking_fns.c index 6ab22fb8b8..9c345ae31e 100644 --- a/orte/runtime/data_type_support/orte_dt_unpacking_fns.c +++ b/orte/runtime/data_type_support/orte_dt_unpacking_fns.c @@ -955,7 +955,7 @@ int orte_dt_unpack_map(opal_buffer_t *buffer, void *dest, /* unpack the requested mapper */ n = 1; if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, - &(maps[i]->req_mapper), &n, OPAL_INT32))) { + &(maps[i]->req_mapper), &n, OPAL_STRING))) { ORTE_ERROR_LOG(rc); return rc; } @@ -963,7 +963,7 @@ int orte_dt_unpack_map(opal_buffer_t *buffer, void *dest, /* unpack the mapper used */ n = 1; if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, - &(maps[i]->last_mapper), &n, OPAL_INT32))) { + &(maps[i]->last_mapper), &n, OPAL_STRING))) { ORTE_ERROR_LOG(rc); return rc; } diff --git a/orte/runtime/orte_globals.c b/orte/runtime/orte_globals.c index 7cf9e9bd8c..d0a1129f81 100644 --- a/orte/runtime/orte_globals.c +++ b/orte/runtime/orte_globals.c @@ -1049,8 +1049,8 @@ OBJ_CLASS_INSTANCE(orte_jmap_t, static void orte_job_map_construct(orte_job_map_t* map) { - map->req_mapper = ORTE_RMAPS_UNDEF; - map->last_mapper = ORTE_RMAPS_UNDEF; + map->req_mapper = NULL; + map->last_mapper = NULL; map->policy = 0; map->npernode = 0; map->nperboard = 0; @@ -1074,6 +1074,12 @@ static void orte_job_map_destruct(orte_job_map_t* map) { orte_std_cntr_t i; + if (NULL != map->req_mapper) { + free(map->req_mapper); + } + if (NULL != map->last_mapper) { + free(map->last_mapper); + } for (i=0; i < map->nodes->size; i++) { if (NULL != map->nodes->addr[i]) { OBJ_RELEASE(map->nodes->addr[i]); diff --git a/orte/runtime/orte_globals.h b/orte/runtime/orte_globals.h index 30b7cccd86..7d0bf292bc 100644 --- a/orte/runtime/orte_globals.h +++ b/orte/runtime/orte_globals.h @@ -330,6 +330,8 @@ typedef uint16_t orte_job_controls_t; #define ORTE_MAPPING_NO_USE_LOCAL 0x2000 #define 
ORTE_MAPPING_NPERXXX 0x4000 #define ORTE_MAPPING_BYUSER 0x8000 +/* check if policy is set: nonzero when any bit covered by the 0xff00 mask is set in pol */ +#define ORTE_MAPPING_POLICY_IS_SET(pol) ((pol) & 0xff00) /* nice macro for setting these */ #define ORTE_SET_MAPPING_POLICY(pol) \ orte_default_mapping_policy = (orte_default_mapping_policy & 0x00ff) | (pol);