1
1

Redefine the rmaps framework to allow multiple mapper modules to be active at the same time. This allows users to map the primary job one way, and map any comm_spawn'd job in a different way. Modules are given the opportunity to map a job in priority order, with the round-robin mapper having the highest default priority. The priority of each module can be set via an MCA parameter.

When called, each mapper checks to see if it can map the job. If npernode is provided, for example, then the loadbalance mapper accepts the assignment and performs the operation — all mappers ahead of it in priority order will "pass", as they cannot handle npernode requests.

Also remove the stale and never completed topo mapper.

This commit was SVN r24393.
Этот коммит содержится в:
Ralph Castain 2011-02-15 23:24:31 +00:00
родитель 29785e4ea1
Коммит 5120e6aec3
32 изменённых файлов: 936 добавлений и 1343 удалений

Просмотреть файл

@ -120,7 +120,9 @@ enum {
ORTE_ERR_PROC_STALLED = (ORTE_ERR_BASE - 38), ORTE_ERR_PROC_STALLED = (ORTE_ERR_BASE - 38),
ORTE_ERR_NO_APP_SPECIFIED = (ORTE_ERR_BASE - 39), ORTE_ERR_NO_APP_SPECIFIED = (ORTE_ERR_BASE - 39),
ORTE_ERR_NO_EXE_SPECIFIED = (ORTE_ERR_BASE - 40), ORTE_ERR_NO_EXE_SPECIFIED = (ORTE_ERR_BASE - 40),
ORTE_ERR_COMM_DISABLED = (ORTE_ERR_BASE - 41) ORTE_ERR_COMM_DISABLED = (ORTE_ERR_BASE - 41),
ORTE_ERR_FAILED_TO_MAP = (ORTE_ERR_BASE - 42),
ORTE_ERR_TAKE_NEXT_OPTION = (ORTE_ERR_BASE - 43)
}; };
#define ORTE_ERR_MAX (ORTE_ERR_BASE - 100) #define ORTE_ERR_MAX (ORTE_ERR_BASE - 100)

Просмотреть файл

@ -93,6 +93,9 @@ int orte_plm_base_setup_job(orte_job_t *jdata)
ljob = ORTE_LOCAL_JOBID(jdata->jobid); ljob = ORTE_LOCAL_JOBID(jdata->jobid);
opal_pointer_array_set_item(orte_job_data, ljob, jdata); opal_pointer_array_set_item(orte_job_data, ljob, jdata);
/* set the job state */
jdata->state = ORTE_JOB_STATE_INIT;
/* if job recovery is not defined, set it to default */ /* if job recovery is not defined, set it to default */
if (!jdata->recovery_defined) { if (!jdata->recovery_defined) {
/* set to system default */ /* set to system default */

Просмотреть файл

@ -56,8 +56,10 @@ typedef struct {
int rmaps_output; int rmaps_output;
/** List of available components */ /** List of available components */
opal_list_t available_components; opal_list_t available_components;
/** selected module */ /* list of selected modules */
orte_rmaps_base_module_t *active_module; opal_list_t selected_modules;
/* desired default mapper */
int default_mapper;
/** whether or not we allow oversubscription of nodes */ /** whether or not we allow oversubscription of nodes */
bool oversubscribe; bool oversubscribe;
/** number of ppn for n_per_node mode */ /** number of ppn for n_per_node mode */
@ -74,8 +76,6 @@ typedef struct {
bool no_use_local; bool no_use_local;
/* display the map after it is computed */ /* display the map after it is computed */
bool display_map; bool display_map;
/* balance load across nodes */
bool loadbalance;
/* slot list, if provided by user */ /* slot list, if provided by user */
char *slot_list; char *slot_list;
} orte_rmaps_base_t; } orte_rmaps_base_t;
@ -88,6 +88,14 @@ ORTE_DECLSPEC extern orte_rmaps_base_t orte_rmaps_base;
/** /**
* Select an rmaps component / module * Select an rmaps component / module
*/ */
typedef struct {
opal_list_item_t super;
int pri;
orte_rmaps_base_module_t *module;
mca_base_component_t *component;
} orte_rmaps_base_selected_module_t;
OBJ_CLASS_DECLARATION(orte_rmaps_base_selected_module_t);
ORTE_DECLSPEC int orte_rmaps_base_select(void); ORTE_DECLSPEC int orte_rmaps_base_select(void);
/** /**

Просмотреть файл

@ -87,3 +87,10 @@ are cpus in a socket:
#cpus/socket: %d #cpus/socket: %d
Please correct one or both of these values and try again. Please correct one or both of these values and try again.
#
[failed-map]
Your job failed to map. Either no mapper was available, or none
of the available mappers was able to perform the requested
mapping operation. This can happen if you request a map type
(e.g., loadbalance) and the corresponding mapper was not built.

Просмотреть файл

@ -28,6 +28,14 @@
int orte_rmaps_base_close(void) int orte_rmaps_base_close(void)
{ {
opal_list_item_t *item;
/* cleanup globals */
while (NULL != (item = opal_list_remove_first(&orte_rmaps_base.selected_modules))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&orte_rmaps_base.selected_modules);
mca_base_components_close(orte_rmaps_base.rmaps_output, mca_base_components_close(orte_rmaps_base.rmaps_output,
&orte_rmaps_base.available_components, NULL); &orte_rmaps_base.available_components, NULL);

Просмотреть файл

@ -25,10 +25,11 @@
#include "opal/util/output.h" #include "opal/util/output.h"
#include "opal/util/opal_sos.h" #include "opal/util/opal_sos.h"
#include "opal/mca/base/base.h" #include "opal/mca/base/base.h"
#include "opal/dss/dss.h" #include "opal/dss/dss.h"
#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/errmgr.h"
#include "orte/runtime/orte_globals.h" #include "orte/runtime/orte_globals.h"
#include "orte/util/show_help.h"
#include "orte/mca/rmaps/base/base.h" #include "orte/mca/rmaps/base/base.h"
#include "orte/mca/rmaps/base/rmaps_private.h" #include "orte/mca/rmaps/base/rmaps_private.h"
@ -42,6 +43,9 @@ int orte_rmaps_base_map_job(orte_job_t *jdata)
{ {
orte_job_map_t *map; orte_job_map_t *map;
int rc; int rc;
bool did_map;
opal_list_item_t *item;
orte_rmaps_base_selected_module_t *mod;
/* NOTE: NO PROXY COMPONENT REQUIRED - REMOTE PROCS ARE NOT /* NOTE: NO PROXY COMPONENT REQUIRED - REMOTE PROCS ARE NOT
* ALLOWED TO CALL RMAPS INDEPENDENTLY. ONLY THE PLM CAN * ALLOWED TO CALL RMAPS INDEPENDENTLY. ONLY THE PLM CAN
@ -76,12 +80,16 @@ int orte_rmaps_base_map_job(orte_job_t *jdata)
map->stride = orte_rmaps_base.stride; map->stride = orte_rmaps_base.stride;
map->oversubscribe = orte_rmaps_base.oversubscribe; map->oversubscribe = orte_rmaps_base.oversubscribe;
map->display_map = orte_rmaps_base.display_map; map->display_map = orte_rmaps_base.display_map;
map->mapper = orte_rmaps_base.default_mapper;
/* assign the map object to this job */ /* assign the map object to this job */
jdata->map = map; jdata->map = map;
} else { } else {
if (!jdata->map->display_map) { if (!jdata->map->display_map) {
jdata->map->display_map = orte_rmaps_base.display_map; jdata->map->display_map = orte_rmaps_base.display_map;
} }
if (ORTE_RMAPS_UNDEF == jdata->map->mapper) {
jdata->map->mapper = orte_rmaps_base.default_mapper;
}
} }
/* if the job is the daemon job, then we are just mapping daemons and /* if the job is the daemon job, then we are just mapping daemons and
@ -93,12 +101,32 @@ int orte_rmaps_base_map_job(orte_job_t *jdata)
return rc; return rc;
} }
} else { } else {
/* go ahead and map the job */ /* cycle thru the available mappers until one agrees to map
if (ORTE_SUCCESS != (rc = orte_rmaps_base.active_module->map_job(jdata))) { * the job
*/
did_map = false;
for (item = opal_list_get_first(&orte_rmaps_base.selected_modules);
item != opal_list_get_end(&orte_rmaps_base.selected_modules);
item = opal_list_get_next(item)) {
mod = (orte_rmaps_base_selected_module_t*)item;
if (ORTE_SUCCESS == (rc = mod->module->map_job(jdata))) {
did_map = true;
break;
}
/* mappers return "next option" if they didn't attempt to
* map the job. anything else is a true error.
*/
if (ORTE_ERR_TAKE_NEXT_OPTION != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
} }
/* if we get here without doing the map, then that's an error */
if (!did_map) {
orte_show_help("help-orte-rmaps-base.txt", "failed-map", true);
return ORTE_ERR_FAILED_TO_MAP;
}
}
/* if we wanted to display the map, now is the time to do it */ /* if we wanted to display the map, now is the time to do it */
if (jdata->map->display_map) { if (jdata->map->display_map) {

Просмотреть файл

@ -87,7 +87,8 @@ int orte_rmaps_base_open(void)
bool btmp; bool btmp;
/* init the globals */ /* init the globals */
orte_rmaps_base.active_module = NULL; OBJ_CONSTRUCT(&orte_rmaps_base.selected_modules, opal_list_t);
orte_rmaps_base.default_mapper = ORTE_RMAPS_UNDEF;
/* Debugging / verbose output. Always have stream open, with /* Debugging / verbose output. Always have stream open, with
verbose set by the mca open system... */ verbose set by the mca open system... */
@ -118,6 +119,7 @@ int orte_rmaps_base_open(void)
false, false, (int)false, &value); false, false, (int)false, &value);
if (value) { if (value) {
orte_rmaps_base.npernode = 1; orte_rmaps_base.npernode = 1;
orte_rmaps_base.default_mapper = ORTE_RMAPS_LOADBALANCE;
} }
/* #procs/node */ /* #procs/node */
@ -126,6 +128,7 @@ int orte_rmaps_base_open(void)
false, false, -1, &value); false, false, -1, &value);
if (0 < value) { if (0 < value) {
orte_rmaps_base.npernode = value; orte_rmaps_base.npernode = value;
orte_rmaps_base.default_mapper = ORTE_RMAPS_LOADBALANCE;
} }
/* #procs/board */ /* #procs/board */
@ -134,6 +137,7 @@ int orte_rmaps_base_open(void)
false, false, -1, &orte_rmaps_base.nperboard); false, false, -1, &orte_rmaps_base.nperboard);
if (0 < orte_rmaps_base.nperboard) { if (0 < orte_rmaps_base.nperboard) {
ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX); ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX);
orte_rmaps_base.default_mapper = ORTE_RMAPS_LOADBALANCE;
} }
/* #procs/socket */ /* #procs/socket */
@ -144,13 +148,16 @@ int orte_rmaps_base_open(void)
ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX); ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX);
/* force bind to socket if not overridden by user */ /* force bind to socket if not overridden by user */
ORTE_XSET_BINDING_POLICY(ORTE_BIND_TO_SOCKET); ORTE_XSET_BINDING_POLICY(ORTE_BIND_TO_SOCKET);
orte_rmaps_base.default_mapper = ORTE_RMAPS_LOADBALANCE;
} }
/* Do we want to loadbalance the job */ /* Do we want to loadbalance the job */
param = mca_base_param_reg_int_name("rmaps", "base_loadbalance", param = mca_base_param_reg_int_name("rmaps", "base_loadbalance",
"Balance total number of procs across all allocated nodes", "Balance total number of procs across all allocated nodes",
false, false, (int)false, &value); false, false, (int)false, &value);
orte_rmaps_base.loadbalance = OPAL_INT_TO_BOOL(value); if (value) {
orte_rmaps_base.default_mapper = ORTE_RMAPS_LOADBALANCE;
}
/* #cpus/rank to use */ /* #cpus/rank to use */
param = mca_base_param_reg_int_name("rmaps", "base_cpus_per_proc", param = mca_base_param_reg_int_name("rmaps", "base_cpus_per_proc",
@ -193,6 +200,7 @@ int orte_rmaps_base_open(void)
if (NULL != orte_rmaps_base.slot_list || if (NULL != orte_rmaps_base.slot_list ||
NULL != orte_rankfile) { NULL != orte_rankfile) {
ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_BYUSER); ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_BYUSER);
orte_rmaps_base.default_mapper = ORTE_RMAPS_RF;
} }
/* Should we schedule on the local node or not? */ /* Should we schedule on the local node or not? */
@ -243,4 +251,8 @@ int orte_rmaps_base_open(void)
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
OBJ_CLASS_INSTANCE(orte_rmaps_base_selected_module_t,
opal_list_item_t,
NULL, NULL);
#endif /* ORTE_DISABLE_FULL_SUPPORT */ #endif /* ORTE_DISABLE_FULL_SUPPORT */

Просмотреть файл

@ -26,29 +26,98 @@
#include "orte/mca/rmaps/base/base.h" #include "orte/mca/rmaps/base/base.h"
static bool selected = false;
/* /*
* Function for selecting one component from all those that are * Function for selecting one component from all those that are
* available. * available.
*/ */
int orte_rmaps_base_select(void) int orte_rmaps_base_select(void)
{ {
orte_rmaps_base_component_t *best_component = NULL; opal_list_item_t *item, *itm2;
orte_rmaps_base_module_t *best_module = NULL; mca_base_component_list_item_t *cli = NULL;
mca_base_component_t *component = NULL;
mca_base_module_t *module = NULL;
orte_rmaps_base_module_t *nmodule;
orte_rmaps_base_selected_module_t *newmodule, *mod;
int rc, priority;
bool inserted;
/* if (selected) {
* Select the best component /* ensure we don't do this twice */
*/ return ORTE_SUCCESS;
if( OPAL_SUCCESS != mca_base_select("rmaps", orte_rmaps_base.rmaps_output, }
&orte_rmaps_base.available_components, selected = true;
(mca_base_module_t **) &best_module,
(mca_base_component_t **) &best_component) ) { /* Query all available components and ask if they have a module */
/* This will only happen if no component was selected */ for (item = opal_list_get_first(&orte_rmaps_base.available_components);
return ORTE_ERR_NOT_FOUND; opal_list_get_end(&orte_rmaps_base.available_components) != item;
item = opal_list_get_next(item)) {
cli = (mca_base_component_list_item_t *) item;
component = (mca_base_component_t *) cli->cli_component;
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:select: checking available component %s", component->mca_component_name);
/* If there's no query function, skip it */
if (NULL == component->mca_query_component) {
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:select: Skipping component [%s]. It does not implement a query function",
component->mca_component_name );
continue;
} }
/* Save the winner */ /* Query the component */
/* No global component structure */ opal_output_verbose(5, orte_rmaps_base.rmaps_output,
orte_rmaps_base.active_module = best_module; "mca:rmaps:select: Querying component [%s]",
component->mca_component_name);
rc = component->mca_query_component(&module, &priority);
/* If no module was returned, then skip component */
if (ORTE_SUCCESS != rc || NULL == module) {
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:select: Skipping component [%s]. Query failed to return a module",
component->mca_component_name );
continue;
}
/* If we got a module, keep it */
nmodule = (orte_rmaps_base_module_t*) module;
/* add to the list of selected modules */
newmodule = OBJ_NEW(orte_rmaps_base_selected_module_t);
newmodule->pri = priority;
newmodule->module = nmodule;
newmodule->component = component;
/* maintain priority order */
inserted = false;
for (itm2 = opal_list_get_first(&orte_rmaps_base.selected_modules);
itm2 != opal_list_get_end(&orte_rmaps_base.selected_modules);
itm2 = opal_list_get_next(itm2)) {
mod = (orte_rmaps_base_selected_module_t*)itm2;
if (priority > mod->pri) {
opal_list_insert_pos(&orte_rmaps_base.selected_modules,
itm2, &newmodule->super);
inserted = true;
break;
}
}
if (!inserted) {
/* must be lowest priority - add to end */
opal_list_append(&orte_rmaps_base.selected_modules, &newmodule->super);
}
}
if (4 < opal_output_get_verbosity(orte_rmaps_base.rmaps_output)) {
opal_output(0, "%s: Final mapper priorities", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
/* show the prioritized list */
for (itm2 = opal_list_get_first(&orte_rmaps_base.selected_modules);
itm2 != opal_list_get_end(&orte_rmaps_base.selected_modules);
itm2 = opal_list_get_next(itm2)) {
mod = (orte_rmaps_base_selected_module_t*)itm2;
opal_output(0, "\tMapper: %s Priority: %d", mod->component->mca_component_name, mod->pri);
}
}
return ORTE_SUCCESS;; return ORTE_SUCCESS;;
} }

Просмотреть файл

@ -60,12 +60,12 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
/* if the hnp was allocated, include it unless flagged not to */ /* if the hnp was allocated, include it unless flagged not to */
if (orte_hnp_is_allocated) { if (orte_hnp_is_allocated) {
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0))) { if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0))) {
if (ORTE_NODE_STATE_UP == node->state) { if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
/* clear this for future use, but don't include it */
node->state = ORTE_NODE_STATE_UP;
} else if (ORTE_NODE_STATE_NOT_INCLUDED != node->state) {
OBJ_RETAIN(node); OBJ_RETAIN(node);
opal_list_append(allocated_nodes, &node->super); opal_list_append(allocated_nodes, &node->super);
} else if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
/* clear this for future use */
node->state = ORTE_NODE_STATE_UP;
} }
} }
} }
@ -73,16 +73,19 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
/* add everything in the node pool that can be used */ /* add everything in the node pool that can be used */
for (i=1; i < orte_node_pool->size; i++) { for (i=1; i < orte_node_pool->size; i++) {
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
/* ignore nodes that are "down" */
if (ORTE_NODE_STATE_DOWN == node->state) {
continue;
}
/* ignore nodes that are marked as do-not-use for this mapping */ /* ignore nodes that are marked as do-not-use for this mapping */
if (ORTE_NODE_STATE_DO_NOT_USE == node->state) { if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
/* reset the state so it can be used another time */ /* reset the state so it can be used another time */
node->state = ORTE_NODE_STATE_UP; node->state = ORTE_NODE_STATE_UP;
continue; continue;
} }
if (ORTE_NODE_STATE_DOWN == node->state) {
continue;
}
if (ORTE_NODE_STATE_NOT_INCLUDED == node->state) {
/* not to be used */
continue;
}
/* retain a copy for our use in case the item gets /* retain a copy for our use in case the item gets
* destructed along the way * destructed along the way
*/ */
@ -247,36 +250,6 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
/* if we are mapping an application, check to see if we are to
* use a virtual machine
*/
if (policy & ORTE_MAPPING_USE_VM) {
/* remove all nodes that do NOT have an "alive" daemon on them */
item = opal_list_get_first(allocated_nodes);
while (item != opal_list_get_end(allocated_nodes)) {
/** save the next pointer in case we remove this node */
next = opal_list_get_next(item);
/** already have a daemon? */
node = (orte_node_t*)item;
if (NULL == node->daemon ||
ORTE_PROC_STATE_RUNNING != node->daemon->state) {
opal_list_remove_item(allocated_nodes, item);
OBJ_RELEASE(item); /* "un-retain" it */
}
/** go on to next item */
item = next;
}
/** check that anything is left! */
if (0 == opal_list_get_size(allocated_nodes)) {
orte_show_help("help-orte-rmaps-base.txt",
"orte-rmaps-base:nolocal-no-available-resources", true);
return ORTE_ERR_SILENT;
}
}
/* remove all nodes that are already at max usage, and /* remove all nodes that are already at max usage, and
* compute the total number of allocated slots while * compute the total number of allocated slots while
* we do so * we do so

Просмотреть файл

@ -55,6 +55,27 @@ static int switchyard(orte_job_t *jdata)
{ {
int rc; int rc;
/* only handle initial launch of loadbalanced
* or NPERxxx jobs - allow restarting of failed apps
*/
if (ORTE_JOB_STATE_INIT != jdata->state) {
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:lb: not job %s not in initial state - loadbalance cannot map",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
if (0 < jdata->map->mapper && ORTE_RMAPS_LOADBALANCE != jdata->map->mapper) {
/* a mapper has been specified, and it isn't me */
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:lb: job %s not using loadbalance mapper",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:loadbalance: mapping job %s",
ORTE_JOBID_PRINT(jdata->jobid));
if (0 < orte_rmaps_base.npernode) { if (0 < orte_rmaps_base.npernode) {
rc = npernode(jdata); rc = npernode(jdata);
} else if (0 < orte_rmaps_base.nperboard) { } else if (0 < orte_rmaps_base.nperboard) {
@ -78,10 +99,9 @@ static int switchyard(orte_job_t *jdata)
/* define the daemons that we will use for this job */ /* define the daemons that we will use for this job */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(jdata->map))) { if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(jdata->map))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc;
} }
return ORTE_SUCCESS; return rc;
} }

Просмотреть файл

@ -33,6 +33,7 @@ static int orte_rmaps_lb_open(void);
static int orte_rmaps_lb_close(void); static int orte_rmaps_lb_close(void);
static int orte_rmaps_lb_query(mca_base_module_t **module, int *priority); static int orte_rmaps_lb_query(mca_base_module_t **module, int *priority);
static int my_priority;
orte_rmaps_base_component_t mca_rmaps_load_balance_component = { orte_rmaps_base_component_t mca_rmaps_load_balance_component = {
{ {
@ -58,30 +59,22 @@ orte_rmaps_base_component_t mca_rmaps_load_balance_component = {
*/ */
static int orte_rmaps_lb_open(void) static int orte_rmaps_lb_open(void)
{ {
mca_base_component_t *c = &mca_rmaps_load_balance_component.base_version;
mca_base_param_reg_int(c, "priority",
"Priority of the loadbalance rmaps component",
false, false, 80,
&my_priority);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
static int orte_rmaps_lb_query(mca_base_module_t **module, int *priority) static int orte_rmaps_lb_query(mca_base_module_t **module, int *priority)
{ {
/* the RMAPS framework is -only- opened on HNP's, /* after rr */
* so no need to check for that here *priority = my_priority;
*/
/* if load balancing, or any nperxxx, was requested, then we must be selected */
if (orte_rmaps_base.loadbalance ||
0 < orte_rmaps_base.npernode ||
0 < orte_rmaps_base.nperboard ||
0 < orte_rmaps_base.npersocket) {
*priority = 1000; /* must be selected */
*module = (mca_base_module_t *)&orte_rmaps_load_balance_module; *module = (mca_base_module_t *)&orte_rmaps_load_balance_module;
return ORTE_SUCCESS; return ORTE_SUCCESS;
}
/* otherwise, ignore us */
*priority = 0;
*module = NULL;
return ORTE_ERROR;
} }
/** /**

Просмотреть файл

@ -294,6 +294,25 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
int rc; int rc;
orte_proc_t *proc; orte_proc_t *proc;
/* only handle initial launch of rf job */
if (ORTE_JOB_STATE_INIT != jdata->state) {
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:rf: not job %s not in initial state - rank_file cannot map",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
if (0 < jdata->map->mapper && ORTE_RMAPS_RF != jdata->map->mapper) {
/* a mapper has been specified, and it isn't me */
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:rf: job %s not using rank_file mapper",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:rank_file: mapping job %s",
ORTE_JOBID_PRINT(jdata->jobid));
/* convenience def */ /* convenience def */
map = jdata->map; map = jdata->map;

Просмотреть файл

@ -39,14 +39,7 @@ BEGIN_C_DECLS
/** /**
* RMGR Component * RMGR Component
*/ */
struct orte_rmaps_rank_file_component_t { ORTE_MODULE_DECLSPEC extern orte_rmaps_base_component_t mca_rmaps_rank_file_component;
orte_rmaps_base_component_t super;
int debug;
int priority;
};
typedef struct orte_rmaps_rank_file_component_t orte_rmaps_rank_file_component_t;
ORTE_MODULE_DECLSPEC extern orte_rmaps_rank_file_component_t mca_rmaps_rank_file_component;
extern orte_rmaps_base_module_t orte_rmaps_rank_file_module; extern orte_rmaps_base_module_t orte_rmaps_rank_file_module;

Просмотреть файл

@ -43,8 +43,9 @@ static int orte_rmaps_rank_file_open(void);
static int orte_rmaps_rank_file_close(void); static int orte_rmaps_rank_file_close(void);
static int orte_rmaps_rank_file_query(mca_base_module_t **module, int *priority); static int orte_rmaps_rank_file_query(mca_base_module_t **module, int *priority);
orte_rmaps_rank_file_component_t mca_rmaps_rank_file_component = { static int my_priority;
{
orte_rmaps_base_component_t mca_rmaps_rank_file_component = {
/* First, the mca_base_component_t struct containing meta /* First, the mca_base_component_t struct containing meta
information about the component itself */ information about the component itself */
@ -63,7 +64,6 @@ orte_rmaps_rank_file_component_t mca_rmaps_rank_file_component = {
/* The component is checkpoint ready */ /* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT MCA_BASE_METADATA_PARAM_CHECKPOINT
} }
}
}; };
@ -72,11 +72,17 @@ orte_rmaps_rank_file_component_t mca_rmaps_rank_file_component = {
*/ */
static int orte_rmaps_rank_file_open(void) static int orte_rmaps_rank_file_open(void)
{ {
mca_rmaps_rank_file_component.priority = 0; mca_base_component_t *c = &mca_rmaps_rank_file_component.base_version;
mca_base_param_reg_int(c, "priority",
"Priority of the rank_file rmaps component",
false, false, 0,
&my_priority);
if (NULL != orte_rankfile || if (NULL != orte_rankfile ||
NULL != orte_rmaps_base.slot_list) { NULL != orte_rmaps_base.slot_list) {
mca_rmaps_rank_file_component.priority = 100; /* make us first */
my_priority = 1000;
} }
return ORTE_SUCCESS; return ORTE_SUCCESS;
@ -84,11 +90,7 @@ static int orte_rmaps_rank_file_open(void)
static int orte_rmaps_rank_file_query(mca_base_module_t **module, int *priority) static int orte_rmaps_rank_file_query(mca_base_module_t **module, int *priority)
{ {
/* the RMAPS framework is -only- opened on HNP's, *priority = my_priority;
* so no need to check for that here
*/
*priority = mca_rmaps_rank_file_component.priority;
*module = (mca_base_module_t *)&orte_rmaps_rank_file_module; *module = (mca_base_module_t *)&orte_rmaps_rank_file_module;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }

Просмотреть файл

@ -26,7 +26,6 @@
#include "opal/mca/base/mca_base_param.h" #include "opal/mca/base/mca_base_param.h"
#include "opal/util/argv.h" #include "opal/util/argv.h"
#include "opal/util/opal_sos.h"
#include "opal/class/opal_pointer_array.h" #include "opal/class/opal_pointer_array.h"
#include "orte/util/show_help.h" #include "orte/util/show_help.h"
@ -40,98 +39,235 @@
/* /*
* Local variable * Local variable
*/ */
static opal_list_item_t *cur_node_item = NULL;
static char *orte_getline(FILE *fp); static char *orte_getline(FILE *fp);
static bool have_ftgrps=false;
/* default round-robin mapper */ static int construct_ftgrps(void);
static int rr_map_default(orte_job_t *jdata, orte_app_context_t *app, static int get_ftgrp_target(orte_proc_t *proc,
opal_list_t *node_list, orte_vpid_t num_procs) orte_rmaps_res_ftgrp_t **target,
{ orte_node_t **nd);
int rc; static int get_new_node(orte_proc_t *proc,
orte_app_context_t *app,
/* if a bookmark exists from some prior mapping, set us to start there */ orte_job_map_t *map,
cur_node_item = orte_rmaps_base_get_starting_point(node_list, jdata); orte_node_t **ndret);
static int map_to_ftgrps(orte_job_t *jdata);
/* now perform the mapping */
if (ORTE_MAPPING_BYNODE & jdata->map->policy) {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_map_bynode(jdata, app, node_list,
num_procs, cur_node_item))) {
ORTE_ERROR_LOG(rc);
return rc;
}
} else {
if (ORTE_SUCCESS != (rc = orte_rmaps_base_map_byslot(jdata, app, node_list,
num_procs, cur_node_item))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
return ORTE_SUCCESS;
}
static void flag_nodes(opal_list_t *node_list)
{
opal_list_item_t *item, *nitem;
orte_node_t *node, *nd;
orte_rmaps_res_ftgrp_t *ftgrp;
int k;
for (item = opal_list_get_first(&mca_rmaps_resilient_component.fault_grps);
item != opal_list_get_end(&mca_rmaps_resilient_component.fault_grps);
item = opal_list_get_next(item)) {
ftgrp = (orte_rmaps_res_ftgrp_t*)item;
/* reset the flags */
ftgrp->used = false;
ftgrp->included = false;
/* if at least one node in our list is included in this
* ftgrp, then flag it as included
*/
for (nitem = opal_list_get_first(node_list);
!ftgrp->included && nitem != opal_list_get_end(node_list);
nitem = opal_list_get_next(nitem)) {
node = (orte_node_t*)nitem;
for (k=0; k < ftgrp->nodes.size; k++) {
if (NULL == (nd = (orte_node_t*)opal_pointer_array_get_item(&ftgrp->nodes, k))) {
continue;
}
if (0 == strcmp(nd->name, node->name)) {
ftgrp->included = true;
break;
}
}
}
}
}
/* /*
* Loadbalance the cluster * Loadbalance the cluster
*/ */
static int orte_rmaps_resilient_map(orte_job_t *jdata) static int orte_rmaps_resilient_map(orte_job_t *jdata)
{ {
orte_job_map_t *map;
orte_app_context_t *app; orte_app_context_t *app;
int i, j, k, totnodes; int i;
opal_list_t node_list;
opal_list_item_t *item;
orte_std_cntr_t num_slots;
int rc = ORTE_SUCCESS; int rc = ORTE_SUCCESS;
float avgload, minload; orte_node_t *nd=NULL, *oldnode, *node;
orte_node_t *node, *nd=NULL, *oldnode; orte_rmaps_res_ftgrp_t *target = NULL;
orte_rmaps_res_ftgrp_t *ftgrp, *target = NULL; orte_proc_t *proc;
orte_vpid_t totprocs, lowprocs, num_assigned; orte_vpid_t totprocs;
opal_list_t node_list;
orte_std_cntr_t num_slots;
opal_list_item_t *item;
if (0 < jdata->map->mapper && ORTE_RMAPS_RESILIENT != jdata->map->mapper) {
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:resilient: cannot map job %s - other mapper specified",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
if (ORTE_JOB_STATE_INIT == jdata->state &&
NULL == mca_rmaps_resilient_component.fault_group_file) {
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:resilient: cannot perform initial map of job %s",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:resilient: mapping job %s",
ORTE_JOBID_PRINT(jdata->jobid));
/* have we already constructed the fault group list? */
if (!have_ftgrps) {
construct_ftgrps();
}
if (ORTE_JOB_STATE_INIT == jdata->state) {
/* this is an initial map - let the fault group mapper
* handle it
*/
return map_to_ftgrps(jdata);
}
/*
* NOTE: if a proc is being ADDED to an existing job, then its
* node field will be NULL.
*/
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output,
"%s rmaps:resilient: remapping job %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(jdata->jobid)));
/* cycle through all the procs in this job to find the one(s) that failed */
for (i=0; i < jdata->procs->size; i++) {
/* get the proc object */
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, i))) {
continue;
}
/* is this proc to be restarted? */
if (proc->state != ORTE_PROC_STATE_RESTART) {
continue;
}
/* save the current node */
oldnode = proc->node;
/* point to the app */
app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, proc->app_idx);
if( NULL == app ) {
ORTE_ERROR_LOG(ORTE_ERR_FAILED_TO_MAP);
rc = ORTE_ERR_FAILED_TO_MAP;
goto error;
}
if (NULL == oldnode) {
/* this proc was not previously running - likely it is being added
* to the job. So place it on the node with the fewest procs to
* balance the load
*/
OBJ_CONSTRUCT(&node_list, opal_list_t);
if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list,
&num_slots,
app,
jdata->map->policy))) {
ORTE_ERROR_LOG(rc);
goto error;
}
if (0 == opal_list_get_size(&node_list)) {
ORTE_ERROR_LOG(ORTE_ERROR);
rc = ORTE_ERROR;
goto error;
}
totprocs = 1000000;
nd = NULL;
while (NULL != (item = opal_list_remove_first(&node_list))) {
node = (orte_node_t*)item;
if (node->num_procs < totprocs) {
nd = node;
totprocs = node->num_procs;
}
OBJ_RELEASE(item); /* maintain accounting */
}
OBJ_DESTRUCT(&node_list);
/* we already checked to ensure there was at least one node,
* so we couldn't have come out of the loop with nd=NULL
*/
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output,
"%s rmaps:resilient: Placing new process on node %s daemon %s (no ftgrp)",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
nd->name, ORTE_NAME_PRINT((&nd->daemon->name))));
} else {
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output,
"%s rmaps:resilient: proc %s from node %s is to be restarted",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&proc->name),
(NULL == proc->node) ? "NULL" : proc->node->name));
/* if we have fault groups, use them */
if (have_ftgrps) {
if (ORTE_SUCCESS != (rc = get_ftgrp_target(proc, &target, &nd))) {
ORTE_ERROR_LOG(rc);
goto error;
}
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output,
"%s rmaps:resilient: placing proc %s into fault group %d node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_NAME_PRINT(&proc->name), target->ftgrp, nd->name));
} else {
if (ORTE_SUCCESS != (rc = get_new_node(proc, app, jdata->map, &nd))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
}
/*
* Put the process on the found node (add it if not already in the map)
*/
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata,
nd,
jdata->map->cpus_per_rank,
proc->app_idx,
NULL,
jdata->map->oversubscribe,
false,
&proc))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
* really isn't an error
*/
if (ORTE_ERR_NODE_FULLY_USED != rc) {
ORTE_ERROR_LOG(rc);
goto error;
}
}
/* flag the proc state as non-launched so we'll know to launch it */
proc->state = ORTE_PROC_STATE_INIT;
/* update the node and local ranks so static ports can
* be properly selected if active
*/
orte_rmaps_base_update_local_ranks(jdata, oldnode, nd, proc);
}
if (!(ORTE_MAPPING_USE_VM & jdata->map->policy)) {
/* define the daemons that we will use for this job */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(jdata->map))) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
error:
return rc;
}
orte_rmaps_base_module_t orte_rmaps_resilient_module = {
orte_rmaps_resilient_map
};
static char *orte_getline(FILE *fp)
{
char *ret, *buff;
char input[1024];
ret = fgets(input, 1024, fp);
if (NULL != ret) {
input[strlen(input)-1] = '\0'; /* remove newline */
buff = strdup(input);
return buff;
}
return NULL;
}
static int construct_ftgrps(void)
{
orte_rmaps_res_ftgrp_t *ftgrp;
orte_node_t *node;
FILE *fp; FILE *fp;
char *ftinput; char *ftinput;
int grp; int grp;
char **nodes; char **nodes;
bool found; bool found;
orte_proc_t *proc; int i, k;
/* flag that we did this */
have_ftgrps = true;
if (NULL == mca_rmaps_resilient_component.fault_group_file) {
/* nothing to build */
return ORTE_SUCCESS;
}
/* have we already constructed the fault group list? */
if (0 == opal_list_get_size(&mca_rmaps_resilient_component.fault_grps) &&
NULL != mca_rmaps_resilient_component.fault_group_file) {
/* construct it */ /* construct it */
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output, OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output,
"%s rmaps:resilient: constructing fault groups", "%s rmaps:resilient: constructing fault groups",
@ -140,8 +276,9 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
if (NULL == fp) { /* not found */ if (NULL == fp) { /* not found */
orte_show_help("help-orte-rmaps-resilient.txt", "orte-rmaps-resilient:file-not-found", orte_show_help("help-orte-rmaps-resilient.txt", "orte-rmaps-resilient:file-not-found",
true, mca_rmaps_resilient_component.fault_group_file); true, mca_rmaps_resilient_component.fault_group_file);
return ORTE_ERR_SILENT; return ORTE_ERR_FAILED_TO_MAP;
} }
/* build list of fault groups */ /* build list of fault groups */
grp = 0; grp = 0;
while (NULL != (ftinput = orte_getline(fp))) { while (NULL != (ftinput = orte_getline(fp))) {
@ -172,49 +309,30 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
free(ftinput); free(ftinput);
} }
fclose(fp); fclose(fp);
}
/* the map will never be NULL as we initialize it before getting here, return ORTE_SUCCESS;
* so check to see if the job state is RESTART }
*
* NOTE: if a proc is being ADDED to an existing job, then its
* node field will be NULL.
*/
if (ORTE_JOB_STATE_RESTART == jdata->state) {
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output,
"%s rmaps:resilient: remapping job %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(jdata->jobid)));
/* cycle through all the procs in this job to find the one(s) that failed */
for (i=0; i < jdata->procs->size; i++) {
/* get the proc object */
if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, i))) {
continue;
}
/* is this proc to be restarted? */
if (proc->state != ORTE_PROC_STATE_RESTART) {
continue;
}
/* save the current node */
oldnode = proc->node;
/* point to the app */
app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, proc->app_idx);
if( NULL == app ) {
ORTE_ERROR_LOG(ORTE_ERROR);
rc = ORTE_ERROR;
goto error;
}
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output, static int get_ftgrp_target(orte_proc_t *proc,
"%s rmaps:resilient: proc %s from node %s is to be restarted", orte_rmaps_res_ftgrp_t **tgt,
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), orte_node_t **ndret)
ORTE_NAME_PRINT(&proc->name), {
(NULL == proc->node) ? "NULL" : proc->node->name)); opal_list_item_t *item;
/* if we have fault groups, flag all the fault groups that int k, totnodes;
orte_node_t *node, *nd;
orte_rmaps_res_ftgrp_t *target, *ftgrp;
float avgload, minload;
orte_vpid_t totprocs, lowprocs;
/* set defaults */
*tgt = NULL;
*ndret = NULL;
/* flag all the fault groups that
* include this node so we don't reuse them * include this node so we don't reuse them
*/ */
target = NULL;
minload = 1000000.0; minload = 1000000.0;
target = NULL;
for (item = opal_list_get_first(&mca_rmaps_resilient_component.fault_grps); for (item = opal_list_get_first(&mca_rmaps_resilient_component.fault_grps);
item != opal_list_get_end(&mca_rmaps_resilient_component.fault_grps); item != opal_list_get_end(&mca_rmaps_resilient_component.fault_grps);
item = opal_list_get_next(item)) { item = opal_list_get_next(item)) {
@ -261,96 +379,12 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
ftgrp->ftgrp)); ftgrp->ftgrp));
} }
} }
/* if no ftgrps are available, then just map it on the lightest loaded
* node known to the system, avoiding the current node if possible and
* taking into account any limitations specified by user in hostfile
* and -host options
*/
if (NULL == target) { if (NULL == target) {
nd = NULL; /* nothing found */
return ORTE_ERR_NOT_FOUND;
/*
* Get a list of all nodes
*/
OBJ_CONSTRUCT(&node_list, opal_list_t);
map = jdata->map;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list,
&num_slots,
app,
map->policy))) {
ORTE_ERROR_LOG(rc);
goto error;
} }
/* Ask the ErrMgr components if they have a suggestion for this process */
orte_errmgr.suggest_map_targets(proc, proc->node, &node_list);
nd = (orte_node_t*)opal_list_get_first(&node_list);
if( NULL == nd ) {
ORTE_ERROR_LOG(ORTE_ERROR);
rc = ORTE_ERROR;
goto error;
}
/*
* Look though the list for the least loaded machine.
*/
nd = oldnode; /* Put it back where it was if nothing else is found */
totprocs = 1000000;
found = false;
/* find the lightest loaded node while deconstructing the list */
while (NULL != (item = opal_list_remove_first(&node_list))) {
node = (orte_node_t*)item;
if( !found ) {
if( ((int)node->num_procs) < orte_rmaps_base.npernode ) {
nd = node;
totprocs = 0;
found = true;
}
else if( node->num_procs < totprocs) {
nd = node;
totprocs = node->num_procs;
}
}
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&node_list);
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output,
"%s rmaps:resilient: Placing process on node %s (no ftgrp)",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
nd->name));
/*
* Put the process on the found node (add it if not already in the map)
*/
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata,
nd,
jdata->map->cpus_per_rank,
proc->app_idx,
NULL,
jdata->map->oversubscribe,
false,
&proc))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
* really isn't an error
*/
if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc)) {
ORTE_ERROR_LOG(rc);
goto error;
}
}
/* flag the proc state as non-launched so we'll know to launch it */
proc->state = ORTE_PROC_STATE_INIT;
/* update the node and local ranks so static ports can
* be properly selected if active
*/
orte_rmaps_base_update_local_ranks(jdata, oldnode, nd, proc);
continue;
}
/* if we did find a target, re-map the proc to the lightest loaded /* if we did find a target, re-map the proc to the lightest loaded
* node in that group * node in that group
*/ */
@ -365,48 +399,174 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
nd = node; nd = node;
} }
} }
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output,
"%s rmaps:resilient: placing proc %s into fault group %d node %s", /* return the results */
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), *tgt = target;
ORTE_NAME_PRINT(&proc->name), target->ftgrp, nd->name)); *ndret = nd;
if (NULL != proc->node) {
OBJ_RELEASE(proc->node); /* required to maintain bookkeeping */ return ORTE_SUCCESS;
} }
/* put proc on the found node */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, nd, jdata->map->cpus_per_rank, proc->app_idx, static int get_new_node(orte_proc_t *proc,
NULL, jdata->map->oversubscribe, false, &proc))) { orte_app_context_t *app,
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this orte_job_map_t *map,
* really isn't an error orte_node_t **ndret)
{
orte_node_t *nd, *oldnode, *node;
int rc;
orte_vpid_t totprocs;
opal_list_t node_list;
opal_list_item_t *item;
orte_std_cntr_t num_slots;
/* if no ftgrps are available, then just map it on the lightest loaded
* node known to the system, avoiding the current node if possible and
* taking into account any limitations specified by user in hostfile
* and -host options
*/ */
if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc)) { *ndret = NULL;
nd = NULL;
oldnode = proc->node;
/*
* Get a list of all nodes
*/
OBJ_CONSTRUCT(&node_list, opal_list_t);
if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list,
&num_slots,
app,
map->policy))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto error; goto error;
} }
if (0 == opal_list_get_size(&node_list)) {
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto error;
} }
/* flag the proc state as non-launched so we'll know to launch it */
proc->state = ORTE_PROC_STATE_INIT; /*
/* update the node and local ranks so static ports can * Cycle thru the list to find the current node and remove it
* be properly selected if active *
*/ */
orte_rmaps_base_update_local_ranks(jdata, oldnode, nd, proc); nd = NULL;
for (item = opal_list_get_first(&node_list);
item != opal_list_get_end(&node_list);
item = opal_list_get_next(item)) {
node = (orte_node_t*)item;
OPAL_OUTPUT_VERBOSE((7, orte_rmaps_base.rmaps_output,
"%s CHECKING NODE %s[%s] AGAINST NODE %s[%s]",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
node->name,
(NULL == node->daemon) ? "?" : ORTE_VPID_PRINT(node->daemon->name.vpid),
oldnode->name,
(NULL == oldnode->daemon) ? "?" : ORTE_VPID_PRINT(oldnode->daemon->name.vpid)));
if (node == oldnode) {
/* remove it from the list */
opal_list_remove_item(&node_list, item);
/* maintain acctg */
OBJ_RELEASE(item);
break;
} }
/* define the daemons that we will use for this job */ }
if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(jdata->map))) {
ORTE_ERROR_LOG(rc); /* if the list is empty, then go ahead and install it back on
* its original node as this is better than not restarting
* at all
*/
if (0 == opal_list_get_size(&node_list)) {
nd = oldnode;
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output,
"%s rmaps:resilient: Placing process back on original node %s daemon %s (no ftgrp)",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
nd->name, ORTE_NAME_PRINT((&nd->daemon->name))));
} else {
/* put it on the lightest loaded node on the list */
totprocs = 1000000;
nd = NULL;
for (item = opal_list_get_first(&node_list);
item != opal_list_get_end(&node_list);
item = opal_list_get_next(item)) {
node = (orte_node_t*)item;
if (node->num_procs < totprocs) {
nd = node;
totprocs = node->num_procs;
}
}
if (NULL == nd) {
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output,
"%s ERROR - NO NODES AVAILABLE",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
/* this can only happen if no nodes are available - quietly return */
ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
rc = ORTE_ERR_OUT_OF_RESOURCE;
goto error;
}
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output,
"%s rmaps:resilient: Placing process on node %s daemon %s (no ftgrp)",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
(nd == oldnode) ? "OLDNODE" : nd->name,
ORTE_NAME_PRINT((&nd->daemon->name))));
}
error:
while (NULL != (item = opal_list_remove_first(&node_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&node_list);
*ndret = nd;
return rc; return rc;
} }
return ORTE_SUCCESS;
}
static void flag_nodes(opal_list_t *node_list)
{
opal_list_item_t *item, *nitem;
orte_node_t *node, *nd;
orte_rmaps_res_ftgrp_t *ftgrp;
int k;
/* CREATE INITIAL MAP FOR A JOB */ for (item = opal_list_get_first(&mca_rmaps_resilient_component.fault_grps);
/* we map each app_context separately when creating an initial job map. For item != opal_list_get_end(&mca_rmaps_resilient_component.fault_grps);
* each app_context, we get the list of available nodes as this can be item = opal_list_get_next(item)) {
* app_context specific based on hostfile and -host options. We then organize ftgrp = (orte_rmaps_res_ftgrp_t*)item;
* that list into fault groups based on the fault group definitions, if /* reset the flags */
* provided, and then divide the specified number of copies across them in ftgrp->used = false;
* a load-balanced way ftgrp->included = false;
/* if at least one node in our list is included in this
* ftgrp, then flag it as included
*/ */
for (nitem = opal_list_get_first(node_list);
!ftgrp->included && nitem != opal_list_get_end(node_list);
nitem = opal_list_get_next(nitem)) {
node = (orte_node_t*)nitem;
for (k=0; k < ftgrp->nodes.size; k++) {
if (NULL == (nd = (orte_node_t*)opal_pointer_array_get_item(&ftgrp->nodes, k))) {
continue;
}
if (0 == strcmp(nd->name, node->name)) {
ftgrp->included = true;
break;
}
}
}
}
}
static int map_to_ftgrps(orte_job_t *jdata)
{
orte_job_map_t *map;
orte_app_context_t *app;
int i, j, k, totnodes;
opal_list_t node_list;
opal_list_item_t *item, *next, *curitem;
orte_std_cntr_t num_slots;
int rc = ORTE_SUCCESS;
float avgload, minload;
orte_node_t *node, *nd=NULL;
orte_rmaps_res_ftgrp_t *ftgrp, *target = NULL;
orte_vpid_t totprocs, num_assigned;
orte_proc_t *proc;
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output, OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output,
"%s rmaps:resilient: creating initial map for job %s", "%s rmaps:resilient: creating initial map for job %s",
@ -440,13 +600,23 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app, if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
map->policy))) { map->policy))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto error; return rc;
} }
/* were we given a fault group definition? */ /* remove all nodes that are not "up" or do not have a running daemon on them */
if (0 < opal_list_get_size(&mca_rmaps_resilient_component.fault_grps)) { item = opal_list_get_first(&node_list);
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output, while (item != opal_list_get_end(&node_list)) {
"%s rmaps:resilient: using fault groups", next = opal_list_get_next(item);
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); node = (orte_node_t*)item;
if (ORTE_NODE_STATE_UP != node->state ||
NULL == node->daemon ||
ORTE_PROC_STATE_RUNNING != node->daemon->state) {
opal_list_remove_item(&node_list, item);
OBJ_RELEASE(item);
}
item = next;
}
curitem = opal_list_get_first(&node_list);
/* flag the fault groups included by these nodes */ /* flag the fault groups included by these nodes */
flag_nodes(&node_list); flag_nodes(&node_list);
/* map each copy to a different fault group - if more copies are /* map each copy to a different fault group - if more copies are
@ -501,13 +671,14 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
*/ */
if (NULL == target) { if (NULL == target) {
OPAL_OUTPUT_VERBOSE((2, orte_rmaps_base.rmaps_output, OPAL_OUTPUT_VERBOSE((2, orte_rmaps_base.rmaps_output,
"%s rmaps:resilient: no available fault group - mapping rr", "%s rmaps:resilient: more procs than fault groups - mapping excess rr",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
if (ORTE_SUCCESS != (rc = rr_map_default(jdata, app, &node_list, app->num_procs-num_assigned))) { nd = (orte_node_t*)curitem;
goto error; curitem = opal_list_get_next(curitem);
} if (curitem == opal_list_get_end(&node_list)) {
goto cleanup; curitem = opal_list_get_first(&node_list);
} }
} else {
/* pick node with lowest load from within that group */ /* pick node with lowest load from within that group */
totprocs = 1000000; totprocs = 1000000;
for (k=0; k < target->nodes.size; k++) { for (k=0; k < target->nodes.size; k++) {
@ -519,40 +690,35 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
nd = node; nd = node;
} }
} }
}
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output, OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output,
"%s rmaps:resilient: placing proc into fault group %d node %s", "%s rmaps:resilient: placing proc into fault group %d node %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
target->ftgrp, nd->name)); (NULL == target) ? -1 : target->ftgrp, nd->name));
/* put proc on that node */ /* put proc on that node */
proc=NULL;
if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, nd, jdata->map->cpus_per_rank, app->idx, if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, nd, jdata->map->cpus_per_rank, app->idx,
&node_list, jdata->map->oversubscribe, false, NULL))) { &node_list, jdata->map->oversubscribe, false, &proc))) {
/** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
* really isn't an error * really isn't an error
*/ */
if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc)) { if (ORTE_ERR_NODE_FULLY_USED != rc) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
goto error; return rc;
} }
} }
/* flag the proc as ready for launch */
proc->state = ORTE_PROC_STATE_INIT;
/* track number of procs mapped */ /* track number of procs mapped */
num_assigned++; num_assigned++;
/* flag this fault group as used */ /* flag this fault group as used */
if (NULL != target) {
target->used = true; target->used = true;
} }
} else {
/* if we don't have a fault group definition, then just map the
* procs in a round-robin manner
*/
OPAL_OUTPUT_VERBOSE((1, orte_rmaps_base.rmaps_output,
"%s rmaps:resilient: no fault groups provided - mapping rr",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
if (ORTE_SUCCESS != (rc = rr_map_default(jdata, app, &node_list, app->num_procs))) {
goto error;
}
} }
cleanup:
/* track number of procs */ /* track number of procs */
jdata->num_procs += app->num_procs; jdata->num_procs += app->num_procs;
@ -580,39 +746,13 @@ static int orte_rmaps_resilient_map(orte_job_t *jdata)
return rc; return rc;
} }
if (!(ORTE_MAPPING_USE_VM & jdata->map->policy)) {
/* define the daemons that we will use for this job */ /* define the daemons that we will use for this job */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(jdata->map))) { if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(jdata->map))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);
return rc; return rc;
} }
}
return ORTE_SUCCESS; return ORTE_SUCCESS;
error:
while (NULL != (item = opal_list_remove_first(&node_list))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&node_list);
return rc;
} }
orte_rmaps_base_module_t orte_rmaps_resilient_module = {
orte_rmaps_resilient_map
};
static char *orte_getline(FILE *fp)
{
char *ret, *buff;
char input[1024];
ret = fgets(input, 1024, fp);
if (NULL != ret) {
input[strlen(input)-1] = '\0'; /* remove newline */
buff = strdup(input);
return buff;
}
return NULL;
}

Просмотреть файл

@ -37,6 +37,8 @@ static int orte_rmaps_resilient_open(void);
static int orte_rmaps_resilient_close(void); static int orte_rmaps_resilient_close(void);
static int orte_rmaps_resilient_query(mca_base_module_t **module, int *priority); static int orte_rmaps_resilient_query(mca_base_module_t **module, int *priority);
static int my_priority;
orte_rmaps_res_component_t mca_rmaps_resilient_component = { orte_rmaps_res_component_t mca_rmaps_resilient_component = {
{ {
{ {
@ -73,16 +75,20 @@ static int orte_rmaps_resilient_open(void)
"Filename that contains a description of fault groups for this system", "Filename that contains a description of fault groups for this system",
false, false, NULL, &mca_rmaps_resilient_component.fault_group_file); false, false, NULL, &mca_rmaps_resilient_component.fault_group_file);
mca_base_param_reg_int(c, "priority",
"Priority of the resilient rmaps component",
false, false, 40,
&my_priority);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
static int orte_rmaps_resilient_query(mca_base_module_t **module, int *priority) static int orte_rmaps_resilient_query(mca_base_module_t **module, int *priority)
{ {
*priority = 0; /* select only if specified */ *priority = my_priority;
*module = (mca_base_module_t *)&orte_rmaps_resilient_module; *module = (mca_base_module_t *)&orte_rmaps_resilient_module;
/* if a fault group file was provided, we definitely want to be selected */ /* if a fault group file was provided, we should be first */
if (NULL != mca_rmaps_resilient_component.fault_group_file) { if (NULL != mca_rmaps_resilient_component.fault_group_file) {
*priority = 1000; *priority = 1000;
} }

Просмотреть файл

@ -33,6 +33,16 @@
BEGIN_C_DECLS BEGIN_C_DECLS
/* enumerate selectable mappers */
enum {
ORTE_RMAPS_UNDEF,
ORTE_RMAPS_RR,
ORTE_RMAPS_LOADBALANCE,
ORTE_RMAPS_SEQ,
ORTE_RMAPS_RF,
ORTE_RMAPS_RESILIENT
};
/* /*
* Structure that represents the mapping of a job to an * Structure that represents the mapping of a job to an
* allocated set of resources. * allocated set of resources.
@ -40,6 +50,7 @@ BEGIN_C_DECLS
struct orte_job_map_t { struct orte_job_map_t {
opal_object_t super; opal_object_t super;
/* user-specified mapping params */ /* user-specified mapping params */
int32_t mapper;
orte_mapping_policy_t policy; orte_mapping_policy_t policy;
int npernode; int npernode;
int nperboard; int nperboard;

Просмотреть файл

@ -34,6 +34,7 @@
#include "orte/util/show_help.h" #include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h" #include "orte/mca/errmgr/errmgr.h"
#include "orte/util/error_strings.h"
#include "orte/mca/rmaps/base/rmaps_private.h" #include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/base/base.h" #include "orte/mca/rmaps/base/base.h"
@ -53,6 +54,29 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
int rc; int rc;
opal_list_item_t *cur_node_item; opal_list_item_t *cur_node_item;
/* this mapper can only handle initial launch
* when rr mapping is desired - allow
* restarting of failed apps
*/
if (ORTE_JOB_STATE_INIT != jdata->state) {
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:rr: not job %s in state %s - rr cannot map",
ORTE_JOBID_PRINT(jdata->jobid),
orte_job_state_to_str(jdata->state));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
if (0 < jdata->map->mapper && ORTE_RMAPS_RR != jdata->map->mapper) {
/* a mapper has been specified, and it isn't me */
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:rr: job %s not using rr mapper",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:rr: mapping job %s",
ORTE_JOBID_PRINT(jdata->jobid));
/* start at the beginning... */ /* start at the beginning... */
jdata->num_procs = 0; jdata->num_procs = 0;
@ -138,7 +162,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
return ORTE_SUCCESS; return ORTE_SUCCESS;
error: error:
while(NULL != (item = opal_list_remove_first(&node_list))) { while(NULL != (item = opal_list_remove_first(&node_list))) {
OBJ_RELEASE(item); OBJ_RELEASE(item);
} }

Просмотреть файл

@ -33,6 +33,7 @@ static int orte_rmaps_round_robin_open(void);
static int orte_rmaps_round_robin_close(void); static int orte_rmaps_round_robin_close(void);
static int orte_rmaps_round_robin_query(mca_base_module_t **module, int *priority); static int orte_rmaps_round_robin_query(mca_base_module_t **module, int *priority);
static int my_priority;
orte_rmaps_base_component_t mca_rmaps_round_robin_component = { orte_rmaps_base_component_t mca_rmaps_round_robin_component = {
{ {
@ -58,6 +59,12 @@ orte_rmaps_base_component_t mca_rmaps_round_robin_component = {
*/ */
static int orte_rmaps_round_robin_open(void) static int orte_rmaps_round_robin_open(void)
{ {
mca_base_component_t *c = &mca_rmaps_round_robin_component.base_version;
mca_base_param_reg_int(c, "priority",
"Priority of the rr rmaps component",
false, false, 100,
&my_priority);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
@ -68,7 +75,7 @@ static int orte_rmaps_round_robin_query(mca_base_module_t **module, int *priorit
* so no need to check for that here * so no need to check for that here
*/ */
*priority = 70; /* this is the default mapper */ *priority = my_priority;
*module = (mca_base_module_t *)&orte_rmaps_round_robin_module; *module = (mca_base_module_t *)&orte_rmaps_round_robin_module;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }

Просмотреть файл

@ -76,6 +76,28 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(jdata->jobid))); ORTE_JOBID_PRINT(jdata->jobid)));
/* this mapper can only handle initial launch
* when seq mapping is desired - allow
* restarting of failed apps
*/
if (ORTE_JOB_STATE_INIT != jdata->state) {
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:seq: not job %s not in initial state - seq cannot map",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
if (0 < jdata->map->mapper && ORTE_RMAPS_SEQ != jdata->map->mapper) {
/* a mapper has been specified, and it isn't me */
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:seq: job %s not using sequential mapper",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:seq: mapping job %s",
ORTE_JOBID_PRINT(jdata->jobid));
/* conveniece def */ /* conveniece def */
map = jdata->map; map = jdata->map;

Просмотреть файл

@ -33,6 +33,7 @@ static int orte_rmaps_seq_open(void);
static int orte_rmaps_seq_close(void); static int orte_rmaps_seq_close(void);
static int orte_rmaps_seq_query(mca_base_module_t **module, int *priority); static int orte_rmaps_seq_query(mca_base_module_t **module, int *priority);
static int my_priority;
orte_rmaps_base_component_t mca_rmaps_seq_component = { orte_rmaps_base_component_t mca_rmaps_seq_component = {
{ {
@ -58,17 +59,19 @@ orte_rmaps_base_component_t mca_rmaps_seq_component = {
*/ */
static int orte_rmaps_seq_open(void) static int orte_rmaps_seq_open(void)
{ {
mca_base_component_t *c = &mca_rmaps_seq_component.base_version;
mca_base_param_reg_int(c, "priority",
"Priority of the seq rmaps component",
false, false, 60,
&my_priority);
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }
static int orte_rmaps_seq_query(mca_base_module_t **module, int *priority) static int orte_rmaps_seq_query(mca_base_module_t **module, int *priority)
{ {
/* the RMAPS framework is -only- opened on HNP's, *priority = my_priority;
* so no need to check for that here
*/
*priority = 0; /* only select if specified */
*module = (mca_base_module_t *)&orte_rmaps_seq_module; *module = (mca_base_module_t *)&orte_rmaps_seq_module;
return ORTE_SUCCESS; return ORTE_SUCCESS;
} }

Просмотреть файл

@ -1,12 +0,0 @@
#
# Copyright (c) 2008-2010 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module
mca_link_libraries=libopen-rte

Просмотреть файл

@ -1,48 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
EXTRA_DIST = .windows
dist_pkgdata_DATA = help-orte-rmaps-topo.txt
sources = \
rmaps_topo.c \
rmaps_topo.h \
rmaps_topo_component.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_orte_rmaps_topo_DSO
component_noinst =
component_install = mca_rmaps_topo.la
else
component_noinst = libmca_rmaps_topo.la
component_install =
endif
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_rmaps_topo_la_SOURCES = $(sources)
mca_rmaps_topo_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_rmaps_topo_la_SOURCES =$(sources)
libmca_rmaps_topo_la_LDFLAGS = -module -avoid-version

Просмотреть файл

@ -1,53 +0,0 @@
# -*- text -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English general help file for Open RTE's orterun.
#
[orte-rmaps-topo:alloc-error]
There are not enough slots available in the system to satisfy the %d slots
that were requested by the application:
%s
Either request fewer slots for your application, or make more slots available
for use.
[orte-rmaps-topo:multi-apps-and-zero-np]
RMAPS found multiple applications to be launched, with
at least one that failed to specify the number of processes to execute.
When specifying multiple applications, you must specify how many processes
of each to launch via the -np argument.
[orte-rmaps-topo:per-node-and-too-many-procs]
There are not enough nodes in your allocation to satisfy your request to launch
%d processes on a per-node basis - only %d nodes were available.
Either request fewer processes, or obtain a larger allocation.
[orte-rmaps-topo:n-per-node-and-too-many-procs]
There are not enough nodes in your allocation to satisfy your request to launch
%d processes on a %d per-node basis - only %d nodes with a total of %d slots were available.
Either request fewer processes, or obtain a larger allocation.
[orte-rmaps-topo:n-per-node-and-not-enough-slots]
There are not enough slots on the nodes in your allocation to satisfy your request to launch on a %d process-per-node basis - only %d slots/node were available.
Either request fewer processes/node, or obtain a larger allocation.
[orte-rmaps-topo:no-np-and-user-map]
You have specified a rank-to-node/slot mapping, but failed to provide
the number of processes to be executed. For some reason, this information
could not be obtained from the mapping you provided, so we cannot continue
with executing the specified application.

Просмотреть файл

@ -1,546 +0,0 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/types.h"
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/trace.h"
#include "opal/util/opal_sos.h"
#include "opal/mca/carto/base/base.h"
#include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/base/base.h"
#include "rmaps_topo.h"
static int topo_map(orte_job_t *jdata);
orte_rmaps_base_module_t orte_rmaps_topo_module = {
topo_map
};
/*
* Local variable
*/
static opal_list_item_t *cur_node_item = NULL;
static int ppn = 0;
/*
* Create a default mapping for the application, scheduling round
* robin by node.
*/
/*
 * Map one app_context round-robin by NODE: place one process on each
 * node in turn, wrapping around the list, until app->num_procs
 * processes have been assigned.
 *
 * Uses the file-scope cursor cur_node_item as the starting/continuing
 * position and advances it as processes are placed.
 *
 * @param app        app_context whose processes are being mapped
 * @param jdata      job whose map is being constructed
 * @param vpid_start starting vpid for this app_context (unused in this
 *                   function; vpids are assigned elsewhere)
 * @param nodes      candidate nodes; claim_slot prunes fully-used nodes
 *                   from this list as we go
 *
 * @return ORTE_SUCCESS on success, ORTE_ERR_SILENT if we run out of
 *         nodes (help message already printed), or an error code
 *         propagated from orte_rmaps_base_claim_slot
 */
static int map_app_by_node(
    orte_app_context_t* app,
    orte_job_t* jdata,
    orte_vpid_t vpid_start,
    opal_list_t* nodes)
{
    int rc = ORTE_SUCCESS;
    opal_list_item_t *next;
    orte_node_t *node;
    orte_std_cntr_t num_alloc=0;

    OPAL_TRACE(2);

    /* This loop continues until all procs have been mapped or we run
       out of resources. We determine that we have "run out of
       resources" when all nodes have slots_max processes mapped to them,
       thus there are no free slots for a process to be mapped, or we have
       hit the soft limit on all nodes and are in a "no oversubscribe" state.
       If we still have processes that haven't been mapped yet, then it's an
       "out of resources" error.

       In this scenario, we rely on the claim_slot function to handle the
       oversubscribed case. The claim_slot function will leave a node on the
       list until it either reaches slots_max OR reaches the
       soft limit and the "no_oversubscribe" flag has been set - at which point,
       the node will be removed to prevent any more processes from being mapped to
       it. Since we are taking one slot from each node as we cycle through the
       list, oversubscription is automatically taken care of via this logic.
    */
    while (num_alloc < app->num_procs) {
        /** see if any nodes remain unused and available. We need to do this check
         * each time since we may remove nodes from the list (as they become fully
         * used) as we cycle through the loop */
        if(0 >= opal_list_get_size(nodes) ) {
            /* No more nodes to allocate :( */
            orte_show_help("help-orte-rmaps-topo.txt", "orte-rmaps-topo:alloc-error",
                           true, app->num_procs, app->app);
            return ORTE_ERR_SILENT;
        }
        /* Save the next node we can use before claiming slots, since
         * we may need to prune the nodes list removing overused nodes.
         * Wrap around to beginning if we are at the end of the list */
        if (opal_list_get_end(nodes) == opal_list_get_next(cur_node_item)) {
            next = opal_list_get_first(nodes);
        }
        else {
            next = opal_list_get_next(cur_node_item);
        }
        /* Allocate a slot on this node */
        node = (orte_node_t*) cur_node_item;
        if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, 1, app->idx,
                                                             nodes, jdata->map->oversubscribe, true, NULL))) {
            /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
             * really isn't an error - we just need to break from the loop
             * since the node is fully used up. For now, just don't report
             * an error
             */
            if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc)) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }
        }
        ++num_alloc;
        cur_node_item = next;
    }

    return ORTE_SUCCESS;
}
/*
* Create a default mapping for the application, scheduling one round
* robin by slot.
*/
/*
 * Map one app_context round-robin by SLOT: fill the available slots on
 * the current node (or npernode slots, if that mode is active) before
 * moving to the next node, wrapping around the list until
 * app->num_procs processes have been assigned.
 *
 * Uses the file-scope cursor cur_node_item as the starting/continuing
 * position and the file-scope ppn limit when loadbalancing is enabled.
 *
 * @param app        app_context whose processes are being mapped
 * @param jdata      job whose map is being constructed
 * @param vpid_start starting vpid for this app_context (unused in this
 *                   function; vpids are assigned elsewhere)
 * @param nodes      candidate nodes; claim_slot prunes fully-used nodes
 *                   from this list as we go
 *
 * @return ORTE_SUCCESS on success, ORTE_ERR_SILENT if we run out of
 *         nodes (help message already printed), or an error code
 *         propagated from orte_rmaps_base_claim_slot
 */
static int map_app_by_slot(
    orte_app_context_t* app,
    orte_job_t* jdata,
    orte_vpid_t vpid_start,
    opal_list_t* nodes)
{
    int rc = ORTE_SUCCESS;
    orte_std_cntr_t i, num_slots_to_take;
    orte_node_t *node;
    opal_list_item_t *next;
    orte_std_cntr_t num_alloc=0;

    OPAL_TRACE(2);

    /* This loop continues until all procs have been mapped or we run
       out of resources. We determine that we have "run out of
       resources" when either all nodes have slots_max processes mapped to them,
       (thus there are no free slots for a process to be mapped), OR all nodes
       have reached their soft limit and the user directed us to "no oversubscribe".
       If we still have processes that haven't been mapped yet, then it's an
       "out of resources" error. */
    while ( num_alloc < app->num_procs) {
        /** see if any nodes remain unused and available. We need to do this check
         * each time since we may remove nodes from the list (as they become fully
         * used) as we cycle through the loop */
        if(0 >= opal_list_get_size(nodes) ) {
            /* Everything is at max usage! :( */
            orte_show_help("help-orte-rmaps-topo.txt", "orte-rmaps-topo:alloc-error",
                           true, app->num_procs, app->app);
            return ORTE_ERR_SILENT;
        }
        /* Save the next node we can use before claiming slots, since
         * we may need to prune the nodes list removing overused nodes.
         * Wrap around to beginning if we are at the end of the list */
        if (opal_list_get_end(nodes) == opal_list_get_next(cur_node_item)) {
            next = opal_list_get_first(nodes);
        }
        else {
            next = opal_list_get_next(cur_node_item);
        }
        /** declare a shorter name for convenience in the code below */
        node = (orte_node_t*) cur_node_item;
        /* If we have available slots on this node, claim all of them
         * If node_slots == 0, assume 1 slot for that node.
         * JJH - is this assumption fully justified?
         *
         * If we are now oversubscribing the nodes, then we still take:
         * (a) if the node has not been used yet, we take a full node_slots
         * (b) if some of the slots are in-use, then we take the number of
         *     remaining slots before hitting the soft limit (node_slots)
         * (c) if we are at or above the soft limit, we take a full node_slots
         *
         * Note: if node_slots is zero, then we always just take 1 slot
         *
         * We continue this process until either everything is done,
         * or all nodes have hit their hard limit. This algorithm ensures we
         * fully utilize each node before oversubscribing, and preserves the ratio
         * of processes between the nodes thereafter (e.g., if one node has twice as
         * many processes as another before oversubscribing, it will continue
         * to do so after oversubscribing).
         */
        if (0 == node->slots_inuse ||
            node->slots_inuse >= node->slots) {
            num_slots_to_take = (node->slots == 0) ? 1 : node->slots;
        } else {
            num_slots_to_take = node->slots - node->slots_inuse;
        }

        /* check if we are in npernode mode - if so, then set the num_slots_to_take
         * to the num_per_node
         */
        if (0 < jdata->map->npernode) {
            num_slots_to_take = jdata->map->npernode;
        }

        for( i = 0; i < num_slots_to_take; ++i) {
            if (ORTE_SUCCESS != (rc = orte_rmaps_base_claim_slot(jdata, node, 1, app->idx,
                                                                 nodes, jdata->map->oversubscribe, true, NULL))) {
                /** if the code is ORTE_ERR_NODE_FULLY_USED, then we know this
                 * really isn't an error - we just need to break from the loop
                 * since the node is fully used up. For now, just don't report
                 * an error
                 */
                if (ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc)) {
                    ORTE_ERROR_LOG(rc);
                    return rc;
                }
            }

            /* Update the number of procs allocated */
            ++num_alloc;

            /** if all the procs have been mapped, we return */
            if (num_alloc == app->num_procs) {
                return ORTE_SUCCESS;
            }

            /* if we have fully used up this node
             * OR we are at our ppn and loadbalancing, then break from the loop
             */
            if (ORTE_ERR_NODE_FULLY_USED == OPAL_SOS_GET_ERROR_CODE(rc) ||
                (orte_rmaps_base.loadbalance && (int)node->num_procs >= ppn)) {
                break;
            }
        }

        /* we move on to the next node in all cases EXCEPT if we came
         * out of the loop without having taken a full bite AND the
         * node is NOT max'd out
         */
        if (i < (num_slots_to_take-1) &&
            ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc) &&
            (orte_rmaps_base.loadbalance && (int)node->num_procs < ppn)) {
            continue;
        }
        cur_node_item = next;
    }

    return ORTE_SUCCESS;
}
/*
* Create a topo-aware mapping for the job.
*/
/*
 * Entry point of the (never completed) topology-aware mapper: intended
 * to order nodes by network nearness via the OPAL carto framework and
 * then fall back to round-robin placement by node or slot.
 *
 * NOTE(review): the carto-distance ordering is stubbed out inside the
 * "#if 0" section below and the distance results are never applied to
 * the node list - the mapper effectively behaves as a round-robin
 * mapper with an oversubscription-avoidance pass.
 *
 * NOTE(review): the "distance" value array is OBJ_CONSTRUCTed per
 * app_context but never destructed, and "graph" is never released -
 * apparent resource leaks in this unfinished code.
 *
 * @param jdata  the job to map; jdata->map supplies policy/npernode,
 *               and jdata->num_procs and jdata->bookmark are updated
 *
 * @return ORTE_SUCCESS, ORTE_ERR_SILENT for user-reportable errors
 *         (help message already printed), or an error code from the
 *         carto/rmaps support routines
 */
static int topo_map(orte_job_t *jdata)
{
    orte_job_map_t *map;
    orte_app_context_t *app;
    int i;
    opal_list_t node_list;
    opal_list_item_t *item;
    orte_node_t *node, *nd1;
    orte_vpid_t vpid_start;
    orte_std_cntr_t num_nodes, num_slots;
    int rc;
    orte_std_cntr_t slots_per_node;
    opal_carto_graph_t *graph;
    opal_carto_base_node_t *crnode;
    opal_value_array_t distance;

    OPAL_TRACE(1);

    /* convenience def */
    map = jdata->map;

    /* start at the beginning... */
    vpid_start = 0;
    jdata->num_procs = 0;

    /* get the graph of nodes */
    if (ORTE_SUCCESS != (rc = opal_carto_base_get_host_graph(&graph, "SLOT"))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* cycle through the app_contexts, mapping them sequentially */
    for(i=0; i < jdata->apps->size; i++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
            continue;
        }

        /* if the number of processes wasn't specified, then we know there can be only
         * one app_context allowed in the launch, and that we are to launch it across
         * all available slots. We'll double-check the single app_context rule first
         */
        if (0 == app->num_procs && 1 < jdata->num_apps) {
            orte_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:multi-apps-and-zero-np",
                           true, jdata->num_apps, NULL);
            rc = ORTE_ERR_SILENT;
            goto error;
        }

        /* for each app_context, we have to get the list of nodes that it can
         * use since that can now be modified with a hostfile and/or -host
         * option
         */
        OBJ_CONSTRUCT(&node_list, opal_list_t);
        if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
                                                                  map->policy))) {
            ORTE_ERROR_LOG(rc);
            goto error;
        }
        num_nodes = (orte_std_cntr_t)opal_list_get_size(&node_list);

        /* if a bookmark exists from some prior mapping, set us to start there */
        if (NULL != jdata->bookmark) {
            cur_node_item = NULL;
            /* find this node on the list */
            for (item = opal_list_get_first(&node_list);
                 item != opal_list_get_end(&node_list);
                 item = opal_list_get_next(item)) {
                node = (orte_node_t*)item;
                if (node->index == jdata->bookmark->index) {
                    cur_node_item = item;
                    break;
                }
            }
            /* see if we found it - if not, just start at the beginning */
            if (NULL == cur_node_item) {
                cur_node_item = opal_list_get_first(&node_list);
            }
        } else {
            /* if no bookmark, then just start at the beginning of the list */
            cur_node_item = opal_list_get_first(&node_list);
        }

        /* order this list by network nearness - i.e., the next item in the
         * list should be the node that is closest [in a network sense] to
         * the prior item in the list
         *
         * RHC: start the list with the bookmark node as this is where
         * we would start mapping
         */
        node = (orte_node_t*)cur_node_item;
        if (NULL == (crnode = opal_carto_base_find_node(graph, node->name))) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            rc = ORTE_ERR_NOT_FOUND;
            goto error;
        }
        OBJ_CONSTRUCT(&distance, opal_value_array_t);
        if (ORTE_SUCCESS != (rc = opal_carto_base_get_nodes_distance(graph, crnode,
                                                                     "SLOT", &distance))) {
            ORTE_ERROR_LOG(rc);
            goto error;
        }
        /* cycle through the nodes in the distance array - these
         * should be in order based on distance
         */
#if 0
        /* RHC: need to create a working list of nodes that is ordered
         * according to distance. The get_nodes_distance function returns
         * this, but it covers -all- nodes, so we have to filter that
         * against the allocated node list to create the new
         * working_node_list
         * (never completed - this section is compiled out)
         */
        for (i=0; i < distance.size; i++) {
            if
        }
        for (item = opal_list_get_first(&node_list);
             item != opal_list_get_end(&node_list);
             item = opal_list_get_next(item)) {
            node = (orte_node_t*)item;
            if (NULL == (crnode = opal_carto.find_node(graph, node->name))) {
                ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
                rc = ORTE_ERR_NOT_FOUND;
                goto error;
            }
            /* look this node up in the distance array */
#endif

        /* is this node oversubscribed? */
        node = (orte_node_t*)cur_node_item;
        if (node->slots_inuse > node->slots) {
            /* work down the list - is there another node that
             * would not be oversubscribed?
             */
            if (cur_node_item != opal_list_get_end(&node_list)) {
                item = opal_list_get_next(cur_node_item);
            } else {
                item = opal_list_get_first(&node_list);
            }
            while (item != cur_node_item) {
                nd1 = (orte_node_t*)item;
                if (nd1->slots_inuse < nd1->slots) {
                    /* this node is not oversubscribed! use it! */
                    cur_node_item = item;
                    goto proceed;
                }
                if (item == opal_list_get_end(&node_list)) {
                    item = opal_list_get_first(&node_list);
                } else {
                    item= opal_list_get_next(item);
                }
            }
            /* if we get here, then we cycled all the way around the
             * list without finding a better answer - just use what
             * we have
             */
        }

    proceed:
        if (map->npernode == 1) {
            /* there are three use-cases that we need to deal with:
             * (a) if -np was not provided, then we just use the number of nodes
             * (b) if -np was provided AND #procs > #nodes, then error out
             * (c) if -np was provided AND #procs <= #nodes, then launch
             *     the specified #procs one/node. In this case, we just
             *     leave app->num_procs alone
             */
            if (0 == app->num_procs) {
                app->num_procs = num_nodes;
            } else if (app->num_procs > num_nodes) {
                orte_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:per-node-and-too-many-procs",
                               true, app->num_procs, num_nodes, NULL);
                rc = ORTE_ERR_SILENT;
                goto error;
            }
        } else if (map->npernode > 1) {
            /* first, let's check to see if there are enough slots/node to
             * meet the request - error out if not
             */
            slots_per_node = num_slots / num_nodes;
            if (map->npernode > slots_per_node) {
                orte_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:n-per-node-and-not-enough-slots",
                               true, map->npernode, slots_per_node, NULL);
                rc = ORTE_ERR_SILENT;
                goto error;
            }
            /* there are three use-cases that we need to deal with:
             * (a) if -np was not provided, then we just use the n/node * #nodes
             * (b) if -np was provided AND #procs > (n/node * #nodes), then error out
             * (c) if -np was provided AND #procs <= (n/node * #nodes), then launch
             *     the specified #procs n/node. In this case, we just
             *     leave app->num_procs alone
             */
            if (0 == app->num_procs) {
                /* set the num_procs to equal the specified num/node * the number of nodes */
                app->num_procs = map->npernode * num_nodes;
            } else if (app->num_procs > (map->npernode * num_nodes)) {
                orte_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:n-per-node-and-too-many-procs",
                               true, app->num_procs, map->npernode, num_nodes, num_slots, NULL);
                rc = ORTE_ERR_SILENT;
                goto error;
            }
        } else if (0 == app->num_procs) {
            /** set the num_procs to equal the number of slots on these mapped nodes - if
                user has specified "-bynode", then set it to the number of nodes
            */
            if (map->policy & ORTE_MAPPING_BYNODE) {
                app->num_procs = num_nodes;
            } else if (map->policy & ORTE_MAPPING_BYSLOT) {
                app->num_procs = num_slots;
            } else {
                /* we can't handle this - it should have been set when we got
                 * the map info. If it wasn't, then we can only error out
                 */
                orte_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:no-np-and-user-map",
                               true, app->num_procs, map->npernode, num_nodes, num_slots, NULL);
                rc = ORTE_ERR_SILENT;
                goto error;
            }
        }

        /** track the total number of processes we mapped */
        jdata->num_procs += app->num_procs;

        /* Make assignments */
        if (map->policy == ORTE_MAPPING_BYNODE) {
            rc = map_app_by_node(app, jdata, vpid_start, &node_list);
        } else {
            rc = map_app_by_slot(app, jdata, vpid_start, &node_list);
        }

        /* update the starting vpid for the next app_context */
        vpid_start += app->num_procs;

        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            goto error;
        }

        /* save the bookmark */
        jdata->bookmark = (orte_node_t*)cur_node_item;

        /* cleanup the node list - it can differ from one app_context
         * to another, so we have to get it every time
         */
        while(NULL != (item = opal_list_remove_first(&node_list))) {
            OBJ_RELEASE(item);
        }
        OBJ_DESTRUCT(&node_list);
    }

    /* compute and save convenience values */
    if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* define the daemons that we will use for this job */
    if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(map))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    return ORTE_SUCCESS;

 error:
    while(NULL != (item = opal_list_remove_first(&node_list))) {
        OBJ_RELEASE(item);
    }
    OBJ_DESTRUCT(&node_list);

    return rc;
}

Просмотреть файл

@ -1,37 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
 * @file
 *
 * Resource Mapping
 *
 * Public declarations for the topo rmaps component: the MCA component
 * descriptor and the module implementing the mapping interface.
 */
#ifndef ORTE_RMAPS_TOPO_H
#define ORTE_RMAPS_TOPO_H

#include "orte_config.h"
#include "orte/mca/rmaps/rmaps.h"

BEGIN_C_DECLS

/* component descriptor (open/close/query) - defined in rmaps_topo_component.c */
ORTE_MODULE_DECLSPEC extern orte_rmaps_base_component_t mca_rmaps_topo_component;
/* module providing the map function - defined in rmaps_topo.c */
extern orte_rmaps_base_module_t orte_rmaps_topo_module;

END_C_DECLS

#endif

Просмотреть файл

@ -1,85 +0,0 @@
/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "rmaps_topo.h"
/*
 * Local functions
 */
static int orte_rmaps_topo_open(void);
static int orte_rmaps_topo_close(void);
static int orte_rmaps_topo_query(mca_base_module_t **module, int *priority);

/*
 * MCA component descriptor for the topo rmaps component: identifies the
 * component to the MCA framework and supplies its lifecycle callbacks.
 */
orte_rmaps_base_component_t mca_rmaps_topo_component = {
    {
        ORTE_RMAPS_BASE_VERSION_2_0_0,

        "topo", /* MCA component name */
        ORTE_MAJOR_VERSION,  /* MCA component major version */
        ORTE_MINOR_VERSION,  /* MCA component minor version */
        ORTE_RELEASE_VERSION,  /* MCA component release version */
        orte_rmaps_topo_open,  /* component open  */
        orte_rmaps_topo_close, /* component close */
        orte_rmaps_topo_query  /* component query */
    },
    {
        /* The component is checkpoint ready */
        MCA_BASE_METADATA_PARAM_CHECKPOINT
    }
};
/**
 * Component open: nothing to initialize for this component.
 */
static int orte_rmaps_topo_open(void)
{
    return ORTE_SUCCESS;
}
/**
 * Component query: report the module and its selection priority.
 *
 * Priority 0 means this mapper is never chosen by default - the user
 * must explicitly request it.
 */
static int orte_rmaps_topo_query(mca_base_module_t **module, int *priority)
{
    /* the RMAPS framework is -only- opened on HNP's,
     * so no need to check for that here
     */

    *priority = 0;  /* only select if specified */
    *module = (mca_base_module_t *)&orte_rmaps_topo_module;
    return ORTE_SUCCESS;
}
/**
 * Component close: no resources to release.
 */
static int orte_rmaps_topo_close(void)
{
    return ORTE_SUCCESS;
}

Просмотреть файл

@ -860,6 +860,12 @@ int orte_dt_pack_map(opal_buffer_t *buffer, const void *src,
maps = (orte_job_map_t**) src; maps = (orte_job_map_t**) src;
for (i=0; i < num_vals; i++) { for (i=0; i < num_vals; i++) {
/* pack the mapper used to generate it */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->mapper), 1, OPAL_INT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the policy used to generate it */ /* pack the policy used to generate it */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->policy), 1, ORTE_MAPPING_POLICY))) { if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->policy), 1, ORTE_MAPPING_POLICY))) {
ORTE_ERROR_LOG(rc); ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -650,8 +650,8 @@ int orte_dt_print_map(char **output, char *prefix, orte_job_map_t *src, opal_dat
asprintf(&pfx, "%s\t", pfx2); asprintf(&pfx, "%s\t", pfx2);
if (orte_devel_level_output) { if (orte_devel_level_output) {
asprintf(&tmp, "\n%sMap generated by mapping policy: %04x\n%s\tNpernode: %ld\tOversubscribe allowed: %s\tCPU Lists: %s", asprintf(&tmp, "\n%sMap generated by mapper: %d\tMapping policy: %04x\n%s\tNpernode: %ld\tOversubscribe allowed: %s\tCPU Lists: %s",
pfx2, src->policy, pfx2, (long)src->npernode, pfx2, src->mapper, src->policy, pfx2, (long)src->npernode,
(src->oversubscribe) ? "TRUE" : "FALSE", (src->oversubscribe) ? "TRUE" : "FALSE",
(src->cpu_lists) ? "TRUE" : "FALSE"); (src->cpu_lists) ? "TRUE" : "FALSE");

Просмотреть файл

@ -938,6 +938,14 @@ int orte_dt_unpack_map(opal_buffer_t *buffer, void *dest,
return ORTE_ERR_OUT_OF_RESOURCE; return ORTE_ERR_OUT_OF_RESOURCE;
} }
/* unpack the mapper */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
&(maps[i]->mapper), &n, OPAL_INT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the policy */ /* unpack the policy */
n = 1; n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer, if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,

Просмотреть файл

@ -1040,6 +1040,7 @@ OBJ_CLASS_INSTANCE(orte_jmap_t,
static void orte_job_map_construct(orte_job_map_t* map) static void orte_job_map_construct(orte_job_map_t* map)
{ {
map->mapper = ORTE_RMAPS_UNDEF;
map->policy = 0; map->policy = 0;
map->npernode = 0; map->npernode = 0;
map->nperboard = 0; map->nperboard = 0;

Просмотреть файл

@ -154,7 +154,16 @@ int orte_err2str(int errnum, const char **errmsg)
case ORTE_ERR_COMM_DISABLED: case ORTE_ERR_COMM_DISABLED:
retval = "Communications have been disabled"; retval = "Communications have been disabled";
break; break;
case ORTE_ERR_FAILED_TO_MAP:
retval = "Unable to map job";
break;
case ORTE_ERR_TAKE_NEXT_OPTION:
if (orte_report_silent_errors) {
retval = "Next option";
} else {
retval = NULL;
}
break;
default: default:
if (orte_report_silent_errors) { if (orte_report_silent_errors) {
retval = "Unknown error"; retval = "Unknown error";