1
1

Redefine the rmaps framework to allow multiple mapper modules to be active at the same time. This allows users to map the primary job one way, and map any comm_spawn'd job in a different way. Modules are given the opportunity to map a job in priority order, with the round-robin mapper having the highest default priority. Priority of each module can be defined using mca param.

When called, each mapper checks to see if it can map the job. If npernode is provided, for example, then the loadbalance mapper accepts the assignment and performs the operation - all mappers before it will "pass" as they can't map npernode requests.

Also remove the stale and never completed topo mapper.

This commit was SVN r24393.
Этот коммит содержится в:
Ralph Castain 2011-02-15 23:24:31 +00:00
родитель 29785e4ea1
Коммит 5120e6aec3
32 изменённых файлов: 936 добавлений и 1343 удалений

Просмотреть файл

@ -120,7 +120,9 @@ enum {
ORTE_ERR_PROC_STALLED = (ORTE_ERR_BASE - 38),
ORTE_ERR_NO_APP_SPECIFIED = (ORTE_ERR_BASE - 39),
ORTE_ERR_NO_EXE_SPECIFIED = (ORTE_ERR_BASE - 40),
ORTE_ERR_COMM_DISABLED = (ORTE_ERR_BASE - 41)
ORTE_ERR_COMM_DISABLED = (ORTE_ERR_BASE - 41),
ORTE_ERR_FAILED_TO_MAP = (ORTE_ERR_BASE - 42),
ORTE_ERR_TAKE_NEXT_OPTION = (ORTE_ERR_BASE - 43)
};
#define ORTE_ERR_MAX (ORTE_ERR_BASE - 100)

Просмотреть файл

@ -93,6 +93,9 @@ int orte_plm_base_setup_job(orte_job_t *jdata)
ljob = ORTE_LOCAL_JOBID(jdata->jobid);
opal_pointer_array_set_item(orte_job_data, ljob, jdata);
/* set the job state */
jdata->state = ORTE_JOB_STATE_INIT;
/* if job recovery is not defined, set it to default */
if (!jdata->recovery_defined) {
/* set to system default */

Просмотреть файл

@ -56,8 +56,10 @@ typedef struct {
int rmaps_output;
/** List of available components */
opal_list_t available_components;
/** selected module */
orte_rmaps_base_module_t *active_module;
/* list of selected modules */
opal_list_t selected_modules;
/* desired default mapper */
int default_mapper;
/** whether or not we allow oversubscription of nodes */
bool oversubscribe;
/** number of ppn for n_per_node mode */
@ -74,8 +76,6 @@ typedef struct {
bool no_use_local;
/* display the map after it is computed */
bool display_map;
/* balance load across nodes */
bool loadbalance;
/* slot list, if provided by user */
char *slot_list;
} orte_rmaps_base_t;
@ -88,6 +88,14 @@ ORTE_DECLSPEC extern orte_rmaps_base_t orte_rmaps_base;
/**
* Select an rmaps component / module
*/
/*
 * List item describing one mapper module that was accepted during
 * selection. Items of this type are placed on
 * orte_rmaps_base.selected_modules, which is walked when a job is mapped.
 */
typedef struct {
/* list linkage - lets the item live on an opal_list_t */
opal_list_item_t super;
/* priority reported by the component when it was queried */
int pri;
/* the mapper module returned by the component's query */
orte_rmaps_base_module_t *module;
/* the component that supplied the module (used to print its name) */
mca_base_component_t *component;
} orte_rmaps_base_selected_module_t;
OBJ_CLASS_DECLARATION(orte_rmaps_base_selected_module_t);
ORTE_DECLSPEC int orte_rmaps_base_select(void);
/**

Просмотреть файл

@ -87,3 +87,10 @@ are cpus in a socket:
#cpus/socket: %d
Please correct one or both of these values and try again.
#
[failed-map]
Your job failed to map. Either no mapper was available, or none
of the available mappers was able to perform the requested
mapping operation. This can happen if you request a map type
(e.g., loadbalance) and the corresponding mapper was not built.

Просмотреть файл

@ -28,6 +28,14 @@
int orte_rmaps_base_close(void)
{
opal_list_item_t *item;
/* cleanup globals */
while (NULL != (item = opal_list_remove_first(&orte_rmaps_base.selected_modules))) {
OBJ_RELEASE(item);
}
OBJ_DESTRUCT(&orte_rmaps_base.selected_modules);
mca_base_components_close(orte_rmaps_base.rmaps_output,
&orte_rmaps_base.available_components, NULL);

Просмотреть файл

@ -25,10 +25,11 @@
#include "opal/util/output.h"
#include "opal/util/opal_sos.h"
#include "opal/mca/base/base.h"
#include "opal/dss/dss.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/show_help.h"
#include "orte/mca/rmaps/base/base.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
@ -42,7 +43,10 @@ int orte_rmaps_base_map_job(orte_job_t *jdata)
{
orte_job_map_t *map;
int rc;
bool did_map;
opal_list_item_t *item;
orte_rmaps_base_selected_module_t *mod;
/* NOTE: NO PROXY COMPONENT REQUIRED - REMOTE PROCS ARE NOT
* ALLOWED TO CALL RMAPS INDEPENDENTLY. ONLY THE PLM CAN
* DO SO, AND ALL PLM COMMANDS ARE RELAYED TO HNP
@ -76,12 +80,16 @@ int orte_rmaps_base_map_job(orte_job_t *jdata)
map->stride = orte_rmaps_base.stride;
map->oversubscribe = orte_rmaps_base.oversubscribe;
map->display_map = orte_rmaps_base.display_map;
map->mapper = orte_rmaps_base.default_mapper;
/* assign the map object to this job */
jdata->map = map;
} else {
if (!jdata->map->display_map) {
jdata->map->display_map = orte_rmaps_base.display_map;
}
if (ORTE_RMAPS_UNDEF == jdata->map->mapper) {
jdata->map->mapper = orte_rmaps_base.default_mapper;
}
}
/* if the job is the daemon job, then we are just mapping daemons and
@ -93,10 +101,30 @@ int orte_rmaps_base_map_job(orte_job_t *jdata)
return rc;
}
} else {
/* go ahead and map the job */
if (ORTE_SUCCESS != (rc = orte_rmaps_base.active_module->map_job(jdata))) {
ORTE_ERROR_LOG(rc);
return rc;
/* cycle thru the available mappers until one agrees to map
* the job
*/
did_map = false;
for (item = opal_list_get_first(&orte_rmaps_base.selected_modules);
item != opal_list_get_end(&orte_rmaps_base.selected_modules);
item = opal_list_get_next(item)) {
mod = (orte_rmaps_base_selected_module_t*)item;
if (ORTE_SUCCESS == (rc = mod->module->map_job(jdata))) {
did_map = true;
break;
}
/* mappers return "next option" if they didn't attempt to
* map the job. anything else is a true error.
*/
if (ORTE_ERR_TAKE_NEXT_OPTION != rc) {
ORTE_ERROR_LOG(rc);
return rc;
}
}
/* if we get here without doing the map, then that's an error */
if (!did_map) {
orte_show_help("help-orte-rmaps-base.txt", "failed-map", true);
return ORTE_ERR_FAILED_TO_MAP;
}
}

Просмотреть файл

@ -87,8 +87,9 @@ int orte_rmaps_base_open(void)
bool btmp;
/* init the globals */
orte_rmaps_base.active_module = NULL;
OBJ_CONSTRUCT(&orte_rmaps_base.selected_modules, opal_list_t);
orte_rmaps_base.default_mapper = ORTE_RMAPS_UNDEF;
/* Debugging / verbose output. Always have stream open, with
verbose set by the mca open system... */
orte_rmaps_base.rmaps_output = opal_output_open(NULL);
@ -118,6 +119,7 @@ int orte_rmaps_base_open(void)
false, false, (int)false, &value);
if (value) {
orte_rmaps_base.npernode = 1;
orte_rmaps_base.default_mapper = ORTE_RMAPS_LOADBALANCE;
}
/* #procs/node */
@ -126,6 +128,7 @@ int orte_rmaps_base_open(void)
false, false, -1, &value);
if (0 < value) {
orte_rmaps_base.npernode = value;
orte_rmaps_base.default_mapper = ORTE_RMAPS_LOADBALANCE;
}
/* #procs/board */
@ -134,6 +137,7 @@ int orte_rmaps_base_open(void)
false, false, -1, &orte_rmaps_base.nperboard);
if (0 < orte_rmaps_base.nperboard) {
ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX);
orte_rmaps_base.default_mapper = ORTE_RMAPS_LOADBALANCE;
}
/* #procs/socket */
@ -144,13 +148,16 @@ int orte_rmaps_base_open(void)
ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_NPERXXX);
/* force bind to socket if not overridden by user */
ORTE_XSET_BINDING_POLICY(ORTE_BIND_TO_SOCKET);
orte_rmaps_base.default_mapper = ORTE_RMAPS_LOADBALANCE;
}
/* Do we want to loadbalance the job */
param = mca_base_param_reg_int_name("rmaps", "base_loadbalance",
"Balance total number of procs across all allocated nodes",
false, false, (int)false, &value);
orte_rmaps_base.loadbalance = OPAL_INT_TO_BOOL(value);
if (value) {
orte_rmaps_base.default_mapper = ORTE_RMAPS_LOADBALANCE;
}
/* #cpus/rank to use */
param = mca_base_param_reg_int_name("rmaps", "base_cpus_per_proc",
@ -193,6 +200,7 @@ int orte_rmaps_base_open(void)
if (NULL != orte_rmaps_base.slot_list ||
NULL != orte_rankfile) {
ORTE_ADD_MAPPING_POLICY(ORTE_MAPPING_BYUSER);
orte_rmaps_base.default_mapper = ORTE_RMAPS_RF;
}
/* Should we schedule on the local node or not? */
@ -243,4 +251,8 @@ int orte_rmaps_base_open(void)
return ORTE_SUCCESS;
}
/* class instance for the selected-module list items, derived from
 * opal_list_item_t. NOTE(review): the two NULL arguments are presumably
 * the constructor and destructor slots (i.e., none needed) - confirm
 * against the OPAL object macros
 */
OBJ_CLASS_INSTANCE(orte_rmaps_base_selected_module_t,
opal_list_item_t,
NULL, NULL);
#endif /* ORTE_DISABLE_FULL_SUPPORT */

Просмотреть файл

@ -26,29 +26,98 @@
#include "orte/mca/rmaps/base/base.h"
static bool selected = false;
/*
 * Function for building the prioritized list of mapper modules.
 *
 * Every component on orte_rmaps_base.available_components is queried.
 * Each component that returns a module is recorded, together with the
 * priority it reported, on orte_rmaps_base.selected_modules. The list
 * is kept in priority order, highest priority first. Components that
 * have no query function, or whose query does not yield a module, are
 * skipped.
 *
 * The work is only done on the first call - subsequent calls return
 * immediately.
 *
 * Returns ORTE_SUCCESS, including the case where no component offered
 * a module (the list is then simply left empty).
 */
int orte_rmaps_base_select(void)
{
    opal_list_item_t *item, *itm2;
    mca_base_component_list_item_t *cli = NULL;
    mca_base_component_t *component = NULL;
    mca_base_module_t *module = NULL;
    orte_rmaps_base_selected_module_t *newmodule, *mod;
    int rc, priority;
    bool inserted;

    if (selected) {
        /* ensure we don't do this twice */
        return ORTE_SUCCESS;
    }
    selected = true;

    /* Query all available components and ask if they have a module */
    for (item = opal_list_get_first(&orte_rmaps_base.available_components);
         opal_list_get_end(&orte_rmaps_base.available_components) != item;
         item = opal_list_get_next(item)) {
        cli = (mca_base_component_list_item_t *) item;
        component = (mca_base_component_t *) cli->cli_component;

        opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                            "mca:rmaps:select: checking available component %s", component->mca_component_name);

        /* If there's no query function, skip it */
        if (NULL == component->mca_query_component) {
            opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                                "mca:rmaps:select: Skipping component [%s]. It does not implement a query function",
                                component->mca_component_name );
            continue;
        }

        /* Query the component */
        opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                            "mca:rmaps:select: Querying component [%s]",
                            component->mca_component_name);
        /* reset the outputs so that a component which reports success
         * without filling them in cannot inherit the answers given by
         * the previously queried component
         */
        module = NULL;
        priority = 0;
        rc = component->mca_query_component(&module, &priority);

        /* If no module was returned, then skip component */
        if (ORTE_SUCCESS != rc || NULL == module) {
            opal_output_verbose(5, orte_rmaps_base.rmaps_output,
                                "mca:rmaps:select: Skipping component [%s]. Query failed to return a module",
                                component->mca_component_name );
            continue;
        }

        /* If we got a module, keep it - add to the list of selected modules */
        newmodule = OBJ_NEW(orte_rmaps_base_selected_module_t);
        newmodule->pri = priority;
        newmodule->module = (orte_rmaps_base_module_t*) module;
        newmodule->component = component;

        /* maintain priority order - place the new entry at the first
         * existing entry that has a strictly lower priority
         */
        inserted = false;
        for (itm2 = opal_list_get_first(&orte_rmaps_base.selected_modules);
             itm2 != opal_list_get_end(&orte_rmaps_base.selected_modules);
             itm2 = opal_list_get_next(itm2)) {
            mod = (orte_rmaps_base_selected_module_t*)itm2;
            if (priority > mod->pri) {
                opal_list_insert_pos(&orte_rmaps_base.selected_modules,
                                     itm2, &newmodule->super);
                inserted = true;
                break;
            }
        }
        if (!inserted) {
            /* must be lowest priority - add to end */
            opal_list_append(&orte_rmaps_base.selected_modules, &newmodule->super);
        }
    }

    if (4 < opal_output_get_verbosity(orte_rmaps_base.rmaps_output)) {
        opal_output(0, "%s: Final mapper priorities", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
        /* show the prioritized list */
        for (itm2 = opal_list_get_first(&orte_rmaps_base.selected_modules);
             itm2 != opal_list_get_end(&orte_rmaps_base.selected_modules);
             itm2 = opal_list_get_next(itm2)) {
            mod = (orte_rmaps_base_selected_module_t*)itm2;
            opal_output(0, "\tMapper: %s Priority: %d", mod->component->mca_component_name, mod->pri);
        }
    }

    return ORTE_SUCCESS;
}

Просмотреть файл

@ -60,12 +60,12 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
/* if the hnp was allocated, include it unless flagged not to */
if (orte_hnp_is_allocated) {
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0))) {
if (ORTE_NODE_STATE_UP == node->state) {
if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
/* clear this for future use, but don't include it */
node->state = ORTE_NODE_STATE_UP;
} else if (ORTE_NODE_STATE_NOT_INCLUDED != node->state) {
OBJ_RETAIN(node);
opal_list_append(allocated_nodes, &node->super);
} else if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
/* clear this for future use */
node->state = ORTE_NODE_STATE_UP;
}
}
}
@ -73,16 +73,19 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
/* add everything in the node pool that can be used */
for (i=1; i < orte_node_pool->size; i++) {
if (NULL != (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
/* ignore nodes that are "down" */
if (ORTE_NODE_STATE_DOWN == node->state) {
continue;
}
/* ignore nodes that are marked as do-not-use for this mapping */
if (ORTE_NODE_STATE_DO_NOT_USE == node->state) {
/* reset the state so it can be used another time */
node->state = ORTE_NODE_STATE_UP;
continue;
}
if (ORTE_NODE_STATE_DOWN == node->state) {
continue;
}
if (ORTE_NODE_STATE_NOT_INCLUDED == node->state) {
/* not to be used */
continue;
}
/* retain a copy for our use in case the item gets
* destructed along the way
*/
@ -247,36 +250,6 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
return ORTE_SUCCESS;
}
/* if we are mapping an application, check to see if we are to
* use a virtual machine
*/
if (policy & ORTE_MAPPING_USE_VM) {
/* remove all nodes that do NOT have an "alive" daemon on them */
item = opal_list_get_first(allocated_nodes);
while (item != opal_list_get_end(allocated_nodes)) {
/** save the next pointer in case we remove this node */
next = opal_list_get_next(item);
/** already have a daemon? */
node = (orte_node_t*)item;
if (NULL == node->daemon ||
ORTE_PROC_STATE_RUNNING != node->daemon->state) {
opal_list_remove_item(allocated_nodes, item);
OBJ_RELEASE(item); /* "un-retain" it */
}
/** go on to next item */
item = next;
}
/** check that anything is left! */
if (0 == opal_list_get_size(allocated_nodes)) {
orte_show_help("help-orte-rmaps-base.txt",
"orte-rmaps-base:nolocal-no-available-resources", true);
return ORTE_ERR_SILENT;
}
}
/* remove all nodes that are already at max usage, and
* compute the total number of allocated slots while
* we do so

Просмотреть файл

@ -55,6 +55,27 @@ static int switchyard(orte_job_t *jdata)
{
int rc;
/* only handle initial launch of loadbalanced
* or NPERxxx jobs - allow restarting of failed apps
*/
if (ORTE_JOB_STATE_INIT != jdata->state) {
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:lb: not job %s not in initial state - loadbalance cannot map",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
if (0 < jdata->map->mapper && ORTE_RMAPS_LOADBALANCE != jdata->map->mapper) {
/* a mapper has been specified, and it isn't me */
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:lb: job %s not using loadbalance mapper",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:loadbalance: mapping job %s",
ORTE_JOBID_PRINT(jdata->jobid));
if (0 < orte_rmaps_base.npernode) {
rc = npernode(jdata);
} else if (0 < orte_rmaps_base.nperboard) {
@ -78,10 +99,9 @@ static int switchyard(orte_job_t *jdata)
/* define the daemons that we will use for this job */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(jdata->map))) {
ORTE_ERROR_LOG(rc);
return rc;
}
return ORTE_SUCCESS;
return rc;
}

Просмотреть файл

@ -33,6 +33,7 @@ static int orte_rmaps_lb_open(void);
static int orte_rmaps_lb_close(void);
static int orte_rmaps_lb_query(mca_base_module_t **module, int *priority);
static int my_priority;
orte_rmaps_base_component_t mca_rmaps_load_balance_component = {
{
@ -58,30 +59,22 @@ orte_rmaps_base_component_t mca_rmaps_load_balance_component = {
*/
static int orte_rmaps_lb_open(void)
{
    /* register the "priority" parameter of this component; the value
     * (default 80) is stored in my_priority for use by the query function
     */
    mca_base_param_reg_int(&mca_rmaps_load_balance_component.base_version,
                           "priority",
                           "Priority of the loadbalance rmaps component",
                           false, false, 80, &my_priority);

    return ORTE_SUCCESS;
}
/*
 * Component query: hand back the loadbalance module and its priority.
 *
 * The module is always offered. Whether it actually maps a particular
 * job is decided when the job is mapped - the module's map function
 * returns ORTE_ERR_TAKE_NEXT_OPTION for jobs it does not handle.
 *
 * module   - set to the loadbalance module
 * priority - set to the value of the registered "priority" parameter
 *
 * Always returns ORTE_SUCCESS.
 */
static int orte_rmaps_lb_query(mca_base_module_t **module, int *priority)
{
    /* after rr - the registered default priority (80) is below the
     * default of the rr component (100)
     */
    *priority = my_priority;
    *module = (mca_base_module_t *)&orte_rmaps_load_balance_module;
    return ORTE_SUCCESS;
}
/**

Просмотреть файл

@ -294,6 +294,25 @@ static int orte_rmaps_rf_map(orte_job_t *jdata)
int rc;
orte_proc_t *proc;
/* only handle initial launch of rf job */
if (ORTE_JOB_STATE_INIT != jdata->state) {
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:rf: not job %s not in initial state - rank_file cannot map",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
if (0 < jdata->map->mapper && ORTE_RMAPS_RF != jdata->map->mapper) {
/* a mapper has been specified, and it isn't me */
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:rf: job %s not using rank_file mapper",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:rank_file: mapping job %s",
ORTE_JOBID_PRINT(jdata->jobid));
/* convenience def */
map = jdata->map;

Просмотреть файл

@ -39,14 +39,7 @@ BEGIN_C_DECLS
/**
* RMGR Component
*/
struct orte_rmaps_rank_file_component_t {
orte_rmaps_base_component_t super;
int debug;
int priority;
};
typedef struct orte_rmaps_rank_file_component_t orte_rmaps_rank_file_component_t;
ORTE_MODULE_DECLSPEC extern orte_rmaps_rank_file_component_t mca_rmaps_rank_file_component;
ORTE_MODULE_DECLSPEC extern orte_rmaps_base_component_t mca_rmaps_rank_file_component;
extern orte_rmaps_base_module_t orte_rmaps_rank_file_module;

Просмотреть файл

@ -43,12 +43,13 @@ static int orte_rmaps_rank_file_open(void);
static int orte_rmaps_rank_file_close(void);
static int orte_rmaps_rank_file_query(mca_base_module_t **module, int *priority);
orte_rmaps_rank_file_component_t mca_rmaps_rank_file_component = {
{
/* First, the mca_base_component_t struct containing meta
information about the component itself */
static int my_priority;
{
orte_rmaps_base_component_t mca_rmaps_rank_file_component = {
/* First, the mca_base_component_t struct containing meta
information about the component itself */
{
ORTE_RMAPS_BASE_VERSION_2_0_0,
"rank_file", /* MCA component name */
@ -58,11 +59,10 @@ orte_rmaps_rank_file_component_t mca_rmaps_rank_file_component = {
orte_rmaps_rank_file_open, /* component open */
orte_rmaps_rank_file_close, /* component close */
orte_rmaps_rank_file_query /* component query */
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
};
@ -72,11 +72,17 @@ orte_rmaps_rank_file_component_t mca_rmaps_rank_file_component = {
*/
static int orte_rmaps_rank_file_open(void)
{
mca_rmaps_rank_file_component.priority = 0;
mca_base_component_t *c = &mca_rmaps_rank_file_component.base_version;
mca_base_param_reg_int(c, "priority",
"Priority of the rank_file rmaps component",
false, false, 0,
&my_priority);
if (NULL != orte_rankfile ||
NULL != orte_rmaps_base.slot_list) {
mca_rmaps_rank_file_component.priority = 100;
/* make us first */
my_priority = 1000;
}
return ORTE_SUCCESS;
@ -84,11 +90,7 @@ static int orte_rmaps_rank_file_open(void)
/*
 * Component query: hand back the rank_file module and its priority.
 *
 * module   - set to the rank_file module
 * priority - set to my_priority, i.e. the registered "priority"
 *            parameter, or 1000 if the open function saw a rankfile
 *            or slot list
 *
 * Always returns ORTE_SUCCESS.
 */
static int orte_rmaps_rank_file_query(mca_base_module_t **module, int *priority)
{
    *priority = my_priority;
    *module = (mca_base_module_t *)&orte_rmaps_rank_file_module;
    return ORTE_SUCCESS;
}

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -37,6 +37,8 @@ static int orte_rmaps_resilient_open(void);
static int orte_rmaps_resilient_close(void);
static int orte_rmaps_resilient_query(mca_base_module_t **module, int *priority);
static int my_priority;
orte_rmaps_res_component_t mca_rmaps_resilient_component = {
{
{
@ -72,21 +74,25 @@ static int orte_rmaps_resilient_open(void)
mca_base_param_reg_string(c, "fault_grp_file",
"Filename that contains a description of fault groups for this system",
false, false, NULL, &mca_rmaps_resilient_component.fault_group_file);
mca_base_param_reg_int(c, "priority",
"Priority of the resilient rmaps component",
false, false, 40,
&my_priority);
return ORTE_SUCCESS;
}
static int orte_rmaps_resilient_query(mca_base_module_t **module, int *priority)
{
*priority = 0; /* select only if specified */
{
*priority = my_priority;
*module = (mca_base_module_t *)&orte_rmaps_resilient_module;
/* if a fault group file was provided, we definitely want to be selected */
/* if a fault group file was provided, we should be first */
if (NULL != mca_rmaps_resilient_component.fault_group_file) {
*priority = 1000;
}
return ORTE_SUCCESS;
}

Просмотреть файл

@ -33,6 +33,16 @@
BEGIN_C_DECLS
/* enumerate selectable mappers - one of these values is stored in the
 * "mapper" field of a job map to indicate which mapper is to handle
 * the job
 */
enum {
/* no mapper specified. NOTE: this has the value 0 because it is the
 * first enumerator, and the mappers rely on that when they test
 * "0 < mapper" to see if a mapper was specified
 */
ORTE_RMAPS_UNDEF,
/* round-robin (rr) mapper */
ORTE_RMAPS_RR,
/* loadbalance mapper - also set as the default mapper when one of
 * the nperxxx options is given
 */
ORTE_RMAPS_LOADBALANCE,
/* sequential (seq) mapper */
ORTE_RMAPS_SEQ,
/* rank_file mapper - set as the default mapper when a rankfile or
 * slot list is provided
 */
ORTE_RMAPS_RF,
/* presumably the resilient mapper - its map function is not visible
 * here, so confirm
 */
ORTE_RMAPS_RESILIENT
};
/*
* Structure that represents the mapping of a job to an
* allocated set of resources.
@ -40,6 +50,7 @@ BEGIN_C_DECLS
struct orte_job_map_t {
opal_object_t super;
/* user-specified mapping params */
int32_t mapper;
orte_mapping_policy_t policy;
int npernode;
int nperboard;

Просмотреть файл

@ -34,6 +34,7 @@
#include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/error_strings.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/base/base.h"
@ -53,6 +54,29 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
int rc;
opal_list_item_t *cur_node_item;
/* this mapper can only handle initial launch
* when rr mapping is desired - allow
* restarting of failed apps
*/
if (ORTE_JOB_STATE_INIT != jdata->state) {
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:rr: not job %s in state %s - rr cannot map",
ORTE_JOBID_PRINT(jdata->jobid),
orte_job_state_to_str(jdata->state));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
if (0 < jdata->map->mapper && ORTE_RMAPS_RR != jdata->map->mapper) {
/* a mapper has been specified, and it isn't me */
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:rr: job %s not using rr mapper",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:rr: mapping job %s",
ORTE_JOBID_PRINT(jdata->jobid));
/* start at the beginning... */
jdata->num_procs = 0;
@ -88,7 +112,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
/* if a bookmark exists from some prior mapping, set us to start there */
cur_node_item = orte_rmaps_base_get_starting_point(&node_list, jdata);
if (0 == app->num_procs) {
if (0 == app->num_procs) {
/* set the num_procs to equal the number of slots on these mapped nodes */
app->num_procs = num_slots;
}
@ -138,7 +162,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
return ORTE_SUCCESS;
error:
error:
while(NULL != (item = opal_list_remove_first(&node_list))) {
OBJ_RELEASE(item);
}

Просмотреть файл

@ -33,6 +33,7 @@ static int orte_rmaps_round_robin_open(void);
static int orte_rmaps_round_robin_close(void);
static int orte_rmaps_round_robin_query(mca_base_module_t **module, int *priority);
static int my_priority;
orte_rmaps_base_component_t mca_rmaps_round_robin_component = {
{
@ -58,6 +59,12 @@ orte_rmaps_base_component_t mca_rmaps_round_robin_component = {
*/
static int orte_rmaps_round_robin_open(void)
{
    /* register the "priority" parameter of this component; the value
     * (default 100) is stored in my_priority for use by the query function
     */
    mca_base_param_reg_int(&mca_rmaps_round_robin_component.base_version,
                           "priority",
                           "Priority of the rr rmaps component",
                           false, false, 100, &my_priority);

    return ORTE_SUCCESS;
}
@ -68,7 +75,7 @@ static int orte_rmaps_round_robin_query(mca_base_module_t **module, int *priorit
* so no need to check for that here
*/
*priority = 70; /* this is the default mapper */
*priority = my_priority;
*module = (mca_base_module_t *)&orte_rmaps_round_robin_module;
return ORTE_SUCCESS;
}

Просмотреть файл

@ -76,6 +76,28 @@ static int orte_rmaps_seq_map(orte_job_t *jdata)
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
ORTE_JOBID_PRINT(jdata->jobid)));
/* this mapper can only handle initial launch
* when seq mapping is desired - allow
* restarting of failed apps
*/
if (ORTE_JOB_STATE_INIT != jdata->state) {
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:seq: not job %s not in initial state - seq cannot map",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
if (0 < jdata->map->mapper && ORTE_RMAPS_SEQ != jdata->map->mapper) {
/* a mapper has been specified, and it isn't me */
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:seq: job %s not using sequential mapper",
ORTE_JOBID_PRINT(jdata->jobid));
return ORTE_ERR_TAKE_NEXT_OPTION;
}
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"mca:rmaps:seq: mapping job %s",
ORTE_JOBID_PRINT(jdata->jobid));
/* convenience def */
map = jdata->map;

Просмотреть файл

@ -33,6 +33,7 @@ static int orte_rmaps_seq_open(void);
static int orte_rmaps_seq_close(void);
static int orte_rmaps_seq_query(mca_base_module_t **module, int *priority);
static int my_priority;
orte_rmaps_base_component_t mca_rmaps_seq_component = {
{
@ -58,17 +59,19 @@ orte_rmaps_base_component_t mca_rmaps_seq_component = {
*/
static int orte_rmaps_seq_open(void)
{
    /* register the "priority" parameter of this component; the value
     * (default 60) is stored in my_priority for use by the query function
     */
    mca_base_param_reg_int(&mca_rmaps_seq_component.base_version,
                           "priority",
                           "Priority of the seq rmaps component",
                           false, false, 60, &my_priority);

    return ORTE_SUCCESS;
}
/*
 * Component query: hand back the seq module and its priority.
 *
 * module   - set to the seq module
 * priority - set to the value of the registered "priority" parameter
 *
 * Always returns ORTE_SUCCESS.
 */
static int orte_rmaps_seq_query(mca_base_module_t **module, int *priority)
{
    /* the RMAPS framework is -only- opened on HNP's,
     * so no need to check for that here
     */
    *priority = my_priority;
    *module = (mca_base_module_t *)&orte_rmaps_seq_module;
    return ORTE_SUCCESS;
}

Просмотреть файл

@ -1,12 +0,0 @@
#
# Copyright (c) 2008-2010 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Specific to this module
mca_link_libraries=libopen-rte

Просмотреть файл

@ -1,48 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
EXTRA_DIST = .windows
dist_pkgdata_DATA = help-orte-rmaps-topo.txt
sources = \
rmaps_topo.c \
rmaps_topo.h \
rmaps_topo_component.c
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_orte_rmaps_topo_DSO
component_noinst =
component_install = mca_rmaps_topo.la
else
component_noinst = libmca_rmaps_topo.la
component_install =
endif
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_rmaps_topo_la_SOURCES = $(sources)
mca_rmaps_topo_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_rmaps_topo_la_SOURCES =$(sources)
libmca_rmaps_topo_la_LDFLAGS = -module -avoid-version

Просмотреть файл

@ -1,53 +0,0 @@
# -*- text -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# This is the US/English general help file for Open RTE's orterun.
#
[orte-rmaps-topo:alloc-error]
There are not enough slots available in the system to satisfy the %d slots
that were requested by the application:
%s
Either request fewer slots for your application, or make more slots available
for use.
[orte-rmaps-topo:multi-apps-and-zero-np]
RMAPS found multiple applications to be launched, with
at least one that failed to specify the number of processes to execute.
When specifying multiple applications, you must specify how many processes
of each to launch via the -np argument.
[orte-rmaps-topo:per-node-and-too-many-procs]
There are not enough nodes in your allocation to satisfy your request to launch
%d processes on a per-node basis - only %d nodes were available.
Either request fewer processes, or obtain a larger allocation.
[orte-rmaps-topo:n-per-node-and-too-many-procs]
There are not enough nodes in your allocation to satisfy your request to launch
%d processes on a %d per-node basis - only %d nodes with a total of %d slots were available.
Either request fewer processes, or obtain a larger allocation.
[orte-rmaps-topo:n-per-node-and-not-enough-slots]
There are not enough slots on the nodes in your allocation to satisfy your request to launch on a %d process-per-node basis - only %d slots/node were available.
Either request fewer processes/node, or obtain a larger allocation.
[orte-rmaps-topo:no-np-and-user-map]
You have specified a rank-to-node/slot mapping, but failed to provide
the number of processes to be executed. For some reason, this information
could not be obtained from the mapping you provided, so we cannot continue
with executing the specified application.

Просмотреть файл

@ -1,546 +0,0 @@
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2006 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/types.h"
#include <errno.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif /* HAVE_UNISTD_H */
#ifdef HAVE_STRING_H
#include <string.h>
#endif /* HAVE_STRING_H */
#include "opal/mca/base/mca_base_param.h"
#include "opal/util/trace.h"
#include "opal/util/opal_sos.h"
#include "opal/mca/carto/base/base.h"
#include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "orte/mca/rmaps/base/base.h"
#include "rmaps_topo.h"
static int topo_map(orte_job_t *jdata);
orte_rmaps_base_module_t orte_rmaps_topo_module = {
topo_map
};
/*
* Local variable
*/
static opal_list_item_t *cur_node_item = NULL;
static int ppn = 0;
/*
 * Map one app_context round-robin by node: each process goes onto the
 * node referenced by the file-scope cursor (cur_node_item), after which
 * the cursor steps to the following node, wrapping at the end of the list.
 *
 * app        - app_context being mapped (num_procs, idx and app are read)
 * jdata      - job that owns the map being filled in
 * vpid_start - not used by this routine
 * nodes      - working list of candidate nodes; entries may be removed
 *              from it by orte_rmaps_base_claim_slot
 *
 * Returns ORTE_SUCCESS once app->num_procs slots have been claimed,
 * ORTE_ERR_SILENT (after showing help) if the node list runs empty first,
 * or the error code from orte_rmaps_base_claim_slot.
 */
static int map_app_by_node(
    orte_app_context_t* app,
    orte_job_t* jdata,
    orte_vpid_t vpid_start,
    opal_list_t* nodes)
{
    orte_std_cntr_t mapped;
    opal_list_item_t *upcoming;
    int rc;

    OPAL_TRACE(2);

    /* One slot is taken per pass. We rely on the claim_slot function to
     * deal with oversubscription: it leaves a node on the list until the
     * node reaches slots_max, or reaches its soft limit while
     * "no_oversubscribe" is set, and only then removes it. Because we
     * take a single slot from each node per cycle, running out of nodes
     * before all procs are placed means we are out of resources.
     */
    for (mapped = 0; mapped < app->num_procs; ++mapped) {
        /* the list can shrink on any pass, so it must be re-checked
         * every time around */
        if (0 == opal_list_get_size(nodes)) {
            /* nothing left to allocate from */
            orte_show_help("help-orte-rmaps-topo.txt", "orte-rmaps-topo:alloc-error",
                           true, app->num_procs, app->app);
            return ORTE_ERR_SILENT;
        }

        /* Work out the successor BEFORE claiming the slot, since the
         * claim may prune the current node from the list. Wrap to the
         * head of the list when the current node is the last one. */
        upcoming = opal_list_get_next(cur_node_item);
        if (opal_list_get_end(nodes) == upcoming) {
            upcoming = opal_list_get_first(nodes);
        }

        /* take one slot on the current node */
        rc = orte_rmaps_base_claim_slot(jdata, (orte_node_t*) cur_node_item, 1, app->idx,
                                        nodes, jdata->map->oversubscribe, true, NULL);

        /* ORTE_ERR_NODE_FULLY_USED is not a failure here - it only says
         * the node is now used up - so anything else is reported */
        if (ORTE_SUCCESS != rc &&
            ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc)) {
            ORTE_ERROR_LOG(rc);
            return rc;
        }

        cur_node_item = upcoming;
    }

    return ORTE_SUCCESS;
}
/*
 * Map one app_context round-robin by slot: a batch of slots is claimed
 * on the node referenced by the file-scope cursor (cur_node_item) before
 * the cursor moves on to the following node, wrapping at the end of the
 * list.
 *
 * app        - app_context being mapped (num_procs, idx and app are read)
 * jdata      - job that owns the map; jdata->map->npernode, when positive,
 *              overrides the per-node batch size
 * vpid_start - not used by this routine
 * nodes      - working list of candidate nodes; entries may be removed
 *              from it by orte_rmaps_base_claim_slot
 *
 * Returns ORTE_SUCCESS once app->num_procs slots have been claimed,
 * ORTE_ERR_SILENT (after showing help) if the node list runs empty first,
 * or the error code from orte_rmaps_base_claim_slot.
 */
static int map_app_by_slot(
    orte_app_context_t* app,
    orte_job_t* jdata,
    orte_vpid_t vpid_start,
    opal_list_t* nodes)
{
    int rc = ORTE_SUCCESS;
    orte_std_cntr_t taken, bite;
    orte_std_cntr_t mapped = 0;
    orte_node_t *node;
    opal_list_item_t *upcoming;

    OPAL_TRACE(2);

    /* Keep going until every proc has a home. If the node list empties
     * first - every node is at slots_max, or at its soft limit with
     * oversubscription disallowed - we are out of resources. */
    while (mapped < app->num_procs) {
        /* the list can shrink on any pass, so it must be re-checked
         * every time around */
        if (0 == opal_list_get_size(nodes)) {
            /* every node is at max usage */
            orte_show_help("help-orte-rmaps-topo.txt", "orte-rmaps-topo:alloc-error",
                           true, app->num_procs, app->app);
            return ORTE_ERR_SILENT;
        }

        /* Work out the successor BEFORE claiming slots, since a claim
         * may prune the current node from the list. Wrap to the head of
         * the list when the current node is the last one. */
        upcoming = opal_list_get_next(cur_node_item);
        if (opal_list_get_end(nodes) == upcoming) {
            upcoming = opal_list_get_first(nodes);
        }

        node = (orte_node_t*) cur_node_item;

        /* Decide how many slots to take from this node:
         *   - npernode mode: exactly npernode
         *   - node partially used and still below its soft limit: the
         *     slots remaining before the soft limit
         *   - node untouched, or at/above the soft limit: a full
         *     node->slots (or 1 when node->slots is zero)
         * Taking a full complement again once at the soft limit keeps
         * the ratio of procs between nodes the same while
         * oversubscribing.
         */
        if (0 < jdata->map->npernode) {
            bite = jdata->map->npernode;
        } else if (0 != node->slots_inuse && node->slots_inuse < node->slots) {
            bite = node->slots - node->slots_inuse;
        } else if (0 == node->slots) {
            bite = 1;
        } else {
            bite = node->slots;
        }

        for (taken = 0; taken < bite; ++taken) {
            rc = orte_rmaps_base_claim_slot(jdata, node, 1, app->idx,
                                            nodes, jdata->map->oversubscribe, true, NULL);

            /* ORTE_ERR_NODE_FULLY_USED is not a failure here - it only
             * says the node is now used up - so anything else is
             * reported */
            if (ORTE_SUCCESS != rc &&
                ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc)) {
                ORTE_ERROR_LOG(rc);
                return rc;
            }

            /* one more proc placed; finish as soon as that was the last */
            if (++mapped == app->num_procs) {
                return ORTE_SUCCESS;
            }

            /* stop filling this node once it is used up, or once it
             * holds ppn procs while loadbalancing */
            if (ORTE_ERR_NODE_FULLY_USED == OPAL_SOS_GET_ERROR_CODE(rc) ||
                (orte_rmaps_base.loadbalance && (int)node->num_procs >= ppn)) {
                break;
            }
        }

        /* Advance to the next node in every case EXCEPT when we left the
         * batch early without a full bite, the node is not max'd out,
         * and loadbalancing says it is still below ppn - then the same
         * node gets another pass. */
        if (!(taken < (bite - 1) &&
              ORTE_ERR_NODE_FULLY_USED != OPAL_SOS_GET_ERROR_CODE(rc) &&
              (orte_rmaps_base.loadbalance && (int)node->num_procs < ppn))) {
            cur_node_item = upcoming;
        }
    }

    return ORTE_SUCCESS;
}
/*
 * Create a topo-aware mapping for the job.
 *
 * Each app_context of the job is mapped in turn:
 *   1. fetch the nodes this app may use,
 *   2. pick the starting node (the job's bookmark if it is on the list,
 *      otherwise the head of the list), skipping forward to a node with
 *      free slots if the starting node is already oversubscribed,
 *   3. look the starting node up in the carto host graph and request the
 *      distances from it (the distance-based reordering of the node list
 *      was never implemented, so the result is only validated),
 *   4. resolve app->num_procs from npernode / mapping policy when -np was
 *      not given, and
 *   5. hand the app to map_app_by_node() or map_app_by_slot().
 * Afterwards local ranks are computed and the daemons for the job defined.
 *
 * jdata - job to be mapped; jdata->map supplies policy and npernode,
 *         jdata->bookmark (if set) selects the starting node and is
 *         updated after each app_context
 *
 * Returns ORTE_SUCCESS on success, ORTE_ERR_SILENT when a help message
 * has already been shown, ORTE_ERR_NOT_FOUND when no usable starting node
 * exists (empty node list, or node missing from the carto graph), or the
 * error code of whichever helper failed.
 */
static int topo_map(orte_job_t *jdata)
{
    orte_job_map_t *map;
    orte_app_context_t *app;
    int i;
    opal_list_t node_list;
    opal_list_item_t *item;
    orte_node_t *node, *nd1;
    orte_vpid_t vpid_start;
    orte_std_cntr_t num_nodes, num_slots;
    int rc;
    orte_std_cntr_t slots_per_node;
    opal_carto_graph_t *graph;
    opal_carto_base_node_t *crnode;
    opal_value_array_t distance;
    /* true only while node_list holds a constructed object, so that the
     * error path never touches a list that was not yet constructed or
     * was already destructed by a previous iteration */
    bool node_list_valid = false;

    OPAL_TRACE(1);

    /* convenience def */
    map = jdata->map;

    /* start at the beginning... */
    vpid_start = 0;
    jdata->num_procs = 0;

    /* get the graph of nodes
     * NOTE(review): the graph is never released by this function -
     * confirm whether the carto framework expects the caller to free it
     */
    if (ORTE_SUCCESS != (rc = opal_carto_base_get_host_graph(&graph, "SLOT"))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* cycle through the app_contexts, mapping them sequentially */
    for(i=0; i < jdata->apps->size; i++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
            continue;
        }

        /* if the number of processes wasn't specified, then we know there can be only
         * one app_context allowed in the launch, and that we are to launch it across
         * all available slots. We'll double-check the single app_context rule first
         */
        if (0 == app->num_procs && 1 < jdata->num_apps) {
            orte_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:multi-apps-and-zero-np",
                           true, jdata->num_apps, NULL);
            rc = ORTE_ERR_SILENT;
            goto error;
        }

        /* for each app_context, we have to get the list of nodes that it can
         * use since that can now be modified with a hostfile and/or -host
         * option
         */
        OBJ_CONSTRUCT(&node_list, opal_list_t);
        node_list_valid = true;
        if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
                                                                  map->policy))) {
            ORTE_ERROR_LOG(rc);
            goto error;
        }
        num_nodes = (orte_std_cntr_t)opal_list_get_size(&node_list);

        /* with no nodes there is no starting node to look up, and the
         * slots-per-node computation below would divide by zero
         */
        if (0 == num_nodes) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            rc = ORTE_ERR_NOT_FOUND;
            goto error;
        }

        /* if a bookmark exists from some prior mapping, set us to start there */
        if (NULL != jdata->bookmark) {
            cur_node_item = NULL;
            /* find this node on the list */
            for (item = opal_list_get_first(&node_list);
                 item != opal_list_get_end(&node_list);
                 item = opal_list_get_next(item)) {
                node = (orte_node_t*)item;
                if (node->index == jdata->bookmark->index) {
                    cur_node_item = item;
                    break;
                }
            }
            /* see if we found it - if not, just start at the beginning */
            if (NULL == cur_node_item) {
                cur_node_item = opal_list_get_first(&node_list);
            }
        } else {
            /* if no bookmark, then just start at the beginning of the list */
            cur_node_item = opal_list_get_first(&node_list);
        }

        /* order this list by network nearness - i.e., the next item in the
         * list should be the node that is closest [in a network sense] to
         * the prior item in the list
         *
         * RHC: start the list with the bookmark node as this is where
         * we would start mapping
         */
        node = (orte_node_t*)cur_node_item;
        if (NULL == (crnode = opal_carto_base_find_node(graph, node->name))) {
            ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
            rc = ORTE_ERR_NOT_FOUND;
            goto error;
        }

        /* The distance array covers -all- nodes in the graph; filtering it
         * against the allocated node list to build a distance-ordered
         * working list has not been implemented. Nothing consumes the
         * array yet, so release it right after the query instead of
         * leaking it on every pass.
         */
        OBJ_CONSTRUCT(&distance, opal_value_array_t);
        rc = opal_carto_base_get_nodes_distance(graph, crnode, "SLOT", &distance);
        OBJ_DESTRUCT(&distance);
        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            goto error;
        }

        /* is this node oversubscribed? */
        node = (orte_node_t*)cur_node_item;
        if (node->slots_inuse > node->slots) {
            /* work down the list - is there another node that
             * would not be oversubscribed? Always wrap past the list
             * sentinel BEFORE looking at an item: the sentinel is not an
             * orte_node_t and must never be dereferenced as one.
             */
            item = opal_list_get_next(cur_node_item);
            if (item == opal_list_get_end(&node_list)) {
                item = opal_list_get_first(&node_list);
            }
            while (item != cur_node_item) {
                nd1 = (orte_node_t*)item;
                if (nd1->slots_inuse < nd1->slots) {
                    /* this node is not oversubscribed! use it! */
                    cur_node_item = item;
                    break;
                }
                item = opal_list_get_next(item);
                if (item == opal_list_get_end(&node_list)) {
                    item = opal_list_get_first(&node_list);
                }
            }
            /* if the loop ran to completion, then we cycled all the way
             * around the list without finding a better answer - just use
             * what we have
             */
        }

        if (map->npernode == 1) {
            /* there are three use-cases that we need to deal with:
             * (a) if -np was not provided, then we just use the number of nodes
             * (b) if -np was provided AND #procs > #nodes, then error out
             * (c) if -np was provided AND #procs <= #nodes, then launch
             *     the specified #procs one/node. In this case, we just
             *     leave app->num_procs alone
             */
            if (0 == app->num_procs) {
                app->num_procs = num_nodes;
            } else if (app->num_procs > num_nodes) {
                orte_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:per-node-and-too-many-procs",
                               true, app->num_procs, num_nodes, NULL);
                rc = ORTE_ERR_SILENT;
                goto error;
            }
        } else if (map->npernode > 1) {
            /* first, let's check to see if there are enough slots/node to
             * meet the request - error out if not
             */
            slots_per_node = num_slots / num_nodes;
            if (map->npernode > slots_per_node) {
                orte_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:n-per-node-and-not-enough-slots",
                               true, map->npernode, slots_per_node, NULL);
                rc = ORTE_ERR_SILENT;
                goto error;
            }
            /* there are three use-cases that we need to deal with:
             * (a) if -np was not provided, then we just use the n/node * #nodes
             * (b) if -np was provided AND #procs > (n/node * #nodes), then error out
             * (c) if -np was provided AND #procs <= (n/node * #nodes), then launch
             *     the specified #procs n/node. In this case, we just
             *     leave app->num_procs alone
             */
            if (0 == app->num_procs) {
                /* set the num_procs to equal the specified num/node * the number of nodes */
                app->num_procs = map->npernode * num_nodes;
            } else if (app->num_procs > (map->npernode * num_nodes)) {
                orte_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:n-per-node-and-too-many-procs",
                               true, app->num_procs, map->npernode, num_nodes, num_slots, NULL);
                rc = ORTE_ERR_SILENT;
                goto error;
            }
        } else if (0 == app->num_procs) {
            /** set the num_procs to equal the number of slots on these mapped nodes - if
                user has specified "-bynode", then set it to the number of nodes
            */
            if (map->policy & ORTE_MAPPING_BYNODE) {
                app->num_procs = num_nodes;
            } else if (map->policy & ORTE_MAPPING_BYSLOT) {
                app->num_procs = num_slots;
            } else {
                /* we can't handle this - it should have been set when we got
                 * the map info. If it wasn't, then we can only error out
                 */
                orte_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:no-np-and-user-map",
                               true, app->num_procs, map->npernode, num_nodes, num_slots, NULL);
                rc = ORTE_ERR_SILENT;
                goto error;
            }
        }

        /** track the total number of processes we mapped */
        jdata->num_procs += app->num_procs;

        /* Make assignments. The policy is tested as a bit mask, exactly as
         * in the num_procs computation above, so that other bits set in
         * the policy do not silently turn a by-node request into by-slot.
         */
        if (map->policy & ORTE_MAPPING_BYNODE) {
            rc = map_app_by_node(app, jdata, vpid_start, &node_list);
        } else {
            rc = map_app_by_slot(app, jdata, vpid_start, &node_list);
        }

        /* update the starting vpid for the next app_context */
        vpid_start += app->num_procs;

        if (ORTE_SUCCESS != rc) {
            ORTE_ERROR_LOG(rc);
            goto error;
        }

        /* save the bookmark */
        jdata->bookmark = (orte_node_t*)cur_node_item;

        /* cleanup the node list - it can differ from one app_context
         * to another, so we have to get it every time
         */
        while(NULL != (item = opal_list_remove_first(&node_list))) {
            OBJ_RELEASE(item);
        }
        OBJ_DESTRUCT(&node_list);
        node_list_valid = false;
    }

    /* compute and save convenience values */
    if (ORTE_SUCCESS != (rc = orte_rmaps_base_compute_local_ranks(jdata))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    /* define the daemons that we will use for this job */
    if (ORTE_SUCCESS != (rc = orte_rmaps_base_define_daemons(map))) {
        ORTE_ERROR_LOG(rc);
        return rc;
    }

    return ORTE_SUCCESS;

error:
    /* only drain and destruct the list if this iteration constructed it */
    if (node_list_valid) {
        while(NULL != (item = opal_list_remove_first(&node_list))) {
            OBJ_RELEASE(item);
        }
        OBJ_DESTRUCT(&node_list);
    }

    return rc;
}

Просмотреть файл

@ -1,37 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file
*
* Resource Mapping
*
* Public declarations for the "topo" rmaps component: the component
* descriptor and the mapper module it provides.
*/
#ifndef ORTE_RMAPS_TOPO_H
#define ORTE_RMAPS_TOPO_H
#include "orte_config.h"
#include "orte/mca/rmaps/rmaps.h"
BEGIN_C_DECLS
/* component descriptor for the "topo" mapper */
ORTE_MODULE_DECLSPEC extern orte_rmaps_base_component_t mca_rmaps_topo_component;
/* mapper module handed back by the component's query function */
extern orte_rmaps_base_module_t orte_rmaps_topo_module;
END_C_DECLS
#endif

Просмотреть файл

@ -1,85 +0,0 @@
/*
* Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "opal/mca/base/base.h"
#include "opal/mca/base/mca_base_param.h"
#include "orte/mca/rmaps/base/rmaps_private.h"
#include "rmaps_topo.h"
/*
* Local functions
*
* Forward declarations of the open/close/query entry points that are
* wired into the component descriptor below.
*/
static int orte_rmaps_topo_open(void);
static int orte_rmaps_topo_close(void);
static int orte_rmaps_topo_query(mca_base_module_t **module, int *priority);
/*
* Component descriptor for the "topo" rmaps component: the first member
* carries the framework version, component name, component version and the
* open/close/query entry points; the second carries the component metadata.
*/
orte_rmaps_base_component_t mca_rmaps_topo_component = {
{
ORTE_RMAPS_BASE_VERSION_2_0_0,
"topo", /* MCA component name */
ORTE_MAJOR_VERSION, /* MCA component major version */
ORTE_MINOR_VERSION, /* MCA component minor version */
ORTE_RELEASE_VERSION, /* MCA component release version */
orte_rmaps_topo_open, /* component open */
orte_rmaps_topo_close, /* component close */
orte_rmaps_topo_query /* component query */
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
}
};
/**
 * Component open hook.
 *
 * The topo component performs no work when it is opened.
 *
 * @return ORTE_SUCCESS, unconditionally
 */
static int orte_rmaps_topo_open(void)
{
    /* nothing to set up */
    return ORTE_SUCCESS;
}
/**
 * Component query hook: offer the topo mapper module.
 *
 * @param module   [out] receives the address of orte_rmaps_topo_module
 * @param priority [out] receives 0, so this mapper is only picked when
 *                 it is explicitly asked for
 * @return ORTE_SUCCESS, unconditionally
 */
static int orte_rmaps_topo_query(mca_base_module_t **module, int *priority)
{
    /* No HNP check is needed here: the RMAPS framework is -only-
     * opened on HNP's.
     */
    *module = (mca_base_module_t *)&orte_rmaps_topo_module;
    *priority = 0;  /* only select if specified */

    return ORTE_SUCCESS;
}
/**
 * Component close hook.
 *
 * The topo component holds nothing that has to be released on close.
 *
 * @return ORTE_SUCCESS, unconditionally
 */
static int orte_rmaps_topo_close(void)
{
    /* nothing to tear down */
    return ORTE_SUCCESS;
}

Просмотреть файл

@ -860,6 +860,12 @@ int orte_dt_pack_map(opal_buffer_t *buffer, const void *src,
maps = (orte_job_map_t**) src;
for (i=0; i < num_vals; i++) {
/* pack the mapper used to generate it */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->mapper), 1, OPAL_INT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* pack the policy used to generate it */
if (ORTE_SUCCESS != (rc = opal_dss_pack_buffer(buffer, &(maps[i]->policy), 1, ORTE_MAPPING_POLICY))) {
ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -650,8 +650,8 @@ int orte_dt_print_map(char **output, char *prefix, orte_job_map_t *src, opal_dat
asprintf(&pfx, "%s\t", pfx2);
if (orte_devel_level_output) {
asprintf(&tmp, "\n%sMap generated by mapping policy: %04x\n%s\tNpernode: %ld\tOversubscribe allowed: %s\tCPU Lists: %s",
pfx2, src->policy, pfx2, (long)src->npernode,
asprintf(&tmp, "\n%sMap generated by mapper: %d\tMapping policy: %04x\n%s\tNpernode: %ld\tOversubscribe allowed: %s\tCPU Lists: %s",
pfx2, src->mapper, src->policy, pfx2, (long)src->npernode,
(src->oversubscribe) ? "TRUE" : "FALSE",
(src->cpu_lists) ? "TRUE" : "FALSE");

Просмотреть файл

@ -938,6 +938,14 @@ int orte_dt_unpack_map(opal_buffer_t *buffer, void *dest,
return ORTE_ERR_OUT_OF_RESOURCE;
}
/* unpack the mapper */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,
&(maps[i]->mapper), &n, OPAL_INT32))) {
ORTE_ERROR_LOG(rc);
return rc;
}
/* unpack the policy */
n = 1;
if (ORTE_SUCCESS != (rc = opal_dss_unpack_buffer(buffer,

Просмотреть файл

@ -1040,6 +1040,7 @@ OBJ_CLASS_INSTANCE(orte_jmap_t,
static void orte_job_map_construct(orte_job_map_t* map)
{
map->mapper = ORTE_RMAPS_UNDEF;
map->policy = 0;
map->npernode = 0;
map->nperboard = 0;

Просмотреть файл

@ -154,7 +154,16 @@ int orte_err2str(int errnum, const char **errmsg)
case ORTE_ERR_COMM_DISABLED:
retval = "Communications have been disabled";
break;
case ORTE_ERR_FAILED_TO_MAP:
retval = "Unable to map job";
break;
case ORTE_ERR_TAKE_NEXT_OPTION:
if (orte_report_silent_errors) {
retval = "Next option";
} else {
retval = NULL;
}
break;
default:
if (orte_report_silent_errors) {
retval = "Unknown error";