1
1

Add an option to treat -host ("dash-host") specifications as "requested, but not required". Such "soft" location requests allow an application to execute even if the ideal allocation isn't available.

This commit was SVN r27242.
Этот коммит содержится в:
Ralph Castain 2012-09-05 18:42:09 +00:00
родитель 6d29cecce1
Коммит d772e0fc3d
7 изменённых файлов: 64 добавлений и 6 удалений

Просмотреть файл

@ -233,7 +233,14 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
OBJ_RELEASE(caddy);
return;
}
} else if (NULL != app->dash_host) {
} else if (!orte_soft_locations && NULL != app->dash_host) {
/* if we are using soft locations, then any dash-host entries
 * name desired nodes rather than required ones. We don't want
 * to pick them up here, as this would mean the request was
 * always satisfied - instead, we want to allow the request
 * to fail later on and use whatever nodes are actually
 * available
 */
OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
"%s ras:base:allocate adding dash_hosts",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

Просмотреть файл

@ -147,8 +147,10 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
*/
if (!orte_managed_allocation) {
OBJ_CONSTRUCT(&nodes, opal_list_t);
/* if the app provided a dash-host, then use those nodes */
if (NULL != app->dash_host) {
/* if the app provided a dash-host, and we are not treating
 * dash-host entries as merely requested ("soft") locations,
 * then use those nodes
 */
if (!orte_soft_locations && NULL != app->dash_host) {
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
"%s using dash_host",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
@ -441,7 +443,7 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
"%s Filtering thru apps",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
if (ORTE_SUCCESS != (rc = orte_rmaps_base_filter_nodes(app, allocated_nodes, true))
&& ORTE_ERR_TAKE_NEXT_OPTION != rc) {
ORTE_ERROR_LOG(rc);

Просмотреть файл

@ -41,7 +41,7 @@ orte_rmaps_base_module_t orte_rmaps_staged_module = {
static int staged_mapper(orte_job_t *jdata)
{
mca_base_component_t *c=&mca_rmaps_staged_component.base_version;
int i, j, rc;
int i, j, k, rc;
orte_app_context_t *app;
opal_list_t node_list;
orte_std_cntr_t num_slots;
@ -49,6 +49,7 @@ static int staged_mapper(orte_job_t *jdata)
orte_node_t *node;
bool work_to_do = false, first_pass = false;
opal_list_item_t *item;
char *cptr, **minimap;
/* only use this mapper if it was specified */
if (NULL == jdata->map->req_mapper ||
@ -101,8 +102,12 @@ static int staged_mapper(orte_job_t *jdata)
* -hostfile or -host directives
*/
OBJ_CONSTRUCT(&node_list, opal_list_t);
/* get nodes based on a strict interpretation of the location hints */
if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
jdata->map->mapping, false, true))) {
/* we were unable to get any nodes that match those
* specified in the app
*/
if (ORTE_ERR_RESOURCE_BUSY == rc) {
/* if the return is "busy", then at least one of the
* specified resources must exist, but no slots are
@ -127,6 +132,41 @@ static int staged_mapper(orte_job_t *jdata)
return rc;
}
}
/* if we are using soft locations, search the list of nodes
* for those that match the requested locations and bubble those
* to the top so we use them first
*/
if (orte_soft_locations && NULL != app->dash_host) {
/* scan the dash hosts in reverse order as we want
* the first entry to be on top of the list
*/
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
"%s mca:rmaps:staged: ordering nodes by desired location",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
for (j=opal_argv_count(app->dash_host)-1; 0 <= j; j--) {
minimap = opal_argv_split(app->dash_host[j], ',');
for (k=opal_argv_count(minimap)-1; 0 <= k; k--) {
cptr = minimap[k];
for (item = opal_list_get_first(&node_list);
item != opal_list_get_end(&node_list);
item = opal_list_get_next(item)) {
node = (orte_node_t*)item;
if (0 == strcmp(node->name, cptr) ||
(0 == strcmp("localhost", cptr) &&
0 == strcmp(node->name, orte_process_info.nodename))) {
opal_list_remove_item(&node_list, item);
opal_list_prepend(&node_list, item);
opal_output_verbose(10, orte_rmaps_base.rmaps_output,
"%s mca:rmaps:staged: placing node %s at top of list",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
node->name);
break;
}
}
}
opal_argv_free(minimap);
}
}
/* assign any unmapped procs to an available slot */
for (j=0; j < app->procs.size; j++) {

Просмотреть файл

@ -20,7 +20,6 @@ BEGIN_C_DECLS
ORTE_MODULE_DECLSPEC extern orte_rmaps_base_component_t mca_rmaps_staged_component;
extern orte_rmaps_base_module_t orte_rmaps_staged_module;
END_C_DECLS
#endif

Просмотреть файл

@ -98,6 +98,7 @@ bool orte_managed_allocation = false;
char *orte_set_slots = NULL;
bool orte_display_allocation;
bool orte_display_devel_allocation;
bool orte_soft_locations = false;
/* launch agents */
char *orte_launch_agent = NULL;

Просмотреть файл

@ -617,6 +617,7 @@ ORTE_DECLSPEC extern bool orte_managed_allocation;
ORTE_DECLSPEC extern char *orte_set_slots;
ORTE_DECLSPEC extern bool orte_display_allocation;
ORTE_DECLSPEC extern bool orte_display_devel_allocation;
ORTE_DECLSPEC extern bool orte_soft_locations;
/* launch agents */
ORTE_DECLSPEC extern char *orte_launch_agent;

Просмотреть файл

@ -576,6 +576,14 @@ int orte_register_params(void)
orte_devel_level_output = true;
}
/* should we treat any -host directives as "soft" - i.e., desired
* but not required
*/
mca_base_param_reg_int_name("orte", "soft_locations",
"Treat -host directives as desired, but not required",
false, false, (int)false, &value);
orte_soft_locations = OPAL_INT_TO_BOOL(value);
#endif /* ORTE_DISABLE_FULL_SUPPORT */
return ORTE_SUCCESS;