Add an option to treat dash-host specifications as "requested, but not required". So-called "soft" location requests can allow an application to execute even if the ideal allocation isn't available.
This commit was SVN r27242.
This commit is contained in:
parent
6d29cecce1
commit
d772e0fc3d
@ -233,7 +233,14 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
|
||||
OBJ_RELEASE(caddy);
|
||||
return;
|
||||
}
|
||||
} else if (NULL != app->dash_host) {
|
||||
} else if (!orte_soft_locations && NULL != app->dash_host) {
|
||||
/* if we are using soft locations, then any dash-host would
|
||||
* just list desired nodes, not required ones. We don't want
|
||||
* to pick them up here as this would mean the request was
|
||||
* always satisfied - instead, we want to allow the request
|
||||
* to fail later on and use whatever nodes are actually
|
||||
* available
|
||||
*/
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
|
||||
"%s ras:base:allocate adding dash_hosts",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
@ -147,8 +147,10 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
|
||||
*/
|
||||
if (!orte_managed_allocation) {
|
||||
OBJ_CONSTRUCT(&nodes, opal_list_t);
|
||||
/* if the app provided a dash-host, then use those nodes */
|
||||
if (NULL != app->dash_host) {
|
||||
/* if the app provided a dash-host, and we are not treating
|
||||
* them as requested or "soft" locations, then use those nodes
|
||||
*/
|
||||
if (!orte_soft_locations && NULL != app->dash_host) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
|
||||
"%s using dash_host",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
@ -441,7 +443,7 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base.rmaps_output,
|
||||
"%s Filtering thru apps",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
|
||||
|
||||
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_filter_nodes(app, allocated_nodes, true))
|
||||
&& ORTE_ERR_TAKE_NEXT_OPTION != rc) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
|
@ -41,7 +41,7 @@ orte_rmaps_base_module_t orte_rmaps_staged_module = {
|
||||
static int staged_mapper(orte_job_t *jdata)
|
||||
{
|
||||
mca_base_component_t *c=&mca_rmaps_staged_component.base_version;
|
||||
int i, j, rc;
|
||||
int i, j, k, rc;
|
||||
orte_app_context_t *app;
|
||||
opal_list_t node_list;
|
||||
orte_std_cntr_t num_slots;
|
||||
@ -49,6 +49,7 @@ static int staged_mapper(orte_job_t *jdata)
|
||||
orte_node_t *node;
|
||||
bool work_to_do = false, first_pass = false;
|
||||
opal_list_item_t *item;
|
||||
char *cptr, **minimap;
|
||||
|
||||
/* only use this mapper if it was specified */
|
||||
if (NULL == jdata->map->req_mapper ||
|
||||
@ -101,8 +102,12 @@ static int staged_mapper(orte_job_t *jdata)
|
||||
* -hostfile or -host directives
|
||||
*/
|
||||
OBJ_CONSTRUCT(&node_list, opal_list_t);
|
||||
/* get nodes based on a strict interpretation of the location hints */
|
||||
if (ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
|
||||
jdata->map->mapping, false, true))) {
|
||||
/* we were unable to get any nodes that match those
|
||||
* specified in the app
|
||||
*/
|
||||
if (ORTE_ERR_RESOURCE_BUSY == rc) {
|
||||
/* if the return is "busy", then at least one of the
|
||||
* specified resources must exist, but no slots are
|
||||
@ -127,6 +132,41 @@ static int staged_mapper(orte_job_t *jdata)
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
/* if we are using soft locations, search the list of nodes
|
||||
* for those that match the requested locations and bubble those
|
||||
* to the top so we use them first
|
||||
*/
|
||||
if (orte_soft_locations && NULL != app->dash_host) {
|
||||
/* scan the dash hosts in reverse order as we want
|
||||
* the first entry to be on top of the list
|
||||
*/
|
||||
opal_output_verbose(5, orte_rmaps_base.rmaps_output,
|
||||
"%s mca:rmaps:staged: ordering nodes by desired location",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
|
||||
for (j=opal_argv_count(app->dash_host)-1; 0 <= j; j--) {
|
||||
minimap = opal_argv_split(app->dash_host[j], ',');
|
||||
for (k=opal_argv_count(minimap)-1; 0 <= k; k--) {
|
||||
cptr = minimap[k];
|
||||
for (item = opal_list_get_first(&node_list);
|
||||
item != opal_list_get_end(&node_list);
|
||||
item = opal_list_get_next(item)) {
|
||||
node = (orte_node_t*)item;
|
||||
if (0 == strcmp(node->name, cptr) ||
|
||||
(0 == strcmp("localhost", cptr) &&
|
||||
0 == strcmp(node->name, orte_process_info.nodename))) {
|
||||
opal_list_remove_item(&node_list, item);
|
||||
opal_list_prepend(&node_list, item);
|
||||
opal_output_verbose(10, orte_rmaps_base.rmaps_output,
|
||||
"%s mca:rmaps:staged: placing node %s at top of list",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
node->name);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
opal_argv_free(minimap);
|
||||
}
|
||||
}
|
||||
|
||||
/* assign any unmapped procs to an available slot */
|
||||
for (j=0; j < app->procs.size; j++) {
|
||||
|
@ -20,7 +20,6 @@ BEGIN_C_DECLS
|
||||
ORTE_MODULE_DECLSPEC extern orte_rmaps_base_component_t mca_rmaps_staged_component;
|
||||
extern orte_rmaps_base_module_t orte_rmaps_staged_module;
|
||||
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
@ -98,6 +98,7 @@ bool orte_managed_allocation = false;
|
||||
char *orte_set_slots = NULL;
|
||||
bool orte_display_allocation;
|
||||
bool orte_display_devel_allocation;
|
||||
bool orte_soft_locations = false;
|
||||
|
||||
/* launch agents */
|
||||
char *orte_launch_agent = NULL;
|
||||
|
@ -617,6 +617,7 @@ ORTE_DECLSPEC extern bool orte_managed_allocation;
|
||||
ORTE_DECLSPEC extern char *orte_set_slots;
|
||||
ORTE_DECLSPEC extern bool orte_display_allocation;
|
||||
ORTE_DECLSPEC extern bool orte_display_devel_allocation;
|
||||
ORTE_DECLSPEC extern bool orte_soft_locations;
|
||||
|
||||
/* launch agents */
|
||||
ORTE_DECLSPEC extern char *orte_launch_agent;
|
||||
|
@ -576,6 +576,14 @@ int orte_register_params(void)
|
||||
orte_devel_level_output = true;
|
||||
}
|
||||
|
||||
/* should we treat any -host directives as "soft" - i.e., desired
|
||||
* but not required
|
||||
*/
|
||||
mca_base_param_reg_int_name("orte", "soft_locations",
|
||||
"Treat -host directives as desired, but not required",
|
||||
false, false, (int)false, &value);
|
||||
orte_soft_locations = OPAL_INT_TO_BOOL(value);
|
||||
|
||||
#endif /* ORTE_DISABLE_FULL_SUPPORT */
|
||||
|
||||
return ORTE_SUCCESS;
|
||||
|
Loading…
x
Reference in new issue
Block a user