1
1
openmpi/orte/util/dash_host/dash_host.c
Ralph Castain d70e2e8c2b Merge the ORTE devel branch into the main trunk. Details of what this means will be circulated separately.
Remains to be tested to ensure everything came over cleanly, so please continue to withhold commits a little longer

This commit was SVN r17632.
2008-02-28 01:57:57 +00:00

235 lines
7.7 KiB
C

/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/types.h"

#include <stdlib.h>
#include <string.h>

#include "opal/util/output.h"
#include "opal/util/argv.h"
#include "opal/util/if.h"
#include "opal/util/show_help.h"
#include "opal/dss/dss.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/plm/plm_types.h"
#include "orte/util/proc_info.h"
#include "orte/util/sys_info.h"
#include "orte/runtime/orte_globals.h"
#include "dash_host.h"
/*
 * Add the hosts named in the hostname-type mappings of an app context
 * to a node list.
 *
 * Every map entry whose type is ORTE_APP_CONTEXT_MAP_HOSTNAME is split
 * on ',' and the resulting names are collected.  For each collected name:
 *   - if a node already on the list matches (same name, or the node is
 *     the local node and the name is "localhost" / a local interface),
 *     that node's slot count is incremented;
 *   - otherwise a new node with one slot is appended, and
 *     *override_oversubscribed is set to true.
 *
 * nodes                    list of orte_node_t items, updated in place
 * override_oversubscribed  set to true whenever a new node is appended;
 *                          never written otherwise
 * num_map                  number of entries in map
 * map                      array of app-context map entries
 *
 * Returns ORTE_SUCCESS (also when no hostname mappings exist),
 * ORTE_ERR_OUT_OF_RESOURCE if a node or its name cannot be allocated,
 * or the error code reported by opal_argv_append_nosize().  Nodes that
 * were added before a failure remain on the list.
 */
int orte_util_add_dash_host_nodes(opal_list_t *nodes,
                                  bool *override_oversubscribed,
                                  orte_std_cntr_t num_map,
                                  orte_app_context_map_t **map)
{
    opal_list_item_t *item;
    orte_std_cntr_t i, j, k;
    int rc;
    char **mapped_nodes = NULL, **mini_map;
    char *name;
    orte_node_t *node;

    /* Accumulate all of the host name mappings */
    for (j = 0; j < num_map; ++j) {
        if (ORTE_APP_CONTEXT_MAP_HOSTNAME != map[j]->map_type) {
            continue;
        }
        mini_map = opal_argv_split(map[j]->map_data, ',');
        if (NULL == mini_map) {
            /* the split produced nothing - there is nothing to walk,
             * and walking it would dereference NULL
             */
            continue;
        }
        if (NULL == mapped_nodes) {
            /* first batch: adopt the vector as the accumulator */
            mapped_nodes = mini_map;
            continue;
        }
        for (k = 0; NULL != mini_map[k]; ++k) {
            rc = opal_argv_append_nosize(&mapped_nodes, mini_map[k]);
            if (OPAL_SUCCESS != rc) {
                opal_argv_free(mini_map);
                goto cleanup;
            }
        }
        /* the append call stores its own copy of each string (see
         * opal/util/argv.h), so the temporary vector must be released
         * here or it is leaked
         */
        opal_argv_free(mini_map);
    }

    /* Did we find anything? If not, then do nothing */
    if (NULL == mapped_nodes) {
        return ORTE_SUCCESS;
    }

    /* go through the names found and add them to the host list.
     * If they're not unique, then bump the slots count for each
     * duplicate
     */
    for (i = 0; NULL != mapped_nodes[i]; ++i) {
        for (item = opal_list_get_first(nodes);
             item != opal_list_get_end(nodes);
             item = opal_list_get_next(item)) {
            node = (orte_node_t*) item;
            if (0 == strcmp(node->name, mapped_nodes[i]) ||
                (0 == strcmp(node->name, orte_system_info.nodename) &&
                 (0 == strcmp(mapped_nodes[i], "localhost") ||
                  opal_ifislocal(mapped_nodes[i])))) {
                ++node->slots;
                break;
            }
        }
        if (item != opal_list_get_end(nodes)) {
            /* already on the list - slot count was bumped above */
            continue;
        }

        /* We didn't find it, so add it to the list.  Build the name
         * first so that a failed copy never leaves a nameless node
         * on the list.
         */
        if (0 == strcmp(mapped_nodes[i], "localhost") ||
            opal_ifislocal(mapped_nodes[i])) {
            /* it is local, so use the local nodename to avoid
             * later confusion
             */
            name = strdup(orte_system_info.nodename);
        } else {
            /* not local - use the given name */
            name = strdup(mapped_nodes[i]);
        }
        if (NULL == name) {
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }

        node = OBJ_NEW(orte_node_t);
        if (NULL == node) {
            free(name);
            /* go through cleanup so mapped_nodes is not leaked */
            rc = ORTE_ERR_OUT_OF_RESOURCE;
            goto cleanup;
        }
        node->name = name;
        node->state = ORTE_NODE_STATE_UP;
        node->slots_inuse = 0;
        node->slots_max = 0;
        node->slots = 1;
        /* indicate that ORTE should override any oversubscribed conditions
         * based on local hardware limits since the user (a) might not have
         * provided us any info on the #slots for a node, and (b) the user
         * might have been wrong! If we don't check the number of local physical
         * processors, then we could be too aggressive on our sched_yield setting
         * and cause performance problems.
         */
        *override_oversubscribed = true;
        opal_list_append(nodes, &node->super);
    }
    rc = ORTE_SUCCESS;

cleanup:
    if (NULL != mapped_nodes) {
        opal_argv_free(mapped_nodes);
    }
    return rc;
}
/*
 * Filter a node list down to the hosts named in the hostname-type
 * mappings of an app context.
 *
 * Every map entry whose type is ORTE_APP_CONTEXT_MAP_HOSTNAME is split
 * on ',' and the resulting names are collected.  Each node on the list
 * that matches none of the collected names is removed from the list and
 * released.  A node matches a name when the two strings are equal, or
 * when the node is the local node and the name is "localhost" or a local
 * interface according to opal_ifislocal().
 *
 * nodes    list of orte_node_t items, filtered in place
 * num_map  number of entries in map
 * map      array of app-context map entries
 *
 * Returns ORTE_SUCCESS when the list is empty, when there are no
 * hostname mappings, or when every requested name matched a node.
 * Returns ORTE_ERR_SILENT (after showing the "not-all-mapped-alloc"
 * help message) when some requested name matched no node; the list
 * has already been filtered in that case.  Returns
 * ORTE_ERR_OUT_OF_RESOURCE or the error code reported by
 * opal_argv_append_nosize() on allocation failure.
 */
int orte_util_filter_dash_host_nodes(opal_list_t *nodes,
                                     orte_std_cntr_t num_map,
                                     orte_app_context_map_t **map)
{
    opal_list_item_t *item;
    opal_list_item_t *next;
    bool found;
    bool *matched = NULL;
    orte_std_cntr_t i, j, k;
    int rc;
    char **mapped_nodes = NULL, **mini_map;
    orte_node_t *node;

    /* if the incoming node list is empty, then there
     * is nothing to filter!
     */
    if (opal_list_is_empty(nodes)) {
        return ORTE_SUCCESS;
    }

    /* Accumulate all of the host name mappings */
    for (j = 0; j < num_map; ++j) {
        if (ORTE_APP_CONTEXT_MAP_HOSTNAME != map[j]->map_type) {
            continue;
        }
        mini_map = opal_argv_split(map[j]->map_data, ',');
        if (NULL == mini_map) {
            /* the split produced nothing - there is nothing to walk,
             * and walking it would dereference NULL
             */
            continue;
        }
        if (NULL == mapped_nodes) {
            /* first batch: adopt the vector as the accumulator */
            mapped_nodes = mini_map;
            continue;
        }
        for (k = 0; NULL != mini_map[k]; ++k) {
            rc = opal_argv_append_nosize(&mapped_nodes, mini_map[k]);
            if (OPAL_SUCCESS != rc) {
                opal_argv_free(mini_map);
                goto cleanup;
            }
        }
        /* the append call stores its own copy of each string (see
         * opal/util/argv.h), so the temporary vector must be released
         * here or it is leaked
         */
        opal_argv_free(mini_map);
    }

    /* Did we find anything? If not, then do nothing */
    if (NULL == mapped_nodes) {
        return ORTE_SUCCESS;
    }

    /* Track, per -host entry, whether some node satisfied it.  Counting
     * matched nodes against the number of entries would wrongly report
     * an error whenever the same host is named more than once (or under
     * two aliases such as "localhost" and the local nodename), because
     * one node then satisfies several entries.
     */
    k = opal_argv_count(mapped_nodes);
    /* +1 keeps the request non-zero so NULL always means failure */
    matched = calloc((size_t) k + 1, sizeof *matched);
    if (NULL == matched) {
        rc = ORTE_ERR_OUT_OF_RESOURCE;
        goto cleanup;
    }

    /* we found some info - filter what is on the list...
     * i.e., go through the list and remove any nodes that
     * were -not- included on the -host list
     */
    item = opal_list_get_first(nodes);
    while (item != opal_list_get_end(nodes)) {
        /* hang on to next item in case this one gets removed */
        next = opal_list_get_next(item);
        node = (orte_node_t*) item;

        /* search -host list to see if this one is found */
        found = false;
        for (i = 0; NULL != mapped_nodes[i]; ++i) {
            if (found && matched[i]) {
                /* nothing new to learn from this entry */
                continue;
            }
            /* we have a match if one of two conditions is met:
             * 1. the node_name and mapped_nodes directly match
             * 2. the node_name is the local system name AND
             *    either the mapped_node is "localhost" OR it
             *    is a local interface as found by opal_ifislocal
             */
            if (0 == strcmp(node->name, mapped_nodes[i]) ||
                (0 == strcmp(node->name, orte_system_info.nodename) &&
                 (0 == strcmp(mapped_nodes[i], "localhost") ||
                  opal_ifislocal(mapped_nodes[i])))) {
                /* keep scanning: later entries may name this node too */
                found = true;
                matched[i] = true;
            }
        }

        if (!found) {
            opal_list_remove_item(nodes, item);
            OBJ_RELEASE(item);
        }
        item = next;  /* move on */
    }

    /* was something specified that was -not- found? */
    for (i = 0; i < k; ++i) {
        if (!matched[i]) {
            char *tmp;
            tmp = opal_argv_join(mapped_nodes, ',');
            opal_show_help("help-dash-host.txt", "not-all-mapped-alloc",
                           true, tmp);
            free(tmp);
            rc = ORTE_ERR_SILENT;
            goto cleanup;
        }
    }

    rc = ORTE_SUCCESS;
    /* done filtering existing list */

cleanup:
    free(matched);
    if (NULL != mapped_nodes) {
        opal_argv_free(mapped_nodes);
    }
    return rc;
}