
Add a loadbalancing feature to the round-robin mapper - more to be sent to devel list
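In rough terms (a minimal sketch in plain C, using hypothetical counts in place of the ORTE job and node structures): the mapper sums the requested procs across all app_contexts, divides by the number of allocatable nodes, and stops filling each node once that per-node quota (ppn) is reached.

/* Minimal sketch of the load-balancing arithmetic - not the ORTE code;
 * total_procs and num_nodes are hypothetical stand-ins for the values
 * the mapper derives from app->num_procs and the allocated node pool. */
#include <stdio.h>

int main(void)
{
    int total_procs = 10;   /* hypothetical: sum of app->num_procs    */
    int num_nodes   = 4;    /* hypothetical: nodes marked allocatable */

    /* integer division, as in the mapper; the remainder simply keeps
     * round-robining across the nodes */
    int ppn = total_procs / num_nodes;

    printf("ppn = %d (remainder %d wraps around)\n",
           ppn, total_procs % num_nodes);
    return 0;
}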

Fix a potential problem with RM-provided nodenames not matching the value returned by gethostname() - ensure that the HNP's nodename gets DNS-resolved when comparing against RM-provided hostnames. Note that this may be an issue for RM-based clusters that don't have local DNS resolution, but hopefully that is more indicative of a poorly configured system.
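For illustration, here is a hedged sketch of what the locality test amounts to: resolve both the RM-provided name and the local hostname, then compare the resulting addresses. This is not the OPAL implementation - the diff below actually uses opal_ifislocal(), which checks the name against the local interface list - and resolves_to_local() is a hypothetical helper.

/* Hypothetical helper, illustrative only: returns 1 if rm_name resolves
 * to the same IPv4 address as the local hostname. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <netdb.h>
#include <sys/socket.h>
#include <netinet/in.h>

static int resolves_to_local(const char *rm_name)
{
    char local[256];
    struct addrinfo hints, *rm_ai = NULL, *local_ai = NULL, *a, *b;
    int match = 0;

    if (0 != gethostname(local, sizeof(local))) {
        return 0;
    }
    memset(&hints, 0, sizeof(hints));
    hints.ai_family = AF_INET;

    /* resolve both names; treat any resolution failure as "not local" */
    if (0 != getaddrinfo(rm_name, NULL, &hints, &rm_ai)) {
        return 0;
    }
    if (0 != getaddrinfo(local, NULL, &hints, &local_ai)) {
        freeaddrinfo(rm_ai);
        return 0;
    }
    /* compare every pair of resolved IPv4 addresses */
    for (a = rm_ai; NULL != a && !match; a = a->ai_next) {
        for (b = local_ai; NULL != b; b = b->ai_next) {
            struct sockaddr_in *sa = (struct sockaddr_in *)a->ai_addr;
            struct sockaddr_in *sb = (struct sockaddr_in *)b->ai_addr;
            if (sa->sin_addr.s_addr == sb->sin_addr.s_addr) {
                match = 1;
                break;
            }
        }
    }
    freeaddrinfo(rm_ai);
    freeaddrinfo(local_ai);
    return match;
}

int main(int argc, char **argv)
{
    const char *name = (argc > 1) ? argv[1] : "localhost";
    printf("%s %s local\n", name, resolves_to_local(name) ? "is" : "is not");
    return 0;
}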

This commit was SVN r18252.
This commit is contained in:
Ralph Castain 2008-04-23 14:52:09 +00:00
parent 456ce6c4da
commit 5311b13b60
6 changed files with 92 additions and 36 deletions

View file

@@ -23,6 +23,7 @@
#include "opal/util/output.h"
#include "opal/util/argv.h"
#include "opal/util/if.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/util/name_fns.h"
@@ -111,7 +112,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
* first position since it is the first one entered. We need to check to see
* if this node is the same as the HNP's node so we don't double-enter it
*/
-if (0 == strcmp(node->name, hnp_node->name)) {
+if (0 == strcmp(node->name, hnp_node->name) || opal_ifislocal(node->name)) {
OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
"%s ras:base:node_insert updating HNP info to %ld slots",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
@@ -124,6 +125,9 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
hnp_node->slots_alloc = node->slots_alloc;
hnp_node->slots_max = node->slots_max;
hnp_node->launch_id = node->launch_id;
+/* use the RM's name for the node */
+free(hnp_node->name);
+hnp_node->name = strdup(node->name);
/* set the node to available for use */
hnp_node->allocate = true;
/* update the total slots in the job */

View file

@@ -57,10 +57,12 @@ typedef struct {
bool pernode;
/** number of ppn for n_per_node mode */
int npernode;
-/* do we not allow use of the localhost */
+/* do not allow use of the localhost */
bool no_use_local;
/* display the map after it is computed */
bool display_map;
+/* balance load across nodes */
+bool loadbalance;
} orte_rmaps_base_t;
/**

View file

@@ -125,6 +125,16 @@ int orte_rmaps_base_open(void)
orte_rmaps_base.oversubscribe = true;
}
+/* Do we want to loadbalance the job */
+param = mca_base_param_reg_int_name("rmaps", "base_loadbalance",
+                                    "Balance total number of procs across all allocated nodes",
+                                    false, false, (int)false, &value);
+orte_rmaps_base.loadbalance = OPAL_INT_TO_BOOL(value);
+/* if we are doing npernode or pernode, then we cannot loadbalance */
+if (orte_rmaps_base.pernode) {
+    orte_rmaps_base.loadbalance = false;
+}
/* should we display the map after determining it? */
mca_base_param_reg_int_name("rmaps", "base_display_map",
"Whether to display the process map after it is computed",

View file

@@ -88,18 +88,17 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
ORTE_ERROR_LOG(rc);
return rc;
}
}
-/** check that anything is here */
-if (0 == opal_list_get_size(allocated_nodes)) {
-    opal_show_help("help-orte-rmaps-base.txt",
-                   "orte-rmaps-base:no-available-resources",
-                   true);
-    return ORTE_ERR_SILENT;
+/** check that anything is here */
+if (0 == opal_list_get_size(allocated_nodes)) {
+    opal_show_help("help-orte-rmaps-base.txt",
+                   "orte-rmaps-base:no-available-resources",
+                   true);
+    return ORTE_ERR_SILENT;
+}
}
/* did the app_context contain a hostfile? */
-if (NULL != app->hostfile) {
+if (NULL != app && NULL != app->hostfile) {
/* yes - filter the node list through the file, removing
* any nodes not found in the file
*/
@@ -108,27 +107,27 @@
ORTE_ERROR_LOG(rc);
return rc;
}
-/** check that anything is here */
-if (0 == opal_list_get_size(allocated_nodes)) {
-    opal_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-mapped-node",
-                   true, app->app, app->hostfile);
-    return ORTE_ERR_SILENT;
-}
}
+/** check that anything is here */
+if (0 == opal_list_get_size(allocated_nodes)) {
+    opal_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-mapped-node",
+                   true, app->app, app->hostfile);
+    return ORTE_ERR_SILENT;
+}
-/* now filter the list through any -host specification */
-if (ORTE_SUCCESS != (rc = orte_util_filter_dash_host_nodes(allocated_nodes,
-                                                           app->dash_host))) {
-    ORTE_ERROR_LOG(rc);
-    return rc;
-}
-/** check that anything is left! */
-if (0 == opal_list_get_size(allocated_nodes)) {
-    opal_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-mapped-node",
-                   true, app->app, "");
-    return ORTE_ERR_SILENT;
+/* now filter the list through any -host specification */
+if (NULL != app) {
+    if (ORTE_SUCCESS != (rc = orte_util_filter_dash_host_nodes(allocated_nodes,
+                                                               app->dash_host))) {
+        ORTE_ERROR_LOG(rc);
+        return rc;
+    }
+    /** check that anything is left! */
+    if (0 == opal_list_get_size(allocated_nodes)) {
+        opal_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-mapped-node",
+                       true, app->app, "");
+        return ORTE_ERR_SILENT;
+    }
+}
/* If the "no local" option was set, then remove the local node

View file

@@ -46,6 +46,7 @@
* Local variable
*/
static opal_list_item_t *cur_node_item = NULL;
+static int ppn = 0;
/*
* Create a default mapping for the application, scheduling round
@@ -228,10 +229,12 @@ static int map_app_by_slot(
/* Update the number of procs allocated */
++num_alloc;
-/** if all the procs have been mapped OR we have fully used up this node, then
- * break from the loop
+/** if all the procs have been mapped OR we have fully used up this node
+ * OR we are at our ppn and loadbalancing, then break from the loop
 */
-if(num_alloc == app->num_procs || ORTE_ERR_NODE_FULLY_USED == rc) {
+if (num_alloc == app->num_procs ||
+    ORTE_ERR_NODE_FULLY_USED == rc ||
+    (orte_rmaps_base.loadbalance && i == ppn)) {
break;
}
}
@@ -241,7 +244,9 @@
* node is NOT max'd out
*
*/
-if (i < (num_slots_to_take-1) && ORTE_ERR_NODE_FULLY_USED != rc) {
+if (i < (num_slots_to_take-1) &&
+    ORTE_ERR_NODE_FULLY_USED != rc &&
+    i != ppn) {
continue;
}
cur_node_item = next;
@@ -261,7 +266,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
orte_std_cntr_t i;
opal_list_t node_list;
opal_list_item_t *item;
-orte_node_t *node;
+orte_node_t *node, **nodes;
orte_vpid_t vpid_start;
orte_std_cntr_t num_nodes, num_slots;
int rc;
@@ -276,6 +281,39 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
/* start at the beginning... */
vpid_start = 0;
+/* if loadbalancing is requested, then we need to compute
+ * the #procs/node - note that this cannot be done
+ * if we are doing pernode or if #procs was not given
+ */
+if (orte_rmaps_base.loadbalance && !map->pernode) {
+    /* compute total #procs */
+    for(i=0; i < jdata->num_apps; i++) {
+        app = apps[i];
+        if (0 == app->num_procs) {
+            /* can't do it - just move on */
+            opal_show_help("help-orte-rmaps-rr.txt",
+                           "orte-rmaps-rr:loadbalance-and-zero-np",
+                           true);
+            rc = ORTE_ERR_SILENT;
+            goto error;
+        }
+        ppn += app->num_procs;
+    }
+    /* get the total avail nodes */
+    nodes = (orte_node_t**)orte_node_pool->addr;
+    num_nodes=0;
+    for (i=0; i < orte_node_pool->size; i++) {
+        if (NULL == nodes[i]) {
+            break; /* nodes are left aligned, so stop when we hit a null */
+        }
+        if (nodes[i]->allocate) {
+            num_nodes++;
+        }
+    }
+    /* compute the balance */
+    ppn = ppn / num_nodes;
+}
/* cycle through the app_contexts, mapping them sequentially */
for(i=0; i < jdata->num_apps; i++) {
app = apps[i];
@@ -387,7 +425,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
goto error;
}
}
/** track the total number of processes we mapped */
jdata->num_procs += app->num_procs;

View file

@@ -215,6 +215,9 @@ static opal_cmd_line_init_t cmd_line_init[] = {
{ "rmaps", "base", "no_oversubscribe", '\0', "nooversubscribe", "nooversubscribe", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Nodes are not to be oversubscribed, even if the system supports such operation"},
{ "rmaps", "base", "loadbalance", '\0', "loadbalance", "loadbalance", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Balance total number of procs across all allocated nodes"},
{ "rmaps", "base", "display_map", '\0', "display-map", "display-map", 0,
NULL, OPAL_CMD_LINE_TYPE_BOOL,
"Display the process map just before launch"},