Add a load-balancing feature to the round-robin mapper; more details will be sent to the devel list.
Fix a potential problem where RM-provided node names do not match the value returned by gethostname: ensure that the HNP's node name is DNS-resolved when comparing it against RM-provided hostnames. Note that this may be an issue for RM-based clusters that lack local DNS resolution, but hopefully that is more indicative of a poorly configured system. This commit was SVN r18252.
Этот коммит содержится в:
родитель
456ce6c4da
Коммит
5311b13b60
@ -23,6 +23,7 @@
|
||||
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/argv.h"
|
||||
#include "opal/util/if.h"
|
||||
|
||||
#include "orte/mca/errmgr/errmgr.h"
|
||||
#include "orte/util/name_fns.h"
|
||||
@ -111,7 +112,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
|
||||
* first position since it is the first one entered. We need to check to see
|
||||
* if this node is the same as the HNP's node so we don't double-enter it
|
||||
*/
|
||||
if (0 == strcmp(node->name, hnp_node->name)) {
|
||||
if (0 == strcmp(node->name, hnp_node->name) || opal_ifislocal(node->name)) {
|
||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
|
||||
"%s ras:base:node_insert updating HNP info to %ld slots",
|
||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||
@ -124,6 +125,9 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
|
||||
hnp_node->slots_alloc = node->slots_alloc;
|
||||
hnp_node->slots_max = node->slots_max;
|
||||
hnp_node->launch_id = node->launch_id;
|
||||
/* use the RM's name for the node */
|
||||
free(hnp_node->name);
|
||||
hnp_node->name = strdup(node->name);
|
||||
/* set the node to available for use */
|
||||
hnp_node->allocate = true;
|
||||
/* update the total slots in the job */
|
||||
|
@ -57,10 +57,12 @@ typedef struct {
|
||||
bool pernode;
|
||||
/** number of ppn for n_per_node mode */
|
||||
int npernode;
|
||||
/* do we not allow use of the localhost */
|
||||
/* do not allow use of the localhost */
|
||||
bool no_use_local;
|
||||
/* display the map after it is computed */
|
||||
bool display_map;
|
||||
/* balance load across nodes */
|
||||
bool loadbalance;
|
||||
} orte_rmaps_base_t;
|
||||
|
||||
/**
|
||||
|
@ -125,6 +125,16 @@ int orte_rmaps_base_open(void)
|
||||
orte_rmaps_base.oversubscribe = true;
|
||||
}
|
||||
|
||||
/* Do we want to loadbalance the job */
|
||||
param = mca_base_param_reg_int_name("rmaps", "base_loadbalance",
|
||||
"Balance total number of procs across all allocated nodes",
|
||||
false, false, (int)false, &value);
|
||||
orte_rmaps_base.loadbalance = OPAL_INT_TO_BOOL(value);
|
||||
/* if we are doing npernode or pernode, then we cannot loadbalance */
|
||||
if (orte_rmaps_base.pernode) {
|
||||
orte_rmaps_base.loadbalance = false;
|
||||
}
|
||||
|
||||
/* should we display the map after determining it? */
|
||||
mca_base_param_reg_int_name("rmaps", "base_display_map",
|
||||
"Whether to display the process map after it is computed",
|
||||
|
@ -88,18 +88,17 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/** check that anything is here */
|
||||
if (0 == opal_list_get_size(allocated_nodes)) {
|
||||
opal_show_help("help-orte-rmaps-base.txt",
|
||||
"orte-rmaps-base:no-available-resources",
|
||||
true);
|
||||
return ORTE_ERR_SILENT;
|
||||
/** check that anything is here */
|
||||
if (0 == opal_list_get_size(allocated_nodes)) {
|
||||
opal_show_help("help-orte-rmaps-base.txt",
|
||||
"orte-rmaps-base:no-available-resources",
|
||||
true);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
}
|
||||
|
||||
/* did the app_context contain a hostfile? */
|
||||
if (NULL != app->hostfile) {
|
||||
if (NULL != app && NULL != app->hostfile) {
|
||||
/* yes - filter the node list through the file, removing
|
||||
* any nodes not found in the file
|
||||
*/
|
||||
@ -108,27 +107,27 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/** check that anything is here */
|
||||
if (0 == opal_list_get_size(allocated_nodes)) {
|
||||
opal_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-mapped-node",
|
||||
true, app->app, app->hostfile);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
}
|
||||
|
||||
/** check that anything is here */
|
||||
if (0 == opal_list_get_size(allocated_nodes)) {
|
||||
opal_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-mapped-node",
|
||||
true, app->app, app->hostfile);
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
|
||||
/* now filter the list through any -host specification */
|
||||
if (ORTE_SUCCESS != (rc = orte_util_filter_dash_host_nodes(allocated_nodes,
|
||||
app->dash_host))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/** check that anything is left! */
|
||||
if (0 == opal_list_get_size(allocated_nodes)) {
|
||||
opal_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-mapped-node",
|
||||
true, app->app, "");
|
||||
return ORTE_ERR_SILENT;
|
||||
/* now filter the list through any -host specification */
|
||||
if (NULL != app) {
|
||||
if (ORTE_SUCCESS != (rc = orte_util_filter_dash_host_nodes(allocated_nodes,
|
||||
app->dash_host))) {
|
||||
ORTE_ERROR_LOG(rc);
|
||||
return rc;
|
||||
}
|
||||
/** check that anything is left! */
|
||||
if (0 == opal_list_get_size(allocated_nodes)) {
|
||||
opal_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-mapped-node",
|
||||
true, app->app, "");
|
||||
return ORTE_ERR_SILENT;
|
||||
}
|
||||
}
|
||||
|
||||
/* If the "no local" option was set, then remove the local node
|
||||
|
@ -46,6 +46,7 @@
|
||||
* Local variable
|
||||
*/
|
||||
static opal_list_item_t *cur_node_item = NULL;
|
||||
static int ppn = 0;
|
||||
|
||||
/*
|
||||
* Create a default mapping for the application, scheduling round
|
||||
@ -228,10 +229,12 @@ static int map_app_by_slot(
|
||||
/* Update the number of procs allocated */
|
||||
++num_alloc;
|
||||
|
||||
/** if all the procs have been mapped OR we have fully used up this node, then
|
||||
* break from the loop
|
||||
/** if all the procs have been mapped OR we have fully used up this node
|
||||
* OR we are at our ppn and loadbalancing, then break from the loop
|
||||
*/
|
||||
if(num_alloc == app->num_procs || ORTE_ERR_NODE_FULLY_USED == rc) {
|
||||
if (num_alloc == app->num_procs ||
|
||||
ORTE_ERR_NODE_FULLY_USED == rc ||
|
||||
(orte_rmaps_base.loadbalance && i == ppn)) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -241,7 +244,9 @@ static int map_app_by_slot(
|
||||
* node is NOT max'd out
|
||||
*
|
||||
*/
|
||||
if (i < (num_slots_to_take-1) && ORTE_ERR_NODE_FULLY_USED != rc) {
|
||||
if (i < (num_slots_to_take-1) &&
|
||||
ORTE_ERR_NODE_FULLY_USED != rc &&
|
||||
i != ppn) {
|
||||
continue;
|
||||
}
|
||||
cur_node_item = next;
|
||||
@ -261,7 +266,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
|
||||
orte_std_cntr_t i;
|
||||
opal_list_t node_list;
|
||||
opal_list_item_t *item;
|
||||
orte_node_t *node;
|
||||
orte_node_t *node, **nodes;
|
||||
orte_vpid_t vpid_start;
|
||||
orte_std_cntr_t num_nodes, num_slots;
|
||||
int rc;
|
||||
@ -276,6 +281,39 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
|
||||
/* start at the beginning... */
|
||||
vpid_start = 0;
|
||||
|
||||
/* if loadbalancing is requested, then we need to compute
|
||||
* the #procs/node - note that this cannot be done
|
||||
* if we are doing pernode or if #procs was not given
|
||||
*/
|
||||
if (orte_rmaps_base.loadbalance && !map->pernode) {
|
||||
/* compute total #procs */
|
||||
for(i=0; i < jdata->num_apps; i++) {
|
||||
app = apps[i];
|
||||
if (0 == app->num_procs) {
|
||||
/* can't do it - just move on */
|
||||
opal_show_help("help-orte-rmaps-rr.txt",
|
||||
"orte-rmaps-rr:loadbalance-and-zero-np",
|
||||
true);
|
||||
rc = ORTE_ERR_SILENT;
|
||||
goto error;
|
||||
}
|
||||
ppn += app->num_procs;
|
||||
}
|
||||
/* get the total avail nodes */
|
||||
nodes = (orte_node_t**)orte_node_pool->addr;
|
||||
num_nodes=0;
|
||||
for (i=0; i < orte_node_pool->size; i++) {
|
||||
if (NULL == nodes[i]) {
|
||||
break; /* nodes are left aligned, so stop when we hit a null */
|
||||
}
|
||||
if (nodes[i]->allocate) {
|
||||
num_nodes++;
|
||||
}
|
||||
}
|
||||
/* compute the balance */
|
||||
ppn = ppn / num_nodes;
|
||||
}
|
||||
|
||||
/* cycle through the app_contexts, mapping them sequentially */
|
||||
for(i=0; i < jdata->num_apps; i++) {
|
||||
app = apps[i];
|
||||
@ -387,7 +425,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** track the total number of processes we mapped */
|
||||
jdata->num_procs += app->num_procs;
|
||||
|
||||
|
@ -215,6 +215,9 @@ static opal_cmd_line_init_t cmd_line_init[] = {
|
||||
{ "rmaps", "base", "no_oversubscribe", '\0', "nooversubscribe", "nooversubscribe", 0,
|
||||
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Nodes are not to be oversubscribed, even if the system supports such operation"},
|
||||
{ "rmaps", "base", "loadbalance", '\0', "loadbalance", "loadbalance", 0,
|
||||
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Balance total number of procs across all allocated nodes"},
|
||||
{ "rmaps", "base", "display_map", '\0', "display-map", "display-map", 0,
|
||||
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||
"Display the process map just before launch"},
|
||||
|
Загрузка…
Ссылка в новой задаче
Block a user