Add a load-balancing feature to the round-robin mapper; more details to be sent to the devel list.
Fix a potential problem with RM-provided nodenames not matching the value returned by gethostname: ensure that the HNP's nodename gets DNS-resolved when comparing against RM-provided hostnames. Note that this may be an issue for RM-based clusters that don't have local DNS resolution, but hopefully that is more indicative of a poorly configured system. This commit was SVN r18252.
Этот коммит содержится в:
родитель
456ce6c4da
Коммит
5311b13b60
@ -23,6 +23,7 @@
|
|||||||
|
|
||||||
#include "opal/util/output.h"
|
#include "opal/util/output.h"
|
||||||
#include "opal/util/argv.h"
|
#include "opal/util/argv.h"
|
||||||
|
#include "opal/util/if.h"
|
||||||
|
|
||||||
#include "orte/mca/errmgr/errmgr.h"
|
#include "orte/mca/errmgr/errmgr.h"
|
||||||
#include "orte/util/name_fns.h"
|
#include "orte/util/name_fns.h"
|
||||||
@ -111,7 +112,7 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
|
|||||||
* first position since it is the first one entered. We need to check to see
|
* first position since it is the first one entered. We need to check to see
|
||||||
* if this node is the same as the HNP's node so we don't double-enter it
|
* if this node is the same as the HNP's node so we don't double-enter it
|
||||||
*/
|
*/
|
||||||
if (0 == strcmp(node->name, hnp_node->name)) {
|
if (0 == strcmp(node->name, hnp_node->name) || opal_ifislocal(node->name)) {
|
||||||
OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
|
OPAL_OUTPUT_VERBOSE((5, orte_ras_base.ras_output,
|
||||||
"%s ras:base:node_insert updating HNP info to %ld slots",
|
"%s ras:base:node_insert updating HNP info to %ld slots",
|
||||||
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
|
||||||
@ -124,6 +125,9 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
|
|||||||
hnp_node->slots_alloc = node->slots_alloc;
|
hnp_node->slots_alloc = node->slots_alloc;
|
||||||
hnp_node->slots_max = node->slots_max;
|
hnp_node->slots_max = node->slots_max;
|
||||||
hnp_node->launch_id = node->launch_id;
|
hnp_node->launch_id = node->launch_id;
|
||||||
|
/* use the RM's name for the node */
|
||||||
|
free(hnp_node->name);
|
||||||
|
hnp_node->name = strdup(node->name);
|
||||||
/* set the node to available for use */
|
/* set the node to available for use */
|
||||||
hnp_node->allocate = true;
|
hnp_node->allocate = true;
|
||||||
/* update the total slots in the job */
|
/* update the total slots in the job */
|
||||||
|
@ -57,10 +57,12 @@ typedef struct {
|
|||||||
bool pernode;
|
bool pernode;
|
||||||
/** number of ppn for n_per_node mode */
|
/** number of ppn for n_per_node mode */
|
||||||
int npernode;
|
int npernode;
|
||||||
/* do we not allow use of the localhost */
|
/* do not allow use of the localhost */
|
||||||
bool no_use_local;
|
bool no_use_local;
|
||||||
/* display the map after it is computed */
|
/* display the map after it is computed */
|
||||||
bool display_map;
|
bool display_map;
|
||||||
|
/* balance load across nodes */
|
||||||
|
bool loadbalance;
|
||||||
} orte_rmaps_base_t;
|
} orte_rmaps_base_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -125,6 +125,16 @@ int orte_rmaps_base_open(void)
|
|||||||
orte_rmaps_base.oversubscribe = true;
|
orte_rmaps_base.oversubscribe = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Do we want to loadbalance the job */
|
||||||
|
param = mca_base_param_reg_int_name("rmaps", "base_loadbalance",
|
||||||
|
"Balance total number of procs across all allocated nodes",
|
||||||
|
false, false, (int)false, &value);
|
||||||
|
orte_rmaps_base.loadbalance = OPAL_INT_TO_BOOL(value);
|
||||||
|
/* if we are doing npernode or pernode, then we cannot loadbalance */
|
||||||
|
if (orte_rmaps_base.pernode) {
|
||||||
|
orte_rmaps_base.loadbalance = false;
|
||||||
|
}
|
||||||
|
|
||||||
/* should we display the map after determining it? */
|
/* should we display the map after determining it? */
|
||||||
mca_base_param_reg_int_name("rmaps", "base_display_map",
|
mca_base_param_reg_int_name("rmaps", "base_display_map",
|
||||||
"Whether to display the process map after it is computed",
|
"Whether to display the process map after it is computed",
|
||||||
|
@ -88,18 +88,17 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
|
|||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
}
|
/** check that anything is here */
|
||||||
|
if (0 == opal_list_get_size(allocated_nodes)) {
|
||||||
/** check that anything is here */
|
opal_show_help("help-orte-rmaps-base.txt",
|
||||||
if (0 == opal_list_get_size(allocated_nodes)) {
|
"orte-rmaps-base:no-available-resources",
|
||||||
opal_show_help("help-orte-rmaps-base.txt",
|
true);
|
||||||
"orte-rmaps-base:no-available-resources",
|
return ORTE_ERR_SILENT;
|
||||||
true);
|
}
|
||||||
return ORTE_ERR_SILENT;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* did the app_context contain a hostfile? */
|
/* did the app_context contain a hostfile? */
|
||||||
if (NULL != app->hostfile) {
|
if (NULL != app && NULL != app->hostfile) {
|
||||||
/* yes - filter the node list through the file, removing
|
/* yes - filter the node list through the file, removing
|
||||||
* any nodes not found in the file
|
* any nodes not found in the file
|
||||||
*/
|
*/
|
||||||
@ -108,27 +107,27 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
|
|||||||
ORTE_ERROR_LOG(rc);
|
ORTE_ERROR_LOG(rc);
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
|
/** check that anything is here */
|
||||||
|
if (0 == opal_list_get_size(allocated_nodes)) {
|
||||||
|
opal_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-mapped-node",
|
||||||
|
true, app->app, app->hostfile);
|
||||||
|
return ORTE_ERR_SILENT;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** check that anything is here */
|
/* now filter the list through any -host specification */
|
||||||
if (0 == opal_list_get_size(allocated_nodes)) {
|
if (NULL != app) {
|
||||||
opal_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-mapped-node",
|
if (ORTE_SUCCESS != (rc = orte_util_filter_dash_host_nodes(allocated_nodes,
|
||||||
true, app->app, app->hostfile);
|
app->dash_host))) {
|
||||||
return ORTE_ERR_SILENT;
|
ORTE_ERROR_LOG(rc);
|
||||||
}
|
return rc;
|
||||||
|
}
|
||||||
/* now filter the list through any -host specification */
|
/** check that anything is left! */
|
||||||
if (ORTE_SUCCESS != (rc = orte_util_filter_dash_host_nodes(allocated_nodes,
|
if (0 == opal_list_get_size(allocated_nodes)) {
|
||||||
app->dash_host))) {
|
opal_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-mapped-node",
|
||||||
ORTE_ERROR_LOG(rc);
|
true, app->app, "");
|
||||||
return rc;
|
return ORTE_ERR_SILENT;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** check that anything is left! */
|
|
||||||
if (0 == opal_list_get_size(allocated_nodes)) {
|
|
||||||
opal_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:no-mapped-node",
|
|
||||||
true, app->app, "");
|
|
||||||
return ORTE_ERR_SILENT;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If the "no local" option was set, then remove the local node
|
/* If the "no local" option was set, then remove the local node
|
||||||
|
@ -46,6 +46,7 @@
|
|||||||
* Local variable
|
* Local variable
|
||||||
*/
|
*/
|
||||||
static opal_list_item_t *cur_node_item = NULL;
|
static opal_list_item_t *cur_node_item = NULL;
|
||||||
|
static int ppn = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create a default mapping for the application, scheduling round
|
* Create a default mapping for the application, scheduling round
|
||||||
@ -228,10 +229,12 @@ static int map_app_by_slot(
|
|||||||
/* Update the number of procs allocated */
|
/* Update the number of procs allocated */
|
||||||
++num_alloc;
|
++num_alloc;
|
||||||
|
|
||||||
/** if all the procs have been mapped OR we have fully used up this node, then
|
/** if all the procs have been mapped OR we have fully used up this node
|
||||||
* break from the loop
|
* OR we are at our ppn and loadbalancing, then break from the loop
|
||||||
*/
|
*/
|
||||||
if(num_alloc == app->num_procs || ORTE_ERR_NODE_FULLY_USED == rc) {
|
if (num_alloc == app->num_procs ||
|
||||||
|
ORTE_ERR_NODE_FULLY_USED == rc ||
|
||||||
|
(orte_rmaps_base.loadbalance && i == ppn)) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -241,7 +244,9 @@ static int map_app_by_slot(
|
|||||||
* node is NOT max'd out
|
* node is NOT max'd out
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
if (i < (num_slots_to_take-1) && ORTE_ERR_NODE_FULLY_USED != rc) {
|
if (i < (num_slots_to_take-1) &&
|
||||||
|
ORTE_ERR_NODE_FULLY_USED != rc &&
|
||||||
|
i != ppn) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
cur_node_item = next;
|
cur_node_item = next;
|
||||||
@ -261,7 +266,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
|
|||||||
orte_std_cntr_t i;
|
orte_std_cntr_t i;
|
||||||
opal_list_t node_list;
|
opal_list_t node_list;
|
||||||
opal_list_item_t *item;
|
opal_list_item_t *item;
|
||||||
orte_node_t *node;
|
orte_node_t *node, **nodes;
|
||||||
orte_vpid_t vpid_start;
|
orte_vpid_t vpid_start;
|
||||||
orte_std_cntr_t num_nodes, num_slots;
|
orte_std_cntr_t num_nodes, num_slots;
|
||||||
int rc;
|
int rc;
|
||||||
@ -276,6 +281,39 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
|
|||||||
/* start at the beginning... */
|
/* start at the beginning... */
|
||||||
vpid_start = 0;
|
vpid_start = 0;
|
||||||
|
|
||||||
|
/* if loadbalancing is requested, then we need to compute
|
||||||
|
* the #procs/node - note that this cannot be done
|
||||||
|
* if we are doing pernode or if #procs was not given
|
||||||
|
*/
|
||||||
|
if (orte_rmaps_base.loadbalance && !map->pernode) {
|
||||||
|
/* compute total #procs */
|
||||||
|
for(i=0; i < jdata->num_apps; i++) {
|
||||||
|
app = apps[i];
|
||||||
|
if (0 == app->num_procs) {
|
||||||
|
/* can't do it - just move on */
|
||||||
|
opal_show_help("help-orte-rmaps-rr.txt",
|
||||||
|
"orte-rmaps-rr:loadbalance-and-zero-np",
|
||||||
|
true);
|
||||||
|
rc = ORTE_ERR_SILENT;
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
ppn += app->num_procs;
|
||||||
|
}
|
||||||
|
/* get the total avail nodes */
|
||||||
|
nodes = (orte_node_t**)orte_node_pool->addr;
|
||||||
|
num_nodes=0;
|
||||||
|
for (i=0; i < orte_node_pool->size; i++) {
|
||||||
|
if (NULL == nodes[i]) {
|
||||||
|
break; /* nodes are left aligned, so stop when we hit a null */
|
||||||
|
}
|
||||||
|
if (nodes[i]->allocate) {
|
||||||
|
num_nodes++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* compute the balance */
|
||||||
|
ppn = ppn / num_nodes;
|
||||||
|
}
|
||||||
|
|
||||||
/* cycle through the app_contexts, mapping them sequentially */
|
/* cycle through the app_contexts, mapping them sequentially */
|
||||||
for(i=0; i < jdata->num_apps; i++) {
|
for(i=0; i < jdata->num_apps; i++) {
|
||||||
app = apps[i];
|
app = apps[i];
|
||||||
@ -387,7 +425,7 @@ static int orte_rmaps_rr_map(orte_job_t *jdata)
|
|||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** track the total number of processes we mapped */
|
/** track the total number of processes we mapped */
|
||||||
jdata->num_procs += app->num_procs;
|
jdata->num_procs += app->num_procs;
|
||||||
|
|
||||||
|
@ -215,6 +215,9 @@ static opal_cmd_line_init_t cmd_line_init[] = {
|
|||||||
{ "rmaps", "base", "no_oversubscribe", '\0', "nooversubscribe", "nooversubscribe", 0,
|
{ "rmaps", "base", "no_oversubscribe", '\0', "nooversubscribe", "nooversubscribe", 0,
|
||||||
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||||
"Nodes are not to be oversubscribed, even if the system supports such operation"},
|
"Nodes are not to be oversubscribed, even if the system supports such operation"},
|
||||||
|
{ "rmaps", "base", "loadbalance", '\0', "loadbalance", "loadbalance", 0,
|
||||||
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||||
|
"Balance total number of procs across all allocated nodes"},
|
||||||
{ "rmaps", "base", "display_map", '\0', "display-map", "display-map", 0,
|
{ "rmaps", "base", "display_map", '\0', "display-map", "display-map", 0,
|
||||||
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||||
"Display the process map just before launch"},
|
"Display the process map just before launch"},
|
||||||
|
Загрузка…
Ссылка в новой задаче
Block a user