If (and only if) a user requests, set the default number of slots on any node to the number of objects of the specified type. This *only* takes effect in an unmanaged environment - i.e., if an external resource manager assigns us a number of slots, then that is what we use. However, if we are using a hostfile, then the user may or may not have given us a value for the number of slots on each node.
For those nodes (and *only* those nodes) where the user does *not* specify a slot count, we will set the number of slots according to the user's direction: either to an explicit number, or to the number of cores, numas, sockets, or hwthreads on the node. If the user gives no such direction, the slot count for those nodes is set to 1. Note that the default behavior remains unchanged: in the absence of any value for #slots, and in the absence of any directive to set #slots, we will set #slots=1. This commit was SVN r27236.
Этот коммит содержится в:
родитель
ee6c7702d2
Коммит
bae5dab916
@ -107,9 +107,64 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/* if this is an unmanaged allocation, then set the default
|
||||||
|
* slots on each node as directed or using default
|
||||||
|
*/
|
||||||
|
if (!orte_managed_allocation) {
|
||||||
|
if (NULL != orte_set_slots) {
|
||||||
|
for (i=0; i < orte_node_pool->size; i++) {
|
||||||
|
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!node->slots_given) {
|
||||||
|
if (0 == strncmp(orte_set_slots, "cores", strlen(orte_set_slots))) {
|
||||||
|
node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology,
|
||||||
|
HWLOC_OBJ_CORE, 0,
|
||||||
|
OPAL_HWLOC_LOGICAL);
|
||||||
|
} else if (0 == strncmp(orte_set_slots, "sockets", strlen(orte_set_slots))) {
|
||||||
|
if (0 == (node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology,
|
||||||
|
HWLOC_OBJ_SOCKET, 0,
|
||||||
|
OPAL_HWLOC_LOGICAL))) {
|
||||||
|
/* some systems don't report sockets - in this case,
|
||||||
|
* use numanodes
|
||||||
|
*/
|
||||||
|
node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology,
|
||||||
|
HWLOC_OBJ_NODE, 0,
|
||||||
|
OPAL_HWLOC_LOGICAL);
|
||||||
|
}
|
||||||
|
} else if (0 == strncmp(orte_set_slots, "numas", strlen(orte_set_slots))) {
|
||||||
|
node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology,
|
||||||
|
HWLOC_OBJ_NODE, 0,
|
||||||
|
OPAL_HWLOC_LOGICAL);
|
||||||
|
} else if (0 == strncmp(orte_set_slots, "hwthreads", strlen(orte_set_slots))) {
|
||||||
|
node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology,
|
||||||
|
HWLOC_OBJ_PU, 0,
|
||||||
|
OPAL_HWLOC_LOGICAL);
|
||||||
|
} else {
|
||||||
|
/* must be a number */
|
||||||
|
node->slots = strtol(orte_set_slots, NULL, 10);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
/* set any non-specified slot counts to 1 */
|
||||||
|
for (i=0; i < orte_node_pool->size; i++) {
|
||||||
|
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!node->slots_given) {
|
||||||
|
node->slots = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if (orte_display_allocation) {
|
||||||
|
orte_ras_base_display_alloc();
|
||||||
|
}
|
||||||
|
|
||||||
/* progress the job */
|
/* progress the job */
|
||||||
caddy->jdata->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
|
caddy->jdata->state = ORTE_JOB_STATE_DAEMONS_REPORTED;
|
||||||
ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_VM_READY);
|
ORTE_ACTIVATE_JOB_STATE(caddy->jdata, ORTE_JOB_STATE_VM_READY);
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
* University of Stuttgart. All rights reserved.
|
* University of Stuttgart. All rights reserved.
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -45,7 +45,6 @@ typedef struct orte_ras_base_t {
|
|||||||
int ras_output;
|
int ras_output;
|
||||||
opal_list_t ras_opened;
|
opal_list_t ras_opened;
|
||||||
bool allocation_read;
|
bool allocation_read;
|
||||||
bool display_alloc;
|
|
||||||
orte_ras_base_module_t *active_module;
|
orte_ras_base_module_t *active_module;
|
||||||
int total_slots_alloc;
|
int total_slots_alloc;
|
||||||
} orte_ras_base_t;
|
} orte_ras_base_t;
|
||||||
@ -59,6 +58,8 @@ ORTE_DECLSPEC int orte_ras_base_select(void);
|
|||||||
ORTE_DECLSPEC int orte_ras_base_finalize(void);
|
ORTE_DECLSPEC int orte_ras_base_finalize(void);
|
||||||
ORTE_DECLSPEC int orte_ras_base_close(void);
|
ORTE_DECLSPEC int orte_ras_base_close(void);
|
||||||
|
|
||||||
|
ORTE_DECLSPEC void orte_ras_base_display_alloc(void);
|
||||||
|
|
||||||
ORTE_DECLSPEC void orte_ras_base_allocate(int fd, short args, void *cbdata);
|
ORTE_DECLSPEC void orte_ras_base_allocate(int fd, short args, void *cbdata);
|
||||||
|
|
||||||
ORTE_DECLSPEC int orte_ras_base_add_hosts(orte_job_t *jdata);
|
ORTE_DECLSPEC int orte_ras_base_add_hosts(orte_job_t *jdata);
|
||||||
|
@ -48,8 +48,8 @@
|
|||||||
|
|
||||||
#include "orte/mca/ras/base/ras_private.h"
|
#include "orte/mca/ras/base/ras_private.h"
|
||||||
|
|
||||||
/* static function to display allocation */
|
/* function to display allocation */
|
||||||
static void display_alloc(void)
|
void orte_ras_base_display_alloc(void)
|
||||||
{
|
{
|
||||||
char *tmp=NULL, *tmp2, *tmp3, *pfx=NULL;
|
char *tmp=NULL, *tmp2, *tmp3, *pfx=NULL;
|
||||||
int i, istart;
|
int i, istart;
|
||||||
@ -367,8 +367,8 @@ void orte_ras_base_allocate(int fd, short args, void *cbdata)
|
|||||||
|
|
||||||
DISPLAY:
|
DISPLAY:
|
||||||
/* shall we display the results? */
|
/* shall we display the results? */
|
||||||
if (4 < opal_output_get_verbosity(orte_ras_base.ras_output) || orte_ras_base.display_alloc) {
|
if (4 < opal_output_get_verbosity(orte_ras_base.ras_output)) {
|
||||||
display_alloc();
|
orte_ras_base_display_alloc();
|
||||||
}
|
}
|
||||||
|
|
||||||
next_state:
|
next_state:
|
||||||
@ -470,8 +470,8 @@ int orte_ras_base_add_hosts(orte_job_t *jdata)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* shall we display the results? */
|
/* shall we display the results? */
|
||||||
if (0 < opal_output_get_verbosity(orte_ras_base.ras_output) || orte_ras_base.display_alloc) {
|
if (0 < opal_output_get_verbosity(orte_ras_base.ras_output)) {
|
||||||
display_alloc();
|
orte_ras_base_display_alloc();
|
||||||
}
|
}
|
||||||
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
|
@ -9,7 +9,7 @@
|
|||||||
* University of Stuttgart. All rights reserved.
|
* University of Stuttgart. All rights reserved.
|
||||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||||
* All rights reserved.
|
* All rights reserved.
|
||||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
* Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||||
* reserved.
|
* reserved.
|
||||||
* $COPYRIGHT$
|
* $COPYRIGHT$
|
||||||
*
|
*
|
||||||
@ -72,30 +72,11 @@ orte_ras_base_t orte_ras_base;
|
|||||||
*/
|
*/
|
||||||
int orte_ras_base_open(void)
|
int orte_ras_base_open(void)
|
||||||
{
|
{
|
||||||
int value;
|
|
||||||
bool btmp;
|
|
||||||
|
|
||||||
/* set default flags */
|
/* set default flags */
|
||||||
orte_ras_base.active_module = NULL;
|
orte_ras_base.active_module = NULL;
|
||||||
orte_ras_base.allocation_read = false;
|
orte_ras_base.allocation_read = false;
|
||||||
orte_ras_base.total_slots_alloc = 0;
|
orte_ras_base.total_slots_alloc = 0;
|
||||||
|
|
||||||
/* should we display the allocation after determining it? */
|
|
||||||
mca_base_param_reg_int_name("ras", "base_display_alloc",
|
|
||||||
"Whether to display the allocation after it is determined",
|
|
||||||
false, false, (int)false, &value);
|
|
||||||
orte_ras_base.display_alloc = OPAL_INT_TO_BOOL(value);
|
|
||||||
|
|
||||||
/* should we display a detailed (developer-quality) version of the allocation after determining it? */
|
|
||||||
mca_base_param_reg_int_name("ras", "base_display_devel_alloc",
|
|
||||||
"Whether to display a developer-detail allocation after it is determined",
|
|
||||||
false, false, (int)false, &value);
|
|
||||||
btmp = OPAL_INT_TO_BOOL(value);
|
|
||||||
if (btmp) {
|
|
||||||
orte_ras_base.display_alloc = true;
|
|
||||||
orte_devel_level_output = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Debugging / verbose output. Always have stream open, with
|
/* Debugging / verbose output. Always have stream open, with
|
||||||
verbose set by the mca open system... */
|
verbose set by the mca open system... */
|
||||||
orte_ras_base.ras_output = opal_output_open(NULL);
|
orte_ras_base.ras_output = opal_output_open(NULL);
|
||||||
|
@ -95,6 +95,9 @@ char **orte_launch_environ;
|
|||||||
bool orte_hnp_is_allocated = false;
|
bool orte_hnp_is_allocated = false;
|
||||||
bool orte_allocation_required;
|
bool orte_allocation_required;
|
||||||
bool orte_managed_allocation = false;
|
bool orte_managed_allocation = false;
|
||||||
|
char *orte_set_slots = NULL;
|
||||||
|
bool orte_display_allocation;
|
||||||
|
bool orte_display_devel_allocation;
|
||||||
|
|
||||||
/* launch agents */
|
/* launch agents */
|
||||||
char *orte_launch_agent = NULL;
|
char *orte_launch_agent = NULL;
|
||||||
@ -826,6 +829,7 @@ static void orte_node_construct(orte_node_t* node)
|
|||||||
node->oversubscribed = false;
|
node->oversubscribed = false;
|
||||||
node->state = ORTE_NODE_STATE_UNKNOWN;
|
node->state = ORTE_NODE_STATE_UNKNOWN;
|
||||||
node->slots = 0;
|
node->slots = 0;
|
||||||
|
node->slots_given = false;
|
||||||
node->slots_inuse = 0;
|
node->slots_inuse = 0;
|
||||||
node->slots_alloc = 0;
|
node->slots_alloc = 0;
|
||||||
node->slots_max = 0;
|
node->slots_max = 0;
|
||||||
|
@ -333,6 +333,10 @@ typedef struct {
|
|||||||
that we have been allocated on this note and would be the
|
that we have been allocated on this note and would be the
|
||||||
"ideal" number of processes for us to launch. */
|
"ideal" number of processes for us to launch. */
|
||||||
orte_std_cntr_t slots;
|
orte_std_cntr_t slots;
|
||||||
|
/* a flag indicating that the number of slots was specified - used
|
||||||
|
* only in non-managed environments
|
||||||
|
*/
|
||||||
|
bool slots_given;
|
||||||
/** How many processes have already been launched, used by one or
|
/** How many processes have already been launched, used by one or
|
||||||
more jobs on this node. */
|
more jobs on this node. */
|
||||||
orte_std_cntr_t slots_inuse;
|
orte_std_cntr_t slots_inuse;
|
||||||
@ -610,6 +614,9 @@ ORTE_DECLSPEC extern char **orte_launch_environ;
|
|||||||
ORTE_DECLSPEC extern bool orte_hnp_is_allocated;
|
ORTE_DECLSPEC extern bool orte_hnp_is_allocated;
|
||||||
ORTE_DECLSPEC extern bool orte_allocation_required;
|
ORTE_DECLSPEC extern bool orte_allocation_required;
|
||||||
ORTE_DECLSPEC extern bool orte_managed_allocation;
|
ORTE_DECLSPEC extern bool orte_managed_allocation;
|
||||||
|
ORTE_DECLSPEC extern char *orte_set_slots;
|
||||||
|
ORTE_DECLSPEC extern bool orte_display_allocation;
|
||||||
|
ORTE_DECLSPEC extern bool orte_display_devel_allocation;
|
||||||
|
|
||||||
/* launch agents */
|
/* launch agents */
|
||||||
ORTE_DECLSPEC extern char *orte_launch_agent;
|
ORTE_DECLSPEC extern char *orte_launch_agent;
|
||||||
|
@ -44,7 +44,7 @@ static bool passed_thru = false;
|
|||||||
|
|
||||||
int orte_register_params(void)
|
int orte_register_params(void)
|
||||||
{
|
{
|
||||||
int value;
|
int value, id;
|
||||||
char *strval, *strval1, *strval2;
|
char *strval, *strval1, *strval2;
|
||||||
|
|
||||||
/* only go thru this once - mpirun calls it twice, which causes
|
/* only go thru this once - mpirun calls it twice, which causes
|
||||||
@ -553,6 +553,29 @@ int orte_register_params(void)
|
|||||||
orte_use_common_port = false;
|
orte_use_common_port = false;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
mca_base_param_reg_string_name("orte", "set_default_slots",
|
||||||
|
"Set the number of slots on nodes that lack such info to the number of specified objects [a number, \"cores\", \"numas\", \"sockets\", or \"hwthreads\"]",
|
||||||
|
false, false, NULL, &orte_set_slots);
|
||||||
|
|
||||||
|
/* should we display the allocation after determining it? */
|
||||||
|
id = mca_base_param_reg_int_name("orte", "display_alloc",
|
||||||
|
"Whether to display the allocation after it is determined",
|
||||||
|
false, false, (int)false, NULL);
|
||||||
|
mca_base_param_reg_syn_name(id, "ras", "base_display_alloc", true);
|
||||||
|
mca_base_param_lookup_int(id, &value);
|
||||||
|
orte_display_allocation = OPAL_INT_TO_BOOL(value);
|
||||||
|
|
||||||
|
/* should we display a detailed (developer-quality) version of the allocation after determining it? */
|
||||||
|
id = mca_base_param_reg_int_name("orte", "display_devel_alloc",
|
||||||
|
"Whether to display a developer-detail allocation after it is determined",
|
||||||
|
false, false, 0, NULL);
|
||||||
|
mca_base_param_reg_syn_name(id, "ras", "base_display_devel_alloc", true);
|
||||||
|
mca_base_param_lookup_int(id, &value);
|
||||||
|
if (0 != value) {
|
||||||
|
orte_display_allocation = true;
|
||||||
|
orte_devel_level_output = true;
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* ORTE_DISABLE_FULL_SUPPORT */
|
#endif /* ORTE_DISABLE_FULL_SUPPORT */
|
||||||
|
|
||||||
return ORTE_SUCCESS;
|
return ORTE_SUCCESS;
|
||||||
|
@ -410,10 +410,10 @@ static opal_cmd_line_init_t cmd_line_init[] = {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Allocation options */
|
/* Allocation options */
|
||||||
{ "ras", "base", "display_alloc", '\0', "display-allocation", "display-allocation", 0,
|
{ "orte", "display", "alloc", '\0', "display-allocation", "display-allocation", 0,
|
||||||
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||||
"Display the allocation being used by this job"},
|
"Display the allocation being used by this job"},
|
||||||
{ "ras", "base", "display_devel_alloc", '\0', "display-devel-allocation", "display-devel-allocation", 0,
|
{ "orte", "display", "devel_alloc", '\0', "display-devel-allocation", "display-devel-allocation", 0,
|
||||||
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
NULL, OPAL_CMD_LINE_TYPE_BOOL,
|
||||||
"Display a detailed list (mostly intended for developers) of the allocation being used by this job"},
|
"Display a detailed list (mostly intended for developers) of the allocation being used by this job"},
|
||||||
#if OPAL_HAVE_HWLOC
|
#if OPAL_HAVE_HWLOC
|
||||||
|
@ -96,6 +96,8 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
|
|||||||
(0 == strcmp(node->name, orte_process_info.nodename) &&
|
(0 == strcmp(node->name, orte_process_info.nodename) &&
|
||||||
(0 == strcmp(mapped_nodes[i], "localhost") || opal_ifislocal(mapped_nodes[i])))) {
|
(0 == strcmp(mapped_nodes[i], "localhost") || opal_ifislocal(mapped_nodes[i])))) {
|
||||||
++node->slots;
|
++node->slots;
|
||||||
|
/* the dash-host option presumes definition of num_slots */
|
||||||
|
node->slots_given = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -127,6 +129,8 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
|
|||||||
node->slots_inuse = 0;
|
node->slots_inuse = 0;
|
||||||
node->slots_max = 0;
|
node->slots_max = 0;
|
||||||
node->slots = 1;
|
node->slots = 1;
|
||||||
|
/* the dash-host option presumes definition of num_slots */
|
||||||
|
node->slots_given = true;
|
||||||
opal_list_append(nodes, &node->super);
|
opal_list_append(nodes, &node->super);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -391,12 +391,14 @@ static int hostfile_parse_line(int token, opal_list_t* updates, opal_list_t* exc
|
|||||||
}
|
}
|
||||||
|
|
||||||
done:
|
done:
|
||||||
if (!got_count) {
|
if (got_count) {
|
||||||
if (got_max) {
|
node->slots_given = true;
|
||||||
node->slots = node->slots_max;
|
} else if (got_max) {
|
||||||
} else {
|
node->slots = node->slots_max;
|
||||||
++node->slots;
|
node->slots_given = true;
|
||||||
}
|
} else {
|
||||||
|
/* should be set by obj_new, but just to be clear */
|
||||||
|
node->slots_given = false;
|
||||||
}
|
}
|
||||||
opal_list_append(updates, &node->super);
|
opal_list_append(updates, &node->super);
|
||||||
|
|
||||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user