1
1

Add the ability to specify the desired number of slots in the --host option. Giving just a host name requests one slot (listing the same name multiple times yields one slot per copy). Giving "foo:3" requests three slots - a shorthand for listing "foo" three times. Giving "foo:*" indicates that the number of slots should be set from the node's topology, as directed by the orte_set_slots param.

Этот коммит содержится в:
Ralph Castain 2015-04-30 20:33:43 -07:00
родитель 53e9543be4
Коммит 7d1980ba83
4 изменённых файлов: 72 добавлений и 31 удалений

Просмотреть файл

@ -123,7 +123,19 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata)
node->topology = t;
}
}
} else if (orte_do_not_launch) {
node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
t = node->topology;
for (i=1; i < orte_node_pool->size; i++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
continue;
}
if (NULL == node->topology) {
node->topology = t;
}
}
}
/* if this is an unmanaged allocation, then set the default
* slots on each node as directed or using default
*/
@ -135,6 +147,9 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata)
continue;
}
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
"%s plm:base:setting slots for node %s by %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, orte_set_slots));
if (0 == strncmp(orte_set_slots, "cores", strlen(orte_set_slots))) {
node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology,
HWLOC_OBJ_CORE, 0,
@ -1736,7 +1751,7 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
}
OBJ_RELEASE(nptr);
}
OBJ_DESTRUCT(&tnodes);
OPAL_LIST_DESTRUCT(&tnodes);
/* if we didn't get anything, then we are the only node in the
* allocation - so there is nothing else to do as no other
* daemons are to be launched

Просмотреть файл

@ -113,9 +113,9 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
/* flag that hnp has been allocated */
orte_hnp_is_allocated = true;
/* update the total slots in the job */
orte_ras_base.total_slots_alloc += node->slots;
orte_ras_base.total_slots_alloc = node->slots;
/* copy the allocation data to that node's info */
hnp_node->slots += node->slots;
hnp_node->slots = node->slots;
hnp_node->slots_max = node->slots_max;
/* copy across any attributes */
OPAL_LIST_FOREACH(kv, &node->attributes, orte_attribute_t) {

Просмотреть файл

@ -540,23 +540,6 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
}
}
/* - if a max number was given, only take that many */
if (0 < orte_max_vm_size &&
orte_max_vm_size < (int)opal_list_get_size(allocated_nodes)) {
item = opal_list_get_first(allocated_nodes);
num_slots = 0;
for (i=0; i < orte_max_vm_size; i++) {
node = (orte_node_t*)item;
num_slots += node->slots - node->slots_inuse;
item = opal_list_get_next(item);
}
while (item != opal_list_get_end(allocated_nodes)) {
next = opal_list_get_next(item);
opal_list_remove_item(allocated_nodes, item);
OBJ_RELEASE(item); /* "un-retain" it */
}
}
/* pass back the total number of available slots */
*total_num_slots = num_slots;

Просмотреть файл

@ -56,7 +56,10 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
orte_node_t *node, *nd;
opal_list_t adds;
bool found;
int slots;
bool slots_given;
char *cptr;
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s dashhost: parsing args",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
@ -105,6 +108,18 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
rc = ORTE_ERR_SILENT;
goto cleanup;
}
/* see if the node contains the number of slots */
slots_given = false;
if (NULL != (cptr = strchr(mapped_nodes[i], ':'))) {
*cptr = '\0';
++cptr;
if ('*' == *cptr) {
slots = 0;
} else {
slots = strtol(cptr, NULL, 10);
}
slots_given = true;
}
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s dashhost: working node %s",
@ -122,12 +137,18 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
OPAL_LIST_FOREACH(node, &adds, orte_node_t) {
if (0 == strcmp(node->name, ndname)) {
found = true;
++node->slots;
if (slots_given) {
node->slots += slots;
if (0 < slots) {
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
}
} else {
++node->slots;
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
}
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s dashhost: node %s already on list - slots %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, node->slots));
/* the dash-host option presumes definition of num_slots */
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
break;
}
}
@ -145,9 +166,15 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
node->state = ORTE_NODE_STATE_UP;
node->slots_inuse = 0;
node->slots_max = 0;
node->slots = 1;
/* the dash-host option presumes definition of num_slots */
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
if (slots_given) {
node->slots = slots;
if (0 < slots) {
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
}
} else {
node->slots = 1;
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
}
opal_list_append(&adds, &node->super);
}
}
@ -165,13 +192,20 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s dashhost: found existing node %s on input list - ignoring",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name));
if (ORTE_FLAG_TEST(nd, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
}
/* don't ignore a slots directive */
if (slots_given) {
node->slots = slots;
}
break;
}
}
if (!found) {
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s dashhost: adding node %s to final list",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nd->name));
"%s dashhost: adding node %s with %d slots to final list",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nd->name, nd->slots));
opal_list_append(nodes, &nd->super);
} else {
OBJ_RELEASE(item);
@ -202,7 +236,7 @@ static int parse_dash_host(char ***mapped_nodes, char *hosts)
int nodeidx;
orte_node_t *node;
char **host_argv=NULL;
host_argv = opal_argv_split(hosts, ',');
/* Accumulate all of the host name mappings */
@ -265,6 +299,10 @@ static int parse_dash_host(char ***mapped_nodes, char *hosts)
goto cleanup;
}
} else { /* non-relative syntax - add to list */
/* remove any modifier */
if (NULL != (cptr = strchr(mini_map[k], ':'))) {
*cptr = '\0';
}
/* check for local alias */
if (orte_ifislocal(mini_map[k])) {
opal_argv_append_nosize(mapped_nodes, orte_process_info.nodename);
@ -300,7 +338,8 @@ int orte_util_filter_dash_host_nodes(opal_list_t *nodes,
int num_empty=0;
opal_list_t keep;
bool want_all_empty=false;
char *cptr;
/* if the incoming node list is empty, then there
* is nothing to filter!
*/
@ -383,6 +422,10 @@ int orte_util_filter_dash_host_nodes(opal_list_t *nodes,
while (item != opal_list_get_end(nodes)) {
next = opal_list_get_next(item); /* save this position */
node = (orte_node_t*)item;
/* remove any modifier */
if (NULL != (cptr = strchr(mapped_nodes[i], ':'))) {
*cptr = '\0';
}
/* search -host list to see if this one is found */
if (0 == strcmp(node->name, mapped_nodes[i])) {
if (remove) {