1
1

Add the ability to specify the desired number of slots in the --host option. Giving just a host name requests one slot; repeating the name yields one slot per copy. Giving "foo:3" requests three slots - a shorthand for listing "foo" three times. Giving "foo:*" indicates that the number of slots should be set from the node's topology, as directed by the orte_set_slots param.

Этот коммит содержится в:
Ralph Castain 2015-04-30 20:33:43 -07:00
родитель 53e9543be4
Коммит 7d1980ba83
4 изменённых файлов: 72 добавлений и 31 удалений

Просмотреть файл

@ -123,7 +123,19 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata)
node->topology = t; node->topology = t;
} }
} }
} else if (orte_do_not_launch) {
node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
t = node->topology;
for (i=1; i < orte_node_pool->size; i++) {
if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
continue;
}
if (NULL == node->topology) {
node->topology = t;
}
}
} }
/* if this is an unmanaged allocation, then set the default /* if this is an unmanaged allocation, then set the default
* slots on each node as directed or using default * slots on each node as directed or using default
*/ */
@ -135,6 +147,9 @@ void orte_plm_base_daemons_reported(int fd, short args, void *cbdata)
continue; continue;
} }
if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) { if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
"%s plm:base:setting slots for node %s by %s",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, orte_set_slots));
if (0 == strncmp(orte_set_slots, "cores", strlen(orte_set_slots))) { if (0 == strncmp(orte_set_slots, "cores", strlen(orte_set_slots))) {
node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology, node->slots = opal_hwloc_base_get_nbobjs_by_type(node->topology,
HWLOC_OBJ_CORE, 0, HWLOC_OBJ_CORE, 0,
@ -1736,7 +1751,7 @@ int orte_plm_base_setup_virtual_machine(orte_job_t *jdata)
} }
OBJ_RELEASE(nptr); OBJ_RELEASE(nptr);
} }
OBJ_DESTRUCT(&tnodes); OPAL_LIST_DESTRUCT(&tnodes);
/* if we didn't get anything, then we are the only node in the /* if we didn't get anything, then we are the only node in the
* allocation - so there is nothing else to do as no other * allocation - so there is nothing else to do as no other
* daemons are to be launched * daemons are to be launched

Просмотреть файл

@ -113,9 +113,9 @@ int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
/* flag that hnp has been allocated */ /* flag that hnp has been allocated */
orte_hnp_is_allocated = true; orte_hnp_is_allocated = true;
/* update the total slots in the job */ /* update the total slots in the job */
orte_ras_base.total_slots_alloc += node->slots; orte_ras_base.total_slots_alloc = node->slots;
/* copy the allocation data to that node's info */ /* copy the allocation data to that node's info */
hnp_node->slots += node->slots; hnp_node->slots = node->slots;
hnp_node->slots_max = node->slots_max; hnp_node->slots_max = node->slots_max;
/* copy across any attributes */ /* copy across any attributes */
OPAL_LIST_FOREACH(kv, &node->attributes, orte_attribute_t) { OPAL_LIST_FOREACH(kv, &node->attributes, orte_attribute_t) {

Просмотреть файл

@ -540,23 +540,6 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr
} }
} }
/* - if a max number was given, only take that many */
if (0 < orte_max_vm_size &&
orte_max_vm_size < (int)opal_list_get_size(allocated_nodes)) {
item = opal_list_get_first(allocated_nodes);
num_slots = 0;
for (i=0; i < orte_max_vm_size; i++) {
node = (orte_node_t*)item;
num_slots += node->slots - node->slots_inuse;
item = opal_list_get_next(item);
}
while (item != opal_list_get_end(allocated_nodes)) {
next = opal_list_get_next(item);
opal_list_remove_item(allocated_nodes, item);
OBJ_RELEASE(item); /* "un-retain" it */
}
}
/* pass back the total number of available slots */ /* pass back the total number of available slots */
*total_num_slots = num_slots; *total_num_slots = num_slots;

Просмотреть файл

@ -56,7 +56,10 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
orte_node_t *node, *nd; orte_node_t *node, *nd;
opal_list_t adds; opal_list_t adds;
bool found; bool found;
int slots;
bool slots_given;
char *cptr;
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s dashhost: parsing args", "%s dashhost: parsing args",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME))); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
@ -105,6 +108,18 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
rc = ORTE_ERR_SILENT; rc = ORTE_ERR_SILENT;
goto cleanup; goto cleanup;
} }
/* see if the node contains the number of slots */
slots_given = false;
if (NULL != (cptr = strchr(mapped_nodes[i], ':'))) {
*cptr = '\0';
++cptr;
if ('*' == *cptr) {
slots = 0;
} else {
slots = strtol(cptr, NULL, 10);
}
slots_given = true;
}
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s dashhost: working node %s", "%s dashhost: working node %s",
@ -122,12 +137,18 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
OPAL_LIST_FOREACH(node, &adds, orte_node_t) { OPAL_LIST_FOREACH(node, &adds, orte_node_t) {
if (0 == strcmp(node->name, ndname)) { if (0 == strcmp(node->name, ndname)) {
found = true; found = true;
++node->slots; if (slots_given) {
node->slots += slots;
if (0 < slots) {
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
}
} else {
++node->slots;
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
}
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s dashhost: node %s already on list - slots %d", "%s dashhost: node %s already on list - slots %d",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, node->slots)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, node->slots));
/* the dash-host option presumes definition of num_slots */
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
break; break;
} }
} }
@ -145,9 +166,15 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
node->state = ORTE_NODE_STATE_UP; node->state = ORTE_NODE_STATE_UP;
node->slots_inuse = 0; node->slots_inuse = 0;
node->slots_max = 0; node->slots_max = 0;
node->slots = 1; if (slots_given) {
/* the dash-host option presumes definition of num_slots */ node->slots = slots;
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN); if (0 < slots) {
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
}
} else {
node->slots = 1;
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
}
opal_list_append(&adds, &node->super); opal_list_append(&adds, &node->super);
} }
} }
@ -165,13 +192,20 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes,
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s dashhost: found existing node %s on input list - ignoring", "%s dashhost: found existing node %s on input list - ignoring",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name));
if (ORTE_FLAG_TEST(nd, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
}
/* don't ignore a slots directive */
if (slots_given) {
node->slots = slots;
}
break; break;
} }
} }
if (!found) { if (!found) {
OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output, OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
"%s dashhost: adding node %s to final list", "%s dashhost: adding node %s with %d slots to final list",
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nd->name)); ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nd->name, nd->slots));
opal_list_append(nodes, &nd->super); opal_list_append(nodes, &nd->super);
} else { } else {
OBJ_RELEASE(item); OBJ_RELEASE(item);
@ -202,7 +236,7 @@ static int parse_dash_host(char ***mapped_nodes, char *hosts)
int nodeidx; int nodeidx;
orte_node_t *node; orte_node_t *node;
char **host_argv=NULL; char **host_argv=NULL;
host_argv = opal_argv_split(hosts, ','); host_argv = opal_argv_split(hosts, ',');
/* Accumulate all of the host name mappings */ /* Accumulate all of the host name mappings */
@ -265,6 +299,10 @@ static int parse_dash_host(char ***mapped_nodes, char *hosts)
goto cleanup; goto cleanup;
} }
} else { /* non-relative syntax - add to list */ } else { /* non-relative syntax - add to list */
/* remove any modifier */
if (NULL != (cptr = strchr(mini_map[k], ':'))) {
*cptr = '\0';
}
/* check for local alias */ /* check for local alias */
if (orte_ifislocal(mini_map[k])) { if (orte_ifislocal(mini_map[k])) {
opal_argv_append_nosize(mapped_nodes, orte_process_info.nodename); opal_argv_append_nosize(mapped_nodes, orte_process_info.nodename);
@ -300,7 +338,8 @@ int orte_util_filter_dash_host_nodes(opal_list_t *nodes,
int num_empty=0; int num_empty=0;
opal_list_t keep; opal_list_t keep;
bool want_all_empty=false; bool want_all_empty=false;
char *cptr;
/* if the incoming node list is empty, then there /* if the incoming node list is empty, then there
* is nothing to filter! * is nothing to filter!
*/ */
@ -383,6 +422,10 @@ int orte_util_filter_dash_host_nodes(opal_list_t *nodes,
while (item != opal_list_get_end(nodes)) { while (item != opal_list_get_end(nodes)) {
next = opal_list_get_next(item); /* save this position */ next = opal_list_get_next(item); /* save this position */
node = (orte_node_t*)item; node = (orte_node_t*)item;
/* remove any modifier */
if (NULL != (cptr = strchr(mapped_nodes[i], ':'))) {
*cptr = '\0';
}
/* search -host list to see if this one is found */ /* search -host list to see if this one is found */
if (0 == strcmp(node->name, mapped_nodes[i])) { if (0 == strcmp(node->name, mapped_nodes[i])) {
if (remove) { if (remove) {