* improve interface description for ompi_rte_allocate_resources
* make hostfile llm properly deal with the oversubscribe situation. Rather than returning smaller than requested (which is no longer possible, as it made for a bookkeeping nightmare and no one was paying attention to it anyway), we just oversubscribe the nodes. In the future, we need to add a flag to allocate resources as to whether to allow oversubscription (if the resource allocator permits — clearly rsh does, rms not so much). This commit was SVN r2808.
Этот коммит содержится в:
родитель
a9010be2e5
Коммит
bc6ecff582
@ -29,6 +29,8 @@ struct mca_llm_base_hostfile_node_t {
|
||||
char hostname[MAXHOSTNAMELEN];
|
||||
/** number of MPI processes Open MPI can start on this host */
|
||||
int count;
|
||||
/** count argument in the hostfile */
|
||||
int given_count;
|
||||
/** generic key=value storage mechanism */
|
||||
ompi_list_t *info;
|
||||
};
|
||||
|
@ -75,6 +75,7 @@ mca_llm_base_collapse_resources(ompi_list_t *hostlist)
|
||||
(!has_conflicts(curr_node, check_node))) {
|
||||
/* they are mergeable */
|
||||
curr_node->count += check_node->count;
|
||||
curr_node->given_count += check_node->given_count;
|
||||
keyval_merge(curr_node, check_node);
|
||||
|
||||
/* delete from the list */
|
||||
|
@ -31,40 +31,39 @@ mca_llm_base_map_resources(int nodes,
|
||||
} else if (0 == nodes && 0 != procs) {
|
||||
/* allocate procs process count as dense as possible */
|
||||
int alloc_procs = 0;
|
||||
int iters = 0;
|
||||
|
||||
for (nodeitem = ompi_list_get_first(hostlist);
|
||||
nodeitem != ompi_list_get_end(hostlist);
|
||||
nodeitem = ompi_list_get_next(nodeitem)) {
|
||||
node = (mca_llm_base_hostfile_node_t*) nodeitem;
|
||||
/* loop until we are done */
|
||||
for (iters = 1 ; alloc_procs < procs ; ++iters) {
|
||||
for (nodeitem = ompi_list_get_first(hostlist);
|
||||
nodeitem != ompi_list_get_end(hostlist);
|
||||
nodeitem = ompi_list_get_next(nodeitem)) {
|
||||
node = (mca_llm_base_hostfile_node_t*) nodeitem;
|
||||
|
||||
if (alloc_procs >= procs) {
|
||||
/* we've allocated enough - release this guy from the
|
||||
list */
|
||||
tmp = ompi_list_remove_item(hostlist, nodeitem);
|
||||
OBJ_RELEASE(nodeitem);
|
||||
nodeitem = tmp;
|
||||
} else if (alloc_procs + node->count < procs) {
|
||||
/* the entire host allocation is needed... */
|
||||
alloc_procs += node->count;
|
||||
} else {
|
||||
/* the entire host allocation isn't needed. dump the
|
||||
unneeded parts */
|
||||
node->count = procs - alloc_procs;
|
||||
alloc_procs = procs;
|
||||
if (alloc_procs >= procs) {
|
||||
/* we've allocated enough. If we are on first
|
||||
loop, remove from list. Otherwise, break out of
|
||||
loop */
|
||||
if (1 == iters) {
|
||||
tmp = ompi_list_remove_item(hostlist, nodeitem);
|
||||
OBJ_RELEASE(nodeitem);
|
||||
nodeitem = tmp;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} else if (alloc_procs + node->given_count <= procs) {
|
||||
/* the entire host allocation is needed... */
|
||||
node->count += node->given_count;
|
||||
alloc_procs += node->given_count;
|
||||
} else {
|
||||
/* the entire host allocation isn't needed. dump the
|
||||
unneeded parts */
|
||||
node->count += procs - alloc_procs;
|
||||
alloc_procs = procs;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} else if (0 != nodes && 0 == procs) {
|
||||
/* allocate as many nodes as possible with each node having
|
||||
one slot */
|
||||
|
||||
for (nodeitem = ompi_list_get_first(hostlist);
|
||||
nodeitem != ompi_list_get_end(hostlist);
|
||||
nodeitem = ompi_list_get_next(nodeitem)) {
|
||||
node = (mca_llm_base_hostfile_node_t*) nodeitem;
|
||||
node->count = 1;
|
||||
}
|
||||
|
||||
} else if (0 != nodes && 0 != procs) {
|
||||
/* allocate as best we can */
|
||||
/* BWB - implement me */
|
||||
|
@ -89,6 +89,9 @@ void
|
||||
llm_base_int_hostfile_node_construct(ompi_object_t *obj)
|
||||
{
|
||||
mca_llm_base_hostfile_node_t *node = (mca_llm_base_hostfile_node_t*) obj;
|
||||
(node->hostname)[0] = '\0';
|
||||
node->count = 0;
|
||||
node->given_count = 0;
|
||||
node->info = OBJ_NEW(ompi_list_t);
|
||||
}
|
||||
|
||||
|
@ -96,7 +96,7 @@ parse_line(int first, mca_llm_base_hostfile_node_t *node)
|
||||
|
||||
if (MCA_LLM_BASE_STRING == first) {
|
||||
strncpy(node->hostname, mca_llm_base_string, MAXHOSTNAMELEN);
|
||||
node->count = 1;
|
||||
node->given_count = 1;
|
||||
} else {
|
||||
parse_error();
|
||||
return OMPI_ERROR;
|
||||
@ -117,7 +117,7 @@ parse_line(int first, mca_llm_base_hostfile_node_t *node)
|
||||
ret = parse_count();
|
||||
if (ret < 0) return OMPI_ERROR;
|
||||
|
||||
node->count = ret;
|
||||
node->given_count = ret;
|
||||
break;
|
||||
|
||||
case MCA_LLM_BASE_STRING:
|
||||
|
@ -1 +1 @@
|
||||
localhost count=200
|
||||
localhost count=1
|
||||
|
@ -32,6 +32,10 @@ ompi_rte_allocate_resources(ompi_rte_spawn_handle_t *handle,
|
||||
errno = OMPI_ERR_BAD_PARAM;
|
||||
return NULL;
|
||||
}
|
||||
if (nodes != 0 && procs == 0) {
|
||||
errno = OMPI_ERR_BAD_PARAM;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* remove for multi-cell */
|
||||
assert(1 == handle->modules_len);
|
||||
|
@ -166,7 +166,13 @@ extern "C" {
|
||||
* for \c nodes for usage.
|
||||
* @return List of <code>ompi_rte_node_allocation_t</code>s
|
||||
* describing the allocated resources or NULL on
|
||||
* error (error will be in errno)
|
||||
* error (error will be in errno). If the
|
||||
* number of requested resources is not
|
||||
* available, errno will be set to \c
|
||||
* OMPI_ERR_OUT_OF_RESOURCE. This is not a
|
||||
* fatal error - \c ompi_rte_allocate_resources
|
||||
* can be called again, but with a smaller
|
||||
* resource request.
|
||||
*
|
||||
* @note In the future, a more complex resource allocation
|
||||
* function may be added, which allows for complicated
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user