1
1

* improve interface description for ompi_rte_allocate_resources

* make hostfile llm properly deal with the oversubscribe situation.  Rather
  than returning a smaller allocation than requested (which is no longer
  possible, as it made for a bookkeeping nightmare and no one was paying
  attention to it anyway), we just oversubscribe the nodes.  In the future, we
  need to add a flag to allocate resources indicating whether to allow
  oversubscription (if the resource allocator permits - clearly rsh
  does, rms not so much).

This commit was SVN r2808.
Этот коммит содержится в:
Brian Barrett 2004-09-22 22:27:40 +00:00
родитель a9010be2e5
Коммит bc6ecff582
8 изменённых файлов: 48 добавлений и 33 удалений

Просмотреть файл

@ -29,6 +29,8 @@ struct mca_llm_base_hostfile_node_t {
char hostname[MAXHOSTNAMELEN];
/** number of MPI processes Open MPI can start on this host */
int count;
/** count argument in the hostfile */
int given_count;
/** generic key=value storage mechanism */
ompi_list_t *info;
};

Просмотреть файл

@ -75,6 +75,7 @@ mca_llm_base_collapse_resources(ompi_list_t *hostlist)
(!has_conflicts(curr_node, check_node))) {
/* they are mergeable */
curr_node->count += check_node->count;
curr_node->given_count += check_node->given_count;
keyval_merge(curr_node, check_node);
/* delete from the list */

Просмотреть файл

@ -31,40 +31,39 @@ mca_llm_base_map_resources(int nodes,
} else if (0 == nodes && 0 != procs) {
/* allocate procs process count as dense as possible */
int alloc_procs = 0;
int iters = 0;
for (nodeitem = ompi_list_get_first(hostlist);
nodeitem != ompi_list_get_end(hostlist);
nodeitem = ompi_list_get_next(nodeitem)) {
node = (mca_llm_base_hostfile_node_t*) nodeitem;
/* loop until we are done */
for (iters = 1 ; alloc_procs < procs ; ++iters) {
for (nodeitem = ompi_list_get_first(hostlist);
nodeitem != ompi_list_get_end(hostlist);
nodeitem = ompi_list_get_next(nodeitem)) {
node = (mca_llm_base_hostfile_node_t*) nodeitem;
if (alloc_procs >= procs) {
/* we've allocated enough - release this guy from the
list */
tmp = ompi_list_remove_item(hostlist, nodeitem);
OBJ_RELEASE(nodeitem);
nodeitem = tmp;
} else if (alloc_procs + node->count < procs) {
/* the entire host allocation is needed... */
alloc_procs += node->count;
} else {
/* the entire host allocation isn't needed. dump the
unneeded parts */
node->count = procs - alloc_procs;
alloc_procs = procs;
if (alloc_procs >= procs) {
/* we've allocated enough. If we are on first
loop, remove from list. Otherwise, break out of
loop */
if (1 == iters) {
tmp = ompi_list_remove_item(hostlist, nodeitem);
OBJ_RELEASE(nodeitem);
nodeitem = tmp;
} else {
break;
}
} else if (alloc_procs + node->given_count <= procs) {
/* the entire host allocation is needed... */
node->count += node->given_count;
alloc_procs += node->given_count;
} else {
/* the entire host allocation isn't needed. dump the
unneeded parts */
node->count += procs - alloc_procs;
alloc_procs = procs;
}
}
}
} else if (0 != nodes && 0 == procs) {
/* allocate as many nodes as possible with each node having
one slot */
for (nodeitem = ompi_list_get_first(hostlist);
nodeitem != ompi_list_get_end(hostlist);
nodeitem = ompi_list_get_next(nodeitem)) {
node = (mca_llm_base_hostfile_node_t*) nodeitem;
node->count = 1;
}
} else if (0 != nodes && 0 != procs) {
/* allocate as best we can */
/* BWB - implement me */

Просмотреть файл

@ -89,6 +89,9 @@ void
llm_base_int_hostfile_node_construct(ompi_object_t *obj)
{
/* Constructor for a hostfile node: puts every field into a known
   empty state. obj is treated as a mca_llm_base_hostfile_node_t. */
mca_llm_base_hostfile_node_t *node = (mca_llm_base_hostfile_node_t*) obj;
/* start with an empty hostname string */
(node->hostname)[0] = '\0';
/* no slots allocated yet */
node->count = 0;
/* no count has been read from the hostfile yet */
node->given_count = 0;
/* fresh list object for the node's info entries */
node->info = OBJ_NEW(ompi_list_t);
}

Просмотреть файл

@ -96,7 +96,7 @@ parse_line(int first, mca_llm_base_hostfile_node_t *node)
if (MCA_LLM_BASE_STRING == first) {
strncpy(node->hostname, mca_llm_base_string, MAXHOSTNAMELEN);
node->count = 1;
node->given_count = 1;
} else {
parse_error();
return OMPI_ERROR;
@ -117,7 +117,7 @@ parse_line(int first, mca_llm_base_hostfile_node_t *node)
ret = parse_count();
if (ret < 0) return OMPI_ERROR;
node->count = ret;
node->given_count = ret;
break;
case MCA_LLM_BASE_STRING:

Просмотреть файл

@ -1 +1 @@
localhost count=200
localhost count=1

Просмотреть файл

@ -32,6 +32,10 @@ ompi_rte_allocate_resources(ompi_rte_spawn_handle_t *handle,
errno = OMPI_ERR_BAD_PARAM;
return NULL;
}
if (nodes != 0 && procs == 0) {
errno = OMPI_ERR_BAD_PARAM;
return NULL;
}
/* remove for multi-cell */
assert(1 == handle->modules_len);

Просмотреть файл

@ -166,7 +166,13 @@ extern "C" {
* for \c nodes for usage.
* @return List of <code>ompi_rte_node_allocation_t</code>s
* describing the allocated resources or NULL on
* error (error will be in errno)
* error (error will be in errno). If the
* number of requested resources is not
* available, errno will be set to \c
* OMPI_ERR_OUT_OF_RESOURCE. This is not a
* fatal error - \c ompi_rte_allocate_resources
* can be called again, but with a smaller
* resource request.
*
* @note In the future, a more complex resource allocation
* function may be added, which allows for complicated