diff --git a/src/mca/llm/base/base_internal.h b/src/mca/llm/base/base_internal.h index 548a9d48ff..e0276f2aa4 100644 --- a/src/mca/llm/base/base_internal.h +++ b/src/mca/llm/base/base_internal.h @@ -17,36 +17,99 @@ #include "mca/llm/base/base.h" #include "class/ompi_value_array.h" + +/** + * Container for per-node hostfile-specific data + */ +struct mca_llm_base_hostfile_node_t { + /** make us an instance of list item */ + ompi_list_item_t super; + /** hostname for this node. Can be used as generic description + field if hostnames aren't used on this platform */ + char hostname[MAXHOSTNAMELEN]; + /** number of MPI processes Open MPI can start on this host */ + int count; + /** generic key=value storage mechanism */ + ompi_list_t *info; +}; +/** shorten ompi_rte_base_hostfile_node_t declarations */ +typedef struct mca_llm_base_hostfile_node_t mca_llm_base_hostfile_node_t; +/** create the required instance information */ +OBJ_CLASS_DECLARATION(mca_llm_base_hostfile_node_t); + + +/** + * extra data for the \c ompi_rte_node_allocation_t structures when + * using the \c mca_llm_base_* functions. + */ +struct mca_llm_base_hostfile_data_t { + /** make ourselves an instance of the data base class */ + ompi_rte_node_allocation_data_t super; + /** keep a list of the hosts allocated to this description */ + ompi_list_t *hostlist; +}; +/** shorten ompi_rte_base_hostfile_data_t declarations */ +typedef struct mca_llm_base_hostfile_data_t mca_llm_base_hostfile_data_t; +/** create the required instance information */ +OBJ_CLASS_DECLARATION(mca_llm_base_hostfile_data_t); + + #if defined(c_plusplus) || defined(__cplusplus) extern "C" { #endif + /** + * Do all the pre-use setup code. This should only be called by + * unit tests or mca_llm_base_open. In other words, you probably + * don't want to call this function. 
+ */ + void mca_llm_base_setup(void); + /** * Parse input file and return a list of host entries + * + * \return ompi_list_t containing a list of + * mca_llm_base_hostfile_node_t information. */ ompi_list_t *mca_llm_base_parse_hostfile(const char* filename); - /** - * Rearrage the provide hostlist to meet the requirements of - * nodes / procs - */ - int mca_llm_base_map_resources(int nodes, - int procs, - ompi_list_t *hostlist); /** * Remove duplicate host entries from the list, editing * the count as appropriate and merging key=value pairs. * + * \param hostlist An ompi_list_t containing + * mca_llm_base_hostfile_node_t instances. + * * \note If the same key is used with different values, the hosts * are considered different. */ int mca_llm_base_collapse_resources(ompi_list_t *hostlist); + /** - * Deallocate resources allocated by parse hostfile + * Rearrange the provided hostlist to meet the requirements of + * nodes / procs. + * + * \param hostlist An ompi_list_t containing + * mca_llm_base_hostfile_node_t instances. */ - int mca_llm_base_deallocate(ompi_list_t *nodelist); + int mca_llm_base_map_resources(int nodes, + int procs, + ompi_list_t *hostlist); + + + /** + * Take a prepped (including mapped) list of + * mca_llm_base_hostfile_node_t instances and wrap it in an + * ompi_rte_node_allocation_t list.
+ */ + ompi_list_t *mca_llm_base_create_node_allocation(ompi_list_t *hostlist); + + /** + * free a list of mca_llm_base_hostfile_node_t instances + */ + void mca_llm_base_deallocate(ompi_list_t *hostlist); #if defined(c_plusplus) || defined(__cplusplus) } diff --git a/src/mca/llm/base/llm_base_collapse.c b/src/mca/llm/base/llm_base_collapse.c index c167afaa0b..bc94583d16 100644 --- a/src/mca/llm/base/llm_base_collapse.c +++ b/src/mca/llm/base/llm_base_collapse.c @@ -13,7 +13,7 @@ static bool -has_conflicts(ompi_rte_node_allocation_t *a, ompi_rte_node_allocation_t *b) +has_conflicts(mca_llm_base_hostfile_node_t *a, mca_llm_base_hostfile_node_t *b) { ompi_rte_valuepair_t *a_val, *b_val; ompi_list_item_t *a_item, *b_item; @@ -43,7 +43,8 @@ has_conflicts(ompi_rte_node_allocation_t *a, ompi_rte_node_allocation_t *b) static void -keyval_merge(ompi_rte_node_allocation_t *new, ompi_rte_node_allocation_t *old) +keyval_merge(mca_llm_base_hostfile_node_t *new, + mca_llm_base_hostfile_node_t *old) { ompi_list_item_t *old_item; @@ -56,19 +57,19 @@ keyval_merge(ompi_rte_node_allocation_t *new, ompi_rte_node_allocation_t *old) int mca_llm_base_collapse_resources(ompi_list_t *hostlist) { - ompi_rte_node_allocation_t *curr_node, *check_node; + mca_llm_base_hostfile_node_t *curr_node, *check_node; ompi_list_item_t *curr_nodeitem, *check_nodeitem, *tmp; for (curr_nodeitem = ompi_list_get_first(hostlist) ; curr_nodeitem != ompi_list_get_end(hostlist) ; curr_nodeitem = ompi_list_get_next(curr_nodeitem)) { - curr_node = (ompi_rte_node_allocation_t*) curr_nodeitem; + curr_node = (mca_llm_base_hostfile_node_t*) curr_nodeitem; for (check_nodeitem = ompi_list_get_next(curr_nodeitem) ; check_nodeitem != ompi_list_get_end(hostlist) ; check_nodeitem = ompi_list_get_next(check_nodeitem)) { - check_node = (ompi_rte_node_allocation_t*) check_nodeitem; + check_node = (mca_llm_base_hostfile_node_t*) check_nodeitem; if ((strcmp(curr_node->hostname, check_node->hostname) == 0) && 
(!has_conflicts(curr_node, check_node))) { diff --git a/src/mca/llm/base/llm_base_map.c b/src/mca/llm/base/llm_base_map.c index 57e57dee2f..28d4ed2c4a 100644 --- a/src/mca/llm/base/llm_base_map.c +++ b/src/mca/llm/base/llm_base_map.c @@ -16,7 +16,7 @@ mca_llm_base_map_resources(int nodes, int procs, ompi_list_t *hostlist) { - ompi_rte_node_allocation_t *node; + mca_llm_base_hostfile_node_t *node; ompi_list_item_t *nodeitem, *tmp; if (NULL == hostlist) { @@ -35,7 +35,7 @@ mca_llm_base_map_resources(int nodes, for (nodeitem = ompi_list_get_first(hostlist); nodeitem != ompi_list_get_end(hostlist); nodeitem = ompi_list_get_next(nodeitem)) { - node = (ompi_rte_node_allocation_t*) nodeitem; + node = (mca_llm_base_hostfile_node_t*) nodeitem; if (alloc_procs >= procs) { /* we've allocated enough - release this guy from the @@ -61,7 +61,7 @@ mca_llm_base_map_resources(int nodes, for (nodeitem = ompi_list_get_first(hostlist); nodeitem != ompi_list_get_end(hostlist); nodeitem = ompi_list_get_next(nodeitem)) { - node = (ompi_rte_node_allocation_t*) nodeitem; + node = (mca_llm_base_hostfile_node_t*) nodeitem; node->count = 1; } diff --git a/src/mca/llm/base/llm_base_open.c b/src/mca/llm/base/llm_base_open.c index b530326041..69ab94d519 100644 --- a/src/mca/llm/base/llm_base_open.c +++ b/src/mca/llm/base/llm_base_open.c @@ -8,6 +8,7 @@ #include "mca/base/base.h" #include "mca/llm/llm.h" #include "mca/llm/base/base.h" +#include "mca/llm/base/base_internal.h" #include "runtime/runtime_types.h" @@ -54,3 +55,67 @@ int mca_llm_base_open(void) /* All done */ return OMPI_SUCCESS; } + + +/* + * Object maintenance code + */ + +/** constructor for \c mca_llm_base_hostfile_data_t */ +static +void +llm_base_int_hostfile_data_construct(ompi_object_t *obj) +{ + mca_llm_base_hostfile_data_t *data = (mca_llm_base_hostfile_data_t*) obj; + data->hostlist = OBJ_NEW(ompi_list_t); +} + + +/** destructor for \c mca_llm_base_hostfile_data_t */ +static +void 
+llm_base_int_hostfile_data_destruct(ompi_object_t *obj) +{ + mca_llm_base_hostfile_data_t *data = (mca_llm_base_hostfile_data_t*) obj; + mca_llm_base_deallocate(data->hostlist); +} + + +/** constructor for \c mca_llm_base_hostfile_node_t */ +static +void +llm_base_int_hostfile_node_construct(ompi_object_t *obj) +{ + mca_llm_base_hostfile_node_t *node = (mca_llm_base_hostfile_node_t*) obj; + node->info = OBJ_NEW(ompi_list_t); +} + + +/** destructor for \c mca_llm_base_hostfile_node_t */ +static +void +llm_base_int_hostfile_node_destruct(ompi_object_t *obj) +{ + mca_llm_base_hostfile_node_t *node = (mca_llm_base_hostfile_node_t*) obj; + ompi_list_item_t *item; + + if (NULL == node->info) return; + + while (NULL != (item = ompi_list_remove_first(node->info))) { + OBJ_RELEASE(item); + } + + OBJ_RELEASE(node->info); +} + + +/** create instance information for \c mca_llm_base_hostfile_data_t */ +OBJ_CLASS_INSTANCE(mca_llm_base_hostfile_data_t, + ompi_rte_node_allocation_data_t, + llm_base_int_hostfile_data_construct, + llm_base_int_hostfile_data_destruct); +/** create instance information for \c mca_llm_base_hostfile_node_t */ +OBJ_CLASS_INSTANCE(mca_llm_base_hostfile_node_t, + ompi_list_item_t, + llm_base_int_hostfile_node_construct, + llm_base_int_hostfile_node_destruct); diff --git a/src/mca/llm/base/llm_base_parse_hostfile.c b/src/mca/llm/base/llm_base_parse_hostfile.c index fa46dae7bb..e159fb0231 100644 --- a/src/mca/llm/base/llm_base_parse_hostfile.c +++ b/src/mca/llm/base/llm_base_parse_hostfile.c @@ -16,7 +16,7 @@ #include "runtime/runtime_types.h" static void parse_error(void); -static int parse_keyval(int, ompi_rte_node_allocation_t*); +static int parse_keyval(int, mca_llm_base_hostfile_node_t*); static void parse_error() @@ -27,7 +27,7 @@ parse_error() static int -parse_keyval(int first, ompi_rte_node_allocation_t *node) +parse_keyval(int first, mca_llm_base_hostfile_node_t *node) { int val; char *key, *value; @@ -89,7 +89,7 @@ parse_count(void) static int 
-parse_line(int first, ompi_rte_node_allocation_t *node) +parse_line(int first, mca_llm_base_hostfile_node_t *node) { int val; int ret; @@ -139,7 +139,7 @@ parse_line(int first, ompi_rte_node_allocation_t *node) ompi_list_t * mca_llm_base_parse_hostfile(const char *hostfile) { - ompi_rte_node_allocation_t *newnode; + mca_llm_base_hostfile_node_t *newnode; ompi_list_t *list; int val, ret; @@ -169,7 +169,7 @@ mca_llm_base_parse_hostfile(const char *hostfile) break; case MCA_LLM_BASE_STRING: - newnode = OBJ_NEW(ompi_rte_node_allocation_t); + newnode = OBJ_NEW(mca_llm_base_hostfile_node_t); ret = parse_line(val, newnode); if (OMPI_SUCCESS != ret) { OBJ_RELEASE(newnode); diff --git a/src/mca/llm/base/llm_base_util.c b/src/mca/llm/base/llm_base_util.c index 048171e966..d4c493b123 100644 --- a/src/mca/llm/base/llm_base_util.c +++ b/src/mca/llm/base/llm_base_util.c @@ -9,19 +9,90 @@ #include "mca/llm/base/base.h" #include "mca/llm/base/base_internal.h" -int -mca_llm_base_deallocate(ompi_list_t *nodelist) + +void +mca_llm_base_deallocate(ompi_list_t *hostlist) { - ompi_rte_node_allocation_t *node; ompi_list_item_t *item; - while (NULL != (item = ompi_list_remove_first(nodelist))) { - node = (ompi_rte_node_allocation_t*) item; - OBJ_RELEASE(node); + if (NULL == hostlist) return; + + while (NULL != (item = ompi_list_remove_first(hostlist))) { + OBJ_RELEASE(item); } - OBJ_RELEASE(nodelist); - - return OMPI_SUCCESS; + OBJ_RELEASE(hostlist); } + +static +ompi_rte_node_allocation_t* +get_allocation_for_size(int count, ompi_list_t *nodelist) +{ + ompi_list_item_t *nodeitem; + ompi_rte_node_allocation_t *node; + mca_llm_base_hostfile_data_t *data; + + for (nodeitem = ompi_list_get_first(nodelist) ; + nodeitem != ompi_list_get_end(nodelist) ; + nodeitem = ompi_list_get_next(nodeitem) ) { + node = (ompi_rte_node_allocation_t*) nodeitem; + + if (node->count == count) { + return node; + } + } + + /* no joy...
make one and put it in the list */ + node = OBJ_NEW(ompi_rte_node_allocation_t); + node->count = count; + ompi_list_append(nodelist, (ompi_list_item_t*) node); + + data = OBJ_NEW(mca_llm_base_hostfile_data_t); + node->data = (ompi_rte_node_allocation_data_t*) data; + + return node; +} + + +ompi_list_t* +mca_llm_base_create_node_allocation(ompi_list_t *hostlist) +{ + ompi_list_t *nodelist; + mca_llm_base_hostfile_node_t *host; + mca_llm_base_hostfile_data_t *data; + ompi_rte_node_allocation_t *node; + ompi_list_item_t *hostitem, *nodeitem; + int start_count = 0; + + nodelist = OBJ_NEW(ompi_list_t); + + /* This is going to be slow as molasses in January in + * Alaska. Iterate through the list of hosts and group them in + * ompi_rte_node_allocation_t structures. Then take those and + * iterate through, setting the start numbers. So nothing too + * horrible, right? + */ + + /* on with the partitioning */ + while (NULL != (hostitem = ompi_list_remove_first(hostlist))) { + host = (mca_llm_base_hostfile_node_t*) hostitem; + node = get_allocation_for_size(host->count, nodelist); + data = (mca_llm_base_hostfile_data_t*) node->data; + node->nodes++; + + ompi_list_append(data->hostlist, (ompi_list_item_t*) host); + } + + /* and fix the start numbers */ + start_count = 0; + for (nodeitem = ompi_list_get_first(nodelist) ; + nodeitem != ompi_list_get_end(nodelist) ; + nodeitem = ompi_list_get_next(nodeitem) ) { + node = (ompi_rte_node_allocation_t*) nodeitem; + node->start = start_count; + start_count += (node->nodes * node->count); + } + + return nodelist; +} diff --git a/src/mca/llm/hostfile/src/llm_hostfile_allocate.c b/src/mca/llm/hostfile/src/llm_hostfile_allocate.c index 848e4fb5d2..d9f411699a 100644 --- a/src/mca/llm/hostfile/src/llm_hostfile_allocate.c +++ b/src/mca/llm/hostfile/src/llm_hostfile_allocate.c @@ -17,25 +17,32 @@ extern char *mca_llm_hostfile_filename; ompi_list_t* mca_llm_hostfile_allocate_resources(int
jobid, int nodes, int procs) { + ompi_list_t *hostlist = NULL; ompi_list_t *nodelist = NULL; ompi_list_item_t *nodeitem; int ret; /* start by getting the full list of available resources */ - nodelist = mca_llm_base_parse_hostfile(mca_llm_hostfile_filename); - if (NULL == nodelist) { + hostlist = mca_llm_base_parse_hostfile(mca_llm_hostfile_filename); + if (NULL == hostlist) { return NULL; } - ret = mca_llm_base_collapse_resources(nodelist); + ret = mca_llm_base_collapse_resources(hostlist); if (OMPI_SUCCESS != ret) { - mca_llm_base_deallocate(nodelist); + mca_llm_base_deallocate(hostlist); return NULL; } - ret = mca_llm_base_map_resources(nodes, procs, nodelist); + ret = mca_llm_base_map_resources(nodes, procs, hostlist); if (OMPI_SUCCESS != ret) { - mca_llm_base_deallocate(nodelist); + mca_llm_base_deallocate(hostlist); + return NULL; + } + + nodelist = mca_llm_base_create_node_allocation(hostlist); + if (NULL == nodelist) { + mca_llm_base_deallocate(hostlist); return NULL; } diff --git a/src/mca/llm/hostfile/src/llm_hostfile_deallocate.c b/src/mca/llm/hostfile/src/llm_hostfile_deallocate.c index 06493d20a9..9b3da7a509 100644 --- a/src/mca/llm/hostfile/src/llm_hostfile_deallocate.c +++ b/src/mca/llm/hostfile/src/llm_hostfile_deallocate.c @@ -14,6 +14,19 @@ int mca_llm_hostfile_deallocate_resources(int jobid, ompi_list_t *nodelist) { - mca_llm_base_deallocate(nodelist); + ompi_list_item_t *item; + + /* pop off all the ompi_rte_node_allocation_t instances and free + * them. Their destructors will kill the + * mca_llm_base_hostfile_data_t, whose destructor will kill the + * mca_llm_base_hostfile_node_t instances associated with the + * node_allocation.
In other words, everything goes "bye-bye" + */ + while (NULL != (item = ompi_list_remove_first(nodelist))) { + OBJ_RELEASE(item); + } + + OBJ_RELEASE(nodelist); + return OMPI_SUCCESS; } diff --git a/src/mca/llm/llm.h b/src/mca/llm/llm.h index edbf8ec93e..ad23f4efd8 100644 --- a/src/mca/llm/llm.h +++ b/src/mca/llm/llm.h @@ -97,22 +97,20 @@ typedef mca_llm_base_component_1_0_0_t mca_llm_base_component_t; * called once per jobid. * * @param jobid (IN) Jobid with which to associate the given resources. - * @param nodes (IN) Number of nodes to try to allocate. If 0, - * the LLM will try to allocate procs - * processes on as many nodes as are needed. If non-zero, - * will try to fairly distribute procs - * processes over the nodes. If procs is 0, - * will attempt to allocate all cpus on - * nodes nodes + * @param nodes (IN) Number of nodes to try to allocate. If 0, the + * allocator will try to allocate \c procs processes + * on as many nodes as are needed. If non-zero, + * will try to allocate \c procs process slots + * per node. * @param procs (IN) Number of processors to try to allocate. See the note * for nodes for usage. * @param nodelist (OUT) List of ompi_rte_node_allocation_ts * describing the allocated resources. * - * @warning The type for jobid will change in the near future */ -typedef ompi_list_t* -(*mca_llm_base_allocate_resources_fn_t)(int jobid, int nodes,int procs); +typedef ompi_list_t * +(*mca_llm_base_allocate_resources_fn_t)(mca_ns_base_jobid_t jobid, + int nodes,int procs); /** @@ -123,10 +121,8 @@ typedef ompi_list_t* * @param jobid (IN) Jobid associated with the resources to be freed. * @param nodes (IN) Nodelist from associated allocate_resource call. * All associated memory will be freed as appropriate. - * - * @warning The type for jobid will change in the near future.
*/ -typedef int (*mca_llm_base_deallocate_resources_fn_t)(int jobid, +typedef int (*mca_llm_base_deallocate_resources_fn_t)(mca_ns_base_jobid_t jobid, ompi_list_t *nodelist); @@ -137,10 +133,10 @@ typedef int (*mca_llm_base_deallocate_resources_fn_t)(int jobid, * pointers to the calling interface. */ struct mca_llm_base_module_1_0_0_t { - /** Function to be called on resource request */ - mca_llm_base_allocate_resources_fn_t llm_allocate_resources; - /** Function to be called on resource return */ - mca_llm_base_deallocate_resources_fn_t llm_deallocate_resources; + /** Function to be called on resource request */ + mca_llm_base_allocate_resources_fn_t llm_allocate_resources; + /** Function to be called on resource return */ + mca_llm_base_deallocate_resources_fn_t llm_deallocate_resources; }; /** shorten mca_llm_base_module_1_0_0_t declaration */ typedef struct mca_llm_base_module_1_0_0_t mca_llm_base_module_1_0_0_t; diff --git a/src/mca/pcm/base/base.h b/src/mca/pcm/base/base.h index bd3ebee477..b1e032c703 100644 --- a/src/mca/pcm/base/base.h +++ b/src/mca/pcm/base/base.h @@ -11,7 +11,7 @@ #include "include/types.h" #include "mca/mca.h" #include "mca/pcm/pcm.h" - +#include "mca/llm/base/base_internal.h" /* * Global functions for MCA overall collective open and close @@ -30,19 +30,19 @@ extern "C" { int mca_pcm_base_send_schedule(FILE *fd, int jobid, ompi_rte_node_schedule_t *sched, - ompi_list_t *nodelist); + int num_procs); int mca_pcm_base_recv_schedule(FILE *fd, int *jobid, ompi_rte_node_schedule_t *sched, - ompi_list_t *nodelist); + int *num_procs); int mca_pcm_base_build_base_env(char **in_env, char ***out_envp); int mca_pcm_base_ioexecvp(char **cmdv, int showout, char *outbuff, int outbuffsize, int stderr_is_err); - char* mca_pcm_base_get_username(ompi_rte_node_allocation_t *node); + char* mca_pcm_base_get_username(mca_llm_base_hostfile_node_t *node); #if defined(c_plusplus) || defined(__cplusplus) } diff --git a/src/mca/pcm/base/pcm_base_comm.c 
b/src/mca/pcm/base/pcm_base_comm.c index 784ae57ba5..ee1d5b284b 100644 --- a/src/mca/pcm/base/pcm_base_comm.c +++ b/src/mca/pcm/base/pcm_base_comm.c @@ -9,6 +9,7 @@ #include "include/constants.h" #include "class/ompi_list.h" #include "mca/pcm/base/base.h" +#include "mca/llm/base/base_internal.h" #define START_KEY "@MCA_PCM@\n" #define END_KEY "@MCA_PCM_END@\n" @@ -19,13 +20,12 @@ int mca_pcm_base_send_schedule(FILE *fp, int jobid, - ompi_rte_node_schedule_t *sched, - ompi_list_t *nodelist) + ompi_rte_node_schedule_t *sched, + int num_procs) { int i, envc; - ompi_list_item_t *node_item, *info_item; - ompi_rte_node_allocation_t *node; - ompi_rte_valuepair_t *valpair; + ompi_list_item_t *node_item; + mca_llm_base_hostfile_data_t *node; fprintf(fp, START_KEY); fprintf(fp, "%d\n", PROTOCOL_VERSION); @@ -57,30 +57,8 @@ mca_pcm_base_send_schedule(FILE *fp, (strlen(sched->cwd) > 0) ? sched->cwd : ""); fflush(fp); - /* NODE LIST */ - fprintf(fp, "%d\n", (int) ompi_list_get_size(nodelist)); - for (node_item = ompi_list_get_first(nodelist) ; - node_item != ompi_list_get_end(nodelist) ; - node_item = ompi_list_get_next(node_item)) { - node = (ompi_rte_node_allocation_t*) node_item; - - fprintf(fp, NODE_KEY); - fprintf(fp, "%d %s\n", (int) strlen(node->hostname), - node->hostname); - fprintf(fp, "%d\n", node->count); - - /* INFO */ - fprintf(fp, "%d\n", (int) ompi_list_get_size(node->info)); - for (info_item = ompi_list_get_first(node->info) ; - info_item != ompi_list_get_end(node->info) ; - info_item = ompi_list_get_next(info_item)) { - valpair = (ompi_rte_valuepair_t*) info_item; - - fprintf(fp, "%d %d %s %s\n", - (int) strlen(valpair->key), (int) strlen(valpair->value), - valpair->key, valpair->value); - } - } + /* number of processes to start */ + fprintf(fp, "%d\n", num_procs); /* * so we've basically ignored the fact we might error out up until @@ -236,139 +214,11 @@ get_argv_array(FILE *fp, int *argcp, char ***argvp) } -static int -get_keyval(FILE *fp, char **keyp, char 
**valp) -{ - int ret; - char *key, *val; - int keylen, vallen; - size_t str_read;; - - ret = fscanf(fp, "%d %d ", &keylen, &vallen); - if (ret != 2) return OMPI_ERROR; - - key = (char*) malloc(sizeof(char) * (keylen + 2)); - if (NULL == key) return OMPI_ERROR; - - val = (char*) malloc(sizeof(char) * (vallen + 2)); - if (NULL == val) { - free(key); - return OMPI_ERROR; - } - - /* get the key */ - str_read = fread(key, keylen, 1, fp); - if (str_read != 1) { - free(key); - free(val); - return OMPI_ERROR; - } - - /* get the space */ - ret = fgetc(fp); - if (ret != ' ') { - free(key); - free(val); - return OMPI_ERROR; - } - - /* get the value */ - str_read = fread(val, vallen, 1, fp); - if (str_read != 1) { - free(key); - free(val); - return OMPI_ERROR; - } - - /* get the end of line newline */ - ret = fgetc(fp); - if (ret != '\n') { - free(key); - free(val); - return OMPI_ERROR; - } - - return OMPI_SUCCESS; -} - - -static int -get_nodeinfo(FILE *fp, ompi_list_t *info) -{ - ompi_rte_valuepair_t *newinfo; - int ret; - int info_len; - int i; - - ret = fscanf(fp, "%d\n", &info_len); - if (ret != 1) return OMPI_ERROR; - - for (i = 0 ; i < info_len ; ++i) { - ret = get_keyval(fp, &(newinfo->key), &(newinfo->value)); - if (OMPI_SUCCESS != ret) { - OBJ_RELEASE(newinfo); - return ret; - } - - ompi_list_append(info, (ompi_list_item_t*) newinfo); - } - - return OMPI_SUCCESS; -} - - -static int -get_nodelist(FILE *fp, ompi_list_t *nodelist) -{ - int nodelist_len; - int ret; - ompi_rte_node_allocation_t *newnode; - int i; - char *tmpstr; - - ret = fscanf(fp, "%d\n", &nodelist_len); - if (ret != 1) return OMPI_ERROR; - - for (i = 0 ; i < nodelist_len ; ++i) { - /* make sure we have a key */ - ret = get_key(fp, NODE_KEY); - if (OMPI_SUCCESS != ret) return ret; - - /* create the node */ - newnode = OBJ_NEW(ompi_rte_node_allocation_t); - /* fill in fields */ - ret = get_string(fp, &tmpstr); - if (OMPI_SUCCESS != ret) { - OBJ_RELEASE(newnode); - return OMPI_ERROR; - } - 
strncpy(newnode->hostname, tmpstr, sizeof(newnode->hostname)); - free(tmpstr); - - ret = fscanf(fp, "%d\n", &(newnode->count)); - if (ret != 1) { - OBJ_RELEASE(newnode); - return OMPI_ERROR; - } - - ret = get_nodeinfo(fp, newnode->info); - if (OMPI_SUCCESS != ret) { - OBJ_RELEASE(newnode); - return OMPI_ERROR; - } - - ompi_list_append(nodelist, (ompi_list_item_t*) newnode); - } - - return OMPI_SUCCESS; -} - - int mca_pcm_base_recv_schedule(FILE *fp, int *jobid, ompi_rte_node_schedule_t *sched, - ompi_list_t *nodelist) + int *num_procs) { int ret, val; @@ -396,9 +246,9 @@ mca_pcm_base_recv_schedule(FILE *fp, ret = get_string(fp, &(sched->cwd)); if (OMPI_SUCCESS != ret) return ret; - /* get node list */ - ret = get_nodelist(fp, nodelist); - if (OMPI_SUCCESS != ret) return ret; + /* get num procs */ + ret = get_int(fp, num_procs); + if (OMPI_SUCCESS != ret) return ret; /* make sure we have our end */ ret = get_key(fp, END_KEY); diff --git a/src/mca/pcm/base/pcm_base_util.c b/src/mca/pcm/base/pcm_base_util.c index c53aae0609..4deb4b7f79 100644 --- a/src/mca/pcm/base/pcm_base_util.c +++ b/src/mca/pcm/base/pcm_base_util.c @@ -13,8 +13,8 @@ #include "util/argv.h" #include "runtime/runtime_types.h" #include "mca/pcm/base/base.h" -#include "mca/pcm/base/base.h" - +#include "mca/llm/base/base.h" +#include "mca/llm/base/base_internal.h" char * mca_pcm_base_no_unique_name(void) @@ -48,13 +48,13 @@ mca_pcm_base_build_base_env(char **in_env, char ***out_envp) char * -mca_pcm_base_get_username(ompi_rte_node_allocation_t *node) +mca_pcm_base_get_username(mca_llm_base_hostfile_node_t *host) { ompi_list_item_t *item; ompi_rte_valuepair_t *valpair; - for (item = ompi_list_get_first(node->info) ; - item != ompi_list_get_end(node->info) ; + for (item = ompi_list_get_first(host->info) ; + item != ompi_list_get_end(host->info) ; item = ompi_list_get_next(item)) { valpair = (ompi_rte_valuepair_t*) item; if (0 == strcmp("user", valpair->key)) return valpair->value; diff --git a/src/mca/pcm/rsh/src/pcm_rsh_spawn.c
b/src/mca/pcm/rsh/src/pcm_rsh_spawn.c index 38635627be..2ae5ec7397 100644 --- a/src/mca/pcm/rsh/src/pcm_rsh_spawn.c +++ b/src/mca/pcm/rsh/src/pcm_rsh_spawn.c @@ -37,7 +37,7 @@ #define PRS_BUFSIZE 1024 static int internal_spawn_proc(int jobid, ompi_rte_node_schedule_t *sched, - ompi_list_t *nodelist, + ompi_list_t *hostlist, int my_start_vpid, int global_start_vpid, int num_procs); @@ -55,9 +55,11 @@ mca_pcm_rsh_can_spawn(void) int mca_pcm_rsh_spawn_procs(int jobid, ompi_list_t *schedlist) { - ompi_list_item_t *sched_item, *node_item; + ompi_list_item_t *sched_item, *node_item, *host_item; ompi_rte_node_schedule_t *sched; ompi_rte_node_allocation_t *node; + mca_llm_base_hostfile_data_t *data; + mca_llm_base_hostfile_node_t *host; ompi_list_t launch; ompi_list_t done; int ret, i; @@ -65,12 +67,11 @@ mca_pcm_rsh_spawn_procs(int jobid, ompi_list_t *schedlist) int local_start_vpid = 0; int global_start_vpid = 0; int num_procs = 0; + int tmp_count; OBJ_CONSTRUCT(&launch, ompi_list_t); OBJ_CONSTRUCT(&done, ompi_list_t); - - for (sched_item = ompi_list_get_first(schedlist) ; sched_item != ompi_list_get_end(schedlist) ; sched_item = ompi_list_get_next(sched_item)) { @@ -80,7 +81,11 @@ mca_pcm_rsh_spawn_procs(int jobid, ompi_list_t *schedlist) node_item != ompi_list_get_end(sched->nodelist) ; node_item = ompi_list_get_next(node_item)) { node = (ompi_rte_node_allocation_t*) node_item; - num_procs += node->count; + if (node->nodes > 0) { + num_procs += (node->count * node->nodes); + } else { + num_procs += node->count; + } } } @@ -92,49 +97,64 @@ mca_pcm_rsh_spawn_procs(int jobid, ompi_list_t *schedlist) sched_item = ompi_list_get_next(sched_item)) { sched = (ompi_rte_node_schedule_t*) sched_item; - /* - * make sure I'm the first node in the list and then start our - * deal. We rsh me just like everyone else so that we don't - * have any unexpected environment oddities... - */ - /* BWB - do front of list check! 
*/ - node_item = ompi_list_get_first(sched->nodelist); + for (node_item = ompi_list_get_first(sched->nodelist) ; + node_item != ompi_list_get_end(sched->nodelist) ; + node_item = ompi_list_get_next(node_item) ) { + node = (ompi_rte_node_allocation_t*) node_item; + data = (mca_llm_base_hostfile_data_t*) node->data; - while (node_item != ompi_list_get_end(sched->nodelist)) { - /* find enough entries for this slice to go */ - for (i = 0 ; - i < width && node_item != ompi_list_get_end(sched->nodelist) ; - node_item = ompi_list_get_next(node_item), ++i) { } - /* if we don't have anyone, get us out of here.. */ - if (i == 0) { - continue; - } + /* + * make sure I'm the first node in the list and then start + * our deal. We rsh me just like everyone else so that we + * don't have any unexpected environment oddities... + */ + /* BWB - do front of list check! */ + host_item = ompi_list_get_first(data->hostlist); - /* make a launch list */ - ompi_list_splice(&launch, ompi_list_get_end(&launch), - sched->nodelist, - ompi_list_get_first(sched->nodelist), - node_item); + while (host_item != ompi_list_get_end(data->hostlist)) { + /* find enough entries for this slice to go */ + tmp_count = 0; + for (i = 0 ; + i < width && + host_item != ompi_list_get_end(data->hostlist) ; + host_item = ompi_list_get_next(host_item), ++i) { + host = (mca_llm_base_hostfile_node_t*) host_item; + tmp_count += host->count; + } + /* if we don't have anyone, get us out of here.. */ + if (i == 0) { + continue; + } - /* do the launch to the first node in the list, passing - him the rest of the list */ - ret = internal_spawn_proc(jobid, sched, &launch, - local_start_vpid, global_start_vpid, - num_procs); - if (OMPI_SUCCESS != ret) { - /* well, crap! put ourselves back together, I guess. 
- Should call killjob */ + /* make a launch list */ + ompi_list_splice(&launch, ompi_list_get_end(&launch), + data->hostlist, + ompi_list_get_first(data->hostlist), + host_item); + + /* do the launch to the first node in the list, passing + him the rest of the list */ + ret = internal_spawn_proc(jobid, sched, &launch, + local_start_vpid, global_start_vpid, + num_procs); + if (OMPI_SUCCESS != ret) { + /* well, crap! put ourselves back together, I guess. + Should call killjob */ + ompi_list_join(&done, ompi_list_get_end(&done), &launch); + ompi_list_join(data->hostlist, + ompi_list_get_first(data->hostlist), + &done); + return ret; + } + local_start_vpid += tmp_count; + + /* copy the list over to the done part */ ompi_list_join(&done, ompi_list_get_end(&done), &launch); - ompi_list_join(sched->nodelist, - ompi_list_get_first(sched->nodelist), - &done); - return ret; } - local_start_vpid += - ((ompi_rte_node_allocation_t*) ompi_list_get_first(&launch))->count; - /* copy the list over to the done part */ - ompi_list_join(&done, ompi_list_get_end(&done), &launch); + /* put the list back where we found it... 
*/ + ompi_list_join(data->hostlist, ompi_list_get_end(data->hostlist), + &done); } } @@ -146,7 +166,7 @@ mca_pcm_rsh_spawn_procs(int jobid, ompi_list_t *schedlist) static int -internal_need_profile(ompi_rte_node_allocation_t *start_node, +internal_need_profile(mca_llm_base_hostfile_node_t *start_node, int stderr_is_error, bool *needs_profile) { struct passwd *p; @@ -253,12 +273,12 @@ cleanup: static int internal_spawn_proc(int jobid, ompi_rte_node_schedule_t *sched, - ompi_list_t *nodelist, int my_start_vpid, + ompi_list_t *hostlist, int my_start_vpid, int global_start_vpid, int num_procs) { int kidstdin[2]; /* child stdin pipe */ bool needs_profile = false; - ompi_rte_node_allocation_t *start_node; + mca_llm_base_hostfile_node_t *start_node; char** cmdv = NULL; char *cmd0 = NULL; int cmdc = 0; @@ -272,7 +292,7 @@ internal_spawn_proc(int jobid, ompi_rte_node_schedule_t *sched, int i; char *tmp; - start_node = (ompi_rte_node_allocation_t*) ompi_list_get_first(nodelist); + start_node = (mca_llm_base_hostfile_node_t*) ompi_list_get_first(hostlist); /* * Check to see if we need to do the .profile thing @@ -306,7 +326,7 @@ internal_spawn_proc(int jobid, ompi_rte_node_schedule_t *sched, /* build the command to start */ ompi_argv_append(&cmdc, &cmdv, BOOTAGENT); - +#if 1 /* starting vpid for launchee's procs */ tmp = ltostr(my_start_vpid); ompi_argv_append(&cmdc, &cmdv, "--local_start_vpid"); @@ -324,7 +344,7 @@ internal_spawn_proc(int jobid, ompi_rte_node_schedule_t *sched, ompi_argv_append(&cmdc, &cmdv, "--num_procs"); ompi_argv_append(&cmdc, &cmdv, tmp); free(tmp); - +#endif /* add the end of the .profile thing if required */ if (needs_profile) { ompi_argv_append(&cmdc, &cmdv, ")"); @@ -377,7 +397,7 @@ internal_spawn_proc(int jobid, ompi_rte_node_schedule_t *sched, /* send our stuff down the wire */ fp = fdopen(kidstdin[1], "a"); if (fp == NULL) { perror("fdopen"); abort(); } - ret = mca_pcm_base_send_schedule(fp, jobid, sched, nodelist); + ret = 
mca_pcm_base_send_schedule(fp, jobid, sched, start_node->count); fclose(fp); if (OMPI_SUCCESS != ret) { kill(pid, SIGTERM); diff --git a/src/runtime/ompi_rte_init.c b/src/runtime/ompi_rte_init.c index 41f5538861..62338be650 100644 --- a/src/runtime/ompi_rte_init.c +++ b/src/runtime/ompi_rte_init.c @@ -302,6 +302,8 @@ ompi_rte_int_node_schedule_destruct(ompi_object_t *obj) ompi_rte_node_allocation_t *node; ompi_list_item_t *item; + if (NULL == sched->nodelist) return; + while (NULL != (item = ompi_list_remove_first(sched->nodelist))) { node = (ompi_rte_node_allocation_t*) item; OBJ_RELEASE(node); @@ -317,7 +319,10 @@ void ompi_rte_int_node_allocation_construct(ompi_object_t *obj) { ompi_rte_node_allocation_t *node = (ompi_rte_node_allocation_t*) obj; - node->info = OBJ_NEW(ompi_list_t); + node->start = 0; + node->nodes = 0; + node->count = 0; + node->data = NULL; } @@ -327,15 +332,10 @@ void ompi_rte_int_node_allocation_destruct(ompi_object_t *obj) { ompi_rte_node_allocation_t *node = (ompi_rte_node_allocation_t*) obj; - ompi_rte_valuepair_t *valpair; - ompi_list_item_t *item; - while (NULL != (item = ompi_list_remove_first(node->info))) { - valpair = (ompi_rte_valuepair_t*) item; - OBJ_RELEASE(valpair); - } + if (NULL == node->data) return; - OBJ_RELEASE(node->info); + OBJ_RELEASE(node->data); } @@ -372,3 +372,6 @@ OBJ_CLASS_INSTANCE(ompi_rte_node_allocation_t, ompi_list_item_t, OBJ_CLASS_INSTANCE(ompi_rte_valuepair_t, ompi_list_item_t, ompi_rte_int_valuepair_construct, ompi_rte_int_valuepair_destruct); +/** create instance information for \c ompi_rte_node_allocation_data_t */ +OBJ_CLASS_INSTANCE(ompi_rte_node_allocation_data_t, ompi_object_t, + NULL, NULL); diff --git a/src/runtime/ompi_rte_llm.c b/src/runtime/ompi_rte_llm.c index 9675738f27..1fcdc94861 100644 --- a/src/runtime/ompi_rte_llm.c +++ b/src/runtime/ompi_rte_llm.c @@ -12,7 +12,7 @@ extern mca_pcm_base_module_t mca_pcm; ompi_list_t* -ompi_rte_allocate_resources(int jobid, int nodes, int procs) 
+ompi_rte_allocate_resources(mca_ns_base_jobid_t jobid, int nodes, int procs) { if (NULL == mca_pcm.pcm_allocate_resources) { return NULL; @@ -23,7 +23,7 @@ ompi_rte_allocate_resources(int jobid, int nodes, int procs) int -ompi_rte_deallocate_resources(int jobid, ompi_list_t *nodelist) +ompi_rte_deallocate_resources(mca_ns_base_jobid_t jobid, ompi_list_t *nodelist) { if (NULL == mca_pcm.pcm_deallocate_resources) { return OMPI_ERROR; diff --git a/src/runtime/ompi_rte_pcm.c b/src/runtime/ompi_rte_pcm.c index 51f7e76e28..6a96617612 100644 --- a/src/runtime/ompi_rte_pcm.c +++ b/src/runtime/ompi_rte_pcm.c @@ -25,7 +25,7 @@ ompi_rte_can_spawn(void) int -ompi_rte_spawn_procs(int jobid, ompi_list_t *schedule_list) +ompi_rte_spawn_procs(mca_ns_base_jobid_t jobid, ompi_list_t *schedule_list) { if (NULL == mca_pcm.pcm_spawn_procs) { return OMPI_ERROR; @@ -69,7 +69,7 @@ ompi_rte_kill_proc(ompi_process_name_t *name, int flags) int -ompi_rte_kill_job(int jobid, int flags) +ompi_rte_kill_job(mca_ns_base_jobid_t jobid, int flags) { if (NULL == mca_pcm.pcm_kill_job) { return OMPI_ERROR; diff --git a/src/runtime/runtime.h b/src/runtime/runtime.h index b1c5c23a80..85b7de2e59 100644 --- a/src/runtime/runtime.h +++ b/src/runtime/runtime.h @@ -14,6 +14,7 @@ #include "ompi_config.h" #include "runtime/runtime_types.h" +#include "mca/ns/ns.h" /* For backwards compatibility. If you only need MPI stuff, please include mpiruntime/mpiruntime.h directly */ @@ -110,21 +111,23 @@ extern "C" { * once per jobid. * * @param jobid (IN) Jobid with which to associate the given resources. - * @param nodes (IN) Number of nodes to try to allocate. If 0, the - * LLM will try to allocate procs - * processes on as many nodes as are needed. If - * non-zero, will try to fairly distribute - * procs processes over the nodes. - * If procs is 0, will attempt to - * allocate all cpus on nodes nodes + * @param nodes (IN) Number of nodes to try to allocate. 
If 0, the + * allocator will try to allocate \c procs processes + * on as many nodes as are needed. If non-zero, + * will try to allocate \c procs process slots + * per node. * @param procs (IN) Number of processors to try to allocate. See the note * for nodes for usage. * @return List of ompi_rte_node_allocation_ts * describing the allocated resources. * - * @warning The type for jobid will change in the near future + * @note In the future, a more complex resource allocation + * function may be added, which allows for complicated + * resource requests. This function will continue to exist + * as a special case of that function. */ - ompi_list_t* ompi_rte_allocate_resources(int jobid, int nodes, int procs); + ompi_list_t* ompi_rte_allocate_resources(mca_ns_base_jobid_t jobid, + int nodes, int procs); /** @@ -144,9 +147,9 @@ extern "C" { * of \c mca_pcm_base_schedule_t structures, which give both process * and location information. * - * @warning Parameter list will probably change in the near future. */ - int ompi_rte_spawn_procs(int jobid, ompi_list_t *schedule_list); + int ompi_rte_spawn_procs(mca_ns_base_jobid_t jobid, + ompi_list_t *schedule_list); /** @@ -210,7 +213,7 @@ extern "C" { * future compatibility. Will be used to specify how to kill * processes (0 will be same as a "kill " */ - int ompi_rte_kill_job(int jobid, int flags); + int ompi_rte_kill_job(mca_ns_base_jobid_t jobid, int flags); /** @@ -221,10 +224,9 @@ extern "C" { * @param jobid (IN) Jobid associated with the resources to be freed. * @param nodes (IN) Nodelist from associated allocate_resource call. * All associated memory will be freed as appropriate. - * - * @warning The type for jobid will change in the near future. 
*/ - int ompi_rte_deallocate_resources(int jobid, ompi_list_t *nodelist); + int ompi_rte_deallocate_resources(mca_ns_base_jobid_t jobid, + ompi_list_t *nodelist); /** diff --git a/src/runtime/runtime_types.h b/src/runtime/runtime_types.h index 10eab863ac..5991c4b52f 100644 --- a/src/runtime/runtime_types.h +++ b/src/runtime/runtime_types.h @@ -59,22 +59,58 @@ OBJ_CLASS_DECLARATION(ompi_rte_node_schedule_t); /** - * Node + * Base container for node-related information * - * Container for allocation and deallocation of resources used to - * launch parallel jobs. - * + * Base container type for holding llm/pcm private information on a \c + * ompi_rte_node_allocation_t container. + */ +struct ompi_rte_node_allocation_data_t { + /** make us an instance of object so our constructors go boom */ + ompi_object_t super; +}; +/** shorten ompi_rte_node_allocation_data_t declarations */ +typedef struct ompi_rte_node_allocation_data_t ompi_rte_node_allocation_data_t; +/** create the required instance information */ +OBJ_CLASS_DECLARATION(ompi_rte_node_allocation_data_t); + + +/** + * Resource allocation container + * + * Container for passing information between the resource allocator, + * the resource/job mapper, and the job starter portions of the + * run-time environment. + * + * \c count has a strange meaning. If \c nodes is 0, \c count is the + * total number of cpus available in this block of resources. If \c + * nodes is non-zero, \c count is the number of cpus available per + * node. + * + * \c start provides an integer number of where in the job the + * resource is available. If you had two node_allocation_t elements + * returned from a call to allocate resources, one with + * nodes=4,count=2 and one with nodes=2,count=4, start would be 0 for + * the first element and 8 for the second. + * + * The contents of the structure (with the exception of \c data) may + * be examined by the process mapping functions. However, the fields + * should be considered read-only. 
The \c data field may contain + * private data that reflects the status of the \c nodes and \c count + * fields. The \c ompi_rte_node_* functions are available for + * manipulating \c ompi_rte_node_allocation_t structures. */ struct ompi_rte_node_allocation_t { /** make us an instance of list item */ ompi_list_item_t super; - /** hostname for this node. Can be used as generic description - field if hostnames aren't used on this platform */ - char hostname[MAXHOSTNAMELEN]; - /** number of MPI processes Open MPI can start on this host */ + /** start of allocation numbers for this block of nodes */ + int start; + /** number of nodes in this allocation - 0 means unknown */ + int nodes; + /** number of "process slots" (places to start a process) that + are allocated as part of this block of processes */ int count; - /** generic key=value storage mechanism */ - ompi_list_t *info; + /** data store for use by the Open MPI run-time environment */ + ompi_rte_node_allocation_data_t *data; }; /** shorten ompi_rte_allocation_t declarations */ typedef struct ompi_rte_node_allocation_t ompi_rte_node_allocation_t; diff --git a/src/tools/bootproxy/bootproxy.c b/src/tools/bootproxy/bootproxy.c index 2820080a03..59195ca767 100644 --- a/src/tools/bootproxy/bootproxy.c +++ b/src/tools/bootproxy/bootproxy.c @@ -25,7 +25,6 @@ int main(int argc, char *argv[]) { ompi_rte_node_schedule_t *sched; - ompi_rte_node_allocation_t *node; pid_t pid; int i; int ret; @@ -33,7 +32,8 @@ main(int argc, char *argv[]) ompi_cmd_line_t *cmd_line = NULL; int local_vpid_start, global_vpid_start; int cellid = 0; - int num_procs; + int total_num_procs; + int fork_num_procs; char *env_buf; ompi_init(argc, argv); @@ -68,26 +68,18 @@ main(int argc, char *argv[]) show_usage(argv[0]); exit(1); } - num_procs = atoi(ompi_cmd_line_get_param(cmd_line, "num_procs", 0, 0)); + total_num_procs = atoi(ompi_cmd_line_get_param(cmd_line, "num_procs", 0, 0)); sched = OBJ_NEW(ompi_rte_node_schedule_t); /* recv_schedule wants an 
already initialized ompi_list_t */ ret = mca_pcm_base_recv_schedule(stdin, &jobid, sched, - sched->nodelist); + &fork_num_procs); if (ret != OMPI_SUCCESS) { fprintf(stderr, "Failure in receiving schedule information\n"); exit(1); } - /* sanity check */ - if (ompi_list_get_size(sched->nodelist) > 1) { - fprintf(stderr, "Received more than one node - ignoring extra info\n"); - } - if (ompi_list_get_size(sched->nodelist) < 1) { - fprintf(stderr, "Received less than one node\n"); - } - /* fill our environment */ for (i = 0 ; sched->env[i] != NULL ; ++i) { putenv(sched->env[i]); @@ -97,7 +89,7 @@ main(int argc, char *argv[]) putenv(env_buf); asprintf(&env_buf, "OMPI_MCA_pcmclient_env_jobid=%d", jobid); putenv(env_buf); - asprintf(&env_buf, "OMPI_MCA_pcmclient_env_num_procs=%d", num_procs); + asprintf(&env_buf, "OMPI_MCA_pcmclient_env_num_procs=%d", total_num_procs); putenv(env_buf); asprintf(&env_buf, "OMPI_MCA_pcmclient_env_vpid_start=%d", global_vpid_start); @@ -112,9 +104,8 @@ main(int argc, char *argv[]) } } - node = (ompi_rte_node_allocation_t*) ompi_list_get_first(sched->nodelist); /* let's go! - if we are the parent, don't stick around... 
*/ - for (i = 0 ; i < node->count ; ++i) { + for (i = 0 ; i < fork_num_procs ; ++i) { pid = fork(); if (pid < 0) { /* error :( */ diff --git a/test/mca/llm/base/parse_hostfile.c b/test/mca/llm/base/parse_hostfile.c index 0e5c1a0585..bada38ac38 100644 --- a/test/mca/llm/base/parse_hostfile.c +++ b/test/mca/llm/base/parse_hostfile.c @@ -21,7 +21,7 @@ int main(int argc, char *argv[]) { ompi_list_t *hostlist; - ompi_rte_node_allocation_t *node; + mca_llm_base_hostfile_node_t *node; ompi_rte_valuepair_t *valpair; ompi_list_item_t *nodeitem, *valpairitem; FILE *test1_out=NULL; /* output file for first test */ @@ -55,7 +55,7 @@ main(int argc, char *argv[]) nodeitem != ompi_list_get_end(hostlist); nodeitem = ompi_list_get_next(nodeitem)) { - node = (ompi_rte_node_allocation_t*) nodeitem; + node = (mca_llm_base_hostfile_node_t*) nodeitem; fprintf(test1_out, "\t%s %d\n", node->hostname, node->count); for (valpairitem = ompi_list_get_first(node->info); @@ -84,7 +84,7 @@ main(int argc, char *argv[]) nodeitem != ompi_list_get_end(hostlist); nodeitem = ompi_list_get_next(nodeitem)) { - node = (ompi_rte_node_allocation_t*) nodeitem; + node = (mca_llm_base_hostfile_node_t*) nodeitem; fprintf(test2_out, "\t%s %d\n", node->hostname, node->count); for (valpairitem = ompi_list_get_first(node->info); diff --git a/test/mca/pcm/base/build_env.c b/test/mca/pcm/base/build_env.c index acd8728e62..da448df75f 100644 --- a/test/mca/pcm/base/build_env.c +++ b/test/mca/pcm/base/build_env.c @@ -9,6 +9,7 @@ #include #include "mca/pcm/base/base.h" +#include "util/argv.h" char *env[] = { "ENV0=", diff --git a/test/mca/pcm/base/sched_comm.c b/test/mca/pcm/base/sched_comm.c index 77257a813f..3b584b0602 100644 --- a/test/mca/pcm/base/sched_comm.c +++ b/test/mca/pcm/base/sched_comm.c @@ -30,6 +30,8 @@ main(int argc, char *argv[]) FILE *test2_in = NULL; int result; /* result of system call */ int jobid = 123; + int out_num_procs = 5; + int in_num_procs = 5; test_init("sched_comm_t"); @@ -52,7 +54,7 @@ 
main(int argc, char *argv[]) schedout->cwd = "/foo/bar/baz"; result = mca_pcm_base_send_schedule(test1_out, jobid, schedout, - schedout->nodelist); + out_num_procs); if (result != OMPI_SUCCESS) { test_failure("send_schedule failed"); exit(1); @@ -75,12 +77,12 @@ main(int argc, char *argv[]) test2_in = fopen("./test1_out", "r"); result = mca_pcm_base_recv_schedule(test2_in, &jobid, schedin, - schedin->nodelist); + &in_num_procs); if (result != OMPI_SUCCESS) { test_failure("recv_schedule failed"); exit(1); } - mca_pcm_base_send_schedule(test2_out, jobid, schedin, schedin->nodelist); + result = mca_pcm_base_send_schedule(test2_out, jobid, schedin, in_num_procs); /* store the status so the check below tests this call, not recv_schedule */ if (result != OMPI_SUCCESS) { test_failure("send_schedule (2) failed"); exit(1); diff --git a/test/mca/pcm/base/test1_out_std b/test/mca/pcm/base/test1_out_std index ea66658ddf..df683fe73f 100644 --- a/test/mca/pcm/base/test1_out_std +++ b/test/mca/pcm/base/test1_out_std @@ -2,11 +2,11 @@ 1 123 1 -12 ./sched_comm +73 /Users/brbarret/research/ompi/nodelist/test/mca/pcm/base/.libs/sched_comm 3 19 ENV1=blah blah blah 19 ENV2=foo bar is fun 8 ENV3=123 12 /foo/bar/baz -0 +5 @MCA_PCM_END@ diff --git a/test/mca/pcm/base/test2_out_std b/test/mca/pcm/base/test2_out_std index ea66658ddf..df683fe73f 100644 --- a/test/mca/pcm/base/test2_out_std +++ b/test/mca/pcm/base/test2_out_std @@ -2,11 +2,11 @@ 1 123 1 -12 ./sched_comm +73 /Users/brbarret/research/ompi/nodelist/test/mca/pcm/base/.libs/sched_comm 3 19 ENV1=blah blah blah 19 ENV2=foo bar is fun 8 ENV3=123 12 /foo/bar/baz -0 +5 @MCA_PCM_END@