diff --git a/src/mca/topo/base/base.h b/src/mca/topo/base/base.h index d06f91bbb2..49417ba427 100644 --- a/src/mca/topo/base/base.h +++ b/src/mca/topo/base/base.h @@ -10,6 +10,7 @@ #include "mpi.h" #include "class/ompi_list.h" #include "mca/topo/topo.h" +#include "proc/proc.h" /* * All stuff goes in here @@ -18,83 +19,90 @@ extern "C" { #endif int mca_topo_base_open(void); + int mca_topo_base_close(void); - int mca_topo_base_select(mca_topo_t *selected, - bool *allow_multi_user_threads, - bool *have_hidden_threads); + + int mca_topo_base_comm_select(struct ompi_communicator_t *comm, + struct mca_base_module_t *preferred); + + int mca_topo_base_comm_unselect(struct ompi_communicator_t *comm); + + int mca_topo_base_find_available (bool *allow_multi_user_threads, + bool *have_hidden_threads); int mca_topo_base_init_comm (MPI_Comm comm); + int mca_topo_base_get_param (MPI_Comm comm, int keyval); - const mca_topo_1_0_0_t * - mca_topo_unity_query(int *priority, - bool *allow_multi_user_threads, - bool *have_hidden_threads); /* * All the glue functions which we will provide to the users * by default. The users need to only write back-end functions * for graph_map() and cart_map() for their topology modules. * But they can implement these glue functions if they want. 
*/ - int topo_base_cart_coords (MPI_Comm comm, + int mca_topo_base_cart_coords (MPI_Comm comm, int rank, int maxdims, int *coords); - int topo_base_cart_create (MPI_Comm old_comm, + int mca_topo_base_cart_create (mca_topo_comm_t *topo_data, + int *proc_count, + ompi_proc_t **proc_pointers, + int *new_rank, int ndims, int *dims, int *periods, - int reorder, - MPI_Comm *comm_cart); + bool reorder); - int topo_base_cartdim_get (MPI_Comm comm, + int mca_topo_base_cartdim_get (MPI_Comm comm, int *ndims); - int topo_base_cart_get (MPI_Comm comm, + int mca_topo_base_cart_get (MPI_Comm comm, int maxdims, int *dims, int *periods, int *coords); - int topo_base_cart_rank (MPI_Comm comm, + int mca_topo_base_cart_rank (MPI_Comm comm, int *coords, int *rank); - int topo_base_cart_shift (MPI_Comm comm, + int mca_topo_base_cart_shift (MPI_Comm comm, int direction, int disp, int *rank_source, int *rank_dest); - int topo_base_cart_sub (MPI_Comm comm, + int mca_topo_base_cart_sub (MPI_Comm comm, int *remain_dims, MPI_Comm *new_comm); - int topo_base_graph_create (MPI_Comm comm_old, + int mca_topo_base_graph_create (mca_topo_comm_t *topo_data, + int *proc_count, + ompi_proc_t **proc_pointers, + int *new_rank, int nnodes, int *index, int *edges, - int reorder, - MPI_Comm *comm_graph); + bool reorder); - int topo_base_graphdims_get (MPI_Comm comm, + int mca_topo_base_graphdims_get (MPI_Comm comm, int *nodes, int *nedges); - int topo_base_graph_get (MPI_Comm comm, + int mca_topo_base_graph_get (MPI_Comm comm, int maxindex, int maxedges, int *index, int *edges); - int topo_base_graph_neighbors (MPI_Comm comm, + int mca_topo_base_graph_neighbors (MPI_Comm comm, int rank, int maxneighbors, int *neighbors); - int topo_base_graph_neighbors_count (MPI_Comm comm, + int mca_topo_base_graph_neighbors_count (MPI_Comm comm, int rank, int *nneighbors); @@ -103,8 +111,12 @@ extern "C" { * Globals */ extern int mca_topo_base_output; +extern int mca_topo_base_param; + extern ompi_list_t 
mca_topo_base_modules_available; -extern mca_topo_base_module_t mca_topo_base_selected_module; -extern mca_topo_t mca_topo; +extern ompi_list_t mca_topo_base_modules_opened; + +extern bool mca_topo_base_modules_opened_valid; +extern bool mca_topo_base_modules_available_valid; #endif /* MCA_BASE_TOPO_H */ diff --git a/src/mca/topo/base/topo_base_cart_coords.c b/src/mca/topo/base/topo_base_cart_coords.c index acbf9169e5..ad2446dc53 100644 --- a/src/mca/topo/base/topo_base_cart_coords.c +++ b/src/mca/topo/base/topo_base_cart_coords.c @@ -24,7 +24,7 @@ * @retval MPI_ERR_ARG */ -int topo_base_cart_coords (MPI_Comm comm, +int mca_topo_base_cart_coords (MPI_Comm comm, int rank, int maxdims, int *coords){ @@ -36,13 +36,18 @@ int topo_base_cart_coords (MPI_Comm comm, /* * loop computing the co-ordinates */ - d = comm->c_topo_comm->mtc_dims; - remprocs = comm->c_topo_comm->mtc_nprocs; - for (i = 0; (i < comm->c_topo_comm->mtc_ndims) && (i < maxdims); ++i, ++d) { + d = comm->c_topo_comm->mtc_dims_or_index; + remprocs = ompi_comm_size(comm); + + for (i = 0; + (i < comm->c_topo_comm->mtc_ndims_or_nnodes) && (i < maxdims); + ++i, ++d) { + dim = (*d > 0) ? 
*d : -(*d); remprocs /= dim; *coords++ = rank / remprocs; rank %= remprocs; } + return MPI_SUCCESS; } diff --git a/src/mca/topo/base/topo_base_cart_create.c b/src/mca/topo/base/topo_base_cart_create.c index 562d3a65a5..4f9ffed2e1 100644 --- a/src/mca/topo/base/topo_base_cart_create.c +++ b/src/mca/topo/base/topo_base_cart_create.c @@ -4,7 +4,9 @@ #include "mca/topo/base/base.h" #include "communicator/communicator.h" +#include "group/group.h" #include "mca/topo/topo.h" +#include "mpi.h" /* * function - makes a new communicator to which topology information @@ -24,119 +26,68 @@ * @retval MPI_SUCCESS */ -int topo_base_cart_create (MPI_Comm old_comm, +int mca_topo_base_cart_create (mca_topo_comm_t *topo_data, + int *proc_count, + ompi_proc_t **proc_pointers, + int *new_rank, int ndims, int *dims, int *periods, - int reorder, - MPI_Comm *comm_cart){ - MPI_Comm newcomm; -#if 0 - MPI_Group newgroup; -#endif - int rank; - int size; + bool reorder) { + int nprocs; - int err; - int range[1][3]; + int dim; int i; int *p; + int *coords = topo_data->mtc_coords; - /* - * Compute the # of processes in the grid. - */ nprocs = 1; - for (i = 0, p = dims; i < ndims; ++i, ++p) { - if (*p <= 0) { - return MPI_ERR_DIMS; + p = topo_data->mtc_dims_or_index; + + /* Calculate the number of processes in this grid */ + for (i = 0; i < topo_data->mtc_ndims_or_nnodes; ++i, ++p) { + if(*p <= 0) { + return OMPI_ERROR; } nprocs *= *p; } - /* - * Create the group for the new communicator. 
- */ -#if 0 - err = ompi_comm_size (comm, &size); -#endif - if (err != MPI_SUCCESS) { - return err; + + /* check for the error condition */ + + if (*proc_count < nprocs) { + return MPI_ERR_DIMS; } - if (nprocs > size) { - return MPI_ERR_DIMS; + /* check if we have to trim the list of processes */ + if (nprocs < *proc_count) { + *proc_count = nprocs; + } + + if (*new_rank > (nprocs-1)) { + /* sorry, but in our scheme this process is cut off */ + *new_rank = MPI_UNDEFINED; + return MPI_SUCCESS; } - if (nprocs == size) { -#if 0 - err = ompi_comm_group (comm, &newgroup); -#endif - } else { - range[0][0] = 0; - range[0][1] = nprocs - 1; - range[0][2] = 1; -#if 0 - err = ompi_group_range_incl (comm->c_group, 1, range, &newgroup); -#endif + for (i = 0, p = topo_data->mtc_dims_or_index; i < ndims; ++i, ++p) { + *p = (*periods) ? -(*dims) : *dims; + ++dims; + ++periods; } - if (err != MPI_SUCCESS) { - return err; - } - /* - * Create the new communicator. - */ -#if 0 - err = ompi_comm_create (comm, newgroup, comm_cart); -#endif - if (err != MPI_SUCCESS) { -#if 0 - ompi_group_free (&newgroup); -#endif - return err; - } - /* - * Fill the communicator with topology information. - */ - newcomm = *comm_cart; - if (newcomm != MPI_COMM_NULL) { - newcomm->c_flags |= OMPI_COMM_CART; - newcomm->c_topo_comm->mtc_type = MPI_CART; - newcomm->c_topo_comm->mtc_nprocs = nprocs; - newcomm->c_topo_comm->mtc_ndims = ndims; - newcomm->c_topo_comm->mtc_dims = (int *) - malloc((unsigned) 2 * ndims * sizeof(int)); - if (newcomm->c_topo_comm->mtc_dims == 0) { - return MPI_ERR_OTHER; - } - newcomm->c_topo_comm->mtc_coords = newcomm->c_topo_comm->mtc_dims + ndims; - for (i = 0, p = newcomm->c_topo_comm->mtc_dims; i < ndims; ++i, ++p) { - *p = (*periods) ? -(*dims) : *dims; - ++dims; - ++periods; - } - /* - * Compute the caller's coordinates. 
- */ -#if 0 - err = ompi_comm_rank (newcomm, &rank); -#endif - if (err != MPI_SUCCESS) { - return err; - } + /* Have to replace this with the actual function body itself */ + p = topo_data->mtc_dims_or_index; + coords = topo_data->mtc_coords; - err = newcomm->c_topo.topo_cart_coords (newcomm, rank, - ndims, newcomm->c_topo_comm->mtc_coords); - if (err != MPI_SUCCESS) { - return err; - } - } - -#if 0 - err = ompi_group_free (&newgroup); -#endif - if (err != MPI_SUCCESS) { - return err; - } + for (i=0; + (i < topo_data->mtc_ndims_or_nnodes); + ++i, ++p) { + dim = (*p > 0) ? *p : -(*p); + nprocs /= dim; + *coords++ = *new_rank / nprocs; + *new_rank %= nprocs; + } + /* end here */ return MPI_SUCCESS; } diff --git a/src/mca/topo/base/topo_base_cart_get.c b/src/mca/topo/base/topo_base_cart_get.c index 501707c7d6..8d50163de9 100644 --- a/src/mca/topo/base/topo_base_cart_get.c +++ b/src/mca/topo/base/topo_base_cart_get.c @@ -21,7 +21,7 @@ * * @retval MPI_SUCCESS */ -int topo_base_cart_get (MPI_Comm comm, +int mca_topo_base_cart_get (MPI_Comm comm, int maxdims, int *dims, int *periods, @@ -30,9 +30,11 @@ int topo_base_cart_get (MPI_Comm comm, int *d; int *c; - d = comm->c_topo_comm->mtc_dims; + d = comm->c_topo_comm->mtc_dims_or_index; c = comm->c_topo_comm->mtc_coords; - for (i = 0; (i < comm->c_topo_comm->mtc_ndims) && (i < maxdims); ++i) { + + for (i = 0; (i < comm->c_topo_comm->mtc_ndims_or_nnodes) && (i < maxdims); ++i) { + if (*d > 0) { *dims++ = *d++; *periods++ = 0; diff --git a/src/mca/topo/base/topo_base_cart_rank.c b/src/mca/topo/base/topo_base_cart_rank.c index 192d18927f..ff3e8b5b5f 100644 --- a/src/mca/topo/base/topo_base_cart_rank.c +++ b/src/mca/topo/base/topo_base_cart_rank.c @@ -21,7 +21,7 @@ * @retval MPI_ERR_ARG */ -int topo_base_cart_rank (MPI_Comm comm, +int mca_topo_base_cart_rank (MPI_Comm comm, int *coords, int *rank){ int prank; @@ -37,8 +37,9 @@ int topo_base_cart_rank (MPI_Comm comm, */ factor = 1; prank = 0; - i = comm->c_topo_comm->mtc_ndims - 
1; - d = comm->c_topo_comm->mtc_dims + i; + + i = comm->c_topo_comm->mtc_ndims_or_nnodes - 1; + d = comm->c_topo_comm->mtc_dims_or_index + i; c = coords + i; for (; i >= 0; --i, --c, --d) { diff --git a/src/mca/topo/base/topo_base_cart_shift.c b/src/mca/topo/base/topo_base_cart_shift.c index cc0368e244..a94658427b 100644 --- a/src/mca/topo/base/topo_base_cart_shift.c +++ b/src/mca/topo/base/topo_base_cart_shift.c @@ -25,7 +25,7 @@ * @retval MPI_ERR_COMM * @retval MPI_ERR_ARG */ -int topo_base_cart_shift (MPI_Comm comm, +int mca_topo_base_cart_shift (MPI_Comm comm, int direction, int disp, int *rank_source, @@ -42,9 +42,8 @@ int topo_base_cart_shift (MPI_Comm comm, /* * Handle the trivial case. */ -#if 0 ord = ompi_comm_rank(comm); -#endif + if (disp == 0) { *rank_dest = *rank_source = ord; return MPI_SUCCESS; @@ -52,9 +51,9 @@ int topo_base_cart_shift (MPI_Comm comm, /* * Compute the rank factor and ordinate. */ - factor = comm->c_topo_comm->mtc_nprocs; - p = comm->c_topo_comm->mtc_dims; - for (i = 0; (i < comm->c_topo_comm->mtc_ndims) && (i <= direction); ++i, ++p) { + factor = ompi_comm_size(comm); + p = comm->c_topo_comm->mtc_dims_or_index; + for (i = 0; (i < comm->c_topo_comm->mtc_ndims_or_nnodes) && (i <= direction); ++i, ++p) { if ((thisdirection = *p) > 0) { thisperiod = 0; } else { @@ -79,9 +78,7 @@ int topo_base_cart_shift (MPI_Comm comm, } else { destord %= thisdirection; if (destord < 0) destord += thisdirection; -#if 0 *rank_dest = ompi_comm_rank(comm); -#endif *rank_dest += ((destord - ord) * factor); } if ( ((srcord < 0) || (srcord >= thisdirection)) && (!thisperiod) ) { @@ -89,9 +86,7 @@ int topo_base_cart_shift (MPI_Comm comm, } else { srcord %= thisdirection; if (srcord < 0) srcord += thisdirection; -#if 0 - *rank_dest = ompi_comm_rank(comm); -#endif - *rank_dest += ((srcord - ord) * factor); + *rank_source = ompi_comm_rank(comm); /* this branch computes the source rank; it must not overwrite *rank_dest */ + *rank_source += ((srcord - ord) * factor); } diff --git a/src/mca/topo/base/topo_base_cart_sub.c b/src/mca/topo/base/topo_base_cart_sub.c index d452ed8b8d..7a01bfe7e6 100644 --- 
a/src/mca/topo/base/topo_base_cart_sub.c +++ b/src/mca/topo/base/topo_base_cart_sub.c @@ -21,10 +21,11 @@ * @retval MPI_ERR_TOPOLOGY * @retval MPI_ERR_COMM */ -int topo_base_cart_sub (MPI_Comm comm, +int mca_topo_base_cart_sub (MPI_Comm comm, int *remain_dims, MPI_Comm *new_comm){ - MPI_Comm newcomm; + + struct ompi_communicator_t *temp_comm; int errcode; int colour; int key; @@ -33,23 +34,25 @@ int topo_base_cart_sub (MPI_Comm comm, int rank; int ndim; int dim; - int allfalse; + bool allfalse; int i; int *d; int *c; int *r; int *p; + *new_comm = MPI_COMM_NULL; + /* * Compute colour and key used in splitting the communicator. */ colour = key = 0; colfactor = keyfactor = 1; ndim = 0; - allfalse = 0; + allfalse = false; - i = comm->c_topo_comm->mtc_ndims - 1; - d = comm->c_topo_comm->mtc_dims + i; + i = comm->c_topo_comm->mtc_ndims_or_nnodes - 1; + d = comm->c_topo_comm->mtc_dims_or_index + i; c = comm->c_topo_comm->mtc_coords + i; r = remain_dims + i; @@ -70,62 +73,59 @@ int topo_base_cart_sub (MPI_Comm comm, * have a communicator unless you're in it). */ if (ndim == 0) { -#if 0 - ompi_comm_rank (comm, &colour); -#endif + colour = ompi_comm_rank (comm); ndim = 1; - allfalse = 1; + allfalse = true; } /* * Split the communicator. */ -#if 0 - errcode = ompi_comm_split (comm, colour, key, new_comm); -#endif + errcode = ompi_comm_split (comm, colour, key, &temp_comm); if (errcode != MPI_SUCCESS) { return errcode; } /* * Fill the communicator with topology information. 
*/ - newcomm = *new_comm; - if (newcomm != MPI_COMM_NULL) { - newcomm->c_topo_comm->mtc_type = MPI_CART; - newcomm->c_topo_comm->mtc_nprocs = keyfactor; - newcomm->c_topo_comm->mtc_ndims = ndim; - newcomm->c_topo_comm->mtc_dims = (int *) - malloc((unsigned) 2 * ndim * sizeof(int)); - if (newcomm->c_topo_comm->mtc_dims == 0) { + if (temp_comm != MPI_COMM_NULL) { + + temp_comm->c_topo_comm->mtc_ndims_or_nnodes = ndim; + temp_comm->c_topo_comm->mtc_dims_or_index = (int *) + malloc((unsigned) 2 * ndim * sizeof(int)); + + if (NULL == temp_comm->c_topo_comm->mtc_dims_or_index) { + OBJ_RELEASE(temp_comm); return MPI_ERR_OTHER; } - newcomm->c_topo_comm->mtc_coords = newcomm->c_topo_comm->mtc_dims + ndim; + temp_comm->c_topo_comm->mtc_coords = temp_comm->c_topo_comm->mtc_dims_or_index + ndim; if (!allfalse) { - p = newcomm->c_topo_comm->mtc_dims; - d = comm->c_topo_comm->mtc_dims; + p = temp_comm->c_topo_comm->mtc_dims_or_index; + d = comm->c_topo_comm->mtc_dims_or_index; r = remain_dims; - for (i = 0; i < comm->c_topo_comm->mtc_ndims; ++i, ++d, ++r) { + for (i = 0; i < comm->c_topo_comm->mtc_ndims_or_nnodes; ++i, ++d, ++r) { if (*r) { *p++ = *d; } } } else { - newcomm->c_topo_comm->mtc_dims[0] = 1; + temp_comm->c_topo_comm->mtc_dims_or_index[0] = 1; } /* * Compute the caller's coordinates. 
*/ -#if 0 - errcode = ompi_comm_rank (newcomm, &rank); -#endif - if (errcode != MPI_SUCCESS) { + rank = ompi_comm_rank (temp_comm); + if (MPI_SUCCESS != errcode) { + OBJ_RELEASE(temp_comm); return errcode; } - errcode = newcomm->c_topo.topo_cart_coords (newcomm, rank, - ndim, newcomm->c_topo_comm->mtc_coords); - if (errcode != MPI_SUCCESS) { + errcode = temp_comm->c_topo->topo_cart_coords (temp_comm, rank, + ndim, temp_comm->c_topo_comm->mtc_coords); + if (MPI_SUCCESS != errcode) { + OBJ_RELEASE(temp_comm); return errcode; } } - return MPI_SUCCESS; + *new_comm = temp_comm; + return MPI_SUCCESS; } diff --git a/src/mca/topo/base/topo_base_cartdim_get.c b/src/mca/topo/base/topo_base_cartdim_get.c index c0c4dde35e..adbb47b40d 100644 --- a/src/mca/topo/base/topo_base_cartdim_get.c +++ b/src/mca/topo/base/topo_base_cartdim_get.c @@ -16,10 +16,10 @@ * @retval MPI_SUCCESS * @retval MPI_ERR_COMM */ -int topo_base_cartdim_get (MPI_Comm comm, +int mca_topo_base_cartdim_get (MPI_Comm comm, int *ndims){ - *ndims = comm->c_topo_comm->mtc_ndims; + *ndims = comm->c_topo_comm->mtc_ndims_or_nnodes; return MPI_SUCCESS; } diff --git a/src/mca/topo/base/topo_base_close.c b/src/mca/topo/base/topo_base_close.c index 50a6f9be96..a9040decf0 100644 --- a/src/mca/topo/base/topo_base_close.c +++ b/src/mca/topo/base/topo_base_close.c @@ -5,6 +5,7 @@ #include +#include "util/output.h" #include "include/constants.h" #include "mca/mca.h" #include "mca/base/base.h" @@ -12,13 +13,23 @@ #include "mca/topo/base/base.h" int mca_topo_base_close(void) { - extern ompi_list_t mca_topo_base_modules_available; + /* We have to close all the modules which are open. This might either + be the list of opened modules or the list of available modules. 
+ Note that the modules which are opened but are not available are + already closed */ - /* - * Close all the available modules - */ - mca_base_modules_close (mca_topo_base_output, - &mca_topo_base_modules_available, NULL); + if (mca_topo_base_modules_opened_valid) { + mca_base_modules_close (mca_topo_base_output, + &mca_topo_base_modules_opened, NULL); + mca_topo_base_modules_opened_valid = false; + } else if (mca_topo_base_modules_available_valid) { + mca_base_modules_close (mca_topo_base_output, + &mca_topo_base_modules_available, NULL); + mca_topo_base_modules_available_valid = false; + } + + /* Close the output stream for this framework */ + ompi_output_close (mca_topo_base_output); /* * All done diff --git a/src/mca/topo/base/topo_base_comm_select.c b/src/mca/topo/base/topo_base_comm_select.c new file mode 100644 index 0000000000..77ca057ee9 --- /dev/null +++ b/src/mca/topo/base/topo_base_comm_select.c @@ -0,0 +1,298 @@ +/* + * $HEADER$ + */ + +#include "ompi_config.h" + +#include "class/ompi_list.h" +#include "runtime/runtime.h" +#include "mca/mca.h" +#include "mca/base/base.h" +#include "mca/topo/topo.h" +#include "mca/topo/base/base.h" +#include "util/output.h" +#include "communicator/communicator.h" + + +static void fill_null_pointers(mca_topo_t *actions); +/* + * This structure is needed so that we can close the modules + * which are not selected but were opened. mca_base_modules_close + * which does this job for us requires a ompi_list_t which contains + * these modules + */ +struct queried_module_t { + ompi_list_item_t super; + mca_topo_base_module_t *om_module; + mca_topo_t *om_actions; +}; +typedef struct queried_module_t queried_module_t; + +OBJ_CLASS_INSTANCE(queried_module_t, ompi_list_item_t, NULL, NULL); + +/* + * Only one topo module can be attached to each communicator. + * + * This module calls the query funtion on all the modules + * that were detected by topo_base_open. This function is + * called on a per-communicator basis. 
This function has the + * following function. + * + * 1. Iterate over the list of available_modules + * 2. Call the query function on each of these modules. + * 3. query function returns the structure containing pointers + * to its functions and the priority of this module. + * 4. Select the module with the highest priority + * 5. Call the init function on its actions so that it does the + * right setup for the communicator + * 6. Call finalize on all the other modules which returned + * their actions but were unfortunate to not get selected + */ + +int mca_topo_base_comm_select (struct ompi_communicator_t *comm, + struct mca_base_module_t *preferred) { + int priority; + int best_priority; + char name[MPI_MAX_OBJECT_NAME+32]; + ompi_list_item_t *item; + mca_base_component_priority_list_item_t *cpli; + mca_topo_base_module_t *module; + mca_topo_base_module_t *preferred_module = NULL; + mca_topo_base_module_t *best_module; + mca_topo_t *actions; + ompi_list_t queried; + queried_module_t *om; + char *str; + int err; + + /* Announce */ + + /* ANJU: + * check for names array .... mca_base_param_ */ + + + snprintf(name, sizeof(name), "%s (cid %d)", comm->c_name, + comm->c_contextid); + name[sizeof(name) - 1] = '\0'; + ompi_output_verbose(10, mca_topo_base_output, + "topo:base:comm_select: new communicator: %s", + name); + + /* Check and see if a preferred module was provided. If it was provided + then it should be used (if possible) */ + if (NULL != preferred) { + + /* We have a preferred module. Check if it is available + and if so, whether it wants to run */ + + str = &(preferred->mca_module_name[0]); + + ompi_output_verbose(10, mca_topo_base_output, + "topo:base:comm_select: Checking preferred module: %s", + str); + + /* query the module for its priority and get its actions + structure. 
This is necessary to proceed */ + preferred_module = (mca_topo_base_module_t *) preferred; + actions = (NULL != preferred_module->topom_comm_query) ? preferred_module->topom_comm_query (&priority) : NULL; + + if (NULL != actions && + NULL != actions->topo_module_init && + NULL != actions->topo_graph_map && + NULL != actions->topo_cart_map) { + + /* this query seems to have returned something legitimate and + * we can now go ahead and initialize the communicator with it + * but first, the functions which are null need to be filled in */ + + fill_null_pointers (actions); + comm->c_topo = actions; + return actions->topo_module_init(comm); + } + /* His preferred module is present, but is unable to run. This is + * not a good sign. We should try selecting some other component + * We let it fall through and select from the list of available + * components + */ + } /* end of selection for preferred module */ + + /* + * We fall till here if one of the two things happened: + * 1. The preferred module was provided but for some reason was not able + * to be selected + * 2. No preferred module was provided + * + * All we need to do is to go through the list of available modules and find + * the one which has the highest priority and use that for this communicator + */ + + best_module = NULL; + best_priority = -1; + OBJ_CONSTRUCT(&queried, ompi_list_t); + + for (item = ompi_list_get_first(&mca_topo_base_modules_available); + item != ompi_list_get_end(&mca_topo_base_modules_available); + item = ompi_list_get_next(item)) { + /* + * convert the ompi_list_item_t returned into the proper type + */ + cpli = (mca_base_component_priority_list_item_t *) item; + module = (mca_topo_base_module_t *) cpli->cpli_component; + + ompi_output_verbose(10, mca_topo_base_output, + "select: initialising %s module %s", + module->topom_version.mca_type_name, + module->topom_version.mca_module_name); + + /* + * we can call the query function only if there is a function :-) + */ + if (NULL == module->topom_comm_query) { + ompi_output_verbose(10, mca_topo_base_output, + "select: no query, ignoring the module"); + 
} else { + /* + * call the query function and see what it returns + */ + actions = module->topom_comm_query (&priority); + + if (NULL == actions || + NULL == actions->topo_module_init || + NULL == actions->topo_graph_map || + NULL == actions->topo_cart_map) { + /* + * query did not return any action which can be used + */ + ompi_output_verbose(10, mca_topo_base_output, + "select: query returned failure"); + } else { + ompi_output_verbose(10, mca_topo_base_output, + "select: query returned priority %d", + priority); + /* + * is this the best module we have found till now. Check if + * this module has cart_map and graph_map implemented. Everything + * else can be covered using base functions. + */ + if (priority > best_priority) { + best_priority = priority; + best_module = module; + } + + om = OBJ_NEW(queried_module_t); + /* + * check if we have run out of space + */ + if (NULL == om) { + OBJ_DESTRUCT(&queried); + return OMPI_ERR_OUT_OF_RESOURCE; + } + om->om_module = module; + om->om_actions = actions; + ompi_list_append(&queried, (ompi_list_item_t *)om); + } /* end else of if (NULL == actions) */ + } /* end else of if (NULL == module->topom_comm_query) */ + } /* end for ... end of traversal */ + + /* + * Now we have a list of modules which successfully returned their actions struct. + * One of these modules has the best priority. The rest have to be comm_unqueried + * to counter the effects of comm_query'ing them. Finalize happens only on modules + * which are initialized. + */ + if (NULL == best_module) { + /* + * This typically means that there was no module which was able + * to run properly this time. So, we need to abort + * JMS replace with show_help + */ + OBJ_DESTRUCT(&queried); + return OMPI_ERROR; + } + + /* + * We now have a list of modules which have successfully returned + * their priorities from the query. 
We now have to unquery() those + * modules which have not been selected and init() the module which + * was selected + */ + for (item = ompi_list_remove_first(&queried); + NULL != item; + item = ompi_list_remove_first(&queried)) { + om = (queried_module_t *) item; + if (om->om_module == best_module) { + /* + * this is the chosen module, we have to initialise + * the actions of this module. + * + * ANJU: a module might not have all the functions defined. + * Wherever a function pointer is null in the actions + * structure we need to fill it in with the base structure + * function pointers. This is yet to be done + */ + + /* + * We do not return here because we still need to go through + * and release the other objects + */ + + fill_null_pointers (om->om_actions); + comm->c_topo = om->om_actions; + err = om->om_actions->topo_module_init(comm); + + } else { + /* + * this is not the "chosen one", finalize + */ + if (NULL != om->om_module->topom_comm_unquery) { + /* unquery the module only if they have some + * clean up job to do. Modules which are queried + * but do not actually do anything typically do not + * have a unquery. Hence this check is necessary + */ + (void) om->om_module->topom_comm_unquery(comm); + ompi_output_verbose(10, mca_topo_base_output, + "select: module %s is not selected", + om->om_module->topom_version.mca_module_name); + } /* end if */ + } /* if not best module */ + OBJ_RELEASE(om); + } /* traversing through the entire list */ + + ompi_output_verbose(10, mca_topo_base_output, + "select: module %s selected", + best_module->topom_version.mca_module_name); + + OBJ_DESTRUCT(&queried); + + return err; +} + +/* + * This function fills in the null function pointers, in other words, + * those functions which are not implemented by the module with the + * pointers from the base function. Somewhere, I need to incoroporate + * a check for the common minimum funtions being implemented by the + * module atleast. If not, this module cannot be considered. 
+ */ +static void fill_null_pointers(mca_topo_t *actions) { + +#define CHECK_FOR_NULL_FUNCTION_POINTER(name) \ + if (NULL == actions->topo_##name) { \ + actions->topo_##name = mca_topo_base_##name; \ + } + + CHECK_FOR_NULL_FUNCTION_POINTER(cart_coords); + CHECK_FOR_NULL_FUNCTION_POINTER(cart_create); + CHECK_FOR_NULL_FUNCTION_POINTER(cartdim_get); + CHECK_FOR_NULL_FUNCTION_POINTER(cart_rank); + CHECK_FOR_NULL_FUNCTION_POINTER(cart_shift); + CHECK_FOR_NULL_FUNCTION_POINTER(cart_sub); + CHECK_FOR_NULL_FUNCTION_POINTER(graph_create); + CHECK_FOR_NULL_FUNCTION_POINTER(graph_get); + CHECK_FOR_NULL_FUNCTION_POINTER(graphdims_get); + CHECK_FOR_NULL_FUNCTION_POINTER(graph_neighbors); + CHECK_FOR_NULL_FUNCTION_POINTER(graph_neighbors_count); + +#undef CHECK_FOR_NULL_FUNCTION_POINTER +} diff --git a/src/mca/topo/base/topo_base_comm_unselect.c b/src/mca/topo/base/topo_base_comm_unselect.c new file mode 100644 index 0000000000..7f783eaba5 --- /dev/null +++ b/src/mca/topo/base/topo_base_comm_unselect.c @@ -0,0 +1,28 @@ +#include +#include +#include + +#include "mpi.h" +#include "mca/mca.h" +#include "mca/base/base.h" +#include "mca/topo/topo.h" +#include "mca/topo/base/base.h" +#include "communicator/communicator.h" + +/* + * This function is used to shut down a topology module + * on a communicator. As of now, this should do nothing + * more than just invoke the finalize on the module which + * was selected. There is nothing fancy which we need to + * do as is the case with collectives. 
+ */ +int mca_topo_base_comm_unselect(struct ompi_communicator_t *comm) { + + if (NULL != comm->c_topo && NULL != comm->c_topo->topo_module_finalize) { + return comm->c_topo->topo_module_finalize(comm); + } + + /* we fall here if there was no topolog module or the selected module + * did not have anything to finalize (its func pointer was NULL) */ + return OMPI_SUCCESS; +} diff --git a/src/mca/topo/base/topo_base_find_available.c b/src/mca/topo/base/topo_base_find_available.c new file mode 100644 index 0000000000..cfb9bfe2b9 --- /dev/null +++ b/src/mca/topo/base/topo_base_find_available.c @@ -0,0 +1,137 @@ +#include "ompi_config.h" + +#include +#include + +#include "mpi.h" +#include "include/constants.h" +#include "class/ompi_list.h" +#include "util/output.h" +#include "mca/mca.h" +#include "mca/base/base.h" +#include "mca/topo/topo.h" +#include "mca/topo/base/base.h" + +ompi_list_t mca_topo_base_modules_available; +bool mca_topo_base_modules_available_valid = false; + +static int init_query(const mca_base_module_t *m, + mca_base_component_priority_list_item_t *entry); +static int init_query_1_0_0(const mca_base_module_t *component, + mca_base_component_priority_list_item_t *entry); + +int mca_topo_base_find_available(bool *allow_multi_user_threads, + bool *have_hidden_threads) { + bool found = false; + mca_base_component_priority_list_item_t *entry; + ompi_list_item_t *p; + + + /* Initialize the list */ + + OBJ_CONSTRUCT(&mca_topo_base_modules_available, ompi_list_t); + mca_topo_base_modules_available_valid = true; + + /* The list of modules which we should check is already present + in mca_topo_base_modules_opened, which was established in + mca_topo_base_open */ + + for (found = false, p = ompi_list_remove_first (&mca_topo_base_modules_opened); + NULL != p; + p = ompi_list_remove_first (&mca_topo_base_modules_opened)) { + + entry = OBJ_NEW(mca_base_component_priority_list_item_t); + entry->cpli_component = ((mca_base_module_list_item_t *)p)->mli_module; + + /* 
Now for this entry, we have to determine the thread level. Call + a subroutine to do the job for us */ + + if (OMPI_SUCCESS == init_query(entry->cpli_component, entry)) { + /* Save the results in the list. The priority is not relvant at + this point in time. But we save the thread arguments so that + the initial selection algorithm can negotiate overall thread + level for this process */ + entry->cpli_priority = 0; + ompi_list_append (&mca_topo_base_modules_available, + (ompi_list_item_t *) entry); + found = true; + } else { + /* The component does not want to run, so close it. Its close() + has already been invoked. Close it out of the DSO repository + (if it is there in the repository) */ + mca_base_module_repository_release (entry->cpli_component); + OBJ_RELEASE(entry); + } + /* Free entry from the "opened" list */ + OBJ_RELEASE(p); + } + + /* The opened list is no longer necessary, so we can free it */ + OBJ_DESTRUCT (&mca_topo_base_modules_opened); + mca_topo_base_modules_opened_valid = false; + + /* There should atleast be one topo module which was available */ + if (false == found) { + /* Need to free all items in the list */ + OBJ_DESTRUCT(&mca_topo_base_modules_available); + mca_topo_base_modules_available_valid = false; + ompi_output_verbose (10, mca_topo_base_output, + "topo:find_available: no topo components available!"); + return OMPI_ERROR; + } + + /* All done */ + return OMPI_SUCCESS; +} + + +static int init_query(const mca_base_module_t *m, + mca_base_component_priority_list_item_t *entry) { + int ret; + + ompi_output_verbose(10, mca_topo_base_output, + "topo:find_available: querying topo component %s", + m->mca_module_name); + + /* This module has been successfully opened, now try to query it */ + if (1 == m->mca_type_major_version && + 0 == m->mca_type_minor_version && + 0 == m->mca_type_release_version) { + ret = init_query_1_0_0 (m, entry); + } else { + /* unrecognised API version */ + ompi_output_verbose(10, mca_topo_base_output, + 
"topo:find_available:unrecognised topo API version (%d.%d.%d)", + m->mca_type_major_version, + m->mca_type_minor_version, + m->mca_type_release_version); + return OMPI_ERROR; + } + + /* Query done -- look at return value to see what happened */ + if (OMPI_SUCCESS != ret) { + ompi_output_verbose(10, mca_topo_base_output, + "topo:find_available topo component %s is not available", + m->mca_module_name); + if (NULL != m->mca_close_module) { + m->mca_close_module(); + } + } else { + ompi_output_verbose(10, mca_topo_base_output, + "topo:find_avalable: topo component %s is available", + m->mca_module_name); + + } + /* All done */ + return ret; +} + + +static int init_query_1_0_0(const mca_base_module_t *component, + mca_base_component_priority_list_item_t *entry) { + + mca_topo_base_module_1_0_0_t *topo = (mca_topo_base_module_1_0_0_t *) component; + + return topo->topom_init_query(&(entry->cpli_allow_multi_user_threads), + &(entry->cpli_have_hidden_threads)); +} diff --git a/src/mca/topo/base/topo_base_graph_create.c b/src/mca/topo/base/topo_base_graph_create.c index 0aad214f92..1fa377df3d 100644 --- a/src/mca/topo/base/topo_base_graph_create.c +++ b/src/mca/topo/base/topo_base_graph_create.c @@ -22,106 +22,49 @@ * @retval MPI_ERR_OUT_OF_RESOURCE */ -int topo_base_graph_create (MPI_Comm comm_old, +int mca_topo_base_graph_create (mca_topo_comm_t *topo_data, + int *proc_count, + ompi_proc_t **proc_pointers, + int *new_rank, int nnodes, int *index, int *edges, - int reorder, - MPI_Comm *comm_graph) { -#if 0 - MPI_Group newgroup; -#endif + bool reorder){ + int nedges; - int size; - int err; - int range[1][3]; int i; - int *topo; int *p; - /* - * Create and error check the topology information. 
- */ - nedges = index[nnodes - 1]; - topo = (int *) malloc((unsigned) (nnodes + nedges) * sizeof(int)); - if (topo == 0) { - printf ("Out of resources\n"); - return MPI_ERR_SYSRESOURCE; + /* check if the number of nodes is more than the number of procs */ + + if (nnodes > *proc_count) { + return MPI_ERR_DIMS; } - for (i = 0, p = topo; i < nnodes; ++i, ++p) { - *p = *index++; - } + /* Create and error check the topology information */ + + nedges = topo_data->mtc_dims_or_index[nnodes-1]; + + /* Check if there are any negative values on the edges */ + + p = topo_data->mtc_periods_or_edges; for (i = 0; i < nedges; ++i, ++p) { - *p = *edges++; if (*p < 0 || *p >= nnodes) { - free((char *) topo); return MPI_ERR_TOPOLOGY; } } - /* - * Create the group for the new communicator. - */ -#if 0 - err = ompi_comm_size (comm_old, &size); -#endif - if (err != MPI_SUCCESS) { - free((char *) topo); - return err; + + /* if the graph does not have to be trimmed, then nothing has to change */ + if (nnodes < *proc_count) { + *proc_count = nnodes; } - if (nnodes > size) { - free((char *) topo); - return MPI_ERR_ARG; - } - - if (nnodes == size) { -#if 0 - err = ompi_comm_group (comm_old, &newgroup); -#endif - } else { - range[0][0] = 0; - range[0][1] = nnodes - 1; - range[0][2] = 1; -#if 0 - err = ompi_group_range_incl(comm_old->c_group, 1, range, &newgroup); -#endif - } - if (err != MPI_SUCCESS) { - free((char *) topo); - return err; - } - /* - * Create the new communicator. - */ -#if 0 - err = ompi_comm_create (comm_old, newgroup, comm_graph); -#endif - if (err != MPI_SUCCESS) { - free((char *) topo); -#if 0 - ompi_group_free (&newgroup); -#endif - return err; - } - /* - * Set the communicator topology information. 
- */ - if (*comm_graph != MPI_COMM_NULL) { - (*comm_graph)->c_flags |= OMPI_COMM_GRAPH; - (*comm_graph)->c_topo_comm->mtc_type = MPI_GRAPH; - (*comm_graph)->c_topo_comm->mtc_nprocs = nnodes; - (*comm_graph)->c_topo_comm->mtc_nedges = nedges; - (*comm_graph)->c_topo_comm->mtc_index = topo; - (*comm_graph)->c_topo_comm->mtc_edges = topo + nnodes; - } - -#if 0 - err = ompi_group_free (&newgroup); -#endif - if (err != MPI_SUCCESS) { - return err; + /* check if this rank makes the cut. if it does not return -1 */ + if (*new_rank > nnodes) { + /* sorry but in our scheme, you are out */ + *new_rank = MPI_UNDEFINED; + return MPI_SUCCESS; } return(MPI_SUCCESS); diff --git a/src/mca/topo/base/topo_base_graph_get.c b/src/mca/topo/base/topo_base_graph_get.c index b0256c564d..ba4f64b2a6 100644 --- a/src/mca/topo/base/topo_base_graph_get.c +++ b/src/mca/topo/base/topo_base_graph_get.c @@ -19,25 +19,31 @@ * @retval MPI_SUCCESS */ -int topo_base_graph_get (MPI_Comm comm, +int mca_topo_base_graph_get (MPI_Comm comm, int maxindex, int maxedges, int *index, int *edges){ int i; int *p; + int nprocs = ompi_comm_size(comm); /* * Fill the nodes and edges arrays. 
*/ - p = comm->c_topo_comm->mtc_index; - for (i = 0; (i < comm->c_topo_comm->mtc_nprocs) && (i < maxindex); ++i, ++p) { + p = comm->c_topo_comm->mtc_dims_or_index; + for (i = 0; (i < nprocs) && (i < maxindex); ++i, ++p) { *index++ = *p; } - p = comm->c_topo_comm->mtc_edges; - for (i = 0; (i < comm->c_topo_comm->mtc_nedges) && (i < maxedges); ++i, ++p) { + p = comm->c_topo_comm->mtc_periods_or_edges; + + for (i = 0; + (i < comm->c_topo_comm->mtc_dims_or_index[nprocs-1]) && (i < maxedges); + ++i, ++p) { + *edges++ = *p; + } return MPI_SUCCESS; diff --git a/src/mca/topo/base/topo_base_graph_neighbors.c b/src/mca/topo/base/topo_base_graph_neighbors.c index b3ca644c4b..42ce8842e6 100644 --- a/src/mca/topo/base/topo_base_graph_neighbors.c +++ b/src/mca/topo/base/topo_base_graph_neighbors.c @@ -19,7 +19,7 @@ * @retval MPI_SUCCESS */ -int topo_base_graph_neighbors (MPI_Comm comm, +int mca_topo_base_graph_neighbors (MPI_Comm comm, int rank, int maxneighbors, int *neighbors){ @@ -30,11 +30,11 @@ int topo_base_graph_neighbors (MPI_Comm comm, /* * Fill the neighbours. 
*/ - nnbrs = comm->c_topo_comm->mtc_index[rank]; - p = comm->c_topo_comm->mtc_edges; + nnbrs = comm->c_topo_comm->mtc_dims_or_index[rank]; + p = comm->c_topo_comm->mtc_periods_or_edges; if (rank > 0) { - i = comm->c_topo_comm->mtc_index[rank - 1]; + i = comm->c_topo_comm->mtc_dims_or_index[rank - 1]; nnbrs -= i; p += i; } diff --git a/src/mca/topo/base/topo_base_graph_neighbors_count.c b/src/mca/topo/base/topo_base_graph_neighbors_count.c index 9defe93d22..00f75b327a 100644 --- a/src/mca/topo/base/topo_base_graph_neighbors_count.c +++ b/src/mca/topo/base/topo_base_graph_neighbors_count.c @@ -17,12 +17,13 @@ * @retval MPI_SUCCESS */ -int topo_base_graph_neighbors_count (MPI_Comm comm, +int mca_topo_base_graph_neighbors_count (MPI_Comm comm, int rank, int *nneighbors){ - *nneighbors = comm->c_topo_comm->mtc_index[rank]; + + *nneighbors = comm->c_topo_comm->mtc_dims_or_index[rank]; if (rank > 0) { - *nneighbors -= comm->c_topo_comm->mtc_index[rank - 1]; + *nneighbors -= comm->c_topo_comm->mtc_dims_or_index[rank - 1]; } return MPI_SUCCESS; diff --git a/src/mca/topo/base/topo_base_graphdims_get.c b/src/mca/topo/base/topo_base_graphdims_get.c index 39969499f5..797b9f64b7 100644 --- a/src/mca/topo/base/topo_base_graphdims_get.c +++ b/src/mca/topo/base/topo_base_graphdims_get.c @@ -19,12 +19,12 @@ * @retval MPI_ERR_COMM * @retval MPI_ERR_ARG */ -int topo_base_graphdims_get (MPI_Comm comm, +int mca_topo_base_graphdims_get (MPI_Comm comm, int *nodes, int *nedges){ - *nodes = comm->c_topo_comm->mtc_nprocs; - *nedges = comm->c_topo_comm->mtc_nedges; + *nodes = ompi_comm_size(comm); + *nedges = comm->c_topo_comm->mtc_dims_or_index[*nodes -1]; return MPI_SUCCESS; } diff --git a/src/mca/topo/base/topo_base_open.c b/src/mca/topo/base/topo_base_open.c index 91fcec6a3e..6765223aea 100644 --- a/src/mca/topo/base/topo_base_open.c +++ b/src/mca/topo/base/topo_base_open.c @@ -5,6 +5,8 @@ #include +#include "util/output.h" +#include "class/ompi_list.h" #include "mca/mca.h" #include 
"mca/base/base.h" #include "mca/topo/base/base.h" @@ -20,26 +22,39 @@ * Global variables */ int mca_topo_base_output = -1; -ompi_list_t mca_topo_base_modules_available; +int mca_topo_base_param = -1; + +ompi_list_t mca_topo_base_modules_opened; + mca_topo_base_module_t mca_topo_base_selected_module; mca_topo_t mca_topo; +bool mca_topo_base_modules_opened_valid = false; + /** - * Functions for finding and opening either all the MCA topo modules, or + * Function for finding and opening either all the MCA topo modules, or * the one that specifically requested via a MCA parameter. */ int mca_topo_base_open(void) { - /* - * Open up all available modules - */ - if (OMPI_SUCCESS != - mca_base_modules_open("topo", 0, mca_topo_base_static_modules, - &mca_topo_base_modules_available)) { + + + /* Open the topo framework output stream */ + mca_topo_base_output = ompi_output_open(NULL); + + /* Open up all available modules */ + if (OMPI_SUCCESS != + mca_base_modules_open("topo", mca_topo_base_output, + mca_topo_base_static_modules, + &mca_topo_base_modules_opened)) { return OMPI_ERROR; } - /* - * All done - */ + + mca_topo_base_modules_opened_valid = true; + + /* Find the index of the "topo" param for selection */ + mca_topo_base_param = mca_base_param_find("topo", "base", NULL); + + /* All done */ return OMPI_SUCCESS; }