2004-06-29 00:02:25 +00:00
|
|
|
/*
|
2005-11-05 19:57:48 +00:00
|
|
|
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
|
|
|
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
2004-11-28 20:09:25 +00:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
2005-03-24 12:43:37 +00:00
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
2007-10-09 15:18:23 +00:00
|
|
|
* Copyright (c) 2007 Lawrence Livermore National Security, LLC. All
|
|
|
|
* rights reserved.
|
2004-11-22 01:38:40 +00:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
2004-06-29 00:02:25 +00:00
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "ompi_config.h"
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
|
|
|
#include "mpi.h"
|
2006-02-12 01:33:29 +00:00
|
|
|
#include "ompi/communicator/communicator.h"
|
2005-07-04 00:13:44 +00:00
|
|
|
#include "opal/util/argv.h"
|
2005-07-04 02:38:44 +00:00
|
|
|
#include "opal/util/show_help.h"
|
2005-07-03 16:22:16 +00:00
|
|
|
#include "opal/class/opal_list.h"
|
2005-07-03 16:06:07 +00:00
|
|
|
#include "opal/class/opal_object.h"
|
2006-02-12 01:33:29 +00:00
|
|
|
#include "opal/mca/mca.h"
|
|
|
|
#include "opal/mca/base/base.h"
|
|
|
|
#include "ompi/mca/coll/coll.h"
|
|
|
|
#include "ompi/mca/coll/base/base.h"
|
2004-06-29 00:02:25 +00:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Local variables
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Local types
|
|
|
|
*/
|
|
|
|
struct avail_coll_t {
|
2005-07-03 16:22:16 +00:00
|
|
|
opal_list_item_t super;
|
2004-06-29 00:02:25 +00:00
|
|
|
|
2005-01-30 18:42:37 +00:00
|
|
|
int ac_priority;
|
2007-08-19 03:37:49 +00:00
|
|
|
mca_coll_base_module_1_1_0_t *ac_module;
|
2004-06-29 00:02:25 +00:00
|
|
|
};
|
|
|
|
typedef struct avail_coll_t avail_coll_t;
|
|
|
|
|
2005-01-30 18:42:37 +00:00
|
|
|
|
2004-06-29 00:02:25 +00:00
|
|
|
/*
|
|
|
|
* Local functions
|
|
|
|
*/
|
2005-07-03 16:22:16 +00:00
|
|
|
static opal_list_t *check_components(opal_list_t *components,
|
2004-06-29 00:02:25 +00:00
|
|
|
ompi_communicator_t *comm,
|
|
|
|
char **names, int num_names);
|
|
|
|
static int check_one_component(ompi_communicator_t *comm,
|
2004-08-02 00:24:22 +00:00
|
|
|
const mca_base_component_t *component,
|
2007-08-19 03:37:49 +00:00
|
|
|
mca_coll_base_module_1_1_0_t **module);
|
2004-06-29 00:02:25 +00:00
|
|
|
|
2004-08-02 00:24:22 +00:00
|
|
|
static int query(const mca_base_component_t *component,
|
2004-06-29 00:02:25 +00:00
|
|
|
ompi_communicator_t *comm, int *priority,
|
2007-08-19 03:37:49 +00:00
|
|
|
mca_coll_base_module_1_1_0_t **module);
|
2004-06-29 00:02:25 +00:00
|
|
|
|
2007-08-19 03:37:49 +00:00
|
|
|
static int query_1_1_0(const mca_coll_base_component_1_1_0_t *coll_component,
|
|
|
|
ompi_communicator_t *comm, int *priority,
|
|
|
|
mca_coll_base_module_1_1_0_t **module);
|
2004-06-29 00:02:25 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Stuff for the OBJ interface
|
|
|
|
*/
|
2005-07-03 16:22:16 +00:00
|
|
|
/* Class descriptor that lets OBJ_NEW(avail_coll_t) work; the parent class
   is opal_list_item_t.  NOTE(review): the two NULL arguments are
   presumably the constructor and destructor slots -- confirm against
   opal_object.h. */
static OBJ_CLASS_INSTANCE(avail_coll_t, opal_list_item_t, NULL, NULL);
|
2004-06-29 00:02:25 +00:00
|
|
|
|
|
|
|
|
2007-08-19 03:37:49 +00:00
|
|
|
/*
 * Install one collective function from a module into a communicator.
 *
 * If the module provides an implementation of collective "func"
 * (module->coll_<func> is non-NULL), that function pointer and the module
 * itself are stored in comm->c_coll.  Any module previously recorded for
 * the same collective is released first, and the newly recorded module is
 * retained, so each occupied slot in comm->c_coll holds exactly one
 * reference.  If the module does not implement "func", nothing changes.
 *
 * The module and comm arguments are parenthesized so that arbitrary
 * pointer expressions may be passed.  Both are still evaluated more than
 * once, so they must not have side effects.
 */
#define COPY(module, comm, func)                                          \
    do {                                                                  \
        if (NULL != (module)->coll_ ## func) {                            \
            if (NULL != (comm)->c_coll.coll_ ## func ## _module) {        \
                OBJ_RELEASE((comm)->c_coll.coll_ ## func ## _module);     \
            }                                                             \
            (comm)->c_coll.coll_ ## func = (module)->coll_ ## func;       \
            (comm)->c_coll.coll_ ## func ## _module = (module);           \
            OBJ_RETAIN(module);                                           \
        }                                                                 \
    } while (0)
|
|
|
|
|
2004-06-29 00:02:25 +00:00
|
|
|
/*
|
|
|
|
* This function is called at the initialization time of every
|
|
|
|
* communicator. It is used to select which coll component will be
|
|
|
|
* active for a given communicator.
|
|
|
|
*
|
|
|
|
* This selection logic is not for the weak.
|
|
|
|
*/
|
2007-08-19 03:37:49 +00:00
|
|
|
int mca_coll_base_comm_select(ompi_communicator_t *comm)
|
2004-06-29 00:02:25 +00:00
|
|
|
{
|
2007-08-19 03:37:49 +00:00
|
|
|
int ret, num_names;
|
|
|
|
char name[MPI_MAX_OBJECT_NAME + 32];
|
|
|
|
char *names, **name_array;
|
|
|
|
opal_list_t *selectable;
|
|
|
|
opal_list_item_t *item;
|
2004-06-29 00:02:25 +00:00
|
|
|
|
|
|
|
/* Announce */
|
|
|
|
snprintf(name, sizeof(name), "%s (cid %d)", comm->c_name,
|
|
|
|
comm->c_contextid);
|
|
|
|
name[sizeof(name) - 1] = '\0';
|
2005-07-03 23:31:27 +00:00
|
|
|
opal_output_verbose(10, mca_coll_base_output,
|
2004-06-29 00:02:25 +00:00
|
|
|
"coll:base:comm_select: new communicator: %s",
|
|
|
|
name);
|
|
|
|
|
|
|
|
/* Initialize all the relevant pointers, since they're used as
|
|
|
|
sentinel values */
|
2007-08-19 03:37:49 +00:00
|
|
|
memset(&comm->c_coll, 0, sizeof(mca_coll_base_comm_coll_t));
|
2004-06-29 00:02:25 +00:00
|
|
|
|
|
|
|
/* See if a set of component was requested by the MCA parameter.
|
|
|
|
Don't check for error. */
|
|
|
|
names = NULL;
|
|
|
|
mca_base_param_lookup_string(mca_coll_base_param, &names);
|
|
|
|
|
2007-08-19 03:37:49 +00:00
|
|
|
if (NULL != names && 0 < strlen(names)) {
|
|
|
|
/* mca param based */
|
2005-07-04 00:13:44 +00:00
|
|
|
name_array = opal_argv_split(names, ',');
|
|
|
|
num_names = opal_argv_count(name_array);
|
2004-06-29 00:02:25 +00:00
|
|
|
|
2005-07-03 23:31:27 +00:00
|
|
|
opal_output_verbose(10, mca_coll_base_output,
|
2004-06-29 00:02:25 +00:00
|
|
|
"coll:base:comm_select: Checking specific modules: %s",
|
|
|
|
names);
|
|
|
|
selectable = check_components(&mca_coll_base_components_available,
|
|
|
|
comm, name_array, num_names);
|
2005-07-04 00:13:44 +00:00
|
|
|
opal_argv_free(name_array);
|
2007-08-19 03:37:49 +00:00
|
|
|
} else {
|
|
|
|
/* no specific components given -- try all */
|
2005-07-03 23:31:27 +00:00
|
|
|
opal_output_verbose(10, mca_coll_base_output,
|
2004-06-29 00:02:25 +00:00
|
|
|
"coll:base:comm_select: Checking all available modules");
|
|
|
|
selectable = check_components(&mca_coll_base_components_available,
|
|
|
|
comm, NULL, 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Upon return from the above, the modules list will contain the
|
|
|
|
list of modules that returned (priority >= 0). If we have no
|
|
|
|
collective modules available, then use the basic component */
|
|
|
|
if (NULL == selectable) {
|
2007-08-19 03:37:49 +00:00
|
|
|
/* There's no modules available */
|
2005-07-04 02:38:44 +00:00
|
|
|
opal_show_help("help-mca-coll-base",
|
2004-09-05 16:05:37 +00:00
|
|
|
"comm-select:none-available", true);
|
2004-06-29 00:02:25 +00:00
|
|
|
return OMPI_ERROR;
|
|
|
|
}
|
|
|
|
|
2007-08-19 03:37:49 +00:00
|
|
|
/* FIX ME - Do some kind of collective operation to find a module
|
|
|
|
that everyone has available */
|
2004-06-29 00:02:25 +00:00
|
|
|
|
2007-08-19 03:37:49 +00:00
|
|
|
/* do the selection loop */
|
|
|
|
for (item = opal_list_get_first(selectable) ;
|
|
|
|
item != opal_list_get_end(selectable) ;
|
|
|
|
item = opal_list_get_next(item)) {
|
|
|
|
avail_coll_t *avail = (avail_coll_t*) item;
|
2005-10-04 17:09:45 +00:00
|
|
|
|
2007-08-19 03:37:49 +00:00
|
|
|
/* initialize the module */
|
|
|
|
ret = avail->ac_module->coll_module_enable(avail->ac_module, comm);
|
|
|
|
if (OMPI_SUCCESS != ret) {
|
|
|
|
mca_coll_base_comm_unselect(comm);
|
|
|
|
continue;
|
|
|
|
}
|
2005-10-04 17:09:45 +00:00
|
|
|
|
2007-08-19 03:37:49 +00:00
|
|
|
/* copy over any of the pointers */
|
|
|
|
COPY(avail->ac_module, comm, allgather);
|
|
|
|
COPY(avail->ac_module, comm, allgatherv);
|
|
|
|
COPY(avail->ac_module, comm, allreduce);
|
|
|
|
COPY(avail->ac_module, comm, alltoall);
|
|
|
|
COPY(avail->ac_module, comm, alltoallv);
|
|
|
|
COPY(avail->ac_module, comm, alltoallw);
|
|
|
|
COPY(avail->ac_module, comm, barrier);
|
|
|
|
COPY(avail->ac_module, comm, bcast);
|
|
|
|
COPY(avail->ac_module, comm, exscan);
|
|
|
|
COPY(avail->ac_module, comm, gather);
|
|
|
|
COPY(avail->ac_module, comm, gatherv);
|
|
|
|
COPY(avail->ac_module, comm, reduce);
|
|
|
|
COPY(avail->ac_module, comm, reduce_scatter);
|
|
|
|
COPY(avail->ac_module, comm, scan);
|
|
|
|
COPY(avail->ac_module, comm, scatter);
|
|
|
|
COPY(avail->ac_module, comm, scatterv);
|
|
|
|
|
|
|
|
/* release the original module reference */
|
|
|
|
OBJ_RELEASE(avail->ac_module);
|
2004-06-29 00:02:25 +00:00
|
|
|
}
|
|
|
|
|
2007-08-19 03:37:49 +00:00
|
|
|
/* check to make sure no NULLs */
|
|
|
|
if ((NULL == comm->c_coll.coll_allgather) ||
|
|
|
|
(NULL == comm->c_coll.coll_allgatherv) ||
|
|
|
|
(NULL == comm->c_coll.coll_allreduce) ||
|
|
|
|
(NULL == comm->c_coll.coll_alltoall) ||
|
|
|
|
(NULL == comm->c_coll.coll_alltoallv) ||
|
|
|
|
(NULL == comm->c_coll.coll_alltoallw) ||
|
|
|
|
(NULL == comm->c_coll.coll_barrier) ||
|
|
|
|
(NULL == comm->c_coll.coll_bcast) ||
|
|
|
|
((OMPI_COMM_IS_INTRA(comm)) && (NULL == comm->c_coll.coll_exscan)) ||
|
|
|
|
(NULL == comm->c_coll.coll_gather) ||
|
|
|
|
(NULL == comm->c_coll.coll_gatherv) ||
|
|
|
|
(NULL == comm->c_coll.coll_reduce) ||
|
|
|
|
(NULL == comm->c_coll.coll_reduce_scatter) ||
|
|
|
|
((OMPI_COMM_IS_INTRA(comm)) && (NULL == comm->c_coll.coll_scan)) ||
|
|
|
|
(NULL == comm->c_coll.coll_scatter) ||
|
|
|
|
(NULL == comm->c_coll.coll_scatterv)) {
|
|
|
|
mca_coll_base_comm_unselect(comm);
|
|
|
|
return OMPI_ERR_NOT_FOUND;
|
2004-06-29 00:02:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For each module in the list, if it is in the list of names (or the
|
|
|
|
* list of names is NULL), then check and see if it wants to run, and
|
|
|
|
* do the resulting priority comparison. Make a list of modules to be
|
|
|
|
* only those who returned that they want to run, and put them in
|
|
|
|
* priority order.
|
|
|
|
*/
|
2005-07-03 16:22:16 +00:00
|
|
|
static opal_list_t *check_components(opal_list_t *components,
|
2004-06-29 00:02:25 +00:00
|
|
|
ompi_communicator_t *comm,
|
|
|
|
char **names, int num_names)
|
|
|
|
{
|
|
|
|
int i, priority;
|
2004-08-02 00:24:22 +00:00
|
|
|
const mca_base_component_t *component;
|
2005-07-03 16:22:16 +00:00
|
|
|
opal_list_item_t *item, *item2;
|
2007-08-19 03:37:49 +00:00
|
|
|
mca_coll_base_module_1_1_0_t *module;
|
2004-06-29 00:02:25 +00:00
|
|
|
bool want_to_check;
|
2005-07-03 16:22:16 +00:00
|
|
|
opal_list_t *selectable;
|
2004-07-30 19:14:55 +00:00
|
|
|
avail_coll_t *avail, *avail2;
|
2004-09-14 09:10:23 +00:00
|
|
|
|
2004-06-29 00:02:25 +00:00
|
|
|
/* Make a list of the components that query successfully */
|
|
|
|
|
2005-07-03 16:22:16 +00:00
|
|
|
selectable = OBJ_NEW(opal_list_t);
|
2004-06-29 00:02:25 +00:00
|
|
|
|
|
|
|
/* Scan through the list of components. This nested loop is O(N^2),
|
|
|
|
but we should never have too many components and/or names, so this
|
|
|
|
*hopefully* shouldn't matter... */
|
|
|
|
|
2005-07-03 16:22:16 +00:00
|
|
|
for (item = opal_list_get_first(components);
|
|
|
|
item != opal_list_get_end(components);
|
|
|
|
item = opal_list_get_next(item)) {
|
2004-06-29 00:02:25 +00:00
|
|
|
component = ((mca_base_component_priority_list_item_t *)
|
2004-11-12 16:55:41 +00:00
|
|
|
item)->super.cli_component;
|
2004-06-29 00:02:25 +00:00
|
|
|
|
|
|
|
/* If we have a list of names, scan through it */
|
|
|
|
|
|
|
|
if (0 == num_names) {
|
|
|
|
want_to_check = true;
|
|
|
|
} else {
|
|
|
|
want_to_check = false;
|
|
|
|
for (i = 0; i < num_names; ++i) {
|
2004-08-02 00:24:22 +00:00
|
|
|
if (0 == strcmp(names[i], component->mca_component_name)) {
|
2004-06-29 00:02:25 +00:00
|
|
|
want_to_check = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If we determined that we want to check this component, then do
|
|
|
|
so */
|
|
|
|
|
|
|
|
if (want_to_check) {
|
2007-08-19 03:37:49 +00:00
|
|
|
priority = check_one_component(comm, component, &module);
|
2005-02-10 04:15:16 +00:00
|
|
|
if (priority >= 0) {
|
2004-06-29 00:02:25 +00:00
|
|
|
|
|
|
|
/* We have a component that indicated that it wants to run by
|
|
|
|
giving us a module */
|
|
|
|
|
|
|
|
avail = OBJ_NEW(avail_coll_t);
|
2004-09-03 14:57:00 +00:00
|
|
|
avail->ac_priority = priority;
|
2004-08-03 21:29:23 +00:00
|
|
|
avail->ac_module = module;
|
2004-06-29 00:02:25 +00:00
|
|
|
|
2007-08-19 03:37:49 +00:00
|
|
|
/* Put this item on the list in priority order (lowest
|
2004-07-30 19:14:55 +00:00
|
|
|
priority first). Should it go first? */
|
|
|
|
|
2007-10-08 23:01:36 +00:00
|
|
|
for(item2 = opal_list_get_first(selectable);
|
|
|
|
item2 != opal_list_get_end(selectable);
|
|
|
|
item2 = opal_list_get_next(item2)) {
|
|
|
|
avail2 = (avail_coll_t*)item2;
|
|
|
|
if(avail->ac_priority < avail2->ac_priority) {
|
|
|
|
opal_list_insert_pos(selectable,
|
|
|
|
item2, (opal_list_item_t*)avail);
|
|
|
|
break;
|
2004-07-30 19:14:55 +00:00
|
|
|
}
|
|
|
|
}
|
2007-10-08 23:01:36 +00:00
|
|
|
|
|
|
|
if(opal_list_get_end(selectable) == item2) {
|
|
|
|
opal_list_append(selectable, (opal_list_item_t*)avail);
|
|
|
|
}
|
2004-07-30 19:14:55 +00:00
|
|
|
}
|
2004-06-29 00:02:25 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If we didn't find any available components, return an error */
|
|
|
|
|
2005-07-03 16:22:16 +00:00
|
|
|
if (0 == opal_list_get_size(selectable)) {
|
2004-06-29 00:02:25 +00:00
|
|
|
OBJ_RELEASE(selectable);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* All done */
|
|
|
|
|
|
|
|
return selectable;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check a single component
|
|
|
|
*/
|
|
|
|
static int check_one_component(ompi_communicator_t *comm,
|
2004-08-02 00:24:22 +00:00
|
|
|
const mca_base_component_t *component,
|
2007-08-19 03:37:49 +00:00
|
|
|
mca_coll_base_module_1_1_0_t **module)
|
2004-06-29 00:02:25 +00:00
|
|
|
{
|
|
|
|
int err;
|
|
|
|
int priority = -1;
|
|
|
|
|
2007-08-19 03:37:49 +00:00
|
|
|
err = query(component, comm, &priority, module);
|
2004-06-29 00:02:25 +00:00
|
|
|
|
|
|
|
if (OMPI_SUCCESS == err) {
|
|
|
|
priority = (priority < 100) ? priority : 100;
|
2005-07-03 23:31:27 +00:00
|
|
|
opal_output_verbose(10, mca_coll_base_output,
|
2004-06-29 00:02:25 +00:00
|
|
|
"coll:base:comm_select: component available: %s, priority: %d",
|
2004-08-02 00:24:22 +00:00
|
|
|
component->mca_component_name, priority);
|
2004-06-29 00:02:25 +00:00
|
|
|
|
|
|
|
} else {
|
|
|
|
priority = -1;
|
2005-07-03 23:31:27 +00:00
|
|
|
opal_output_verbose(10, mca_coll_base_output,
|
2004-06-29 00:02:25 +00:00
|
|
|
"coll:base:comm_select: component not available: %s",
|
2004-08-02 00:24:22 +00:00
|
|
|
component->mca_component_name);
|
2004-06-29 00:02:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return priority;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**************************************************************************
|
|
|
|
* Query functions
|
|
|
|
**************************************************************************/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Take any version of a coll module, query it, and return the right
|
2004-08-03 21:29:23 +00:00
|
|
|
* module struct
|
2004-06-29 00:02:25 +00:00
|
|
|
*/
|
2004-08-02 00:24:22 +00:00
|
|
|
static int query(const mca_base_component_t *component,
|
2004-06-29 00:02:25 +00:00
|
|
|
ompi_communicator_t *comm,
|
2007-08-19 03:37:49 +00:00
|
|
|
int *priority, mca_coll_base_module_1_1_0_t **module)
|
2004-06-29 00:02:25 +00:00
|
|
|
{
|
2007-10-07 12:20:22 +00:00
|
|
|
/* coll v1.1.0 */
|
2004-06-29 00:02:25 +00:00
|
|
|
|
|
|
|
*module = NULL;
|
2007-10-07 12:20:22 +00:00
|
|
|
if (1 == component->mca_type_major_version &&
|
|
|
|
1 == component->mca_type_minor_version &&
|
|
|
|
0 == component->mca_type_release_version) {
|
2007-08-19 03:37:49 +00:00
|
|
|
const mca_coll_base_component_1_1_0_t *coll100 =
|
|
|
|
(mca_coll_base_component_1_1_0_t *) component;
|
2004-06-29 00:02:25 +00:00
|
|
|
|
2007-08-19 03:37:49 +00:00
|
|
|
return query_1_1_0(coll100, comm, priority, module);
|
2004-06-29 00:02:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Unknown coll API version -- return error */
|
|
|
|
|
|
|
|
return OMPI_ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2007-08-19 03:37:49 +00:00
|
|
|
static int query_1_1_0(const mca_coll_base_component_1_1_0_t *component,
|
2004-06-29 00:02:25 +00:00
|
|
|
ompi_communicator_t *comm, int *priority,
|
2007-08-19 03:37:49 +00:00
|
|
|
mca_coll_base_module_1_1_0_t **module)
|
2004-06-29 00:02:25 +00:00
|
|
|
{
|
2007-08-19 03:37:49 +00:00
|
|
|
mca_coll_base_module_1_1_0_t *ret;
|
2004-06-29 00:02:25 +00:00
|
|
|
|
|
|
|
/* There's currently no need for conversion */
|
|
|
|
|
2007-08-19 03:37:49 +00:00
|
|
|
ret = component->collm_comm_query(comm, priority);
|
2004-06-29 00:02:25 +00:00
|
|
|
if (NULL != ret) {
|
|
|
|
*module = ret;
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|
|
|
|
|
|
|
|
return OMPI_ERROR;
|
|
|
|
}
|