2007-12-21 06:02:00 +00:00
|
|
|
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
2004-12-27 08:56:33 +00:00
|
|
|
/*
|
2007-03-16 23:11:45 +00:00
|
|
|
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
2005-11-05 19:57:48 +00:00
|
|
|
* University Research and Technology
|
|
|
|
* Corporation. All rights reserved.
|
2007-12-21 06:02:00 +00:00
|
|
|
* Copyright (c) 2004-2007 The University of Tennessee and The University
|
2005-11-05 19:57:48 +00:00
|
|
|
* of Tennessee Research Foundation. All rights
|
|
|
|
* reserved.
|
2004-12-27 08:56:33 +00:00
|
|
|
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
|
|
|
* University of Stuttgart. All rights reserved.
|
2005-03-24 12:43:37 +00:00
|
|
|
* Copyright (c) 2004-2005 The Regents of the University of California.
|
|
|
|
* All rights reserved.
|
2008-02-01 17:11:36 +00:00
|
|
|
* Copyright (c) 2007-2008 University of Houston. All rights reserved.
|
2004-12-27 08:56:33 +00:00
|
|
|
* $COPYRIGHT$
|
|
|
|
*
|
|
|
|
* Additional copyrights may follow
|
|
|
|
*
|
|
|
|
* $HEADER$
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "ompi_config.h"
|
|
|
|
#include "coll_hierarch.h"
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
|
|
|
#include "mpi.h"
|
2006-02-12 01:33:29 +00:00
|
|
|
#include "ompi/communicator/communicator.h"
|
|
|
|
#include "ompi/group/group.h"
|
|
|
|
#include "ompi/proc/proc.h"
|
2005-10-11 21:26:07 +00:00
|
|
|
#include "ompi/op/op.h"
|
|
|
|
|
2006-02-12 01:33:29 +00:00
|
|
|
#include "ompi/mca/coll/coll.h"
|
|
|
|
#include "ompi/mca/coll/base/base.h"
|
2005-10-11 20:34:17 +00:00
|
|
|
#include "ompi/mca/coll/base/coll_tags.h"
|
2005-10-11 21:26:07 +00:00
|
|
|
|
2006-02-12 01:33:29 +00:00
|
|
|
#include "ompi/class/ompi_bitmap.h"
|
|
|
|
#include "ompi/mca/bml/bml.h"
|
|
|
|
#include "ompi/mca/bml/base/base.h"
|
|
|
|
#include "ompi/mca/pml/pml.h"
|
|
|
|
#include "ompi/mca/btl/btl.h"
|
2005-10-11 17:29:59 +00:00
|
|
|
|
2005-10-15 19:36:54 +00:00
|
|
|
/* Local functions and data */
|
2008-02-01 17:11:36 +00:00
|
|
|
#define HIER_MAXPROTOCOL 5
|
|
|
|
#define HIER_MAX_PROTNAMELEN 7
|
2005-04-13 18:35:07 +00:00
|
|
|
static int mca_coll_hierarch_max_protocol=HIER_MAXPROTOCOL;
|
|
|
|
|
2008-02-01 17:11:36 +00:00
|
|
|
/* Commments: need to add ofud, udapl, portals and sctp into this list! */
|
|
|
|
static char hier_prot[HIER_MAXPROTOCOL][HIER_MAX_PROTNAMELEN]={"0","tcp","mx","openib","sm"};
|
2005-04-13 18:35:07 +00:00
|
|
|
|
2005-02-24 15:11:07 +00:00
|
|
|
static void mca_coll_hierarch_checkfor_component (struct ompi_communicator_t *comm,
|
2005-10-13 15:07:35 +00:00
|
|
|
int component_level,
|
|
|
|
char *component_name,
|
|
|
|
int *key, int *ncount);
|
2008-02-01 17:11:36 +00:00
|
|
|
static void mca_coll_hierarch_checkfor_sm (struct ompi_communicator_t *comm,
|
|
|
|
int *color,
|
|
|
|
int *ncount);
|
2007-08-23 20:41:29 +00:00
|
|
|
static void mca_coll_hierarch_dump_struct ( mca_coll_hierarch_module_t *c);
|
2004-12-27 08:56:33 +00:00
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initial query function that is invoked during MPI_INIT, allowing
|
|
|
|
* this module to indicate what level of thread support it provides.
|
|
|
|
*/
|
2005-10-11 22:05:24 +00:00
|
|
|
int mca_coll_hierarch_init_query(bool allow_hierarch_user_threads,
|
2007-12-21 06:02:00 +00:00
|
|
|
bool have_hidden_user_threads)
|
2004-12-27 08:56:33 +00:00
|
|
|
{
|
2005-10-11 22:05:24 +00:00
|
|
|
/* Don't ask. All done */
|
|
|
|
return OMPI_SUCCESS;
|
2004-12-27 08:56:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Invoked when there's a new communicator that has been created.
|
|
|
|
* Look at the communicator and decide which set of functions and
|
|
|
|
* priority we want to return.
|
|
|
|
*/
|
2007-08-23 20:41:29 +00:00
|
|
|
mca_coll_base_module_1_1_0_t *
|
|
|
|
mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority )
|
2004-12-27 08:56:33 +00:00
|
|
|
{
|
2005-10-13 16:21:13 +00:00
|
|
|
int size, rank;
|
2008-02-01 17:11:36 +00:00
|
|
|
int color, ncount=0, maxncount;
|
2005-04-13 18:35:07 +00:00
|
|
|
int level;
|
2005-10-11 20:34:17 +00:00
|
|
|
int ret=OMPI_SUCCESS;
|
2005-10-14 12:18:29 +00:00
|
|
|
int ignore_sm=0;
|
2008-02-01 17:11:36 +00:00
|
|
|
int detection_alg=0;
|
2007-08-23 20:41:29 +00:00
|
|
|
mca_coll_hierarch_module_t *hierarch_module;
|
|
|
|
|
|
|
|
hierarch_module = OBJ_NEW ( mca_coll_hierarch_module_t);
|
|
|
|
if ( NULL == hierarch_module ) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
hierarch_module->super.coll_module_enable = mca_coll_hierarch_module_enable;
|
|
|
|
hierarch_module->super.ft_event = mca_coll_hierarch_ft_event;
|
|
|
|
|
|
|
|
hierarch_module->super.coll_allgather = NULL;
|
|
|
|
hierarch_module->super.coll_allgatherv = NULL;
|
|
|
|
hierarch_module->super.coll_allreduce = mca_coll_hierarch_allreduce_intra;
|
|
|
|
hierarch_module->super.coll_alltoall = NULL;
|
|
|
|
hierarch_module->super.coll_alltoallv = NULL;
|
|
|
|
hierarch_module->super.coll_alltoallw = NULL;
|
|
|
|
hierarch_module->super.coll_barrier = mca_coll_hierarch_barrier_intra;
|
|
|
|
hierarch_module->super.coll_bcast = mca_coll_hierarch_bcast_intra;
|
|
|
|
hierarch_module->super.coll_exscan = NULL;
|
|
|
|
hierarch_module->super.coll_gather = NULL;
|
|
|
|
hierarch_module->super.coll_gatherv = NULL;
|
|
|
|
hierarch_module->super.coll_reduce = mca_coll_hierarch_reduce_intra;
|
|
|
|
hierarch_module->super.coll_reduce_scatter = NULL;
|
|
|
|
hierarch_module->super.coll_scan = NULL;
|
|
|
|
hierarch_module->super.coll_scatter = NULL;
|
|
|
|
hierarch_module->super.coll_scatterv = NULL;
|
|
|
|
|
2005-10-14 12:18:29 +00:00
|
|
|
|
2005-10-14 17:41:44 +00:00
|
|
|
/* This module only works for intra-communicators at the moment */
|
|
|
|
if ( OMPI_COMM_IS_INTER(comm) ) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2005-10-30 16:01:13 +00:00
|
|
|
|
|
|
|
/* Get the priority level attached to this module. If priority = 0,
|
|
|
|
we assume that we won't be chosen anyway, so we quit and improve
|
|
|
|
therefore the startup time. */
|
|
|
|
*priority = mca_coll_hierarch_priority_param;
|
|
|
|
if ( 0 >= mca_coll_hierarch_priority_param ) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2005-10-14 12:18:29 +00:00
|
|
|
/* Check whether we should ignore sm. This might be necessary to take advantage
|
|
|
|
of the some ib or gm collectives. */
|
2005-10-25 18:34:47 +00:00
|
|
|
ignore_sm = mca_coll_hierarch_ignore_sm_param;
|
|
|
|
|
2005-02-24 15:11:07 +00:00
|
|
|
size = ompi_comm_size(comm);
|
2005-10-18 19:20:48 +00:00
|
|
|
if ( size < 3 ) {
|
|
|
|
/* No need for hierarchical collectives for 1 or 2 procs. */
|
2005-10-18 18:17:50 +00:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2005-10-13 16:21:13 +00:00
|
|
|
rank = ompi_comm_rank(comm);
|
2005-04-14 21:27:24 +00:00
|
|
|
|
2007-08-23 20:41:29 +00:00
|
|
|
hierarch_module->hier_num_colorarr = size;
|
|
|
|
hierarch_module->hier_colorarr = (int *) malloc ( sizeof(int) * size);
|
|
|
|
if ( NULL == hierarch_module->hier_colorarr ) {
|
2005-10-13 15:07:35 +00:00
|
|
|
*priority = 0;
|
|
|
|
return NULL;
|
2005-02-24 15:11:07 +00:00
|
|
|
}
|
|
|
|
|
2005-04-13 18:35:07 +00:00
|
|
|
/*
|
|
|
|
* walk through the list of registered protocols, and check which one
|
2008-02-01 17:11:36 +00:00
|
|
|
* is feasible.
|
2005-04-13 18:35:07 +00:00
|
|
|
* Later we start with level=0, and introduce the multi-cell check
|
2007-12-21 06:02:00 +00:00
|
|
|
*/
|
2005-10-14 12:18:29 +00:00
|
|
|
if ( ignore_sm ) {
|
|
|
|
mca_coll_hierarch_max_protocol = HIER_MAXPROTOCOL - 1;
|
|
|
|
}
|
2008-02-01 17:11:36 +00:00
|
|
|
|
|
|
|
/* if number of levels is not specified, or if it is specified as ALL_LEVELS,
|
|
|
|
* proceed in the usual way
|
|
|
|
*/
|
|
|
|
|
|
|
|
detection_alg = mca_coll_hierarch_detection_alg_param;
|
|
|
|
if( TWO_LEVELS == detection_alg ) {
|
|
|
|
mca_coll_hierarch_max_protocol = 2;
|
|
|
|
if ( mca_coll_hierarch_verbose_param ) {
|
|
|
|
printf("Switching to two level hierarchy detection\n");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-10-18 18:17:50 +00:00
|
|
|
for ( level = mca_coll_hierarch_max_protocol - 1; level >0 ; level--) {
|
2008-02-01 17:11:36 +00:00
|
|
|
if ( ALL_LEVELS == detection_alg ) {
|
|
|
|
mca_coll_hierarch_checkfor_component ( comm,
|
|
|
|
level,
|
|
|
|
hier_prot[level],
|
|
|
|
&color,
|
|
|
|
&ncount);
|
2005-10-30 16:01:13 +00:00
|
|
|
}
|
2008-02-01 17:11:36 +00:00
|
|
|
else if (TWO_LEVELS == detection_alg ) {
|
|
|
|
mca_coll_hierarch_checkfor_sm ( comm, &color, &ncount );
|
|
|
|
}
|
|
|
|
|
|
|
|
/* This is probably a no-no! but for the moment we agreed with Jeff,
|
|
|
|
** that this might be the best solution. These functions emulate an
|
|
|
|
** allreduce and an allgather.
|
|
|
|
*/
|
|
|
|
ret = mca_coll_hierarch_allreduce_tmp (&ncount, &maxncount, 1, MPI_INT,
|
|
|
|
MPI_MAX, comm );
|
|
|
|
if ( OMPI_SUCCESS != ret ) {
|
|
|
|
return NULL;
|
2005-10-30 16:01:13 +00:00
|
|
|
}
|
2005-10-15 17:04:01 +00:00
|
|
|
|
2008-02-01 17:11:36 +00:00
|
|
|
if ( 0 == maxncount ) {
|
2005-10-25 18:34:47 +00:00
|
|
|
if ( mca_coll_hierarch_verbose_param ) {
|
2005-10-18 18:17:50 +00:00
|
|
|
printf("%s:%d: nobody talks with %s. Continuing to next level.\n",
|
|
|
|
comm->c_name, rank, hier_prot[level]);
|
2008-02-01 17:11:36 +00:00
|
|
|
}
|
2005-10-18 18:17:50 +00:00
|
|
|
continue;
|
2008-02-01 17:11:36 +00:00
|
|
|
}
|
|
|
|
else if ( maxncount == (size-1) ) {
|
2005-10-13 15:07:35 +00:00
|
|
|
/*
|
2008-02-01 17:11:36 +00:00
|
|
|
* everybody can talk to every other process with this protocol,
|
|
|
|
* no need to continue in the hierarchy tree and for the
|
|
|
|
* hierarchical component.
|
|
|
|
* Its (size-1) because we do not count ourselves.
|
2005-10-13 15:07:35 +00:00
|
|
|
* maxncount[1] should be zero.
|
2008-02-01 17:11:36 +00:00
|
|
|
*/
|
2005-10-25 18:34:47 +00:00
|
|
|
if ( mca_coll_hierarch_verbose_param ) {
|
2005-10-14 17:41:44 +00:00
|
|
|
printf("%s:%d: everybody talks with %s. No need to continue\n",
|
|
|
|
comm->c_name, rank, hier_prot[level]);
|
|
|
|
}
|
2008-02-01 17:11:36 +00:00
|
|
|
goto exit;
|
|
|
|
}
|
|
|
|
else {
|
2005-10-25 18:34:47 +00:00
|
|
|
if ( mca_coll_hierarch_verbose_param ) {
|
2005-10-15 19:36:54 +00:00
|
|
|
printf("%s:%d: %d procs talk with %s. Use this protocol, key %d\n",
|
2005-10-18 18:17:50 +00:00
|
|
|
comm->c_name, rank, maxncount, hier_prot[level], color);
|
2005-10-14 17:41:44 +00:00
|
|
|
}
|
2008-02-01 17:11:36 +00:00
|
|
|
|
|
|
|
ret = mca_coll_hierarch_allgather_tmp (&color, 1, MPI_INT,
|
2007-08-23 20:41:29 +00:00
|
|
|
hierarch_module->hier_colorarr, 1,
|
2005-10-15 17:04:01 +00:00
|
|
|
MPI_INT, comm );
|
2008-02-01 17:11:36 +00:00
|
|
|
if ( OMPI_SUCCESS != ret ) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
hierarch_module->hier_level = level;
|
|
|
|
return &(hierarch_module->super);
|
|
|
|
}
|
2005-02-24 15:11:07 +00:00
|
|
|
}
|
2005-10-13 15:07:35 +00:00
|
|
|
|
2005-04-14 21:27:24 +00:00
|
|
|
exit:
|
2005-04-13 18:35:07 +00:00
|
|
|
*priority = 0;
|
|
|
|
return NULL;
|
2004-12-27 08:56:33 +00:00
|
|
|
}
|
2005-04-13 18:35:07 +00:00
|
|
|
|
2004-12-27 08:56:33 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Init module on the communicator
|
|
|
|
*/
|
2007-08-23 20:41:29 +00:00
|
|
|
int mca_coll_hierarch_module_enable (mca_coll_base_module_1_1_0_t *module,
|
|
|
|
struct ompi_communicator_t *comm)
|
2004-12-27 08:56:33 +00:00
|
|
|
{
|
2005-10-11 17:29:59 +00:00
|
|
|
int color;
|
2005-02-24 15:11:07 +00:00
|
|
|
int size, rank, ret=OMPI_SUCCESS;
|
2005-10-14 17:41:44 +00:00
|
|
|
|
2005-10-11 17:29:59 +00:00
|
|
|
struct ompi_communicator_t *lcomm=NULL;
|
2005-10-11 19:45:21 +00:00
|
|
|
struct ompi_communicator_t *llcomm=NULL;
|
2005-10-14 17:41:44 +00:00
|
|
|
struct mca_coll_hierarch_llead_t *llead=NULL;
|
2007-08-23 20:41:29 +00:00
|
|
|
mca_coll_hierarch_module_t *hierarch_module = (mca_coll_hierarch_module_t *) module;
|
|
|
|
|
2005-02-24 15:11:07 +00:00
|
|
|
rank = ompi_comm_rank(comm);
|
|
|
|
size = ompi_comm_size(comm);
|
2005-10-14 17:41:44 +00:00
|
|
|
|
2007-08-23 20:41:29 +00:00
|
|
|
color = hierarch_module->hier_colorarr[rank];
|
2005-02-24 15:11:07 +00:00
|
|
|
|
|
|
|
/* Generate the subcommunicator based on the color returned by
|
|
|
|
the previous function. */
|
2005-10-11 17:29:59 +00:00
|
|
|
ret = ompi_comm_split ( comm, color, rank, &lcomm, 0 );
|
2005-02-24 15:11:07 +00:00
|
|
|
if ( OMPI_SUCCESS != ret ) {
|
2005-10-13 15:07:35 +00:00
|
|
|
goto exit;
|
2005-02-24 15:11:07 +00:00
|
|
|
}
|
2005-10-14 17:41:44 +00:00
|
|
|
|
2007-08-23 20:41:29 +00:00
|
|
|
hierarch_module->hier_comm = comm;
|
|
|
|
hierarch_module->hier_lcomm = lcomm;
|
|
|
|
hierarch_module->hier_num_reqs = 2 * size;
|
|
|
|
hierarch_module->hier_reqs = (ompi_request_t **) malloc (sizeof(ompi_request_t)*size*2);
|
|
|
|
if ( NULL == hierarch_module->hier_reqs ) {
|
2005-10-13 15:07:35 +00:00
|
|
|
goto exit;
|
2005-02-24 15:11:07 +00:00
|
|
|
}
|
2005-10-14 17:41:44 +00:00
|
|
|
|
2005-10-11 19:45:21 +00:00
|
|
|
/* allocate a certain number of the hierarch_llead structures, which store
|
|
|
|
information about local leader and the according subcommunicators
|
|
|
|
*/
|
2005-10-15 17:04:01 +00:00
|
|
|
llead = (struct mca_coll_hierarch_llead_t * ) malloc (
|
2007-12-21 06:02:00 +00:00
|
|
|
sizeof(struct mca_coll_hierarch_llead_t));
|
2005-10-14 17:41:44 +00:00
|
|
|
if ( NULL == llead ) {
|
2005-10-13 15:07:35 +00:00
|
|
|
goto exit;
|
2005-10-11 19:45:21 +00:00
|
|
|
}
|
2005-10-14 17:41:44 +00:00
|
|
|
|
2005-10-15 17:04:01 +00:00
|
|
|
/* These two routines set all relevant entries in the mca_coll_base_comm_t
|
|
|
|
* structure. The first one makes all entries which are independent of the
|
|
|
|
* offset (and have to be done only once per module. The second one is
|
|
|
|
* depending on the offset, and has to be called therefore every time we need
|
|
|
|
* a new llcomm
|
|
|
|
*/
|
2007-08-23 20:41:29 +00:00
|
|
|
mca_coll_hierarch_get_llr ( hierarch_module );
|
|
|
|
mca_coll_hierarch_get_all_lleaders ( rank, hierarch_module, llead, 1 );
|
2005-10-14 17:41:44 +00:00
|
|
|
|
2005-10-11 17:29:59 +00:00
|
|
|
/* Generate the lleader communicator assuming that all lleaders are the first
|
|
|
|
process in the list of processes with the same color. A function generating
|
|
|
|
other lleader-comms will follow soon. */
|
2005-10-14 17:41:44 +00:00
|
|
|
ret = ompi_comm_split ( comm, llead->am_lleader, rank, &llcomm, 0);
|
2005-10-11 17:29:59 +00:00
|
|
|
if ( OMPI_SUCCESS != ret ) {
|
2005-10-13 15:07:35 +00:00
|
|
|
goto exit;
|
2005-10-11 17:29:59 +00:00
|
|
|
}
|
2005-10-14 17:41:44 +00:00
|
|
|
llead->llcomm = llcomm;
|
|
|
|
|
|
|
|
/* Store it now on the data structure */
|
2007-12-21 06:02:00 +00:00
|
|
|
OBJ_CONSTRUCT(&(hierarch_module->hier_llead), opal_pointer_array_t);
|
|
|
|
opal_pointer_array_add ( &(hierarch_module->hier_llead), llead);
|
2005-10-14 17:41:44 +00:00
|
|
|
|
2005-10-25 18:34:47 +00:00
|
|
|
if ( mca_coll_hierarch_verbose_param ) {
|
2007-12-21 06:02:00 +00:00
|
|
|
mca_coll_hierarch_dump_struct (hierarch_module);
|
2005-10-15 21:13:44 +00:00
|
|
|
}
|
2005-10-14 17:41:44 +00:00
|
|
|
|
2005-02-24 15:11:07 +00:00
|
|
|
exit:
|
|
|
|
if ( OMPI_SUCCESS != ret ) {
|
2005-10-13 15:07:35 +00:00
|
|
|
ompi_comm_free ( &lcomm );
|
2007-08-23 20:41:29 +00:00
|
|
|
return OMPI_ERROR;
|
2005-02-24 15:11:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
return OMPI_SUCCESS;
|
2004-12-27 08:56:33 +00:00
|
|
|
}
|
2005-02-24 15:11:07 +00:00
|
|
|
|
|
|
|
|
2005-10-11 20:34:17 +00:00
|
|
|
|
2007-08-23 20:41:29 +00:00
|
|
|
/*
 * Fill 'llead' with the local-leader information for the given offset.
 *
 * rank             rank of the calling process in the hierarch communicator
 * hierarch_module  module providing hier_colorarr, hier_llr and their sizes
 * llead            out: lleaders (newly malloc'ed, one entry per hier_llr
 *                  slot), offset, am_lleader and my_lleader are written
 * offset           which member of each color group is to be the leader
 *                  (1 selects the first process carrying the color)
 *
 * For every hier_llr slot the leader is the offset-th process, in rank
 * order, whose color equals that slot; if fewer processes carry the color,
 * the last one found is used. Slots holding MPI_UNDEFINED get
 * MPI_UNDEFINED as leader.
 *
 * Returns OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if an allocation fails.
 */
int mca_coll_hierarch_get_all_lleaders ( int rank, mca_coll_hierarch_module_t *hierarch_module,
                                         struct mca_coll_hierarch_llead_t * llead,
                                         int offset )
{
    int proc, slot;
    int own_color;
    int *hits;   /* per slot: number of processes counted so far */

    hits = (int *)calloc (1, sizeof (int)* hierarch_module->hier_num_lleaders );
    if ( NULL == hits ) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    llead->lleaders = (int *) malloc (sizeof(int) * hierarch_module->hier_num_lleaders);
    if ( NULL == llead->lleaders ) {
        free ( hits );
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    llead->offset = offset;

    /* Slots without a color have no leader to search for. */
    for ( slot = 0; slot < hierarch_module->hier_num_lleaders; slot++ ) {
        if ( MPI_UNDEFINED == hierarch_module->hier_llr[slot] ) {
            hits[slot] = 1;
            llead->lleaders[slot] = MPI_UNDEFINED;
        }
    }

    /* Walk the processes in rank order and advance the matching slot until
       it has seen 'offset' members. */
    for ( proc = 0; proc < hierarch_module->hier_num_colorarr; proc++ ) {
        if ( MPI_UNDEFINED == hierarch_module->hier_colorarr[proc] ) {
            continue;
        }
        for ( slot = 0; slot < hierarch_module->hier_num_lleaders; slot++ ) {
            if ( hits[slot] < offset &&
                 hierarch_module->hier_colorarr[proc] == hierarch_module->hier_llr[slot] ) {
                hits[slot]++;
                llead->lleaders[slot] = proc;
                break;
            }
        }
    }

    /* Work out the role of the calling process. */
    own_color = hierarch_module->hier_colorarr[rank];
    if ( MPI_UNDEFINED != own_color ) {
        llead->am_lleader = 0;
        for ( slot = 0; slot < hierarch_module->hier_num_lleaders; slot++ ) {
            if ( hierarch_module->hier_llr[slot] != own_color ) {
                continue;
            }
            llead->my_lleader = hits[slot] - 1;
            if ( llead->lleaders[slot] == rank ) {
                llead->am_lleader = 1;
            }
            break;
        }
    }
    else {
        /* A process without a color is its own leader. */
        llead->am_lleader = 1;
        llead->my_lleader = MPI_UNDEFINED;
    }

    free ( hits );
    return OMPI_SUCCESS;
}
|
|
|
|
|
2007-08-23 20:41:29 +00:00
|
|
|
/*
 * Build the per-module tables that do not depend on an offset:
 *   hier_num_lleaders  value returned by mca_coll_hierarch_count_lleaders
 *   hier_llr           the colors found in hier_colorarr, in order of first
 *                      appearance; every MPI_UNDEFINED entry (after index 0)
 *                      gets a slot of its own
 *   hier_max_offset    for each slot, how many processes map to it
 *
 * Returns OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if either table cannot
 * be allocated.
 */
int mca_coll_hierarch_get_llr ( mca_coll_hierarch_module_t *hierarch_module )
{
    int proc, slot, used;
    int nleaders;

    nleaders = mca_coll_hierarch_count_lleaders ( hierarch_module->hier_num_colorarr,
                                                  hierarch_module->hier_colorarr);
    hierarch_module->hier_num_lleaders = nleaders;
    hierarch_module->hier_llr = (int *) malloc ( hierarch_module->hier_num_lleaders * sizeof(int));
    hierarch_module->hier_max_offset = (int *) calloc ( 1, hierarch_module->hier_num_lleaders * sizeof(int));
    if ( NULL == hierarch_module->hier_llr || NULL == hierarch_module->hier_max_offset ) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* The first process always opens slot 0. */
    hierarch_module->hier_llr[0] = hierarch_module->hier_colorarr[0];
    hierarch_module->hier_max_offset[0] = 1;
    used = 1;

    for ( proc = 1; proc < hierarch_module->hier_num_colorarr; proc++ ) {
        const int color = hierarch_module->hier_colorarr[proc];

        /* Processes without a color never share a slot; for all others
           look for a slot that already carries the color. */
        slot = used;
        if ( MPI_UNDEFINED != color ) {
            for ( slot = 0; slot < used; slot++ ) {
                if ( hierarch_module->hier_llr[slot] == color ) {
                    break;
                }
            }
        }

        if ( slot == used ) {
            /* Nothing matched: open a new slot (its counter is still the
               zero written by calloc). */
            hierarch_module->hier_llr[used] = color;
            used++;
        }
        hierarch_module->hier_max_offset[slot]++;
    }

    return OMPI_SUCCESS;
}
|
|
|
|
|
|
|
|
|
2005-10-14 18:11:21 +00:00
|
|
|
struct ompi_communicator_t* mca_coll_hierarch_get_llcomm (int root,
|
2007-12-21 06:02:00 +00:00
|
|
|
mca_coll_hierarch_module_t *hierarch_module,
|
|
|
|
int* llroot,
|
|
|
|
int* lroot)
|
2005-10-11 20:34:17 +00:00
|
|
|
{
|
|
|
|
struct ompi_communicator_t *llcomm=NULL;
|
|
|
|
struct ompi_group_t *llgroup=NULL;
|
|
|
|
struct ompi_group_t *group=NULL;
|
|
|
|
struct mca_coll_hierarch_llead_t *llead=NULL;
|
2005-10-15 17:04:01 +00:00
|
|
|
int found, i, rc, num_llead, offset;
|
2007-08-23 20:41:29 +00:00
|
|
|
int rank = ompi_comm_rank (hierarch_module->hier_comm);
|
2005-10-15 17:04:01 +00:00
|
|
|
|
|
|
|
/* determine what our offset of root is in the colorarr */
|
|
|
|
offset = mca_coll_hierarch_get_offset ( root,
|
2007-08-23 20:41:29 +00:00
|
|
|
hierarch_module->hier_num_colorarr,
|
|
|
|
hierarch_module->hier_colorarr );
|
2005-10-15 17:04:01 +00:00
|
|
|
|
2007-12-21 06:02:00 +00:00
|
|
|
num_llead = opal_pointer_array_get_size ( &(hierarch_module->hier_llead) );
|
2005-10-15 17:04:01 +00:00
|
|
|
for ( found=0, i=0; i < num_llead; i++ ) {
|
2007-12-21 06:02:00 +00:00
|
|
|
llead = (struct mca_coll_hierarch_llead_t *) opal_pointer_array_get_item (
|
|
|
|
&(hierarch_module->hier_llead), i );
|
2007-08-23 20:41:29 +00:00
|
|
|
if ( NULL == llead ) {
|
2007-12-21 06:02:00 +00:00
|
|
|
continue;
|
2005-10-15 19:36:54 +00:00
|
|
|
}
|
2005-10-11 20:34:17 +00:00
|
|
|
|
2005-10-18 19:20:48 +00:00
|
|
|
if (llead->offset == offset ) {
|
2005-10-15 17:04:01 +00:00
|
|
|
found = 1;
|
|
|
|
break;
|
|
|
|
}
|
2005-10-18 19:20:48 +00:00
|
|
|
#if 0
|
|
|
|
else if () {
|
2007-12-21 06:02:00 +00:00
|
|
|
/* the offset of root = maxoffset of this color and
|
|
|
|
* the offset on llead is larger then offset of root.
|
|
|
|
* then we can also use this llead structure
|
|
|
|
*/
|
2005-10-18 19:20:48 +00:00
|
|
|
}
|
|
|
|
#endif
|
2005-10-11 20:34:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if ( !found ) {
|
2005-10-15 17:04:01 +00:00
|
|
|
/* allocate a new llead element */
|
|
|
|
llead = (struct mca_coll_hierarch_llead_t *) malloc (
|
2007-12-21 06:02:00 +00:00
|
|
|
sizeof(struct mca_coll_hierarch_llead_t));
|
2005-10-14 17:41:44 +00:00
|
|
|
if ( NULL == llead ) {
|
|
|
|
return NULL;
|
|
|
|
}
|
2005-10-15 17:04:01 +00:00
|
|
|
|
2005-10-14 12:18:29 +00:00
|
|
|
/* generate the list of lleaders with this offset */
|
2007-08-23 20:41:29 +00:00
|
|
|
mca_coll_hierarch_get_all_lleaders ( rank, hierarch_module, llead, offset );
|
2005-10-15 17:04:01 +00:00
|
|
|
|
|
|
|
/* create new lleader subcommunicator */
|
2007-08-23 20:41:29 +00:00
|
|
|
rc = ompi_comm_split ( hierarch_module->hier_comm, llead->am_lleader, root, &llcomm, 0);
|
2005-10-15 17:04:01 +00:00
|
|
|
if ( OMPI_SUCCESS != rc ) {
|
|
|
|
return NULL;
|
|
|
|
}
|
2005-10-15 21:13:44 +00:00
|
|
|
llead->llcomm = llcomm;
|
2005-10-11 21:26:07 +00:00
|
|
|
|
2007-08-23 20:41:29 +00:00
|
|
|
/* Store the new element on the hierarch_module struct */
|
2007-12-21 06:02:00 +00:00
|
|
|
opal_pointer_array_add ( &(hierarch_module->hier_llead), llead);
|
2005-10-14 12:18:29 +00:00
|
|
|
}
|
2005-10-11 20:34:17 +00:00
|
|
|
|
2005-10-15 17:04:01 +00:00
|
|
|
llcomm = llead->llcomm;
|
|
|
|
*lroot = llead->my_lleader;
|
|
|
|
*llroot = MPI_UNDEFINED;
|
|
|
|
|
|
|
|
if ( MPI_COMM_NULL != llcomm ) {
|
2007-08-23 20:41:29 +00:00
|
|
|
rc = ompi_comm_group ( hierarch_module->hier_comm, &group);
|
2005-10-15 17:04:01 +00:00
|
|
|
if ( OMPI_SUCCESS != rc ) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
rc = ompi_comm_group ( llcomm, &llgroup);
|
|
|
|
if ( OMPI_SUCCESS != rc ) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
rc = ompi_group_translate_ranks ( group, 1, &root, llgroup, llroot);
|
|
|
|
if ( OMPI_SUCCESS != rc ) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
/* ompi_group_free (&llgroup) */
|
|
|
|
/* ompi_group_free (&group); */
|
|
|
|
}
|
|
|
|
|
2005-10-11 20:34:17 +00:00
|
|
|
return llcomm;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2005-02-24 15:11:07 +00:00
|
|
|
/**********************************************************************/
|
|
|
|
/**********************************************************************/
|
|
|
|
/**********************************************************************/
|
2008-02-01 17:11:36 +00:00
|
|
|
static void
|
|
|
|
mca_coll_hierarch_checkfor_sm ( struct ompi_communicator_t *comm, int *color, int *ncount )
|
|
|
|
{
|
|
|
|
int i, size;
|
|
|
|
int lncount=0;
|
|
|
|
struct ompi_proc_t** procs=NULL;
|
|
|
|
struct ompi_proc_t* my_proc=NULL;
|
|
|
|
|
|
|
|
|
|
|
|
*color = -1;
|
|
|
|
size = ompi_comm_size(comm);
|
|
|
|
my_proc = ompi_proc_local();
|
|
|
|
procs = comm->c_local_group->grp_proc_pointers;
|
|
|
|
for ( i = 0 ; i < size ; i++) {
|
|
|
|
if ( procs[i]->proc_name.jobid == my_proc->proc_name.jobid &&
|
|
|
|
( (procs[i]->proc_flags & OMPI_PROC_FLAG_LOCAL)) ) {
|
|
|
|
lncount++;
|
|
|
|
if ( *color == -1){
|
|
|
|
*color = i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* we need to decrease ncount in order to make the other allreduce/allgather
|
|
|
|
operations work */
|
|
|
|
lncount--;
|
|
|
|
*ncount = lncount;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2005-02-24 15:11:07 +00:00
|
|
|
/* This function checks how many processes are using the component
|
|
|
|
'component_name' for communication and returns this count in
|
|
|
|
'ncount'. Furthermore it returns a 'key', which can be used to split
|
|
|
|
the communicator into subgroups, such that the new communicators
|
|
|
|
will definitly have all processes communicate with this component.
|
2005-10-13 15:07:35 +00:00
|
|
|
|
|
|
|
Oct 13: the algorithm has been modified such that it returns the
|
|
|
|
number of processes using the specified component and the number
|
|
|
|
of processes to which an even 'faster' protocol is being used. (Faster
|
|
|
|
specified in this context as being further up in the list of
|
|
|
|
hier_prot protocols specified at the beginning of this file).
|
2005-02-24 15:11:07 +00:00
|
|
|
*/
|
|
|
|
static void
|
|
|
|
mca_coll_hierarch_checkfor_component ( struct ompi_communicator_t *comm,
|
2005-10-13 15:07:35 +00:00
|
|
|
int component_level,
|
2005-02-24 15:11:07 +00:00
|
|
|
char *component_name,
|
|
|
|
int *key,
|
|
|
|
int *ncount )
|
|
|
|
{
|
2005-10-11 17:29:59 +00:00
|
|
|
ompi_bitmap_t reachable;
|
|
|
|
ompi_proc_t **procs=NULL;
|
|
|
|
struct mca_bml_base_endpoint_t **bml_endpoints=NULL;
|
|
|
|
struct mca_bml_base_btl_array_t *bml_btl_array=NULL;
|
|
|
|
mca_bml_base_btl_t *bml_btl=NULL;
|
|
|
|
mca_btl_base_component_t *btl=NULL;
|
2005-02-24 15:11:07 +00:00
|
|
|
|
2005-10-18 18:17:50 +00:00
|
|
|
int i, size, rc;
|
2005-02-24 15:11:07 +00:00
|
|
|
|
|
|
|
int counter=0;
|
|
|
|
int firstproc=999999;
|
2005-04-13 18:35:07 +00:00
|
|
|
int rank = -1;
|
2005-10-11 17:29:59 +00:00
|
|
|
int use_rdma=0;
|
2005-04-13 18:35:07 +00:00
|
|
|
|
|
|
|
/* default values in case an error occurs */
|
|
|
|
*ncount=0;
|
|
|
|
*key=MPI_UNDEFINED;
|
2005-02-24 15:11:07 +00:00
|
|
|
|
2005-10-11 17:29:59 +00:00
|
|
|
/* Shall we check the the rdma list instead of send-list in the endpoint-structure? */
|
2005-10-25 18:34:47 +00:00
|
|
|
use_rdma = mca_coll_hierarch_use_rdma_param;
|
2005-10-11 22:05:24 +00:00
|
|
|
|
2005-10-11 17:29:59 +00:00
|
|
|
size = ompi_comm_size ( comm );
|
|
|
|
rank = ompi_comm_rank ( comm );
|
|
|
|
|
|
|
|
OBJ_CONSTRUCT(&reachable, ompi_bitmap_t);
|
|
|
|
rc = ompi_bitmap_init(&reachable, size);
|
|
|
|
if(OMPI_SUCCESS != rc) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2005-10-12 20:56:29 +00:00
|
|
|
bml_endpoints = (struct mca_bml_base_endpoint_t **) malloc ( size *
|
2007-12-21 06:02:00 +00:00
|
|
|
sizeof(struct mca_bml_base_endpoint_t*));
|
2005-10-12 20:56:29 +00:00
|
|
|
if ( NULL == bml_endpoints ) {
|
2005-10-13 15:07:35 +00:00
|
|
|
return;
|
2005-10-12 20:56:29 +00:00
|
|
|
}
|
|
|
|
|
2005-10-11 22:05:24 +00:00
|
|
|
procs = comm->c_local_group->grp_proc_pointers;
|
2005-10-14 17:41:44 +00:00
|
|
|
rc = mca_bml.bml_add_procs ( size, procs, bml_endpoints, &reachable );
|
2005-10-11 17:29:59 +00:00
|
|
|
if(OMPI_SUCCESS != rc) {
|
2005-10-13 15:07:35 +00:00
|
|
|
return;
|
2005-04-13 18:35:07 +00:00
|
|
|
}
|
|
|
|
|
2005-02-24 15:11:07 +00:00
|
|
|
for ( i=0; i<size; i++ ) {
|
2005-10-13 15:07:35 +00:00
|
|
|
if ( rank == i ) {
|
|
|
|
/* skip myself */
|
|
|
|
continue;
|
|
|
|
}
|
2005-10-11 17:29:59 +00:00
|
|
|
|
2005-10-13 15:07:35 +00:00
|
|
|
if ( use_rdma ) {
|
|
|
|
bml_btl_array = &(bml_endpoints[i]->btl_rdma);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
bml_btl_array = &(bml_endpoints[i]->btl_send);
|
|
|
|
}
|
|
|
|
bml_btl = mca_bml_base_btl_array_get_index ( bml_btl_array, 0 );
|
|
|
|
btl = bml_btl->btl->btl_component;
|
2005-02-24 15:11:07 +00:00
|
|
|
|
2005-10-13 15:07:35 +00:00
|
|
|
/* sanity check */
|
|
|
|
if ( strcmp(btl->btl_version.mca_type_name,"btl") ) {
|
|
|
|
printf("Oops, got the wrong component! type_name = %s\n",
|
|
|
|
btl->btl_version.mca_type_name );
|
|
|
|
}
|
2005-02-24 15:11:07 +00:00
|
|
|
|
2005-10-13 15:07:35 +00:00
|
|
|
/* check for the required component */
|
|
|
|
if (! strcmp (btl->btl_version.mca_component_name, component_name)){
|
|
|
|
counter++;
|
2005-10-13 16:21:13 +00:00
|
|
|
if (i<firstproc ) {
|
2007-12-21 06:02:00 +00:00
|
|
|
firstproc = i;
|
2005-10-13 16:21:13 +00:00
|
|
|
}
|
|
|
|
continue;
|
2005-10-13 15:07:35 +00:00
|
|
|
}
|
2005-10-13 16:21:13 +00:00
|
|
|
|
2005-02-24 15:11:07 +00:00
|
|
|
}
|
2005-04-13 18:35:07 +00:00
|
|
|
|
2005-10-18 18:17:50 +00:00
|
|
|
*ncount = counter;
|
2005-03-02 13:28:39 +00:00
|
|
|
/* final decision */
|
2005-10-18 18:17:50 +00:00
|
|
|
if ( counter == 0 ) {
|
2005-10-13 15:07:35 +00:00
|
|
|
/* this is the section indicating, that we are not
|
|
|
|
using this component */
|
|
|
|
firstproc = MPI_UNDEFINED;
|
2005-02-24 15:11:07 +00:00
|
|
|
}
|
|
|
|
else {
|
2005-10-13 15:07:35 +00:00
|
|
|
if ( rank < firstproc ) {
|
|
|
|
firstproc = rank;
|
|
|
|
}
|
2005-02-24 15:11:07 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
*key = firstproc;
|
2005-10-12 20:56:29 +00:00
|
|
|
|
|
|
|
if ( NULL != bml_endpoints ) {
|
2005-10-13 15:07:35 +00:00
|
|
|
free ( bml_endpoints);
|
2005-10-12 20:56:29 +00:00
|
|
|
}
|
|
|
|
|
2005-02-24 15:11:07 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2005-03-02 13:28:39 +00:00
|
|
|
|
2005-10-14 17:41:44 +00:00
|
|
|
/********************************************************************************/
|
|
|
|
/********************************************************************************/
|
|
|
|
/********************************************************************************/
|
2005-03-02 13:28:39 +00:00
|
|
|
|
2007-08-23 20:41:29 +00:00
|
|
|
/*
 * Debug helper: print the hierarch data of module 'c' to stdout - the
 * communicator name and context id, the number of cached llead entries
 * and of lleaders, and for every non-NULL llead entry its my_lleader /
 * am_lleader values followed by the complete lleaders list. Every line is
 * prefixed with the caller's rank in hier_comm.
 */
static void mca_coll_hierarch_dump_struct ( mca_coll_hierarch_module_t *c)
{
    const int me = ompi_comm_rank ( c->hier_comm );
    int entry, k;

    printf("%d: Dump of hier-struct for comm %s cid %u\n",
           me, c->hier_comm->c_name, c->hier_comm->c_contextid);

    printf("%d: No of llead communicators: %d No of lleaders: %d\n",
           me, opal_pointer_array_get_size ( &(c->hier_llead)),
           c->hier_num_lleaders );

    for ( entry = 0; entry < opal_pointer_array_get_size(&(c->hier_llead)); entry++ ) {
        struct mca_coll_hierarch_llead_t *item =
            (mca_coll_hierarch_llead_t*)opal_pointer_array_get_item (&(c->hier_llead), entry);

        if ( NULL != item ) {
            printf("%d: my_leader %d am_leader %d\n", me,
                   item->my_lleader, item->am_lleader );

            for ( k = 0; k < c->hier_num_lleaders; k++ ) {
                printf("%d: lleader[%d] = %d\n", me, k, item->lleaders[k]);
            }
        }
    }
}
|
2005-10-11 20:34:17 +00:00
|
|
|
|
2007-03-16 23:11:45 +00:00
|
|
|
int mca_coll_hierarch_ft_event(int state) {
|
|
|
|
if(OPAL_CRS_CHECKPOINT == state) {
|
|
|
|
;
|
|
|
|
}
|
|
|
|
else if(OPAL_CRS_CONTINUE == state) {
|
|
|
|
;
|
|
|
|
}
|
|
|
|
else if(OPAL_CRS_RESTART == state) {
|
|
|
|
;
|
|
|
|
}
|
|
|
|
else if(OPAL_CRS_TERM == state ) {
|
|
|
|
;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
;
|
|
|
|
}
|
|
|
|
|
|
|
|
return OMPI_SUCCESS;
|
|
|
|
}
|