hierarch: with Edgar's blessing, remove the coll hierarch module
Этот коммит содержится в:
родитель
a741c44035
Коммит
2d5b92157f
@ -1,51 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Files that make up the hierarch collective component.
sources = \
    coll_hierarch.h \
    coll_hierarch.c \
    coll_hierarch_allreduce.c \
    coll_hierarch_barrier.c \
    coll_hierarch_bcast.c \
    coll_hierarch_component.c \
    coll_hierarch_reduce.c \
    coll_hierarch_tmpcoll.c

# The library is built in this directory.  DSO builds produce the
# installable module mca_<type>_<name>.la; static builds produce the
# non-installed libmca_<type>_<name>.la instead.
if MCA_BUILD_ompi_coll_hierarch_DSO
component_noinst =
component_install = mca_coll_hierarch.la
else
component_noinst = libmca_coll_hierarch.la
component_install =
endif

# Installable (DSO) flavour
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_coll_hierarch_la_SOURCES = $(sources)
mca_coll_hierarch_la_LDFLAGS = -module -avoid-version

# Non-installed (static) flavour
noinst_LTLIBRARIES = $(component_noinst)
libmca_coll_hierarch_la_SOURCES = $(sources)
libmca_coll_hierarch_la_LDFLAGS = -module -avoid-version
|
@ -1,747 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2008 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "coll_hierarch.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "opal/mca/hwloc/base/base.h"
|
||||
#include "opal/mca/btl/btl.h"
|
||||
|
||||
#include "mpi.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/group/group.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
#include "ompi/mca/coll/base/base.h"
|
||||
#include "ompi/mca/coll/base/coll_tags.h"
|
||||
|
||||
#include "opal/class/opal_bitmap.h"
|
||||
#include "ompi/mca/bml/bml.h"
|
||||
#include "ompi/mca/bml/base/base.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "ompi/mca/pml/base/base.h"
|
||||
|
||||
/* Local functions and data */
|
||||
#define HIER_MAXPROTOCOL 6
|
||||
#define HIER_MAX_PROTNAMELEN 7
|
||||
static int mca_coll_hierarch_max_protocol=HIER_MAXPROTOCOL;
|
||||
|
||||
/* Commments: need to add ofud, portals and sctp into this list! */
|
||||
static char hier_prot[HIER_MAXPROTOCOL][HIER_MAX_PROTNAMELEN]={"0","tcp","udapl","mx","openib","sm"};
|
||||
|
||||
static void mca_coll_hierarch_checkfor_component (struct ompi_communicator_t *comm,
|
||||
int component_level,
|
||||
char *component_name,
|
||||
int *key, int *ncount);
|
||||
static void mca_coll_hierarch_checkfor_sm (struct ompi_communicator_t *comm,
|
||||
int *color,
|
||||
int *ncount);
|
||||
static void mca_coll_hierarch_dump_struct ( mca_coll_hierarch_module_t *c);
|
||||
|
||||
|
||||
/*
|
||||
* Initial query function that is invoked during MPI_INIT, allowing
|
||||
* this module to indicate what level of thread support it provides.
|
||||
*/
|
||||
int mca_coll_hierarch_init_query(bool allow_hierarch_user_threads,
|
||||
bool have_hidden_user_threads)
|
||||
{
|
||||
/* Don't ask. All done */
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Invoked when there's a new communicator that has been created.
|
||||
* Look at the communicator and decide which set of functions and
|
||||
* priority we want to return.
|
||||
*/
|
||||
mca_coll_base_module_t *
|
||||
mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority )
|
||||
{
|
||||
int size, rank;
|
||||
int color, ncount=0, maxncount;
|
||||
int level;
|
||||
int ret=OMPI_SUCCESS;
|
||||
int ignore_sm=0;
|
||||
int detection_alg=0;
|
||||
mca_coll_hierarch_module_t *hierarch_module;
|
||||
|
||||
/* This module only works for intra-communicators at the moment */
|
||||
if (OMPI_COMM_IS_INTER(comm)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Get the priority level attached to this module. If priority is less
|
||||
* than or equal to 0, then the module is unavailable. */
|
||||
*priority = mca_coll_hierarch_priority_param;
|
||||
if (0 >= mca_coll_hierarch_priority_param) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* This module only works when the BTLs are alive. If they aren't, time to exit. */
|
||||
if (!mca_bml_base_inited()) return NULL;
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
if (size < 3) {
|
||||
/* No need for hierarchical collectives for 1 or 2 procs. */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
hierarch_module = OBJ_NEW(mca_coll_hierarch_module_t);
|
||||
if (NULL == hierarch_module) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
hierarch_module->super.coll_module_enable = mca_coll_hierarch_module_enable;
|
||||
hierarch_module->super.ft_event = mca_coll_hierarch_ft_event;
|
||||
hierarch_module->super.coll_allgather = NULL;
|
||||
hierarch_module->super.coll_allgatherv = NULL;
|
||||
hierarch_module->super.coll_allreduce = mca_coll_hierarch_allreduce_intra;
|
||||
hierarch_module->super.coll_alltoall = NULL;
|
||||
hierarch_module->super.coll_alltoallv = NULL;
|
||||
hierarch_module->super.coll_alltoallw = NULL;
|
||||
hierarch_module->super.coll_barrier = mca_coll_hierarch_barrier_intra;
|
||||
hierarch_module->super.coll_bcast = mca_coll_hierarch_bcast_intra;
|
||||
hierarch_module->super.coll_exscan = NULL;
|
||||
hierarch_module->super.coll_gather = NULL;
|
||||
hierarch_module->super.coll_gatherv = NULL;
|
||||
hierarch_module->super.coll_reduce = mca_coll_hierarch_reduce_intra;
|
||||
hierarch_module->super.coll_reduce_scatter = NULL;
|
||||
hierarch_module->super.coll_scan = NULL;
|
||||
hierarch_module->super.coll_scatter = NULL;
|
||||
hierarch_module->super.coll_scatterv = NULL;
|
||||
|
||||
|
||||
/* Check whether we should ignore sm. This might be necessary to take advantage
|
||||
of the some ib or gm collectives. */
|
||||
ignore_sm = mca_coll_hierarch_ignore_sm_param;
|
||||
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
hierarch_module->hier_num_colorarr = size;
|
||||
hierarch_module->hier_colorarr = (int *) malloc ( sizeof(int) * size);
|
||||
if ( NULL == hierarch_module->hier_colorarr ) {
|
||||
*priority = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* walk through the list of registered protocols, and check which one
|
||||
* is feasible.
|
||||
* Later we start with level=0, and introduce the multi-cell check
|
||||
*/
|
||||
if ( ignore_sm ) {
|
||||
mca_coll_hierarch_max_protocol = HIER_MAXPROTOCOL - 1;
|
||||
}
|
||||
|
||||
/* if number of levels is not specified, or if it is specified as ALL_LEVELS,
|
||||
* proceed in the usual way
|
||||
*/
|
||||
|
||||
detection_alg = mca_coll_hierarch_detection_alg_param;
|
||||
if( TWO_LEVELS == detection_alg ) {
|
||||
mca_coll_hierarch_max_protocol = 2;
|
||||
if ( mca_coll_hierarch_verbose_param ) {
|
||||
printf("Using two level hierarchy detection\n");
|
||||
}
|
||||
}
|
||||
|
||||
for ( level = mca_coll_hierarch_max_protocol - 1; level >0 ; level--) {
|
||||
if ( ALL_LEVELS == detection_alg ) {
|
||||
mca_coll_hierarch_checkfor_component ( comm,
|
||||
level,
|
||||
hier_prot[level],
|
||||
&color,
|
||||
&ncount);
|
||||
}
|
||||
else if (TWO_LEVELS == detection_alg ) {
|
||||
mca_coll_hierarch_checkfor_sm ( comm, &color, &ncount );
|
||||
}
|
||||
|
||||
/* This is probably a no-no! but for the moment we agreed with Jeff,
|
||||
** that this might be the best solution. These functions emulate an
|
||||
** allreduce and an allgather.
|
||||
*/
|
||||
ret = mca_coll_hierarch_allreduce_tmp (&ncount, &maxncount, 1, MPI_INT,
|
||||
MPI_MAX, comm );
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if ( 0 == maxncount ) {
|
||||
if ( mca_coll_hierarch_verbose_param ) {
|
||||
printf("%s:%d: nobody talks with %s. Continuing to next level.\n",
|
||||
comm->c_name, rank, hier_prot[level]);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
else if ( maxncount == (size-1) ) {
|
||||
/*
|
||||
* everybody can talk to every other process with this protocol,
|
||||
* no need to continue in the hierarchy tree and for the
|
||||
* hierarchical component.
|
||||
* Its (size-1) because we do not count ourselves.
|
||||
* maxncount[1] should be zero.
|
||||
*/
|
||||
if ( mca_coll_hierarch_verbose_param ) {
|
||||
if ( ALL_LEVELS == detection_alg ) {
|
||||
printf("%s:%d: everybody talks with %s. No need to continue\n",
|
||||
comm->c_name, rank, hier_prot[level]);
|
||||
}
|
||||
else if ( TWO_LEVELS == detection_alg ) {
|
||||
printf("%s:%d: everybody talks with sm. No need to continue\n",
|
||||
comm->c_name, rank );
|
||||
}
|
||||
}
|
||||
goto exit;
|
||||
}
|
||||
else {
|
||||
if ( mca_coll_hierarch_verbose_param ) {
|
||||
printf("%s:%d: %d procs talk with %s. Use this protocol, key %d\n",
|
||||
comm->c_name, rank, maxncount, hier_prot[level], color);
|
||||
}
|
||||
|
||||
ret = mca_coll_hierarch_allgather_tmp (&color, 1, MPI_INT,
|
||||
hierarch_module->hier_colorarr, 1,
|
||||
MPI_INT, comm );
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
hierarch_module->hier_level = level;
|
||||
return &(hierarch_module->super);
|
||||
}
|
||||
}
|
||||
|
||||
exit:
|
||||
*priority = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Init module on the communicator
|
||||
*/
|
||||
int mca_coll_hierarch_module_enable (mca_coll_base_module_t *module,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
int color;
|
||||
int size, rank, ret=OMPI_SUCCESS;
|
||||
|
||||
struct ompi_communicator_t *lcomm=NULL;
|
||||
struct ompi_communicator_t *llcomm=NULL;
|
||||
struct mca_coll_hierarch_llead_t *llead=NULL;
|
||||
mca_coll_hierarch_module_t *hierarch_module = (mca_coll_hierarch_module_t *) module;
|
||||
|
||||
rank = ompi_comm_rank(comm);
|
||||
size = ompi_comm_size(comm);
|
||||
|
||||
color = hierarch_module->hier_colorarr[rank];
|
||||
|
||||
/* Generate the subcommunicator based on the color returned by
|
||||
the previous function. */
|
||||
ret = ompi_comm_split ( comm, color, rank, &lcomm, 0 );
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
goto exit;
|
||||
}
|
||||
if ( OMPI_COMM_CID_IS_LOWER ( lcomm, comm ) ) {
|
||||
/* Mark the communicator as 'extra retain' and increase the
|
||||
reference count by one more. See ompi_comm_activate
|
||||
for detailed comments
|
||||
*/
|
||||
OMPI_COMM_SET_EXTRA_RETAIN (lcomm);
|
||||
OBJ_RETAIN(lcomm);
|
||||
}
|
||||
|
||||
hierarch_module->hier_comm = comm;
|
||||
hierarch_module->hier_lcomm = lcomm;
|
||||
hierarch_module->hier_num_reqs = 2 * size;
|
||||
hierarch_module->hier_reqs = (ompi_request_t **) malloc (sizeof(ompi_request_t)*size*2);
|
||||
if ( NULL == hierarch_module->hier_reqs ) {
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* allocate a certain number of the hierarch_llead structures, which store
|
||||
information about local leader and the according subcommunicators
|
||||
*/
|
||||
llead = (struct mca_coll_hierarch_llead_t * ) malloc (
|
||||
sizeof(struct mca_coll_hierarch_llead_t));
|
||||
if ( NULL == llead ) {
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* These two routines set all relevant entries in the mca_coll_base_comm_t
|
||||
* structure. The first one makes all entries which are independent of the
|
||||
* offset (and have to be done only once per module. The second one is
|
||||
* depending on the offset, and has to be called therefore every time we need
|
||||
* a new llcomm
|
||||
*/
|
||||
mca_coll_hierarch_get_llr ( hierarch_module );
|
||||
mca_coll_hierarch_get_all_lleaders ( rank, hierarch_module, llead, 1 );
|
||||
|
||||
/* Generate the lleader communicator assuming that all lleaders are the first
|
||||
process in the list of processes with the same color. A function generating
|
||||
other lleader-comms will follow soon. */
|
||||
color = MPI_UNDEFINED;
|
||||
if ( llead->am_lleader ) {
|
||||
color = 1;
|
||||
}
|
||||
ret = ompi_comm_split ( comm, color, rank, &llcomm, 0);
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
goto exit;
|
||||
}
|
||||
if ( OMPI_COMM_CID_IS_LOWER ( llcomm, comm ) ) {
|
||||
/* Mark the communicator as 'extra retain' and increase the
|
||||
reference count by one more. See ompi_comm_activate
|
||||
for detailed explanation.
|
||||
*/
|
||||
OMPI_COMM_SET_EXTRA_RETAIN (llcomm);
|
||||
OBJ_RETAIN(llcomm);
|
||||
}
|
||||
|
||||
|
||||
llead->llcomm = llcomm;
|
||||
|
||||
/* Store it now on the data structure */
|
||||
OBJ_CONSTRUCT(&(hierarch_module->hier_llead), opal_pointer_array_t);
|
||||
opal_pointer_array_add ( &(hierarch_module->hier_llead), llead);
|
||||
|
||||
if ( mca_coll_hierarch_verbose_param ) {
|
||||
mca_coll_hierarch_dump_struct (hierarch_module);
|
||||
}
|
||||
|
||||
exit:
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
if (NULL != llead) {
|
||||
free(llead);
|
||||
}
|
||||
ompi_comm_free ( &lcomm );
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
 * Determine the local leader of every color group for a given offset.
 *
 * For each entry of hier_llr the leader is the offset-th process (in
 * rank order) carrying that color; if a group has fewer than 'offset'
 * members, its last member is used.  Entries whose color is
 * MPI_UNDEFINED get MPI_UNDEFINED as leader.
 *
 * @param rank             (input)  rank of the calling process in comm
 * @param hierarch_module  (input)  module holding hier_colorarr,
 *                                  hier_llr and hier_num_lleaders
 * @param llead            (output) receives a newly allocated lleaders
 *                                  array plus offset, am_lleader and
 *                                  my_lleader
 * @param offset           (input)  which member of each group leads
 * @return OMPI_SUCCESS or OMPI_ERR_OUT_OF_RESOURCE
 */
int mca_coll_hierarch_get_all_lleaders ( int rank, mca_coll_hierarch_module_t *hierarch_module,
                                         struct mca_coll_hierarch_llead_t * llead,
                                         int offset )
{
    int *seen = NULL;     /* members found so far, per color group */
    int proc, grp;
    int my_color;

    seen = (int *)calloc (1, sizeof (int)* hierarch_module->hier_num_lleaders );
    if ( NULL == seen ) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    llead->lleaders = (int *) malloc (sizeof(int) * hierarch_module->hier_num_lleaders);
    if ( NULL == llead->lleaders ) {
        free ( seen );
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    llead->offset = offset;

    /* Groups without a color have no leader rank to look for. */
    for ( grp = 0; grp < hierarch_module->hier_num_lleaders; grp++ ) {
        if ( MPI_UNDEFINED != hierarch_module->hier_llr[grp] ) {
            continue;
        }
        seen[grp] = 1;
        llead->lleaders[grp] = MPI_UNDEFINED;
    }

    /* Assign every colored process to its group until the group has
       seen 'offset' members; the last one assigned is the leader. */
    for ( proc = 0; proc < hierarch_module->hier_num_colorarr; proc++ ) {
        if ( MPI_UNDEFINED == hierarch_module->hier_colorarr[proc] ) {
            continue;
        }
        for ( grp = 0; grp < hierarch_module->hier_num_lleaders; grp++ ) {
            if ( seen[grp] < offset &&
                 hierarch_module->hier_colorarr[proc] == hierarch_module->hier_llr[grp] ) {
                seen[grp]++;
                llead->lleaders[grp] = proc;
                break;
            }
        }
    }

    my_color = hierarch_module->hier_colorarr[rank];
    if ( MPI_UNDEFINED != my_color ) {
        llead->am_lleader = 0;
        for ( grp = 0; grp < hierarch_module->hier_num_lleaders; grp++ ) {
            if ( hierarch_module->hier_llr[grp] != my_color ) {
                continue;
            }
            llead->my_lleader = seen[grp] - 1;
            if ( llead->lleaders[grp] == rank ) {
                llead->am_lleader = 1;
            }
            break;
        }
    }
    else {
        /* A process without a color is the head of its own group. */
        llead->am_lleader = 1;
        llead->my_lleader = MPI_UNDEFINED;
    }

    free ( seen );
    return OMPI_SUCCESS;
}
|
||||
|
||||
/*
 * Build the compacted color array of a module.
 *
 * On success the module receives
 *   - hier_num_lleaders: the number of color groups,
 *   - hier_llr:          one color per group, in order of first
 *                        appearance in hier_colorarr,
 *   - hier_max_offset:   the number of processes in each group.
 * Every process whose color is MPI_UNDEFINED forms a group of its own.
 *
 * On failure the module is left untouched.
 *
 * @param hierarch_module  (input/output) module with hier_colorarr and
 *                         hier_num_colorarr already set
 * @return OMPI_SUCCESS or OMPI_ERR_OUT_OF_RESOURCE
 */
int mca_coll_hierarch_get_llr ( mca_coll_hierarch_module_t *hierarch_module )
{
    int i, j, cnt, found;
    int ncount;
    int *llr = NULL;
    int *max_offset = NULL;

    ncount = mca_coll_hierarch_count_lleaders ( hierarch_module->hier_num_colorarr,
                                                hierarch_module->hier_colorarr);
    if ( ncount <= 0 ) {
        /* Not a usable count; the counting routine may hand back an
           error code here (NOTE(review): assumes error codes are not
           positive). */
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    llr = (int *) malloc ( (size_t)ncount * sizeof(int));
    max_offset = (int *) calloc ( 1, (size_t)ncount * sizeof(int));
    if ( ( NULL == llr ) || ( NULL == max_offset ) ) {
        free ( llr );
        free ( max_offset );
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    llr[0] = hierarch_module->hier_colorarr[0];
    max_offset[0] = 1;
    for ( cnt=1, i=1; i<hierarch_module->hier_num_colorarr; i++ ) {
        if ( MPI_UNDEFINED == hierarch_module->hier_colorarr[i] ) {
            /* uncolored processes always open a new group */
            llr[cnt] = hierarch_module->hier_colorarr[i];
            max_offset[cnt] = 1;
            cnt++;
            continue;
        }
        for ( found=0, j=0; j<cnt; j++ ) {
            if ( llr[j] == hierarch_module->hier_colorarr[i]) {
                max_offset[j]++;
                found = 1;
                break;
            }
        }
        if ( !found ) {
            llr[cnt] = hierarch_module->hier_colorarr[i];
            max_offset[cnt]++;   /* zero-initialized, so this becomes 1 */
            cnt++;
        }
    }

    hierarch_module->hier_num_lleaders = ncount;
    hierarch_module->hier_llr = llr;
    hierarch_module->hier_max_offset = max_offset;

    return OMPI_SUCCESS;
}
|
||||
|
||||
|
||||
struct ompi_communicator_t* mca_coll_hierarch_get_llcomm (int root,
|
||||
mca_coll_hierarch_module_t *hierarch_module,
|
||||
int* llroot,
|
||||
int* lroot)
|
||||
{
|
||||
struct ompi_communicator_t *llcomm=NULL;
|
||||
struct ompi_group_t *llgroup=NULL;
|
||||
struct ompi_group_t *group=NULL;
|
||||
struct mca_coll_hierarch_llead_t *llead=NULL;
|
||||
int found, i, rc, num_llead, offset;
|
||||
int rank = ompi_comm_rank (hierarch_module->hier_comm);
|
||||
int color;
|
||||
|
||||
/* determine what our offset of root is in the colorarr */
|
||||
offset = mca_coll_hierarch_get_offset ( root,
|
||||
hierarch_module->hier_num_colorarr,
|
||||
hierarch_module->hier_colorarr );
|
||||
|
||||
num_llead = opal_pointer_array_get_size ( &(hierarch_module->hier_llead) );
|
||||
for ( found=0, i=0; i < num_llead; i++ ) {
|
||||
llead = (struct mca_coll_hierarch_llead_t *) opal_pointer_array_get_item (
|
||||
&(hierarch_module->hier_llead), i );
|
||||
if ( NULL == llead ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (llead->offset == offset ) {
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
#if 0
|
||||
else if () {
|
||||
/* the offset of root = maxoffset of this color and
|
||||
* the offset on llead is larger then offset of root.
|
||||
* then we can also use this llead structure
|
||||
*/
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
if ( !found ) {
|
||||
/* allocate a new llead element */
|
||||
llead = (struct mca_coll_hierarch_llead_t *) malloc (
|
||||
sizeof(struct mca_coll_hierarch_llead_t));
|
||||
if ( NULL == llead ) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* generate the list of lleaders with this offset */
|
||||
mca_coll_hierarch_get_all_lleaders ( rank, hierarch_module, llead, offset );
|
||||
color = MPI_UNDEFINED;
|
||||
if ( llead->am_lleader ) {
|
||||
color = 1;
|
||||
}
|
||||
|
||||
/* create new lleader subcommunicator */
|
||||
rc = ompi_comm_split ( hierarch_module->hier_comm, color, root, &llcomm, 0);
|
||||
if ( OMPI_SUCCESS != rc ) {
|
||||
return NULL;
|
||||
}
|
||||
if ( OMPI_COMM_CID_IS_LOWER ( llcomm, hierarch_module->hier_comm ) ) {
|
||||
/* Mark the communicator as 'extra retain' and increase the
|
||||
reference count by one more. See ompi_comm_activate
|
||||
for detailed explanation. */
|
||||
OMPI_COMM_SET_EXTRA_RETAIN (llcomm);
|
||||
OBJ_RETAIN(llcomm);
|
||||
}
|
||||
|
||||
|
||||
llead->llcomm = llcomm;
|
||||
|
||||
/* Store the new element on the hierarch_module struct */
|
||||
opal_pointer_array_add ( &(hierarch_module->hier_llead), llead);
|
||||
}
|
||||
|
||||
llcomm = llead->llcomm;
|
||||
*lroot = llead->my_lleader;
|
||||
*llroot = MPI_UNDEFINED;
|
||||
|
||||
if ( MPI_COMM_NULL != llcomm ) {
|
||||
group = hierarch_module->hier_comm->c_local_group;
|
||||
llgroup = llcomm->c_local_group;
|
||||
|
||||
rc = ompi_group_translate_ranks ( group, 1, &root, llgroup, llroot);
|
||||
if ( OMPI_SUCCESS != rc ) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return llcomm;
|
||||
}
|
||||
|
||||
|
||||
/**********************************************************************/
|
||||
/**********************************************************************/
|
||||
/**********************************************************************/
|
||||
static void
|
||||
mca_coll_hierarch_checkfor_sm ( struct ompi_communicator_t *comm, int *color, int *ncount )
|
||||
{
|
||||
int i, size;
|
||||
int lncount=0;
|
||||
struct ompi_proc_t** procs=NULL;
|
||||
struct ompi_proc_t* my_proc=NULL;
|
||||
|
||||
|
||||
*color = -1;
|
||||
size = ompi_comm_size(comm);
|
||||
my_proc = ompi_proc_local();
|
||||
procs = comm->c_local_group->grp_proc_pointers;
|
||||
for ( i = 0 ; i < size ; i++) {
|
||||
if ( OMPI_CAST_RTE_NAME(&procs[i]->super.proc_name)->jobid == OMPI_CAST_RTE_NAME(&my_proc->super.proc_name)->jobid &&
|
||||
( OPAL_PROC_ON_LOCAL_NODE(procs[i]->super.proc_flags)) ) {
|
||||
lncount++;
|
||||
if ( *color == -1){
|
||||
*color = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* we need to decrease ncount in order to make the other allreduce/allgather
|
||||
operations work */
|
||||
lncount--;
|
||||
*ncount = lncount;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/* This function checks how many processes are using the component
|
||||
'component_name' for communication and returns this count in
|
||||
'ncount'. Furthermore it returns a 'key', which can be used to split
|
||||
the communicator into subgroups, such that the new communicators
|
||||
will definitly have all processes communicate with this component.
|
||||
|
||||
Oct 13: the algorithm has been modified such that it returns the
|
||||
number of processes using the specified component and the number
|
||||
of processes to which an even 'faster' protocol is being used. (Faster
|
||||
specified in this context as being further up in the list of
|
||||
hier_prot protocols specified at the beginning of this file).
|
||||
*/
|
||||
static void
|
||||
mca_coll_hierarch_checkfor_component ( struct ompi_communicator_t *comm,
|
||||
int component_level,
|
||||
char *component_name,
|
||||
int *key,
|
||||
int *ncount )
|
||||
{
|
||||
opal_bitmap_t reachable;
|
||||
ompi_proc_t **procs=NULL;
|
||||
struct mca_bml_base_btl_array_t *bml_btl_array=NULL;
|
||||
mca_bml_base_btl_t *bml_btl=NULL;
|
||||
mca_btl_base_component_t *btl=NULL;
|
||||
mca_bml_base_endpoint_t *endpoint;
|
||||
|
||||
int i, size, rc;
|
||||
|
||||
int counter=0;
|
||||
int firstproc=999999;
|
||||
int rank = -1;
|
||||
int use_rdma=0;
|
||||
|
||||
/* default values in case an error occurs */
|
||||
*ncount=0;
|
||||
*key=MPI_UNDEFINED;
|
||||
|
||||
/* Shall we check the the rdma list instead of send-list in the endpoint-structure? */
|
||||
use_rdma = mca_coll_hierarch_use_rdma_param;
|
||||
|
||||
size = ompi_comm_size ( comm );
|
||||
rank = ompi_comm_rank ( comm );
|
||||
|
||||
OBJ_CONSTRUCT(&reachable, opal_bitmap_t);
|
||||
rc = opal_bitmap_init(&reachable, size);
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
return;
|
||||
}
|
||||
|
||||
procs = comm->c_local_group->grp_proc_pointers;
|
||||
rc = mca_bml.bml_add_procs ( size, procs, &reachable );
|
||||
if(OMPI_SUCCESS != rc) {
|
||||
return;
|
||||
}
|
||||
|
||||
for ( i=0; i<size; i++ ) {
|
||||
if ( rank == i ) {
|
||||
/* skip myself */
|
||||
continue;
|
||||
}
|
||||
|
||||
endpoint = (mca_bml_base_endpoint_t*) procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
|
||||
if ( use_rdma ) {
|
||||
bml_btl_array = &(endpoint->btl_rdma);
|
||||
}
|
||||
else {
|
||||
bml_btl_array = &(endpoint->btl_send);
|
||||
}
|
||||
bml_btl = mca_bml_base_btl_array_get_index ( bml_btl_array, 0 );
|
||||
btl = bml_btl->btl->btl_component;
|
||||
|
||||
/* sanity check */
|
||||
if ( strcmp(btl->btl_version.mca_type_name,"btl") ) {
|
||||
printf("Oops, got the wrong component! type_name = %s\n",
|
||||
btl->btl_version.mca_type_name );
|
||||
}
|
||||
|
||||
/* check for the required component */
|
||||
if (! strcmp (btl->btl_version.mca_component_name, component_name)){
|
||||
counter++;
|
||||
if (i<firstproc ) {
|
||||
firstproc = i;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
*ncount = counter;
|
||||
/* final decision */
|
||||
if ( counter == 0 ) {
|
||||
/* this is the section indicating, that we are not
|
||||
using this component */
|
||||
firstproc = MPI_UNDEFINED;
|
||||
}
|
||||
else {
|
||||
if ( rank < firstproc ) {
|
||||
firstproc = rank;
|
||||
}
|
||||
}
|
||||
|
||||
*key = firstproc;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/********************************************************************************/
|
||||
/********************************************************************************/
|
||||
/********************************************************************************/
|
||||
|
||||
/*
 * Print the hierarchy data of a module to stdout: the communicator it
 * is attached to, the number of cached llead entries and, for each of
 * them, the leader information of the calling process and the list of
 * local leaders.
 */
static void mca_coll_hierarch_dump_struct ( mca_coll_hierarch_module_t *c)
{
    struct mca_coll_hierarch_llead_t *entry = NULL;
    int me;
    int idx, k;

    me = ompi_comm_rank ( c->hier_comm );

    printf("%d: Dump of hier-struct for comm %s cid %u\n",
           me, c->hier_comm->c_name, c->hier_comm->c_contextid);

    printf("%d: No of llead communicators: %d No of lleaders: %d\n",
           me, opal_pointer_array_get_size ( &(c->hier_llead)),
           c->hier_num_lleaders );

    for ( idx = 0; idx < opal_pointer_array_get_size(&(c->hier_llead)); idx++ ) {
        entry = (mca_coll_hierarch_llead_t*)opal_pointer_array_get_item (&(c->hier_llead), idx);
        if ( NULL != entry ) {
            printf("%d: my_leader %d am_leader %d\n", me,
                   entry->my_lleader, entry->am_lleader );

            for ( k = 0; k < c->hier_num_lleaders; k++ ) {
                printf("%d: lleader[%d] = %d\n", me, k, entry->lleaders[k]);
            }
        }
    }
}
|
||||
|
||||
int mca_coll_hierarch_ft_event(int state) {
|
||||
if(OPAL_CRS_CHECKPOINT == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_CONTINUE == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_RESTART == state) {
|
||||
;
|
||||
}
|
||||
else if(OPAL_CRS_TERM == state ) {
|
||||
;
|
||||
}
|
||||
else {
|
||||
;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
@ -1,351 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2007 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2008 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_COLL_HIERARCH_EXPORT_H
|
||||
#define MCA_COLL_HIERARCH_EXPORT_H
|
||||
|
||||
/* Values of mca_coll_hierarch_detection_alg_param: */
#define ALL_LEVELS 0   /* probe each protocol of the list in turn */
#define TWO_LEVELS 2   /* only distinguish on-node from off-node peers */
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/constants.h"
|
||||
|
||||
#include "mpi.h"
|
||||
#include "opal/class/opal_pointer_array.h"
|
||||
#include "opal/mca/mca.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
#include "ompi/request/request.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/*
|
||||
* Globally exported variable
|
||||
*/
|
||||
|
||||
OMPI_MODULE_DECLSPEC extern const mca_coll_base_component_2_0_0_t mca_coll_hierarch_component;
|
||||
|
||||
extern int mca_coll_hierarch_priority_param;
|
||||
extern int mca_coll_hierarch_verbose_param;
|
||||
extern int mca_coll_hierarch_use_rdma_param;
|
||||
extern int mca_coll_hierarch_ignore_sm_param;
|
||||
extern int mca_coll_hierarch_detection_alg_param;
|
||||
extern int mca_coll_hierarch_bcast_alg_param;
|
||||
extern int mca_coll_hierarch_segsize_param;
|
||||
|
||||
|
||||
#define COLL_HIERARCH_SEG_BCAST_ALG 0
|
||||
#define COLL_HIERARCH_SEG1_BCAST_ALG 1
|
||||
#define COLL_HIERARCH_SEG2_BCAST_ALG 2
|
||||
#define COLL_HIERARCH_SEG3_BCAST_ALG 3
|
||||
#define COLL_HIERARCH_BASIC_BCAST_ALG 4
|
||||
|
||||
|
||||
|
||||
#define HIER_DEFAULT_NUM_LLEAD 5
|
||||
/*
|
||||
* Data structure for attaching data to the communicator
|
||||
*/
|
||||
|
||||
/* Clarifying some terminology:
|
||||
* comm: the input communicator, consisting of several lower level communicators.
|
||||
* lcomm: low level communicator, often refered to as subcommunicator
|
||||
* lleader: local leader, a dedicated process of each low level communicator
|
||||
ATTENTION: an lleader might be the 'head' of a low level
|
||||
communicator of size one!
|
||||
* llcomm: local leader communicator, grouping all local leaders of a comm.
|
||||
*/
|
||||
|
||||
struct mca_coll_hierarch_module_t {
|
||||
mca_coll_base_module_t super;
|
||||
|
||||
struct ompi_communicator_t *hier_comm; /* link back to the attached comm */
|
||||
struct ompi_communicator_t *hier_lcomm; /* low level communicator */
|
||||
opal_pointer_array_t hier_llead; /* local leader communicator structure */
|
||||
int hier_num_lleaders; /* number of local leaders */
|
||||
int hier_level; /* level in the hierarchy. For debugging*/
|
||||
int hier_num_reqs; /* num. of requests */
|
||||
ompi_request_t **hier_reqs; /* list of requests */
|
||||
int hier_num_colorarr; /* size of the colorarr array */
|
||||
int *hier_llr; /* color array compacted (1 entry per color).
|
||||
Array of size hier_num_lleaders */
|
||||
int *hier_max_offset; /* Number of processes for each color.
|
||||
Array of size hier_num_lleaders */
|
||||
int *hier_colorarr; /* array containing the color of all procs */
|
||||
};
|
||||
typedef struct mca_coll_hierarch_module_t mca_coll_hierarch_module_t;
|
||||
OBJ_CLASS_DECLARATION(mca_coll_hierarch_module_t);
|
||||
|
||||
/*
 * Local leader information for one offset, i.e. for one choice of
 * which member of every color group acts as its leader.
 */
struct mca_coll_hierarch_llead_t {
    /* local leader communicator */
    struct ompi_communicator_t *llcomm;
    /* list of local leaders, ranks in comm */
    int *lleaders;
    /* rank of my lleader in lcomm */
    int my_lleader;
    /* am I an lleader? */
    int am_lleader;
    /* offset used for this llcomm */
    int offset;
};

typedef struct mca_coll_hierarch_llead_t mca_coll_hierarch_llead_t;
|
||||
|
||||
|
||||
static inline int mca_coll_hierarch_count_lleaders ( int size, int *carr)
|
||||
{
|
||||
/*
|
||||
* Determine the number of local leaders. Please note, that any process
|
||||
* with color = MPI_UNDEFINED will be counted as the head of a group of its own.
|
||||
* Please note furthermore, that every process with color=MPI_UNDEFINED will be
|
||||
* stored in this array on its own...
|
||||
*/
|
||||
int cnt, i, j, found;
|
||||
int *llr=NULL;
|
||||
|
||||
llr = (int *) malloc ( size * sizeof(int));
|
||||
if (NULL == llr ){
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
llr[0] = carr[0];
|
||||
for (cnt=1, i=1; i<size; i++ ) {
|
||||
if ( carr[i] == MPI_UNDEFINED ) {
|
||||
llr[cnt++] = carr[i];
|
||||
continue;
|
||||
}
|
||||
for ( found=0, j=0; j<cnt; j++ ) {
|
||||
if ( carr[i] == llr[j] ) {
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if ( !found ) {
|
||||
llr[cnt++] = carr[i];
|
||||
}
|
||||
}
|
||||
|
||||
free (llr);
|
||||
return cnt;
|
||||
}
|
||||
|
||||
static inline int mca_coll_hierarch_get_offset ( int rank, int size, int *carr)
|
||||
{
|
||||
int offset, i, color = carr[rank];
|
||||
|
||||
if ( color == MPI_UNDEFINED ) {
|
||||
/* always */
|
||||
return 1;
|
||||
}
|
||||
|
||||
for ( offset=0, i=0; i<=rank; i++) {
|
||||
if ( carr[i] == color ) {
|
||||
offset++;
|
||||
}
|
||||
}
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* This function determines the parameters required in hierarchical
 * collective operations. It is called from the collective operations themselves.
 *
 * @param rroot (input): rank of the root process in comm
 * @param hierarch_module (input): module structure. Contains
 *               all relevant, precomputed data for this set of collectives.
 *
 * @param llroot (output): rank of the root process in llcomm, MPI_UNDEFINED for all
 *               processes not being part of the local leader communicator.
 * @param lleader (output): rank of the local leader in the low level communicator,
 *               or MPI_UNDEFINED if there is no low level communicator.
 * return value: llcomm (local leader communicator) or MPI_COMM_NULL for
 *               all processes not being part of the local leader communicator.
 */
|
||||
|
||||
struct ompi_communicator_t* mca_coll_hierarch_get_llcomm (int rroot,
|
||||
mca_coll_hierarch_module_t *hierarch_module,
|
||||
int* llroot,
|
||||
int* lleader);
|
||||
|
||||
/* This function is supposed to set up all elements of the mca_coll_base_comm_t
|
||||
* structure, including:
|
||||
* hierarch_module->hier_num_lleaders: determine number of local leaders in the comms
|
||||
* hierarch_module->hier_llr: array of size hier_num_lleaders containing the colors
|
||||
* hierarch_module->hier_max_offset: array containing the counter for each color how often
|
||||
* it appears in the colorarr array.
|
||||
*/
|
||||
|
||||
int mca_coll_hierarch_get_llr ( mca_coll_hierarch_module_t *hierarch_module );
|
||||
|
||||
|
||||
/* This function is supposed to set all elements of the llead structure based on the
|
||||
* offset and the rank of the process.
|
||||
*
|
||||
* @param rank(input): rank of the calling process in comm
|
||||
* @param hierarch_module(input): structure of the hierarchical module. Contains
|
||||
* all relevant, precomputed data for this set of collectives.
|
||||
* @param llead(output): ptr to the mca_coll_hierarch_llead_t element which should
|
||||
* be set
|
||||
* @param offset(input): offset which shall be used.
|
||||
*/
|
||||
|
||||
int mca_coll_hierarch_get_all_lleaders ( int rank, mca_coll_hierarch_module_t *hierarch_module,
|
||||
struct mca_coll_hierarch_llead_t *llead,
|
||||
int offset );
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* coll API functions
|
||||
*/
|
||||
int mca_coll_hierarch_init_query(bool allow_hierarch_user_threads,
|
||||
bool have_hidden_threads);
|
||||
mca_coll_base_module_t *
|
||||
mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority );
|
||||
|
||||
|
||||
int mca_coll_hierarch_module_enable( mca_coll_base_module_t *module,
|
||||
struct ompi_communicator_t *comm);
|
||||
|
||||
int mca_coll_hierarch_module_finalize(struct ompi_communicator_t *comm);
|
||||
|
||||
int mca_coll_hierarch_allgather_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module );
|
||||
int mca_coll_hierarch_allgatherv_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void * rbuf, int *rcounts,
|
||||
int *disps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_hierarch_allreduce_intra(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_hierarch_alltoall_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void* rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_hierarch_alltoallv_intra(void *sbuf, int *scounts,
|
||||
int *sdisps,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int *rcounts,
|
||||
int *rdisps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_hierarch_alltoallw_intra(void *sbuf, int *scounts,
|
||||
int *sdisps,
|
||||
struct ompi_datatype_t **sdtypes,
|
||||
void *rbuf, int *rcounts,
|
||||
int *rdisps,
|
||||
struct ompi_datatype_t **rdtypes,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_hierarch_barrier_intra(struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_hierarch_bcast_intra(void *buff, int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_hierarch_exscan_intra(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm);
|
||||
int mca_coll_hierarch_gather_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_hierarch_gatherv_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int *rcounts, int *disps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_hierarch_reduce_intra(void *sbuf, void* rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_hierarch_reduce_scatter_intra(void *sbuf, void *rbuf,
|
||||
int *rcounts,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_hierarch_scan_intra(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_hierarch_scatter_intra(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype, void *rbuf,
|
||||
int rcount, struct ompi_datatype_t *rdtype,
|
||||
int root, struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_hierarch_scatterv_intra(void *sbuf, int *scounts, int *disps,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void* rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype, int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
/*
|
||||
* These are trivial implementations of these routines used during comm_query/init,
|
||||
* since we cannot access any other collectives
|
||||
*/
|
||||
int mca_coll_hierarch_allgather_tmp(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm);
|
||||
int mca_coll_hierarch_allreduce_tmp(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm);
|
||||
int mca_coll_hierarch_bcast_tmp ( void *buf, int count, struct ompi_datatype_t *dtype,
|
||||
int root, struct ompi_communicator_t *comm);
|
||||
|
||||
int mca_coll_hierarch_gather_tmp(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
int root, struct ompi_communicator_t *comm);
|
||||
int mca_coll_hierarch_reduce_tmp(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
int root, struct ompi_communicator_t *comm);
|
||||
|
||||
int mca_coll_hierarch_ft_event(int status);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif /* MCA_COLL_HIERARCH_EXPORT_H */
|
@ -1,115 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 University of Houston. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "coll_hierarch.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "mpi.h"
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
|
||||
|
||||
/*
|
||||
* reduce_intra
|
||||
*
|
||||
* Function: - reduction using two level hierarchy algorithm
|
||||
* Accepts: - same as MPI_Reduce()
|
||||
* Returns: - MPI_SUCCESS or error code
|
||||
*/
|
||||
int mca_coll_hierarch_allreduce_intra(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
struct ompi_communicator_t *llcomm=NULL;
|
||||
struct ompi_communicator_t *lcomm=NULL;
|
||||
mca_coll_hierarch_module_t *hierarch_module = (mca_coll_hierarch_module_t *) module;
|
||||
int rank;
|
||||
int lroot, llroot;
|
||||
ptrdiff_t extent, true_extent, lb, true_lb;
|
||||
char *tmpbuf=NULL, *tbuf=NULL;
|
||||
int ret=OMPI_SUCCESS;
|
||||
int root=0;
|
||||
|
||||
rank = ompi_comm_rank ( comm );
|
||||
lcomm = hierarch_module->hier_lcomm;
|
||||
|
||||
if ( mca_coll_hierarch_verbose_param ) {
|
||||
printf("%s:%d: executing hierarchical allreduce with cnt=%d \n",
|
||||
comm->c_name, rank, count );
|
||||
}
|
||||
|
||||
llcomm = mca_coll_hierarch_get_llcomm ( root, hierarch_module, &llroot, &lroot);
|
||||
|
||||
if ( MPI_COMM_NULL != lcomm ) {
|
||||
ompi_datatype_get_extent(dtype, &lb, &extent);
|
||||
ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent);
|
||||
|
||||
tbuf = (char*)malloc(true_extent + (count - 1) * extent);
|
||||
if (NULL == tbuf) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
tmpbuf = tbuf - true_lb;
|
||||
|
||||
if ( MPI_IN_PLACE != sbuf ) {
|
||||
ret = lcomm->c_coll.coll_reduce (sbuf, tmpbuf, count, dtype,
|
||||
op, lroot, lcomm,
|
||||
lcomm->c_coll.coll_reduce_module);
|
||||
}
|
||||
else {
|
||||
ret = lcomm->c_coll.coll_reduce (rbuf, tmpbuf, count, dtype,
|
||||
op, lroot, lcomm,
|
||||
lcomm->c_coll.coll_reduce_module);
|
||||
}
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
if ( MPI_UNDEFINED != llroot ) {
|
||||
if ( MPI_COMM_NULL != lcomm ) {
|
||||
ret = llcomm->c_coll.coll_allreduce (tmpbuf, rbuf, count, dtype,
|
||||
op, llcomm,
|
||||
llcomm->c_coll.coll_allreduce_module);
|
||||
}
|
||||
else {
|
||||
ret = llcomm->c_coll.coll_allreduce (sbuf, rbuf, count, dtype,
|
||||
op, llcomm,
|
||||
llcomm->c_coll.coll_allreduce_module);
|
||||
}
|
||||
}
|
||||
|
||||
if ( MPI_COMM_NULL != lcomm ) {
|
||||
ret = lcomm->c_coll.coll_bcast(rbuf, count, dtype, lroot, lcomm,
|
||||
lcomm->c_coll.coll_bcast_module );
|
||||
}
|
||||
|
||||
|
||||
exit:
|
||||
if ( NULL != tmpbuf ) {
|
||||
free ( tmpbuf );
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
@ -1,81 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 University of Houston. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "coll_hierarch.h"
|
||||
|
||||
#include "mpi.h"
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
|
||||
|
||||
/*
|
||||
* barrier_intra
|
||||
*
|
||||
* Function: - barrier using hierarchical algorithm
|
||||
* Accepts: - same as MPI_Barrier()
|
||||
* Returns: - MPI_SUCCESS or error code
|
||||
*/
|
||||
int mca_coll_hierarch_barrier_intra(struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
struct ompi_communicator_t *llcomm=NULL;
|
||||
struct ompi_communicator_t *lcomm=NULL;
|
||||
mca_coll_hierarch_module_t *hierarch_module = (mca_coll_hierarch_module_t *) module;
|
||||
int root=0;
|
||||
int lroot, llroot;
|
||||
int rank, ret=OMPI_SUCCESS;
|
||||
|
||||
rank = ompi_comm_rank ( comm );
|
||||
lcomm = hierarch_module->hier_lcomm;
|
||||
|
||||
if ( mca_coll_hierarch_verbose_param ) {
|
||||
printf("%s:%d: executing hierarchical barrier\n", comm->c_name, rank );
|
||||
}
|
||||
|
||||
llcomm = mca_coll_hierarch_get_llcomm ( root, hierarch_module, &llroot, &lroot);
|
||||
|
||||
/*
|
||||
* Barrier consists of three steps:
|
||||
* - barrier on the low-level communicators
|
||||
* - barrier among the local leaders
|
||||
* - barrier on the low-level communicators. This step is
|
||||
* necessary to avoid that any non local leaders exit too early.
|
||||
*/
|
||||
if ( MPI_COMM_NULL != lcomm ) {
|
||||
ret = lcomm->c_coll.coll_barrier ( lcomm, lcomm->c_coll.coll_barrier_module );
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if ( MPI_UNDEFINED != llroot ) {
|
||||
ret = llcomm->c_coll.coll_barrier ( llcomm, llcomm->c_coll.coll_barrier_module );
|
||||
}
|
||||
|
||||
if ( MPI_COMM_NULL != lcomm ) {
|
||||
ret = lcomm->c_coll.coll_barrier ( lcomm, lcomm->c_coll.coll_barrier_module );
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -1,755 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2008 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "coll_hierarch.h"
|
||||
|
||||
#include "mpi.h"
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
#include "ompi/mca/coll/base/coll_tags.h"
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
|
||||
/*
|
||||
* bcast_intra
|
||||
*
|
||||
* Function: - broadcast using hierarchical algorithm
|
||||
* Accepts: - same arguments as MPI_Bcast()
|
||||
* Returns: - MPI_SUCCESS or error code
|
||||
*/
|
||||
|
||||
|
||||
|
||||
|
||||
static int mca_coll_hierarch_bcast_intra_seg (void *buff,
|
||||
int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module,
|
||||
int segsize );
|
||||
|
||||
|
||||
static int mca_coll_hierarch_bcast_intra_seg1 (void *buff,
|
||||
int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module,
|
||||
int segsize );
|
||||
|
||||
|
||||
static int mca_coll_hierarch_bcast_intra_seg2 (void *buff,
|
||||
int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module,
|
||||
int segsize );
|
||||
|
||||
static int mca_coll_hierarch_bcast_intra_seg3 (void *buff,
|
||||
int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module,
|
||||
int segsize );
|
||||
|
||||
|
||||
|
||||
|
||||
int mca_coll_hierarch_bcast_intra(void *buff,
|
||||
int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
int bcast_alg = mca_coll_hierarch_bcast_alg_param;
|
||||
int segsize = mca_coll_hierarch_segsize_param;
|
||||
int ret=OMPI_SUCCESS;
|
||||
|
||||
|
||||
/* Here is a brief description on what we try to evaluate:
|
||||
- bcast_intra_seg used the bcast of lcomm and llcomm, similarly
|
||||
to original algorithm in hierarch. However, it can segment
|
||||
the message, such that we might get an overlap between the two
|
||||
layers. This overlap is based on the assumption, that a process
|
||||
might be done early with a bcast and can start the next one.
|
||||
- bcast_intra_seg1: replaces the llcomm->bcast by isend/irecvs
|
||||
to increase the overlap, keeps the lcomm->bcast however
|
||||
- bcast_intra_seg2: replaced lcomm->bcast by isend/irecvs
|
||||
to increase the overlap, keeps however llcomm->bcast
|
||||
- bcast_intra_seg3: replaced both lcomm->bcast and llcomm->bcast
|
||||
by isend/irecvs
|
||||
*/
|
||||
|
||||
if ( COLL_HIERARCH_SEG_BCAST_ALG == bcast_alg ) {
|
||||
ret = mca_coll_hierarch_bcast_intra_seg ( buff, count, datatype, root,
|
||||
comm, module, segsize );
|
||||
}
|
||||
else if ( COLL_HIERARCH_SEG1_BCAST_ALG == bcast_alg ) {
|
||||
ret = mca_coll_hierarch_bcast_intra_seg1 ( buff, count, datatype, root,
|
||||
comm, module, segsize );
|
||||
}
|
||||
else if ( COLL_HIERARCH_SEG2_BCAST_ALG == bcast_alg ) {
|
||||
ret = mca_coll_hierarch_bcast_intra_seg2 ( buff, count, datatype, root,
|
||||
comm, module, segsize );
|
||||
}
|
||||
else if ( COLL_HIERARCH_SEG3_BCAST_ALG == bcast_alg ) {
|
||||
ret = mca_coll_hierarch_bcast_intra_seg3 ( buff, count, datatype, root,
|
||||
comm, module, segsize );
|
||||
}
|
||||
else {
|
||||
/* Segment size of zero forces the entire message to be bcasted
|
||||
as a single segment. */
|
||||
ret = mca_coll_hierarch_bcast_intra_seg ( buff, count, datatype, root,
|
||||
comm, module, 0 );
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int mca_coll_hierarch_bcast_intra_seg (void *buff,
|
||||
int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module,
|
||||
int segsize )
|
||||
{
|
||||
struct ompi_communicator_t *llcomm=NULL;
|
||||
struct ompi_communicator_t *lcomm=NULL;
|
||||
mca_coll_hierarch_module_t *hierarch_module = (mca_coll_hierarch_module_t *) module;
|
||||
int lroot=MPI_UNDEFINED, llroot=MPI_UNDEFINED;
|
||||
int rank=0, ret=OMPI_SUCCESS;
|
||||
MPI_Aint ub=0, typeext=0;
|
||||
size_t typesize=0;
|
||||
int realsegsize=0, remaining_count=0;
|
||||
int num_segments=0, segcount=0, segindex=0;
|
||||
char* tmpbuf = (char *) buff;
|
||||
|
||||
rank = ompi_comm_rank ( comm );
|
||||
lcomm = hierarch_module->hier_lcomm;
|
||||
|
||||
if ( mca_coll_hierarch_verbose_param ) {
|
||||
printf("%s:%d: executing hierarchical seg bcast with cnt=%d root=%d, segsize=%d\n",
|
||||
comm->c_name, rank, count, root, segsize );
|
||||
}
|
||||
|
||||
/*
|
||||
* This function returns the local leader communicator
|
||||
* which *always* contains the root of this operation.
|
||||
* This might involve creating a new communicator. This is
|
||||
* also the reason, that *every* process in comm has to call
|
||||
* this function
|
||||
*/
|
||||
llcomm = mca_coll_hierarch_get_llcomm ( root, hierarch_module, &llroot, &lroot);
|
||||
|
||||
|
||||
ompi_datatype_type_size ( datatype, &typesize);
|
||||
ompi_datatype_get_extent ( datatype, &ub, &typeext);
|
||||
|
||||
|
||||
/* Determine number of segments and number of elements per segment */
|
||||
if ((typesize > 0) && (segsize % typesize != 0)) {
|
||||
/* segment size must be a multiple of typesize */
|
||||
segsize = typesize * (segsize / typesize);
|
||||
}
|
||||
if ((segsize == 0) || (count == 0) || (typesize == 0)) {
|
||||
segcount = count;
|
||||
num_segments = 1;
|
||||
}
|
||||
else {
|
||||
segcount = segsize/typesize;
|
||||
num_segments = count/segcount;
|
||||
if ( (count % segcount) != 0 ) {
|
||||
num_segments++;
|
||||
}
|
||||
if (num_segments == 1) {
|
||||
segcount = count;
|
||||
}
|
||||
}
|
||||
|
||||
realsegsize = segcount*typeext;
|
||||
remaining_count = segcount;
|
||||
|
||||
|
||||
for (segindex = 0; segindex < num_segments; segindex++) {
|
||||
/* determine how many elements are being sent in this round */
|
||||
if( segindex == (num_segments - 1) ) {
|
||||
remaining_count = count - segindex*segcount;
|
||||
}
|
||||
|
||||
/* Bcast on the upper level among the local leaders */
|
||||
if ( MPI_UNDEFINED != llroot ) {
|
||||
ret = llcomm->c_coll.coll_bcast(tmpbuf, remaining_count,
|
||||
datatype, llroot, llcomm,
|
||||
llcomm->c_coll.coll_bcast_module);
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
/* once the local leaders got the data from the root, they can distribute
|
||||
* it to the processes in their local, low-level communicator.
|
||||
*/
|
||||
if ( MPI_COMM_NULL != lcomm ) {
|
||||
ret = lcomm->c_coll.coll_bcast(tmpbuf, remaining_count,
|
||||
datatype, lroot, lcomm,
|
||||
lcomm->c_coll.coll_bcast_module);
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
tmpbuf += realsegsize;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int mca_coll_hierarch_bcast_intra_seg1 (void *buff,
|
||||
int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module,
|
||||
int segsize )
|
||||
{
|
||||
struct ompi_communicator_t *llcomm=NULL;
|
||||
struct ompi_communicator_t *lcomm=NULL;
|
||||
mca_coll_hierarch_module_t *hierarch_module = (mca_coll_hierarch_module_t *) module;
|
||||
int lroot=MPI_UNDEFINED, llroot=MPI_UNDEFINED;
|
||||
int llrank=0, llsize=0, rank=0, ret=OMPI_SUCCESS;
|
||||
MPI_Aint ub=0, typeext=0;
|
||||
size_t typesize=0;
|
||||
int i, realsegsize=0, remaining_count=0;
|
||||
int num_segments=0, segcount=0, segindex=0;
|
||||
char* tmpbuf = (char *) buff;
|
||||
ompi_request_t **sreq=NULL;
|
||||
ompi_request_t *rreq=MPI_REQUEST_NULL;
|
||||
|
||||
rank = ompi_comm_rank ( comm );
|
||||
lcomm = hierarch_module->hier_lcomm;
|
||||
|
||||
if ( mca_coll_hierarch_verbose_param ) {
|
||||
printf("%s:%d: executing hierarchical seg1 bcast with cnt=%d root=%d segsize=%d\n",
|
||||
comm->c_name, rank, count, root, segsize );
|
||||
}
|
||||
|
||||
/*
|
||||
* This function returns the local leader communicator
|
||||
* which *always* contains the root of this operation.
|
||||
* This might involve creating a new communicator. This is
|
||||
* also the reason, that *every* process in comm has to call
|
||||
* this function
|
||||
*/
|
||||
llcomm = mca_coll_hierarch_get_llcomm ( root, hierarch_module, &llroot, &lroot);
|
||||
|
||||
ompi_datatype_type_size ( datatype, &typesize);
|
||||
ompi_datatype_get_extent ( datatype, &ub, &typeext);
|
||||
|
||||
/* Determine number of segments and number of elements per segment */
|
||||
if ((typesize > 0) && (segsize % typesize != 0)) {
|
||||
/* segment size must be a multiple of typesize */
|
||||
segsize = typesize * (segsize / typesize);
|
||||
}
|
||||
if ((segsize == 0) || (count == 0) || (typesize == 0)) {
|
||||
segcount = count;
|
||||
num_segments = 1;
|
||||
}
|
||||
else {
|
||||
segcount = segsize/typesize;
|
||||
num_segments = count/segcount;
|
||||
if ( (count % segcount) != 0 ) {
|
||||
num_segments++;
|
||||
}
|
||||
if (num_segments == 1) {
|
||||
segcount = count;
|
||||
}
|
||||
}
|
||||
|
||||
realsegsize = segcount*typeext;
|
||||
remaining_count = segcount;
|
||||
|
||||
if ( MPI_COMM_NULL != llcomm ) {
|
||||
llrank = ompi_comm_rank ( llcomm );
|
||||
llsize = ompi_comm_size ( llcomm);
|
||||
sreq = hierarch_module->hier_reqs;
|
||||
for(i=0; i<llsize; i++) {
|
||||
sreq[i] = MPI_REQUEST_NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Broadcasting the first segment in the upper level*/
|
||||
if ( MPI_UNDEFINED != llroot ) {
|
||||
ret = llcomm->c_coll.coll_bcast(tmpbuf, remaining_count, datatype,
|
||||
llroot, llcomm,
|
||||
llcomm->c_coll.coll_bcast_module );
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Since the first segment has already been bcasted, this loop
|
||||
starts at 1 and not with segment 0 */
|
||||
for (segindex = 1; segindex < num_segments; segindex++) {
|
||||
/* determine how many elements are being sent in this round */
|
||||
if( segindex == (num_segments - 1) ) {
|
||||
remaining_count = count - segindex*segcount;
|
||||
}
|
||||
tmpbuf += realsegsize;
|
||||
|
||||
/* Broadcasting the next segment in the upper level using non blocking
|
||||
operations*/
|
||||
if ( MPI_COMM_NULL != llcomm ) {
|
||||
if( llrank == llroot) {
|
||||
for( i = 0; i < llsize; i++) {
|
||||
if( i != llroot) {
|
||||
ret = MCA_PML_CALL(isend(tmpbuf, remaining_count, datatype, i,
|
||||
MCA_COLL_BASE_TAG_BCAST,
|
||||
MCA_PML_BASE_SEND_STANDARD,
|
||||
llcomm, &(sreq[i])));
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
ret = MCA_PML_CALL(irecv(tmpbuf, remaining_count, datatype, llroot,
|
||||
MCA_COLL_BASE_TAG_BCAST,
|
||||
llcomm, &rreq ));
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* broadcasting the before segment among the lower level processes
|
||||
using blocking operations*/
|
||||
if ( MPI_COMM_NULL != lcomm ) {
|
||||
ret = lcomm->c_coll.coll_bcast(tmpbuf-realsegsize, segcount,
|
||||
datatype, lroot, lcomm,
|
||||
lcomm->c_coll.coll_bcast_module);
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if ( MPI_COMM_NULL != llcomm ) {
|
||||
if ( llrank == llroot ) {
|
||||
ret = ompi_request_wait_all( llsize, sreq, MPI_STATUSES_IGNORE);
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
else {
|
||||
ret = ompi_request_wait( &rreq, MPI_STATUS_IGNORE);
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Bcasting the last segment among the lower level processes using blocking operations
|
||||
* once the local leaders got the data from the root, they can distribute
|
||||
* it to the processes in their local, low-level communicator.
|
||||
*/
|
||||
if ( MPI_COMM_NULL != lcomm ) {
|
||||
ret = lcomm->c_coll.coll_bcast(tmpbuf, remaining_count, datatype,
|
||||
lroot, lcomm,
|
||||
lcomm->c_coll.coll_bcast_module);
|
||||
}
|
||||
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int mca_coll_hierarch_bcast_intra_seg2 (void *buff,
|
||||
int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module,
|
||||
int segsize )
|
||||
{
|
||||
struct ompi_communicator_t *llcomm=NULL;
|
||||
struct ompi_communicator_t *lcomm=NULL;
|
||||
mca_coll_hierarch_module_t *hierarch_module = (mca_coll_hierarch_module_t *) module;
|
||||
int lroot=MPI_UNDEFINED, llroot=MPI_UNDEFINED;
|
||||
int rank=0, ret=OMPI_SUCCESS;
|
||||
int lsize=0, lrank=0;
|
||||
MPI_Aint ub=0, typeext=0;
|
||||
size_t typesize=0;
|
||||
int i, realsegsize=0, remaining_count=0;
|
||||
int num_segments=0, segcount=0, segindex=0;
|
||||
char* tmpbuf = (char *) buff;
|
||||
ompi_request_t **sreq=NULL;
|
||||
ompi_request_t *rreq=MPI_REQUEST_NULL;
|
||||
|
||||
rank = ompi_comm_rank ( comm );
|
||||
lcomm = hierarch_module->hier_lcomm;
|
||||
|
||||
if ( mca_coll_hierarch_verbose_param ) {
|
||||
printf("%s:%d: executing hierarchical seg2 bcast with cnt=%d root=%d segsize=%d\n",
|
||||
comm->c_name, rank, count, root, segsize );
|
||||
}
|
||||
|
||||
/*
|
||||
* This function returns the local leader communicator
|
||||
* which *always* contains the root of this operation.
|
||||
* This might involve creating a new communicator. This is
|
||||
* also the reason, that *every* process in comm has to call
|
||||
* this function
|
||||
*/
|
||||
llcomm = mca_coll_hierarch_get_llcomm ( root, hierarch_module, &llroot, &lroot);
|
||||
|
||||
ompi_datatype_type_size ( datatype, &typesize);
|
||||
ompi_datatype_get_extent ( datatype, &ub, &typeext);
|
||||
|
||||
/* Determine number of segments and number of elements per segment */
|
||||
if ((typesize > 0) && (segsize % typesize != 0)) {
|
||||
/* segment size must be a multiple of typesize */
|
||||
segsize = typesize * (segsize / typesize);
|
||||
}
|
||||
if ((segsize == 0) || (count == 0) || (typesize == 0)) {
|
||||
segcount = count;
|
||||
num_segments = 1;
|
||||
}
|
||||
else {
|
||||
segcount = segsize/typesize;
|
||||
num_segments = count/segcount;
|
||||
if ( (count % segcount) != 0 ) {
|
||||
num_segments++;
|
||||
}
|
||||
if (num_segments == 1) {
|
||||
segcount = count;
|
||||
}
|
||||
}
|
||||
|
||||
realsegsize = segcount*typeext;
|
||||
remaining_count = segcount;
|
||||
|
||||
lsize = ompi_comm_size (lcomm);
|
||||
sreq = hierarch_module->hier_reqs;
|
||||
for(i=0; i<lsize; i++) {
|
||||
sreq[i] = MPI_REQUEST_NULL;
|
||||
}
|
||||
|
||||
|
||||
if ( MPI_UNDEFINED != llroot ) {
|
||||
ret = llcomm->c_coll.coll_bcast(tmpbuf, remaining_count, datatype,
|
||||
llroot, llcomm,
|
||||
llcomm->c_coll.coll_bcast_module);
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if ( MPI_COMM_NULL != lcomm ) {
|
||||
lrank = ompi_comm_rank ( lcomm );
|
||||
}
|
||||
|
||||
for (segindex = 1; segindex < num_segments; segindex++) {
|
||||
/* once the local leaders got the data from the root, they can distribute
|
||||
* it to the processes in their local, low-level communicator.*/
|
||||
|
||||
if ( MPI_COMM_NULL != lcomm ) {
|
||||
if(lrank == lroot) {
|
||||
for(i = 0; i < lsize; i++) {
|
||||
if( i != lroot) {
|
||||
ret = MCA_PML_CALL(isend(tmpbuf, remaining_count, datatype, i,
|
||||
MCA_COLL_BASE_TAG_BCAST,
|
||||
MCA_PML_BASE_SEND_STANDARD,
|
||||
lcomm, &(sreq[i])));
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
ret = MCA_PML_CALL(irecv(tmpbuf, remaining_count, datatype, lroot,
|
||||
MCA_COLL_BASE_TAG_BCAST, lcomm, &rreq));
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* determine how many elements are being sent in this round */
|
||||
if( segindex == (num_segments - 1) ) {
|
||||
remaining_count = count - segindex*segcount;
|
||||
}
|
||||
tmpbuf += realsegsize;
|
||||
|
||||
if ( MPI_UNDEFINED != llroot ) {
|
||||
ret = llcomm->c_coll.coll_bcast(tmpbuf, remaining_count, datatype,
|
||||
llroot, llcomm,
|
||||
llcomm->c_coll.coll_bcast_module);
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if ( MPI_COMM_NULL != lcomm ) {
|
||||
if ( lrank == lroot ) {
|
||||
ret = ompi_request_wait_all ( lsize, sreq, MPI_STATUSES_IGNORE);
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
else {
|
||||
ret = ompi_request_wait( &rreq, MPI_STATUS_IGNORE);
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Bcasting the last segment among the lower level processes
|
||||
* once the local leaders got the data from the root, they can distribute
|
||||
* it to the processes in their local, low-level communicator.
|
||||
*/
|
||||
if ( MPI_COMM_NULL != lcomm ) {
|
||||
ret = lcomm->c_coll.coll_bcast(tmpbuf, remaining_count, datatype,
|
||||
lroot, lcomm,
|
||||
lcomm->c_coll.coll_bcast_module);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int mca_coll_hierarch_bcast_intra_seg3 (void *buff,
|
||||
int count,
|
||||
struct ompi_datatype_t *datatype,
|
||||
int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module,
|
||||
int segsize )
|
||||
{
|
||||
struct ompi_communicator_t *llcomm=NULL;
|
||||
struct ompi_communicator_t *lcomm=NULL;
|
||||
mca_coll_hierarch_module_t *hierarch_module = (mca_coll_hierarch_module_t *) module;
|
||||
int lroot=MPI_UNDEFINED, llroot=MPI_UNDEFINED;
|
||||
int llrank=MPI_UNDEFINED, llsize=0, rank=0, ret=OMPI_SUCCESS;
|
||||
int lsize=0, lrank=MPI_UNDEFINED;
|
||||
MPI_Aint ub=0, typeext=0;
|
||||
size_t typesize=0;
|
||||
int i, realsegsize=0, remaining_count=0;
|
||||
int num_segments=0, segcount=0, segindex=0;
|
||||
char* tmpbuf = (char *) buff;
|
||||
ompi_request_t **sreq=NULL, **sreq1=NULL;
|
||||
ompi_request_t *rreq=MPI_REQUEST_NULL, *rreq1=MPI_REQUEST_NULL;
|
||||
|
||||
rank = ompi_comm_rank ( comm );
|
||||
lcomm = hierarch_module->hier_lcomm;
|
||||
|
||||
if ( mca_coll_hierarch_verbose_param ) {
|
||||
printf("%s:%d: executing hierarchical seg3 bcast with cnt=%d root=%d segsize=%d\n",
|
||||
comm->c_name, rank, count, root, segsize );
|
||||
}
|
||||
|
||||
/*
|
||||
* This function returns the local leader communicator
|
||||
* which *always* contains the root of this operation.
|
||||
* This might involve creating a new communicator. This is
|
||||
* also the reason, that *every* process in comm has to call
|
||||
* this function
|
||||
*/
|
||||
llcomm = mca_coll_hierarch_get_llcomm ( root, hierarch_module, &llroot, &lroot);
|
||||
|
||||
ompi_datatype_type_size ( datatype, &typesize);
|
||||
ompi_datatype_get_extent ( datatype, &ub, &typeext);
|
||||
|
||||
/* Determine number of segments and number of elements per segment */
|
||||
if ((typesize > 0) && (segsize % typesize != 0)) {
|
||||
/* segment size must be a multiple of typesize */
|
||||
segsize = typesize * (segsize / typesize);
|
||||
}
|
||||
if ((segsize == 0) || (count == 0) || (typesize == 0)) {
|
||||
segcount = count;
|
||||
num_segments = 1;
|
||||
} else {
|
||||
segcount = segsize/typesize;
|
||||
num_segments = count/segcount;
|
||||
if ( (count % segcount) != 0 ) num_segments++;
|
||||
if (num_segments == 1) segcount = count;
|
||||
}
|
||||
realsegsize = segcount*typeext;
|
||||
remaining_count = segcount;
|
||||
|
||||
if ( MPI_COMM_NULL != lcomm ) {
|
||||
lsize = ompi_comm_size ( lcomm );
|
||||
lrank = ompi_comm_rank ( lcomm );
|
||||
sreq1 = (ompi_request_t **)malloc ( lsize * sizeof(ompi_request_t *));
|
||||
if ( NULL == sreq1 ) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
for(i=0; i<lsize; i++) {
|
||||
sreq1[i] = MPI_REQUEST_NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if ( MPI_COMM_NULL != llcomm ) {
|
||||
llsize = ompi_comm_size (llcomm);
|
||||
llrank = ompi_comm_rank ( llcomm );
|
||||
|
||||
sreq = hierarch_module->hier_reqs;
|
||||
for(i=0; i<llsize; i++) {
|
||||
sreq[i] = MPI_REQUEST_NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Broadcasting the first segment in the upper level*/
|
||||
if ( MPI_UNDEFINED != llroot ) {
|
||||
ret = llcomm->c_coll.coll_bcast(tmpbuf, remaining_count, datatype,
|
||||
llroot, llcomm,
|
||||
llcomm->c_coll.coll_bcast_module);
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
for (segindex = 1; segindex < num_segments; segindex++) {
|
||||
/* determine how many elements are being sent in this round */
|
||||
if( segindex == (num_segments - 1) ) {
|
||||
remaining_count = count - segindex*segcount;
|
||||
}
|
||||
tmpbuf += realsegsize;
|
||||
|
||||
/* Broadcasting the next segment in the upper level*/
|
||||
if ( MPI_COMM_NULL != llcomm ) {
|
||||
if(llrank == llroot) {
|
||||
for(i = 0; i < llsize; i++) {
|
||||
if( i != llroot) {
|
||||
ret = MCA_PML_CALL(isend(tmpbuf, remaining_count, datatype, i,
|
||||
MCA_COLL_BASE_TAG_BCAST,
|
||||
MCA_PML_BASE_SEND_STANDARD,
|
||||
llcomm, (sreq+i) ));
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
ret = MCA_PML_CALL(irecv(tmpbuf, remaining_count, datatype, llroot,
|
||||
MCA_COLL_BASE_TAG_BCAST,
|
||||
llcomm, &rreq ));
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* broadcasting the before segment among the lower level processes
|
||||
* once the local leaders got the data from the root, they can distribute
|
||||
* it to the processes in their local, low-level communicator.
|
||||
*/
|
||||
if ( MPI_COMM_NULL != lcomm ) {
|
||||
if( lrank == lroot) {
|
||||
for( i = 0; i < lsize; i++) {
|
||||
if( i != lroot) {
|
||||
ret = MCA_PML_CALL(isend(tmpbuf-realsegsize, segcount, datatype, i,
|
||||
MCA_COLL_BASE_TAG_BCAST,
|
||||
MCA_PML_BASE_SEND_STANDARD,
|
||||
lcomm, (sreq1+i) ));
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
ret = MCA_PML_CALL(irecv(tmpbuf-realsegsize, segcount, datatype, lroot,
|
||||
MCA_COLL_BASE_TAG_BCAST , lcomm, &rreq1 ));
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Wait for the upper level bcast to complete*/
|
||||
if ( MPI_COMM_NULL != llcomm ) {
|
||||
if ( llrank == llroot ) {
|
||||
ret = ompi_request_wait_all(llsize, sreq, MPI_STATUSES_IGNORE);
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
else {
|
||||
ret = ompi_request_wait( &rreq, MPI_STATUS_IGNORE );
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*Wait for the lower level bcast to complete */
|
||||
if ( MPI_COMM_NULL != lcomm ) {
|
||||
if ( lrank == lroot ) {
|
||||
ret = ompi_request_wait_all(lsize, sreq1, MPI_STATUSES_IGNORE);
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
else {
|
||||
ret = ompi_request_wait( &rreq1, MPI_STATUS_IGNORE);
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*Bcasting the last segment among the lower level processes
|
||||
* once the local leaders got the data from the root, they can distribute
|
||||
* it to the processes in their local, low-level communicator.
|
||||
*/
|
||||
if ( MPI_COMM_NULL != lcomm ) {
|
||||
ret = lcomm->c_coll.coll_bcast(tmpbuf, remaining_count, datatype,
|
||||
lroot, lcomm,
|
||||
lcomm->c_coll.coll_bcast_module);
|
||||
}
|
||||
|
||||
exit:
|
||||
if ( NULL != sreq1 ) {
|
||||
free ( sreq1 );
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
@ -1,224 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; -*- */
|
||||
/*
|
||||
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2007 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007-2009 University of Houston. All rights reserved.
|
||||
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*
|
||||
* These symbols are in a file by themselves to provide nice linker
|
||||
* semantics. Since linkers generally pull in symbols by object
|
||||
* files, keeping these symbols as the only symbols in this file
|
||||
* prevents utility programs such as "ompi_info" from having to import
|
||||
* entire components just to query their version and parameters.
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "coll_hierarch.h"
|
||||
|
||||
#include "mpi.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
|
||||
/*
|
||||
* Public string showing the coll ompi_hierarch component version number
|
||||
*/
|
||||
const char *mca_coll_hierarch_component_version_string =
|
||||
"OMPI/MPI hierarch collective MCA component version " OMPI_VERSION;
|
||||
|
||||
/*
|
||||
* Global variable
|
||||
*/
|
||||
int mca_coll_hierarch_priority_param=0;
|
||||
int mca_coll_hierarch_verbose_param=0;
|
||||
int mca_coll_hierarch_use_rdma_param=0;
|
||||
int mca_coll_hierarch_ignore_sm_param=0;
|
||||
int mca_coll_hierarch_detection_alg_param=2;
|
||||
int mca_coll_hierarch_bcast_alg_param=COLL_HIERARCH_BASIC_BCAST_ALG;
|
||||
int mca_coll_hierarch_segsize_param=32768;
|
||||
|
||||
/*
|
||||
* Local function
|
||||
*/
|
||||
static int hierarch_register(void);
|
||||
|
||||
/*
|
||||
* Instantiate the public struct with all of our public information
|
||||
* and pointers to our public functions in it
|
||||
*/
|
||||
|
||||
const mca_coll_base_component_2_0_0_t mca_coll_hierarch_component = {
|
||||
|
||||
/* First, the mca_component_t struct containing meta information
|
||||
about the component itself */
|
||||
|
||||
{
|
||||
MCA_COLL_BASE_VERSION_2_0_0,
|
||||
|
||||
/* Component name and version */
|
||||
"hierarch",
|
||||
OMPI_MAJOR_VERSION,
|
||||
OMPI_MINOR_VERSION,
|
||||
OMPI_RELEASE_VERSION,
|
||||
|
||||
/* Component open and close functions */
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
hierarch_register
|
||||
},
|
||||
{
|
||||
/* The component is checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
|
||||
/* Initialization / querying functions */
|
||||
mca_coll_hierarch_init_query,
|
||||
mca_coll_hierarch_comm_query,
|
||||
};
|
||||
|
||||
|
||||
static int hierarch_register(void)
|
||||
{
|
||||
/* Use a high priority, but allow other components to be higher */
|
||||
mca_coll_hierarch_priority_param = 0;
|
||||
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
|
||||
"priority", "Priority of the hierarchical coll component",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_coll_hierarch_priority_param);
|
||||
|
||||
mca_coll_hierarch_verbose_param = 0;
|
||||
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
|
||||
"verbose",
|
||||
"Turn verbose message of the hierarchical coll component on/off",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_coll_hierarch_verbose_param);
|
||||
|
||||
mca_coll_hierarch_use_rdma_param = 0;
|
||||
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
|
||||
"use_rdma",
|
||||
"Switch from the send btl list used to detect hierarchies to "
|
||||
"the rdma btl list",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_coll_hierarch_use_rdma_param);
|
||||
|
||||
mca_coll_hierarch_ignore_sm_param = 0;
|
||||
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
|
||||
"ignore_sm",
|
||||
"Ignore sm protocol when detecting hierarchies. "
|
||||
"Required to enable the usage of protocol"
|
||||
" specific collective operations",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_coll_hierarch_ignore_sm_param);
|
||||
|
||||
mca_coll_hierarch_detection_alg_param = 2;
|
||||
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
|
||||
"detection_alg",
|
||||
"Used to specify the algorithm for detecting Hierarchy."
|
||||
"Choose between all or two levels of hierarchy",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_coll_hierarch_detection_alg_param);
|
||||
|
||||
|
||||
mca_coll_hierarch_bcast_alg_param = COLL_HIERARCH_BASIC_BCAST_ALG;
|
||||
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
|
||||
"bcast_alg",
|
||||
"Used to specify the algorithm used for bcast operations.",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_coll_hierarch_bcast_alg_param);
|
||||
|
||||
mca_coll_hierarch_segsize_param = 32768;
|
||||
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
|
||||
"segment_size",
|
||||
"Used to specify the segment size for segmented algorithms.",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_coll_hierarch_segsize_param);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static void
|
||||
mca_coll_hierarch_module_construct(mca_coll_hierarch_module_t *module)
|
||||
{
|
||||
module->hier_lcomm = MPI_COMM_NULL;
|
||||
module->hier_reqs = NULL;
|
||||
module->hier_colorarr = NULL;
|
||||
module->hier_llr = NULL;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static void
|
||||
mca_coll_hierarch_module_destruct(mca_coll_hierarch_module_t *hierarch_module)
|
||||
{
|
||||
int i, size;
|
||||
struct mca_coll_hierarch_llead_t *current=NULL;
|
||||
|
||||
if ( MPI_COMM_NULL != hierarch_module->hier_lcomm ) {
|
||||
ompi_comm_free (&(hierarch_module->hier_lcomm) );
|
||||
}
|
||||
if ( NULL != hierarch_module->hier_reqs ) {
|
||||
free ( hierarch_module->hier_reqs );
|
||||
}
|
||||
|
||||
size = opal_pointer_array_get_size ( &(hierarch_module->hier_llead));
|
||||
for ( i=0; i<size; i++) {
|
||||
current = (struct mca_coll_hierarch_llead_t *)opal_pointer_array_get_item (
|
||||
&(hierarch_module->hier_llead), i ) ;
|
||||
|
||||
if ( NULL == current ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( NULL != current->lleaders ) {
|
||||
free ( current->lleaders );
|
||||
}
|
||||
if ( MPI_COMM_NULL != current->llcomm ){
|
||||
ompi_comm_free ( &(current->llcomm));
|
||||
}
|
||||
free ( current );
|
||||
}
|
||||
opal_pointer_array_remove_all ( &(hierarch_module->hier_llead));
|
||||
OBJ_DESTRUCT (&(hierarch_module->hier_llead));
|
||||
|
||||
if ( NULL != hierarch_module->hier_colorarr ) {
|
||||
free ( hierarch_module->hier_colorarr );
|
||||
}
|
||||
if ( NULL != hierarch_module->hier_llr ) {
|
||||
free ( hierarch_module->hier_llr);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
/* Class descriptor for the hierarch module: derived from the base coll
   module, using the constructor/destructor pair defined in this file. */
OBJ_CLASS_INSTANCE(mca_coll_hierarch_module_t, mca_coll_base_module_t,
                   mca_coll_hierarch_module_construct,
                   mca_coll_hierarch_module_destruct);
|
@ -1,109 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2014 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2007 University of Houston. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "coll_hierarch.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "mpi.h"
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/datatype/ompi_datatype.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
|
||||
|
||||
/*
|
||||
* reduce_intra
|
||||
*
|
||||
* Function: - reduction using two level hierarchy algorithm
|
||||
* Accepts: - same as MPI_Reduce()
|
||||
* Returns: - MPI_SUCCESS or error code
|
||||
*/
|
||||
int mca_coll_hierarch_reduce_intra(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
int root, struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
struct ompi_communicator_t *llcomm=NULL;
|
||||
struct ompi_communicator_t *lcomm=NULL;
|
||||
mca_coll_hierarch_module_t *hierarch_module = (mca_coll_hierarch_module_t *) module;
|
||||
int rank;
|
||||
int lroot, llroot;
|
||||
ptrdiff_t extent, true_extent, lb, true_lb;
|
||||
char *tmpbuf=NULL, *tbuf=NULL;
|
||||
int ret=OMPI_SUCCESS;
|
||||
|
||||
rank = ompi_comm_rank ( comm );
|
||||
lcomm = hierarch_module->hier_lcomm;
|
||||
|
||||
if ( mca_coll_hierarch_verbose_param ) {
|
||||
printf("%s:%d: executing hierarchical reduce with cnt=%d and root=%d\n",
|
||||
comm->c_name, rank, count, root );
|
||||
}
|
||||
|
||||
llcomm = mca_coll_hierarch_get_llcomm ( root, hierarch_module, &llroot, &lroot);
|
||||
|
||||
if ( MPI_COMM_NULL != lcomm ) {
|
||||
ompi_datatype_get_extent(dtype, &lb, &extent);
|
||||
ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent);
|
||||
|
||||
tbuf = (char*)malloc(true_extent + (count - 1) * extent);
|
||||
if (NULL == tbuf) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
tmpbuf = tbuf - true_lb;
|
||||
|
||||
|
||||
if ( MPI_IN_PLACE != sbuf ) {
|
||||
ret = lcomm->c_coll.coll_reduce (sbuf, tmpbuf, count, dtype,
|
||||
op, lroot, lcomm,
|
||||
lcomm->c_coll.coll_reduce_module);
|
||||
}
|
||||
else {
|
||||
ret = lcomm->c_coll.coll_reduce (rbuf, tmpbuf, count, dtype,
|
||||
op, lroot, lcomm,
|
||||
lcomm->c_coll.coll_reduce_module);
|
||||
}
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
if ( MPI_UNDEFINED != llroot ) {
|
||||
if ( MPI_COMM_NULL != lcomm ) {
|
||||
ret = llcomm->c_coll.coll_reduce (tmpbuf, rbuf, count, dtype,
|
||||
op, llroot, llcomm,
|
||||
llcomm->c_coll.coll_reduce_module);
|
||||
}
|
||||
else {
|
||||
ret = llcomm->c_coll.coll_reduce (sbuf, rbuf, count, dtype,
|
||||
op, llroot, llcomm,
|
||||
llcomm->c_coll.coll_reduce_module);
|
||||
}
|
||||
}
|
||||
|
||||
exit:
|
||||
if ( NULL != tmpbuf ) {
|
||||
free ( tmpbuf );
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
@ -1,209 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
* University Research and Technology
|
||||
* Corporation. All rights reserved.
|
||||
* Copyright (c) 2004-2006 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
* University of Stuttgart. All rights reserved.
|
||||
* Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2008 University of Houston. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "coll_hierarch.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include "mpi.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/op/op.h"
|
||||
|
||||
#include "ompi/mca/coll/base/base.h"
|
||||
#include "ompi/mca/coll/base/coll_tags.h"
|
||||
|
||||
#include "ompi/mca/bml/base/base.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
|
||||
|
||||
int mca_coll_hierarch_allreduce_tmp(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = mca_coll_hierarch_reduce_tmp ( sbuf, rbuf, count, dtype, op, 0, comm);
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
return ret;
|
||||
}
|
||||
ret = mca_coll_hierarch_bcast_tmp ( rbuf, count, dtype, 0, comm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
int mca_coll_hierarch_allgather_tmp(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
int ret;
|
||||
int size = ompi_comm_size (comm);
|
||||
|
||||
ret = mca_coll_hierarch_gather_tmp ( sbuf, scount, sdtype, rbuf, rcount,
|
||||
rdtype, 0, comm);
|
||||
|
||||
if ( OMPI_SUCCESS != ret ) {
|
||||
return ret;
|
||||
}
|
||||
ret = mca_coll_hierarch_bcast_tmp ( rbuf, rcount*size, rdtype, 0, comm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int mca_coll_hierarch_bcast_tmp ( void *buf, int count, struct ompi_datatype_t *dtype,
|
||||
int root, struct ompi_communicator_t *comm)
|
||||
{
|
||||
int err = OMPI_SUCCESS;
|
||||
int rank = ompi_comm_rank ( comm );
|
||||
|
||||
if ( rank != root ) {
|
||||
err = MCA_PML_CALL(recv(buf, count, dtype, root,
|
||||
MCA_COLL_BASE_TAG_BCAST,
|
||||
comm, MPI_STATUS_IGNORE));
|
||||
if ( OMPI_SUCCESS != err ) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
else {
|
||||
int i;
|
||||
int size=ompi_comm_size ( comm );
|
||||
|
||||
for ( i=0; i<size; i++ ) {
|
||||
if ( i == root ) {
|
||||
continue;
|
||||
}
|
||||
err = MCA_PML_CALL(send(buf, count, dtype, i,
|
||||
MCA_COLL_BASE_TAG_BCAST,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
if ( OMPI_SUCCESS != err ) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int mca_coll_hierarch_reduce_tmp(void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
int root, struct ompi_communicator_t *comm)
|
||||
{
|
||||
int i, err, size;
|
||||
char *pml_buffer = NULL;
|
||||
ptrdiff_t extent, lb;
|
||||
int rank = ompi_comm_rank(comm);
|
||||
|
||||
/* If not root, send data to the root. */
|
||||
if (rank != root) {
|
||||
err = MCA_PML_CALL(send(sbuf, count, dtype, root,
|
||||
MCA_COLL_BASE_TAG_REDUCE,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
return err;
|
||||
}
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
|
||||
ompi_datatype_get_extent(dtype, &lb, &extent);
|
||||
pml_buffer = (char*)malloc(count * extent);
|
||||
if (NULL == pml_buffer) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)rbuf, (char*)sbuf);
|
||||
if (MPI_SUCCESS != err) {
|
||||
goto exit;
|
||||
}
|
||||
|
||||
/* Loop receiving and calling reduction function (C or Fortran). */
|
||||
for (i = size - 1; i >= 0; --i) {
|
||||
if (rank == i) {
|
||||
continue;
|
||||
} else {
|
||||
err = MCA_PML_CALL(recv(pml_buffer, count, dtype, i,
|
||||
MCA_COLL_BASE_TAG_REDUCE, comm,
|
||||
MPI_STATUS_IGNORE));
|
||||
if (MPI_SUCCESS != err) {
|
||||
goto exit;
|
||||
}
|
||||
}
|
||||
|
||||
/* Perform the reduction */
|
||||
ompi_op_reduce(op, pml_buffer, rbuf, count, dtype);
|
||||
}
|
||||
|
||||
exit:
|
||||
if (NULL != pml_buffer) {
|
||||
free(pml_buffer);
|
||||
}
|
||||
return MPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
int mca_coll_hierarch_gather_tmp(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
int root, struct ompi_communicator_t *comm)
|
||||
{
|
||||
int i;
|
||||
int err;
|
||||
int rank;
|
||||
int size;
|
||||
char *ptmp;
|
||||
MPI_Aint incr;
|
||||
MPI_Aint extent;
|
||||
MPI_Aint lb;
|
||||
|
||||
size = ompi_comm_size(comm);
|
||||
rank = ompi_comm_rank(comm);
|
||||
|
||||
/* Everyone but root sends data and returns. */
|
||||
if (rank != root) {
|
||||
return MCA_PML_CALL(send(sbuf, scount, sdtype, root,
|
||||
MCA_COLL_BASE_TAG_GATHER,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
}
|
||||
|
||||
/* I am the root, loop receiving the data. */
|
||||
ompi_datatype_get_extent(rdtype, &lb, &extent);
|
||||
incr = extent * rcount;
|
||||
for (i = 0, ptmp = (char *) rbuf; i < size; ++i, ptmp += incr) {
|
||||
if (i == rank) {
|
||||
if (MPI_IN_PLACE != sbuf) {
|
||||
err = ompi_datatype_sndrcv(sbuf, scount, sdtype,
|
||||
ptmp, rcount, rdtype);
|
||||
} else {
|
||||
err = MPI_SUCCESS;
|
||||
}
|
||||
} else {
|
||||
err = MCA_PML_CALL(recv(ptmp, rcount, rdtype, i,
|
||||
MCA_COLL_BASE_TAG_GATHER,
|
||||
comm, MPI_STATUS_IGNORE));
|
||||
}
|
||||
if (MPI_SUCCESS != err) {
|
||||
return err;
|
||||
}
|
||||
}
|
||||
|
||||
/* All done */
|
||||
return MPI_SUCCESS;
|
||||
}
|
@ -1,27 +0,0 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2013 Sandia National Laboratories. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_ompi_coll_hierarch_POST_CONFIG(will_build)
# ----------------------------------------
# The hierarch coll component needs a BML endpoint tag in order to compile.
# The requirement is raised in POST_CONFIG rather than in CONFIG so that it
# only applies when the component has not been disabled ($1 is "1").
AC_DEFUN([MCA_ompi_coll_hierarch_POST_CONFIG], [
    AS_IF([test "$1" = "1"],
          [OMPI_REQUIRE_ENDPOINT_TAG([BML])])
])dnl
|
||||
|
||||
# MCA_ompi_coll_hierarch_CONFIG(action-if-can-compile,
#                               [action-if-cant-compile])
# ------------------------------------------------
# Nothing is probed here: the component Makefile is registered and the
# "can compile" action is run.  Only an explicit disable keeps the
# component from being built.
AC_DEFUN([MCA_ompi_coll_hierarch_CONFIG],[
    AC_CONFIG_FILES([ompi/mca/coll/hierarch/Makefile])
    [$1]
])dnl
|
Загрузка…
x
Ссылка в новой задаче
Block a user