
hierarch: with Edgar's blessing, remove the coll hierarch module

This commit is contained in:
Jeff Squyres 2015-01-27 13:25:27 -06:00
parent a741c44035
commit 2d5b92157f
10 changed files with 0 additions and 2669 deletions

View file

@@ -1,51 +0,0 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2010 Cisco Systems, Inc. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_ompi_coll_hierarch_DSO
component_noinst =
component_install = mca_coll_hierarch.la
else
component_noinst = libmca_coll_hierarch.la
component_install =
endif
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_coll_hierarch_la_SOURCES = $(sources)
mca_coll_hierarch_la_LDFLAGS = -module -avoid-version
noinst_LTLIBRARIES = $(component_noinst)
libmca_coll_hierarch_la_SOURCES = $(sources)
libmca_coll_hierarch_la_LDFLAGS = -module -avoid-version
# Source files
sources = \
coll_hierarch.h \
coll_hierarch.c \
coll_hierarch_allreduce.c \
coll_hierarch_barrier.c \
coll_hierarch_bcast.c \
coll_hierarch_component.c \
coll_hierarch_reduce.c \
coll_hierarch_tmpcoll.c

View file

@@ -1,747 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2008 University of Houston. All rights reserved.
* Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
* Copyright (c) 2012 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "coll_hierarch.h"
#include <stdio.h>
#include "opal/mca/hwloc/base/base.h"
#include "opal/mca/btl/btl.h"
#include "mpi.h"
#include "ompi/communicator/communicator.h"
#include "ompi/group/group.h"
#include "ompi/proc/proc.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "opal/class/opal_bitmap.h"
#include "ompi/mca/bml/bml.h"
#include "ompi/mca/bml/base/base.h"
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/pml/base/base.h"
/* Local functions and data */
#define HIER_MAXPROTOCOL 6
#define HIER_MAX_PROTNAMELEN 7
static int mca_coll_hierarch_max_protocol=HIER_MAXPROTOCOL;
/* Comments: need to add ofud, portals and sctp into this list! */
static char hier_prot[HIER_MAXPROTOCOL][HIER_MAX_PROTNAMELEN]={"0","tcp","udapl","mx","openib","sm"};
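/* Note: the list above appears to be ordered from more generic/slower towards
   faster/more local interconnects (sm last); the detection loop in
   comm_query() below walks it from the end (sm) down towards tcp, and never
   tests the "0" placeholder at index 0. */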
static void mca_coll_hierarch_checkfor_component (struct ompi_communicator_t *comm,
int component_level,
char *component_name,
int *key, int *ncount);
static void mca_coll_hierarch_checkfor_sm (struct ompi_communicator_t *comm,
int *color,
int *ncount);
static void mca_coll_hierarch_dump_struct ( mca_coll_hierarch_module_t *c);
/*
* Initial query function that is invoked during MPI_INIT, allowing
* this module to indicate what level of thread support it provides.
*/
int mca_coll_hierarch_init_query(bool allow_hierarch_user_threads,
bool have_hidden_user_threads)
{
/* Don't ask. All done */
return OMPI_SUCCESS;
}
/*
* Invoked when there's a new communicator that has been created.
* Look at the communicator and decide which set of functions and
* priority we want to return.
*/
mca_coll_base_module_t *
mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority )
{
int size, rank;
int color, ncount=0, maxncount;
int level;
int ret=OMPI_SUCCESS;
int ignore_sm=0;
int detection_alg=0;
mca_coll_hierarch_module_t *hierarch_module;
/* This module only works for intra-communicators at the moment */
if (OMPI_COMM_IS_INTER(comm)) {
return NULL;
}
/* Get the priority level attached to this module. If priority is less
* than or equal to 0, then the module is unavailable. */
*priority = mca_coll_hierarch_priority_param;
if (0 >= mca_coll_hierarch_priority_param) {
return NULL;
}
/* This module only works when the BTLs are alive. If they aren't, time to exit. */
if (!mca_bml_base_inited()) return NULL;
size = ompi_comm_size(comm);
if (size < 3) {
/* No need for hierarchical collectives for 1 or 2 procs. */
return NULL;
}
hierarch_module = OBJ_NEW(mca_coll_hierarch_module_t);
if (NULL == hierarch_module) {
return NULL;
}
hierarch_module->super.coll_module_enable = mca_coll_hierarch_module_enable;
hierarch_module->super.ft_event = mca_coll_hierarch_ft_event;
hierarch_module->super.coll_allgather = NULL;
hierarch_module->super.coll_allgatherv = NULL;
hierarch_module->super.coll_allreduce = mca_coll_hierarch_allreduce_intra;
hierarch_module->super.coll_alltoall = NULL;
hierarch_module->super.coll_alltoallv = NULL;
hierarch_module->super.coll_alltoallw = NULL;
hierarch_module->super.coll_barrier = mca_coll_hierarch_barrier_intra;
hierarch_module->super.coll_bcast = mca_coll_hierarch_bcast_intra;
hierarch_module->super.coll_exscan = NULL;
hierarch_module->super.coll_gather = NULL;
hierarch_module->super.coll_gatherv = NULL;
hierarch_module->super.coll_reduce = mca_coll_hierarch_reduce_intra;
hierarch_module->super.coll_reduce_scatter = NULL;
hierarch_module->super.coll_scan = NULL;
hierarch_module->super.coll_scatter = NULL;
hierarch_module->super.coll_scatterv = NULL;
/* Check whether we should ignore sm. This might be necessary to take advantage
of some ib or gm collectives. */
ignore_sm = mca_coll_hierarch_ignore_sm_param;
rank = ompi_comm_rank(comm);
hierarch_module->hier_num_colorarr = size;
hierarch_module->hier_colorarr = (int *) malloc ( sizeof(int) * size);
if ( NULL == hierarch_module->hier_colorarr ) {
*priority = 0;
return NULL;
}
/*
* walk through the list of registered protocols, and check which one
* is feasible.
* Later we start with level=0, and introduce the multi-cell check
*/
if ( ignore_sm ) {
mca_coll_hierarch_max_protocol = HIER_MAXPROTOCOL - 1;
}
/* if the number of levels is not specified, or if it is specified as ALL_LEVELS,
* proceed in the usual way
*/
detection_alg = mca_coll_hierarch_detection_alg_param;
if( TWO_LEVELS == detection_alg ) {
mca_coll_hierarch_max_protocol = 2;
if ( mca_coll_hierarch_verbose_param ) {
printf("Using two level hierarchy detection\n");
}
}
for ( level = mca_coll_hierarch_max_protocol - 1; level >0 ; level--) {
if ( ALL_LEVELS == detection_alg ) {
mca_coll_hierarch_checkfor_component ( comm,
level,
hier_prot[level],
&color,
&ncount);
}
else if (TWO_LEVELS == detection_alg ) {
mca_coll_hierarch_checkfor_sm ( comm, &color, &ncount );
}
/* This is probably a no-no, but for the moment we agreed with Jeff
** that this might be the best solution. These functions emulate an
** allreduce and an allgather.
*/
ret = mca_coll_hierarch_allreduce_tmp (&ncount, &maxncount, 1, MPI_INT,
MPI_MAX, comm );
if ( OMPI_SUCCESS != ret ) {
return NULL;
}
if ( 0 == maxncount ) {
if ( mca_coll_hierarch_verbose_param ) {
printf("%s:%d: nobody talks with %s. Continuing to next level.\n",
comm->c_name, rank, hier_prot[level]);
}
continue;
}
else if ( maxncount == (size-1) ) {
/*
* everybody can talk to every other process with this protocol,
* no need to continue in the hierarchy tree and for the
* hierarchical component.
* It's (size-1) because we do not count ourselves.
*/
if ( mca_coll_hierarch_verbose_param ) {
if ( ALL_LEVELS == detection_alg ) {
printf("%s:%d: everybody talks with %s. No need to continue\n",
comm->c_name, rank, hier_prot[level]);
}
else if ( TWO_LEVELS == detection_alg ) {
printf("%s:%d: everybody talks with sm. No need to continue\n",
comm->c_name, rank );
}
}
goto exit;
}
else {
if ( mca_coll_hierarch_verbose_param ) {
printf("%s:%d: %d procs talk with %s. Use this protocol, key %d\n",
comm->c_name, rank, maxncount, hier_prot[level], color);
}
ret = mca_coll_hierarch_allgather_tmp (&color, 1, MPI_INT,
hierarch_module->hier_colorarr, 1,
MPI_INT, comm );
if ( OMPI_SUCCESS != ret ) {
return NULL;
}
hierarch_module->hier_level = level;
return &(hierarch_module->super);
}
}
exit:
*priority = 0;
return NULL;
}
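/*
 * Worked example (hypothetical): a 4-process job on 2 nodes with 2 procs
 * per node and detection_alg == TWO_LEVELS. checkfor_sm() reports ncount=1
 * on every rank (one sm peer each, self excluded). The emulated allreduce
 * then yields maxncount=1, which is neither 0 nor size-1=3, so the module
 * is selected with sm as the lowest hierarchy level and one local leader
 * per node.
 */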
/*
* Init module on the communicator
*/
int mca_coll_hierarch_module_enable (mca_coll_base_module_t *module,
struct ompi_communicator_t *comm)
{
int color;
int size, rank, ret=OMPI_SUCCESS;
struct ompi_communicator_t *lcomm=NULL;
struct ompi_communicator_t *llcomm=NULL;
struct mca_coll_hierarch_llead_t *llead=NULL;
mca_coll_hierarch_module_t *hierarch_module = (mca_coll_hierarch_module_t *) module;
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
color = hierarch_module->hier_colorarr[rank];
/* Generate the subcommunicator based on the color returned by
the previous function. */
ret = ompi_comm_split ( comm, color, rank, &lcomm, 0 );
if ( OMPI_SUCCESS != ret ) {
goto exit;
}
if ( OMPI_COMM_CID_IS_LOWER ( lcomm, comm ) ) {
/* Mark the communicator as 'extra retain' and increase the
reference count by one more. See ompi_comm_activate
for detailed comments
*/
OMPI_COMM_SET_EXTRA_RETAIN (lcomm);
OBJ_RETAIN(lcomm);
}
hierarch_module->hier_comm = comm;
hierarch_module->hier_lcomm = lcomm;
hierarch_module->hier_num_reqs = 2 * size;
hierarch_module->hier_reqs = (ompi_request_t **) malloc (sizeof(ompi_request_t *) * size * 2);
if ( NULL == hierarch_module->hier_reqs ) {
goto exit;
}
/* allocate a certain number of the hierarch_llead structures, which store
information about the local leaders and the corresponding subcommunicators
*/
llead = (struct mca_coll_hierarch_llead_t * ) malloc (
sizeof(struct mca_coll_hierarch_llead_t));
if ( NULL == llead ) {
goto exit;
}
/* These two routines set all relevant entries in the mca_coll_base_comm_t
* structure. The first one sets all entries that are independent of the
* offset (and has to be done only once per module). The second one
* depends on the offset, and therefore has to be called every time we need
* a new llcomm
*/
mca_coll_hierarch_get_llr ( hierarch_module );
mca_coll_hierarch_get_all_lleaders ( rank, hierarch_module, llead, 1 );
/* Generate the lleader communicator assuming that all lleaders are the first
process in the list of processes with the same color. A function generating
other lleader-comms will follow soon. */
color = MPI_UNDEFINED;
if ( llead->am_lleader ) {
color = 1;
}
ret = ompi_comm_split ( comm, color, rank, &llcomm, 0);
if ( OMPI_SUCCESS != ret ) {
goto exit;
}
if ( OMPI_COMM_CID_IS_LOWER ( llcomm, comm ) ) {
/* Mark the communicator as 'extra retain' and increase the
reference count by one more. See ompi_comm_activate
for detailed explanation.
*/
OMPI_COMM_SET_EXTRA_RETAIN (llcomm);
OBJ_RETAIN(llcomm);
}
llead->llcomm = llcomm;
/* Store it now on the data structure */
OBJ_CONSTRUCT(&(hierarch_module->hier_llead), opal_pointer_array_t);
opal_pointer_array_add ( &(hierarch_module->hier_llead), llead);
if ( mca_coll_hierarch_verbose_param ) {
mca_coll_hierarch_dump_struct (hierarch_module);
}
exit:
if ( OMPI_SUCCESS != ret ) {
if (NULL != llead) {
free(llead);
}
ompi_comm_free ( &lcomm );
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
int mca_coll_hierarch_get_all_lleaders ( int rank, mca_coll_hierarch_module_t *hierarch_module,
struct mca_coll_hierarch_llead_t * llead,
int offset )
{
int i, j, ret=OMPI_SUCCESS;
int *cntarr=NULL;
int mycolor;
cntarr = (int *)calloc (1, sizeof (int)* hierarch_module->hier_num_lleaders );
if ( NULL == cntarr ) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
llead->lleaders = (int *) malloc (sizeof(int) * hierarch_module->hier_num_lleaders);
if ( NULL == llead->lleaders ) {
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit;
}
llead->offset = offset;
for ( i=0; i < hierarch_module->hier_num_lleaders; i++ ) {
if ( MPI_UNDEFINED == hierarch_module->hier_llr[i] ) {
cntarr[i] = 1;
llead->lleaders[i] = MPI_UNDEFINED;
}
}
for ( i=0; i<hierarch_module->hier_num_colorarr; i++) {
if ( MPI_UNDEFINED == hierarch_module->hier_colorarr[i] ) {
continue;
}
for ( j=0; j<hierarch_module->hier_num_lleaders; j++) {
if ( cntarr[j] >= offset ) {
continue;
}
if ( hierarch_module->hier_colorarr[i] == hierarch_module->hier_llr[j]) {
cntarr[j]++;
llead->lleaders[j] = i;
break;
}
}
}
mycolor = hierarch_module->hier_colorarr[rank];
if ( MPI_UNDEFINED == mycolor ) {
llead->am_lleader = 1;
llead->my_lleader = MPI_UNDEFINED;
}
else {
llead->am_lleader = 0;
for ( i=0; i< hierarch_module->hier_num_lleaders; i++ ) {
if ( hierarch_module->hier_llr[i] == mycolor ) {
llead->my_lleader = cntarr[i]-1;
if ( llead->lleaders[i] == rank ) {
llead->am_lleader = 1;
}
break;
}
}
}
exit:
if ( NULL != cntarr ) {
free ( cntarr );
}
return ret;
}
int mca_coll_hierarch_get_llr ( mca_coll_hierarch_module_t *hierarch_module )
{
int i, j, cnt, found;
int ncount;
ncount = mca_coll_hierarch_count_lleaders ( hierarch_module->hier_num_colorarr,
hierarch_module->hier_colorarr);
hierarch_module->hier_num_lleaders = ncount;
hierarch_module->hier_llr = (int *) malloc ( (size_t)hierarch_module->hier_num_lleaders * sizeof(int));
hierarch_module->hier_max_offset = (int *) calloc ( 1, (size_t)hierarch_module->hier_num_lleaders * sizeof(int));
if ( ( NULL == hierarch_module->hier_llr) || ( NULL == hierarch_module->hier_max_offset )) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
hierarch_module->hier_llr[0] = hierarch_module->hier_colorarr[0];
hierarch_module->hier_max_offset[0]=1;
for ( cnt=1, i=1; i<hierarch_module->hier_num_colorarr; i++ ) {
if ( MPI_UNDEFINED == hierarch_module->hier_colorarr[i] ) {
hierarch_module->hier_llr[cnt] = hierarch_module->hier_colorarr[i];
hierarch_module->hier_max_offset[cnt] = 1;
cnt++;
continue;
}
for ( found=0, j=0; j<cnt; j++ ) {
if ( hierarch_module->hier_llr[j] == hierarch_module->hier_colorarr[i]) {
hierarch_module->hier_max_offset[j]++;
found = 1;
break;
}
}
if ( !found ) {
hierarch_module->hier_llr[cnt] = hierarch_module->hier_colorarr[i];
hierarch_module->hier_max_offset[cnt]++;
cnt++;
}
}
return OMPI_SUCCESS;
}
struct ompi_communicator_t* mca_coll_hierarch_get_llcomm (int root,
mca_coll_hierarch_module_t *hierarch_module,
int* llroot,
int* lroot)
{
struct ompi_communicator_t *llcomm=NULL;
struct ompi_group_t *llgroup=NULL;
struct ompi_group_t *group=NULL;
struct mca_coll_hierarch_llead_t *llead=NULL;
int found, i, rc, num_llead, offset;
int rank = ompi_comm_rank (hierarch_module->hier_comm);
int color;
/* determine what the offset of root is in the colorarr */
offset = mca_coll_hierarch_get_offset ( root,
hierarch_module->hier_num_colorarr,
hierarch_module->hier_colorarr );
num_llead = opal_pointer_array_get_size ( &(hierarch_module->hier_llead) );
for ( found=0, i=0; i < num_llead; i++ ) {
llead = (struct mca_coll_hierarch_llead_t *) opal_pointer_array_get_item (
&(hierarch_module->hier_llead), i );
if ( NULL == llead ) {
continue;
}
if (llead->offset == offset ) {
found = 1;
break;
}
#if 0
else if () {
/* the offset of root = maxoffset of this color and
* the offset on llead is larger than the offset of root.
* then we can also use this llead structure
*/
}
#endif
}
if ( !found ) {
/* allocate a new llead element */
llead = (struct mca_coll_hierarch_llead_t *) malloc (
sizeof(struct mca_coll_hierarch_llead_t));
if ( NULL == llead ) {
return NULL;
}
/* generate the list of lleaders with this offset */
mca_coll_hierarch_get_all_lleaders ( rank, hierarch_module, llead, offset );
color = MPI_UNDEFINED;
if ( llead->am_lleader ) {
color = 1;
}
/* create new lleader subcommunicator */
rc = ompi_comm_split ( hierarch_module->hier_comm, color, root, &llcomm, 0);
if ( OMPI_SUCCESS != rc ) {
return NULL;
}
if ( OMPI_COMM_CID_IS_LOWER ( llcomm, hierarch_module->hier_comm ) ) {
/* Mark the communicator as 'extra retain' and increase the
reference count by one more. See ompi_comm_activate
for detailed explanation. */
OMPI_COMM_SET_EXTRA_RETAIN (llcomm);
OBJ_RETAIN(llcomm);
}
llead->llcomm = llcomm;
/* Store the new element on the hierarch_module struct */
opal_pointer_array_add ( &(hierarch_module->hier_llead), llead);
}
llcomm = llead->llcomm;
*lroot = llead->my_lleader;
*llroot = MPI_UNDEFINED;
if ( MPI_COMM_NULL != llcomm ) {
group = hierarch_module->hier_comm->c_local_group;
llgroup = llcomm->c_local_group;
rc = ompi_group_translate_ranks ( group, 1, &root, llgroup, llroot);
if ( OMPI_SUCCESS != rc ) {
return NULL;
}
}
return llcomm;
}
/**********************************************************************/
/**********************************************************************/
/**********************************************************************/
static void
mca_coll_hierarch_checkfor_sm ( struct ompi_communicator_t *comm, int *color, int *ncount )
{
int i, size;
int lncount=0;
struct ompi_proc_t** procs=NULL;
struct ompi_proc_t* my_proc=NULL;
*color = -1;
size = ompi_comm_size(comm);
my_proc = ompi_proc_local();
procs = comm->c_local_group->grp_proc_pointers;
for ( i = 0 ; i < size ; i++) {
if ( OMPI_CAST_RTE_NAME(&procs[i]->super.proc_name)->jobid == OMPI_CAST_RTE_NAME(&my_proc->super.proc_name)->jobid &&
( OPAL_PROC_ON_LOCAL_NODE(procs[i]->super.proc_flags)) ) {
lncount++;
if ( *color == -1){
*color = i;
}
}
}
/* we need to decrease ncount in order to make the other allreduce/allgather
operations work */
lncount--;
*ncount = lncount;
return;
}
/* This function checks how many processes are using the component
'component_name' for communication and returns this count in
'ncount'. Furthermore it returns a 'key', which can be used to split
the communicator into subgroups, such that the new communicators
will definitely have all processes communicate with this component.
Oct 13: the algorithm has been modified such that it returns the
number of processes using the specified component and the number
of processes with which an even 'faster' protocol is used. ('Faster'
in this context means being further up in the list of
hier_prot protocols specified at the beginning of this file.)
*/
static void
mca_coll_hierarch_checkfor_component ( struct ompi_communicator_t *comm,
int component_level,
char *component_name,
int *key,
int *ncount )
{
opal_bitmap_t reachable;
ompi_proc_t **procs=NULL;
struct mca_bml_base_btl_array_t *bml_btl_array=NULL;
mca_bml_base_btl_t *bml_btl=NULL;
mca_btl_base_component_t *btl=NULL;
mca_bml_base_endpoint_t *endpoint;
int i, size, rc;
int counter=0;
int firstproc=999999;
int rank = -1;
int use_rdma=0;
/* default values in case an error occurs */
*ncount=0;
*key=MPI_UNDEFINED;
/* Shall we check the rdma list instead of the send list in the endpoint structure? */
use_rdma = mca_coll_hierarch_use_rdma_param;
size = ompi_comm_size ( comm );
rank = ompi_comm_rank ( comm );
OBJ_CONSTRUCT(&reachable, opal_bitmap_t);
rc = opal_bitmap_init(&reachable, size);
if(OMPI_SUCCESS != rc) {
return;
}
procs = comm->c_local_group->grp_proc_pointers;
rc = mca_bml.bml_add_procs ( size, procs, &reachable );
if(OMPI_SUCCESS != rc) {
return;
}
for ( i=0; i<size; i++ ) {
if ( rank == i ) {
/* skip myself */
continue;
}
endpoint = (mca_bml_base_endpoint_t*) procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
if ( use_rdma ) {
bml_btl_array = &(endpoint->btl_rdma);
}
else {
bml_btl_array = &(endpoint->btl_send);
}
bml_btl = mca_bml_base_btl_array_get_index ( bml_btl_array, 0 );
btl = bml_btl->btl->btl_component;
/* sanity check */
if ( strcmp(btl->btl_version.mca_type_name,"btl") ) {
printf("Oops, got the wrong component! type_name = %s\n",
btl->btl_version.mca_type_name );
}
/* check for the required component */
if (! strcmp (btl->btl_version.mca_component_name, component_name)){
counter++;
if (i<firstproc ) {
firstproc = i;
}
continue;
}
}
*ncount = counter;
/* final decision */
if ( counter == 0 ) {
/* this is the section indicating that we are not
using this component */
firstproc = MPI_UNDEFINED;
}
else {
if ( rank < firstproc ) {
firstproc = rank;
}
}
*key = firstproc;
return;
}
/********************************************************************************/
/********************************************************************************/
/********************************************************************************/
static void mca_coll_hierarch_dump_struct ( mca_coll_hierarch_module_t *c)
{
int i, j;
int rank;
struct mca_coll_hierarch_llead_t *current=NULL;
rank = ompi_comm_rank ( c->hier_comm );
printf("%d: Dump of hier-struct for comm %s cid %u\n",
rank, c->hier_comm->c_name, c->hier_comm->c_contextid);
printf("%d: No of llead communicators: %d No of lleaders: %d\n",
rank, opal_pointer_array_get_size ( &(c->hier_llead)),
c->hier_num_lleaders );
for ( i=0; i < opal_pointer_array_get_size(&(c->hier_llead)); i++ ) {
current = (mca_coll_hierarch_llead_t*)opal_pointer_array_get_item (&(c->hier_llead), i);
if ( current == NULL ) {
continue;
}
printf("%d: my_leader %d am_leader %d\n", rank,
current->my_lleader, current->am_lleader );
for (j=0; j<c->hier_num_lleaders; j++ ) {
printf("%d: lleader[%d] = %d\n", rank, j, current->lleaders[j]);
}
}
return;
}
int mca_coll_hierarch_ft_event(int state) {
if(OPAL_CRS_CHECKPOINT == state) {
;
}
else if(OPAL_CRS_CONTINUE == state) {
;
}
else if(OPAL_CRS_RESTART == state) {
;
}
else if(OPAL_CRS_TERM == state ) {
;
}
else {
;
}
return OMPI_SUCCESS;
}

View file

@@ -1,351 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2008 University of Houston. All rights reserved.
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_COLL_HIERARCH_EXPORT_H
#define MCA_COLL_HIERARCH_EXPORT_H
#define ALL_LEVELS 0
#define TWO_LEVELS 2
#include "ompi_config.h"
#include "ompi/constants.h"
#include "mpi.h"
#include "opal/class/opal_pointer_array.h"
#include "opal/mca/mca.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/request/request.h"
#include "ompi/communicator/communicator.h"
BEGIN_C_DECLS
/*
* Globally exported variable
*/
OMPI_MODULE_DECLSPEC extern const mca_coll_base_component_2_0_0_t mca_coll_hierarch_component;
extern int mca_coll_hierarch_priority_param;
extern int mca_coll_hierarch_verbose_param;
extern int mca_coll_hierarch_use_rdma_param;
extern int mca_coll_hierarch_ignore_sm_param;
extern int mca_coll_hierarch_detection_alg_param;
extern int mca_coll_hierarch_bcast_alg_param;
extern int mca_coll_hierarch_segsize_param;
#define COLL_HIERARCH_SEG_BCAST_ALG 0
#define COLL_HIERARCH_SEG1_BCAST_ALG 1
#define COLL_HIERARCH_SEG2_BCAST_ALG 2
#define COLL_HIERARCH_SEG3_BCAST_ALG 3
#define COLL_HIERARCH_BASIC_BCAST_ALG 4
#define HIER_DEFAULT_NUM_LLEAD 5
/*
* Data structure for attaching data to the communicator
*/
/* Clarifying some terminology:
* comm: the input communicator, consisting of several lower level communicators.
* lcomm: low level communicator, often referred to as subcommunicator
* lleader: local leader, a dedicated process of each low level communicator
ATTENTION: an lleader might be the 'head' of a low level
communicator of size one!
* llcomm: local leader communicator, grouping all local leaders of a comm.
*/
struct mca_coll_hierarch_module_t {
mca_coll_base_module_t super;
struct ompi_communicator_t *hier_comm; /* link back to the attached comm */
struct ompi_communicator_t *hier_lcomm; /* low level communicator */
opal_pointer_array_t hier_llead; /* local leader communicator structure */
int hier_num_lleaders; /* number of local leaders */
int hier_level; /* level in the hierarchy. For debugging*/
int hier_num_reqs; /* num. of requests */
ompi_request_t **hier_reqs; /* list of requests */
int hier_num_colorarr; /* size of the colorarr array */
int *hier_llr; /* color array compacted (1 entry per color).
Array of size hier_num_lleaders */
int *hier_max_offset; /* Number of processes for each color.
Array of size hier_num_lleaders */
int *hier_colorarr; /* array containing the color of all procs */
};
typedef struct mca_coll_hierarch_module_t mca_coll_hierarch_module_t;
OBJ_CLASS_DECLARATION(mca_coll_hierarch_module_t);
struct mca_coll_hierarch_llead_t {
struct ompi_communicator_t *llcomm; /* local leader communicator */
int *lleaders; /* list of local leaders, ranks in comm */
int my_lleader; /* rank of my lleader in lcomm */
int am_lleader; /* am I an lleader? */
int offset; /* Offset used for this llcomm */
};
typedef struct mca_coll_hierarch_llead_t mca_coll_hierarch_llead_t;
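/*
 * Illustration (a minimal sketch, not part of this component): the
 * comm/lcomm/llcomm hierarchy described above can be mimicked with plain
 * MPI calls, assuming 'color' identifies the node of each rank and the
 * first rank of every color acts as local leader:
 *
 *   MPI_Comm lcomm, llcomm;
 *   int lrank;
 *   MPI_Comm_split(comm, color, rank, &lcomm);             // low level comm
 *   MPI_Comm_rank(lcomm, &lrank);
 *   MPI_Comm_split(comm,                                   // llcomm: leaders
 *                  (0 == lrank) ? 1 : MPI_UNDEFINED,       // only; all other
 *                  rank, &llcomm);                         // ranks receive
 *                                                          // MPI_COMM_NULL
 */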
static inline int mca_coll_hierarch_count_lleaders ( int size, int *carr)
{
/*
* Determine the number of local leaders. Please note that any process
* with color = MPI_UNDEFINED will be counted as the head of a group of its own.
* Please note furthermore that every process with color=MPI_UNDEFINED will be
* stored in this array on its own...
*/
int cnt, i, j, found;
int *llr=NULL;
llr = (int *) malloc ( size * sizeof(int));
if (NULL == llr ){
return OMPI_ERR_OUT_OF_RESOURCE;
}
llr[0] = carr[0];
for (cnt=1, i=1; i<size; i++ ) {
if ( carr[i] == MPI_UNDEFINED ) {
llr[cnt++] = carr[i];
continue;
}
for ( found=0, j=0; j<cnt; j++ ) {
if ( carr[i] == llr[j] ) {
found = 1;
break;
}
}
if ( !found ) {
llr[cnt++] = carr[i];
}
}
free (llr);
return cnt;
}
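/* Example: carr = {0, 0, 1, MPI_UNDEFINED, 1} yields cnt = 3: one leader
   for color 0, one for color 1, and one for the MPI_UNDEFINED process,
   which always forms a group of its own. */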
static inline int mca_coll_hierarch_get_offset ( int rank, int size, int *carr)
{
int offset, i, color = carr[rank];
if ( color == MPI_UNDEFINED ) {
/* processes with an undefined color always count as offset 1 */
return 1;
}
for ( offset=0, i=0; i<=rank; i++) {
if ( carr[i] == color ) {
offset++;
}
}
return offset;
}
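/* Example: carr = {0, 1, 0, 1} and rank = 2 (color 0) gives offset = 2,
   since color 0 occurs twice in carr[0..rank]. */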
/* This function determines the parameters required in hierarchical
* collective operations. It is called from the collective operations themselves.
*
* @param root (input): rank of the root process in comm
* @param hierarch_module (input): module structure. Contains
* all relevant, precomputed data for this set of collectives.
*
* @param llroot (output): rank of the root process in llcomm, MPI_UNDEFINED for all
* processes not being part of the local leader communicator.
* @param lroot (output): rank of the local leader in the low level communicator,
* or MPI_UNDEFINED if there is no low level communicator.
* return value: llcomm (local leader communicator) or MPI_COMM_NULL for
* all processes not being part of the local leader communicator.
*/
struct ompi_communicator_t* mca_coll_hierarch_get_llcomm (int rroot,
mca_coll_hierarch_module_t *hierarch_module,
int* llroot,
int* lleader);
/* This function is supposed to set up all elements of the mca_coll_base_comm_t
* structure, including:
* hierarch_module->hier_num_lleaders: the number of local leaders in the comm
* hierarch_module->hier_llr: array of size hier_num_lleaders containing the colors
* hierarch_module->hier_max_offset: array counting, for each color, how often
* it appears in the colorarr array.
*/
int mca_coll_hierarch_get_llr ( mca_coll_hierarch_module_t *hierarch_module );
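/* Example (hypothetical): hier_colorarr = {0, 0, 1, 1, 1} produces
   hier_num_lleaders = 2, hier_llr = {0, 1} and hier_max_offset = {2, 3}. */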
/* This function is supposed to set all elements of the llead structure based on the
* offset and the rank of the process.
*
* @param rank(input): rank of the calling process in comm
* @param hierarch_module(input): structure of the hierarchical module. Contains
* all relevant, precomputed data for this set of collectives.
* @param llead(output): ptr to the mca_coll_hierarch_llead_t element which should
* be set
* @param offset(input): offset which shall be used.
*/
int mca_coll_hierarch_get_all_lleaders ( int rank, mca_coll_hierarch_module_t *hierarch_module,
struct mca_coll_hierarch_llead_t *llead,
int offset );
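/* Example (hypothetical): with hier_colorarr = {0, 0, 1, 1} and offset = 1,
   llead->lleaders becomes {0, 2} (the first rank of each color); ranks 0
   and 2 get am_lleader = 1, and every rank gets my_lleader = 0, i.e. its
   lleader is rank 0 of its low level communicator. */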
/*
* coll API functions
*/
int mca_coll_hierarch_init_query(bool allow_hierarch_user_threads,
bool have_hidden_threads);
mca_coll_base_module_t *
mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority );
int mca_coll_hierarch_module_enable( mca_coll_base_module_t *module,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_module_finalize(struct ompi_communicator_t *comm);
int mca_coll_hierarch_allgather_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module );
int mca_coll_hierarch_allgatherv_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void * rbuf, int *rcounts,
int *disps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_hierarch_allreduce_intra(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_hierarch_alltoall_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_hierarch_alltoallv_intra(void *sbuf, int *scounts,
int *sdisps,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts,
int *rdisps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_hierarch_alltoallw_intra(void *sbuf, int *scounts,
int *sdisps,
struct ompi_datatype_t **sdtypes,
void *rbuf, int *rcounts,
int *rdisps,
struct ompi_datatype_t **rdtypes,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_hierarch_barrier_intra(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_hierarch_bcast_intra(void *buff, int count,
struct ompi_datatype_t *datatype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_hierarch_exscan_intra(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_gather_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_hierarch_gatherv_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts, int *disps,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_hierarch_reduce_intra(void *sbuf, void* rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_hierarch_reduce_scatter_intra(void *sbuf, void *rbuf,
int *rcounts,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_hierarch_scan_intra(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_hierarch_scatter_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype, void *rbuf,
int rcount, struct ompi_datatype_t *rdtype,
int root, struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
int mca_coll_hierarch_scatterv_intra(void *sbuf, int *scounts, int *disps,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype, int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
/*
* These are trivial implementations of these routines used during comm_query/init,
* since we cannot access any other collectives
*/
int mca_coll_hierarch_allgather_tmp(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_allreduce_tmp(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_bcast_tmp ( void *buf, int count, struct ompi_datatype_t *dtype,
int root, struct ompi_communicator_t *comm);
int mca_coll_hierarch_gather_tmp(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root, struct ompi_communicator_t *comm);
int mca_coll_hierarch_reduce_tmp(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
int root, struct ompi_communicator_t *comm);
int mca_coll_hierarch_ft_event(int status);
END_C_DECLS
#endif /* MCA_COLL_HIERARCH_EXPORT_H */

View file

@@ -1,115 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 University of Houston. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "coll_hierarch.h"
#include <stdio.h>
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/mca/coll/coll.h"
/*
* allreduce_intra
*
* Function: - allreduce using two level hierarchy algorithm
* Accepts: - same as MPI_Allreduce()
* Returns: - MPI_SUCCESS or error code
*/
int mca_coll_hierarch_allreduce_intra(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
struct ompi_communicator_t *llcomm=NULL;
struct ompi_communicator_t *lcomm=NULL;
mca_coll_hierarch_module_t *hierarch_module = (mca_coll_hierarch_module_t *) module;
int rank;
int lroot, llroot;
ptrdiff_t extent, true_extent, lb, true_lb;
char *tmpbuf=NULL, *tbuf=NULL;
int ret=OMPI_SUCCESS;
int root=0;
rank = ompi_comm_rank ( comm );
lcomm = hierarch_module->hier_lcomm;
if ( mca_coll_hierarch_verbose_param ) {
printf("%s:%d: executing hierarchical allreduce with cnt=%d \n",
comm->c_name, rank, count );
}
llcomm = mca_coll_hierarch_get_llcomm ( root, hierarch_module, &llroot, &lroot);
if ( MPI_COMM_NULL != lcomm ) {
ompi_datatype_get_extent(dtype, &lb, &extent);
ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent);
tbuf = (char*)malloc(true_extent + (count - 1) * extent);
if (NULL == tbuf) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
tmpbuf = tbuf - true_lb;
if ( MPI_IN_PLACE != sbuf ) {
ret = lcomm->c_coll.coll_reduce (sbuf, tmpbuf, count, dtype,
op, lroot, lcomm,
lcomm->c_coll.coll_reduce_module);
}
else {
ret = lcomm->c_coll.coll_reduce (rbuf, tmpbuf, count, dtype,
op, lroot, lcomm,
lcomm->c_coll.coll_reduce_module);
}
if ( OMPI_SUCCESS != ret ) {
goto exit;
}
}
if ( MPI_UNDEFINED != llroot ) {
if ( MPI_COMM_NULL != lcomm ) {
ret = llcomm->c_coll.coll_allreduce (tmpbuf, rbuf, count, dtype,
op, llcomm,
llcomm->c_coll.coll_allreduce_module);
}
else {
ret = llcomm->c_coll.coll_allreduce (sbuf, rbuf, count, dtype,
op, llcomm,
llcomm->c_coll.coll_allreduce_module);
}
}
if ( MPI_COMM_NULL != lcomm ) {
ret = lcomm->c_coll.coll_bcast(rbuf, count, dtype, lroot, lcomm,
lcomm->c_coll.coll_bcast_module );
}
exit:
if ( NULL != tmpbuf ) {
free ( tmpbuf );
}
return ret;
}

View file

@@ -1,81 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 University of Houston. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "coll_hierarch.h"
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
/*
* barrier_intra
*
* Function: - barrier using hierarchical algorithm
* Accepts: - same as MPI_Barrier()
* Returns: - MPI_SUCCESS or error code
*/
int mca_coll_hierarch_barrier_intra(struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
struct ompi_communicator_t *llcomm=NULL;
struct ompi_communicator_t *lcomm=NULL;
mca_coll_hierarch_module_t *hierarch_module = (mca_coll_hierarch_module_t *) module;
int root=0;
int lroot, llroot;
int rank, ret=OMPI_SUCCESS;
rank = ompi_comm_rank ( comm );
lcomm = hierarch_module->hier_lcomm;
if ( mca_coll_hierarch_verbose_param ) {
printf("%s:%d: executing hierarchical barrier\n", comm->c_name, rank );
}
llcomm = mca_coll_hierarch_get_llcomm ( root, hierarch_module, &llroot, &lroot);
/*
* Barrier consists of three steps:
* - barrier on the low-level communicators
* - barrier among the local leaders
* - barrier on the low-level communicators. This step is
* necessary to prevent non-local-leader processes from exiting too early.
*/
if ( MPI_COMM_NULL != lcomm ) {
ret = lcomm->c_coll.coll_barrier ( lcomm, lcomm->c_coll.coll_barrier_module );
if ( OMPI_SUCCESS != ret ) {
return ret;
}
}
if ( MPI_UNDEFINED != llroot ) {
ret = llcomm->c_coll.coll_barrier ( llcomm, llcomm->c_coll.coll_barrier_module );
}
if ( MPI_COMM_NULL != lcomm ) {
ret = lcomm->c_coll.coll_barrier ( lcomm, lcomm->c_coll.coll_barrier_module );
}
return ret;
}

View file

@@ -1,755 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2005 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2008 University of Houston. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "coll_hierarch.h"
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/mca/pml/pml.h"
/*
* bcast_intra
*
* Function: - broadcast using hierarchical algorithm
* Accepts: - same arguments as MPI_Bcast()
* Returns: - MPI_SUCCESS or error code
*/
static int mca_coll_hierarch_bcast_intra_seg (void *buff,
int count,
struct ompi_datatype_t *datatype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int segsize );
static int mca_coll_hierarch_bcast_intra_seg1 (void *buff,
int count,
struct ompi_datatype_t *datatype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int segsize );
static int mca_coll_hierarch_bcast_intra_seg2 (void *buff,
int count,
struct ompi_datatype_t *datatype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int segsize );
static int mca_coll_hierarch_bcast_intra_seg3 (void *buff,
int count,
struct ompi_datatype_t *datatype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int segsize );
int mca_coll_hierarch_bcast_intra(void *buff,
int count,
struct ompi_datatype_t *datatype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
int bcast_alg = mca_coll_hierarch_bcast_alg_param;
int segsize = mca_coll_hierarch_segsize_param;
int ret=OMPI_SUCCESS;
/* Here is a brief description of what we try to evaluate:
- bcast_intra_seg uses the bcast of lcomm and llcomm, similar
to the original algorithm in hierarch. However, it can segment
the message, such that we might get an overlap between the two
layers. This overlap is based on the assumption that a process
might be done early with a bcast and can start the next one.
- bcast_intra_seg1: replaces the llcomm->bcast by isend/irecvs
to increase the overlap, but keeps the lcomm->bcast
- bcast_intra_seg2: replaces the lcomm->bcast by isend/irecvs
to increase the overlap, but keeps the llcomm->bcast
- bcast_intra_seg3: replaces both lcomm->bcast and llcomm->bcast
by isend/irecvs
*/
if ( COLL_HIERARCH_SEG_BCAST_ALG == bcast_alg ) {
ret = mca_coll_hierarch_bcast_intra_seg ( buff, count, datatype, root,
comm, module, segsize );
}
else if ( COLL_HIERARCH_SEG1_BCAST_ALG == bcast_alg ) {
ret = mca_coll_hierarch_bcast_intra_seg1 ( buff, count, datatype, root,
comm, module, segsize );
}
else if ( COLL_HIERARCH_SEG2_BCAST_ALG == bcast_alg ) {
ret = mca_coll_hierarch_bcast_intra_seg2 ( buff, count, datatype, root,
comm, module, segsize );
}
else if ( COLL_HIERARCH_SEG3_BCAST_ALG == bcast_alg ) {
ret = mca_coll_hierarch_bcast_intra_seg3 ( buff, count, datatype, root,
comm, module, segsize );
}
else {
/* Segment size of zero forces the entire message to be bcasted
as a single segment. */
ret = mca_coll_hierarch_bcast_intra_seg ( buff, count, datatype, root,
comm, module, 0 );
}
return ret;
}
static int mca_coll_hierarch_bcast_intra_seg (void *buff,
int count,
struct ompi_datatype_t *datatype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int segsize )
{
struct ompi_communicator_t *llcomm=NULL;
struct ompi_communicator_t *lcomm=NULL;
mca_coll_hierarch_module_t *hierarch_module = (mca_coll_hierarch_module_t *) module;
int lroot=MPI_UNDEFINED, llroot=MPI_UNDEFINED;
int rank=0, ret=OMPI_SUCCESS;
MPI_Aint ub=0, typeext=0;
size_t typesize=0;
int realsegsize=0, remaining_count=0;
int num_segments=0, segcount=0, segindex=0;
char* tmpbuf = (char *) buff;
rank = ompi_comm_rank ( comm );
lcomm = hierarch_module->hier_lcomm;
if ( mca_coll_hierarch_verbose_param ) {
printf("%s:%d: executing hierarchical seg bcast with cnt=%d root=%d, segsize=%d\n",
comm->c_name, rank, count, root, segsize );
}
/*
* This function returns the local leader communicator
* which *always* contains the root of this operation.
* This might involve creating a new communicator. This is
* also the reason that *every* process in comm has to call
* this function
*/
llcomm = mca_coll_hierarch_get_llcomm ( root, hierarch_module, &llroot, &lroot);
ompi_datatype_type_size ( datatype, &typesize);
ompi_datatype_get_extent ( datatype, &ub, &typeext);
/* Determine number of segments and number of elements per segment */
if ((typesize > 0) && (segsize % typesize != 0)) {
/* segment size must be a multiple of typesize */
segsize = typesize * (segsize / typesize);
}
if ((segsize == 0) || (count == 0) || (typesize == 0)) {
segcount = count;
num_segments = 1;
}
else {
segcount = segsize/typesize;
num_segments = count/segcount;
if ( (count % segcount) != 0 ) {
num_segments++;
}
if (num_segments == 1) {
segcount = count;
}
}
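/* Worked example: segsize = 32768, typesize = 4 and count = 20000 give
   segcount = 8192 elements per segment and num_segments = 3; the last
   round then sends the remaining 20000 - 2*8192 = 3616 elements. */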
realsegsize = segcount*typeext;
remaining_count = segcount;
for (segindex = 0; segindex < num_segments; segindex++) {
/* determine how many elements are being sent in this round */
if( segindex == (num_segments - 1) ) {
remaining_count = count - segindex*segcount;
}
/* Bcast on the upper level among the local leaders */
if ( MPI_UNDEFINED != llroot ) {
ret = llcomm->c_coll.coll_bcast(tmpbuf, remaining_count,
datatype, llroot, llcomm,
llcomm->c_coll.coll_bcast_module);
if ( OMPI_SUCCESS != ret ) {
return ret;
}
}
/* once the local leaders got the data from the root, they can distribute
* it to the processes in their local, low-level communicator.
*/
if ( MPI_COMM_NULL != lcomm ) {
ret = lcomm->c_coll.coll_bcast(tmpbuf, remaining_count,
datatype, lroot, lcomm,
lcomm->c_coll.coll_bcast_module);
if ( OMPI_SUCCESS != ret ) {
return ret;
}
}
tmpbuf += realsegsize;
}
return ret;
}
static int mca_coll_hierarch_bcast_intra_seg1 (void *buff,
int count,
struct ompi_datatype_t *datatype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int segsize )
{
struct ompi_communicator_t *llcomm=NULL;
struct ompi_communicator_t *lcomm=NULL;
mca_coll_hierarch_module_t *hierarch_module = (mca_coll_hierarch_module_t *) module;
int lroot=MPI_UNDEFINED, llroot=MPI_UNDEFINED;
int llrank=0, llsize=0, rank=0, ret=OMPI_SUCCESS;
MPI_Aint ub=0, typeext=0;
size_t typesize=0;
int i, realsegsize=0, remaining_count=0;
int num_segments=0, segcount=0, segindex=0;
char* tmpbuf = (char *) buff;
ompi_request_t **sreq=NULL;
ompi_request_t *rreq=MPI_REQUEST_NULL;
rank = ompi_comm_rank ( comm );
lcomm = hierarch_module->hier_lcomm;
if ( mca_coll_hierarch_verbose_param ) {
printf("%s:%d: executing hierarchical seg1 bcast with cnt=%d root=%d segsize=%d\n",
comm->c_name, rank, count, root, segsize );
}
/*
* This function returns the local leader communicator
* which *always* contains the root of this operation.
* This might involve creating a new communicator. This is
* also the reason that *every* process in comm has to call
* this function
*/
llcomm = mca_coll_hierarch_get_llcomm ( root, hierarch_module, &llroot, &lroot);
ompi_datatype_type_size ( datatype, &typesize);
ompi_datatype_get_extent ( datatype, &ub, &typeext);
/* Determine number of segments and number of elements per segment */
if ((typesize > 0) && (segsize % typesize != 0)) {
/* segment size must be a multiple of typesize */
segsize = typesize * (segsize / typesize);
}
if ((segsize == 0) || (count == 0) || (typesize == 0)) {
segcount = count;
num_segments = 1;
}
else {
segcount = segsize/typesize;
num_segments = count/segcount;
if ( (count % segcount) != 0 ) {
num_segments++;
}
if (num_segments == 1) {
segcount = count;
}
}
realsegsize = segcount*typeext;
remaining_count = segcount;
if ( MPI_COMM_NULL != llcomm ) {
llrank = ompi_comm_rank ( llcomm );
llsize = ompi_comm_size ( llcomm);
sreq = hierarch_module->hier_reqs;
for(i=0; i<llsize; i++) {
sreq[i] = MPI_REQUEST_NULL;
}
}
/* Broadcasting the first segment in the upper level*/
if ( MPI_UNDEFINED != llroot ) {
ret = llcomm->c_coll.coll_bcast(tmpbuf, remaining_count, datatype,
llroot, llcomm,
llcomm->c_coll.coll_bcast_module );
if ( OMPI_SUCCESS != ret ) {
return ret;
}
}
/* Since the first segment has already been bcasted, this loop
starts at 1 and not with segment 0 */
for (segindex = 1; segindex < num_segments; segindex++) {
/* determine how many elements are being sent in this round */
if( segindex == (num_segments - 1) ) {
remaining_count = count - segindex*segcount;
}
tmpbuf += realsegsize;
/* Broadcasting the next segment in the upper level using non blocking
operations*/
if ( MPI_COMM_NULL != llcomm ) {
if( llrank == llroot) {
for( i = 0; i < llsize; i++) {
if( i != llroot) {
ret = MCA_PML_CALL(isend(tmpbuf, remaining_count, datatype, i,
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD,
llcomm, &(sreq[i])));
if ( OMPI_SUCCESS != ret ) {
return ret;
}
}
}
}
else {
ret = MCA_PML_CALL(irecv(tmpbuf, remaining_count, datatype, llroot,
MCA_COLL_BASE_TAG_BCAST,
llcomm, &rreq ));
if ( OMPI_SUCCESS != ret ) {
return ret;
}
}
}
/* broadcasting the previous segment among the lower level processes
using blocking operations*/
if ( MPI_COMM_NULL != lcomm ) {
ret = lcomm->c_coll.coll_bcast(tmpbuf-realsegsize, segcount,
datatype, lroot, lcomm,
lcomm->c_coll.coll_bcast_module);
if ( OMPI_SUCCESS != ret ) {
return ret;
}
}
if ( MPI_COMM_NULL != llcomm ) {
if ( llrank == llroot ) {
ret = ompi_request_wait_all( llsize, sreq, MPI_STATUSES_IGNORE);
if ( OMPI_SUCCESS != ret ) {
return ret;
}
}
else {
ret = ompi_request_wait( &rreq, MPI_STATUS_IGNORE);
if ( OMPI_SUCCESS != ret ) {
return ret;
}
}
}
}
/* Bcasting the last segment among the lower level processes using blocking operations
* once the local leaders got the data from the root, they can distribute
* it to the processes in their local, low-level communicator.
*/
if ( MPI_COMM_NULL != lcomm ) {
ret = lcomm->c_coll.coll_bcast(tmpbuf, remaining_count, datatype,
lroot, lcomm,
lcomm->c_coll.coll_bcast_module);
}
return ret;
}
static int mca_coll_hierarch_bcast_intra_seg2 (void *buff,
int count,
struct ompi_datatype_t *datatype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int segsize )
{
struct ompi_communicator_t *llcomm=NULL;
struct ompi_communicator_t *lcomm=NULL;
mca_coll_hierarch_module_t *hierarch_module = (mca_coll_hierarch_module_t *) module;
int lroot=MPI_UNDEFINED, llroot=MPI_UNDEFINED;
int rank=0, ret=OMPI_SUCCESS;
int lsize=0, lrank=0;
MPI_Aint ub=0, typeext=0;
size_t typesize=0;
int i, realsegsize=0, remaining_count=0;
int num_segments=0, segcount=0, segindex=0;
char* tmpbuf = (char *) buff;
ompi_request_t **sreq=NULL;
ompi_request_t *rreq=MPI_REQUEST_NULL;
rank = ompi_comm_rank ( comm );
lcomm = hierarch_module->hier_lcomm;
if ( mca_coll_hierarch_verbose_param ) {
printf("%s:%d: executing hierarchical seg2 bcast with cnt=%d root=%d segsize=%d\n",
comm->c_name, rank, count, root, segsize );
}
/*
* This function returns the local leader communicator
* which *always* contains the root of this operation.
* This might involve creating a new communicator. This is
* also the reason that *every* process in comm has to call
* this function
*/
llcomm = mca_coll_hierarch_get_llcomm ( root, hierarch_module, &llroot, &lroot);
ompi_datatype_type_size ( datatype, &typesize);
ompi_datatype_get_extent ( datatype, &ub, &typeext);
/* Determine number of segments and number of elements per segment */
if ((typesize > 0) && (segsize % typesize != 0)) {
/* segment size must be a multiple of typesize */
segsize = typesize * (segsize / typesize);
}
if ((segsize == 0) || (count == 0) || (typesize == 0)) {
segcount = count;
num_segments = 1;
}
else {
segcount = segsize/typesize;
num_segments = count/segcount;
if ( (count % segcount) != 0 ) {
num_segments++;
}
if (num_segments == 1) {
segcount = count;
}
}
realsegsize = segcount*typeext;
remaining_count = segcount;
lsize = ompi_comm_size (lcomm);
sreq = hierarch_module->hier_reqs;
for(i=0; i<lsize; i++) {
sreq[i] = MPI_REQUEST_NULL;
}
if ( MPI_UNDEFINED != llroot ) {
ret = llcomm->c_coll.coll_bcast(tmpbuf, remaining_count, datatype,
llroot, llcomm,
llcomm->c_coll.coll_bcast_module);
if ( OMPI_SUCCESS != ret ) {
return ret;
}
}
if ( MPI_COMM_NULL != lcomm ) {
lrank = ompi_comm_rank ( lcomm );
}
for (segindex = 1; segindex < num_segments; segindex++) {
/* once the local leaders got the data from the root, they can distribute
* it to the processes in their local, low-level communicator.*/
if ( MPI_COMM_NULL != lcomm ) {
if(lrank == lroot) {
for(i = 0; i < lsize; i++) {
if( i != lroot) {
ret = MCA_PML_CALL(isend(tmpbuf, remaining_count, datatype, i,
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD,
lcomm, &(sreq[i])));
if ( OMPI_SUCCESS != ret ) {
return ret;
}
}
}
}
else {
ret = MCA_PML_CALL(irecv(tmpbuf, remaining_count, datatype, lroot,
MCA_COLL_BASE_TAG_BCAST, lcomm, &rreq));
if ( OMPI_SUCCESS != ret ) {
return ret;
}
}
}
/* determine how many elements are being sent in this round */
if( segindex == (num_segments - 1) ) {
remaining_count = count - segindex*segcount;
}
tmpbuf += realsegsize;
if ( MPI_UNDEFINED != llroot ) {
ret = llcomm->c_coll.coll_bcast(tmpbuf, remaining_count, datatype,
llroot, llcomm,
llcomm->c_coll.coll_bcast_module);
if ( OMPI_SUCCESS != ret ) {
return ret;
}
}
if ( MPI_COMM_NULL != lcomm ) {
if ( lrank == lroot ) {
ret = ompi_request_wait_all ( lsize, sreq, MPI_STATUSES_IGNORE);
if ( OMPI_SUCCESS != ret ) {
return ret;
}
}
else {
ret = ompi_request_wait( &rreq, MPI_STATUS_IGNORE);
if ( OMPI_SUCCESS != ret ) {
return ret;
}
}
}
}
/* Bcasting the last segment among the lower level processes
* once the local leaders got the data from the root, they can distribute
* it to the processes in their local, low-level communicator.
*/
if ( MPI_COMM_NULL != lcomm ) {
ret = lcomm->c_coll.coll_bcast(tmpbuf, remaining_count, datatype,
lroot, lcomm,
lcomm->c_coll.coll_bcast_module);
}
return ret;
}
static int mca_coll_hierarch_bcast_intra_seg3 (void *buff,
int count,
struct ompi_datatype_t *datatype,
int root,
struct ompi_communicator_t *comm,
mca_coll_base_module_t *module,
int segsize )
{
struct ompi_communicator_t *llcomm=NULL;
struct ompi_communicator_t *lcomm=NULL;
mca_coll_hierarch_module_t *hierarch_module = (mca_coll_hierarch_module_t *) module;
int lroot=MPI_UNDEFINED, llroot=MPI_UNDEFINED;
int llrank=MPI_UNDEFINED, llsize=0, rank=0, ret=OMPI_SUCCESS;
int lsize=0, lrank=MPI_UNDEFINED;
MPI_Aint ub=0, typeext=0;
size_t typesize=0;
int i, realsegsize=0, remaining_count=0;
int num_segments=0, segcount=0, segindex=0;
char* tmpbuf = (char *) buff;
ompi_request_t **sreq=NULL, **sreq1=NULL;
ompi_request_t *rreq=MPI_REQUEST_NULL, *rreq1=MPI_REQUEST_NULL;
rank = ompi_comm_rank ( comm );
lcomm = hierarch_module->hier_lcomm;
if ( mca_coll_hierarch_verbose_param ) {
printf("%s:%d: executing hierarchical seg3 bcast with cnt=%d root=%d segsize=%d\n",
comm->c_name, rank, count, root, segsize );
}
/*
* This function returns the local leader communicator
* which *always* contains the root of this operation.
* This might involve creating a new communicator. This is
* also the reason that *every* process in comm has to call
* this function
*/
llcomm = mca_coll_hierarch_get_llcomm ( root, hierarch_module, &llroot, &lroot);
ompi_datatype_type_size ( datatype, &typesize);
ompi_datatype_get_extent ( datatype, &ub, &typeext);
/* Determine number of segments and number of elements per segment */
if ((typesize > 0) && (segsize % typesize != 0)) {
/* segment size must be a multiple of typesize */
segsize = typesize * (segsize / typesize);
}
if ((segsize == 0) || (count == 0) || (typesize == 0)) {
segcount = count;
num_segments = 1;
} else {
segcount = segsize/typesize;
num_segments = count/segcount;
if ( (count % segcount) != 0 ) num_segments++;
if (num_segments == 1) segcount = count;
}
realsegsize = segcount*typeext;
remaining_count = segcount;
if ( MPI_COMM_NULL != lcomm ) {
lsize = ompi_comm_size ( lcomm );
lrank = ompi_comm_rank ( lcomm );
sreq1 = (ompi_request_t **)malloc ( lsize * sizeof(ompi_request_t *));
if ( NULL == sreq1 ) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
for(i=0; i<lsize; i++) {
sreq1[i] = MPI_REQUEST_NULL;
}
}
if ( MPI_COMM_NULL != llcomm ) {
llsize = ompi_comm_size (llcomm);
llrank = ompi_comm_rank ( llcomm );
sreq = hierarch_module->hier_reqs;
for(i=0; i<llsize; i++) {
sreq[i] = MPI_REQUEST_NULL;
}
}
/* Broadcasting the first segment in the upper level*/
if ( MPI_UNDEFINED != llroot ) {
ret = llcomm->c_coll.coll_bcast(tmpbuf, remaining_count, datatype,
llroot, llcomm,
llcomm->c_coll.coll_bcast_module);
if ( OMPI_SUCCESS != ret ) {
goto exit;
}
}
for (segindex = 1; segindex < num_segments; segindex++) {
/* determine how many elements are being sent in this round */
if( segindex == (num_segments - 1) ) {
remaining_count = count - segindex*segcount;
}
tmpbuf += realsegsize;
/* Broadcasting the next segment in the upper level*/
if ( MPI_COMM_NULL != llcomm ) {
if(llrank == llroot) {
for(i = 0; i < llsize; i++) {
if( i != llroot) {
ret = MCA_PML_CALL(isend(tmpbuf, remaining_count, datatype, i,
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD,
llcomm, (sreq+i) ));
if ( OMPI_SUCCESS != ret ) {
goto exit;
}
}
}
}
else {
ret = MCA_PML_CALL(irecv(tmpbuf, remaining_count, datatype, llroot,
MCA_COLL_BASE_TAG_BCAST,
llcomm, &rreq ));
if ( OMPI_SUCCESS != ret ) {
goto exit;
}
}
}
/* broadcasting the previous segment among the lower level processes
* once the local leaders got the data from the root, they can distribute
* it to the processes in their local, low-level communicator.
*/
if ( MPI_COMM_NULL != lcomm ) {
if( lrank == lroot) {
for( i = 0; i < lsize; i++) {
if( i != lroot) {
ret = MCA_PML_CALL(isend(tmpbuf-realsegsize, segcount, datatype, i,
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD,
lcomm, (sreq1+i) ));
if ( OMPI_SUCCESS != ret ) {
goto exit;
}
}
}
}
else {
ret = MCA_PML_CALL(irecv(tmpbuf-realsegsize, segcount, datatype, lroot,
MCA_COLL_BASE_TAG_BCAST , lcomm, &rreq1 ));
if ( OMPI_SUCCESS != ret ) {
goto exit;
}
}
}
/* Wait for the upper level bcast to complete */
if ( MPI_COMM_NULL != llcomm ) {
if ( llrank == llroot ) {
ret = ompi_request_wait_all(llsize, sreq, MPI_STATUSES_IGNORE);
if ( OMPI_SUCCESS != ret ) {
goto exit;
}
}
else {
ret = ompi_request_wait( &rreq, MPI_STATUS_IGNORE );
if ( OMPI_SUCCESS != ret ) {
goto exit;
}
}
}
/* Wait for the lower level bcast to complete */
if ( MPI_COMM_NULL != lcomm ) {
if ( lrank == lroot ) {
ret = ompi_request_wait_all(lsize, sreq1, MPI_STATUSES_IGNORE);
if ( OMPI_SUCCESS != ret ) {
goto exit;
}
}
else {
ret = ompi_request_wait( &rreq1, MPI_STATUS_IGNORE);
if ( OMPI_SUCCESS != ret ) {
goto exit;
}
}
}
}
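/* Note that the loop above pipelines the two levels: while segment k is
 * in flight across the local-leader communicator, segment k-1 is
 * simultaneously pushed out inside each node-local communicator, and
 * only then are both sets of requests waited upon. */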
/* Broadcast the last segment among the lower level processes:
 * once the local leaders have received the data from the root, they can
 * distribute it to the processes in their local, low-level communicator.
 */
if ( MPI_COMM_NULL != lcomm ) {
ret = lcomm->c_coll.coll_bcast(tmpbuf, remaining_count, datatype,
lroot, lcomm,
lcomm->c_coll.coll_bcast_module);
}
exit:
if ( NULL != sreq1 ) {
free ( sreq1 );
}
return ret;
}
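/* For reference, a minimal sketch of the same two-level idea expressed
 * with plain MPI calls (not part of the removed component; the
 * communicator/root names merely mirror the code above): the root
 * first broadcasts across the local-leader communicator, then every
 * leader re-broadcasts inside its node-local communicator. */
#include <mpi.h>

static int two_level_bcast(void *buf, int count, MPI_Datatype dtype,
                           int llroot, int lroot,
                           MPI_Comm llcomm, MPI_Comm lcomm)
{
    int ret = MPI_SUCCESS;

    /* Step 1: local leaders exchange the data across nodes. */
    if (MPI_COMM_NULL != llcomm) {
        ret = MPI_Bcast(buf, count, dtype, llroot, llcomm);
        if (MPI_SUCCESS != ret) {
            return ret;
        }
    }
    /* Step 2: each leader distributes the data within its node. */
    if (MPI_COMM_NULL != lcomm) {
        ret = MPI_Bcast(buf, count, dtype, lroot, lcomm);
    }
    return ret;
}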


@@ -1,224 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
* Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2007 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007-2009 University of Houston. All rights reserved.
* Copyright (c) 2008 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*
* These symbols are in a file by themselves to provide nice linker
* semantics. Since linkers generally pull in symbols by object
* files, keeping these symbols as the only symbols in this file
* prevents utility programs such as "ompi_info" from having to import
* entire components just to query their version and parameters.
*/
#include "ompi_config.h"
#include "coll_hierarch.h"
#include "mpi.h"
#include "ompi/communicator/communicator.h"
#include "ompi/mca/coll/coll.h"
/*
* Public string showing the coll ompi_hierarch component version number
*/
const char *mca_coll_hierarch_component_version_string =
"OMPI/MPI hierarch collective MCA component version " OMPI_VERSION;
/*
* Global variable
*/
int mca_coll_hierarch_priority_param=0;
int mca_coll_hierarch_verbose_param=0;
int mca_coll_hierarch_use_rdma_param=0;
int mca_coll_hierarch_ignore_sm_param=0;
int mca_coll_hierarch_detection_alg_param=2;
int mca_coll_hierarch_bcast_alg_param=COLL_HIERARCH_BASIC_BCAST_ALG;
int mca_coll_hierarch_segsize_param=32768;
/*
* Local function
*/
static int hierarch_register(void);
/*
* Instantiate the public struct with all of our public information
* and pointers to our public functions in it
*/
const mca_coll_base_component_2_0_0_t mca_coll_hierarch_component = {
/* First, the mca_component_t struct containing meta information
about the component itself */
{
MCA_COLL_BASE_VERSION_2_0_0,
/* Component name and version */
"hierarch",
OMPI_MAJOR_VERSION,
OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION,
/* Component open and close functions */
NULL,
NULL,
NULL,
hierarch_register
},
{
/* The component is checkpoint ready */
MCA_BASE_METADATA_PARAM_CHECKPOINT
},
/* Initialization / querying functions */
mca_coll_hierarch_init_query,
mca_coll_hierarch_comm_query,
};
static int hierarch_register(void)
{
/* Default to a priority of 0, i.e. the component is never selected
   unless the user explicitly raises its priority */
mca_coll_hierarch_priority_param = 0;
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
"priority", "Priority of the hierarchical coll component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_hierarch_priority_param);
mca_coll_hierarch_verbose_param = 0;
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
"verbose",
"Turn verbose message of the hierarchical coll component on/off",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_hierarch_verbose_param);
mca_coll_hierarch_use_rdma_param = 0;
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
"use_rdma",
"Switch from the send btl list used to detect hierarchies to "
"the rdma btl list",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_hierarch_use_rdma_param);
mca_coll_hierarch_ignore_sm_param = 0;
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
"ignore_sm",
"Ignore sm protocol when detecting hierarchies. "
"Required to enable the usage of protocol"
" specific collective operations",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_hierarch_ignore_sm_param);
mca_coll_hierarch_detection_alg_param = 2;
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
"detection_alg",
"Used to specify the algorithm for detecting Hierarchy."
"Choose between all or two levels of hierarchy",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_hierarch_detection_alg_param);
mca_coll_hierarch_bcast_alg_param = COLL_HIERARCH_BASIC_BCAST_ALG;
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
"bcast_alg",
"Used to specify the algorithm used for bcast operations.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_hierarch_bcast_alg_param);
mca_coll_hierarch_segsize_param = 32768;
(void) mca_base_component_var_register(&mca_coll_hierarch_component.collm_version,
"segment_size",
"Used to specify the segment size for segmented algorithms.",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&mca_coll_hierarch_segsize_param);
return OMPI_SUCCESS;
}
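/* The variables registered above follow the usual MCA naming scheme
 * (<framework>_<component>_<name>), so they can be adjusted at run
 * time, e.g. "mpirun --mca coll_hierarch_priority 90 ..." to raise the
 * selection priority above its default of 0. */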
static void
mca_coll_hierarch_module_construct(mca_coll_hierarch_module_t *module)
{
module->hier_lcomm = MPI_COMM_NULL;
module->hier_reqs = NULL;
module->hier_colorarr = NULL;
module->hier_llr = NULL;
return;
}
static void
mca_coll_hierarch_module_destruct(mca_coll_hierarch_module_t *hierarch_module)
{
int i, size;
struct mca_coll_hierarch_llead_t *current=NULL;
if ( MPI_COMM_NULL != hierarch_module->hier_lcomm ) {
ompi_comm_free (&(hierarch_module->hier_lcomm) );
}
if ( NULL != hierarch_module->hier_reqs ) {
free ( hierarch_module->hier_reqs );
}
size = opal_pointer_array_get_size ( &(hierarch_module->hier_llead));
for ( i=0; i<size; i++) {
current = (struct mca_coll_hierarch_llead_t *)opal_pointer_array_get_item (
&(hierarch_module->hier_llead), i ) ;
if ( NULL == current ) {
continue;
}
if ( NULL != current->lleaders ) {
free ( current->lleaders );
}
if ( MPI_COMM_NULL != current->llcomm ){
ompi_comm_free ( &(current->llcomm));
}
free ( current );
}
opal_pointer_array_remove_all ( &(hierarch_module->hier_llead));
OBJ_DESTRUCT (&(hierarch_module->hier_llead));
if ( NULL != hierarch_module->hier_colorarr ) {
free ( hierarch_module->hier_colorarr );
}
if ( NULL != hierarch_module->hier_llr ) {
free ( hierarch_module->hier_llr);
}
return;
}
OBJ_CLASS_INSTANCE(mca_coll_hierarch_module_t,
mca_coll_base_module_t,
mca_coll_hierarch_module_construct,
mca_coll_hierarch_module_destruct);
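/* Usage sketch: given the class instance above, the OPAL object system
 * runs the construct/destruct hooks automatically, e.g.:
 *
 *   mca_coll_hierarch_module_t *m = OBJ_NEW(mca_coll_hierarch_module_t);
 *   ...
 *   OBJ_RELEASE(m);   <- last release invokes the destructor, which
 *                        frees lcomm, reqs, colorarr and llr as above
 */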


@@ -1,109 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2014 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2007 University of Houston. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "coll_hierarch.h"
#include <stdio.h>
#include "mpi.h"
#include "ompi/constants.h"
#include "ompi/communicator/communicator.h"
#include "ompi/datatype/ompi_datatype.h"
#include "ompi/mca/coll/coll.h"
/*
* reduce_intra
*
* Function: - reduction using two level hierarchy algorithm
* Accepts: - same as MPI_Reduce()
* Returns: - MPI_SUCCESS or error code
*/
int mca_coll_hierarch_reduce_intra(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
int root, struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
struct ompi_communicator_t *llcomm=NULL;
struct ompi_communicator_t *lcomm=NULL;
mca_coll_hierarch_module_t *hierarch_module = (mca_coll_hierarch_module_t *) module;
int rank;
int lroot, llroot;
ptrdiff_t extent, true_extent, lb, true_lb;
char *tmpbuf=NULL, *tbuf=NULL;
int ret=OMPI_SUCCESS;
rank = ompi_comm_rank ( comm );
lcomm = hierarch_module->hier_lcomm;
if ( mca_coll_hierarch_verbose_param ) {
printf("%s:%d: executing hierarchical reduce with cnt=%d and root=%d\n",
comm->c_name, rank, count, root );
}
llcomm = mca_coll_hierarch_get_llcomm ( root, hierarch_module, &llroot, &lroot);
if ( MPI_COMM_NULL != lcomm ) {
ompi_datatype_get_extent(dtype, &lb, &extent);
ompi_datatype_get_true_extent(dtype, &true_lb, &true_extent);
tbuf = (char*)malloc(true_extent + (count - 1) * extent);
if (NULL == tbuf) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
tmpbuf = tbuf - true_lb;
if ( MPI_IN_PLACE != sbuf ) {
ret = lcomm->c_coll.coll_reduce (sbuf, tmpbuf, count, dtype,
op, lroot, lcomm,
lcomm->c_coll.coll_reduce_module);
}
else {
ret = lcomm->c_coll.coll_reduce (rbuf, tmpbuf, count, dtype,
op, lroot, lcomm,
lcomm->c_coll.coll_reduce_module);
}
if ( OMPI_SUCCESS != ret ) {
goto exit;
}
}
if ( MPI_UNDEFINED != llroot ) {
if ( MPI_COMM_NULL != lcomm ) {
ret = llcomm->c_coll.coll_reduce (tmpbuf, rbuf, count, dtype,
op, llroot, llcomm,
llcomm->c_coll.coll_reduce_module);
}
else {
ret = llcomm->c_coll.coll_reduce (sbuf, rbuf, count, dtype,
op, llroot, llcomm,
llcomm->c_coll.coll_reduce_module);
}
}
exit:
if ( NULL != tmpbuf ) {
free ( tmpbuf );
}
return ret;
}
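/* A self-contained sketch (standard MPI only, hypothetical helper
 * name) of the allocation idiom used above: a temporary buffer for
 * "count" elements of an arbitrary datatype needs
 * true_extent + (count - 1) * extent bytes, and the usable base
 * pointer is the raw allocation shifted by -true_lb. */
#include <mpi.h>
#include <stdlib.h>

static void *alloc_reduce_tmpbuf(int count, MPI_Datatype dtype, char **raw)
{
    MPI_Aint lb, extent, true_lb, true_extent;

    MPI_Type_get_extent(dtype, &lb, &extent);
    MPI_Type_get_true_extent(dtype, &true_lb, &true_extent);
    *raw = (char *) malloc(true_extent + (count - 1) * extent);
    /* Callers free(*raw); reduction routines use the returned pointer. */
    return (NULL == *raw) ? NULL : (void *) (*raw - true_lb);
}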


@@ -1,209 +0,0 @@
/*
* Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
* University Research and Technology
* Corporation. All rights reserved.
* Copyright (c) 2004-2006 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2008 University of Houston. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "coll_hierarch.h"
#include <stdio.h>
#include "mpi.h"
#include "ompi/communicator/communicator.h"
#include "ompi/op/op.h"
#include "ompi/mca/coll/base/base.h"
#include "ompi/mca/coll/base/coll_tags.h"
#include "ompi/mca/bml/base/base.h"
#include "ompi/mca/pml/pml.h"
int mca_coll_hierarch_allreduce_tmp(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm)
{
int ret;
ret = mca_coll_hierarch_reduce_tmp ( sbuf, rbuf, count, dtype, op, 0, comm);
if ( OMPI_SUCCESS != ret ) {
return ret;
}
ret = mca_coll_hierarch_bcast_tmp ( rbuf, count, dtype, 0, comm);
return ret;
}
int mca_coll_hierarch_allgather_tmp(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm)
{
int ret;
int size = ompi_comm_size (comm);
ret = mca_coll_hierarch_gather_tmp ( sbuf, scount, sdtype, rbuf, rcount,
rdtype, 0, comm);
if ( OMPI_SUCCESS != ret ) {
return ret;
}
ret = mca_coll_hierarch_bcast_tmp ( rbuf, rcount*size, rdtype, 0, comm);
return ret;
}
int mca_coll_hierarch_bcast_tmp ( void *buf, int count, struct ompi_datatype_t *dtype,
int root, struct ompi_communicator_t *comm)
{
int err = OMPI_SUCCESS;
int rank = ompi_comm_rank ( comm );
if ( rank != root ) {
err = MCA_PML_CALL(recv(buf, count, dtype, root,
MCA_COLL_BASE_TAG_BCAST,
comm, MPI_STATUS_IGNORE));
if ( OMPI_SUCCESS != err ) {
return err;
}
}
else {
int i;
int size=ompi_comm_size ( comm );
for ( i=0; i<size; i++ ) {
if ( i == root ) {
continue;
}
err = MCA_PML_CALL(send(buf, count, dtype, i,
MCA_COLL_BASE_TAG_BCAST,
MCA_PML_BASE_SEND_STANDARD, comm));
if ( OMPI_SUCCESS != err ) {
return err;
}
}
}
return err;
}
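/* Note: this fallback is a linear fan-out in which the root issues
 * size-1 blocking sends, so it scales linearly with the number of
 * processes; it is only meant as a temporary stand-in (hence the _tmp
 * suffix), not an optimized broadcast. */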
int mca_coll_hierarch_reduce_tmp(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
int root, struct ompi_communicator_t *comm)
{
int i, err, size;
char *pml_buffer = NULL;
ptrdiff_t extent, lb;
int rank = ompi_comm_rank(comm);
/* If not root, send data to the root. */
if (rank != root) {
err = MCA_PML_CALL(send(sbuf, count, dtype, root,
MCA_COLL_BASE_TAG_REDUCE,
MCA_PML_BASE_SEND_STANDARD, comm));
return err;
}
size = ompi_comm_size(comm);
ompi_datatype_get_extent(dtype, &lb, &extent);
pml_buffer = (char*)malloc(count * extent);
if (NULL == pml_buffer) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
err = ompi_datatype_copy_content_same_ddt(dtype, count, (char*)rbuf, (char*)sbuf);
if (MPI_SUCCESS != err) {
goto exit;
}
/* Loop receiving and calling reduction function (C or Fortran). */
for (i = size - 1; i >= 0; --i) {
if (rank == i) {
continue;
} else {
err = MCA_PML_CALL(recv(pml_buffer, count, dtype, i,
MCA_COLL_BASE_TAG_REDUCE, comm,
MPI_STATUS_IGNORE));
if (MPI_SUCCESS != err) {
goto exit;
}
}
/* Perform the reduction */
ompi_op_reduce(op, pml_buffer, rbuf, count, dtype);
}
exit:
if (NULL != pml_buffer) {
free(pml_buffer);
}
return err;
}
int mca_coll_hierarch_gather_tmp(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root, struct ompi_communicator_t *comm)
{
int i;
int err;
int rank;
int size;
char *ptmp;
MPI_Aint incr;
MPI_Aint extent;
MPI_Aint lb;
size = ompi_comm_size(comm);
rank = ompi_comm_rank(comm);
/* Everyone but root sends data and returns. */
if (rank != root) {
return MCA_PML_CALL(send(sbuf, scount, sdtype, root,
MCA_COLL_BASE_TAG_GATHER,
MCA_PML_BASE_SEND_STANDARD, comm));
}
/* I am the root, loop receiving the data. */
ompi_datatype_get_extent(rdtype, &lb, &extent);
incr = extent * rcount;
for (i = 0, ptmp = (char *) rbuf; i < size; ++i, ptmp += incr) {
if (i == rank) {
if (MPI_IN_PLACE != sbuf) {
err = ompi_datatype_sndrcv(sbuf, scount, sdtype,
ptmp, rcount, rdtype);
} else {
err = MPI_SUCCESS;
}
} else {
err = MCA_PML_CALL(recv(ptmp, rcount, rdtype, i,
MCA_COLL_BASE_TAG_GATHER,
comm, MPI_STATUS_IGNORE));
}
if (MPI_SUCCESS != err) {
return err;
}
}
/* All done */
return MPI_SUCCESS;
}
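/* Placement example (hypothetical numbers): rank i's contribution
 * starts at (char *)rbuf + i * rcount * extent; with rcount=4 and an
 * MPI_INT extent of 4 bytes, incr is 16 and rank 2's data therefore
 * begins at byte offset 32. */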


@@ -1,27 +0,0 @@
# -*- shell-script -*-
#
# Copyright (c) 2013 Sandia National Laboratories. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_ompi_coll_hierarch_POST_CONFIG(will_build)
# ----------------------------------------
# The hierarch coll requires a BML endpoint tag to compile, so require it.
# Require in POST_CONFIG instead of CONFIG so that we only require it
# if we're not disabled.
AC_DEFUN([MCA_ompi_coll_hierarch_POST_CONFIG], [
AS_IF([test "$1" = "1"], [OMPI_REQUIRE_ENDPOINT_TAG([BML])])
])dnl
# MCA_ompi_coll_hierarch_CONFIG(action-if-can-compile,
# [action-if-cant-compile])
# ------------------------------------------------
# We can always build, unless we were explicitly disabled.
AC_DEFUN([MCA_ompi_coll_hierarch_CONFIG],[
AC_CONFIG_FILES([ompi/mca/coll/hierarch/Makefile])
[$1]
])dnl
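# For reference (standard Open MPI configure usage, not specific to
# this file): the component could be excluded at configure time with
# something like
#   ./configure --enable-mca-no-build=coll-hierarch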