diff --git a/ompi/mca/coll/hierarch/coll_hierarch.c b/ompi/mca/coll/hierarch/coll_hierarch.c index 39a2a79e7a..93c28c3717 100644 --- a/ompi/mca/coll/hierarch/coll_hierarch.c +++ b/ompi/mca/coll/hierarch/coll_hierarch.c @@ -198,7 +198,6 @@ mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority, } tdata->hier_num_colorarr = size; - tdata->hier_type_colorarr = MCA_COLL_HIERARCH_COLORARR_LINEAR; tdata->hier_colorarr = (int *) malloc ( sizeof(int) * size); if ( NULL == tdata->hier_colorarr ) { *priority = 0; @@ -378,7 +377,7 @@ mca_coll_hierarch_module_init(struct ompi_communicator_t *comm) llead->lleaders, 1 ); /* determine my lleader, maybe its me */ - mca_coll_hierarch_get_lleader ( rank, data, &(llead->my_lleader), 1 ); + mca_coll_hierarch_get_lleader ( rank, data, &(llead->my_lleader), &(llead->am_lleader), 1 ); /* Generate the lleader communicator assuming that all lleaders are the first process in the list of processes with the same color. A function generating @@ -392,8 +391,6 @@ mca_coll_hierarch_module_init(struct ompi_communicator_t *comm) /* Store it now on the data structure */ ompi_pointer_array_set_item ( &(data->hier_llead), 0, &(llead)); - /* This is the point where I will introduce later on a function trying to - compact the colorarr array. 
Not done at the moment */ mca_coll_hierarch_dump_struct (data); exit: @@ -469,9 +466,10 @@ int mca_coll_hierarch_comm_unquery ( struct ompi_communicator_t *comm, } -struct ompi_communicator_t* mca_coll_hierarch_get_llcomm (int rank, +struct ompi_communicator_t* mca_coll_hierarch_get_llcomm (int root, struct mca_coll_base_comm_t *data, - int* lrank) + int* llroot, + int* lleader) { struct ompi_communicator_t *llcomm=NULL; struct ompi_group_t *llgroup=NULL; @@ -497,13 +495,13 @@ struct ompi_communicator_t* mca_coll_hierarch_get_llcomm (int rank, return NULL; } - rc = ompi_group_translate_ranks ( group, 1, &rank, llgroup, lrank); + rc = ompi_group_translate_ranks ( group, 1, &root, llgroup, llroot); if ( OMPI_SUCCESS != rc ) { return NULL; } /* ompi_group_free (&llgroup) */ - if ( MPI_UNDEFINED != *lrank ) { + if ( MPI_UNDEFINED != *llroot ) { found = 1; break; } @@ -513,7 +511,7 @@ struct ompi_communicator_t* mca_coll_hierarch_get_llcomm (int rank, int offset; /* determine what our offset of root is in the colorarr */ - offset = mca_coll_hierarch_get_offset ( rank, data->hier_num_colorarr, data->hier_colorarr ); + offset = mca_coll_hierarch_get_offset ( root, data->hier_num_colorarr, data->hier_colorarr ); /* allocate a new llead element */ llead = (struct mca_coll_hierarch_llead_t *) malloc (sizeof(struct mca_coll_hierarch_llead_t)); @@ -535,7 +533,7 @@ struct ompi_communicator_t* mca_coll_hierarch_get_llcomm (int rank, /* create new lleader subcommunicator */ - rc = ompi_comm_split ( data->hier_comm, llead->am_lleader, rank, &llcomm, 0); + rc = ompi_comm_split ( data->hier_comm, llead->am_lleader, root, &llcomm, 0); if ( OMPI_SUCCESS != rc ) { return NULL; } diff --git a/ompi/mca/coll/hierarch/coll_hierarch.h b/ompi/mca/coll/hierarch/coll_hierarch.h index 458192d6b0..c03f47cc57 100644 --- a/ompi/mca/coll/hierarch/coll_hierarch.h +++ b/ompi/mca/coll/hierarch/coll_hierarch.h @@ -45,6 +45,13 @@ extern int mca_coll_hierarch_ignore_sm_param; * Data structure for 
attaching data to the communicator */ +/* Clarifying some terminology: + * comm: the input communicator, consisting of several lower level communicators. + * lcomm: low level communicator, often referred to as subcommunicator + * lleader: local leader, a dedicated process of each low level communicator + * llcomm: local leader communicator, grouping all local leaders of a comm. +*/ + struct mca_coll_base_comm_t { struct ompi_communicator_t *hier_comm; /* link back to the attached comm */ struct ompi_communicator_t *hier_lcomm; /* low level communicator */ @@ -53,7 +60,6 @@ extern int mca_coll_hierarch_ignore_sm_param; int hier_level; /* level in the hierarchy. just debugging */ int hier_num_reqs; /* num. of requests */ ompi_request_t **hier_reqs; /* list of requests */ - int hier_type_colorarr; /* format in which the colorarr is stored */ int hier_num_colorarr; /* size of the colorarr array */ int *hier_llr; /* color array compacted (1 entry per color)*/ int *hier_colorarr; /* array containing the color of all procs */ @@ -62,60 +68,12 @@ extern int mca_coll_hierarch_ignore_sm_param; struct mca_coll_hierarch_llead_t { struct ompi_communicator_t *llcomm; /* local leader communicator */ int *lleaders; /* list of local leaders, ranks in comm */ - int my_lleader_on_lcomm; /* rank of my lleader in llcomm */ + int my_lleader; /* rank of my lleader in lcomm */ int am_lleader; /* am I an lleader? */ - int my_lleader; /* pos. of my lleader in hier_lleaders */ }; typedef struct mca_coll_hierarch_llead_t mca_coll_hierarch_llead_t; -/* These are various modes how the colorarr is stored. The reason - for the various versions is to minimize the memory requirement - for this task, since in most real-world scenarios, the information - can be stored significantly more compact that storing the whole array - - MCA_COLL_HIERARCH_COLORARR_LINEAR: - contains an array of size hier_num_colorarr. 
Each element - contains the color of the according process - MCA_COLL_HIERARCH_COLORARR_RANGE: - the ranks beeing in the same subcommunicator are consecutive - ranks (e.g. ranks 0-8 are in subgroup1, 9-16 in subgroup2 etc) - - hier_colorarr[0] : number of blocks - hier_colorarr[2*i+1] : first rank of block i, i=0,(hier_colorarr[0]-1) - hier_colorarr[2*i+2] : last rank of block i - - hier_num_coloarr = hier_coloarr[0] + 1; - - MCA_COLL_HIERARCH_COLORARR_STRIDE2: - the processes are in two subgroups with a stride of two, - e.g. (0,2,4,6,...) are in subgroup 1, (1,3,5,7,...) in subgroup2 - This scenario might happen on dual-processor nodes if the scheduler - has distributed the processes in a round-robin fashion. - - hier_colorarr[0] = first rank of first subgroup - hier_colorarr[1] = first rank of second subgroup - hier_num_colorarr = 2 - - MCA_COLL_HIERARCH_COLORARR_STRIDE4: - the processes are in four subgroups with a stride of four, - e.g. (0,4,8,12,...) are in subgroup 1, (1,5,9,13,...) in subgroup2 etc. - This scenario might happen on quad-processor nodes if the scheduler - has distributed the processes in a round-robin fashion. 
- - hier_colorarr[0] = first rank of first subgroup - hier_colorarr[1] = first rank of second subgroup - hier_colorarr[2] = first rank of third subgroup - hier_colorarr[3] = first rank of forth subgroup - hier_num_colorarr = 4 - -*/ - -#define MCA_COLL_HIERARCH_COLORARR_INVALID -1 -#define MCA_COLL_HIERARCH_COLORARR_LINEAR 0 -#define MCA_COLL_HIERARCH_COLORARR_RANGE 1 -#define MCA_COLL_HIERARCH_COLORARR_STRIDE2 2 -#define MCA_COLL_HIERARCH_COLORARR_STRIDE4 3 static inline int mca_coll_hierarch_count_lleaders ( int size, int *carr) { @@ -219,43 +177,31 @@ static inline void mca_coll_hierarch_get_lleader (int rank, struct mca_coll_base /* initialize it to be undefined */ *lleader = MPI_UNDEFINED; - switch ( data->hier_type_colorarr ) - { - case MCA_COLL_HIERARCH_COLORARR_LINEAR: - /* sanity check */ - if ( rank > data->hier_num_colorarr-1 ) { - return; - } - - /* Get the color of this rank */ - color = data->hier_colorarr[rank]; - - /* get the first rank having this color. this is - currently by definition the local leader */ - for ( i=0; i< data->hier_num_colorarr-1; i++ ) { - if ( data->hier_colorarr[i] == color ) { - *lleader = i; - break; - } - } - - break; - case MCA_COLL_HIERARCH_COLORARR_RANGE: - case MCA_COLL_HIERARCH_COLORARR_STRIDE2: - case MCA_COLL_HIERARCH_COLORARR_STRIDE4: - case MCA_COLL_HIERARCH_COLORARR_INVALID: - default: - break; + /* sanity check */ + if ( rank > data->hier_num_colorarr-1 ) { + return; } + /* Get the color of this rank */ + color = data->hier_colorarr[rank]; + + /* get the first rank having this color. 
this is + currently by definition the local leader */ + for ( i=0; i< data->hier_num_colorarr-1; i++ ) { + if ( data->hier_colorarr[i] == color ) { + *lleader = i; + break; + } + } + return; } /* * coll API functions */ -struct ompi_communicator_t* mca_coll_hierarch_get_llcomm (int rank, struct mca_coll_base_comm_t *data, - int* lrank); +struct ompi_communicator_t* mca_coll_hierarch_get_llcomm (int rroot, struct mca_coll_base_comm_t *data, + int* llroot, int* lleader); int mca_coll_hierarch_init_query(bool allow_hierarch_user_threads, diff --git a/ompi/mca/coll/hierarch/coll_hierarch_bcast.c b/ompi/mca/coll/hierarch/coll_hierarch_bcast.c index f672e680ae..0c3e927180 100644 --- a/ompi/mca/coll/hierarch/coll_hierarch_bcast.c +++ b/ompi/mca/coll/hierarch/coll_hierarch_bcast.c @@ -56,7 +56,7 @@ int mca_coll_hierarch_bcast_intra(void *buff, also the reason, that *every* process in comm has to call this function */ - llcomm = mca_coll_hierarch_get_llcomm ( root, data, &llroot); + llcomm = mca_coll_hierarch_get_llcomm ( root, data, &llroot, &lleader); /* Bcast on the upper level among the local leaders */ if ( MPI_UNDEFINED != llroot ) { @@ -71,7 +71,6 @@ int mca_coll_hierarch_bcast_intra(void *buff, */ if ( MPI_COMM_NULL != lcomm ) { - mca_coll_hierarch_get_lleader (root, data, &lleader); ret = lcomm->c_coll.coll_bcast(buff, count, datatype, lleader, lcomm ); } diff --git a/ompi/mca/coll/hierarch/coll_hierarch_reduce.c b/ompi/mca/coll/hierarch/coll_hierarch_reduce.c index a508750a91..a8d3c34f7b 100644 --- a/ompi/mca/coll/hierarch/coll_hierarch_reduce.c +++ b/ompi/mca/coll/hierarch/coll_hierarch_reduce.c @@ -52,8 +52,9 @@ int mca_coll_hierarch_reduce_intra(void *sbuf, void *rbuf, int count, data = comm->c_coll_selected_data; lcomm = data->hier_lcomm; + llcomm = mca_coll_hierarch_get_llcomm ( root, data, &llroot, &lleader); + if ( MPI_COMM_NULL != lcomm ) { - mca_coll_hierarch_get_lleader ( root, data, &lleader ); lrank = ompi_comm_rank (lcomm); if ( lrank == lleader ) { 
@@ -80,7 +81,6 @@ int mca_coll_hierarch_reduce_intra(void *sbuf, void *rbuf, int count, } } - llcomm = mca_coll_hierarch_get_llcomm ( root, data, &llroot); if ( MPI_UNDEFINED != llroot ) { ret = llcomm->c_coll.coll_reduce (tmpbuf, rbuf, count, dtype, op, llroot, llcomm);