1
1

checkpoint of the hierarch collective module.

This commit was SVN r5366.
Этот коммит содержится в:
Edgar Gabriel 2005-04-14 21:27:24 +00:00
родитель cba83190af
Коммит 52d3e1f876
3 изменённых файлов: 337 добавлений и 171 удалений

Просмотреть файл

@ -102,7 +102,7 @@ mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority,
{
int size;
int color, ncount, maxncount;
int *colorarr=NULL;
struct mca_coll_base_comm_t *tdata=NULL;
int level;
/* Get the priority level attached to this module */
@ -117,15 +117,19 @@ mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority,
return NULL;
}
/* This array will hold the color of each process returned for a certain
protocol. The array is one element larger than required to store
the position of the selected protocol in the hier_prot array.
This avoids, that we have to walk through the whole list in
module_init again.
*/
size = ompi_comm_size(comm);
colorarr = (int *) malloc ( sizeof(int) * size + 1);
if ( NULL == colorarr ) {
/* allocate the data structure holding all information */
tdata = calloc ( 1, sizeof(struct mca_coll_base_comm_t));
if ( NULL == tdata ) {
*priority = 0;
return NULL;
}
tdata->hier_num_colorarr = size;
tdata->hier_type_colorarr = MCA_COLL_HIERARCH_COLORARR_LINEAR;
tdata->hier_colorarr = (int *) malloc ( sizeof(int) * size);
if ( NULL == tdata->hier_colorarr ) {
*priority = 0;
return NULL;
}
@ -141,7 +145,7 @@ mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority,
comm->c_coll_basic_module->coll_allreduce (&ncount, &maxncount, 1, MPI_INT,
MPI_MAX, comm );
comm->c_coll_basic_module->coll_allgather (&color, 1, MPI_INT,
colorarr, 1, MPI_INT, comm );
tdata->hier_colorarr, 1, MPI_INT, comm );
if ( 0 == maxncount ) {
/*
@ -157,18 +161,24 @@ mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm, int *priority,
* hierarchical component.
* Its (size-1) because we do not count ourselves.
*/
goto err_exit;
goto exit;
}
else {
colorarr[size] = level;
*data = (struct mca_coll_base_comm_t *) colorarr;
tdata->hier_level = level;
/* This is just a temporary assignment, which will be removed
once this component has been selected */
*data = tdata;
return &intra;
}
}
err_exit:
if ( NULL != colorarr ) {
free ( colorarr ) ;
exit:
if ( NULL != tdata->hier_colorarr ) {
free ( tdata->hier_colorarr ) ;
}
if ( NULL != tdata ) {
free ( tdata );
}
*priority = 0;
@ -194,10 +204,8 @@ mca_coll_hierarch_module_init(struct ompi_communicator_t *comm)
rank = ompi_comm_rank(comm);
size = ompi_comm_size(comm);
colorarr = (int *) comm->c_coll_selected_data;
level = colorarr[size+1];
mca_coll_hierarch_checkfor_component ( comm, hier_prot[level], &color, &ncount);
data = comm->c_coll_selected_data;
color = data->hier_colorarr[rank];
/* Generate the subcommunicator based on the color returned by
the previous function. */
@ -206,70 +214,41 @@ mca_coll_hierarch_module_init(struct ompi_communicator_t *comm)
goto exit;
}
/* store the low-level communicator and a certain number of requests
on the communicator */
data = calloc ( 1, sizeof(struct mca_coll_base_comm_t));
if ( NULL == data ) {
goto exit;
}
data->hier_comm = comm;
data->hier_llcomm = llcomm;
data->hier_num_reqs = 2 * size;
data->hier_reqs = (ompi_request_t **) malloc (sizeof(ompi_request_t)*size*2);
if ( NULL == data->hier_reqs ) {
goto exit;
}
data->hier_am_lleader=0; /* false */
/* determine how many local leader there are and who they are */
llr = (int *) calloc (1, sizeof(int) * size);
if (NULL == llr ) {
goto exit;
}
for (c=0, i=0; i<size; i++ ){
found=0;
for (j=0; j<c ; j++) {
if ( colorarr[i] == llr[j] ) {
found=0;
break;
}
}
if ( !found ) {
if ( MPI_UNDEFINED == colorarr[i] ) {
llr[c] = i;
}
else {
llr[c] = colorarr[i];
}
if ( llr[c] == color ) {
data->hier_my_lleader = c;
}
c++;
if ( llr[c] == rank ) {
data->hier_am_lleader = 1;
}
}
}
data->hier_num_lleaders = c-1;
/* we allocate one more element than required to be able to add the
root of an operation to this list */
data->hier_lleaders = (int *) malloc ( sizeof(int) * data->hier_num_lleaders + 1);
data->hier_num_lleaders = mca_coll_hierarch_count_lleaders (size, data->hier_colorarr);
data->hier_lleaders = (int *) malloc ( sizeof(int) * data->hier_num_lleaders);
if ( NULL == data->hier_lleaders ) {
goto exit;
}
mca_coll_hierarch_get_all_lleaders ( data->hier_num_colorarr,
data->hier_colorarr,
data->hier_num_lleaders,
data->hier_lleaders );
/* determine my lleader, maybe its me */
data->hier_am_lleader=0; /* false */
mca_coll_hierarch_get_lleader ( rank, data, &data->hier_my_lleader );
if ( data->hier_colorarr[data->hier_my_lleader] == rank ) {
data->hier_am_lleader = 1; /*true */
}
/* This is the point where I will introduce later on a function trying to
compact the colorarr array. Not done at the moment */
memcpy ( data->hier_lleaders, llr, data->hier_num_lleaders * sizeof(int));
comm->c_coll_selected_data = (struct mca_coll_base_comm_t *)data;
exit:
if ( NULL != llr ) {
free (llr);
}
if ( NULL != colorarr ) {
free ( colorarr ) ;
}
if ( OMPI_SUCCESS != ret ) {
ompi_comm_free ( &llcomm );
if ( NULL != data ) {
@ -279,6 +258,9 @@ mca_coll_hierarch_module_init(struct ompi_communicator_t *comm)
if ( NULL != data->hier_lleaders ) {
free ( data->hier_lleaders);
}
if ( NULL != data->hier_colorarr ) {
free ( data->hier_colorarr ) ;
}
free ( data );
}
@ -301,8 +283,9 @@ int mca_coll_hierarch_module_finalize(struct ompi_communicator_t *comm)
llcomm = data->hier_llcomm;
ompi_comm_free (&llcomm);
free ( data->hier_reqs);
free ( data->hier_lleaders);
free ( data->hier_reqs );
free ( data->hier_lleaders );
free ( data->hier_colorarr );
if ( NULL != data->hier_topo.topo_next ) {
free (data->hier_topo.topo_next);
}
@ -315,7 +298,8 @@ int mca_coll_hierarch_module_finalize(struct ompi_communicator_t *comm)
/*
 * Release the per-communicator data handed out for a communicator on which
 * this component was queried.
 *
 * comm: the communicator the data belonged to (not used here)
 * data: structure to release; may be NULL, in which case nothing is freed
 *
 * The color array is owned by the structure, so it has to be released
 * before the structure itself; the structure must not be touched after
 * it has been freed.
 *
 * Always returns OMPI_SUCCESS.
 */
int mca_coll_hierarch_comm_unquery ( struct ompi_communicator_t *comm,
                                     struct mca_coll_base_comm_t *data )
{
    if ( NULL != data ) {
        free ( data->hier_colorarr );
        free ( data );
    }

    return OMPI_SUCCESS;
}

Просмотреть файл

@ -37,6 +37,8 @@ extern const mca_coll_base_component_1_0_0_t mca_coll_hierarch_component;
extern int mca_coll_hierarch_priority_param;
extern int mca_coll_hierarch_verbose_param;
extern int mca_coll_hierarch_verbose;
extern int mca_coll_hierarch_walk_through_list_param;
extern int mca_coll_hierarch_use_next_param;
/*
* Data structure for attaching data to the communicator
@ -53,125 +55,289 @@ extern int mca_coll_hierarch_verbose;
/* Per-communicator state of the hierarch component.  Every member is
   declared exactly once. */
struct mca_coll_base_comm_t {
    struct ompi_communicator_t *hier_comm;     /* link back to the attached comm */
    struct ompi_communicator_t *hier_llcomm;   /* low level communicator */
    int                          hier_level;   /* level in the hierarchy. just debugging */
    int                   hier_num_lleaders;   /* number of local leaders */
    int                      *hier_lleaders;   /* list of local leaders, ranks in comm */
    int                     hier_my_lleader;   /* pos. of my lleader in hier_lleaders */
    int           hier_my_lleader_on_llcomm;   /* rank of my lleader in llcomm */
    int                     hier_am_lleader;   /* am I an lleader? */
    int                       hier_num_reqs;   /* num. of requests */
    ompi_request_t              **hier_reqs;   /* list of requests */
    int                  hier_type_colorarr;   /* format in which the colorarr is stored,
                                                  one of MCA_COLL_HIERARCH_COLORARR_* */
    int                   hier_num_colorarr;   /* size of the colorarr array */
    int                      *hier_colorarr;   /* array containing the color of all procs */
    struct mca_coll_hierarch_topo hier_topo;   /* topology used in the coll ops */
};
/* These are various modes how the colorarr is stored. The reason
for the various versions is to minimize the memory requirement
for this task, since in most real-world scenarios, the information
can be stored significantly more compactly than storing the whole array
MCA_COLL_HIERARCH_COLORARR_LINEAR:
contains an array of size hier_num_colorarr. Each element
contains the color of the according process
MCA_COLL_HIERARCH_COLORARR_RANGE:
the ranks being in the same subcommunicator are consecutive
ranks (e.g. ranks 0-8 are in subgroup1, 9-16 in subgroup2 etc)
hier_colorarr[0] : number of blocks
hier_colorarr[2*i+1] : first rank of block i, i=0,(hier_colorarr[0]-1)
hier_colorarr[2*i+2] : last rank of block i
#define MCA_COLL_HIERARCH_IS_ROOT_LLEADER(_root, _lls, _llsize, _found, _pos) { \
int _i; \
for (_found=0, _pos=_llsize, _i=0; _i<_llsize; _i++) { \
if ( _lls[_i] == _root ) { \
_found = 1; \
_pos = _i; \
break; \
} \
} \
}
hier_num_colorarr = hier_colorarr[0] + 1;
MCA_COLL_HIERARCH_COLORARR_STRIDE2:
the processes are in two subgroups with a stride of two,
e.g. (0,2,4,6,...) are in subgroup 1, (1,3,5,7,...) in subgroup2
This scenario might happen on dual-processor nodes if the scheduler
has distributed the processes in a round-robin fashion.
hier_colorarr[0] = first rank of first subgroup
hier_colorarr[1] = first rank of second subgroup
hier_num_colorarr = 2
MCA_COLL_HIERARCH_COLORARR_STRIDE4:
the processes are in four subgroups with a stride of four,
e.g. (0,4,8,12,...) are in subgroup 1, (1,5,9,13,...) in subgroup2 etc.
This scenario might happen on quad-processor nodes if the scheduler
has distributed the processes in a round-robin fashion.
hier_colorarr[0] = first rank of first subgroup
hier_colorarr[1] = first rank of second subgroup
hier_colorarr[2] = first rank of third subgroup
hier_colorarr[3] = first rank of fourth subgroup
hier_num_colorarr = 4
*/
/* Values stored in hier_type_colorarr; they select how hier_colorarr is
   to be interpreted. */
/* not one of the storage formats; the helpers in this header do nothing
   for it */
#define MCA_COLL_HIERARCH_COLORARR_INVALID -1
/* one entry per process: hier_colorarr[rank] is the color of that rank */
#define MCA_COLL_HIERARCH_COLORARR_LINEAR 0
/* blocks of consecutive ranks; not yet handled by the helpers in this
   header */
#define MCA_COLL_HIERARCH_COLORARR_RANGE 1
/* two subgroups with a stride of two; not yet handled by the helpers in
   this header */
#define MCA_COLL_HIERARCH_COLORARR_STRIDE2 2
/* four subgroups with a stride of four; not yet handled by the helpers in
   this header */
#define MCA_COLL_HIERARCH_COLORARR_STRIDE4 3
/*
 * Count the distinct colors found in carr[0 .. size-1].  Entries equal to
 * MPI_UNDEFINED are ignored.
 *
 * size: number of entries in carr
 * carr: color of every process
 *
 * Returns the number of distinct colors (0 for an empty array), or -1 if
 * size is negative or the scratch buffer could not be allocated.
 */
static inline int mca_coll_hierarch_count_lleaders ( int size, int *carr )
{
    int cnt, i, j, found;
    int *llr=NULL;

    /* Answer the empty case directly: a zero-byte calloc() may legally
       return NULL, which would be misreported as an allocation failure. */
    if ( 0 == size ) {
        return 0;
    }
    if ( size < 0 ) {
        return -1;
    }

    /* scratch list of the colors seen so far; the (count, element size)
       form lets calloc() check the multiplication for overflow */
    llr = (int *) calloc ((size_t) size, sizeof(int));
    if (NULL == llr ) {
        return -1;
    }

    for (cnt=0, i=0; i<size; i++ ) {
        /* has this color been recorded already? */
        for ( found=0, j=0; j<cnt; j++ ) {
            if ( carr[i] == llr[j] ) {
                found = 1;
                break;
            }
        }

        if ( !found && (MPI_UNDEFINED != carr[i]) ) {
            llr[cnt++] = carr[i];
        }
    }

    free (llr);
    return cnt;
}
/*
 * Collect the distinct colors of carr[0 .. size-1] into larr, in the order
 * in which they first appear.  Entries equal to MPI_UNDEFINED are skipped.
 *
 * size:  number of entries in carr
 * carr:  color of every process
 * lsize: number of entries larr can hold
 * larr:  output array
 *
 * At most lsize entries are written, so a too-small output array is never
 * overrun.  When lsize is the value returned by
 * mca_coll_hierarch_count_lleaders() for the same input, every distinct
 * color is stored.
 */
static inline void mca_coll_hierarch_get_all_lleaders ( int size, int *carr, int lsize,
                                                        int *larr)
{
    int i, j, cnt, found;

    /* once larr is full nothing more can be stored, so stop scanning */
    for (cnt=0, i=0; i<size && cnt<lsize; i++ ) {
        /* has this color been stored already? */
        for ( found=0, j=0; j<cnt; j++ ) {
            if ( carr[i] == larr[j] ) {
                found = 1;
                break;
            }
        }

        if ( !found && (MPI_UNDEFINED != carr[i]) ) {
            larr[cnt++] = carr[i];
        }
    }
}
/*
 * Determine the local leader of a process.
 *
 * rank:    rank of the process whose local leader is wanted
 * data:    hierarch data holding the color array and its storage format
 * lleader: output; set to the index of the first entry of the color array
 *          that has the same color as 'rank'.  This is by definition the
 *          local leader.  Set to MPI_UNDEFINED if 'rank' is outside the
 *          color array or the storage format is not handled.
 *
 * Only the MCA_COLL_HIERARCH_COLORARR_LINEAR format is implemented.
 */
static inline void mca_coll_hierarch_get_lleader (int rank, struct mca_coll_base_comm_t *data,
                                                  int* lleader )
{
    int color, i;

    /* initialize it to be undefined */
    *lleader = MPI_UNDEFINED;

    switch ( data->hier_type_colorarr )
    {
        case MCA_COLL_HIERARCH_COLORARR_LINEAR:
            /* sanity check: rank has to be a valid index into the array */
            if ( rank < 0 || rank > data->hier_num_colorarr-1 ) {
                return;
            }

            /* Get the color of this rank */
            color = data->hier_colorarr[rank];

            /* Get the first rank having this color.  The scan has to cover
               the whole array including its last entry, otherwise the last
               rank would have no leader when nobody else shares its color. */
            for ( i=0; i< data->hier_num_colorarr; i++ ) {
                if ( data->hier_colorarr[i] == color ) {
                    *lleader = i;
                    break;
                }
            }
            break;
        case MCA_COLL_HIERARCH_COLORARR_RANGE:
        case MCA_COLL_HIERARCH_COLORARR_STRIDE2:
        case MCA_COLL_HIERARCH_COLORARR_STRIDE4:
        case MCA_COLL_HIERARCH_COLORARR_INVALID:
        default:
            /* not implemented: *lleader stays MPI_UNDEFINED */
            break;
    }

    return;
}
/*
 * Map a rank of the full communicator to its position among the processes
 * that share its color.
 *
 * rank:  rank whose position is wanted
 * data:  hierarch data holding the color array and its storage format
 * lrank: output; set to the number of entries before 'rank' in the color
 *        array that have the same color as 'rank' (so the first process
 *        of a color maps to 0, the second to 1, ...).  Set to
 *        MPI_UNDEFINED if 'rank' is outside the color array or the
 *        storage format is not handled.
 *
 * NOTE(review): callers use the result as a rank in the low-level
 * communicator; this presumes that communicator orders its members by
 * their rank in the full communicator -- confirm against the code that
 * creates it.
 *
 * Only the MCA_COLL_HIERARCH_COLORARR_LINEAR format is implemented.
 */
static inline void mca_coll_hierarch_map_rank (int rank, struct mca_coll_base_comm_t *data,
                                               int* lrank )
{
    int i, color, tmprank=-1;

    /* initialize it to MPI_UNDEFINED */
    *lrank = MPI_UNDEFINED;

    switch ( data->hier_type_colorarr )
    {
        case MCA_COLL_HIERARCH_COLORARR_LINEAR:
            /* sanity check: rank has to be a valid index into the array */
            if ( rank < 0 || rank > data->hier_num_colorarr-1 ) {
                return;
            }

            /* Get the color of this process */
            color = data->hier_colorarr[rank];

            /* Walk through the array until we reach 'rank' and count every
               process having the same color.  The walk must not stop at
               the first match, otherwise the result would always be 0.
               'rank' itself matches, so starting at -1 yields a zero-based
               position. */
            for ( i=0; i< rank+1; i++ ) {
                if ( data->hier_colorarr[i] == color ) {
                    tmprank++;
                }
            }

            *lrank = tmprank;
            break;
        case MCA_COLL_HIERARCH_COLORARR_RANGE:
        case MCA_COLL_HIERARCH_COLORARR_STRIDE2:
        case MCA_COLL_HIERARCH_COLORARR_STRIDE4:
        case MCA_COLL_HIERARCH_COLORARR_INVALID:
        default:
            /* not implemented: *lrank stays MPI_UNDEFINED */
            break;
    }

    return;
}
/*
* coll API functions
*/
int mca_coll_hierarch_init_query(bool *allow_hierarch_user_threads,
bool *have_hidden_threads);
const struct mca_coll_base_module_1_0_0_t *
mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm,
int *priority, struct mca_coll_base_comm_t **data);
int mca_coll_hierarch_comm_unquery(struct ompi_communicator_t *comm,
struct mca_coll_base_comm_t *data);
/* API functions */
const struct mca_coll_base_module_1_0_0_t *
mca_coll_hierarch_module_init(struct ompi_communicator_t *comm);
int mca_coll_hierarch_module_finalize(struct ompi_communicator_t *comm);
int mca_coll_hierarch_init_query(bool *allow_hierarch_user_threads,
bool *have_hidden_threads);
const struct mca_coll_base_module_1_0_0_t *
mca_coll_hierarch_comm_query(struct ompi_communicator_t *comm,
int *priority, struct mca_coll_base_comm_t **data);
int mca_coll_hierarch_comm_unquery(struct ompi_communicator_t *comm,
struct mca_coll_base_comm_t *data);
const struct mca_coll_base_module_1_0_0_t *
mca_coll_hierarch_module_init(struct ompi_communicator_t *comm);
int mca_coll_hierarch_module_finalize(struct ompi_communicator_t *comm);
int mca_coll_hierarch_allgather_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_allgatherv_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void * rbuf, int *rcounts,
int *disps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_allreduce_intra(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_alltoall_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_alltoallv_intra(void *sbuf, int *scounts,
int *sdisps,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts,
int *rdisps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_alltoallw_intra(void *sbuf, int *scounts,
int *sdisps,
struct ompi_datatype_t **sdtypes,
void *rbuf, int *rcounts,
int *rdisps,
struct ompi_datatype_t **rdtypes,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_barrier_intra(struct ompi_communicator_t *comm);
int mca_coll_hierarch_bcast_intra(void *buff, int count,
struct ompi_datatype_t *datatype,
int root,
int mca_coll_hierarch_allgather_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_exscan_intra(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_gather_intra(void *sbuf, int scount,
int mca_coll_hierarch_allgatherv_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
void * rbuf, int *rcounts,
int *disps,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_gatherv_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts, int *disps,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_reduce_intra(void *sbuf, void* rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
int root,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_reduce_scatter_intra(void *sbuf, void *rbuf,
int *rcounts,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_scan_intra(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
int mca_coll_hierarch_allreduce_intra(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_alltoall_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_alltoallv_intra(void *sbuf, int *scounts,
int *sdisps,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts,
int *rdisps,
struct ompi_datatype_t *rdtype,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_alltoallw_intra(void *sbuf, int *scounts,
int *sdisps,
struct ompi_datatype_t **sdtypes,
void *rbuf, int *rcounts,
int *rdisps,
struct ompi_datatype_t **rdtypes,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_barrier_intra(struct ompi_communicator_t *comm);
int mca_coll_hierarch_bcast_intra(void *buff, int count,
struct ompi_datatype_t *datatype,
int root,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_exscan_intra(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_gather_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int rcount,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_gatherv_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype,
void *rbuf, int *rcounts, int *disps,
struct ompi_datatype_t *rdtype,
int root,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_reduce_intra(void *sbuf, void* rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
int root,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_reduce_scatter_intra(void *sbuf, void *rbuf,
int *rcounts,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_scan_intra(void *sbuf, void *rbuf, int count,
struct ompi_datatype_t *dtype,
struct ompi_op_t *op,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_scatter_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype, void *rbuf,
int rcount, struct ompi_datatype_t *rdtype,
int root, struct ompi_communicator_t *comm);
int mca_coll_hierarch_scatterv_intra(void *sbuf, int *scounts, int *disps,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype, int root,
struct ompi_communicator_t *comm);
int mca_coll_hierarch_scatter_intra(void *sbuf, int scount,
struct ompi_datatype_t *sdtype, void *rbuf,
int rcount, struct ompi_datatype_t *rdtype,
int root, struct ompi_communicator_t *comm);
int mca_coll_hierarch_scatterv_intra(void *sbuf, int *scounts, int *disps,
struct ompi_datatype_t *sdtype,
void* rbuf, int rcount,
struct ompi_datatype_t *rdtype, int root,
struct ompi_communicator_t *comm);
#if defined(c_plusplus) || defined(__cplusplus)
}

Просмотреть файл

@ -48,6 +48,7 @@ int mca_coll_hierarch_bcast_intra(void *buff,
data = comm->c_coll_selected_data;
llcomm = data->hier_llcomm;
/* trivial linear distribution of the data to all local leaders.
need something significantly better */
if ( rank == root ) {
@ -118,14 +119,20 @@ int mca_coll_hierarch_bcast_intra(void *buff,
{
struct mca_coll_base_comm_t *data=NULL;
struct ompi_communicator_t *llcomm=NULL;
int rank, ret;
int lleader_of_root, lleader_replaced_by_root=0;
int rank, ret, lroot;
int segsize;
rank = ompi_comm_rank ( comm );
data = comm->c_coll_selected_data;
llcomm = data->hier_llcomm;
if ( rank == root || data->hier_am_lleader ) {
mca_coll_hierarch_get_lleader (root, data, &lleader_of_root);
if ( lleader_of_root != root ) {
lleader_replaced_by_root = 1;
}
if ( rank == root || ( data->hier_am_lleader && !lleader_replaced_by_root) ) {
/* this functions sets up the topology used in the segmented
bcast afterwards and determines the segment size. */
ret = mca_coll_hierarch_intra_bcast_setup_topo (count, datatype, root, data,
@ -150,13 +157,22 @@ int mca_coll_hierarch_bcast_intra(void *buff,
it to the processes in their local, low-level communicator.
*/
if ( MPI_COMM_NULL != llcomm ) {
ret = llcomm->c_coll.coll_bcast(buff, count, datatype,
data->hier_my_lleader, llcomm );
if ( lleader_replaced_by_root ) {
mca_coll_hierarch_map_rank(root, data, &lroot);
ret = llcomm->c_coll.coll_bcast(buff, count, datatype, lroot,
llcomm);
}
else {
ret = llcomm->c_coll.coll_bcast(buff, count, datatype,
data->hier_my_lleader_on_llcomm, llcomm );
}
}
return ret;
}
/*
* This is the mother of all segmented bcast algorithms of any type.
* Due to the general structure of the topo argument, you can use this function