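Fix the placement of the `restrict` keyword: it must qualify the pointer itself (`void * restrict in1`), not the pointed-to type (`restrict void *in1`).
Make the tag in the fan-in/fan-out algorithm fragment-based, i.e. a new tag is taken for each stripe rather than once per collective. This commit was SVN r17903.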
Этот коммит содержится в:
родитель
2c66d396b7
Коммит
a7c836a2b0
@ -52,11 +52,6 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count,
|
||||
|
||||
sm_module=(mca_coll_sm2_module_t *) module;
|
||||
|
||||
/* get unique tag for this collective - assume only one collective
|
||||
* per communicator at a given time, so no locking needed
|
||||
* for atomic update of the tag */
|
||||
tag=sm_module->collective_tag;
|
||||
sm_module->collective_tag++;
|
||||
|
||||
/* get size of data needed - same layout as user data, so that
|
||||
* we can apply the reduction routines directly on these buffers
|
||||
@ -95,6 +90,12 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count,
|
||||
/* NOTE: starting with a rather synchronous approach */
|
||||
for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
|
||||
|
||||
/* get unique tag for this stripe - assume only one collective
|
||||
* per communicator at a given time, so no locking needed
|
||||
* for atomic update of the tag */
|
||||
tag=sm_module->collective_tag;
|
||||
sm_module->collective_tag++;
|
||||
|
||||
sm_buffer_desc=alloc_sm2_shared_buffer(sm_module);
|
||||
|
||||
/* get number of elements to process in this stripe */
|
||||
|
@ -680,8 +680,8 @@ LOC_FUNC(minloc, long_double_int, <)
|
||||
* routines, needed for some optimizations.
|
||||
*/
|
||||
#define OP_FUNC_3BUF(name, type_name, type, op) \
|
||||
void ompi_mpi_op_three_buff_##name##_##type_name(restrict void *in1, \
|
||||
restrict void *in2, restrict void *out, int *count, \
|
||||
void ompi_mpi_op_three_buff_##name##_##type_name(void * restrict in1, \
|
||||
void * restrict in2, void * restrict out, int *count, \
|
||||
MPI_Datatype *dtype) \
|
||||
{ \
|
||||
int i; \
|
||||
@ -694,8 +694,8 @@ LOC_FUNC(minloc, long_double_int, <)
|
||||
}
|
||||
|
||||
#define COMPLEX_OP_FUNC_SUM_3BUF(type_name, type) \
|
||||
void ompi_mpi_op_sum_three_buff_##type_name(restrict void *in1, \
|
||||
restrict void * in2, restrict void *out, int *count, \
|
||||
void ompi_mpi_op_sum_three_buff_##type_name(void * restrict in1, \
|
||||
void * restrict in2, void * restrict out, int *count, \
|
||||
MPI_Datatype *dtype) \
|
||||
{ \
|
||||
int i; \
|
||||
@ -709,8 +709,8 @@ LOC_FUNC(minloc, long_double_int, <)
|
||||
}
|
||||
|
||||
#define COMPLEX_OP_FUNC_PROD_3BUF(type_name, type) \
|
||||
void ompi_mpi_op_prod_three_buff_##type_name(restrict void *in1, \
|
||||
restrict void *in2, restrict void *out, int *count, \
|
||||
void ompi_mpi_op_prod_three_buff_##type_name(void * restrict in1, \
|
||||
void * restrict in2, void * restrict out, int *count, \
|
||||
MPI_Datatype *dtype) \
|
||||
{ \
|
||||
int i; \
|
||||
@ -734,8 +734,8 @@ LOC_FUNC(minloc, long_double_int, <)
|
||||
* This macro is for (out = op(in1, in2))
|
||||
*/
|
||||
#define FUNC_FUNC_3BUF(name, type_name, type) \
|
||||
void ompi_mpi_op_three_buff_##name##_##type_name(restrict void *in1, \
|
||||
restrict void *in2, restrict void *out, int *count, \
|
||||
void ompi_mpi_op_three_buff_##name##_##type_name(void * restrict in1, \
|
||||
void * restrict in2, void * restrict out, int *count, \
|
||||
MPI_Datatype *dtype) \
|
||||
{ \
|
||||
int i; \
|
||||
@ -766,9 +766,9 @@ LOC_FUNC(minloc, long_double_int, <)
|
||||
*/
|
||||
|
||||
#define LOC_FUNC_3BUF(name, type_name, op) \
|
||||
void ompi_mpi_op_three_buff_##name##_##type_name(restrict void *in1, \
|
||||
restrict void *in2, restrict void *out, int *count, \
|
||||
MPI_Datatype *dtype) \
|
||||
void ompi_mpi_op_three_buff_##name##_##type_name(void * restrict in1, \
|
||||
void * restrict in2, void * restrict out, int *count, \
|
||||
MPI_Datatype *dtype) \
|
||||
{ \
|
||||
int i; \
|
||||
ompi_op_predefined_##type_name##_t *a1 = (ompi_op_predefined_##type_name##_t*) in1; \
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user