1
1

Fix the location of the restrict keyword.

Make the tag in the fan-in/fan-out algorithm fragment-based (a new tag is taken for each stripe rather than once per collective).

This commit was SVN r17903.
Этот коммит содержится в:
Rich Graham 2008-03-21 01:40:36 +00:00
родитель 2c66d396b7
Коммит a7c836a2b0
2 изменённых файлов: 17 добавлений и 16 удалений

Просмотреть файл

@ -52,11 +52,6 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count,
sm_module=(mca_coll_sm2_module_t *) module;
/* get unique tag for this collective - assume only one collective
* per communicator at a given time, so no locking needed
* for atomic update of the tag */
tag=sm_module->collective_tag;
sm_module->collective_tag++;
/* get size of data needed - same layout as user data, so that
* we can apply the reduction routines directly on these buffers
@ -95,6 +90,12 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count,
/* NOTE: starting with a rather synchronous approach */
for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) {
/* get unique tag for this stripe - assume only one collective
* per communicator at a given time, so no locking needed
* for atomic update of the tag */
tag=sm_module->collective_tag;
sm_module->collective_tag++;
sm_buffer_desc=alloc_sm2_shared_buffer(sm_module);
/* get number of elements to process in this stripe */

Просмотреть файл

@ -680,8 +680,8 @@ LOC_FUNC(minloc, long_double_int, <)
* routines, needed for some optimizations.
*/
#define OP_FUNC_3BUF(name, type_name, type, op) \
void ompi_mpi_op_three_buff_##name##_##type_name(restrict void *in1, \
restrict void *in2, restrict void *out, int *count, \
void ompi_mpi_op_three_buff_##name##_##type_name(void * restrict in1, \
void * restrict in2, void * restrict out, int *count, \
MPI_Datatype *dtype) \
{ \
int i; \
@ -694,8 +694,8 @@ LOC_FUNC(minloc, long_double_int, <)
}
#define COMPLEX_OP_FUNC_SUM_3BUF(type_name, type) \
void ompi_mpi_op_sum_three_buff_##type_name(restrict void *in1, \
restrict void * in2, restrict void *out, int *count, \
void ompi_mpi_op_sum_three_buff_##type_name(void * restrict in1, \
void * restrict in2, void * restrict out, int *count, \
MPI_Datatype *dtype) \
{ \
int i; \
@ -709,8 +709,8 @@ LOC_FUNC(minloc, long_double_int, <)
}
#define COMPLEX_OP_FUNC_PROD_3BUF(type_name, type) \
void ompi_mpi_op_prod_three_buff_##type_name(restrict void *in1, \
restrict void *in2, restrict void *out, int *count, \
void ompi_mpi_op_prod_three_buff_##type_name(void * restrict in1, \
void * restrict in2, void * restrict out, int *count, \
MPI_Datatype *dtype) \
{ \
int i; \
@ -734,8 +734,8 @@ LOC_FUNC(minloc, long_double_int, <)
* This macro is for (out = op(in1, in2))
*/
#define FUNC_FUNC_3BUF(name, type_name, type) \
void ompi_mpi_op_three_buff_##name##_##type_name(restrict void *in1, \
restrict void *in2, restrict void *out, int *count, \
void ompi_mpi_op_three_buff_##name##_##type_name(void * restrict in1, \
void * restrict in2, void * restrict out, int *count, \
MPI_Datatype *dtype) \
{ \
int i; \
@ -766,9 +766,9 @@ LOC_FUNC(minloc, long_double_int, <)
*/
#define LOC_FUNC_3BUF(name, type_name, op) \
void ompi_mpi_op_three_buff_##name##_##type_name(restrict void *in1, \
restrict void *in2, restrict void *out, int *count, \
MPI_Datatype *dtype) \
void ompi_mpi_op_three_buff_##name##_##type_name(void * restrict in1, \
void * restrict in2, void * restrict out, int *count, \
MPI_Datatype *dtype) \
{ \
int i; \
ompi_op_predefined_##type_name##_t *a1 = (ompi_op_predefined_##type_name##_t*) in1; \