diff --git a/ompi/mca/coll/sm2/coll_sm2_allreduce.c b/ompi/mca/coll/sm2/coll_sm2_allreduce.c index cb1219aee5..442095d518 100644 --- a/ompi/mca/coll/sm2/coll_sm2_allreduce.c +++ b/ompi/mca/coll/sm2/coll_sm2_allreduce.c @@ -15,11 +15,11 @@ #include "ompi/op/op.h" #include "ompi/datatype/datatype.h" #include "ompi/communicator/communicator.h" -/* debug */ +/* debug #include "opal/sys/timer.h" extern uint64_t timers[7]; -/* end debug */ + end debug */ @@ -54,7 +54,6 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count, tree_node_t *my_reduction_node, *my_fanout_read_tree; sm_work_buffer_t *sm_buffer_desc; - sm_module=(mca_coll_sm2_module_t *) module; /* get unique tag for this collective - assume only one collective @@ -66,7 +65,7 @@ int mca_coll_sm2_allreduce_intra_fanin_fanout(void *sbuf, void *rbuf, int count, /* get size of data needed - same layout as user data, so that * we can apply the reudction routines directly on these buffers */ - rc=ompi_ddt_type_size(dtype, &dt_extent); + rc=ompi_ddt_type_extent(dtype, &dt_extent); if( OMPI_SUCCESS != rc ) { goto Error; } @@ -367,16 +366,16 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, volatile mca_coll_sm2_nb_request_process_shared_mem_t * extra_ctl_pointer; mca_coll_sm2_module_t *sm_module; - /* debug */ + /* debug opal_timer_t t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10; - /* end debug */ + end debug */ sm_module=(mca_coll_sm2_module_t *) module; /* get size of data needed - same layout as user data, so that * we can apply the reudction routines directly on these buffers */ - rc=ompi_ddt_type_size(dtype, &dt_extent); + rc=ompi_ddt_type_extent(dtype, &dt_extent); if( OMPI_SUCCESS != rc ) { goto Error; } @@ -406,24 +405,22 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, count_processed=0; - /* debug */ + /* debug t0=opal_sys_timer_get_cycles(); - /* end debug */ + end debug */ sm_buffer_desc=alloc_sm2_shared_buffer(sm_module); - /* debug 
*/ + /* debug t1=opal_sys_timer_get_cycles(); - /* end debug */ - /* debug */ timers[0]+=(t1-t0); - /* end debug */ + end debug */ /* get a pointer to the shared-memory working buffer */ /* NOTE: starting with a rather synchronous approach */ for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) { /* get number of elements to process in this stripe */ - /* debug */ + /* debug t2=opal_sys_timer_get_cycles(); - /* end debug */ + end debug */ count_this_stripe=n_dts_per_buffer; if( count_processed + count_this_stripe > count ) count_this_stripe=count-count_processed; @@ -449,10 +446,10 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, if( 0 != rc ) { return OMPI_ERROR; } - /* debug */ + /* debug t3=opal_sys_timer_get_cycles(); timers[1]+=(t3-t2); - /* end debug */ + end debug */ /* copy data in from the "extra" source, if need be */ tag=base_tag; @@ -500,9 +497,9 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, /* loop over data exchanges */ for(exchange=0 ; exchange < my_exchange_node->n_exchanges ; exchange++) { - /* debug */ + /* debug t4=opal_sys_timer_get_cycles(); - /* end debug */ + end debug */ index_read=(exchange&1); index_write=((exchange+1)&1); @@ -524,10 +521,10 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, while( partner_ctl_pointer->flag < tag ) { opal_progress(); } - /* debug */ + /* debug t5=opal_sys_timer_get_cycles(); timers[2]+=(t5-t4); - /* end debug */ + end debug */ /* reduce data into my write buffer */ /* apply collective operation */ @@ -538,6 +535,9 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, /* test */ + ompi_3buff_op_reduce(op,my_read_pointer,partner_read_pointer, + my_write_pointer,count_this_stripe,dtype); + /* { int ii,n_ints; int * restrict my_read=(int *)my_read_pointer; @@ -549,10 +549,11 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, } } - /* debug */ + */ 
+ /* debug t6=opal_sys_timer_get_cycles(); timers[3]+=(t6-t5); - /* end debug */ + end debug */ /* end test */ @@ -566,10 +567,10 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, while( partner_ctl_pointer->flag < tag ) { opal_progress(); } - /* debug */ + /* debug t7=opal_sys_timer_get_cycles(); timers[4]+=(t7-t6); - /* end debug */ + end debug */ } @@ -635,9 +636,9 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, } } - /* debug */ + /* debug t8=opal_sys_timer_get_cycles(); - /* end debug */ + end debug */ /* copy data into the destination buffer */ rc=ompi_ddt_copy_content_same_ddt(dtype, count_this_stripe, (char *)((char *)rbuf+dt_extent*count_processed), @@ -651,11 +652,11 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, } - /* debug */ + /* debug t9=opal_sys_timer_get_cycles(); timers[5]+=(t9-t8); - /* end debug */ + end debug */ /* "free" the shared-memory working buffer */ @@ -664,10 +665,10 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, goto Error; } - /* debug */ + /* debug t10=opal_sys_timer_get_cycles(); timers[6]+=(t10-t9); - /* end debug */ + end debug */ /* return */ return rc; @@ -710,9 +711,9 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, volatile mca_coll_sm2_nb_request_process_shared_mem_t * extra_ctl_pointer; mca_coll_sm2_module_t *sm_module; - /* debug */ + /* debug opal_timer_t t0,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10; - /* end debug */ + end debug */ sm_module=(mca_coll_sm2_module_t *) module; @@ -753,9 +754,9 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, /* NOTE: starting with a rather synchronous approach */ - /* debug */ + /* debug t0=opal_sys_timer_get_cycles(); - /* end debug */ + end debug */ /* use the same set of buffers for a single reduction */ sm_buffer_desc=alloc_sm2_shared_buffer(sm_module); @@ -767,9 +768,9 @@ int 
mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, my_tmp_data_buffer[0]=my_write_pointer; my_tmp_data_buffer[1]=my_read_pointer; - /* debug */ + /* debug t1=opal_sys_timer_get_cycles(); - /* end debug */ + end debug */ for( stripe_number=0 ; stripe_number < n_data_segments ; stripe_number++ ) { /* get number of elements to process in this stripe */ count_this_stripe=n_dts_per_buffer; @@ -783,10 +784,10 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, base_tag=sm_module->collective_tag; sm_module->collective_tag+=my_exchange_node->n_tags; - /* debug */ + /* debug t2=opal_sys_timer_get_cycles(); timers[0]+=(t2-t1); - /* end debug */ + end debug */ /* copy data into the write buffer */ rc=ompi_ddt_copy_content_same_ddt(dtype, count_this_stripe, @@ -795,10 +796,10 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, if( 0 != rc ) { return OMPI_ERROR; } - /* debug */ + /* debug t3=opal_sys_timer_get_cycles(); timers[1]+=(t3-t2); - /* end debug */ + end debug */ /* copy data in from the "extra" source, if need be */ tag=base_tag; @@ -845,9 +846,9 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, /* loop over data exchanges */ for(exchange=0 ; exchange < my_exchange_node->n_exchanges ; exchange++) { - /* debug */ + /* debug t4=opal_sys_timer_get_cycles(); - /* end debug */ + end debug */ index_read=(exchange&1); index_write=((exchange+1)&1); @@ -869,10 +870,10 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, while( partner_ctl_pointer->flag < tag ) { opal_progress(); } - /* debug */ + /* debug t5=opal_sys_timer_get_cycles(); timers[2]+=(t5-t4); - /* end debug */ + end debug */ /* reduce data into my write buffer */ /* apply collective operation */ @@ -894,10 +895,10 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, } } - /* debug */ + /* debug t6=opal_sys_timer_get_cycles(); timers[3]+=(t6-t5); - /* end debug 
*/ + end debug */ /* end test */ @@ -911,10 +912,10 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, while( partner_ctl_pointer->flag < tag ) { opal_progress(); } - /* debug */ + /* debug t7=opal_sys_timer_get_cycles(); timers[4]+=(t7-t6); - /* end debug */ + end debug */ } @@ -981,9 +982,9 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, } } - /* debug */ + /* debug t8=opal_sys_timer_get_cycles(); - /* end debug */ + end debug */ /* copy data into the destination buffer */ rc=ompi_ddt_copy_content_same_ddt(dtype, count_this_stripe, (char *)((char *)rbuf+dt_extent*count_processed), @@ -991,16 +992,16 @@ int mca_coll_sm2_allreduce_intra_recursive_doubling(void *sbuf, void *rbuf, if( 0 != rc ) { return OMPI_ERROR; } - /* debug */ + /* debug t9=opal_sys_timer_get_cycles(); timers[5]+=(t9-t8); - /* end debug */ + end debug */ /* "free" the shared-memory working buffer */ - /* debug */ + /* debug t10=opal_sys_timer_get_cycles(); timers[6]+=(t10-t9); - /* end debug */ + end debug */ /* update the count of elements processed */ count_processed+=count_this_stripe; diff --git a/ompi/mca/coll/sm2/coll_sm2_module.c b/ompi/mca/coll/sm2/coll_sm2_module.c index 4bf09f1dd9..f8fa0c26b0 100644 --- a/ompi/mca/coll/sm2/coll_sm2_module.c +++ b/ompi/mca/coll/sm2/coll_sm2_module.c @@ -115,16 +115,19 @@ static bool have_local_peers(ompi_group_t *group, size_t size) * Create mmaped shared file */ -static int allocate_shared_file(size_t size, char *file_name, +static int allocate_shared_file(size_t size, char **file_name, struct ompi_communicator_t *comm, char **sm_backing_file) { int fd = -1; int group_size,my_rank; + int unique_comm_id; + int len; /* asprintf() result: signed so the "0 > len" failure checks can fire */ + char *f_name; bool i_create_shared_file=false; ssize_t p; int rc=0, sm_file_inited=0; - struct iovec iov[2]; + struct iovec iov[3]; int sm_file_created; ompi_proc_t **comm_proc_list; @@ -141,11 +144,27 @@ static int allocate_shared_file(size_t size, char *file_name, /* open the 
backing file. */ if( i_create_shared_file ) { + /* + * set file name + */ + + /* generate id that will be different for non-overlapping + * communicators. + */ + unique_comm_id=(int)getpid(); + len=asprintf(&f_name, + "%s"OPAL_PATH_SEP"sm_coll_v2%s_%0d_%0d",orte_process_info.job_session_dir, + orte_system_info.nodename,ompi_comm_get_cid(comm),unique_comm_id); + if( 0 > len ) { + return OMPI_ERROR; + } + *file_name=f_name; + /* process initializing the file */ - fd = open(file_name, O_CREAT|O_RDWR, 0600); + fd = open(*file_name, O_CREAT|O_RDWR, 0600); if (fd < 0) { opal_output(0,"mca_common_sm_mmap_init: open %s failed with errno=%d\n", - file_name, errno); + *file_name, errno); goto file_opened; } /* map the file and initialize segment state */ @@ -180,7 +199,9 @@ static int allocate_shared_file(size_t size, char *file_name, iov[0].iov_len=sizeof(sm_file_created); iov[1].iov_base=&sm_file_inited; iov[1].iov_len=sizeof(sm_file_inited); - rc=orte_rml.send(&(comm_proc_list[p]->proc_name),iov,2, + iov[2].iov_base=&unique_comm_id; + iov[2].iov_len=sizeof(unique_comm_id); + rc=orte_rml.send(&(comm_proc_list[p]->proc_name),iov,3, OMPI_RML_TAG_COLL_SM2_BACK_FILE_CREATED,0); if( rc < 0 ) { opal_output(0, @@ -200,7 +221,9 @@ static int allocate_shared_file(size_t size, char *file_name, iov[0].iov_len=sizeof(sm_file_created); iov[1].iov_base=&sm_file_inited; iov[1].iov_len=sizeof(sm_file_inited); - rc=orte_rml.recv(&(comm_proc_list[0]->proc_name),iov,2, + iov[2].iov_base=&unique_comm_id; + iov[2].iov_len=sizeof(unique_comm_id); + rc=orte_rml.recv(&(comm_proc_list[0]->proc_name),iov,3, OMPI_RML_TAG_COLL_SM2_BACK_FILE_CREATED,0); if( rc < 0 ) { opal_output(0, "allocate_shared_file: orte_rml.recv failed from %ld with errno=%d\n", @@ -211,12 +234,22 @@ static int allocate_shared_file(size_t size, char *file_name, if( 0 == sm_file_inited ) { goto return_error; } + /* set file name - we need the unique id for non-overlapping + * communicators, that could have the same communicator 
id + */ + len=asprintf(&f_name, + "%s"OPAL_PATH_SEP"sm_coll_v2%s_%0d_%0d",orte_process_info.job_session_dir, + orte_system_info.nodename,ompi_comm_get_cid(comm),unique_comm_id); + if( 0 > len ) { + return OMPI_ERROR; + } + *file_name=f_name; /* open backing file */ - fd = open(file_name, O_RDWR, 0600); + fd = open(*file_name, O_RDWR, 0600); if (fd < 0) { opal_output(0,"mca_common_sm_mmap_init: open %s failed with errno=%d\n", - file_name, errno); + *file_name, errno); goto return_error; } @@ -462,15 +495,12 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority) mca_coll_sm2_module_t *sm_module; int i,j,group_size,ret; size_t alignment,size; - ssize_t size_tot_per_proc_per_seg; - size_t tot_size_per_bank,size_tot_per_segment; size_t tot_size_mem_banks; size_t ctl_memory_per_proc_per_segment; size_t mem_management_per_proc_per_block; size_t mem_management_per_proc; size_t mem_management_total; size_t size_sm2_backing_file; - size_t len; size_t size_buff_ctl_per_proc,size_data_buff_per_proc; /* @@ -699,16 +729,18 @@ mca_coll_sm2_comm_query(struct ompi_communicator_t *comm, int *priority) sm_module->size_sm2_backing_file=size_sm2_backing_file; /* set file name */ + /* len=asprintf(&(sm_module->coll_sm2_file_name), - "%s"OPAL_PATH_SEP"sm_coll_v2%s_%0d",orte_process_info.job_session_dir, + "%s"OPAL_PATH_SEP"sm_coll_v2%s_%0d\0",orte_process_info.job_session_dir, orte_system_info.nodename,ompi_comm_get_cid(comm)); if( 0 > len ) { goto CLEANUP; } + */ /* allocate backing file */ ret=allocate_shared_file(size_sm2_backing_file, - sm_module->coll_sm2_file_name, comm, + &(sm_module->coll_sm2_file_name), comm, &(sm_module->shared_memory_region)); if( MPI_SUCCESS != ret ) { goto CLEANUP; diff --git a/ompi/op/op.c b/ompi/op/op.c index 130fe1aa7b..f96f30ece8 100644 --- a/ompi/op/op.c +++ b/ompi/op/op.c @@ -71,10 +71,16 @@ OBJ_CLASS_INSTANCE(ompi_op_t, opal_object_t, #define C_INTEGER_LONG_LONG(name) \ { ompi_mpi_op_##name##_long_long_int }, /* 
OMPI_OP_TYPE_LONG_LONG_INT */ \ { ompi_mpi_op_##name##_unsigned_long_long } /* OMPI_OP_TYPE_UNSIGNED_LONG_LONG */ +#define C_INTEGER_LONG_LONG_3BUFF(name) \ + { ompi_mpi_op_three_buff_##name##_long_long_int }, /* OMPI_OP_TYPE_LONG_LONG_INT */ \ + { ompi_mpi_op_three_buff_##name##_unsigned_long_long } /* OMPI_OP_TYPE_UNSIGNED_LONG_LONG */ #else #define C_INTEGER_LONG_LONG(name) \ { NULL }, /* OMPI_OP_TYPE_LONG_LONG_INT */ \ { NULL } /* OMPI_OP_TYPE_UNSIGNED_LONG_LONG */ +#define C_INTEGER_LONG_LONG_3BUFF(name) \ + { NULL }, /* OMPI_OP_TYPE_LONG_LONG_INT */ \ + { NULL } /* OMPI_OP_TYPE_UNSIGNED_LONG_LONG */ #endif #define C_INTEGER(name) \ @@ -87,6 +93,17 @@ OBJ_CLASS_INSTANCE(ompi_op_t, opal_object_t, { ompi_mpi_op_##name##_unsigned }, /* OMPI_OP_TYPE_UNSIGNED */ \ { ompi_mpi_op_##name##_unsigned_long }, /* OMPI_OP_TYPE_UNSIGNED_LONG */ \ C_INTEGER_LONG_LONG(name) +#define C_INTEGER_3BUFF(name) \ + { ompi_mpi_op_three_buff_##name##_unsigned_char }, /* OMPI_OP_TYPE_UNSIGNED_CHAR */ \ + { ompi_mpi_op_three_buff_##name##_signed_char }, /* OMPI_OP_TYPE_SIGNED_CHAR */ \ + { ompi_mpi_op_three_buff_##name##_int }, /* OMPI_OP_TYPE_INT */ \ + { ompi_mpi_op_three_buff_##name##_long }, /* OMPI_OP_TYPE_LONG */ \ + { ompi_mpi_op_three_buff_##name##_short }, /* OMPI_OP_TYPE_SHORT */ \ + { ompi_mpi_op_three_buff_##name##_unsigned_short }, /* OMPI_OP_TYPE_UNSIGNED_SHORT */ \ + { ompi_mpi_op_three_buff_##name##_unsigned }, /* OMPI_OP_TYPE_UNSIGNED */ \ + { ompi_mpi_op_three_buff_##name##_unsigned_long }, /* OMPI_OP_TYPE_UNSIGNED_LONG */ \ + C_INTEGER_LONG_LONG_3BUFF(name) + #define C_INTEGER_NULL \ { NULL }, /* OMPI_OP_TYPE_UNSIGNED_CHAR */ \ { NULL }, /* OMPI_OP_TYPE_SIGNED_CHAR */ \ @@ -99,37 +116,61 @@ OBJ_CLASS_INSTANCE(ompi_op_t, opal_object_t, { NULL }, /* OMPI_OP_TYPE_LONG_LONG_INT */ \ { NULL } /* OMPI_OP_TYPE_UNSIGNED_LONG_LONG */ +#define C_INTEGER_NULL_3BUFF \ + { NULL }, /* OMPI_OP_TYPE_UNSIGNED_CHAR */ \ + { NULL }, /* OMPI_OP_TYPE_SIGNED_CHAR */ \ + { NULL }, /* 
OMPI_OP_TYPE_INT */ \ + { NULL }, /* OMPI_OP_TYPE_LONG */ \ + { NULL }, /* OMPI_OP_TYPE_SHORT */ \ + { NULL }, /* OMPI_OP_TYPE_UNSIGNED_SHORT */ \ + { NULL }, /* OMPI_OP_TYPE_UNSIGNED */ \ + { NULL }, /* OMPI_OP_TYPE_UNSIGNED_LONG */ \ + { NULL }, /* OMPI_OP_TYPE_LONG_LONG_INT */ \ + { NULL } /* OMPI_OP_TYPE_UNSIGNED_LONG_LONG */ + /** All the Fortran integers ********************************************/ #if OMPI_HAVE_FORTRAN_INTEGER #define FORTRAN_INTEGER_PLAIN(name) { ompi_mpi_op_##name##_fortran_integer } +#define FORTRAN_INTEGER_PLAIN_3BUFF(name) { ompi_mpi_op_three_buff_##name##_fortran_integer } #else #define FORTRAN_INTEGER_PLAIN(name) { NULL } +#define FORTRAN_INTEGER_PLAIN_3BUFF(name) { NULL } #endif #if OMPI_HAVE_FORTRAN_INTEGER1 #define FORTRAN_INTEGER1(name) { ompi_mpi_op_##name##_fortran_integer1 } +#define FORTRAN_INTEGER1_3BUFF(name) { ompi_mpi_op_three_buff_##name##_fortran_integer1 } #else #define FORTRAN_INTEGER1(name) { NULL } +#define FORTRAN_INTEGER1_3BUFF(name) { NULL } #endif #if OMPI_HAVE_FORTRAN_INTEGER2 #define FORTRAN_INTEGER2(name) { ompi_mpi_op_##name##_fortran_integer2 } +#define FORTRAN_INTEGER2_3BUFF(name) { ompi_mpi_op_three_buff_##name##_fortran_integer2 } #else #define FORTRAN_INTEGER2(name) { NULL } +#define FORTRAN_INTEGER2_3BUFF(name) { NULL } #endif #if OMPI_HAVE_FORTRAN_INTEGER4 #define FORTRAN_INTEGER4(name) { ompi_mpi_op_##name##_fortran_integer4 } +#define FORTRAN_INTEGER4_3BUFF(name) { ompi_mpi_op_three_buff_##name##_fortran_integer4 } #else #define FORTRAN_INTEGER4(name) { NULL } +#define FORTRAN_INTEGER4_3BUFF(name) { NULL } #endif #if OMPI_HAVE_FORTRAN_INTEGER8 #define FORTRAN_INTEGER8(name) { ompi_mpi_op_##name##_fortran_integer8 } +#define FORTRAN_INTEGER8_3BUFF(name) { ompi_mpi_op_three_buff_##name##_fortran_integer8 } #else #define FORTRAN_INTEGER8(name) { NULL } +#define FORTRAN_INTEGER8_3BUFF(name) { NULL } #endif #if OMPI_HAVE_FORTRAN_INTEGER16 #define FORTRAN_INTEGER16(name) { 
ompi_mpi_op_##name##_fortran_integer16 } +#define FORTRAN_INTEGER16_3BUFF(name) { ompi_mpi_op_three_buff_##name##_fortran_integer16 } #else #define FORTRAN_INTEGER16(name) { NULL } +#define FORTRAN_INTEGER16_3BUFF(name) { NULL } #endif #define FORTRAN_INTEGER(name) \ FORTRAN_INTEGER_PLAIN(name), /* OMPI_OP_TYPE_INTEGER */ \ @@ -138,6 +179,15 @@ OBJ_CLASS_INSTANCE(ompi_op_t, opal_object_t, FORTRAN_INTEGER4(name), /* OMPI_OP_TYPE_INTEGER4 */ \ FORTRAN_INTEGER8(name), /* OMPI_OP_TYPE_INTEGER8 */ \ FORTRAN_INTEGER16(name) /* OMPI_OP_TYPE_INTEGER16 */ + +#define FORTRAN_INTEGER_3BUFF(name) \ + FORTRAN_INTEGER_PLAIN_3BUFF(name), /* OMPI_OP_TYPE_INTEGER */ \ + FORTRAN_INTEGER1_3BUFF(name), /* OMPI_OP_TYPE_INTEGER1 */ \ + FORTRAN_INTEGER2_3BUFF(name), /* OMPI_OP_TYPE_INTEGER2 */ \ + FORTRAN_INTEGER4_3BUFF(name), /* OMPI_OP_TYPE_INTEGER4 */ \ + FORTRAN_INTEGER8_3BUFF(name), /* OMPI_OP_TYPE_INTEGER8 */ \ + FORTRAN_INTEGER16_3BUFF(name) /* OMPI_OP_TYPE_INTEGER16 */ + #define FORTRAN_INTEGER_NULL \ { NULL }, /* OMPI_OP_TYPE_INTEGER */ \ { NULL }, /* OMPI_OP_TYPE_INTEGER1 */ \ @@ -146,32 +196,50 @@ OBJ_CLASS_INSTANCE(ompi_op_t, opal_object_t, { NULL }, /* OMPI_OP_TYPE_INTEGER8 */ \ { NULL } /* OMPI_OP_TYPE_INTEGER16 */ +#define FORTRAN_INTEGER_NULL_3BUFF \ + { NULL }, /* OMPI_OP_TYPE_INTEGER */ \ + { NULL }, /* OMPI_OP_TYPE_INTEGER1 */ \ + { NULL }, /* OMPI_OP_TYPE_INTEGER2 */ \ + { NULL }, /* OMPI_OP_TYPE_INTEGER4 */ \ + { NULL }, /* OMPI_OP_TYPE_INTEGER8 */ \ + { NULL } /* OMPI_OP_TYPE_INTEGER16 */ + /** All the Fortran reals ***********************************************/ #if OMPI_HAVE_FORTRAN_REAL #define FLOATING_POINT_FORTRAN_REAL_PLAIN(name) { ompi_mpi_op_##name##_fortran_real } +#define FLOATING_POINT_FORTRAN_REAL_PLAIN_3BUFF(name) { ompi_mpi_op_three_buff_##name##_fortran_real } #else #define FLOATING_POINT_FORTRAN_REAL_PLAIN(name) { NULL } +#define FLOATING_POINT_FORTRAN_REAL_PLAIN_3BUFF(name) { NULL } #endif #if OMPI_HAVE_FORTRAN_REAL2 #define 
FLOATING_POINT_FORTRAN_REAL2(name) { ompi_mpi_op_##name##_fortran_real2 } +#define FLOATING_POINT_FORTRAN_REAL2_3BUFF(name) { ompi_mpi_op_three_buff_##name##_fortran_real2 } #else #define FLOATING_POINT_FORTRAN_REAL2(name) { NULL } +#define FLOATING_POINT_FORTRAN_REAL2_3BUFF(name) { NULL } #endif #if OMPI_HAVE_FORTRAN_REAL4 #define FLOATING_POINT_FORTRAN_REAL4(name) { ompi_mpi_op_##name##_fortran_real4 } +#define FLOATING_POINT_FORTRAN_REAL4_3BUFF(name) { ompi_mpi_op_three_buff_##name##_fortran_real4 } #else #define FLOATING_POINT_FORTRAN_REAL4(name) { NULL } +#define FLOATING_POINT_FORTRAN_REAL4_3BUFF(name) { NULL } #endif #if OMPI_HAVE_FORTRAN_REAL8 #define FLOATING_POINT_FORTRAN_REAL8(name) { ompi_mpi_op_##name##_fortran_real8 } +#define FLOATING_POINT_FORTRAN_REAL8_3BUFF(name) { ompi_mpi_op_three_buff_##name##_fortran_real8 } #else #define FLOATING_POINT_FORTRAN_REAL8(name) { NULL } +#define FLOATING_POINT_FORTRAN_REAL8_3BUFF(name) { NULL } #endif #if OMPI_HAVE_FORTRAN_REAL16 #define FLOATING_POINT_FORTRAN_REAL16(name) { ompi_mpi_op_##name##_fortran_real16 } +#define FLOATING_POINT_FORTRAN_REAL16_3BUFF(name) { ompi_mpi_op_three_buff_##name##_fortran_real16 } #else #define FLOATING_POINT_FORTRAN_REAL16(name) { NULL } +#define FLOATING_POINT_FORTRAN_REAL16_3BUFF(name) { NULL } #endif #define FLOATING_POINT_FORTRAN_REAL(name) \ @@ -181,13 +249,23 @@ OBJ_CLASS_INSTANCE(ompi_op_t, opal_object_t, FLOATING_POINT_FORTRAN_REAL8(name), /* OMPI_OP_TYPE_REAL8 */ \ FLOATING_POINT_FORTRAN_REAL16(name) /* OMPI_OP_TYPE_REAL16 */ +#define FLOATING_POINT_FORTRAN_REAL_3BUFF(name) \ + FLOATING_POINT_FORTRAN_REAL_PLAIN_3BUFF(name), /* OMPI_OP_TYPE_REAL */ \ + FLOATING_POINT_FORTRAN_REAL2_3BUFF(name), /* OMPI_OP_TYPE_REAL2 */ \ + FLOATING_POINT_FORTRAN_REAL4_3BUFF(name), /* OMPI_OP_TYPE_REAL4 */ \ + FLOATING_POINT_FORTRAN_REAL8_3BUFF(name), /* OMPI_OP_TYPE_REAL8 */ \ + FLOATING_POINT_FORTRAN_REAL16_3BUFF(name) /* OMPI_OP_TYPE_REAL16 */ + /** Fortran double precision 
********************************************/ #if OMPI_HAVE_FORTRAN_DOUBLE_PRECISION #define FLOATING_POINT_FORTRAN_DOUBLE_PRECISION(name) \ { ompi_mpi_op_##name##_fortran_double_precision } +#define FLOATING_POINT_FORTRAN_DOUBLE_PRECISION_3BUFF(name) \ + { ompi_mpi_op_three_buff_##name##_fortran_double_precision } #else #define FLOATING_POINT_FORTRAN_DOUBLE_PRECISION(name) { NULL } +#define FLOATING_POINT_FORTRAN_DOUBLE_PRECISION_3BUFF(name) { NULL } #endif /** Floating point, including all the Fortran reals *********************/ @@ -198,6 +276,14 @@ OBJ_CLASS_INSTANCE(ompi_op_t, opal_object_t, FLOATING_POINT_FORTRAN_REAL(name), /* OMPI_OP_TYPE_REAL */ \ FLOATING_POINT_FORTRAN_DOUBLE_PRECISION(name), /* OMPI_OP_TYPE_DOUBLE_PRECISION */ \ { ompi_mpi_op_##name##_long_double } /* OMPI_OP_TYPE_LONG_DOUBLE */ + +#define FLOATING_POINT_3BUFF(name) \ + { ompi_mpi_op_three_buff_##name##_float }, /* OMPI_OP_TYPE_FLOAT */\ + { ompi_mpi_op_three_buff_##name##_double }, /* OMPI_OP_TYPE_DOUBLE */\ + FLOATING_POINT_FORTRAN_REAL_3BUFF(name), /* OMPI_OP_TYPE_REAL */ \ + FLOATING_POINT_FORTRAN_DOUBLE_PRECISION_3BUFF(name), /* OMPI_OP_TYPE_DOUBLE_PRECISION */ \ + { ompi_mpi_op_three_buff_##name##_long_double } /* OMPI_OP_TYPE_LONG_DOUBLE */ + #define FLOATING_POINT_NULL \ { NULL }, /* OMPI_OP_TYPE_FLOAT */ \ { NULL }, /* OMPI_OP_TYPE_DOUBLE */ \ @@ -209,48 +295,79 @@ OBJ_CLASS_INSTANCE(ompi_op_t, opal_object_t, { NULL }, /* OMPI_OP_TYPE_DOUBLE_PRECISION */ \ { NULL } /* OMPI_OP_TYPE_LONG_DOUBLE */ +#define FLOATING_POINT_NULL_3BUFF \ + { NULL }, /* OMPI_OP_TYPE_FLOAT */ \ + { NULL }, /* OMPI_OP_TYPE_DOUBLE */ \ + { NULL }, /* OMPI_OP_TYPE_REAL */ \ + { NULL }, /* OMPI_OP_TYPE_REAL2 */ \ + { NULL }, /* OMPI_OP_TYPE_REAL4 */ \ + { NULL }, /* OMPI_OP_TYPE_REAL8 */ \ + { NULL }, /* OMPI_OP_TYPE_REAL16 */ \ + { NULL }, /* OMPI_OP_TYPE_DOUBLE_PRECISION */ \ + { NULL } /* OMPI_OP_TYPE_LONG_DOUBLE */ + /** Fortran logical *****************************************************/ #if 
OMPI_HAVE_FORTRAN_LOGICAL #define FORTRAN_LOGICAL(name) \ { ompi_mpi_op_##name##_fortran_logical } /* OMPI_OP_TYPE_LOGICAL */ +#define FORTRAN_LOGICAL_3BUFF(name) \ + { ompi_mpi_op_three_buff_##name##_fortran_logical } /* OMPI_OP_TYPE_LOGICAL */ #else #define FORTRAN_LOGICAL(name) { NULL } +#define FORTRAN_LOGICAL_3BUFF(name) { NULL } #endif #define LOGICAL(name) \ FORTRAN_LOGICAL(name), \ { ompi_mpi_op_##name##_bool } /* OMPI_OP_TYPE_BOOL */ +#define LOGICAL_3BUFF(name) \ + FORTRAN_LOGICAL_3BUFF(name), \ + { ompi_mpi_op_three_buff_##name##_bool } /* OMPI_OP_TYPE_BOOL */ #define LOGICAL_NULL \ { NULL }, /* OMPI_OP_TYPE_LOGICAL */ \ { NULL } /* OMPI_OP_TYPE_BOOL */ +#define LOGICAL_NULL_3BUFF \ + { NULL }, /* OMPI_OP_TYPE_LOGICAL */ \ + { NULL } /* OMPI_OP_TYPE_BOOL */ + /** Fortran complex *****************************************************/ #if OMPI_HAVE_FORTRAN_REAL && OMPI_HAVE_FORTRAN_COMPLEX #define COMPLEX_PLAIN(name) { ompi_mpi_op_##name##_fortran_complex } +#define COMPLEX_PLAIN_3BUFF(name) { ompi_mpi_op_three_buff_##name##_fortran_complex } #else #define COMPLEX_PLAIN(name) { NULL } +#define COMPLEX_PLAIN_3BUFF(name) { NULL } #endif #if OMPI_HAVE_FORTRAN_DOUBLE_PRECISION && OMPI_HAVE_FORTRAN_COMPLEX #define COMPLEX_DOUBLE(name) { ompi_mpi_op_##name##_fortran_double_complex } +#define COMPLEX_DOUBLE_3BUFF(name) { ompi_mpi_op_three_buff_##name##_fortran_double_complex } #else #define COMPLEX_DOUBLE(name) { NULL } +#define COMPLEX_DOUBLE_3BUFF(name) { NULL } #endif #if OMPI_HAVE_FORTRAN_REAL4 && OMPI_HAVE_FORTRAN_COMPLEX8 #define COMPLEX8(name) { ompi_mpi_op_##name##_fortran_complex8 } +#define COMPLEX8_3BUFF(name) { ompi_mpi_op_three_buff_##name##_fortran_complex8 } #else #define COMPLEX8(name) { NULL } +#define COMPLEX8_3BUFF(name) { NULL } #endif #if OMPI_HAVE_FORTRAN_REAL8 && OMPI_HAVE_FORTRAN_COMPLEX16 #define COMPLEX16(name) { ompi_mpi_op_##name##_fortran_complex16 } +#define COMPLEX16_3BUFF(name) { ompi_mpi_op_three_buff_##name##_fortran_complex16 } 
#else #define COMPLEX16(name) { NULL } +#define COMPLEX16_3BUFF(name) { NULL } #endif #if OMPI_HAVE_FORTRAN_REAL16 && OMPI_HAVE_FORTRAN_COMPLEX32 #define COMPLEX32(name) { ompi_mpi_op_##name##_fortran_complex32 } +#define COMPLEX32_3BUFF(name) { ompi_mpi_op_three_buff_##name##_fortran_complex32 } #else #define COMPLEX32(name) { NULL } +#define COMPLEX32_3BUFF(name) { NULL } #endif #define COMPLEX(name) \ @@ -259,6 +376,14 @@ OBJ_CLASS_INSTANCE(ompi_op_t, opal_object_t, COMPLEX8(name), /* OMPI_OP_TYPE_COMPLEX8 */ \ COMPLEX16(name), /* OMPI_OP_TYPE_COMPLEX16 */ \ COMPLEX32(name) /* OMPI_OP_TYPE_COMPLEX32 */ + +#define COMPLEX_3BUFF(name) \ + COMPLEX_PLAIN_3BUFF(name), /* OMPI_OP_TYPE_COMPLEX */ \ + COMPLEX_DOUBLE_3BUFF(name), /* OMPI_OP_TYPE_DOUBLE_COMPLEX */ \ + COMPLEX8_3BUFF(name), /* OMPI_OP_TYPE_COMPLEX8 */ \ + COMPLEX16_3BUFF(name), /* OMPI_OP_TYPE_COMPLEX16 */ \ + COMPLEX32_3BUFF(name) /* OMPI_OP_TYPE_COMPLEX32 */ + #define COMPLEX_NULL \ { NULL }, /* OMPI_OP_TYPE_COMPLEX */ \ { NULL }, /* OMPI_OP_TYPE_DOUBLE_COMPLEX */ \ @@ -266,30 +391,49 @@ OBJ_CLASS_INSTANCE(ompi_op_t, opal_object_t, { NULL }, /* OMPI_OP_TYPE_COMPLEX16 */ \ { NULL } /* OMPI_OP_TYPE_COMPLEX32 */ +#define COMPLEX_NULL_3BUFF \ + { NULL }, /* OMPI_OP_TYPE_COMPLEX */ \ + { NULL }, /* OMPI_OP_TYPE_DOUBLE_COMPLEX */ \ + { NULL }, /* OMPI_OP_TYPE_COMPLEX8 */ \ + { NULL }, /* OMPI_OP_TYPE_COMPLEX16 */ \ + { NULL } /* OMPI_OP_TYPE_COMPLEX32 */ + /** Byte ****************************************************************/ #define BYTE(name) \ { ompi_mpi_op_##name##_byte } /* OMPI_OP_TYPE_BYTE */ +#define BYTE_3BUFF(name) \ + { ompi_mpi_op_three_buff_##name##_byte } /* OMPI_OP_TYPE_BYTE */ + #define BYTE_NULL \ { NULL } /* OMPI_OP_TYPE_BYTE */ +#define BYTE_NULL_3BUFF \ + { NULL } /* OMPI_OP_TYPE_BYTE */ + /** Fortran complex *****************************************************/ /** Fortran "2" types ***************************************************/ #if OMPI_HAVE_FORTRAN_REAL #define 
TWOLOC_FORTRAN_2REAL(name) { ompi_mpi_op_##name##_2real } +#define TWOLOC_FORTRAN_2REAL_3BUFF(name) { ompi_mpi_op_three_buff_##name##_2real } #else #define TWOLOC_FORTRAN_2REAL(name) { NULL } +#define TWOLOC_FORTRAN_2REAL_3BUFF(name) { NULL } #endif #if OMPI_HAVE_FORTRAN_DOUBLE_PRECISION #define TWOLOC_FORTRAN_2DOUBLE_PRECISION(name) { ompi_mpi_op_##name##_2double_precision } +#define TWOLOC_FORTRAN_2DOUBLE_PRECISION_3BUFF(name) { ompi_mpi_op_three_buff_##name##_2double_precision } #else #define TWOLOC_FORTRAN_2DOUBLE_PRECISION(name) { NULL } +#define TWOLOC_FORTRAN_2DOUBLE_PRECISION_3BUFF(name) { NULL } #endif #if OMPI_HAVE_FORTRAN_INTEGER #define TWOLOC_FORTRAN_2INTEGER(name) { ompi_mpi_op_##name##_2integer } +#define TWOLOC_FORTRAN_2INTEGER_3BUFF(name) { ompi_mpi_op_three_buff_##name##_2integer } #else #define TWOLOC_FORTRAN_2INTEGER(name) { NULL } +#define TWOLOC_FORTRAN_2INTEGER_3BUFF(name) { NULL } #endif /** All "2" types *******************************************************/ @@ -304,6 +448,18 @@ OBJ_CLASS_INSTANCE(ompi_op_t, opal_object_t, { ompi_mpi_op_##name##_2int }, /* OMPI_OP_TYPE_2INT */ \ { ompi_mpi_op_##name##_short_int }, /* OMPI_OP_TYPE_SHORT_INT */ \ { ompi_mpi_op_##name##_long_double_int } /* OMPI_OP_TYPE_LONG_DOUBLE_INT */ + +#define TWOLOC_3BUFF(name) \ + TWOLOC_FORTRAN_2REAL_3BUFF(name), /* OMPI_OP_TYPE_2REAL */ \ + TWOLOC_FORTRAN_2DOUBLE_PRECISION_3BUFF(name), /* OMPI_OP_TYPE_2DOUBLE_PRECISION */ \ + TWOLOC_FORTRAN_2INTEGER_3BUFF(name), /* OMPI_OP_TYPE_2INTEGER */ \ + { ompi_mpi_op_three_buff_##name##_float_int }, /* OMPI_OP_TYPE_FLOAT_INT */ \ + { ompi_mpi_op_three_buff_##name##_double_int }, /* OMPI_OP_TYPE_DOUBLE_INT */ \ + { ompi_mpi_op_three_buff_##name##_long_int }, /* OMPI_OP_TYPE_LONG_INT */ \ + { ompi_mpi_op_three_buff_##name##_2int }, /* OMPI_OP_TYPE_2INT */ \ + { ompi_mpi_op_three_buff_##name##_short_int }, /* OMPI_OP_TYPE_SHORT_INT */ \ + { ompi_mpi_op_three_buff_##name##_long_double_int } /* OMPI_OP_TYPE_LONG_DOUBLE_INT */ + 
#define TWOLOC_NULL \ { NULL }, /* OMPI_OP_TYPE_2REAL */\ { NULL }, /* OMPI_OP_TYPE_2DOUBLE_PRECISION */ \ @@ -315,6 +471,17 @@ OBJ_CLASS_INSTANCE(ompi_op_t, opal_object_t, { NULL }, /* OMPI_OP_TYPE_SHORT_INT */ \ { NULL } /* OMPI_OP_TYPE_LONG_DOUBLE_INT */ +#define TWOLOC_NULL_3BUFF \ + { NULL }, /* OMPI_OP_TYPE_2REAL */\ + { NULL }, /* OMPI_OP_TYPE_2DOUBLE_PRECISION */ \ + { NULL }, /* OMPI_OP_TYPE_2INTEGER */ \ + { NULL }, /* OMPI_OP_TYPE_FLOAT_INT */ \ + { NULL }, /* OMPI_OP_TYPE_DOUBLE_INT */ \ + { NULL }, /* OMPI_OP_TYPE_LONG_INT */ \ + { NULL }, /* OMPI_OP_TYPE_2INT */ \ + { NULL }, /* OMPI_OP_TYPE_SHORT_INT */ \ + { NULL } /* OMPI_OP_TYPE_LONG_DOUBLE_INT */ + /* * MPI_OP_NULL @@ -338,7 +505,14 @@ ompi_op_t ompi_mpi_op_null = { COMPLEX_NULL, BYTE_NULL, TWOLOC_NULL }, - -1 + -1, + { C_INTEGER_NULL_3BUFF, + FORTRAN_INTEGER_NULL_3BUFF, + FLOATING_POINT_NULL_3BUFF, + LOGICAL_NULL_3BUFF, + COMPLEX_NULL_3BUFF, + BYTE_NULL_3BUFF, + TWOLOC_NULL_3BUFF } }; @@ -358,7 +532,14 @@ ompi_op_t ompi_mpi_op_max = { COMPLEX_NULL, BYTE_NULL, TWOLOC_NULL }, - -1 + -1, + { C_INTEGER_3BUFF(max), + FORTRAN_INTEGER_3BUFF(max), + FLOATING_POINT_3BUFF(max), + LOGICAL_NULL_3BUFF, + COMPLEX_NULL_3BUFF, + BYTE_NULL_3BUFF, + TWOLOC_NULL_3BUFF } }; @@ -377,7 +558,14 @@ ompi_op_t ompi_mpi_op_min = { COMPLEX_NULL, BYTE_NULL, TWOLOC_NULL }, - -1 + -1, + { C_INTEGER_3BUFF(min), + FORTRAN_INTEGER_3BUFF(min), + FLOATING_POINT_3BUFF(min), + LOGICAL_NULL_3BUFF, + COMPLEX_NULL_3BUFF, + BYTE_NULL_3BUFF, + TWOLOC_NULL_3BUFF } }; @@ -396,7 +584,14 @@ ompi_op_t ompi_mpi_op_sum = { COMPLEX(sum), BYTE_NULL, TWOLOC_NULL }, - -1 + -1, + { C_INTEGER_3BUFF(sum), + FORTRAN_INTEGER_3BUFF(sum), + FLOATING_POINT_3BUFF(sum), + LOGICAL_NULL_3BUFF, + COMPLEX_3BUFF(sum), + BYTE_NULL_3BUFF, + TWOLOC_NULL_3BUFF } }; @@ -415,7 +610,14 @@ ompi_op_t ompi_mpi_op_prod = { COMPLEX(prod), BYTE_NULL, TWOLOC_NULL }, - -1 + -1, + { C_INTEGER_3BUFF(prod), + FORTRAN_INTEGER_3BUFF(prod), + FLOATING_POINT_3BUFF(prod), + 
LOGICAL_NULL_3BUFF, + COMPLEX_3BUFF(prod), + BYTE_NULL_3BUFF, + TWOLOC_NULL_3BUFF } }; @@ -434,7 +636,14 @@ ompi_op_t ompi_mpi_op_land = { COMPLEX_NULL, BYTE_NULL, TWOLOC_NULL }, - -1 + -1, + { C_INTEGER_3BUFF(land), + FORTRAN_INTEGER_NULL_3BUFF, + FLOATING_POINT_NULL_3BUFF, + LOGICAL_3BUFF(land), + COMPLEX_NULL_3BUFF, + BYTE_NULL_3BUFF, + TWOLOC_NULL_3BUFF } }; @@ -453,7 +662,14 @@ ompi_op_t ompi_mpi_op_band = { COMPLEX_NULL, BYTE(band), TWOLOC_NULL }, - -1 + -1, + { C_INTEGER_3BUFF(band), + FORTRAN_INTEGER_3BUFF(band), + FLOATING_POINT_NULL_3BUFF, + LOGICAL_NULL_3BUFF, + COMPLEX_NULL_3BUFF, + BYTE_3BUFF(band), + TWOLOC_NULL_3BUFF } }; @@ -472,7 +688,14 @@ ompi_op_t ompi_mpi_op_lor = { COMPLEX_NULL, BYTE_NULL, TWOLOC_NULL }, - -1 + -1, + { C_INTEGER_3BUFF(lor), + FORTRAN_INTEGER_NULL_3BUFF, + FLOATING_POINT_NULL_3BUFF, + LOGICAL_3BUFF(lor), + COMPLEX_NULL_3BUFF, + BYTE_NULL_3BUFF, + TWOLOC_NULL_3BUFF } }; @@ -491,7 +714,14 @@ ompi_op_t ompi_mpi_op_bor = { COMPLEX_NULL, BYTE(bor), TWOLOC_NULL }, - -1 + -1, + { C_INTEGER_3BUFF(bor), + FORTRAN_INTEGER_3BUFF(bor), + FLOATING_POINT_NULL_3BUFF, + LOGICAL_NULL_3BUFF, + COMPLEX_NULL_3BUFF, + BYTE_3BUFF(bor), + TWOLOC_NULL_3BUFF } }; @@ -510,7 +740,14 @@ ompi_op_t ompi_mpi_op_lxor = { COMPLEX_NULL, BYTE_NULL, TWOLOC_NULL }, - -1 + -1, + { C_INTEGER_3BUFF(lxor), + FORTRAN_INTEGER_NULL_3BUFF, + FLOATING_POINT_NULL_3BUFF, + LOGICAL_3BUFF(lxor), + COMPLEX_NULL_3BUFF, + BYTE_NULL_3BUFF, + TWOLOC_NULL_3BUFF } }; @@ -529,7 +766,14 @@ ompi_op_t ompi_mpi_op_bxor = { COMPLEX_NULL, BYTE(bxor), TWOLOC_NULL }, - -1 + -1, + { C_INTEGER_3BUFF(bxor), + FORTRAN_INTEGER_3BUFF(bxor), + FLOATING_POINT_NULL_3BUFF, + LOGICAL_NULL_3BUFF, + COMPLEX_NULL_3BUFF, + BYTE_3BUFF(bxor), + TWOLOC_NULL_3BUFF } }; @@ -548,7 +792,14 @@ ompi_op_t ompi_mpi_op_maxloc = { COMPLEX_NULL, BYTE_NULL, TWOLOC(maxloc) }, - -1 + -1, + { C_INTEGER_NULL_3BUFF, + FORTRAN_INTEGER_NULL_3BUFF, + FLOATING_POINT_NULL_3BUFF, + LOGICAL_NULL_3BUFF, + COMPLEX_NULL_3BUFF, + 
BYTE_NULL_3BUFF, + TWOLOC_3BUFF(maxloc) } }; @@ -567,7 +818,14 @@ ompi_op_t ompi_mpi_op_minloc = { COMPLEX_NULL, BYTE_NULL, TWOLOC(minloc) }, - -1 + -1, + { C_INTEGER_NULL_3BUFF, + FORTRAN_INTEGER_NULL_3BUFF, + FLOATING_POINT_NULL_3BUFF, + LOGICAL_NULL_3BUFF, + COMPLEX_NULL_3BUFF, + BYTE_NULL_3BUFF, + TWOLOC_3BUFF(minloc) } }; /* @@ -589,7 +847,14 @@ ompi_op_t ompi_mpi_op_replace = { COMPLEX_NULL, BYTE_NULL, TWOLOC_NULL }, - -1 + -1, + { C_INTEGER_NULL_3BUFF, + FORTRAN_INTEGER_NULL_3BUFF, + FLOATING_POINT_NULL_3BUFF, + LOGICAL_NULL_3BUFF, + COMPLEX_NULL_3BUFF, + BYTE_NULL_3BUFF, + TWOLOC_NULL_3BUFF } }; /* diff --git a/ompi/op/op.h b/ompi/op/op.h index 4dfa86f725..8aa89be251 100644 --- a/ompi/op/op.h +++ b/ompi/op/op.h @@ -194,12 +194,23 @@ enum { */ typedef void (ompi_op_c_handler_fn_t)(void *, void *, int *, MPI_Datatype *); +/* + * Three buffer ( two input and one output) function prototype + */ +typedef void (ompi_op_3buff_c_handler_fn_t)(volatile void *, volatile void *, + volatile void *, int *, MPI_Datatype *); + /** * Typedef for fortran op functions. 
*/
 typedef void (ompi_op_fortran_handler_fn_t)(void *, void *,
                                             MPI_Fint *, MPI_Fint *);
+/*
+ * Three buffer (two input, one output) Fortran handler function prototype
+ */
+typedef void (ompi_op_3buff_fortran_handler_fn_t)(volatile void *,
+        volatile void *, volatile void *, MPI_Fint *, MPI_Fint *);
 
 
 /**
@@ -212,6 +223,12 @@ typedef void (ompi_op_fortran_handler_fn_t)(void *, void *,
 typedef void (ompi_op_cxx_handler_fn_t)(void *, void *, int *,
                                         MPI_Datatype *, MPI_User_function *op);
 
+/*
+ * Three buffer (two input, one output) C++ intercept function prototype
+ */
+typedef void (ompi_op_3buff_cxx_handler_fn_t)(volatile void *, volatile void *,
+        volatile void *, int *, MPI_Datatype *, MPI_User_function *op);
+
 
 /*
  * Flags for MPI_Op
@@ -265,7 +282,22 @@ struct ompi_op_t {
 
     /** Index in Fortran <-> C translation array */
     int o_f_to_c_index;
+
+    union {
+        /** C handler function pointer */
+        ompi_op_3buff_c_handler_fn_t *c_fn;
+        /** Fortran handler function pointer */
+        ompi_op_3buff_fortran_handler_fn_t *fort_fn;
+        /** C++ intercept function pointer -- see lengthy comment in
+            ompi/mpi/cxx/intercepts.cc::ompi_mpi_cxx_op_intercept() for
+            an explanation */
+        ompi_op_3buff_cxx_handler_fn_t *cxx_intercept_fn;
+    } o_3buff_func[OMPI_OP_TYPE_MAX];
+    /**< Array of three buffer function pointers, indexed on the operation
+       type.  For non-intrinsic MPI_Op's, only the 0th element will be
+       meaningful. */
 };
+
 /**
  * Convenience typedef
  */
@@ -619,6 +651,89 @@ static inline void ompi_op_reduce(ompi_op_t *op, void *source, void *target,
     }
 }
 
+/**
+ * Perform a three-buffer reduction operation.
+ *
+ * @param op The operation (IN)
+ * @param source1 First source (input) buffer (IN)
+ * @param source2 Second source (input) buffer (IN)
+ * @param target Target (output) buffer (OUT)
+ * @param count Number of elements (IN)
+ * @param dtype MPI datatype (IN)
+ *
+ * @returns void As with MPI user-defined reduction functions, there
+ * is no return code from this function.
+ *
+ * Perform a reduction operation with count elements of type dtype,
+ * taking its operands from the buffers source1 and source2.  The
+ * three buffers are handed, in the order source1, source2, target,
+ * to the handler selected from the op's o_3buff_func array; target
+ * is the output argument of that handler.
+ *
+ * This function figures out which reduction operation function to
+ * invoke and whether to invoke it with C- or Fortran-style invocation
+ * methods.  If the op is intrinsic and the datatype is predefined,
+ * the handler indexed by ompi_op_ddt_map[dtype->id] is invoked.
+ * Otherwise, the op is assumed to be a user op and the first function
+ * pointer in the op's o_3buff_func array will be used.
+ *
+ * NOTE: This function assumes that a correct combination will be
+ * given to it; it makes no provision for errors (in the name of
+ * optimization).  If you give it an intrinsic op with a datatype that
+ * is not defined to have that operation, it is likely to seg fault.
+ */
+static inline void ompi_3buff_op_reduce(ompi_op_t *op, void *source1, void *source2,
+        void *target, int count, ompi_datatype_t *dtype)
+{
+    MPI_Fint f_dtype, f_count;
+    void * restrict src1;
+    void * restrict src2;
+    void * restrict tgt;
+    src1=(void * restrict) source1;
+    src2=(void * restrict) source2;
+    tgt=(void * restrict) target;
+
+    /*
+     * Call the reduction function.  Two dimensions: a) if both the op
+     * and the datatype are intrinsic, we have a series of predefined
+     * functions for each datatype, b) if the op has a fortran callback
+     * function or not.
+     *
+     * NOTE: We assume here that we will get a valid result back from
+     * the ompi_op_ddt_map[] (and not -1) -- if we do get -1, then the
+     * parameter check in the top-level MPI function should have caught
+     * it.  If we get -1 because the top-level parameter check is turned
+     * off, then it's an erroneous program and it's the user's fault.
+     * :-)
+     */
+
+    if (0 != (op->o_flags & OMPI_OP_FLAGS_INTRINSIC) &&
+        ompi_ddt_is_predefined(dtype)) {
+        if (0 != (op->o_flags & OMPI_OP_FLAGS_FORTRAN_FUNC)) {
+            f_dtype = OMPI_INT_2_FINT(dtype->d_f_to_c_index);
+            f_count = OMPI_INT_2_FINT(count);
+            op->o_3buff_func[ompi_op_ddt_map[dtype->id]].fort_fn(src1, src2 , tgt,
+                    &f_count, &f_dtype);
+        } else {
+            op->o_3buff_func[ompi_op_ddt_map[dtype->id]].c_fn(src1, src2, tgt,&count,
+                    &dtype);
+        }
+    }
+
+    /* User-defined function - this can't work and will never be called;
+     * it needs to be taken out soon. */
+
+    else if (0 != (op->o_flags & OMPI_OP_FLAGS_FORTRAN_FUNC)) {
+        f_dtype = OMPI_INT_2_FINT(dtype->d_f_to_c_index);
+        f_count = OMPI_INT_2_FINT(count);
+        op->o_3buff_func[0].fort_fn(src1, src2, tgt, &f_count, &f_dtype);
+    } else if (0 != (op->o_flags & OMPI_OP_FLAGS_CXX_FUNC)) {
+        op->o_3buff_func[0].cxx_intercept_fn(src1, src2, tgt, &count, &dtype,
+                op->o_func[1].c_fn);
+    } else {
+        op->o_3buff_func[0].c_fn(src1, src2, tgt, &count, &dtype);
+    }
+}
 #if defined(c_plusplus) || defined(__cplusplus)
 }
 #endif
diff --git a/ompi/op/op_predefined.c b/ompi/op/op_predefined.c
index 9d72db6123..3b935cacac 100644
--- a/ompi/op/op_predefined.c
+++ b/ompi/op/op_predefined.c
@@ -673,3 +673,667 @@ LOC_FUNC(minloc, short_int, <)
 #if HAVE_LONG_DOUBLE
 LOC_FUNC(minloc, long_double_int, <)
 #endif
+
+
+/*
+ * This is a three buffer (2 input and 1 output) version of the reduction
+ * routines, needed for some optimizations.
+ */
+#define OP_FUNC_3BUF(name, type_name, type, op) /* out[i] = in1[i] op in2[i] */ \
+  void ompi_mpi_op_three_buff_##name##_##type_name(void * restrict in1, \
+      void * restrict in2, void * restrict out, int *count, \
+      MPI_Datatype *dtype) \
+  { \
+      int i; \
+      type *a1 = (type *) in1; \
+      type *a2 = (type *) in2; \
+      type *b = (type *) out; \
+      for (i = 0; i < *count; ++i) { \
+          *(b++) = *(a1++) op *(a2++); \
+      } \
+  }
+/* Complex sum, out[i] = in1[i] + in2[i]; symbol is ompi_mpi_op_three_buff_sum_<type_name>, the name the COMPLEX*_3BUFF(sum) table entries expand to. */
+#define COMPLEX_OP_FUNC_SUM_3BUF(type_name, type) \
+  void ompi_mpi_op_three_buff_sum_##type_name(void * restrict in1, \
+      void * restrict in2, void * restrict out, int *count, \
+      MPI_Datatype *dtype) \
+  { \
+      int i; \
+      type *a1 = (type *) in1; \
+      type *a2 = (type *) in2; \
+      type *b = (type *) out; \
+      for (i = 0; i < *count; ++i, ++b, ++a1, ++a2) { \
+          b->real = a1->real + a2->real; \
+          b->imag = a1->imag + a2->imag; \
+      } \
+  }
+/* Complex product, out[i] = in1[i] * in2[i]; symbol is ompi_mpi_op_three_buff_prod_<type_name>, the name the COMPLEX*_3BUFF(prod) table entries expand to. */
+#define COMPLEX_OP_FUNC_PROD_3BUF(type_name, type) \
+  void ompi_mpi_op_three_buff_prod_##type_name(void * restrict in1, \
+      void * restrict in2, void * restrict out, int *count, \
+      MPI_Datatype *dtype) \
+  { \
+      int i; \
+      type *a1 = (type *) in1; \
+      type *a2 = (type *) in2; \
+      type *b = (type *) out; \
+      type temp; /* both members are computed before anything is stored */ \
+      for (i = 0; i < *count; ++i, ++b, ++a1, ++a2) { \
+          temp.real = a1->real * a2->real - a1->imag * a2->imag; \
+          temp.imag = a1->imag * a2->real + a1->real * a2->imag; \
+          *b = temp; \
+      } \
+  }
+
+
+/*
+ * Since all the functions in this file are essentially identical, we
+ * use a macro to substitute in names and types.  The core operation
+ * in all functions that use this macro is the same.
+ *
+ * This macro is for (out = op(in1, in2)), where op is whatever
+ * current_func is defined as at the point of instantiation.
+ */
+#define FUNC_FUNC_3BUF(name, type_name, type) \
+  void ompi_mpi_op_three_buff_##name##_##type_name(void * restrict in1, \
+      void * restrict in2, void * restrict out, int *count, \
+      MPI_Datatype *dtype) \
+  { \
+      int i; \
+      type *a1 = (type *) in1; \
+      type *a2 = (type *) in2; \
+      type *b = (type *) out; \
+      for (i = 0; i < *count; ++i) { \
+          *(b) = current_func(*(a1), *(a2)); \
+          ++b; \
+          ++a1; \
+          ++a2; \
+      } \
+  }
+
+/*
+ * Since all the functions in this file are essentially identical, we
+ * use a macro to substitute in names and types.  The core operation
+ * in all functions that use this macro is the same.
+ *
+ * This macro is for minloc and maxloc
+ */
+/*
+#define LOC_STRUCT(type_name, type1, type2) \
+  typedef struct { \
+      type1 v; \
+      type2 k; \
+  } ompi_op_predefined_##type_name##_t;
+*/
+
+#define LOC_FUNC_3BUF(name, type_name, op) \
+  void ompi_mpi_op_three_buff_##name##_##type_name(void * restrict in1, \
+      void * restrict in2, void * restrict out, int *count, \
+      MPI_Datatype *dtype) \
+  { \
+      int i; \
+      ompi_op_predefined_##type_name##_t *a1 = (ompi_op_predefined_##type_name##_t*) in1; \
+      ompi_op_predefined_##type_name##_t *a2 = (ompi_op_predefined_##type_name##_t*) in2; \
+      ompi_op_predefined_##type_name##_t *b = (ompi_op_predefined_##type_name##_t*) out; \
+      for (i = 0; i < *count; ++i, ++a1, ++a2, ++b ) { \
+          if (a1->v op a2->v) { /* in1 wins the comparison */ \
+              b->v = a1->v; \
+              b->k = a1->k; \
+          } else if (a1->v == a2->v) { /* tie: keep the smaller index k */ \
+              b->v = a1->v; \
+              b->k = (a2->k < a1->k ? a2->k : a1->k); \
+          } else { /* in2 wins the comparison */ \
+              b->v = a2->v; \
+              b->k = a2->k; \
+          } \
+      } \
+  }
+
+/*************************************************************************
+ * Max
+ *************************************************************************/
+
+#undef current_func
+#define current_func(a, b) ((a) > (b) ?
(a) : (b)) +/* C integer */ +FUNC_FUNC_3BUF(max, signed_char, signed char) +FUNC_FUNC_3BUF(max, unsigned_char, unsigned char) +FUNC_FUNC_3BUF(max, int, int) +FUNC_FUNC_3BUF(max, long, long) +FUNC_FUNC_3BUF(max, short, short) +FUNC_FUNC_3BUF(max, unsigned_short, unsigned short) +FUNC_FUNC_3BUF(max, unsigned, unsigned) +FUNC_FUNC_3BUF(max, unsigned_long, unsigned long) +#if HAVE_LONG_LONG +FUNC_FUNC_3BUF(max, long_long_int, long long int) +FUNC_FUNC_3BUF(max, unsigned_long_long, unsigned long long) +#endif +/* Fortran integer */ +#if OMPI_HAVE_FORTRAN_INTEGER +FUNC_FUNC_3BUF(max, fortran_integer, ompi_fortran_integer_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER1 +FUNC_FUNC_3BUF(max, fortran_integer1, ompi_fortran_integer1_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER2 +FUNC_FUNC_3BUF(max, fortran_integer2, ompi_fortran_integer2_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER4 +FUNC_FUNC_3BUF(max, fortran_integer4, ompi_fortran_integer4_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER8 +FUNC_FUNC_3BUF(max, fortran_integer8, ompi_fortran_integer8_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER16 +FUNC_FUNC_3BUF(max, fortran_integer16, ompi_fortran_integer16_t) +#endif +/* Floating point */ +FUNC_FUNC_3BUF(max, float, float) +FUNC_FUNC_3BUF(max, double, double) +#if HAVE_LONG_DOUBLE +FUNC_FUNC_3BUF(max, long_double, long double) +#endif +#if OMPI_HAVE_FORTRAN_REAL +FUNC_FUNC_3BUF(max, fortran_real, ompi_fortran_real_t) +#endif +#if OMPI_HAVE_FORTRAN_DOUBLE_PRECISION +FUNC_FUNC_3BUF(max, fortran_double_precision, ompi_fortran_double_precision_t) +#endif +#if OMPI_HAVE_FORTRAN_REAL2 +FUNC_FUNC_3BUF(max, fortran_real2, ompi_fortran_real2_t) +#endif +#if OMPI_HAVE_FORTRAN_REAL4 +FUNC_FUNC_3BUF(max, fortran_real4, ompi_fortran_real4_t) +#endif +#if OMPI_HAVE_FORTRAN_REAL8 +FUNC_FUNC_3BUF(max, fortran_real8, ompi_fortran_real8_t) +#endif +#if OMPI_HAVE_FORTRAN_REAL16 +FUNC_FUNC_3BUF(max, fortran_real16, ompi_fortran_real16_t) +#endif + + 
+/************************************************************************* + * Min + *************************************************************************/ + +#undef current_func +#define current_func(a, b) ((a) < (b) ? (a) : (b)) +/* C integer */ +FUNC_FUNC_3BUF(min, signed_char, signed char) +FUNC_FUNC_3BUF(min, unsigned_char, unsigned char) +FUNC_FUNC_3BUF(min, int, int) +FUNC_FUNC_3BUF(min, long, long) +FUNC_FUNC_3BUF(min, short, short) +FUNC_FUNC_3BUF(min, unsigned_short, unsigned short) +FUNC_FUNC_3BUF(min, unsigned, unsigned) +FUNC_FUNC_3BUF(min, unsigned_long, unsigned long) +#if HAVE_LONG_LONG +FUNC_FUNC_3BUF(min, long_long_int, long long int) +FUNC_FUNC_3BUF(min, unsigned_long_long, unsigned long long) +#endif +/* Fortran integer */ +#if OMPI_HAVE_FORTRAN_INTEGER +FUNC_FUNC_3BUF(min, fortran_integer, ompi_fortran_integer_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER1 +FUNC_FUNC_3BUF(min, fortran_integer1, ompi_fortran_integer1_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER2 +FUNC_FUNC_3BUF(min, fortran_integer2, ompi_fortran_integer2_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER4 +FUNC_FUNC_3BUF(min, fortran_integer4, ompi_fortran_integer4_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER8 +FUNC_FUNC_3BUF(min, fortran_integer8, ompi_fortran_integer8_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER16 +FUNC_FUNC_3BUF(min, fortran_integer16, ompi_fortran_integer16_t) +#endif +/* Floating point */ +FUNC_FUNC_3BUF(min, float, float) +FUNC_FUNC_3BUF(min, double, double) +#if HAVE_LONG_DOUBLE +FUNC_FUNC_3BUF(min, long_double, long double) +#endif +#if OMPI_HAVE_FORTRAN_REAL +FUNC_FUNC_3BUF(min, fortran_real, ompi_fortran_real_t) +#endif +#if OMPI_HAVE_FORTRAN_DOUBLE_PRECISION +FUNC_FUNC_3BUF(min, fortran_double_precision, ompi_fortran_double_precision_t) +#endif +#if OMPI_HAVE_FORTRAN_REAL2 +FUNC_FUNC_3BUF(min, fortran_real2, ompi_fortran_real2_t) +#endif +#if OMPI_HAVE_FORTRAN_REAL4 +FUNC_FUNC_3BUF(min, fortran_real4, ompi_fortran_real4_t) +#endif +#if OMPI_HAVE_FORTRAN_REAL8 
+FUNC_FUNC_3BUF(min, fortran_real8, ompi_fortran_real8_t) +#endif +#if OMPI_HAVE_FORTRAN_REAL16 +FUNC_FUNC_3BUF(min, fortran_real16, ompi_fortran_real16_t) +#endif + +/************************************************************************* + * Sum + *************************************************************************/ + +/* C integer */ +OP_FUNC_3BUF(sum, signed_char, signed char, +) +OP_FUNC_3BUF(sum, unsigned_char, unsigned char, +) +OP_FUNC_3BUF(sum, int, int, +) +OP_FUNC_3BUF(sum, long, long, +) +OP_FUNC_3BUF(sum, short, short, +) +OP_FUNC_3BUF(sum, unsigned_short, unsigned short, +) +OP_FUNC_3BUF(sum, unsigned, unsigned, +) +OP_FUNC_3BUF(sum, unsigned_long, unsigned long, +) +#if HAVE_LONG_LONG +OP_FUNC_3BUF(sum, long_long_int, long long int, +) +OP_FUNC_3BUF(sum, unsigned_long_long, unsigned long long, +) +#endif +/* Fortran integer */ +#if OMPI_HAVE_FORTRAN_INTEGER +OP_FUNC_3BUF(sum, fortran_integer, ompi_fortran_integer_t, +) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER1 +OP_FUNC_3BUF(sum, fortran_integer1, ompi_fortran_integer1_t, +) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER2 +OP_FUNC_3BUF(sum, fortran_integer2, ompi_fortran_integer2_t, +) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER4 +OP_FUNC_3BUF(sum, fortran_integer4, ompi_fortran_integer4_t, +) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER8 +OP_FUNC_3BUF(sum, fortran_integer8, ompi_fortran_integer8_t, +) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER16 +OP_FUNC_3BUF(sum, fortran_integer16, ompi_fortran_integer16_t, +) +#endif +/* Floating point */ +OP_FUNC_3BUF(sum, float, float, +) +OP_FUNC_3BUF(sum, double, double, +) +#if HAVE_LONG_DOUBLE +OP_FUNC_3BUF(sum, long_double, long double, +) +#endif +#if OMPI_HAVE_FORTRAN_REAL +OP_FUNC_3BUF(sum, fortran_real, ompi_fortran_real_t, +) +#endif +#if OMPI_HAVE_FORTRAN_DOUBLE_PRECISION +OP_FUNC_3BUF(sum, fortran_double_precision, ompi_fortran_double_precision_t, +) +#endif +#if OMPI_HAVE_FORTRAN_REAL2 +OP_FUNC_3BUF(sum, fortran_real2, ompi_fortran_real2_t, +) +#endif +#if 
OMPI_HAVE_FORTRAN_REAL4 +OP_FUNC_3BUF(sum, fortran_real4, ompi_fortran_real4_t, +) +#endif +#if OMPI_HAVE_FORTRAN_REAL8 +OP_FUNC_3BUF(sum, fortran_real8, ompi_fortran_real8_t, +) +#endif +#if OMPI_HAVE_FORTRAN_REAL16 +OP_FUNC_3BUF(sum, fortran_real16, ompi_fortran_real16_t, +) +#endif +/* Complex */ +#if OMPI_HAVE_FORTRAN_REAL && OMPI_HAVE_FORTRAN_COMPLEX +COMPLEX_OP_FUNC_SUM_3BUF(fortran_complex, ompi_fortran_complex_t) +#endif +#if OMPI_HAVE_FORTRAN_DOUBLE_PRECISION && OMPI_HAVE_FORTRAN_COMPLEX +COMPLEX_OP_FUNC_SUM_3BUF(fortran_double_complex, ompi_fortran_double_complex_t) +#endif +#if OMPI_HAVE_FORTRAN_REAL4 && OMPI_HAVE_FORTRAN_COMPLEX8 +COMPLEX_OP_FUNC_SUM_3BUF(fortran_complex8, ompi_fortran_complex8_t) +#endif +#if OMPI_HAVE_FORTRAN_REAL8 && OMPI_HAVE_FORTRAN_COMPLEX16 +COMPLEX_OP_FUNC_SUM_3BUF(fortran_complex16, ompi_fortran_complex16_t) +#endif +#if OMPI_HAVE_FORTRAN_REAL16 && OMPI_HAVE_FORTRAN_COMPLEX32 +COMPLEX_OP_FUNC_SUM_3BUF(fortran_complex32, ompi_fortran_complex32_t) +#endif + +/************************************************************************* + * Product + *************************************************************************/ + +/* C integer */ +OP_FUNC_3BUF(prod, signed_char, signed char, *) +OP_FUNC_3BUF(prod, unsigned_char, unsigned char, *) +OP_FUNC_3BUF(prod, int, int, *) +OP_FUNC_3BUF(prod, long, long, *) +OP_FUNC_3BUF(prod, short, short, *) +OP_FUNC_3BUF(prod, unsigned_short, unsigned short, *) +OP_FUNC_3BUF(prod, unsigned, unsigned, *) +OP_FUNC_3BUF(prod, unsigned_long, unsigned long, *) +#if HAVE_LONG_LONG +OP_FUNC_3BUF(prod, long_long_int, long long int, *) +OP_FUNC_3BUF(prod, unsigned_long_long, unsigned long long, *) +#endif +/* Fortran integer */ +#if OMPI_HAVE_FORTRAN_INTEGER +OP_FUNC_3BUF(prod, fortran_integer, ompi_fortran_integer_t, *) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER1 +OP_FUNC_3BUF(prod, fortran_integer1, ompi_fortran_integer1_t, *) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER2 +OP_FUNC_3BUF(prod, fortran_integer2, 
ompi_fortran_integer2_t, *) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER4 +OP_FUNC_3BUF(prod, fortran_integer4, ompi_fortran_integer4_t, *) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER8 +OP_FUNC_3BUF(prod, fortran_integer8, ompi_fortran_integer8_t, *) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER16 +OP_FUNC_3BUF(prod, fortran_integer16, ompi_fortran_integer16_t, *) +#endif +/* Floating point */ +OP_FUNC_3BUF(prod, float, float, *) +OP_FUNC_3BUF(prod, double, double, *) +#if HAVE_LONG_DOUBLE +OP_FUNC_3BUF(prod, long_double, long double, *) +#endif +#if OMPI_HAVE_FORTRAN_REAL +OP_FUNC_3BUF(prod, fortran_real, ompi_fortran_real_t, *) +#endif +#if OMPI_HAVE_FORTRAN_DOUBLE_PRECISION +OP_FUNC_3BUF(prod, fortran_double_precision, ompi_fortran_double_precision_t, *) +#endif +#if OMPI_HAVE_FORTRAN_REAL2 +OP_FUNC_3BUF(prod, fortran_real2, ompi_fortran_real2_t, *) +#endif +#if OMPI_HAVE_FORTRAN_REAL4 +OP_FUNC_3BUF(prod, fortran_real4, ompi_fortran_real4_t, *) +#endif +#if OMPI_HAVE_FORTRAN_REAL8 +OP_FUNC_3BUF(prod, fortran_real8, ompi_fortran_real8_t, *) +#endif +#if OMPI_HAVE_FORTRAN_REAL16 +OP_FUNC_3BUF(prod, fortran_real16, ompi_fortran_real16_t, *) +#endif +/* Complex */ +#if OMPI_HAVE_FORTRAN_REAL && OMPI_HAVE_FORTRAN_COMPLEX +COMPLEX_OP_FUNC_PROD_3BUF(fortran_complex, ompi_fortran_complex_t) +#endif +#if OMPI_HAVE_FORTRAN_DOUBLE_PRECISION && OMPI_HAVE_FORTRAN_COMPLEX +COMPLEX_OP_FUNC_PROD_3BUF(fortran_double_complex, ompi_fortran_double_complex_t) +#endif +#if OMPI_HAVE_FORTRAN_REAL4 && OMPI_HAVE_FORTRAN_COMPLEX8 +COMPLEX_OP_FUNC_PROD_3BUF(fortran_complex8, ompi_fortran_complex8_t) +#endif +#if OMPI_HAVE_FORTRAN_REAL8 && OMPI_HAVE_FORTRAN_COMPLEX16 +COMPLEX_OP_FUNC_PROD_3BUF(fortran_complex16, ompi_fortran_complex16_t) +#endif +#if OMPI_HAVE_FORTRAN_REAL16 && OMPI_HAVE_FORTRAN_COMPLEX32 +COMPLEX_OP_FUNC_PROD_3BUF(fortran_complex32, ompi_fortran_complex32_t) +#endif + +/************************************************************************* + * Logical AND + 
*************************************************************************/ + +#undef current_func +#define current_func(a, b) ((a) && (b)) +/* C integer */ +FUNC_FUNC_3BUF(land, unsigned_char, unsigned char) +FUNC_FUNC_3BUF(land, signed_char, signed char) +FUNC_FUNC_3BUF(land, int, int) +FUNC_FUNC_3BUF(land, long, long) +FUNC_FUNC_3BUF(land, short, short) +FUNC_FUNC_3BUF(land, unsigned_short, unsigned short) +FUNC_FUNC_3BUF(land, unsigned, unsigned) +FUNC_FUNC_3BUF(land, unsigned_long, unsigned long) +#if HAVE_LONG_LONG +FUNC_FUNC_3BUF(land, long_long_int, long long int) +FUNC_FUNC_3BUF(land, unsigned_long_long, unsigned long long) +#endif +/* Logical */ +#if OMPI_HAVE_FORTRAN_LOGICAL +FUNC_FUNC_3BUF(land, fortran_logical, ompi_fortran_logical_t) +#endif +/* C++ bool */ +FUNC_FUNC_3BUF(land, bool, bool) + +/************************************************************************* + * Logical OR + *************************************************************************/ + +#undef current_func +#define current_func(a, b) ((a) || (b)) +/* C integer */ +FUNC_FUNC_3BUF(lor, unsigned_char, unsigned char) +FUNC_FUNC_3BUF(lor, signed_char, signed char) +FUNC_FUNC_3BUF(lor, int, int) +FUNC_FUNC_3BUF(lor, long, long) +FUNC_FUNC_3BUF(lor, short, short) +FUNC_FUNC_3BUF(lor, unsigned_short, unsigned short) +FUNC_FUNC_3BUF(lor, unsigned, unsigned) +FUNC_FUNC_3BUF(lor, unsigned_long, unsigned long) +#if HAVE_LONG_LONG +FUNC_FUNC_3BUF(lor, long_long_int, long long int) +FUNC_FUNC_3BUF(lor, unsigned_long_long, unsigned long long) +#endif +/* Logical */ +#if OMPI_HAVE_FORTRAN_LOGICAL +FUNC_FUNC_3BUF(lor, fortran_logical, ompi_fortran_logical_t) +#endif +/* C++ bool */ +FUNC_FUNC_3BUF(lor, bool, bool) + +/************************************************************************* + * Logical XOR + *************************************************************************/ + +#undef current_func +#define current_func(a, b) ((a ? 1 : 0) ^ (b ? 
1: 0)) +/* C integer */ +FUNC_FUNC_3BUF(lxor, unsigned_char, unsigned char) +FUNC_FUNC_3BUF(lxor, signed_char, signed char) +FUNC_FUNC_3BUF(lxor, int, int) +FUNC_FUNC_3BUF(lxor, long, long) +FUNC_FUNC_3BUF(lxor, short, short) +FUNC_FUNC_3BUF(lxor, unsigned_short, unsigned short) +FUNC_FUNC_3BUF(lxor, unsigned, unsigned) +FUNC_FUNC_3BUF(lxor, unsigned_long, unsigned long) +#if HAVE_LONG_LONG +FUNC_FUNC_3BUF(lxor, long_long_int, long long int) +FUNC_FUNC_3BUF(lxor, unsigned_long_long, unsigned long long) +#endif +/* Logical */ +#if OMPI_HAVE_FORTRAN_LOGICAL +FUNC_FUNC_3BUF(lxor, fortran_logical, ompi_fortran_logical_t) +#endif +/* C++ bool */ +FUNC_FUNC_3BUF(lxor, bool, bool) + +/************************************************************************* + * Bitwise AND + *************************************************************************/ + +#undef current_func +#define current_func(a, b) ((a) & (b)) +/* C integer */ +FUNC_FUNC_3BUF(band, unsigned_char, unsigned char) +FUNC_FUNC_3BUF(band, signed_char, signed char) +FUNC_FUNC_3BUF(band, int, int) +FUNC_FUNC_3BUF(band, long, long) +FUNC_FUNC_3BUF(band, short, short) +FUNC_FUNC_3BUF(band, unsigned_short, unsigned short) +FUNC_FUNC_3BUF(band, unsigned, unsigned) +FUNC_FUNC_3BUF(band, unsigned_long, unsigned long) +#if HAVE_LONG_LONG +FUNC_FUNC_3BUF(band, long_long_int, long long int) +FUNC_FUNC_3BUF(band, unsigned_long_long, unsigned long long) +#endif +/* Fortran integer */ +#if OMPI_HAVE_FORTRAN_INTEGER +FUNC_FUNC_3BUF(band, fortran_integer, ompi_fortran_integer_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER1 +FUNC_FUNC_3BUF(band, fortran_integer1, ompi_fortran_integer1_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER2 +FUNC_FUNC_3BUF(band, fortran_integer2, ompi_fortran_integer2_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER4 +FUNC_FUNC_3BUF(band, fortran_integer4, ompi_fortran_integer4_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER8 +FUNC_FUNC_3BUF(band, fortran_integer8, ompi_fortran_integer8_t) +#endif +#if 
OMPI_HAVE_FORTRAN_INTEGER16 +FUNC_FUNC_3BUF(band, fortran_integer16, ompi_fortran_integer16_t) +#endif +/* Byte */ +FUNC_FUNC_3BUF(band, byte, char) + +/************************************************************************* + * Bitwise OR + *************************************************************************/ + +#undef current_func +#define current_func(a, b) ((a) | (b)) +/* C integer */ +FUNC_FUNC_3BUF(bor, unsigned_char, unsigned char) +FUNC_FUNC_3BUF(bor, signed_char, signed char) +FUNC_FUNC_3BUF(bor, int, int) +FUNC_FUNC_3BUF(bor, long, long) +FUNC_FUNC_3BUF(bor, short, short) +FUNC_FUNC_3BUF(bor, unsigned_short, unsigned short) +FUNC_FUNC_3BUF(bor, unsigned, unsigned) +FUNC_FUNC_3BUF(bor, unsigned_long, unsigned long) +#if HAVE_LONG_LONG +FUNC_FUNC_3BUF(bor, long_long_int, long long int) +FUNC_FUNC_3BUF(bor, unsigned_long_long, unsigned long long) +#endif +/* Fortran integer */ +#if OMPI_HAVE_FORTRAN_INTEGER +FUNC_FUNC_3BUF(bor, fortran_integer, ompi_fortran_integer_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER1 +FUNC_FUNC_3BUF(bor, fortran_integer1, ompi_fortran_integer1_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER2 +FUNC_FUNC_3BUF(bor, fortran_integer2, ompi_fortran_integer2_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER4 +FUNC_FUNC_3BUF(bor, fortran_integer4, ompi_fortran_integer4_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER8 +FUNC_FUNC_3BUF(bor, fortran_integer8, ompi_fortran_integer8_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER16 +FUNC_FUNC_3BUF(bor, fortran_integer16, ompi_fortran_integer16_t) +#endif +/* Byte */ +FUNC_FUNC_3BUF(bor, byte, char) + +/************************************************************************* + * Bitwise XOR + *************************************************************************/ + +#undef current_func +#define current_func(a, b) ((a) ^ (b)) +/* C integer */ +FUNC_FUNC_3BUF(bxor, unsigned_char, unsigned char) +FUNC_FUNC_3BUF(bxor, signed_char, signed char) +FUNC_FUNC_3BUF(bxor, int, int) +FUNC_FUNC_3BUF(bxor, long, long) 
+FUNC_FUNC_3BUF(bxor, short, short) +FUNC_FUNC_3BUF(bxor, unsigned_short, unsigned short) +FUNC_FUNC_3BUF(bxor, unsigned, unsigned) +FUNC_FUNC_3BUF(bxor, unsigned_long, unsigned long) +#if HAVE_LONG_LONG +FUNC_FUNC_3BUF(bxor, long_long_int, long long int) +FUNC_FUNC_3BUF(bxor, unsigned_long_long, unsigned long long) +#endif +/* Fortran integer */ +#if OMPI_HAVE_FORTRAN_INTEGER +FUNC_FUNC_3BUF(bxor, fortran_integer, ompi_fortran_integer_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER1 +FUNC_FUNC_3BUF(bxor, fortran_integer1, ompi_fortran_integer1_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER2 +FUNC_FUNC_3BUF(bxor, fortran_integer2, ompi_fortran_integer2_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER4 +FUNC_FUNC_3BUF(bxor, fortran_integer4, ompi_fortran_integer4_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER8 +FUNC_FUNC_3BUF(bxor, fortran_integer8, ompi_fortran_integer8_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER16 +FUNC_FUNC_3BUF(bxor, fortran_integer16, ompi_fortran_integer16_t) +#endif +/* Byte */ +FUNC_FUNC_3BUF(bxor, byte, char) + +/************************************************************************* + * Min and max location "pair" datatypes + *************************************************************************/ + +/* +#if OMPI_HAVE_FORTRAN_REAL +LOC_STRUCT_3BUF(2real, ompi_fortran_real_t, ompi_fortran_real_t) +#endif +#if OMPI_HAVE_FORTRAN_DOUBLE_PRECISION +LOC_STRUCT_3BUF(2double_precision, ompi_fortran_double_precision_t, ompi_fortran_double_precision_t) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER +LOC_STRUCT_3BUF(2integer, ompi_fortran_integer_t, ompi_fortran_integer_t) +#endif +LOC_STRUCT_3BUF(float_int, float, int) +LOC_STRUCT_3BUF(double_int, double, int) +LOC_STRUCT_3BUF(long_int, long, int) +LOC_STRUCT_3BUF(2int, int, int) +LOC_STRUCT_3BUF(short_int, short, int) +#if HAVE_LONG_DOUBLE +LOC_STRUCT_3BUF(long_double_int, long double, int) +#endif +*/ + +/************************************************************************* + * Max location + 
*************************************************************************/
+
+#if OMPI_HAVE_FORTRAN_REAL
+LOC_FUNC_3BUF(maxloc, 2real, >)
+#endif
+#if OMPI_HAVE_FORTRAN_DOUBLE_PRECISION
+LOC_FUNC_3BUF(maxloc, 2double_precision, >)
+#endif
+#if OMPI_HAVE_FORTRAN_INTEGER
+LOC_FUNC_3BUF(maxloc, 2integer, >)
+#endif
+LOC_FUNC_3BUF(maxloc, float_int, >)
+LOC_FUNC_3BUF(maxloc, double_int, >)
+LOC_FUNC_3BUF(maxloc, long_int, >)
+LOC_FUNC_3BUF(maxloc, 2int, >)
+LOC_FUNC_3BUF(maxloc, short_int, >)
+#if HAVE_LONG_DOUBLE
+LOC_FUNC_3BUF(maxloc, long_double_int, >)
+#endif
+
+/*************************************************************************
+ * Min location
+ *************************************************************************/
+
+#if OMPI_HAVE_FORTRAN_REAL
+LOC_FUNC_3BUF(minloc, 2real, <)
+#endif
+#if OMPI_HAVE_FORTRAN_DOUBLE_PRECISION
+LOC_FUNC_3BUF(minloc, 2double_precision, <)
+#endif
+#if OMPI_HAVE_FORTRAN_INTEGER
+LOC_FUNC_3BUF(minloc, 2integer, <)
+#endif
+LOC_FUNC_3BUF(minloc, float_int, <)
+LOC_FUNC_3BUF(minloc, double_int, <)
+LOC_FUNC_3BUF(minloc, long_int, <)
+LOC_FUNC_3BUF(minloc, 2int, <)
+LOC_FUNC_3BUF(minloc, short_int, <)
+#if HAVE_LONG_DOUBLE
+LOC_FUNC_3BUF(minloc, long_double_int, <)
+#endif
diff --git a/ompi/op/op_predefined.h b/ompi/op/op_predefined.h
index 061b37ba06..547a23d8f0 100644
--- a/ompi/op/op_predefined.h
+++ b/ompi/op/op_predefined.h
@@ -276,6 +276,262 @@ extern "C" {
  */
 OMPI_OP_HANDLER_2TYPE(minloc)
 
+/*
+ * Parameter list shared by every 3 buffer (two input, one output) handler prototype
+ */
+#define OMPI_OP_PROTO_3BUF \
+  (void * restrict in1, void * restrict in2, void * restrict out, \
+   int *count, MPI_Datatype *dtype)
+
+/* C integer */
+
+#define OMPI_OP_3BUFF_HANDLER_C_INTEGER_INTRINSIC(name) \
+  void ompi_mpi_op_three_buff_##name##_unsigned_char OMPI_OP_PROTO_3BUF; \
+  void ompi_mpi_op_three_buff_##name##_signed_char OMPI_OP_PROTO_3BUF; \
+  void ompi_mpi_op_three_buff_##name##_int OMPI_OP_PROTO_3BUF; \
+  void ompi_mpi_op_three_buff_##name##_long
OMPI_OP_PROTO_3BUF; \ + void ompi_mpi_op_three_buff_##name##_short OMPI_OP_PROTO_3BUF; \ + void ompi_mpi_op_three_buff_##name##_unsigned_short OMPI_OP_PROTO_3BUF; \ + void ompi_mpi_op_three_buff_##name##_unsigned OMPI_OP_PROTO_3BUF; \ + void ompi_mpi_op_three_buff_##name##_unsigned_long OMPI_OP_PROTO_3BUF; +#if HAVE_LONG_LONG +#define OMPI_OP_3BUFF_HANDLER_C_INTEGER_OPTIONAL(name) \ + void ompi_mpi_op_three_buff_##name##_long_long_int OMPI_OP_PROTO_3BUF; \ + void ompi_mpi_op_three_buff_##name##_long_long OMPI_OP_PROTO_3BUF; \ + void ompi_mpi_op_three_buff_##name##_unsigned_long_long OMPI_OP_PROTO_3BUF; +#else +#define OMPI_OP_3BUFF_HANDLER_C_INTEGER_OPTIONAL(name) +#endif +#define OMPI_OP_3BUFF_HANDLER_C_INTEGER(name) \ + OMPI_OP_3BUFF_HANDLER_C_INTEGER_INTRINSIC(name) \ + OMPI_OP_3BUFF_HANDLER_C_INTEGER_OPTIONAL(name) \ + +/* Fortran integer */ + +#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER_INTRINSIC(name) \ + void ompi_mpi_op_three_buff_##name##_fortran_integer OMPI_OP_PROTO_3BUF; +#if OMPI_HAVE_FORTRAN_INTEGER1 +#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER1(name) \ + void ompi_mpi_op_three_buff_##name##_fortran_integer1 OMPI_OP_PROTO_3BUF; +#else +#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER1(name) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER2 +#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER2(name) \ + void ompi_mpi_op_three_buff_##name##_fortran_integer2 OMPI_OP_PROTO_3BUF; +#else +#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER2(name) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER4 +#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER4(name) \ + void ompi_mpi_op_three_buff_##name##_fortran_integer4 OMPI_OP_PROTO_3BUF; +#else +#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER4(name) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER8 +#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER8(name) \ + void ompi_mpi_op_three_buff_##name##_fortran_integer8 OMPI_OP_PROTO_3BUF; +#else +#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER8(name) +#endif +#if OMPI_HAVE_FORTRAN_INTEGER16 +#define 
OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER16(name) \ + void ompi_mpi_op_three_buff_##name##_fortran_integer16 OMPI_OP_PROTO_3BUF; +#else +#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER16(name) +#endif +#define OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER(name) \ + OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER_INTRINSIC(name) \ + OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER1(name) \ + OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER2(name) \ + OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER4(name) \ + OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER8(name) \ + OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER16(name) + +/* Floating point */ + +#define OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_INTRINSIC(name) \ + void ompi_mpi_op_three_buff_##name##_float OMPI_OP_PROTO_3BUF; \ + void ompi_mpi_op_three_buff_##name##_double OMPI_OP_PROTO_3BUF; \ + void ompi_mpi_op_three_buff_##name##_fortran_real OMPI_OP_PROTO_3BUF; \ + void ompi_mpi_op_three_buff_##name##_fortran_double_precision OMPI_OP_PROTO_3BUF; \ + void ompi_mpi_op_three_buff_##name##_long_double OMPI_OP_PROTO_3BUF; +#if OMPI_HAVE_FORTRAN_REAL2 +#define OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_REAL2(name) \ + void ompi_mpi_op_three_buff_##name##_fortran_real2 OMPI_OP_PROTO_3BUF; +#else +#define OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_REAL2(name) +#endif +#if OMPI_HAVE_FORTRAN_REAL4 +#define OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_REAL4(name) \ + void ompi_mpi_op_three_buff_##name##_fortran_real4 OMPI_OP_PROTO_3BUF; +#else +#define OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_REAL4(name) +#endif +#if OMPI_HAVE_FORTRAN_REAL8 +#define OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_REAL8(name) \ + void ompi_mpi_op_three_buff_##name##_fortran_real8 OMPI_OP_PROTO_3BUF; +#else +#define OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_REAL8(name) +#endif +#if OMPI_HAVE_FORTRAN_REAL16 +#define OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_REAL16(name) \ + void ompi_mpi_op_three_buff_##name##_fortran_real16 OMPI_OP_PROTO_3BUF; +#else +#define OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_REAL16(name) +#endif +#define 
OMPI_OP_3BUFF_HANDLER_FLOATING_POINT(name) \ + OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_INTRINSIC(name) \ + OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_REAL4(name) \ + OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_REAL8(name) \ + OMPI_OP_3BUFF_HANDLER_FLOATING_POINT_REAL16(name) \ + +/* Logical */ + +#define OMPI_OP_3BUFF_HANDLER_LOGICAL(name) \ + void ompi_mpi_op_three_buff_##name##_fortran_logical OMPI_OP_PROTO_3BUF; \ + void ompi_mpi_op_three_buff_##name##_bool OMPI_OP_PROTO_3BUF; + +/* Complex */ + +#if OMPI_HAVE_FORTRAN_REAL +#define OMPI_OP_3BUFF_HANDLER_COMPLEX_INTRINSIC(name) \ + void ompi_mpi_op_three_buff_##name##_fortran_complex OMPI_OP_PROTO_3BUF; +#else +#define OMPI_OP_3BUFF_HANDLER_COMPLEX_INTRINSIC(name) +#endif +#if OMPI_HAVE_FORTRAN_DOUBLE_PRECISION +#define OMPI_OP_3BUFF_HANDLER_DOUBLE_COMPLEX_INTRINSIC(name) \ + void ompi_mpi_op_three_buff_##name##_fortran_double_complex OMPI_OP_PROTO_3BUF; +#else +#define OMPI_OP_3BUFF_HANDLER_DOUBLE_COMPLEX_INTRINSIC(name) +#endif +#if OMPI_HAVE_FORTRAN_REAL4 +#define OMPI_OP_3BUFF_HANDLER_COMPLEX8(name) \ + void ompi_mpi_op_three_buff_##name##_fortran_complex8 OMPI_OP_PROTO_3BUF; +#else +#define OMPI_OP_3BUFF_HANDLER_COMPLEX8(name) +#endif +#if OMPI_HAVE_FORTRAN_REAL8 +#define OMPI_OP_3BUFF_HANDLER_COMPLEX16(name) \ + void ompi_mpi_op_three_buff_##name##_fortran_complex16 OMPI_OP_PROTO_3BUF; +#else +#define OMPI_OP_3BUFF_HANDLER_COMPLEX16(name) +#endif +#if OMPI_HAVE_FORTRAN_REAL16 +#define OMPI_OP_3BUFF_HANDLER_COMPLEX32(name) \ + void ompi_mpi_op_three_buff_##name##_fortran_complex32 OMPI_OP_PROTO_3BUF; +#else +#define OMPI_OP_3BUFF_HANDLER_COMPLEX32(name) +#endif +#define OMPI_OP_3BUFF_HANDLER_COMPLEX(name) \ + OMPI_OP_3BUFF_HANDLER_COMPLEX_INTRINSIC(name) \ + OMPI_OP_3BUFF_HANDLER_DOUBLE_COMPLEX_INTRINSIC(name) \ + OMPI_OP_3BUFF_HANDLER_COMPLEX8(name) \ + OMPI_OP_3BUFF_HANDLER_COMPLEX16(name) \ + OMPI_OP_3BUFF_HANDLER_COMPLEX32(name) + +/* Byte */ + +#define OMPI_OP_3BUFF_HANDLER_BYTE(name) \ + void 
ompi_mpi_op_three_buff_##name##_byte OMPI_OP_PROTO_3BUF; + +/* "2 type" */ + +#define OMPI_OP_3BUFF_HANDLER_2TYPE(name) \ + void ompi_mpi_op_three_buff_##name##_2real OMPI_OP_PROTO_3BUF; \ + void ompi_mpi_op_three_buff_##name##_2double_precision OMPI_OP_PROTO_3BUF; \ + void ompi_mpi_op_three_buff_##name##_2integer OMPI_OP_PROTO_3BUF; \ + void ompi_mpi_op_three_buff_##name##_float_int OMPI_OP_PROTO_3BUF; \ + void ompi_mpi_op_three_buff_##name##_double_int OMPI_OP_PROTO_3BUF; \ + void ompi_mpi_op_three_buff_##name##_long_int OMPI_OP_PROTO_3BUF; \ + void ompi_mpi_op_three_buff_##name##_2int OMPI_OP_PROTO_3BUF; \ + void ompi_mpi_op_three_buff_##name##_short_int OMPI_OP_PROTO_3BUF; \ + void ompi_mpi_op_three_buff_##name##_long_double_int OMPI_OP_PROTO_3BUF; + +#if defined(c_plusplus) || defined(__cplusplus) +extern "C" { +#endif + +/** + * Handler functions for MPI_MAX + */ + OMPI_OP_3BUFF_HANDLER_C_INTEGER(max) + OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER(max) + OMPI_OP_3BUFF_HANDLER_FLOATING_POINT(max) + +/** + * Handler functions for MPI_MIN + */ + OMPI_OP_3BUFF_HANDLER_C_INTEGER(min) + OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER(min) + OMPI_OP_3BUFF_HANDLER_FLOATING_POINT(min) + +/** + * Handler functions for MPI_SUM + */ + OMPI_OP_3BUFF_HANDLER_C_INTEGER(sum) + OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER(sum) + OMPI_OP_3BUFF_HANDLER_FLOATING_POINT(sum) + OMPI_OP_3BUFF_HANDLER_COMPLEX(sum) + +/** + * Handler functions for MPI_PROD + */ + OMPI_OP_3BUFF_HANDLER_C_INTEGER(prod) + OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER(prod) + OMPI_OP_3BUFF_HANDLER_FLOATING_POINT(prod) + OMPI_OP_3BUFF_HANDLER_COMPLEX(prod) + +/** + * Handler functions for MPI_LAND + */ + OMPI_OP_3BUFF_HANDLER_C_INTEGER(land) + OMPI_OP_3BUFF_HANDLER_LOGICAL(land) + +/** + * Handler functions for MPI_BAND + */ + OMPI_OP_3BUFF_HANDLER_C_INTEGER(band) + OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER(band) + OMPI_OP_3BUFF_HANDLER_BYTE(band) + +/** + * Handler functions for MPI_LOR + */ + OMPI_OP_3BUFF_HANDLER_C_INTEGER(lor) + 
OMPI_OP_3BUFF_HANDLER_LOGICAL(lor) + +/** + * Handler functions for MPI_BOR + */ + OMPI_OP_3BUFF_HANDLER_C_INTEGER(bor) + OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER(bor) + OMPI_OP_3BUFF_HANDLER_BYTE(bor) + +/** + * Handler functions for MPI_LXOR + */ + OMPI_OP_3BUFF_HANDLER_C_INTEGER(lxor) + OMPI_OP_3BUFF_HANDLER_LOGICAL(lxor) + +/** + * Handler functions for MPI_BXOR + */ + OMPI_OP_3BUFF_HANDLER_C_INTEGER(bxor) + OMPI_OP_3BUFF_HANDLER_FORTRAN_INTEGER(bxor) + OMPI_OP_3BUFF_HANDLER_BYTE(bxor) + +/** + * Handler functions for MPI_MAXLOC + */ + OMPI_OP_3BUFF_HANDLER_2TYPE(maxloc) + +/** + * Handler functions for MPI_MINLOC + */ + OMPI_OP_3BUFF_HANDLER_2TYPE(minloc) + #if defined(c_plusplus) || defined(__cplusplus) } #endif