dnl/* dnl * Copyright (c) 2006 The Trustees of Indiana University and Indiana dnl * University Research and Technology dnl * Corporation. All rights reserved. dnl * Copyright (c) 2006 The Technical University of Chemnitz. All dnl * rights reserved. dnl */ dnl dnl this m4 code generate all MPI intrinsic operations dnl every macro is prefixed with m4_ to retain clarity (this means that dnl everything prefixed with m4_ will be replaced by m4!) dnl dnl dnl dnl ########## define all MPI intrinsic Operations and appropriate C code ############# define(m4_OP_MPI_MIN, `if(m4_ARG1$1 > m4_ARG2$1) m4_ARG3$1 = m4_ARG2$1; else m4_ARG3$1 = m4_ARG1$1;')dnl define(m4_OP_MPI_MAX, `if(m4_ARG1$1 < m4_ARG2$1) m4_ARG3$1 = m4_ARG2$1; else m4_ARG3$1 = m4_ARG1$1;')dnl define(m4_OP_MPI_SUM, `m4_ARG3$1 = m4_ARG1$1 + m4_ARG2$1;')dnl define(m4_OP_MPI_PROD, `m4_ARG3$1 = m4_ARG1$1 * m4_ARG2$1;')dnl define(m4_OP_MPI_LAND, `m4_ARG3$1 = m4_ARG1$1 && m4_ARG2$1;')dnl define(m4_OP_MPI_BAND, `m4_ARG3$1 = m4_ARG1$1 & m4_ARG2$1;')dnl define(m4_OP_MPI_LOR, `m4_ARG3$1 = m4_ARG1$1 || m4_ARG2$1;')dnl define(m4_OP_MPI_BOR, `m4_ARG3$1 = m4_ARG1$1 | m4_ARG2$1;')dnl define(m4_OP_MPI_LXOR, `m4_ARG3$1 = ((m4_ARG1$1 ? 1 : 0) ^ (m4_ARG2$1 ? 1 : 0));')dnl define(m4_OP_MPI_BXOR, `m4_ARG3$1 = ((m4_ARG1$1) ^ (m4_ARG2$1));')dnl define(m4_OP_MPI_MINLOC, `if(m4_ARG1$1_VAL > m4_ARG2$1_VAL) { m4_ARG3$1_VAL = m4_ARG2$1_VAL; m4_ARG3$1_RANK = m4_ARG2$1_RANK; } else { m4_ARG3$1_VAL = m4_ARG1$1_VAL; m4_ARG3$1_RANK = m4_ARG1$1_RANK; }')dnl define(m4_OP_MPI_MAXLOC, `if(m4_ARG1$1_VAL < m4_ARG2$1_VAL) { m4_ARG3$1_VAL = m4_ARG2$1_VAL; m4_ARG3$1_RANK = m4_ARG2$1_RANK; } else { m4_ARG3$1_VAL = m4_ARG1$1_VAL; m4_ARG3$1_RANK = m4_ARG1$1_RANK; }')dnl dnl dnl ########## define helper macros ################# dnl ########## loop-unrolled version -> slows it down :-( ###### dnl define(m4_IF, `if(op == $1) { dnl /* loop unrolling - 4 */ dnl for(i=0; i<count-3; i=i+4) { dnl m4_CTYPE_$2 val11, val12, val21, val22, val31, val32, val41, val42; dnl dnl val11 = *(((m4_CTYPE_$2*)buf1) + i); dnl val12 = *(((m4_CTYPE_$2*)buf2) + i); dnl val21 = *(((m4_CTYPE_$2*)buf1) + i+1); dnl val22 = *(((m4_CTYPE_$2*)buf2) + i+1); dnl val31 = *(((m4_CTYPE_$2*)buf1) + i+2); dnl val32 = *(((m4_CTYPE_$2*)buf2) + i+2); dnl val41 = *(((m4_CTYPE_$2*)buf1) + i+3); dnl val42 = *(((m4_CTYPE_$2*)buf2) + i+3); dnl dnl define(m4_ARG11_$1$2, val11)dnl dnl define(m4_ARG21_$1$2, val12)dnl dnl define(m4_ARG31_$1$2, val11)dnl dnl m4_OP_$1(1_$1$2) dnl define(m4_ARG12_$1$2, val21)dnl dnl define(m4_ARG22_$1$2, val22)dnl dnl define(m4_ARG32_$1$2, val21)dnl dnl m4_OP_$1(2_$1$2) dnl define(m4_ARG13_$1$2, val31)dnl dnl define(m4_ARG23_$1$2, val32)dnl dnl define(m4_ARG33_$1$2, val31)dnl dnl m4_OP_$1(3_$1$2) dnl define(m4_ARG14_$1$2, val41)dnl dnl define(m4_ARG24_$1$2, val42)dnl dnl define(m4_ARG34_$1$2, val41)dnl dnl m4_OP_$1(4_$1$2) dnl dnl *(((m4_CTYPE_$2*)buf3) + i) = val11; dnl *(((m4_CTYPE_$2*)buf3) + i+1) = val21; dnl *(((m4_CTYPE_$2*)buf3) + i+2) = val31; dnl *(((m4_CTYPE_$2*)buf3) + i+3) = val41; dnl } dnl for(i=i+4;i<count;i++) { dnl m4_CTYPE_$2 val11, val12; dnl dnl val11 = *(((m4_CTYPE_$2*)buf1) + i); dnl val12 = *(((m4_CTYPE_$2*)buf2) + i); dnl dnl define(m4_ARG15_$1$2, val11)dnl dnl define(m4_ARG25_$1$2, val12)dnl dnl define(m4_ARG35_$1$2, val11)dnl dnl m4_OP_$1(5_$1$2) dnl dnl *(((m4_CTYPE_$2*)buf3) + i) = val11; dnl } dnl }')dnl dnl ########################################################## dnl ########### THIS is faster as the unrolled code :-(( ##### define(m4_IF, `if(op == $1) { for(i=0; i<count; i++) { define(m4_ARG1_$2, `*(((m4_CTYPE_$2*)buf1) + i)')dnl define(m4_ARG2_$2, `*(((m4_CTYPE_$2*)buf2) + i)')dnl define(m4_ARG3_$2, `*(((m4_CTYPE_$2*)buf3) + i)')dnl m4_OP_$1(_$2) } }')dnl dnl ############################################### define(m4_LOCIF, `if(op == $1) { for(i=0; i<count; i++) { typedef struct { m4_CTYPE1_$2 val; m4_CTYPE2_$2 rank; } m4_CTYPE3_$2; m4_CTYPE3_$2 *ptr1, *ptr2, *ptr3; ptr1 = ((m4_CTYPE3_$2*)buf1) + i; ptr2 = ((m4_CTYPE3_$2*)buf2) + i; ptr3 = ((m4_CTYPE3_$2*)buf3) + i; define(m4_ARG1_VAL, ptr1->val)dnl define(m4_ARG2_VAL, ptr2->val)dnl define(m4_ARG3_VAL, ptr3->val)dnl define(m4_ARG1_RANK, ptr1->rank)dnl define(m4_ARG2_RANK, ptr2->rank)dnl define(m4_ARG3_RANK, ptr3->rank)dnl m4_OP_$1 } }')dnl dnl ########################################################## define(m4_TYPE, `if(type == $1) { m4_OPTYPE_$1($1) }')dnl dnl ########## define possible operations for each type dnl dnl dnl ####### MPI_INT ######## define(m4_OPTYPE_MPI_INT, `define(m4_CTYPE_$1, `int')dnl m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else m4_IF(MPI_LAND, $1) else dnl m4_IF(MPI_BAND, $1) else m4_IF(MPI_LOR, $1) else m4_IF(MPI_BOR, $1) else dnl m4_IF(MPI_LXOR, $1) else m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl dnl dnl ####### MPI_LONG ######## define(m4_OPTYPE_MPI_LONG, `define(m4_CTYPE_$1, `long')dnl m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else m4_IF(MPI_LAND, $1) else dnl m4_IF(MPI_BAND, $1) else m4_IF(MPI_LOR, $1) else m4_IF(MPI_BOR, $1) else dnl m4_IF(MPI_LXOR, $1) else m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl dnl dnl ####### MPI_SHORT ######## define(m4_OPTYPE_MPI_SHORT, `define(m4_CTYPE_$1, `short')dnl m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else m4_IF(MPI_LAND, $1) else dnl m4_IF(MPI_BAND, $1) else m4_IF(MPI_LOR, $1) else m4_IF(MPI_BOR, $1) else dnl m4_IF(MPI_LXOR, $1) else m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl dnl dnl ####### MPI_UNSIGNED ######## define(m4_OPTYPE_MPI_UNSIGNED, `define(m4_CTYPE_$1, `unsigned int')dnl m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else m4_IF(MPI_LAND, $1) else dnl m4_IF(MPI_BAND, $1) else m4_IF(MPI_LOR, $1) else m4_IF(MPI_BOR, $1) else dnl m4_IF(MPI_LXOR, $1) else m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl dnl dnl ####### MPI_UNSIGNED_LONG ######## define(m4_OPTYPE_MPI_UNSIGNED_LONG, `define(m4_CTYPE_$1, `unsigned long')dnl m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else m4_IF(MPI_LAND, $1) else dnl m4_IF(MPI_BAND, $1) else m4_IF(MPI_LOR, $1) else m4_IF(MPI_BOR, $1) else dnl m4_IF(MPI_LXOR, $1) else m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl dnl dnl ####### MPI_UNSIGNED_SHORT ######## define(m4_OPTYPE_MPI_UNSIGNED_SHORT, `define(m4_CTYPE_$1, `unsigned short')dnl m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else m4_IF(MPI_LAND, $1) else dnl m4_IF(MPI_BAND, $1) else m4_IF(MPI_LOR, $1) else m4_IF(MPI_BOR, $1) else dnl m4_IF(MPI_LXOR, $1) else m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl dnl dnl ####### MPI_FLOAT ######## define(m4_OPTYPE_MPI_FLOAT, `define(m4_CTYPE_$1, `float')dnl m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else return NBC_OP_NOT_SUPPORTED;')dnl dnl dnl ####### MPI_DOUBLE ######## define(m4_OPTYPE_MPI_DOUBLE, `define(m4_CTYPE_$1, `double')dnl m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else return NBC_OP_NOT_SUPPORTED;')dnl dnl dnl ####### MPI_LONG_DOUBLE ######## define(m4_OPTYPE_MPI_LONG_DOUBLE, `define(m4_CTYPE_$1, `long double')dnl m4_IF(MPI_MIN, $1) else m4_IF(MPI_MAX, $1) else dnl m4_IF(MPI_SUM, $1) else m4_IF(MPI_PROD, $1) else return NBC_OP_NOT_SUPPORTED;')dnl dnl dnl ####### MPI_BYTE ######## define(m4_OPTYPE_MPI_BYTE, `define(m4_CTYPE_$1, `char')dnl m4_IF(MPI_BAND, $1) else m4_IF(MPI_BOR, $1) else dnl m4_IF(MPI_BXOR, $1) else return NBC_OP_NOT_SUPPORTED;')dnl dnl dnl ####### MPI_FLOAT_INT ######## define(m4_OPTYPE_MPI_FLOAT_INT, `define(m4_CTYPE1_$1, `float')define(m4_CTYPE2_$1, `int')define(m4_CTYPE3_$1, `float_int')dnl m4_LOCIF(MPI_MAXLOC, $1) else m4_LOCIF(MPI_MINLOC, $1) else return NBC_OP_NOT_SUPPORTED;')dnl dnl dnl ####### MPI_DOUBLE_INT ######## define(m4_OPTYPE_MPI_DOUBLE_INT, `define(m4_CTYPE1_$1, `double')define(m4_CTYPE2_$1, `int')define(m4_CTYPE3_$1, `double_int')dnl m4_LOCIF(MPI_MAXLOC, $1) else m4_LOCIF(MPI_MINLOC, $1) else return NBC_OP_NOT_SUPPORTED;')dnl dnl dnl ####### MPI_LONG_INT ######## define(m4_OPTYPE_MPI_LONG_INT, `define(m4_CTYPE1_$1, `long')define(m4_CTYPE2_$1, `int')define(m4_CTYPE3_$1, `long_int')dnl m4_LOCIF(MPI_MAXLOC, $1) else m4_LOCIF(MPI_MINLOC, $1) else return NBC_OP_NOT_SUPPORTED;')dnl dnl dnl ####### MPI_2INT ######## define(m4_OPTYPE_MPI_2INT, `define(m4_CTYPE1_$1, `int')define(m4_CTYPE2_$1, `int')define(m4_CTYPE3_$1, `int_int')dnl m4_LOCIF(MPI_MAXLOC, $1) else m4_LOCIF(MPI_MINLOC, $1) else return NBC_OP_NOT_SUPPORTED;')dnl dnl dnl ####### MPI_SHORT_INT ######## define(m4_OPTYPE_MPI_SHORT_INT, `define(m4_CTYPE1_$1, `short')define(m4_CTYPE2_$1, `int')define(m4_CTYPE3_$1, `short_int')dnl m4_LOCIF(MPI_MAXLOC, $1) else m4_LOCIF(MPI_MINLOC, $1) else return NBC_OP_NOT_SUPPORTED;')dnl dnl dnl ####### MPI_LONG_DOUBLE_INT ######## define(m4_OPTYPE_MPI_LONG_DOUBLE_INT, `define(m4_CTYPE1_$1, `long double')define(m4_CTYPE2_$1, `int')define(m4_CTYPE3_$1, `long_double_int')dnl m4_LOCIF(MPI_MAXLOC, $1) else m4_LOCIF(MPI_MINLOC, $1) else return NBC_OP_NOT_SUPPORTED;')dnl dnl dnl ####### begin the real program :-) ######### dnl #include "nbc.h" /****************** THIS FILE is automatically generated ********************* * changes will be deleted at the next generation of this file - see nbc_op.c.m4 */ int NBC_Operation(void *buf3, void *buf1, void *buf2, MPI_Op op, MPI_Datatype type, int count) { int i; m4_TYPE(MPI_INT) else dnl m4_TYPE(MPI_LONG) else dnl m4_TYPE(MPI_SHORT) else dnl m4_TYPE(MPI_UNSIGNED) else dnl m4_TYPE(MPI_UNSIGNED_LONG) else dnl m4_TYPE(MPI_UNSIGNED_SHORT) else dnl m4_TYPE(MPI_FLOAT) else dnl m4_TYPE(MPI_DOUBLE) else dnl m4_TYPE(MPI_LONG_DOUBLE) else dnl m4_TYPE(MPI_BYTE) else dnl m4_TYPE(MPI_FLOAT_INT) else dnl m4_TYPE(MPI_DOUBLE_INT) else dnl m4_TYPE(MPI_LONG_INT) else dnl m4_TYPE(MPI_2INT) else dnl m4_TYPE(MPI_SHORT_INT) else dnl m4_TYPE(MPI_LONG_DOUBLE_INT) else dnl return NBC_DATATYPE_NOT_SUPPORTED; return NBC_OK; }