From 4375c11a58384b5bb422f7bcc4200be72f53518a Mon Sep 17 00:00:00 2001 From: KAWASHIMA Takahiro Date: Wed, 21 Nov 2018 19:58:14 +0900 Subject: [PATCH] ompi/datatype: Add `ompi_mpi_short_float` ... and `ompi_mpi_c_short_float_complex` and `ompi_mpi_cxx_sfltcplex`. These are Open MPI internal variables intended to be defined as `MPI_SHORT_FLOAT`, `MPI_C_SHORT_FLOAT_COMPLEX`, and `MPI_CXX_SHORT_FLOAT_COMPLEX` in the future. `OMPI_DATATYPE_MPI_C_SHORT_FLOAT_COMPLEX` is also required to support `MPI_COMPLEX4` in the next commit. Signed-off-by: KAWASHIMA Takahiro --- ompi/datatype/ompi_datatype.h | 3 +- ompi/datatype/ompi_datatype_internal.h | 38 +++- ompi/datatype/ompi_datatype_module.c | 34 +++- .../coll/portals4/coll_portals4_component.c | 6 + ompi/mca/op/base/op_base_functions.c | 170 +++++++++++++++++- ompi/mca/op/op.h | 5 + ompi/op/op.c | 7 + opal/datatype/opal_datatype.h | 2 +- 8 files changed, 253 insertions(+), 12 deletions(-) diff --git a/ompi/datatype/ompi_datatype.h b/ompi/datatype/ompi_datatype.h index d9df1f1ae4..68e049ac61 100644 --- a/ompi/datatype/ompi_datatype.h +++ b/ompi/datatype/ompi_datatype.h @@ -9,6 +9,7 @@ * reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. + * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -57,7 +58,7 @@ BEGIN_C_DECLS #define OMPI_DATATYPE_FLAG_DATA_FORTRAN 0xC000 #define OMPI_DATATYPE_FLAG_DATA_LANGUAGE 0xC000 -#define OMPI_DATATYPE_MAX_PREDEFINED 47 +#define OMPI_DATATYPE_MAX_PREDEFINED 49 #if OMPI_DATATYPE_MAX_PREDEFINED > OPAL_DATATYPE_MAX_SUPPORTED #error Need to increase the number of supported dataypes by OPAL (value OPAL_DATATYPE_MAX_SUPPORTED). 
diff --git a/ompi/datatype/ompi_datatype_internal.h b/ompi/datatype/ompi_datatype_internal.h index 38bed445c4..4c8c2fa8e2 100644 --- a/ompi/datatype/ompi_datatype_internal.h +++ b/ompi/datatype/ompi_datatype_internal.h @@ -9,7 +9,7 @@ * reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2016-2018 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -99,8 +99,16 @@ */ #define OMPI_DATATYPE_MPI_COUNT 0x2E +/* + * Datatypes proposed to the MPI Forum in June 2017 for proposal in + * the MPI 4.0 standard. As of February 2019, it is not accepted yet. + * See https://github.com/mpi-forum/mpi-issues/issues/65 + */ +#define OMPI_DATATYPE_MPI_SHORT_FLOAT 0x2F +#define OMPI_DATATYPE_MPI_C_SHORT_FLOAT_COMPLEX 0x30 + /* This should __ALWAYS__ stay last */ -#define OMPI_DATATYPE_MPI_UNAVAILABLE 0x2F +#define OMPI_DATATYPE_MPI_UNAVAILABLE 0x31 #define OMPI_DATATYPE_MPI_MAX_PREDEFINED (OMPI_DATATYPE_MPI_UNAVAILABLE+1) @@ -382,6 +390,7 @@ * C++ datatypes, these map to C datatypes. */ #define OMPI_DATATYPE_MPI_CXX_BOOL OMPI_DATATYPE_MPI_C_BOOL +#define OMPI_DATATYPE_MPI_CXX_SHORT_FLOAT_COMPLEX OMPI_DATATYPE_MPI_C_SHORT_FLOAT_COMPLEX #define OMPI_DATATYPE_MPI_CXX_FLOAT_COMPLEX OMPI_DATATYPE_MPI_C_FLOAT_COMPLEX #define OMPI_DATATYPE_MPI_CXX_DOUBLE_COMPLEX OMPI_DATATYPE_MPI_C_DOUBLE_COMPLEX #define OMPI_DATATYPE_MPI_CXX_LONG_DOUBLE_COMPLEX OMPI_DATATYPE_MPI_C_LONG_DOUBLE_COMPLEX @@ -438,7 +447,7 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE( UNAVAILABLE, NAME, FLAGS ) /* - * Initilization for these types is deferred until runtime. + * Initialization for these types is deferred until runtime. * * Using this macro implies that at this point not all informations needed * to fill up the datatype are known. 
We fill them with zeros and then later @@ -570,6 +579,26 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX #define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG_LONG OPAL_DATATYPE_INITIALIZER_UINT16 #endif +#if defined(HAVE_SHORT_FLOAT) +#if SIZEOF_SHORT_FLOAT == 2 +#define OMPI_DATATYPE_INITIALIZER_SHORT_FLOAT OPAL_DATATYPE_INITIALIZER_FLOAT2 +#elif SIZEOF_SHORT_FLOAT == 4 +#define OMPI_DATATYPE_INITIALIZER_SHORT_FLOAT OPAL_DATATYPE_INITIALIZER_FLOAT4 +#elif SIZEOF_SHORT_FLOAT == 8 +#define OMPI_DATATYPE_INITIALIZER_SHORT_FLOAT OPAL_DATATYPE_INITIALIZER_FLOAT8 +#endif +#elif defined(HAVE_OPAL_SHORT_FLOAT_T) /* HAVE_SHORT_FLOAT */ +#if SIZEOF_OPAL_SHORT_FLOAT_T == 2 +#define OMPI_DATATYPE_INITIALIZER_SHORT_FLOAT OPAL_DATATYPE_INITIALIZER_FLOAT2 +#elif SIZEOF_OPAL_SHORT_FLOAT_T == 4 +#define OMPI_DATATYPE_INITIALIZER_SHORT_FLOAT OPAL_DATATYPE_INITIALIZER_FLOAT4 +#elif SIZEOF_OPAL_SHORT_FLOAT_T == 8 +#define OMPI_DATATYPE_INITIALIZER_SHORT_FLOAT OPAL_DATATYPE_INITIALIZER_FLOAT8 +#endif +#else /* HAVE_SHORT_FLOAT */ +#define OMPI_DATATYPE_INITIALIZER_SHORT_FLOAT OPAL_DATATYPE_INITIALIZER_UNAVAILABLE +#endif /* HAVE_SHORT_FLOAT */ + #if SIZEOF_FLOAT == 2 #define OMPI_DATATYPE_INITIALIZER_FLOAT OPAL_DATATYPE_INITIALIZER_FLOAT2 #elif SIZEOF_FLOAT == 4 @@ -604,6 +633,7 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX #define OMPI_DATATYPE_INITIALIZER_WCHAR OPAL_DATATYPE_INITIALIZER_WCHAR +#define OMPI_DATATYPE_INITIALIZER_C_SHORT_FLOAT_COMPLEX OPAL_DATATYPE_INITIALIZER_SHORT_FLOAT_COMPLEX #define OMPI_DATATYPE_INITIALIZER_C_FLOAT_COMPLEX OPAL_DATATYPE_INITIALIZER_FLOAT_COMPLEX #define OMPI_DATATYPE_INITIALIZER_C_DOUBLE_COMPLEX OPAL_DATATYPE_INITIALIZER_DOUBLE_COMPLEX #define OMPI_DATATYPE_INITIALIZER_C_LONG_DOUBLE_COMPLEX OPAL_DATATYPE_INITIALIZER_LONG_DOUBLE_COMPLEX @@ -615,7 +645,7 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX #define OMPI_DATATYPE_FIRST_TYPE 
OPAL_DATATYPE_MAX_PREDEFINED /* - * Derived datatypes supposely contiguous + * Derived datatypes supposedly contiguous */ #define OMPI_DATATYPE_2INT (OMPI_DATATYPE_FIRST_TYPE+6) #define OMPI_DATATYPE_2INTEGER (OMPI_DATATYPE_FIRST_TYPE+7) diff --git a/ompi/datatype/ompi_datatype_module.c b/ompi/datatype/ompi_datatype_module.c index 06cfa773c7..5df334b10a 100644 --- a/ompi/datatype/ompi_datatype_module.c +++ b/ompi/datatype/ompi_datatype_module.c @@ -17,7 +17,7 @@ * reserved. * Copyright (c) 2015-2018 Research Organization for Information Science * and Technology (RIST). All rights reserved. - * Copyright (c) 2016 FUJITSU LIMITED. All rights reserved. + * Copyright (c) 2016-2018 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -49,9 +49,9 @@ int32_t ompi_datatype_number_of_predefined_data = 0; * The following initialization of C, C++ and Fortran types is fairly complex, * based on the OPAL-datatypes. * ompi_datatypes.h - * \-------> ompi_datatypes_internal.h (Macros defining type-number and initalization) + * \-------> ompi_datatypes_internal.h (Macros defining type-number and initialization) * opal_datatypes.h - * \-------> opal_datatypes_internal.h (Macros defining type-number and initalization) + * \-------> opal_datatypes_internal.h (Macros defining type-number and initialization) * * The Macros in the OMPI Layer differ in that: * Additionally to OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE, we have a OMPI_DATATYPE_INIT_PREDEFINED, @@ -82,6 +82,11 @@ ompi_predefined_datatype_t ompi_mpi_long = OMPI_DATATYPE_INIT_PREDEFIN ompi_predefined_datatype_t ompi_mpi_unsigned_long = OMPI_DATATYPE_INIT_PREDEFINED (UNSIGNED_LONG, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT ); ompi_predefined_datatype_t ompi_mpi_long_long_int = OMPI_DATATYPE_INIT_PREDEFINED (LONG_LONG_INT, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT ); ompi_predefined_datatype_t ompi_mpi_unsigned_long_long = OMPI_DATATYPE_INIT_PREDEFINED 
(UNSIGNED_LONG_LONG, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT ); +#if defined(HAVE_SHORT_FLOAT) || defined(HAVE_OPAL_SHORT_FLOAT_T) +ompi_predefined_datatype_t ompi_mpi_short_float = OMPI_DATATYPE_INIT_PREDEFINED (SHORT_FLOAT, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_FLOAT ); +#else +ompi_predefined_datatype_t ompi_mpi_short_float = OMPI_DATATYPE_INIT_UNAVAILABLE (SHORT_FLOAT, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_FLOAT ); +#endif /* HAVE_SHORT_FLOAT */ ompi_predefined_datatype_t ompi_mpi_float = OMPI_DATATYPE_INIT_PREDEFINED (FLOAT, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_FLOAT ); ompi_predefined_datatype_t ompi_mpi_double = OMPI_DATATYPE_INIT_PREDEFINED (DOUBLE, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_FLOAT ); ompi_predefined_datatype_t ompi_mpi_long_double = OMPI_DATATYPE_INIT_PREDEFINED (LONG_DOUBLE, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_FLOAT ); @@ -101,12 +106,22 @@ ompi_predefined_datatype_t ompi_mpi_cxx_bool = OMPI_DATATYPE_INIT_PREDEFIN /* * Complex datatypes for C (base types), C++, and fortran */ +#if defined(HAVE_SHORT_FLOAT__COMPLEX) || defined(HAVE_OPAL_SHORT_FLOAT_COMPLEX_T) +ompi_predefined_datatype_t ompi_mpi_c_short_float_complex = OMPI_DATATYPE_INIT_PREDEFINED (C_SHORT_FLOAT_COMPLEX, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_COMPLEX ); +#else +ompi_predefined_datatype_t ompi_mpi_c_short_float_complex = OMPI_DATATYPE_INIT_UNAVAILABLE (C_SHORT_FLOAT_COMPLEX, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_COMPLEX ); +#endif /* HAVE_SHORT_FLOAT__COMPLEX */ ompi_predefined_datatype_t ompi_mpi_c_float_complex = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (C_FLOAT_COMPLEX, C_COMPLEX, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_COMPLEX ); ompi_predefined_datatype_t ompi_mpi_c_complex = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (C_FLOAT_COMPLEX, C_COMPLEX, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_COMPLEX ); ompi_predefined_datatype_t 
ompi_mpi_c_double_complex = OMPI_DATATYPE_INIT_PREDEFINED (C_DOUBLE_COMPLEX, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_COMPLEX ); ompi_predefined_datatype_t ompi_mpi_c_long_double_complex = OMPI_DATATYPE_INIT_PREDEFINED (C_LONG_DOUBLE_COMPLEX, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_COMPLEX ); /* The C++ complex datatypes are the same as the C datatypes */ +#if defined(HAVE_SHORT_FLOAT__COMPLEX) || defined(HAVE_OPAL_SHORT_FLOAT_COMPLEX_T) +ompi_predefined_datatype_t ompi_mpi_cxx_sfltcplex = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (C_SHORT_FLOAT_COMPLEX, CXX_SHORT_FLOAT_COMPLEX, OMPI_DATATYPE_FLAG_DATA_CPP | OMPI_DATATYPE_FLAG_DATA_COMPLEX ); +#else +ompi_predefined_datatype_t ompi_mpi_cxx_sfltcplex = OMPI_DATATYPE_INIT_UNAVAILABLE (CXX_SHORT_FLOAT_COMPLEX, OMPI_DATATYPE_FLAG_DATA_CPP | OMPI_DATATYPE_FLAG_DATA_COMPLEX ); +#endif /* HAVE_SHORT_FLOAT__COMPLEX */ ompi_predefined_datatype_t ompi_mpi_cxx_cplex = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (C_FLOAT_COMPLEX, CXX_FLOAT_COMPLEX, OMPI_DATATYPE_FLAG_DATA_CPP | OMPI_DATATYPE_FLAG_DATA_COMPLEX ); ompi_predefined_datatype_t ompi_mpi_cxx_dblcplex = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (C_DOUBLE_COMPLEX, CXX_DOUBLE_COMPLEX, OMPI_DATATYPE_FLAG_DATA_CPP | OMPI_DATATYPE_FLAG_DATA_COMPLEX ); ompi_predefined_datatype_t ompi_mpi_cxx_ldblcplex = OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (C_LONG_DOUBLE_COMPLEX, CXX_LONG_DOUBLE_COMPLEX, OMPI_DATATYPE_FLAG_DATA_CPP | OMPI_DATATYPE_FLAG_DATA_COMPLEX ); @@ -347,6 +362,12 @@ const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX_PREDEF /* MPI 3.0 types */ [OMPI_DATATYPE_MPI_COUNT] = &ompi_mpi_count.dt, + /* Datatypes proposed to the MPI Forum in June 2017 for proposal in + * the MPI 4.0 standard. As of February 2019, it is not accepted yet. 
+ * See https://github.com/mpi-forum/mpi-issues/issues/65 */ + [OMPI_DATATYPE_MPI_SHORT_FLOAT] = &ompi_mpi_short_float.dt, + [OMPI_DATATYPE_MPI_C_SHORT_FLOAT_COMPLEX] = &ompi_mpi_c_short_float_complex.dt, + [OMPI_DATATYPE_MPI_UNAVAILABLE] = &ompi_mpi_unavailable.dt, }; @@ -612,6 +633,13 @@ int32_t ompi_datatype_init( void ) /* MPI 3.0 types */ MOOG(count, 72); + /* Datatypes proposed to the MPI Forum in June 2017 for proposal in + * the MPI 4.0 standard. As of February 2019, it is not accepted yet. + * See https://github.com/mpi-forum/mpi-issues/issues/65 */ + MOOG(short_float, 73); + MOOG(c_short_float_complex, 74); + MOOG(cxx_sfltcplex, 75); + /** * Now make sure all non-contiguous types are marked as such. */ diff --git a/ompi/mca/coll/portals4/coll_portals4_component.c b/ompi/mca/coll/portals4/coll_portals4_component.c index d632340ee2..4665e98068 100644 --- a/ompi/mca/coll/portals4/coll_portals4_component.c +++ b/ompi/mca/coll/portals4/coll_portals4_component.c @@ -116,6 +116,12 @@ ptl_datatype_t ompi_coll_portals4_atomic_datatype [OMPI_DATATYPE_MPI_MAX_PREDEFI /* MPI 3.0 types */ [OMPI_DATATYPE_MPI_COUNT] = COLL_PORTALS4_NO_DTYPE, + /* Datatypes proposed to the MPI Forum in June 2017 for proposal in + * the MPI 4.0 standard. As of February 2019, it is not accepted yet. + * See https://github.com/mpi-forum/mpi-issues/issues/65 */ + [OMPI_DATATYPE_MPI_SHORT_FLOAT] = COLL_PORTALS4_NO_DTYPE, + [OMPI_DATATYPE_MPI_C_SHORT_FLOAT_COMPLEX] = COLL_PORTALS4_NO_DTYPE, + [OMPI_DATATYPE_MPI_UNAVAILABLE] = COLL_PORTALS4_NO_DTYPE, }; diff --git a/ompi/mca/op/base/op_base_functions.c b/ompi/mca/op/base/op_base_functions.c index 648aad5e9b..f08a9d04be 100644 --- a/ompi/mca/op/base/op_base_functions.c +++ b/ompi/mca/op/base/op_base_functions.c @@ -13,6 +13,7 @@ * Copyright (c) 2006-2014 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -102,6 +103,49 @@ } \ } +/* + * Define a function to calculate sum of complex numbers using a real + * number floating-point type (float, double, etc.). This macro is used + * when the compiler supports a real number floating-point type but does + * not support the corresponding complex number type. + */ +#define COMPLEX_SUM_FUNC(type_name, type) \ + static void ompi_op_base_2buff_sum_##type_name(void *in, void *out, int *count, \ + struct ompi_datatype_t **dtype, \ + struct ompi_op_base_module_1_0_0_t *module) \ + { \ + int i; \ + type (*a)[2] = (type (*)[2]) in; \ + type (*b)[2] = (type (*)[2]) out; \ + for (i = 0; i < *count; ++i, ++a, ++b) { \ + (*b)[0] += (*a)[0]; \ + (*b)[1] += (*a)[1]; \ + } \ + } + +/* + * Define a function to calculate product of complex numbers using a real + * number floating-point type (float, double, etc.). This macro is used + * when the compiler supports a real number floating-point type but does + * not support the corresponding complex number type. 
+ */ +#define COMPLEX_PROD_FUNC(type_name, type) \ + static void ompi_op_base_2buff_prod_##type_name(void *in, void *out, int *count, \ + struct ompi_datatype_t **dtype, \ + struct ompi_op_base_module_1_0_0_t *module) \ + { \ + int i; \ + type (*a)[2] = (type (*)[2]) in; \ + type (*b)[2] = (type (*)[2]) out; \ + type c[2]; \ + for (i = 0; i < *count; ++i, ++a, ++b) { \ + c[0] = (*a)[0] * (*b)[0] - (*a)[1] * (*b)[1]; \ + c[1] = (*a)[0] * (*b)[1] + (*a)[1] * (*b)[0]; \ + (*b)[0] = c[0]; \ + (*b)[1] = c[1]; \ + } \ + } + /************************************************************************* * Max *************************************************************************/ @@ -137,6 +181,11 @@ FUNC_FUNC(max, fortran_integer8, ompi_fortran_integer8_t) FUNC_FUNC(max, fortran_integer16, ompi_fortran_integer16_t) #endif /* Floating point */ +#if defined(HAVE_SHORT_FLOAT) +FUNC_FUNC(max, short_float, short float) +#elif defined(HAVE_OPAL_SHORT_FLOAT_T) +FUNC_FUNC(max, short_float, opal_short_float_t) +#endif FUNC_FUNC(max, float, float) FUNC_FUNC(max, double, double) FUNC_FUNC(max, long_double, long double) @@ -195,6 +244,11 @@ FUNC_FUNC(min, fortran_integer8, ompi_fortran_integer8_t) FUNC_FUNC(min, fortran_integer16, ompi_fortran_integer16_t) #endif /* Floating point */ +#if defined(HAVE_SHORT_FLOAT) +FUNC_FUNC(min, short_float, short float) +#elif defined(HAVE_OPAL_SHORT_FLOAT_T) +FUNC_FUNC(min, short_float, opal_short_float_t) +#endif FUNC_FUNC(min, float, float) FUNC_FUNC(min, double, double) FUNC_FUNC(min, long_double, long double) @@ -250,6 +304,11 @@ OP_FUNC(sum, fortran_integer8, ompi_fortran_integer8_t, +=) OP_FUNC(sum, fortran_integer16, ompi_fortran_integer16_t, +=) #endif /* Floating point */ +#if defined(HAVE_SHORT_FLOAT) +OP_FUNC(sum, short_float, short float, +=) +#elif defined(HAVE_OPAL_SHORT_FLOAT_T) +OP_FUNC(sum, short_float, opal_short_float_t, +=) +#endif OP_FUNC(sum, float, float, +=) OP_FUNC(sum, double, double, +=) OP_FUNC(sum, long_double, long 
double, +=) @@ -272,6 +331,11 @@ OP_FUNC(sum, fortran_real8, ompi_fortran_real8_t, +=) OP_FUNC(sum, fortran_real16, ompi_fortran_real16_t, +=) #endif /* Complex */ +#if defined(HAVE_SHORT_FLOAT__COMPLEX) +OP_FUNC(sum, c_short_float_complex, short float _Complex, +=) +#elif defined(HAVE_OPAL_SHORT_FLOAT_COMPLEX_T) +COMPLEX_SUM_FUNC(c_short_float_complex, opal_short_float_t) +#endif OP_FUNC(sum, c_float_complex, float _Complex, +=) OP_FUNC(sum, c_double_complex, double _Complex, +=) OP_FUNC(sum, c_long_double_complex, long double _Complex, +=) @@ -309,6 +373,11 @@ OP_FUNC(prod, fortran_integer8, ompi_fortran_integer8_t, *=) OP_FUNC(prod, fortran_integer16, ompi_fortran_integer16_t, *=) #endif /* Floating point */ +#if defined(HAVE_SHORT_FLOAT) +OP_FUNC(prod, short_float, short float, *=) +#elif defined(HAVE_OPAL_SHORT_FLOAT_T) +OP_FUNC(prod, short_float, opal_short_float_t, *=) +#endif OP_FUNC(prod, float, float, *=) OP_FUNC(prod, double, double, *=) OP_FUNC(prod, long_double, long double, *=) @@ -331,6 +400,11 @@ OP_FUNC(prod, fortran_real8, ompi_fortran_real8_t, *=) OP_FUNC(prod, fortran_real16, ompi_fortran_real16_t, *=) #endif /* Complex */ +#if defined(HAVE_SHORT_FLOAT__COMPLEX) +OP_FUNC(prod, c_short_float_complex, short float _Complex, *=) +#elif defined(HAVE_OPAL_SHORT_FLOAT_COMPLEX_T) +COMPLEX_PROD_FUNC(c_short_float_complex, opal_short_float_t) +#endif OP_FUNC(prod, c_float_complex, float _Complex, *=) OP_FUNC(prod, c_double_complex, double _Complex, *=) OP_FUNC(prod, c_long_double_complex, long double _Complex, *=) @@ -656,6 +730,50 @@ LOC_FUNC(minloc, long_double_int, <) } \ } +/* + * Define a function to calculate sum of complex numbers using a real + * number floating-point type (float, double, etc.). This macro is used + * when the compiler supports a real number floating-point type but does + * not support the corresponding complex number type. 
+ */ +#define COMPLEX_SUM_FUNC_3BUF(type_name, type) \ + static void ompi_op_base_3buff_sum_##type_name(void * restrict in1, \ + void * restrict in2, void * restrict out, int *count, \ + struct ompi_datatype_t **dtype, \ + struct ompi_op_base_module_1_0_0_t *module) \ + { \ + int i; \ + type (*a1)[2] = (type (*)[2]) in1; \ + type (*a2)[2] = (type (*)[2]) in2; \ + type (*b)[2] = (type (*)[2]) out; \ + for (i = 0; i < *count; ++i, ++a1, ++a2, ++b) { \ + (*b)[0] = (*a1)[0] + (*a2)[0]; \ + (*b)[1] = (*a1)[1] + (*a2)[1]; \ + } \ + } + +/* + * Define a function to calculate product of complex numbers using a real + * number floating-point type (float, double, etc.). This macro is used + * when the compiler supports a real number floating-point type but does + * not support the corresponding complex number type. + */ +#define COMPLEX_PROD_FUNC_3BUF(type_name, type) \ + static void ompi_op_base_3buff_prod_##type_name(void * restrict in1, \ + void * restrict in2, void * restrict out, int *count, \ + struct ompi_datatype_t **dtype, \ + struct ompi_op_base_module_1_0_0_t *module) \ + { \ + int i; \ + type (*a1)[2] = (type (*)[2]) in1; \ + type (*a2)[2] = (type (*)[2]) in2; \ + type (*b)[2] = (type (*)[2]) out; \ + for (i = 0; i < *count; ++i, ++a1, ++a2, ++b) { \ + (*b)[0] = (*a1)[0] * (*a2)[0] - (*a1)[1] * (*a2)[1]; \ + (*b)[1] = (*a1)[0] * (*a2)[1] + (*a1)[1] * (*a2)[0]; \ + } \ + } + /************************************************************************* * Max *************************************************************************/ @@ -691,6 +809,11 @@ FUNC_FUNC_3BUF(max, fortran_integer8, ompi_fortran_integer8_t) FUNC_FUNC_3BUF(max, fortran_integer16, ompi_fortran_integer16_t) #endif /* Floating point */ +#if defined(HAVE_SHORT_FLOAT) +FUNC_FUNC_3BUF(max, short_float, short float) +#elif defined(HAVE_OPAL_SHORT_FLOAT_T) +FUNC_FUNC_3BUF(max, short_float, opal_short_float_t) +#endif FUNC_FUNC_3BUF(max, float, float) FUNC_FUNC_3BUF(max, double, double) 
FUNC_FUNC_3BUF(max, long_double, long double) @@ -749,6 +872,11 @@ FUNC_FUNC_3BUF(min, fortran_integer8, ompi_fortran_integer8_t) FUNC_FUNC_3BUF(min, fortran_integer16, ompi_fortran_integer16_t) #endif /* Floating point */ +#if defined(HAVE_SHORT_FLOAT) +FUNC_FUNC_3BUF(min, short_float, short float) +#elif defined(HAVE_OPAL_SHORT_FLOAT_T) +FUNC_FUNC_3BUF(min, short_float, opal_short_float_t) +#endif FUNC_FUNC_3BUF(min, float, float) FUNC_FUNC_3BUF(min, double, double) FUNC_FUNC_3BUF(min, long_double, long double) @@ -804,6 +932,11 @@ OP_FUNC_3BUF(sum, fortran_integer8, ompi_fortran_integer8_t, +) OP_FUNC_3BUF(sum, fortran_integer16, ompi_fortran_integer16_t, +) #endif /* Floating point */ +#if defined(HAVE_SHORT_FLOAT) +OP_FUNC_3BUF(sum, short_float, short float, +) +#elif defined(HAVE_OPAL_SHORT_FLOAT_T) +OP_FUNC_3BUF(sum, short_float, opal_short_float_t, +) +#endif OP_FUNC_3BUF(sum, float, float, +) OP_FUNC_3BUF(sum, double, double, +) OP_FUNC_3BUF(sum, long_double, long double, +) @@ -826,6 +959,11 @@ OP_FUNC_3BUF(sum, fortran_real8, ompi_fortran_real8_t, +) OP_FUNC_3BUF(sum, fortran_real16, ompi_fortran_real16_t, +) #endif /* Complex */ +#if defined(HAVE_SHORT_FLOAT__COMPLEX) +OP_FUNC_3BUF(sum, c_short_float_complex, short float _Complex, +) +#elif defined(HAVE_OPAL_SHORT_FLOAT_COMPLEX_T) +COMPLEX_SUM_FUNC_3BUF(c_short_float_complex, opal_short_float_t) +#endif OP_FUNC_3BUF(sum, c_float_complex, float _Complex, +) OP_FUNC_3BUF(sum, c_double_complex, double _Complex, +) OP_FUNC_3BUF(sum, c_long_double_complex, long double _Complex, +) @@ -863,6 +1001,11 @@ OP_FUNC_3BUF(prod, fortran_integer8, ompi_fortran_integer8_t, *) OP_FUNC_3BUF(prod, fortran_integer16, ompi_fortran_integer16_t, *) #endif /* Floating point */ +#if defined(HAVE_SHORT_FLOAT) +OP_FUNC_3BUF(prod, short_float, short float, *) +#elif defined(HAVE_OPAL_SHORT_FLOAT_T) +OP_FUNC_3BUF(prod, short_float, opal_short_float_t, *) +#endif OP_FUNC_3BUF(prod, float, float, *) OP_FUNC_3BUF(prod, double, 
double, *) OP_FUNC_3BUF(prod, long_double, long double, *) @@ -885,6 +1028,11 @@ OP_FUNC_3BUF(prod, fortran_real8, ompi_fortran_real8_t, *) OP_FUNC_3BUF(prod, fortran_real16, ompi_fortran_real16_t, *) #endif /* Complex */ +#if defined(HAVE_SHORT_FLOAT__COMPLEX) +OP_FUNC_3BUF(prod, c_short_float_complex, short float _Complex, *) +#elif defined(HAVE_OPAL_SHORT_FLOAT_COMPLEX_T) +COMPLEX_PROD_FUNC_3BUF(c_short_float_complex, opal_short_float_t) +#endif OP_FUNC_3BUF(prod, c_float_complex, float _Complex, *) OP_FUNC_3BUF(prod, c_double_complex, double _Complex, *) OP_FUNC_3BUF(prod, c_long_double_complex, long double _Complex, *) @@ -1240,12 +1388,22 @@ LOC_FUNC_3BUF(minloc, long_double_int, <) /** Floating point, including all the Fortran reals *********************/ +#if defined(HAVE_SHORT_FLOAT) || defined(HAVE_OPAL_SHORT_FLOAT_T) +#define SHORT_FLOAT(name, ftype) ompi_op_base_##ftype##_##name##_short_float +#else +#define SHORT_FLOAT(name, ftype) NULL +#endif +#define FLOAT(name, ftype) ompi_op_base_##ftype##_##name##_float +#define DOUBLE(name, ftype) ompi_op_base_##ftype##_##name##_double +#define LONG_DOUBLE(name, ftype) ompi_op_base_##ftype##_##name##_long_double + #define FLOATING_POINT(name, ftype) \ - [OMPI_OP_BASE_TYPE_FLOAT] = ompi_op_base_##ftype##_##name##_float, \ - [OMPI_OP_BASE_TYPE_DOUBLE] = ompi_op_base_##ftype##_##name##_double, \ + [OMPI_OP_BASE_TYPE_SHORT_FLOAT] = SHORT_FLOAT(name, ftype), \ + [OMPI_OP_BASE_TYPE_FLOAT] = FLOAT(name, ftype), \ + [OMPI_OP_BASE_TYPE_DOUBLE] = DOUBLE(name, ftype), \ FLOATING_POINT_FORTRAN_REAL(name, ftype), \ [OMPI_OP_BASE_TYPE_DOUBLE_PRECISION] = FLOATING_POINT_FORTRAN_DOUBLE_PRECISION(name, ftype), \ - [OMPI_OP_BASE_TYPE_LONG_DOUBLE] = ompi_op_base_##ftype##_##name##_long_double + [OMPI_OP_BASE_TYPE_LONG_DOUBLE] = LONG_DOUBLE(name, ftype) /** Fortran logical *****************************************************/ @@ -1262,11 +1420,17 @@ LOC_FUNC_3BUF(minloc, long_double_int, <) /** Complex 
*****************************************************/ +#if defined(HAVE_SHORT_FLOAT__COMPLEX) || defined(HAVE_OPAL_SHORT_FLOAT_COMPLEX_T) +#define SHORT_FLOAT_COMPLEX(name, ftype) ompi_op_base_##ftype##_##name##_c_short_float_complex +#else +#define SHORT_FLOAT_COMPLEX(name, ftype) NULL +#endif #define FLOAT_COMPLEX(name, ftype) ompi_op_base_##ftype##_##name##_c_float_complex #define DOUBLE_COMPLEX(name, ftype) ompi_op_base_##ftype##_##name##_c_double_complex #define LONG_DOUBLE_COMPLEX(name, ftype) ompi_op_base_##ftype##_##name##_c_long_double_complex #define COMPLEX(name, ftype) \ + [OMPI_OP_BASE_TYPE_C_SHORT_FLOAT_COMPLEX] = SHORT_FLOAT_COMPLEX(name, ftype), \ [OMPI_OP_BASE_TYPE_C_FLOAT_COMPLEX] = FLOAT_COMPLEX(name, ftype), \ [OMPI_OP_BASE_TYPE_C_DOUBLE_COMPLEX] = DOUBLE_COMPLEX(name, ftype), \ [OMPI_OP_BASE_TYPE_C_LONG_DOUBLE_COMPLEX] = LONG_DOUBLE_COMPLEX(name, ftype) diff --git a/ompi/mca/op/op.h b/ompi/mca/op/op.h index a4006b20c6..5d1bff93f9 100644 --- a/ompi/mca/op/op.h +++ b/ompi/mca/op/op.h @@ -16,6 +16,7 @@ * Copyright (c) 2007-2009 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights * reserved. + * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -131,6 +132,8 @@ enum { /** Fortran integer*16 */ OMPI_OP_BASE_TYPE_INTEGER16, + /** Floating point: short float */ + OMPI_OP_BASE_TYPE_SHORT_FLOAT, /** Floating point: float */ OMPI_OP_BASE_TYPE_FLOAT, /** Floating point: double */ @@ -156,6 +159,8 @@ enum { OMPI_OP_BASE_TYPE_BOOL, /** Complex */ + /* short float complex */ + OMPI_OP_BASE_TYPE_C_SHORT_FLOAT_COMPLEX, /* float complex */ OMPI_OP_BASE_TYPE_C_FLOAT_COMPLEX, /* double complex */ diff --git a/ompi/op/op.c b/ompi/op/op.c index b9559ceb55..ef85227142 100644 --- a/ompi/op/op.c +++ b/ompi/op/op.c @@ -16,6 +16,7 @@ * reserved. * Copyright (c) 2015 Research Organization for Information Science * and Technology (RIST). 
All rights reserved. + * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -217,6 +218,12 @@ int ompi_op_init(void) #warning Unsupported definition for MPI_COUNT #endif + /* Datatypes proposed to the MPI Forum in June 2017 for proposal in + * the MPI 4.0 standard. As of February 2019, it is not accepted yet. + * See https://github.com/mpi-forum/mpi-issues/issues/65 */ + ompi_op_ddt_map[OMPI_DATATYPE_MPI_SHORT_FLOAT] = OMPI_OP_BASE_TYPE_SHORT_FLOAT; + ompi_op_ddt_map[OMPI_DATATYPE_MPI_C_SHORT_FLOAT_COMPLEX] = OMPI_OP_BASE_TYPE_C_SHORT_FLOAT_COMPLEX; + /* Create the intrinsic ops */ if (OMPI_SUCCESS != diff --git a/opal/datatype/opal_datatype.h b/opal/datatype/opal_datatype.h index bcce6ce744..1f035b6d72 100644 --- a/opal/datatype/opal_datatype.h +++ b/opal/datatype/opal_datatype.h @@ -61,7 +61,7 @@ BEGIN_C_DECLS * * BEWARE: This constant should reflect whatever the OMPI-layer needs. */ -#define OPAL_DATATYPE_MAX_SUPPORTED 47 +#define OPAL_DATATYPE_MAX_SUPPORTED 49 /* flags for the datatypes. */