From 4375c11a58384b5bb422f7bcc4200be72f53518a Mon Sep 17 00:00:00 2001
From: KAWASHIMA Takahiro <t-kawashima@jp.fujitsu.com>
Date: Wed, 21 Nov 2018 19:58:14 +0900
Subject: [PATCH] ompi/datatype: Add `ompi_mpi_short_float`

... and `ompi_mpi_c_short_float_complex` and `ompi_mpi_cxx_sfltcplex`.

These are Open MPI internal variables intended to be defined as
`MPI_SHORT_FLOAT`, `MPI_C_SHORT_FLOAT_COMPLEX`, and
`MPI_CXX_SHORT_FLOAT_COMPLEX` in the future.

`OMPI_DATATYPE_MPI_C_SHORT_FLOAT_COMPLEX` is also required to
support `MPI_COMPLEX4` in the next commit.

Signed-off-by: KAWASHIMA Takahiro <t-kawashima@jp.fujitsu.com>
---
 ompi/datatype/ompi_datatype.h                 |   3 +-
 ompi/datatype/ompi_datatype_internal.h        |  38 +++-
 ompi/datatype/ompi_datatype_module.c          |  34 +++-
 .../coll/portals4/coll_portals4_component.c   |   6 +
 ompi/mca/op/base/op_base_functions.c          | 170 +++++++++++++++++-
 ompi/mca/op/op.h                              |   5 +
 ompi/op/op.c                                  |   7 +
 opal/datatype/opal_datatype.h                 |   2 +-
 8 files changed, 253 insertions(+), 12 deletions(-)

diff --git a/ompi/datatype/ompi_datatype.h b/ompi/datatype/ompi_datatype.h
index d9df1f1ae4..68e049ac61 100644
--- a/ompi/datatype/ompi_datatype.h
+++ b/ompi/datatype/ompi_datatype.h
@@ -9,6 +9,7 @@
  *                         reserved.
  * Copyright (c) 2015-2018 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
+ * Copyright (c) 2018      FUJITSU LIMITED.  All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -57,7 +58,7 @@ BEGIN_C_DECLS
 #define OMPI_DATATYPE_FLAG_DATA_FORTRAN  0xC000
 #define OMPI_DATATYPE_FLAG_DATA_LANGUAGE 0xC000
 
-#define OMPI_DATATYPE_MAX_PREDEFINED 47
+#define OMPI_DATATYPE_MAX_PREDEFINED 49
 
 #if OMPI_DATATYPE_MAX_PREDEFINED > OPAL_DATATYPE_MAX_SUPPORTED
 #error Need to increase the number of supported dataypes by OPAL (value OPAL_DATATYPE_MAX_SUPPORTED).
diff --git a/ompi/datatype/ompi_datatype_internal.h b/ompi/datatype/ompi_datatype_internal.h
index 38bed445c4..4c8c2fa8e2 100644
--- a/ompi/datatype/ompi_datatype_internal.h
+++ b/ompi/datatype/ompi_datatype_internal.h
@@ -9,7 +9,7 @@
  *                         reserved.
  * Copyright (c) 2015-2018 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
- * Copyright (c) 2016      FUJITSU LIMITED.  All rights reserved.
+ * Copyright (c) 2016-2018 FUJITSU LIMITED.  All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -99,8 +99,16 @@
  */
 #define OMPI_DATATYPE_MPI_COUNT                   0x2E
 
+/*
+ * Datatypes proposed to the MPI Forum in June 2017 for inclusion in
+ * the MPI 4.0 standard. As of February 2019, they are not accepted yet.
+ * See https://github.com/mpi-forum/mpi-issues/issues/65
+ */
+#define OMPI_DATATYPE_MPI_SHORT_FLOAT             0x2F
+#define OMPI_DATATYPE_MPI_C_SHORT_FLOAT_COMPLEX   0x30
+
 /* This should __ALWAYS__ stay last  */
-#define OMPI_DATATYPE_MPI_UNAVAILABLE             0x2F
+#define OMPI_DATATYPE_MPI_UNAVAILABLE             0x31
 
 
 #define OMPI_DATATYPE_MPI_MAX_PREDEFINED          (OMPI_DATATYPE_MPI_UNAVAILABLE+1)
@@ -382,6 +390,7 @@
  * C++ datatypes, these map to C datatypes.
  */
 #define OMPI_DATATYPE_MPI_CXX_BOOL                OMPI_DATATYPE_MPI_C_BOOL
+#define OMPI_DATATYPE_MPI_CXX_SHORT_FLOAT_COMPLEX OMPI_DATATYPE_MPI_C_SHORT_FLOAT_COMPLEX
 #define OMPI_DATATYPE_MPI_CXX_FLOAT_COMPLEX       OMPI_DATATYPE_MPI_C_FLOAT_COMPLEX
 #define OMPI_DATATYPE_MPI_CXX_DOUBLE_COMPLEX      OMPI_DATATYPE_MPI_C_DOUBLE_COMPLEX
 #define OMPI_DATATYPE_MPI_CXX_LONG_DOUBLE_COMPLEX OMPI_DATATYPE_MPI_C_LONG_DOUBLE_COMPLEX
@@ -438,7 +447,7 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX
     OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE( UNAVAILABLE, NAME, FLAGS )
 
 /*
- * Initilization for these types is deferred until runtime.
+ * Initialization for these types is deferred until runtime.
  *
  * Using this macro implies that at this point not all informations needed
  * to fill up the datatype are known. We fill them with zeros and then later
@@ -570,6 +579,26 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX
 #define OMPI_DATATYPE_INITIALIZER_UNSIGNED_LONG_LONG  OPAL_DATATYPE_INITIALIZER_UINT16
 #endif
 
+#if defined(HAVE_SHORT_FLOAT)
+#if SIZEOF_SHORT_FLOAT == 2
+#define OMPI_DATATYPE_INITIALIZER_SHORT_FLOAT         OPAL_DATATYPE_INITIALIZER_FLOAT2
+#elif SIZEOF_SHORT_FLOAT == 4
+#define OMPI_DATATYPE_INITIALIZER_SHORT_FLOAT         OPAL_DATATYPE_INITIALIZER_FLOAT4
+#elif SIZEOF_SHORT_FLOAT == 8
+#define OMPI_DATATYPE_INITIALIZER_SHORT_FLOAT         OPAL_DATATYPE_INITIALIZER_FLOAT8
+#endif
+#elif defined(HAVE_OPAL_SHORT_FLOAT_T) /* HAVE_SHORT_FLOAT */
+#if SIZEOF_OPAL_SHORT_FLOAT_T == 2
+#define OMPI_DATATYPE_INITIALIZER_SHORT_FLOAT         OPAL_DATATYPE_INITIALIZER_FLOAT2
+#elif SIZEOF_OPAL_SHORT_FLOAT_T == 4
+#define OMPI_DATATYPE_INITIALIZER_SHORT_FLOAT         OPAL_DATATYPE_INITIALIZER_FLOAT4
+#elif SIZEOF_OPAL_SHORT_FLOAT_T == 8
+#define OMPI_DATATYPE_INITIALIZER_SHORT_FLOAT         OPAL_DATATYPE_INITIALIZER_FLOAT8
+#endif
+#else /* HAVE_SHORT_FLOAT */
+#define OMPI_DATATYPE_INITIALIZER_SHORT_FLOAT         OPAL_DATATYPE_INITIALIZER_UNAVAILABLE
+#endif /* HAVE_SHORT_FLOAT */
+
 #if SIZEOF_FLOAT == 2
 #define OMPI_DATATYPE_INITIALIZER_FLOAT               OPAL_DATATYPE_INITIALIZER_FLOAT2
 #elif SIZEOF_FLOAT == 4
@@ -604,6 +633,7 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX
 
 #define OMPI_DATATYPE_INITIALIZER_WCHAR               OPAL_DATATYPE_INITIALIZER_WCHAR
 
+#define OMPI_DATATYPE_INITIALIZER_C_SHORT_FLOAT_COMPLEX OPAL_DATATYPE_INITIALIZER_SHORT_FLOAT_COMPLEX
 #define OMPI_DATATYPE_INITIALIZER_C_FLOAT_COMPLEX       OPAL_DATATYPE_INITIALIZER_FLOAT_COMPLEX
 #define OMPI_DATATYPE_INITIALIZER_C_DOUBLE_COMPLEX      OPAL_DATATYPE_INITIALIZER_DOUBLE_COMPLEX
 #define OMPI_DATATYPE_INITIALIZER_C_LONG_DOUBLE_COMPLEX OPAL_DATATYPE_INITIALIZER_LONG_DOUBLE_COMPLEX
@@ -615,7 +645,7 @@ extern const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX
 #define OMPI_DATATYPE_FIRST_TYPE                      OPAL_DATATYPE_MAX_PREDEFINED
 
 /*
- * Derived datatypes supposely contiguous
+ * Derived datatypes supposedly contiguous
  */
 #define OMPI_DATATYPE_2INT                            (OMPI_DATATYPE_FIRST_TYPE+6)
 #define OMPI_DATATYPE_2INTEGER                        (OMPI_DATATYPE_FIRST_TYPE+7)
diff --git a/ompi/datatype/ompi_datatype_module.c b/ompi/datatype/ompi_datatype_module.c
index 06cfa773c7..5df334b10a 100644
--- a/ompi/datatype/ompi_datatype_module.c
+++ b/ompi/datatype/ompi_datatype_module.c
@@ -17,7 +17,7 @@
  *                         reserved.
  * Copyright (c) 2015-2018 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
- * Copyright (c) 2016      FUJITSU LIMITED.  All rights reserved.
+ * Copyright (c) 2016-2018 FUJITSU LIMITED.  All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -49,9 +49,9 @@ int32_t ompi_datatype_number_of_predefined_data = 0;
  * The following initialization of C, C++ and Fortran types is fairly complex,
  * based on the OPAL-datatypes.
  *   ompi_datatypes.h
- *       \-------> ompi_datatypes_internal.h   (Macros defining type-number and initalization)
+ *       \-------> ompi_datatypes_internal.h   (Macros defining type-number and initialization)
  *   opal_datatypes.h
- *       \-------> opal_datatypes_internal.h   (Macros defining type-number and initalization)
+ *       \-------> opal_datatypes_internal.h   (Macros defining type-number and initialization)
  *
  * The Macros in the OMPI Layer differ in that:
  *   Additionally to OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE, we have a OMPI_DATATYPE_INIT_PREDEFINED,
@@ -82,6 +82,11 @@ ompi_predefined_datatype_t ompi_mpi_long =           OMPI_DATATYPE_INIT_PREDEFIN
 ompi_predefined_datatype_t ompi_mpi_unsigned_long =  OMPI_DATATYPE_INIT_PREDEFINED (UNSIGNED_LONG, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT );
 ompi_predefined_datatype_t ompi_mpi_long_long_int =  OMPI_DATATYPE_INIT_PREDEFINED (LONG_LONG_INT, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT );
 ompi_predefined_datatype_t ompi_mpi_unsigned_long_long = OMPI_DATATYPE_INIT_PREDEFINED (UNSIGNED_LONG_LONG, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_INT );
+#if defined(HAVE_SHORT_FLOAT) || defined(HAVE_OPAL_SHORT_FLOAT_T)
+ompi_predefined_datatype_t ompi_mpi_short_float =    OMPI_DATATYPE_INIT_PREDEFINED (SHORT_FLOAT, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_FLOAT );
+#else
+ompi_predefined_datatype_t ompi_mpi_short_float =    OMPI_DATATYPE_INIT_UNAVAILABLE (SHORT_FLOAT, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_FLOAT );
+#endif  /* HAVE_SHORT_FLOAT */
 ompi_predefined_datatype_t ompi_mpi_float =          OMPI_DATATYPE_INIT_PREDEFINED (FLOAT, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_FLOAT );
 ompi_predefined_datatype_t ompi_mpi_double =         OMPI_DATATYPE_INIT_PREDEFINED (DOUBLE, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_FLOAT );
 ompi_predefined_datatype_t ompi_mpi_long_double =    OMPI_DATATYPE_INIT_PREDEFINED (LONG_DOUBLE, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_FLOAT );
@@ -101,12 +106,22 @@ ompi_predefined_datatype_t ompi_mpi_cxx_bool =       OMPI_DATATYPE_INIT_PREDEFIN
 /*
  * Complex datatypes for C (base types), C++, and fortran
  */
+#if defined(HAVE_SHORT_FLOAT__COMPLEX) || defined(HAVE_OPAL_SHORT_FLOAT_COMPLEX_T)
+ompi_predefined_datatype_t ompi_mpi_c_short_float_complex = OMPI_DATATYPE_INIT_PREDEFINED (C_SHORT_FLOAT_COMPLEX, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_COMPLEX );
+#else
+ompi_predefined_datatype_t ompi_mpi_c_short_float_complex = OMPI_DATATYPE_INIT_UNAVAILABLE (C_SHORT_FLOAT_COMPLEX, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_COMPLEX );
+#endif  /* HAVE_SHORT_FLOAT__COMPLEX */
 ompi_predefined_datatype_t ompi_mpi_c_float_complex =       OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (C_FLOAT_COMPLEX, C_COMPLEX, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_COMPLEX );
 ompi_predefined_datatype_t ompi_mpi_c_complex =             OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (C_FLOAT_COMPLEX, C_COMPLEX, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_COMPLEX );
 ompi_predefined_datatype_t ompi_mpi_c_double_complex =      OMPI_DATATYPE_INIT_PREDEFINED (C_DOUBLE_COMPLEX, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_COMPLEX );
 ompi_predefined_datatype_t ompi_mpi_c_long_double_complex = OMPI_DATATYPE_INIT_PREDEFINED (C_LONG_DOUBLE_COMPLEX, OMPI_DATATYPE_FLAG_DATA_C | OMPI_DATATYPE_FLAG_DATA_COMPLEX );
 
 /* The C++ complex datatypes are the same as the C datatypes */
+#if defined(HAVE_SHORT_FLOAT__COMPLEX) || defined(HAVE_OPAL_SHORT_FLOAT_COMPLEX_T)
+ompi_predefined_datatype_t ompi_mpi_cxx_sfltcplex =  OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (C_SHORT_FLOAT_COMPLEX, CXX_SHORT_FLOAT_COMPLEX, OMPI_DATATYPE_FLAG_DATA_CPP | OMPI_DATATYPE_FLAG_DATA_COMPLEX );
+#else
+ompi_predefined_datatype_t ompi_mpi_cxx_sfltcplex =  OMPI_DATATYPE_INIT_UNAVAILABLE (CXX_SHORT_FLOAT_COMPLEX, OMPI_DATATYPE_FLAG_DATA_CPP | OMPI_DATATYPE_FLAG_DATA_COMPLEX );
+#endif  /* HAVE_SHORT_FLOAT__COMPLEX */
 ompi_predefined_datatype_t ompi_mpi_cxx_cplex =      OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (C_FLOAT_COMPLEX, CXX_FLOAT_COMPLEX, OMPI_DATATYPE_FLAG_DATA_CPP | OMPI_DATATYPE_FLAG_DATA_COMPLEX );
 ompi_predefined_datatype_t ompi_mpi_cxx_dblcplex =   OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (C_DOUBLE_COMPLEX, CXX_DOUBLE_COMPLEX, OMPI_DATATYPE_FLAG_DATA_CPP | OMPI_DATATYPE_FLAG_DATA_COMPLEX );
 ompi_predefined_datatype_t ompi_mpi_cxx_ldblcplex =  OMPI_DATATYPE_INIT_PREDEFINED_BASIC_TYPE (C_LONG_DOUBLE_COMPLEX, CXX_LONG_DOUBLE_COMPLEX, OMPI_DATATYPE_FLAG_DATA_CPP | OMPI_DATATYPE_FLAG_DATA_COMPLEX );
@@ -347,6 +362,12 @@ const ompi_datatype_t* ompi_datatype_basicDatatypes[OMPI_DATATYPE_MPI_MAX_PREDEF
     /* MPI 3.0 types */
     [OMPI_DATATYPE_MPI_COUNT] = &ompi_mpi_count.dt,
 
+    /* Datatypes proposed to the MPI Forum in June 2017 for inclusion in
+     * the MPI 4.0 standard. As of February 2019, they are not accepted yet.
+     * See https://github.com/mpi-forum/mpi-issues/issues/65 */
+    [OMPI_DATATYPE_MPI_SHORT_FLOAT] = &ompi_mpi_short_float.dt,
+    [OMPI_DATATYPE_MPI_C_SHORT_FLOAT_COMPLEX] = &ompi_mpi_c_short_float_complex.dt,
+
     [OMPI_DATATYPE_MPI_UNAVAILABLE] = &ompi_mpi_unavailable.dt,
 };
 
@@ -612,6 +633,13 @@ int32_t ompi_datatype_init( void )
     /* MPI 3.0 types */
     MOOG(count, 72);
 
+    /* Datatypes proposed to the MPI Forum in June 2017 for inclusion in
+     * the MPI 4.0 standard. As of February 2019, they are not accepted yet.
+     * See https://github.com/mpi-forum/mpi-issues/issues/65 */
+    MOOG(short_float, 73);
+    MOOG(c_short_float_complex, 74);
+    MOOG(cxx_sfltcplex, 75);
+
     /**
      * Now make sure all non-contiguous types are marked as such.
      */
diff --git a/ompi/mca/coll/portals4/coll_portals4_component.c b/ompi/mca/coll/portals4/coll_portals4_component.c
index d632340ee2..4665e98068 100644
--- a/ompi/mca/coll/portals4/coll_portals4_component.c
+++ b/ompi/mca/coll/portals4/coll_portals4_component.c
@@ -116,6 +116,12 @@ ptl_datatype_t ompi_coll_portals4_atomic_datatype [OMPI_DATATYPE_MPI_MAX_PREDEFI
         /* MPI 3.0 types */
         [OMPI_DATATYPE_MPI_COUNT] = COLL_PORTALS4_NO_DTYPE,
 
+        /* Datatypes proposed to the MPI Forum in June 2017 for inclusion in
+         * the MPI 4.0 standard. As of February 2019, they are not accepted yet.
+         * See https://github.com/mpi-forum/mpi-issues/issues/65 */
+        [OMPI_DATATYPE_MPI_SHORT_FLOAT] = COLL_PORTALS4_NO_DTYPE,
+        [OMPI_DATATYPE_MPI_C_SHORT_FLOAT_COMPLEX] = COLL_PORTALS4_NO_DTYPE,
+
         [OMPI_DATATYPE_MPI_UNAVAILABLE] = COLL_PORTALS4_NO_DTYPE,
 
 };
diff --git a/ompi/mca/op/base/op_base_functions.c b/ompi/mca/op/base/op_base_functions.c
index 648aad5e9b..f08a9d04be 100644
--- a/ompi/mca/op/base/op_base_functions.c
+++ b/ompi/mca/op/base/op_base_functions.c
@@ -13,6 +13,7 @@
  * Copyright (c) 2006-2014 Cisco Systems, Inc.  All rights reserved.
  * Copyright (c) 2013      Los Alamos National Security, LLC. All rights
  *                         reserved.
+ * Copyright (c) 2018      FUJITSU LIMITED.  All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -102,6 +103,49 @@
         }                                                               \
     }
 
+/*
+ * Define a function to calculate sum of complex numbers using a real
+ * number floating-point type (float, double, etc.).  This macro is used
+ * when the compiler supports a real number floating-point type but does
+ * not support the corresponding complex number type.
+ */
+#define COMPLEX_SUM_FUNC(type_name, type) \
+  static void ompi_op_base_2buff_sum_##type_name(void *in, void *out, int *count, \
+                                                 struct ompi_datatype_t **dtype, \
+                                                 struct ompi_op_base_module_1_0_0_t *module) \
+  {                                                                      \
+      int i;                                                             \
+      type (*a)[2] = (type (*)[2]) in;                                   \
+      type (*b)[2] = (type (*)[2]) out;                                  \
+      for (i = 0; i < *count; ++i, ++a, ++b) {                           \
+          (*b)[0] += (*a)[0];                                            \
+          (*b)[1] += (*a)[1];                                            \
+      }                                                                  \
+  }
+
+/*
+ * Define a function to calculate product of complex numbers using a real
+ * number floating-point type (float, double, etc.).  This macro is used
+ * when the compiler supports a real number floating-point type but does
+ * not support the corresponding complex number type.
+ */
+#define COMPLEX_PROD_FUNC(type_name, type) \
+  static void ompi_op_base_2buff_prod_##type_name(void *in, void *out, int *count, \
+                                                  struct ompi_datatype_t **dtype, \
+                                                  struct ompi_op_base_module_1_0_0_t *module) \
+  {                                                                      \
+      int i;                                                             \
+      type (*a)[2] = (type (*)[2]) in;                                   \
+      type (*b)[2] = (type (*)[2]) out;                                  \
+      type c[2];                                                         \
+      for (i = 0; i < *count; ++i, ++a, ++b) {                           \
+          c[0] = (*a)[0] * (*b)[0] - (*a)[1] * (*b)[1];                  \
+          c[1] = (*a)[0] * (*b)[1] + (*a)[1] * (*b)[0];                  \
+          (*b)[0] = c[0];                                                \
+          (*b)[1] = c[1];                                                \
+      }                                                                  \
+  }
+
 /*************************************************************************
  * Max
  *************************************************************************/
@@ -137,6 +181,11 @@ FUNC_FUNC(max, fortran_integer8, ompi_fortran_integer8_t)
 FUNC_FUNC(max, fortran_integer16, ompi_fortran_integer16_t)
 #endif
 /* Floating point */
+#if defined(HAVE_SHORT_FLOAT)
+FUNC_FUNC(max, short_float, short float)
+#elif defined(HAVE_OPAL_SHORT_FLOAT_T)
+FUNC_FUNC(max, short_float, opal_short_float_t)
+#endif
 FUNC_FUNC(max, float, float)
 FUNC_FUNC(max, double, double)
 FUNC_FUNC(max, long_double, long double)
@@ -195,6 +244,11 @@ FUNC_FUNC(min, fortran_integer8, ompi_fortran_integer8_t)
 FUNC_FUNC(min, fortran_integer16, ompi_fortran_integer16_t)
 #endif
 /* Floating point */
+#if defined(HAVE_SHORT_FLOAT)
+FUNC_FUNC(min, short_float, short float)
+#elif defined(HAVE_OPAL_SHORT_FLOAT_T)
+FUNC_FUNC(min, short_float, opal_short_float_t)
+#endif
 FUNC_FUNC(min, float, float)
 FUNC_FUNC(min, double, double)
 FUNC_FUNC(min, long_double, long double)
@@ -250,6 +304,11 @@ OP_FUNC(sum, fortran_integer8, ompi_fortran_integer8_t, +=)
 OP_FUNC(sum, fortran_integer16, ompi_fortran_integer16_t, +=)
 #endif
 /* Floating point */
+#if defined(HAVE_SHORT_FLOAT)
+OP_FUNC(sum, short_float, short float, +=)
+#elif defined(HAVE_OPAL_SHORT_FLOAT_T)
+OP_FUNC(sum, short_float, opal_short_float_t, +=)
+#endif
 OP_FUNC(sum, float, float, +=)
 OP_FUNC(sum, double, double, +=)
 OP_FUNC(sum, long_double, long double, +=)
@@ -272,6 +331,11 @@ OP_FUNC(sum, fortran_real8, ompi_fortran_real8_t, +=)
 OP_FUNC(sum, fortran_real16, ompi_fortran_real16_t, +=)
 #endif
 /* Complex */
+#if defined(HAVE_SHORT_FLOAT__COMPLEX)
+OP_FUNC(sum, c_short_float_complex, short float _Complex, +=)
+#elif defined(HAVE_OPAL_SHORT_FLOAT_COMPLEX_T)
+COMPLEX_SUM_FUNC(c_short_float_complex, opal_short_float_t)
+#endif
 OP_FUNC(sum, c_float_complex, float _Complex, +=)
 OP_FUNC(sum, c_double_complex, double _Complex, +=)
 OP_FUNC(sum, c_long_double_complex, long double _Complex, +=)
@@ -309,6 +373,11 @@ OP_FUNC(prod, fortran_integer8, ompi_fortran_integer8_t, *=)
 OP_FUNC(prod, fortran_integer16, ompi_fortran_integer16_t, *=)
 #endif
 /* Floating point */
+#if defined(HAVE_SHORT_FLOAT)
+OP_FUNC(prod, short_float, short float, *=)
+#elif defined(HAVE_OPAL_SHORT_FLOAT_T)
+OP_FUNC(prod, short_float, opal_short_float_t, *=)
+#endif
 OP_FUNC(prod, float, float, *=)
 OP_FUNC(prod, double, double, *=)
 OP_FUNC(prod, long_double, long double, *=)
@@ -331,6 +400,11 @@ OP_FUNC(prod, fortran_real8, ompi_fortran_real8_t, *=)
 OP_FUNC(prod, fortran_real16, ompi_fortran_real16_t, *=)
 #endif
 /* Complex */
+#if defined(HAVE_SHORT_FLOAT__COMPLEX)
+OP_FUNC(prod, c_short_float_complex, short float _Complex, *=)
+#elif defined(HAVE_OPAL_SHORT_FLOAT_COMPLEX_T)
+COMPLEX_PROD_FUNC(c_short_float_complex, opal_short_float_t)
+#endif
 OP_FUNC(prod, c_float_complex, float _Complex, *=)
 OP_FUNC(prod, c_double_complex, double _Complex, *=)
 OP_FUNC(prod, c_long_double_complex, long double _Complex, *=)
@@ -656,6 +730,50 @@ LOC_FUNC(minloc, long_double_int, <)
       }                                                                 \
   }
 
+/*
+ * Define a function to calculate sum of complex numbers using a real
+ * number floating-point type (float, double, etc.).  This macro is used
+ * when the compiler supports a real number floating-point type but does
+ * not support the corresponding complex number type.
+ */
+#define COMPLEX_SUM_FUNC_3BUF(type_name, type) \
+  static void ompi_op_base_3buff_sum_##type_name(void * restrict in1,    \
+                                                 void * restrict in2, void * restrict out, int *count, \
+                                                 struct ompi_datatype_t **dtype, \
+                                                 struct ompi_op_base_module_1_0_0_t *module) \
+  {                                                                      \
+      int i;                                                             \
+      type (*a1)[2] = (type (*)[2]) in1;                                 \
+      type (*a2)[2] = (type (*)[2]) in2;                                 \
+      type (*b)[2] = (type (*)[2]) out;                                  \
+      for (i = 0; i < *count; ++i, ++a1, ++a2, ++b) {                    \
+          (*b)[0] = (*a1)[0] + (*a2)[0];                                 \
+          (*b)[1] = (*a1)[1] + (*a2)[1];                                 \
+      }                                                                  \
+  }
+
+/*
+ * Define a function to calculate product of complex numbers using a real
+ * number floating-point type (float, double, etc.).  This macro is used
+ * when the compiler supports a real number floating-point type but does
+ * not support the corresponding complex number type.
+ */
+#define COMPLEX_PROD_FUNC_3BUF(type_name, type) \
+  static void ompi_op_base_3buff_prod_##type_name(void * restrict in1,   \
+                                                  void * restrict in2, void * restrict out, int *count, \
+                                                  struct ompi_datatype_t **dtype, \
+                                                  struct ompi_op_base_module_1_0_0_t *module) \
+  {                                                                      \
+      int i;                                                             \
+      type (*a1)[2] = (type (*)[2]) in1;                                 \
+      type (*a2)[2] = (type (*)[2]) in2;                                 \
+      type (*b)[2] = (type (*)[2]) out;                                  \
+      for (i = 0; i < *count; ++i, ++a1, ++a2, ++b) {                    \
+          (*b)[0] = (*a1)[0] * (*a2)[0] - (*a1)[1] * (*a2)[1];           \
+          (*b)[1] = (*a1)[0] * (*a2)[1] + (*a1)[1] * (*a2)[0];           \
+      }                                                                  \
+  }
+
 /*************************************************************************
  * Max
  *************************************************************************/
@@ -691,6 +809,11 @@ FUNC_FUNC_3BUF(max, fortran_integer8, ompi_fortran_integer8_t)
 FUNC_FUNC_3BUF(max, fortran_integer16, ompi_fortran_integer16_t)
 #endif
 /* Floating point */
+#if defined(HAVE_SHORT_FLOAT)
+FUNC_FUNC_3BUF(max, short_float, short float)
+#elif defined(HAVE_OPAL_SHORT_FLOAT_T)
+FUNC_FUNC_3BUF(max, short_float, opal_short_float_t)
+#endif
 FUNC_FUNC_3BUF(max, float, float)
 FUNC_FUNC_3BUF(max, double, double)
 FUNC_FUNC_3BUF(max, long_double, long double)
@@ -749,6 +872,11 @@ FUNC_FUNC_3BUF(min, fortran_integer8, ompi_fortran_integer8_t)
 FUNC_FUNC_3BUF(min, fortran_integer16, ompi_fortran_integer16_t)
 #endif
 /* Floating point */
+#if defined(HAVE_SHORT_FLOAT)
+FUNC_FUNC_3BUF(min, short_float, short float)
+#elif defined(HAVE_OPAL_SHORT_FLOAT_T)
+FUNC_FUNC_3BUF(min, short_float, opal_short_float_t)
+#endif
 FUNC_FUNC_3BUF(min, float, float)
 FUNC_FUNC_3BUF(min, double, double)
 FUNC_FUNC_3BUF(min, long_double, long double)
@@ -804,6 +932,11 @@ OP_FUNC_3BUF(sum, fortran_integer8, ompi_fortran_integer8_t, +)
 OP_FUNC_3BUF(sum, fortran_integer16, ompi_fortran_integer16_t, +)
 #endif
 /* Floating point */
+#if defined(HAVE_SHORT_FLOAT)
+OP_FUNC_3BUF(sum, short_float, short float, +)
+#elif defined(HAVE_OPAL_SHORT_FLOAT_T)
+OP_FUNC_3BUF(sum, short_float, opal_short_float_t, +)
+#endif
 OP_FUNC_3BUF(sum, float, float, +)
 OP_FUNC_3BUF(sum, double, double, +)
 OP_FUNC_3BUF(sum, long_double, long double, +)
@@ -826,6 +959,11 @@ OP_FUNC_3BUF(sum, fortran_real8, ompi_fortran_real8_t, +)
 OP_FUNC_3BUF(sum, fortran_real16, ompi_fortran_real16_t, +)
 #endif
 /* Complex */
+#if defined(HAVE_SHORT_FLOAT__COMPLEX)
+OP_FUNC_3BUF(sum, c_short_float_complex, short float _Complex, +)
+#elif defined(HAVE_OPAL_SHORT_FLOAT_COMPLEX_T)
+COMPLEX_SUM_FUNC_3BUF(c_short_float_complex, opal_short_float_t)
+#endif
 OP_FUNC_3BUF(sum, c_float_complex, float _Complex, +)
 OP_FUNC_3BUF(sum, c_double_complex, double _Complex, +)
 OP_FUNC_3BUF(sum, c_long_double_complex, long double _Complex, +)
@@ -863,6 +1001,11 @@ OP_FUNC_3BUF(prod, fortran_integer8, ompi_fortran_integer8_t, *)
 OP_FUNC_3BUF(prod, fortran_integer16, ompi_fortran_integer16_t, *)
 #endif
 /* Floating point */
+#if defined(HAVE_SHORT_FLOAT)
+OP_FUNC_3BUF(prod, short_float, short float, *)
+#elif defined(HAVE_OPAL_SHORT_FLOAT_T)
+OP_FUNC_3BUF(prod, short_float, opal_short_float_t, *)
+#endif
 OP_FUNC_3BUF(prod, float, float, *)
 OP_FUNC_3BUF(prod, double, double, *)
 OP_FUNC_3BUF(prod, long_double, long double, *)
@@ -885,6 +1028,11 @@ OP_FUNC_3BUF(prod, fortran_real8, ompi_fortran_real8_t, *)
 OP_FUNC_3BUF(prod, fortran_real16, ompi_fortran_real16_t, *)
 #endif
 /* Complex */
+#if defined(HAVE_SHORT_FLOAT__COMPLEX)
+OP_FUNC_3BUF(prod, c_short_float_complex, short float _Complex, *)
+#elif defined(HAVE_OPAL_SHORT_FLOAT_COMPLEX_T)
+COMPLEX_PROD_FUNC_3BUF(c_short_float_complex, opal_short_float_t)
+#endif
 OP_FUNC_3BUF(prod, c_float_complex, float _Complex, *)
 OP_FUNC_3BUF(prod, c_double_complex, double _Complex, *)
 OP_FUNC_3BUF(prod, c_long_double_complex, long double _Complex, *)
@@ -1240,12 +1388,22 @@ LOC_FUNC_3BUF(minloc, long_double_int, <)
 
 /** Floating point, including all the Fortran reals *********************/
 
+#if defined(HAVE_SHORT_FLOAT) || defined(HAVE_OPAL_SHORT_FLOAT_T)
+#define SHORT_FLOAT(name, ftype) ompi_op_base_##ftype##_##name##_short_float
+#else
+#define SHORT_FLOAT(name, ftype) NULL
+#endif
+#define FLOAT(name, ftype) ompi_op_base_##ftype##_##name##_float
+#define DOUBLE(name, ftype) ompi_op_base_##ftype##_##name##_double
+#define LONG_DOUBLE(name, ftype) ompi_op_base_##ftype##_##name##_long_double
+
 #define FLOATING_POINT(name, ftype)                                                            \
-  [OMPI_OP_BASE_TYPE_FLOAT] = ompi_op_base_##ftype##_##name##_float,                           \
-  [OMPI_OP_BASE_TYPE_DOUBLE] = ompi_op_base_##ftype##_##name##_double,                         \
+  [OMPI_OP_BASE_TYPE_SHORT_FLOAT] = SHORT_FLOAT(name, ftype),                                  \
+  [OMPI_OP_BASE_TYPE_FLOAT] = FLOAT(name, ftype),                                              \
+  [OMPI_OP_BASE_TYPE_DOUBLE] = DOUBLE(name, ftype),                                            \
   FLOATING_POINT_FORTRAN_REAL(name, ftype),                                                    \
   [OMPI_OP_BASE_TYPE_DOUBLE_PRECISION] = FLOATING_POINT_FORTRAN_DOUBLE_PRECISION(name, ftype), \
-  [OMPI_OP_BASE_TYPE_LONG_DOUBLE] = ompi_op_base_##ftype##_##name##_long_double
+  [OMPI_OP_BASE_TYPE_LONG_DOUBLE] = LONG_DOUBLE(name, ftype)
 
 /** Fortran logical *****************************************************/
 
@@ -1262,11 +1420,17 @@ LOC_FUNC_3BUF(minloc, long_double_int, <)
 
 /** Complex *****************************************************/
 
+#if defined(HAVE_SHORT_FLOAT__COMPLEX) || defined(HAVE_OPAL_SHORT_FLOAT_COMPLEX_T)
+#define SHORT_FLOAT_COMPLEX(name, ftype) ompi_op_base_##ftype##_##name##_c_short_float_complex
+#else
+#define SHORT_FLOAT_COMPLEX(name, ftype) NULL
+#endif
 #define FLOAT_COMPLEX(name, ftype) ompi_op_base_##ftype##_##name##_c_float_complex
 #define DOUBLE_COMPLEX(name, ftype) ompi_op_base_##ftype##_##name##_c_double_complex
 #define LONG_DOUBLE_COMPLEX(name, ftype) ompi_op_base_##ftype##_##name##_c_long_double_complex
 
 #define COMPLEX(name, ftype)                                                  \
+    [OMPI_OP_BASE_TYPE_C_SHORT_FLOAT_COMPLEX] = SHORT_FLOAT_COMPLEX(name, ftype), \
     [OMPI_OP_BASE_TYPE_C_FLOAT_COMPLEX] = FLOAT_COMPLEX(name, ftype),         \
     [OMPI_OP_BASE_TYPE_C_DOUBLE_COMPLEX] = DOUBLE_COMPLEX(name, ftype),       \
     [OMPI_OP_BASE_TYPE_C_LONG_DOUBLE_COMPLEX] = LONG_DOUBLE_COMPLEX(name, ftype)
diff --git a/ompi/mca/op/op.h b/ompi/mca/op/op.h
index a4006b20c6..5d1bff93f9 100644
--- a/ompi/mca/op/op.h
+++ b/ompi/mca/op/op.h
@@ -16,6 +16,7 @@
  * Copyright (c) 2007-2009 Cisco Systems, Inc.  All rights reserved.
  * Copyright (c) 2013-2015 Los Alamos National Security, LLC. All rights
  *                         reserved.
+ * Copyright (c) 2018      FUJITSU LIMITED.  All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -131,6 +132,8 @@ enum {
     /** Fortran integer*16 */
     OMPI_OP_BASE_TYPE_INTEGER16,
 
+    /** Floating point: short float */
+    OMPI_OP_BASE_TYPE_SHORT_FLOAT,
     /** Floating point: float */
     OMPI_OP_BASE_TYPE_FLOAT,
     /** Floating point: double */
@@ -156,6 +159,8 @@ enum {
     OMPI_OP_BASE_TYPE_BOOL,
 
     /** Complex */
+    /* short float complex */
+    OMPI_OP_BASE_TYPE_C_SHORT_FLOAT_COMPLEX,
     /* float complex */
     OMPI_OP_BASE_TYPE_C_FLOAT_COMPLEX,
     /* double complex */
diff --git a/ompi/op/op.c b/ompi/op/op.c
index b9559ceb55..ef85227142 100644
--- a/ompi/op/op.c
+++ b/ompi/op/op.c
@@ -16,6 +16,7 @@
  *                         reserved.
  * Copyright (c) 2015      Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
+ * Copyright (c) 2018      FUJITSU LIMITED.  All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -217,6 +218,12 @@ int ompi_op_init(void)
 #warning Unsupported definition for MPI_COUNT
 #endif
 
+    /* Datatypes proposed to the MPI Forum in June 2017 for inclusion in
+     * the MPI 4.0 standard. As of February 2019, they are not accepted yet.
+     * See https://github.com/mpi-forum/mpi-issues/issues/65 */
+    ompi_op_ddt_map[OMPI_DATATYPE_MPI_SHORT_FLOAT] = OMPI_OP_BASE_TYPE_SHORT_FLOAT;
+    ompi_op_ddt_map[OMPI_DATATYPE_MPI_C_SHORT_FLOAT_COMPLEX] = OMPI_OP_BASE_TYPE_C_SHORT_FLOAT_COMPLEX;
+
     /* Create the intrinsic ops */
 
     if (OMPI_SUCCESS !=
diff --git a/opal/datatype/opal_datatype.h b/opal/datatype/opal_datatype.h
index bcce6ce744..1f035b6d72 100644
--- a/opal/datatype/opal_datatype.h
+++ b/opal/datatype/opal_datatype.h
@@ -61,7 +61,7 @@ BEGIN_C_DECLS
  *
  * BEWARE: This constant should reflect whatever the OMPI-layer needs.
  */
-#define OPAL_DATATYPE_MAX_SUPPORTED  47
+#define OPAL_DATATYPE_MAX_SUPPORTED  49
 
 
 /* flags for the datatypes. */