
Merge remote-tracking branch 'wg/master' into topic/amo-non-blocking-ucp

Signed-off-by: Sergey Oblomov <sergeyo@mellanox.com>
This commit is contained in:
Sergey Oblomov 2018-05-29 19:08:48 +03:00
parent 0c3ed93ef0 4ebed21b6d
commit b668e19cd1
44 changed files: 542 additions and 2218 deletions

View File

@@ -12,7 +12,7 @@
# Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2012-2015 Los Alamos National Security, Inc. All rights reserved.
# Copyright (c) 2014 Intel, Inc. All rights reserved.
-# Copyright (c) 2017 Amazon.com, Inc. or its affiliates.
+# Copyright (c) 2017-2018 Amazon.com, Inc. or its affiliates.
# All Rights reserved.
# $COPYRIGHT$
#
@@ -22,6 +22,7 @@
#
SUBDIRS = config contrib $(MCA_PROJECT_SUBDIRS) test
+DIST_SUBDIRS = config contrib $(MCA_PROJECT_DIST_SUBDIRS) test
EXTRA_DIST = README INSTALL VERSION Doxyfile LICENSE autogen.pl README.JAVA.txt AUTHORS
include examples/Makefile.include

NEWS
View File

@@ -59,6 +59,9 @@ Master (not on release branches yet)
------------------------------------
- Fix rank-by algorithms to properly rank by object and span
+- Do not build Open SHMEM layer when there are no SPMLs available.
+  Currently, this means the Open SHMEM layer will only build if
+  a MXM or UCX library is found.

3.1.0 -- May, 2018
------------------

View File

@@ -12,6 +12,8 @@ dnl Copyright (c) 2004-2005 The Regents of the University of California.
dnl All rights reserved.
dnl Copyright (c) 2010-2016 Cisco Systems, Inc. All rights reserved.
dnl Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
+dnl Copyright (c) 2018 Amazon.com, Inc. or its affiliates.
+dnl All Rights reserved.
dnl $COPYRIGHT$
dnl
dnl Additional copyrights may follow
@@ -225,16 +227,19 @@ AC_DEFUN([OPAL_MCA],[
# now configure all the projects, frameworks, and components. Most
# of the hard stuff is in here
MCA_PROJECT_SUBDIRS=
+MCA_PROJECT_DIST_SUBDIRS=
m4_foreach(mca_project, [mca_project_list],
[# BWB: Until projects have separate configure scripts
# and can skip running all of ORTE, just avoid recursing
# into orte sub directory if orte disabled
if (test "mca_project" = "ompi" && test "$enable_mpi" != "no") || test "mca_project" = "opal" || test "mca_project" = "orte" || test "mca_project" = "oshmem"; then
MCA_PROJECT_SUBDIRS="$MCA_PROJECT_SUBDIRS mca_project"
+MCA_PROJECT_DIST_SUBDIRS="$MCA_PROJECT_DIST_SUBDIRS mca_project"
fi
MCA_CONFIGURE_PROJECT(mca_project)])
AC_SUBST(MCA_PROJECT_SUBDIRS)
+AC_SUBST(MCA_PROJECT_DIST_SUBDIRS)
m4_undefine([mca_component_configure_active])
])

View File

@@ -29,9 +29,9 @@ AC_DEFUN([OPAL_CC_HELPER],[
opal_prog_cc_c11_helper_tmp=0
-AC_COMPILE_IFELSE([AC_LANG_PROGRAM([$3],[$4])],[
+AC_LINK_IFELSE([AC_LANG_PROGRAM([$3],[$4])],[
$2=yes
opal_prog_cc_c11_helper_tmp=1], [$2=no])

AC_DEFINE_UNQUOTED([$5], [$opal_prog_cc_c11_helper_tmp], [$6])

View File

@@ -76,8 +76,10 @@ EOF
if test "$project_oshmem_amc" = "true" ; then
echo "Build Open SHMEM support: yes"
-else
+elif test -z "$project_oshmem_amc" ; then
echo "Build Open SHMEM support: no"
+else
+echo "Build Open SHMEM support: $project_oshmem_amc"
fi

if test $WANT_DEBUG = 0 ; then

View File

@@ -6,6 +6,8 @@ dnl Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved.
dnl Copyright (c) 2014 Intel, Inc. All rights reserved
dnl Copyright (c) 2014-2015 Research Organization for Information Science
dnl and Technology (RIST). All rights reserved.
+dnl Copyright (c) 2018 Amazon.com, Inc. or its affiliates.
+dnl All Rights reserved.
dnl $COPYRIGHT$
dnl
dnl Additional copyrights may follow
@@ -25,28 +27,23 @@ AC_SUBST(OSHMEM_LIBSHMEM_EXTRA_LDFLAGS)
AC_MSG_CHECKING([if want oshmem])
AC_ARG_ENABLE([oshmem],
[AC_HELP_STRING([--enable-oshmem],
-[Enable building the OpenSHMEM interface (available on Linux only, where it is enabled by default)])],
-[oshmem_arg_given=yes],
-[oshmem_arg_given=no])
-if test "$oshmem_arg_given" = "yes"; then
-    if test "$enable_oshmem" = "yes"; then
-        AC_MSG_RESULT([yes])
-        if test "$opal_found_linux" != "yes"; then
-            AC_MSG_WARN([OpenSHMEM support was requested, but currently])
-            AC_MSG_WARN([only supports Linux.])
-            AC_MSG_ERROR([Cannot continue])
-        fi
-    else
-        AC_MSG_RESULT([no])
-    fi
-else
+[Enable building the OpenSHMEM interface (available on Linux only, where it is enabled by default)])])
+if test "$enable_oshmem" = "no"; then
+    AC_MSG_RESULT([no])
+elif test "$enable_oshmem" = ""; then
    if test "$opal_found_linux" = "yes"; then
+        enable_oshmem=yes
        AC_MSG_RESULT([yes])
    else
        enable_oshmem=no
        AC_MSG_RESULT([not supported on this platform])
    fi
+else
+    AC_MSG_RESULT([yes])
+    if test "$opal_found_linux" != "yes"; then
+        AC_MSG_WARN([OpenSHMEM support was requested, but currently])
+        AC_MSG_WARN([only supports Linux.])
+        AC_MSG_ERROR([Cannot continue])
+    fi
fi
#
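The rewritten check above distinguishes three cases: an explicit --disable-oshmem answers "no" immediately; an unset flag auto-enables OpenSHMEM only when building on Linux; and an explicit --enable-oshmem on a non-Linux platform remains a hard configure error.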
@@ -56,7 +53,7 @@ AC_MSG_CHECKING([if want SGI/Quadrics compatibility mode])
AC_ARG_ENABLE(oshmem-compat,
AC_HELP_STRING([--enable-oshmem-compat],
[enable compatibility mode (default: enabled)]))
-if test "$enable_oshmem" != "no" && test "$enable_oshmem_compat" != "no"; then
+if test "$enable_oshmem_compat" != "no"; then
AC_MSG_RESULT([yes])
OSHMEM_SPEC_COMPAT=1
else
@@ -75,26 +72,21 @@ AC_MSG_CHECKING([if want OSHMEM API parameter checking])
AC_ARG_WITH(oshmem-param-check,
AC_HELP_STRING([--with-oshmem-param-check(=VALUE)],
[behavior of OSHMEM API function parameter checking. Valid values are: always, never. If --with-oshmem-param-check is specified with no VALUE argument, it is equivalent to a VALUE of "always"; --without-oshmem-param-check is equivalent to "never" (default: always).]))
-if test "$enable_oshmem" != "no"; then
-    if test "$with_oshmem_param_check" = "no" || \
-       test "$with_oshmem_param_check" = "never"; then
-        shmem_param_check=0
-        AC_MSG_RESULT([never])
-    elif test "$with_oshmem_param_check" = "yes" || \
-         test "$with_oshmem_param_check" = "always" || \
-         test -z "$with_oshmem_param_check"; then
-        shmem_param_check=1
-        AC_MSG_RESULT([always])
-    else
-        shmem_param_check=1
-        AC_MSG_RESULT([unknown])
-        AC_MSG_WARN([*** Unrecognized --with-oshmem-param-check value])
-        AC_MSG_WARN([*** See "configure --help" output])
-        AC_MSG_WARN([*** Defaulting to "always"])
-    fi
-else
+if test "$with_oshmem_param_check" = "no" || \
+   test "$with_oshmem_param_check" = "never"; then
    shmem_param_check=0
-    AC_MSG_RESULT([no])
+    AC_MSG_RESULT([never])
+elif test "$with_oshmem_param_check" = "yes" || \
+     test "$with_oshmem_param_check" = "always" || \
+     test -z "$with_oshmem_param_check"; then
+    shmem_param_check=1
+    AC_MSG_RESULT([always])
+else
+    shmem_param_check=1
+    AC_MSG_RESULT([unknown])
+    AC_MSG_WARN([*** Unrecognized --with-oshmem-param-check value])
+    AC_MSG_WARN([*** See "configure --help" output])
+    AC_MSG_WARN([*** Defaulting to "always"])
fi
AC_DEFINE_UNQUOTED(OSHMEM_PARAM_CHECK, $shmem_param_check,
[Whether we want to check OSHMEM parameters always or never])
@@ -132,7 +124,7 @@ AC_MSG_CHECKING([if want to build OSHMEM fortran bindings])
AC_ARG_ENABLE(oshmem-fortran,
AC_HELP_STRING([--enable-oshmem-fortran],
[enable OSHMEM Fortran bindings (default: enabled if Fortran compiler found)]))
-if test "$enable_oshmem" != "no" && test "$enable_oshmem_fortran" != "no"; then
+if test "$enable_oshmem_fortran" != "no"; then
# If no OMPI FORTRAN, bail
AS_IF([test $OMPI_TRY_FORTRAN_BINDINGS -eq $OMPI_FORTRAN_NO_BINDINGS && \
       test "$enable_oshmem_fortran" = "yes"],

View File

@@ -23,6 +23,8 @@
# Copyright (c) 2014-2017 Research Organization for Information Science
# and Technology (RIST). All rights reserved.
# Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
+# Copyright (c) 2018 Amazon.com, Inc. or its affiliates.
+# All Rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
@@ -266,15 +268,12 @@ m4_ifdef([project_oshmem], [OSHMEM_CONFIGURE_OPTIONS])
# Set up project specific AM_CONDITIONALs
AS_IF([test "$enable_ompi" != "no"], [project_ompi_amc=true], [project_ompi_amc=false])
m4_ifndef([project_ompi], [project_ompi_amc=false])
-AM_CONDITIONAL([PROJECT_OMPI], [test "$project_ompi_amc" = "true"])
AS_IF([test "$enable_orte" != "no"], [project_orte_amc=true], [project_orte_amc=false])
m4_ifndef([project_orte], [project_orte_amc=false])
-AM_CONDITIONAL([PROJECT_ORTE], [test "$project_orte_amc" = "true"])
-AS_IF([test "$enable_oshmem" != "no"], [project_oshmem_amc=true], [project_oshmem_amc=false])
-m4_ifndef([project_oshmem], [project_oshmem_amc=false])
-AM_CONDITIONAL([PROJECT_OSHMEM], [test "$project_oshmem_amc" = "true"])
+AS_IF([test "$enable_oshmem" != "no"], [project_oshmem_amc=true], [project_oshmem_amc="no (disabled)"])
+m4_ifndef([project_oshmem], [project_oshmem_amc="no (not available)"])

if test "$enable_binaries" = "no" && test "$enable_dist" = "yes"; then
AC_MSG_WARN([--disable-binaries is incompatible with --enable dist])
@@ -1107,6 +1106,23 @@ OPAL_MCA
m4_ifdef([project_ompi], [OMPI_REQUIRE_ENDPOINT_TAG_FINI])

+# Last minute disable of OpenSHMEM if we didn't find any oshmem SPMLs
+if test "$project_oshmem_amc" = "true" && test $OSHMEM_FOUND_WORKING_SPML -eq 0 ; then
+    # We don't have an spml that will work, so oshmem wouldn't be able
+    # to run an application. Therefore, don't build the oshmem layer.
+    if test "$enable_oshmem" != "no" && test -n "$enable_oshmem"; then
+        AC_MSG_WARN([No spml found, so OpenSHMEM layer will be non functional.])
+        AC_MSG_ERROR([Aborting because OpenSHMEM requested, but can not build.])
+    else
+        AC_MSG_WARN([No spml found. Will not build OpenSHMEM layer.])
+        project_oshmem_amc="false (no spml)"
+        # now for the hard part, remove project from list that will
+        # run. This is a hack, but it works as long as the project
+        # remains named "oshmem".
+        MCA_PROJECT_SUBDIRS=`echo "$MCA_PROJECT_SUBDIRS" | sed -e 's/oshmem//'`
+    fi
+fi

# checkpoint results
AC_CACHE_SAVE
@@ -1344,6 +1360,14 @@ m4_ifdef([project_ompi],
# Party on
############################################################################

+# set projects good/no good AM_CONDITIONALS. This is at the end so
+# that the OSHMEM/OMPI projects can be disabled, if needed, based on
+# MCA tests. If a project is to be disabled, also remove it from
+# MCA_PROJECT_SUBDIRS to actually disable building.
+AM_CONDITIONAL([PROJECT_OMPI], [test "$project_ompi_amc" = "true"])
+AM_CONDITIONAL([PROJECT_ORTE], [test "$project_orte_amc" = "true"])
+AM_CONDITIONAL([PROJECT_OSHMEM], [test "$project_oshmem_amc" = "true"])

AC_MSG_CHECKING([if libtool needs -no-undefined flag to build shared libraries])
case "`uname`" in
CYGWIN*|MINGW*|AIX*)

View File

@@ -253,6 +253,7 @@ int ompi_coll_base_reduce_scatter_intra_ring(REDUCESCATTER_ARGS);
int ompi_coll_base_reduce_scatter_block_basic_linear(REDUCESCATTERBLOCK_ARGS);
int ompi_coll_base_reduce_scatter_block_intra_recursivedoubling(REDUCESCATTERBLOCK_ARGS);
int ompi_coll_base_reduce_scatter_block_intra_recursivehalving(REDUCESCATTERBLOCK_ARGS);
+int ompi_coll_base_reduce_scatter_block_intra_butterfly(REDUCESCATTERBLOCK_ARGS);

/* Scan */
int ompi_coll_base_scan_intra_recursivedoubling(SCAN_ARGS);

View File

@@ -40,7 +40,6 @@
#include "coll_base_topo.h"
#include "coll_base_util.h"

/*
 * ompi_reduce_scatter_block_basic_linear
 *

@@ -511,3 +510,408 @@ cleanup_and_return:
free(tmprecv_raw);
return err;
}

/*
* ompi_mirror_perm: Returns mirror permutation of nbits low-order bits
* of x [*].
* [*] Warren Jr., Henry S. Hacker's Delight (2ed). 2013.
* Chapter 7. Rearranging Bits and Bytes.
*/
static unsigned int ompi_mirror_perm(unsigned int x, int nbits)
{
x = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1));
x = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2));
x = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4));
x = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8));
x = ((x >> 16) | (x << 16));
return x >> (sizeof(x) * CHAR_BIT - nbits);
}
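/* For example, with nbits = 2 this reverses the two low-order bits:
 * ompi_mirror_perm(0, 2) = 0, ompi_mirror_perm(1, 2) = 2,
 * ompi_mirror_perm(2, 2) = 1, ompi_mirror_perm(3, 2) = 3,
 * i.e. exactly the mperm() mapping used in the worked examples below. */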
static int ompi_coll_base_reduce_scatter_block_intra_butterfly_pof2(
const void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype,
struct ompi_op_t *op, struct ompi_communicator_t *comm,
mca_coll_base_module_t *module);
/*
* ompi_coll_base_reduce_scatter_block_intra_butterfly
*
* Function: Butterfly algorithm for reduce_scatter_block
* Accepts: Same as MPI_Reduce_scatter_block
* Returns: MPI_SUCCESS or error code
*
* Description: Implements butterfly algorithm for MPI_Reduce_scatter_block [*].
* The algorithm can be used both by commutative and non-commutative
* operations, for power-of-two and non-power-of-two number of processes.
*
* [*] J.L. Traff. An improved Algorithm for (non-commutative) Reduce-scatter
* with an Application // Proc. of EuroPVM/MPI, 2005. -- pp. 129-137.
*
* Time complexity:
* m\lambda + (\alpha + m\beta + m\gamma) +
* + 2\log_2(p)\alpha + 2m(1-1/p)\beta + m(1-1/p)\gamma +
* + 3(\alpha + m/p\beta) = O(m\lambda + log(p)\alpha + m\beta + m\gamma),
* where m = rcount * comm_size, p = comm_size
* Memory requirements (per process): 2 * rcount * comm_size * typesize
*
* Example: comm_size=6, nprocs_pof2=4, nprocs_rem=2, rcount=1, sbuf=[0,1,...,5]
* Step 1. Reduce the number of processes to 4
* rank 0: [0|1|2|3|4|5]: send to 1: vrank -1
* rank 1: [0|1|2|3|4|5]: recv from 0, op: vrank 0: [0|2|4|6|8|10]
* rank 2: [0|1|2|3|4|5]: send to 3: vrank -1
* rank 3: [0|1|2|3|4|5]: recv from 2, op: vrank 1: [0|2|4|6|8|10]
* rank 4: [0|1|2|3|4|5]: vrank 2: [0|1|2|3|4|5]
* rank 5: [0|1|2|3|4|5]: vrank 3: [0|1|2|3|4|5]
*
* Step 2. Butterfly. Buffer of 6 elements is divided into 4 blocks.
* Round 1 (mask=1, nblocks=2)
* 0: vrank -1
* 1: vrank 0 [0 2|4 6|8|10]: exch with 1: send [2,3], recv [0,1]: [0 4|8 12|*|*]
* 2: vrank -1
* 3: vrank 1 [0 2|4 6|8|10]: exch with 0: send [0,1], recv [2,3]: [**|**|16|20]
* 4: vrank 2 [0 1|2 3|4|5] : exch with 3: send [2,3], recv [0,1]: [0 2|4 6|*|*]
* 5: vrank 3 [0 1|2 3|4|5] : exch with 2: send [0,1], recv [2,3]: [**|**|8|10]
*
* Round 2 (mask=2, nblocks=1)
* 0: vrank -1
* 1: vrank 0 [0 4|8 12|*|*]: exch with 2: send [1], recv [0]: [0 6|**|*|*]
* 2: vrank -1
* 3: vrank 1 [**|**|16|20] : exch with 3: send [3], recv [2]: [**|**|24|*]
* 4: vrank 2 [0 2|4 6|*|*] : exch with 0: send [0], recv [1]: [**|12 18|*|*]
* 5: vrank 3 [**|**|8|10] : exch with 1: send [2], recv [3]: [**|**|*|30]
*
* Step 3. Exchange with remote process according to a mirror permutation:
* mperm(0)=0, mperm(1)=2, mperm(2)=1, mperm(3)=3
* 0: vrank -1: recv "0" from process 0
* 1: vrank 0 [0 6|**|*|*]: send "0" to 0, copy "6" to rbuf (mperm(0)=0)
* 2: vrank -1: recv result "12" from process 4
* 3: vrank 1 [**|**|24|*]
* 4: vrank 2 [**|12 18|*|*]: send "12" to 2, send "18" to 3, recv "24" from 3
* 5: vrank 3 [**|**|*|30]: copy "30" to rbuf (mperm(3)=3)
*/
int
ompi_coll_base_reduce_scatter_block_intra_butterfly(
const void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype,
struct ompi_op_t *op, struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
char *tmpbuf[2] = {NULL, NULL}, *psend, *precv;
ptrdiff_t span, gap, totalcount, extent;
int err = MPI_SUCCESS;
int comm_size = ompi_comm_size(comm);
int rank = ompi_comm_rank(comm);
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
"coll:base:reduce_scatter_block_intra_butterfly: rank %d/%d",
rank, comm_size));
if (rcount == 0 || comm_size < 2)
return MPI_SUCCESS;
if (!(comm_size & (comm_size - 1))) {
/* Special case: comm_size is a power of two */
return ompi_coll_base_reduce_scatter_block_intra_butterfly_pof2(
sbuf, rbuf, rcount, dtype, op, comm, module);
}
totalcount = comm_size * rcount;
ompi_datatype_type_extent(dtype, &extent);
span = opal_datatype_span(&dtype->super, totalcount, &gap);
tmpbuf[0] = malloc(span);
tmpbuf[1] = malloc(span);
if (NULL == tmpbuf[0] || NULL == tmpbuf[1]) {
err = OMPI_ERR_OUT_OF_RESOURCE;
goto cleanup_and_return;
}
psend = tmpbuf[0] - gap;
precv = tmpbuf[1] - gap;
if (sbuf != MPI_IN_PLACE) {
err = ompi_datatype_copy_content_same_ddt(dtype, totalcount, psend, (char *)sbuf);
if (MPI_SUCCESS != err) { goto cleanup_and_return; }
} else {
err = ompi_datatype_copy_content_same_ddt(dtype, totalcount, psend, rbuf);
if (MPI_SUCCESS != err) { goto cleanup_and_return; }
}
/*
* Step 1. Reduce the number of processes to the nearest lower power of two
* p' = 2^{\floor{\log_2 p}} by removing r = p - p' processes.
* In the first 2r processes (ranks 0 to 2r - 1), all the even ranks send
* the input vector to their neighbor (rank + 1) and all the odd ranks recv
* the input vector and perform local reduction.
* The odd ranks (0 to 2r - 1) contain the reduction with the input
* vector on their neighbors (the even ranks). The first r odd
* processes and the p - 2r last processes are renumbered from
* 0 to 2^{\floor{\log_2 p}} - 1. Even ranks do not participate in the
* rest of the algorithm.
*/
/* Find nearest power-of-two less than or equal to comm_size */
int nprocs_pof2 = opal_next_poweroftwo(comm_size);
nprocs_pof2 >>= 1;
int nprocs_rem = comm_size - nprocs_pof2;
int log2_size = opal_cube_dim(nprocs_pof2);
int vrank = -1;
if (rank < 2 * nprocs_rem) {
if ((rank % 2) == 0) {
/* Even process */
err = MCA_PML_CALL(send(psend, totalcount, dtype, rank + 1,
MCA_COLL_BASE_TAG_REDUCE_SCATTER_BLOCK,
MCA_PML_BASE_SEND_STANDARD, comm));
if (OMPI_SUCCESS != err) { goto cleanup_and_return; }
/* This process does not participate in the rest of the algorithm */
vrank = -1;
} else {
/* Odd process */
err = MCA_PML_CALL(recv(precv, totalcount, dtype, rank - 1,
MCA_COLL_BASE_TAG_REDUCE_SCATTER_BLOCK,
comm, MPI_STATUS_IGNORE));
if (OMPI_SUCCESS != err) { goto cleanup_and_return; }
ompi_op_reduce(op, precv, psend, totalcount, dtype);
/* Adjust rank to be the bottom "remain" ranks */
vrank = rank / 2;
}
} else {
/* Adjust rank to show that the bottom "even remain" ranks dropped out */
vrank = rank - nprocs_rem;
}
if (vrank != -1) {
/*
* Now, psend vector of size rcount * comm_size elements is divided into
* nprocs_pof2 blocks:
* block 0 has 2*rcount elems (for process 0 and 1)
* block 1 has 2*rcount elems (for process 2 and 3)
* ...
* block r-1 has 2*rcount elems (for process 2*(r-1) and 2*(r-1)+1)
* block r has rcount elems (for process r+r)
* block r+1 has rcount elems (for process r+r+1)
* ...
* block nprocs_pof2 - 1 has rcount elems (for process r + nprocs_pof2-1)
*/
int nblocks = nprocs_pof2, send_index = 0, recv_index = 0;
for (int mask = 1; mask < nprocs_pof2; mask <<= 1) {
int vpeer = vrank ^ mask;
int peer = (vpeer < nprocs_rem) ? vpeer * 2 + 1 : vpeer + nprocs_rem;
nblocks /= 2;
if ((vrank & mask) == 0) {
/* Send the upper half of reduction buffer, recv the lower half */
send_index += nblocks;
} else {
/* Send the lower half of reduction buffer, recv the upper half */
recv_index += nblocks;
}
int send_count = rcount * ompi_range_sum(send_index,
send_index + nblocks - 1, nprocs_rem - 1);
int recv_count = rcount * ompi_range_sum(recv_index,
recv_index + nblocks - 1, nprocs_rem - 1);
ptrdiff_t sdispl = rcount * ((send_index <= nprocs_rem - 1) ?
2 * send_index : nprocs_rem + send_index);
ptrdiff_t rdispl = rcount * ((recv_index <= nprocs_rem - 1) ?
2 * recv_index : nprocs_rem + recv_index);
err = ompi_coll_base_sendrecv(psend + (ptrdiff_t)sdispl * extent, send_count,
dtype, peer, MCA_COLL_BASE_TAG_REDUCE_SCATTER_BLOCK,
precv + (ptrdiff_t)rdispl * extent, recv_count,
dtype, peer, MCA_COLL_BASE_TAG_REDUCE_SCATTER_BLOCK,
comm, MPI_STATUS_IGNORE, rank);
if (MPI_SUCCESS != err) { goto cleanup_and_return; }
if (vrank < vpeer) {
/* precv = psend <op> precv */
ompi_op_reduce(op, psend + (ptrdiff_t)rdispl * extent,
precv + (ptrdiff_t)rdispl * extent, recv_count, dtype);
char *p = psend;
psend = precv;
precv = p;
} else {
/* psend = precv <op> psend */
ompi_op_reduce(op, precv + (ptrdiff_t)rdispl * extent,
psend + (ptrdiff_t)rdispl * extent, recv_count, dtype);
}
send_index = recv_index;
}
/*
* psend points to the result: [send_index, send_index + recv_count - 1]
* Exchange results with remote process according to a mirror permutation.
*/
int vpeer = ompi_mirror_perm(vrank, log2_size);
int peer = (vpeer < nprocs_rem) ? vpeer * 2 + 1 : vpeer + nprocs_rem;
if (vpeer < nprocs_rem) {
/*
* Process has two blocks: for excluded process and own.
* Send result to the excluded process.
*/
ptrdiff_t sdispl = rcount * ((send_index <= nprocs_rem - 1) ?
2 * send_index : nprocs_rem + send_index);
err = MCA_PML_CALL(send(psend + (ptrdiff_t)sdispl * extent,
rcount, dtype, peer - 1,
MCA_COLL_BASE_TAG_REDUCE_SCATTER_BLOCK,
MCA_PML_BASE_SEND_STANDARD, comm));
if (MPI_SUCCESS != err) { goto cleanup_and_return; }
}
/* Send result to a remote process according to a mirror permutation */
ptrdiff_t sdispl = rcount * ((send_index <= nprocs_rem - 1) ?
2 * send_index : nprocs_rem + send_index);
/* If process has two blocks, then send the second block (own block) */
if (vpeer < nprocs_rem)
sdispl += rcount;
if (vpeer != vrank) {
err = ompi_coll_base_sendrecv(psend + (ptrdiff_t)sdispl * extent, rcount,
dtype, peer, MCA_COLL_BASE_TAG_REDUCE_SCATTER_BLOCK,
rbuf, rcount, dtype, peer,
MCA_COLL_BASE_TAG_REDUCE_SCATTER_BLOCK,
comm, MPI_STATUS_IGNORE, rank);
if (MPI_SUCCESS != err) { goto cleanup_and_return; }
} else {
err = ompi_datatype_copy_content_same_ddt(dtype, rcount, rbuf,
psend + (ptrdiff_t)sdispl * extent);
if (MPI_SUCCESS != err) { goto cleanup_and_return; }
}
} else {
/* Excluded process: receive result */
int vpeer = ompi_mirror_perm((rank + 1) / 2, log2_size);
int peer = (vpeer < nprocs_rem) ? vpeer * 2 + 1 : vpeer + nprocs_rem;
err = MCA_PML_CALL(recv(rbuf, rcount, dtype, peer,
MCA_COLL_BASE_TAG_REDUCE_SCATTER_BLOCK, comm,
MPI_STATUS_IGNORE));
if (OMPI_SUCCESS != err) { goto cleanup_and_return; }
}
cleanup_and_return:
if (tmpbuf[0])
free(tmpbuf[0]);
if (tmpbuf[1])
free(tmpbuf[1]);
return err;
}
/*
* ompi_coll_base_reduce_scatter_block_intra_butterfly_pof2
*
* Function: Butterfly algorithm for reduce_scatter_block
* Accepts: Same as MPI_Reduce_scatter_block
* Returns: MPI_SUCCESS or error code
* Limitations: Power-of-two number of processes.
*
* Description: Implements butterfly algorithm for MPI_Reduce_scatter_block [*].
* The algorithm can be used both by commutative and non-commutative
* operations, for power-of-two number of processes.
*
* [*] J.L. Traff. An improved Algorithm for (non-commutative) Reduce-scatter
* with an Application // Proc. of EuroPVM/MPI, 2005. -- pp. 129-137.
*
* Time complexity:
* m\lambda + 2\log_2(p)\alpha + 2m(1-1/p)\beta + m(1-1/p)\gamma + m/p\lambda =
* = O(m\lambda + log(p)\alpha + m\beta + m\gamma),
* where m = rcount * comm_size, p = comm_size
* Memory requirements (per process): 2 * rcount * comm_size * typesize
*
* Example: comm_size=4, rcount=1, sbuf=[0,1,2,3]
* Step 1. Permute the blocks according to a mirror permutation:
* mperm(0)=0, mperm(1)=2, mperm(2)=1, mperm(3)=3
* sbuf=[0|1|2|3] ==> psend=[0|2|1|3]
*
* Step 2. Butterfly
* Round 1 (mask=1, nblocks=2)
* 0: [0|2|1|3]: exch with 1: send [2,3], recv [0,1]: [0|4|*|*]
* 1: [0|2|1|3]: exch with 0: send [0,1], recv [2,3]: [*|*|2|6]
* 2: [0|2|1|3]: exch with 3: send [2,3], recv [0,1]: [0|4|*|*]
* 3: [0|2|1|3]: exch with 2: send [0,1], recv [2,3]: [*|*|2|6]
*
* Round 2 (mask=2, nblocks=1)
* 0: [0|4|*|*]: exch with 2: send [1], recv [0]: [0|*|*|*]
* 1: [*|*|2|6]: exch with 3: send [3], recv [2]: [*|*|4|*]
* 2: [0|4|*|*]: exch with 0: send [0], recv [1]: [*|8|*|*]
* 3: [*|*|2|6]: exch with 1: send [2], recv [3]: [*|*|*|12]
*
* Step 3. Copy result to rbuf
*/
static int
ompi_coll_base_reduce_scatter_block_intra_butterfly_pof2(
const void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype,
struct ompi_op_t *op, struct ompi_communicator_t *comm,
mca_coll_base_module_t *module)
{
char *tmpbuf[2] = {NULL, NULL}, *psend, *precv;
ptrdiff_t span, gap, totalcount, extent;
int err = MPI_SUCCESS;
int comm_size = ompi_comm_size(comm);
int rank = ompi_comm_rank(comm);
if (rcount == 0 || comm_size < 2)
return MPI_SUCCESS;
totalcount = comm_size * rcount;
ompi_datatype_type_extent(dtype, &extent);
span = opal_datatype_span(&dtype->super, totalcount, &gap);
tmpbuf[0] = malloc(span);
tmpbuf[1] = malloc(span);
if (NULL == tmpbuf[0] || NULL == tmpbuf[1]) {
err = OMPI_ERR_OUT_OF_RESOURCE;
goto cleanup_and_return;
}
psend = tmpbuf[0] - gap;
precv = tmpbuf[1] - gap;
/* Permute the blocks according to a mirror permutation */
int log2_comm_size = opal_cube_dim(comm_size);
char *pdata = (sbuf != MPI_IN_PLACE) ? (char *)sbuf : rbuf;
for (int i = 0; i < comm_size; i++) {
char *src = pdata + (ptrdiff_t)i * extent * rcount;
char *dst = psend + (ptrdiff_t)ompi_mirror_perm(i, log2_comm_size) * extent * rcount;
err = ompi_datatype_copy_content_same_ddt(dtype, rcount, dst, src);
if (MPI_SUCCESS != err) { goto cleanup_and_return; }
}
int nblocks = totalcount, send_index = 0, recv_index = 0;
for (int mask = 1; mask < comm_size; mask <<= 1) {
int peer = rank ^ mask;
nblocks /= 2;
if ((rank & mask) == 0) {
/* Send the upper half of reduction buffer, recv the lower half */
send_index += nblocks;
} else {
/* Send the lower half of reduction buffer, recv the upper half */
recv_index += nblocks;
}
err = ompi_coll_base_sendrecv(psend + (ptrdiff_t)send_index * extent,
nblocks, dtype, peer,
MCA_COLL_BASE_TAG_REDUCE_SCATTER_BLOCK,
precv + (ptrdiff_t)recv_index * extent,
nblocks, dtype, peer,
MCA_COLL_BASE_TAG_REDUCE_SCATTER_BLOCK,
comm, MPI_STATUS_IGNORE, rank);
if (MPI_SUCCESS != err) { goto cleanup_and_return; }
if (rank < peer) {
/* precv = psend <op> precv */
ompi_op_reduce(op, psend + (ptrdiff_t)recv_index * extent,
precv + (ptrdiff_t)recv_index * extent, nblocks, dtype);
char *p = psend;
psend = precv;
precv = p;
} else {
/* psend = precv <op> psend */
ompi_op_reduce(op, precv + (ptrdiff_t)recv_index * extent,
psend + (ptrdiff_t)recv_index * extent, nblocks, dtype);
}
send_index = recv_index;
}
/* Copy the result to the rbuf */
err = ompi_datatype_copy_content_same_ddt(dtype, rcount, rbuf,
psend + (ptrdiff_t)recv_index * extent);
if (MPI_SUCCESS != err) { goto cleanup_and_return; }
cleanup_and_return:
if (tmpbuf[0])
free(tmpbuf[0]);
if (tmpbuf[1])
free(tmpbuf[1]);
return err;
}
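To make the Step 1 rank remapping and the per-round peer selection easier to follow, here is a minimal standalone sketch (an illustration only, not part of this commit) that reproduces the vrank assignment and butterfly peers of ompi_coll_base_reduce_scatter_block_intra_butterfly for the comm_size = 6 example used in the comments above; the hard-coded size is the only assumption:

#include <stdio.h>

int main(void)
{
    int comm_size = 6;                        /* example size from the comments above */
    int nprocs_pof2 = 1;
    while (nprocs_pof2 * 2 <= comm_size)      /* nearest power of two <= comm_size: 4 */
        nprocs_pof2 *= 2;
    int nprocs_rem = comm_size - nprocs_pof2; /* 2 */

    for (int rank = 0; rank < comm_size; rank++) {
        /* Same remapping as Step 1: even ranks below 2*nprocs_rem drop out (vrank -1),
         * odd ranks below 2*nprocs_rem become rank/2, the rest shift down by nprocs_rem. */
        int vrank = (rank < 2 * nprocs_rem) ? ((rank % 2) ? rank / 2 : -1)
                                            : rank - nprocs_rem;
        printf("rank %d -> vrank %d", rank, vrank);
        /* Butterfly rounds: vpeer = vrank ^ mask, mapped back to a real rank */
        for (int mask = 1; vrank >= 0 && mask < nprocs_pof2; mask <<= 1) {
            int vpeer = vrank ^ mask;
            int peer = (vpeer < nprocs_rem) ? vpeer * 2 + 1 : vpeer + nprocs_rem;
            printf("  [mask %d: peer %d]", mask, peer);
        }
        printf("\n");
    }
    return 0;
}

Its output matches the worked example: rank 1 (vrank 0) exchanges with rank 3 in round 1 and rank 4 in round 2, while ranks 0 and 2 drop out after the initial reduction.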

View File

@@ -36,6 +36,7 @@ static mca_base_var_enum_value_t reduce_scatter_block_algorithms[] = {
{1, "basic_linear"},
{2, "recursive_doubling"},
{3, "recursive_halving"},
+{4, "butterfly"},
{0, NULL}
};
@@ -75,7 +76,8 @@ int ompi_coll_tuned_reduce_scatter_block_intra_check_forced_init (coll_tuned_for
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
"reduce_scatter_block_algorithm",
"Which reduce reduce_scatter_block algorithm is used. "
-"Can be locked down to choice of: 0 ignore, 1 basic_linear, 2 recursive_doubling",
+"Can be locked down to choice of: 0 ignore, 1 basic_linear, 2 recursive_doubling, "
+"3 recursive_halving, 4 butterfly",
MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE,
OPAL_INFO_LVL_5,
MCA_BASE_VAR_SCOPE_ALL,
@@ -128,6 +130,8 @@ int ompi_coll_tuned_reduce_scatter_block_intra_do_this(const void *sbuf, void *r
dtype, op, comm, module);
case (3): return ompi_coll_base_reduce_scatter_block_intra_recursivehalving(sbuf, rbuf, rcount,
dtype, op, comm, module);
+case (4): return ompi_coll_base_reduce_scatter_block_intra_butterfly(sbuf, rbuf, rcount, dtype, op, comm,
+module);
} /* switch */
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:reduce_scatter_block_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCESCATTERBLOCK]));
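With the enum entry and the parameter registration above, the new algorithm can be forced at run time like any other tuned collective; an invocation along the lines of mpirun --mca coll_tuned_use_dynamic_rules 1 --mca coll_tuned_reduce_scatter_block_algorithm 4 ./app should select it (illustrative command; ./app stands in for an application binary).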

View File

@@ -172,7 +172,7 @@ int mca_common_ompio_set_view (mca_io_ompio_file_t *fh,
}
}
-if ( SIMPLE != mca_io_ompio_grouping_option || SIMPLE_PLUS != mca_io_ompio_grouping_option ) {
+if ( SIMPLE != mca_io_ompio_grouping_option && SIMPLE_PLUS != mca_io_ompio_grouping_option ) {
ret = mca_io_ompio_fview_based_grouping(fh,
&num_groups,
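Note: the old condition was a tautology. Since SIMPLE and SIMPLE_PLUS are distinct values, the grouping option can never equal both at once, so at least one of the two inequalities always held and fview-based grouping ran unconditionally; with &&, it is correctly skipped when either SIMPLE mode is selected.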

View File

@@ -1,51 +0,0 @@
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
# Copyright (c) 2017 IBM Corporation. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
AM_CPPFLAGS = $(mtl_mxm_CPPFLAGS)
dist_ompidata_DATA = help-mtl-mxm.txt
mtl_mxm_sources = \
mtl_mxm.c \
mtl_mxm.h \
mtl_mxm_cancel.c \
mtl_mxm_component.c \
mtl_mxm_endpoint.c \
mtl_mxm_endpoint.h \
mtl_mxm_probe.c \
mtl_mxm_recv.c \
mtl_mxm_request.h \
mtl_mxm_send.c \
mtl_mxm_debug.h \
mtl_mxm_types.h
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
if MCA_BUILD_ompi_mtl_mxm_DSO
component_noinst =
component_install = mca_mtl_mxm.la
else
component_noinst = libmca_mtl_mxm.la
component_install =
endif
mcacomponentdir = $(ompilibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_mtl_mxm_la_SOURCES = $(mtl_mxm_sources)
mca_mtl_mxm_la_LIBADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \
$(mtl_mxm_LIBS)
mca_mtl_mxm_la_LDFLAGS = -module -avoid-version $(mtl_mxm_LDFLAGS)
noinst_LTLIBRARIES = $(component_noinst)
libmca_mtl_mxm_la_SOURCES = $(mtl_mxm_sources)
libmca_mtl_mxm_la_LIBADD = $(mtl_mxm_LIBS)
libmca_mtl_mxm_la_LDFLAGS = -module -avoid-version $(mtl_mxm_LDFLAGS)

View File

@@ -1,39 +0,0 @@
# -*- shell-script -*-
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
# Copyright (c) 2013 Sandia National Laboratories. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# MCA_ompi_mtl_mxm_POST_CONFIG(will_build)
# ----------------------------------------
# Only require the tag if we're actually going to be built
AC_DEFUN([MCA_ompi_mtl_mxm_POST_CONFIG], [
AS_IF([test "$1" = "1"], [OMPI_REQUIRE_ENDPOINT_TAG([MTL])])
])dnl
# MCA_mtl_mxm_CONFIG([action-if-can-compile],
# [action-if-cant-compile])
# ------------------------------------------------
AC_DEFUN([MCA_ompi_mtl_mxm_CONFIG],[
AC_CONFIG_FILES([ompi/mca/mtl/mxm/Makefile])
OMPI_CHECK_MXM([mtl_mxm],
[mtl_mxm_happy="yes"],
[mtl_mxm_happy="no"])
AS_IF([test "$mtl_mxm_happy" = "yes"],
[$1],
[$2])
# substitute in the things needed to build mxm
AC_SUBST([mtl_mxm_CFLAGS])
AC_SUBST([mtl_mxm_CPPFLAGS])
AC_SUBST([mtl_mxm_LDFLAGS])
AC_SUBST([mtl_mxm_LIBS])
])dnl

View File

@@ -1,67 +0,0 @@
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
[no uuid present]
Error obtaining unique transport key from ORTE (orte_precondition_transports %s
the environment).
Local host: %s
[unable to create endpoint]
MXM was unable to create an endpoint. Please make sure that the network link is
active on the node and the hardware is functioning.
Error: %s
[unable to extract endpoint ptl address]
MXM was unable to read settings for endpoint
PTL ID: %d
Error: %s
[unable to extract endpoint address]
MXM was unable to read settings for endpoint
Error: %s
[mxm mq create]
Failed to create MQ for endpoint
Error: %s
[errors during mxm_progress]
Error %s occurred in attempting to make network progress (mxm_progress).
[mxm init]
Initialization of MXM library failed.
Error: %s
[error posting receive]
Unable to post application receive buffer
Error: %s
Buffer: %p
Length: %d
[error posting message receive]
Unable to post application receive buffer
Error: %s
Buffer: %p
Length: %d
[error posting send]
Unable to post application send buffer
Error: %s

View File

@@ -1,679 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (C) 2001-2011 Mellanox Technologies Ltd. ALL RIGHTS RESERVED.
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
* Copyright (c) 2014-2016 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/mca/mtl/mtl.h"
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
#include "ompi/proc/proc.h"
#include "ompi/communicator/communicator.h"
#include "opal/memoryhooks/memory.h"
#include "opal/util/show_help.h"
#include "opal/mca/pmix/pmix.h"
#include "mtl_mxm.h"
#include "mtl_mxm_types.h"
#include "mtl_mxm_endpoint.h"
#include "mtl_mxm_request.h"
mca_mtl_mxm_module_t ompi_mtl_mxm = {
{
0, /* max context id */
0, /* max tag value */
0, /* request reserve space */
0, /* flags */
ompi_mtl_mxm_add_procs,
ompi_mtl_mxm_del_procs,
ompi_mtl_mxm_finalize,
ompi_mtl_mxm_send,
ompi_mtl_mxm_isend,
ompi_mtl_mxm_irecv,
ompi_mtl_mxm_iprobe,
ompi_mtl_mxm_imrecv,
ompi_mtl_mxm_improbe,
ompi_mtl_mxm_cancel,
ompi_mtl_mxm_add_comm,
ompi_mtl_mxm_del_comm
},
0,
0,
NULL,
NULL
};
#if MXM_API < MXM_VERSION(2,0)
static uint32_t ompi_mtl_mxm_get_job_id(void)
{
uint8_t unique_job_key[16];
uint32_t job_key;
unsigned long long *uu;
char *generated_key;
uu = (unsigned long long *) unique_job_key;
generated_key = getenv(OPAL_MCA_PREFIX"orte_precondition_transports");
memset(uu, 0, sizeof(unique_job_key));
if (!generated_key || (strlen(generated_key) != 33) || sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2) {
opal_show_help("help-mtl-mxm.txt", "no uuid present", true,
generated_key ? "could not be parsed from" :
"not present in", ompi_process_info.nodename);
return 0;
}
/*
* decode OPAL_MCA_PREFIX"orte_precondition_transports" that looks as
* 000003ca00000000-0000000100000000
* jobfam-stepid
* to get jobid coded with ORTE_CONSTRUCT_LOCAL_JOBID()
*/
#define GET_LOCAL_JOBID(local, job) \
( ((local) & 0xffff0000) | ((job) & 0x0000ffff) )
job_key = GET_LOCAL_JOBID((uu[0]>>(8 * sizeof(int))) << 16, uu[1]>>(8 * sizeof(int)));
return job_key;
}
#endif
int ompi_mtl_mxm_progress(void);
#if MXM_API >= MXM_VERSION(2,0)
static void ompi_mtl_mxm_mem_release_cb(void *buf, size_t length,
void *cbdata, bool from_alloc);
#endif
#if MXM_API < MXM_VERSION(2,0)
static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, mxm_ptl_id_t ptlid)
{
size_t addrlen;
mxm_error_t err;
addrlen = sizeof(ep_info->ptl_addr[ptlid]);
err = mxm_ep_address(ompi_mtl_mxm.ep, ptlid,
(struct sockaddr *) &ep_info->ptl_addr[ptlid], &addrlen);
if (MXM_OK != err) {
opal_show_help("help-mtl-mxm.txt", "unable to extract endpoint ptl address",
true, (int)ptlid, mxm_error_string(err));
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
#else
static int ompi_mtl_mxm_get_ep_address(void **address_p, size_t *address_len_p)
{
mxm_error_t err;
*address_len_p = 0;
err = mxm_ep_get_address(ompi_mtl_mxm.ep, NULL, address_len_p);
if (err != MXM_ERR_BUFFER_TOO_SMALL) {
MXM_ERROR("Failed to get ep address length");
return OMPI_ERROR;
}
*address_p = malloc(*address_len_p);
if (*address_p == NULL) {
MXM_ERROR("Failed to allocate ep address buffer");
return OMPI_ERR_OUT_OF_RESOURCE;
}
err = mxm_ep_get_address(ompi_mtl_mxm.ep, *address_p, address_len_p);
if (MXM_OK != err) {
opal_show_help("help-mtl-mxm.txt", "unable to extract endpoint address",
true, mxm_error_string(err));
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
#endif
#define max(a,b) ((a)>(b)?(a):(b))
static mxm_error_t
ompi_mtl_mxm_create_ep(mxm_h ctx, mxm_ep_h *ep, unsigned ptl_bitmap, int lr,
uint32_t jobid, uint64_t mxlr, int nlps)
{
mxm_error_t err;
#if MXM_API < MXM_VERSION(2,0)
ompi_mtl_mxm.mxm_ep_opts->job_id = jobid;
ompi_mtl_mxm.mxm_ep_opts->local_rank = lr;
ompi_mtl_mxm.mxm_ep_opts->num_local_procs = nlps;
err = mxm_ep_create(ctx, ompi_mtl_mxm.mxm_ep_opts, ep);
#else
err = mxm_ep_create(ctx, ompi_mtl_mxm.mxm_ep_opts, ep);
#endif
return err;
}
/*
* send information using modex (in some case there is limitation on data size for example ess/pmi)
* set size of data sent for once
*
*/
static int ompi_mtl_mxm_send_ep_address(void *address, size_t address_len)
{
char *modex_component_name = mca_base_component_to_string(&mca_mtl_mxm_component.super.mtl_version);
char *modex_name = malloc(strlen(modex_component_name) + 5);
const size_t modex_max_size = 0x60;
unsigned char *modex_buf_ptr;
size_t modex_buf_size;
size_t modex_cur_size;
int modex_name_id = 0;
int rc;
/* Send address length */
sprintf(modex_name, "%s-len", modex_component_name);
OPAL_MODEX_SEND_STRING(rc, OPAL_PMIX_GLOBAL,
modex_name, &address_len, sizeof(address_len));
if (OMPI_SUCCESS != rc) {
MXM_ERROR("failed to send address length");
goto bail;
}
/* Send address, in parts.
* modex name looks as mtl.mxm.1.5-18 where mtl.mxm.1.5 is the component and 18 is part index.
*/
modex_buf_size = address_len;
modex_buf_ptr = address;
while (modex_buf_size) {
sprintf(modex_name, "%s-%d", modex_component_name, modex_name_id);
modex_cur_size = (modex_buf_size < modex_max_size) ? modex_buf_size : modex_max_size;
OPAL_MODEX_SEND_STRING(rc, OPAL_PMIX_GLOBAL,
modex_name, modex_buf_ptr, modex_cur_size);
if (OMPI_SUCCESS != rc) {
MXM_ERROR("Open MPI couldn't distribute EP connection details");
goto bail;
}
modex_name_id++;
modex_buf_ptr += modex_cur_size;
modex_buf_size -= modex_cur_size;
}
rc = OMPI_SUCCESS;
bail:
free(modex_component_name);
free(modex_name);
return rc;
}
/*
* receive information using modex
*/
static int ompi_mtl_mxm_recv_ep_address(ompi_proc_t *source_proc, void **address_p,
size_t *address_len_p)
{
char *modex_component_name = mca_base_component_to_string(&mca_mtl_mxm_component.super.mtl_version);
char *modex_name = malloc(strlen(modex_component_name) + 5);
uint8_t *modex_buf_ptr;
int32_t modex_cur_size;
size_t modex_buf_size;
size_t *address_len_buf_ptr;
int modex_name_id = 0;
int rc;
*address_p = NULL;
*address_len_p = 0;
/* Receive address length */
sprintf(modex_name, "%s-len", modex_component_name);
OPAL_MODEX_RECV_STRING(rc, modex_name, &source_proc->super.proc_name,
(uint8_t **)&address_len_buf_ptr,
&modex_cur_size);
if (OMPI_SUCCESS != rc) {
MXM_ERROR("Failed to receive ep address length");
goto bail;
}
/* Allocate buffer to hold the address */
*address_len_p = *address_len_buf_ptr;
*address_p = malloc(*address_len_p);
if (*address_p == NULL) {
MXM_ERROR("Failed to allocate modex receive buffer");
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto bail;
}
/* Receive the data, in parts */
modex_buf_size = 0;
while (modex_buf_size < *address_len_p) {
sprintf(modex_name, "%s-%d", modex_component_name, modex_name_id);
OPAL_MODEX_RECV_STRING(rc, modex_name, &source_proc->super.proc_name,
&modex_buf_ptr,
&modex_cur_size);
if (OMPI_SUCCESS != rc) {
MXM_ERROR("Open MPI couldn't distribute EP connection details");
free(*address_p);
*address_p = NULL;
*address_len_p = 0;
goto bail;
}
memcpy((char*)(*address_p) + modex_buf_size, modex_buf_ptr, modex_cur_size);
modex_buf_size += modex_cur_size;
modex_name_id++;
}
rc = OMPI_SUCCESS;
bail:
free(modex_component_name);
free(modex_name);
return rc;
}
int ompi_mtl_mxm_module_init(void)
{
#if MXM_API < MXM_VERSION(2,0)
ompi_mtl_mxm_ep_conn_info_t ep_info;
#endif
void *ep_address;
size_t ep_address_len;
mxm_error_t err;
uint32_t jobid;
uint64_t mxlr;
ompi_proc_t **procs;
unsigned ptl_bitmap;
size_t totps, proc;
int lr, nlps;
int rc;
mxlr = 0;
lr = -1;
jobid = 0;
#if MXM_API < MXM_VERSION(2,0)
jobid = ompi_mtl_mxm_get_job_id();
if (0 == jobid) {
MXM_ERROR("Failed to generate jobid");
return OMPI_ERROR;
}
#endif
totps = ompi_proc_world_size ();
if (totps < (size_t)ompi_mtl_mxm.mxm_np) {
MXM_VERBOSE(1, "MXM support will be disabled because of total number "
"of processes (%lu) is less than the minimum set by the "
"mtl_mxm_np MCA parameter (%u)", totps, ompi_mtl_mxm.mxm_np);
return OMPI_ERR_NOT_SUPPORTED;
}
MXM_VERBOSE(1, "MXM support enabled");
if (ORTE_NODE_RANK_INVALID == (lr = ompi_process_info.my_node_rank)) {
MXM_ERROR("Unable to obtain local node rank");
return OMPI_ERROR;
}
nlps = ompi_process_info.num_local_peers + 1;
/* local procs are always allocated. if that ever changes this will need to
* be modified. */
procs = ompi_proc_get_allocated (&totps);
if (NULL == procs) {
MXM_ERROR("Unable to obtain process list");
return OMPI_ERROR;
}
for (proc = 0; proc < totps; proc++) {
if (OPAL_PROC_ON_LOCAL_NODE(procs[proc]->super.proc_flags)) {
mxlr = max(mxlr, procs[proc]->super.proc_name.vpid);
}
}
free(procs);
/* Setup the endpoint options and local addresses to bind to. */
#if MXM_API < MXM_VERSION(2,0)
ptl_bitmap = ompi_mtl_mxm.mxm_ctx_opts->ptl_bitmap;
#else
ptl_bitmap = 0;
#endif
/* Open MXM endpoint */
err = ompi_mtl_mxm_create_ep(ompi_mtl_mxm.mxm_context, &ompi_mtl_mxm.ep,
ptl_bitmap, lr, jobid, mxlr, nlps);
if (MXM_OK != err) {
opal_show_help("help-mtl-mxm.txt", "unable to create endpoint", true,
mxm_error_string(err));
return OMPI_ERROR;
}
/*
* Get address for each PTL on this endpoint, and share it with other ranks.
*/
#if MXM_API < MXM_VERSION(2,0)
if ((ptl_bitmap & MXM_BIT(MXM_PTL_SELF)) &&
OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_SELF)) {
return OMPI_ERROR;
}
if ((ptl_bitmap & MXM_BIT(MXM_PTL_RDMA)) &&
OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_RDMA)) {
return OMPI_ERROR;
}
if ((ptl_bitmap & MXM_BIT(MXM_PTL_SHM)) &&
OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_SHM)) {
return OMPI_ERROR;
}
ep_address = &ep_info;
ep_address_len = sizeof(ep_info);
#else
rc = ompi_mtl_mxm_get_ep_address(&ep_address, &ep_address_len);
if (OMPI_SUCCESS != rc) {
return rc;
}
#endif
rc = ompi_mtl_mxm_send_ep_address(ep_address, ep_address_len);
if (OMPI_SUCCESS != rc) {
MXM_ERROR("Modex session failed.");
return rc;
}
#if MXM_API >= MXM_VERSION(2,0)
free(ep_address);
#endif
/* Register the MXM progress function */
opal_progress_register(ompi_mtl_mxm_progress);
ompi_mtl_mxm.super.mtl_flags |= MCA_MTL_BASE_FLAG_REQUIRE_WORLD;
#if MXM_API >= MXM_VERSION(2,0)
if (ompi_mtl_mxm.using_mem_hooks) {
opal_mem_hooks_register_release(ompi_mtl_mxm_mem_release_cb, NULL);
}
#endif
return OMPI_SUCCESS;
}
int ompi_mtl_mxm_finalize(struct mca_mtl_base_module_t* mtl)
{
#if MXM_API >= MXM_VERSION(2,0)
if (ompi_mtl_mxm.using_mem_hooks) {
opal_mem_hooks_unregister_release(ompi_mtl_mxm_mem_release_cb);
}
#endif
opal_progress_unregister(ompi_mtl_mxm_progress);
mxm_ep_destroy(ompi_mtl_mxm.ep);
return OMPI_SUCCESS;
}
int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
struct ompi_proc_t** procs)
{
#if MXM_API < MXM_VERSION(2,0)
ompi_mtl_mxm_ep_conn_info_t *ep_info;
mxm_conn_req_t *conn_reqs;
size_t ep_index = 0;
#endif
void *ep_address = NULL;
size_t ep_address_len;
mxm_error_t err;
size_t i;
int rc;
mca_mtl_mxm_endpoint_t *endpoint;
assert(mtl == &ompi_mtl_mxm.super);
#if MXM_API < MXM_VERSION(2,0)
/* Allocate connection requests */
conn_reqs = calloc(nprocs, sizeof(mxm_conn_req_t));
ep_info = calloc(nprocs, sizeof(ompi_mtl_mxm_ep_conn_info_t));
if (NULL == conn_reqs || NULL == ep_info) {
rc = OMPI_ERR_OUT_OF_RESOURCE;
goto bail;
}
#endif
/* Get the EP connection requests for all the processes from modex */
for (i = 0; i < nprocs; ++i) {
if (NULL != procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]) {
continue; /* already connected to this endpoint */
}
rc = ompi_mtl_mxm_recv_ep_address(procs[i], &ep_address, &ep_address_len);
if (rc != OMPI_SUCCESS) {
goto bail;
}
#if MXM_API < MXM_VERSION(2,0)
if (ep_address_len != sizeof(ep_info[i])) {
MXM_ERROR("Invalid endpoint address length");
free(ep_address);
rc = OMPI_ERROR;
goto bail;
}
memcpy(&ep_info[i], ep_address, ep_address_len);
free(ep_address);
conn_reqs[ep_index].ptl_addr[MXM_PTL_SELF] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SELF]);
conn_reqs[ep_index].ptl_addr[MXM_PTL_SHM] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SHM]);
conn_reqs[ep_index].ptl_addr[MXM_PTL_RDMA] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_RDMA]);
ep_index++;
#else
endpoint = OBJ_NEW(mca_mtl_mxm_endpoint_t);
endpoint->mtl_mxm_module = &ompi_mtl_mxm;
err = mxm_ep_connect(ompi_mtl_mxm.ep, ep_address, &endpoint->mxm_conn);
free(ep_address);
if (err != MXM_OK) {
MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
rc = OMPI_ERROR;
goto bail;
}
procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
#endif
}
#if MXM_API < MXM_VERSION(2,0)
/* Connect to remote peers */
err = mxm_ep_connect(ompi_mtl_mxm.ep, conn_reqs, ep_index, -1);
if (MXM_OK != err) {
MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
for (i = 0; i < ep_index; ++i) {
if (MXM_OK != conn_reqs[i].error) {
MXM_ERROR("MXM EP connect to %s error: %s\n",
(NULL == procs[i]->super.proc_hostname) ?
"unknown" : procs[i]->proc_hostname,
mxm_error_string(conn_reqs[i].error));
}
}
rc = OMPI_ERROR;
goto bail;
}
/* Save returned connections */
for (i = 0; i < ep_index; ++i) {
endpoint = OBJ_NEW(mca_mtl_mxm_endpoint_t);
endpoint->mtl_mxm_module = &ompi_mtl_mxm;
endpoint->mxm_conn = conn_reqs[i].conn;
procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
}
#endif
#if MXM_API >= MXM_VERSION(3,1)
if (ompi_mtl_mxm.bulk_connect) {
mxm_ep_wireup(ompi_mtl_mxm.ep);
}
#endif
rc = OMPI_SUCCESS;
bail:
#if MXM_API < MXM_VERSION(2,0)
free(conn_reqs);
free(ep_info);
#endif
return rc;
}
int ompi_mtl_add_single_proc(struct mca_mtl_base_module_t *mtl,
struct ompi_proc_t* procs)
{
void *ep_address = NULL;
size_t ep_address_len;
mxm_error_t err;
int rc;
mca_mtl_mxm_endpoint_t *endpoint;
assert(mtl == &ompi_mtl_mxm.super);
if (NULL != procs->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]) {
return OMPI_SUCCESS;
}
rc = ompi_mtl_mxm_recv_ep_address(procs, &ep_address, &ep_address_len);
if (rc != OMPI_SUCCESS) {
return rc;
}
#if MXM_API < MXM_VERSION(2,0)
ompi_mtl_mxm_ep_conn_info_t ep_info;
mxm_conn_req_t conn_req;
if (ep_address_len != sizeof(ep_info)) {
MXM_ERROR("Invalid endpoint address length");
free(ep_address);
return OMPI_ERROR;
}
memcpy(&ep_info, ep_address, ep_address_len);
free(ep_address);
conn_req.ptl_addr[MXM_PTL_SELF] = (struct sockaddr *)&(ep_info.ptl_addr[MXM_PTL_SELF]);
conn_req.ptl_addr[MXM_PTL_SHM] = (struct sockaddr *)&(ep_info.ptl_addr[MXM_PTL_SHM]);
conn_req.ptl_addr[MXM_PTL_RDMA] = (struct sockaddr *)&(ep_info.ptl_addr[MXM_PTL_RDMA]);
/* Connect to remote peers */
err = mxm_ep_connect(ompi_mtl_mxm.ep, conn_req, 1, -1);
if (MXM_OK != err) {
MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
if (MXM_OK != conn_req.error) {
MXM_ERROR("MXM EP connect to %s error: %s\n",
(NULL == procs->super.proc_hostname) ?
"unknown" : procs->proc_hostname,
mxm_error_string(conn_reqs.error));
}
return OMPI_ERROR;
}
/* Save returned connections */
endpoint = OBJ_NEW(mca_mtl_mxm_endpoint_t);
endpoint->mtl_mxm_module = &ompi_mtl_mxm;
endpoint->mxm_conn = conn_reqs.conn;
procs->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
#else
endpoint = OBJ_NEW(mca_mtl_mxm_endpoint_t);
endpoint->mtl_mxm_module = &ompi_mtl_mxm;
err = mxm_ep_connect(ompi_mtl_mxm.ep, ep_address, &endpoint->mxm_conn);
free(ep_address);
if (err != MXM_OK) {
MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
return OMPI_ERROR;
}
procs->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
#endif
#if MXM_API >= MXM_VERSION(3,1)
if (ompi_mtl_mxm.bulk_connect) {
mxm_ep_wireup(ompi_mtl_mxm.ep);
}
#endif
return OMPI_SUCCESS;
}
int ompi_mtl_mxm_del_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
struct ompi_proc_t** procs)
{
size_t i;
#if MXM_API >= MXM_VERSION(3,1)
if (ompi_mtl_mxm.bulk_disconnect && ((int)nprocs) == ompi_proc_world_size ()) {
mxm_ep_powerdown(ompi_mtl_mxm.ep);
}
#endif
/* XXX: Directly accessing the obj_reference_count is an abstraction
* violation of the object system. We know this needs to be fixed, but
* are deferring the fix to a later time as it involves a design issue
* in the way we handle endpoints as objects
*/
for (i = 0; i < nprocs; ++i) {
mca_mtl_mxm_endpoint_t *endpoint = (mca_mtl_mxm_endpoint_t*)
procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
if (endpoint) {
mxm_ep_disconnect(endpoint->mxm_conn);
OBJ_RELEASE(endpoint);
}
}
opal_pmix.fence(NULL, 0);
return OMPI_SUCCESS;
}
int ompi_mtl_mxm_add_comm(struct mca_mtl_base_module_t *mtl,
struct ompi_communicator_t *comm)
{
mxm_error_t err;
mxm_mq_h mq;
assert(mtl == &ompi_mtl_mxm.super);
assert(NULL != ompi_mtl_mxm.mxm_context);
err = mxm_mq_create(ompi_mtl_mxm.mxm_context, comm->c_contextid, &mq);
if (MXM_OK != err) {
opal_show_help("help-mtl-mxm.txt", "mxm mq create", true, mxm_error_string(err));
return OMPI_ERROR;
}
comm->c_pml_comm = (void*)mq;
return OMPI_SUCCESS;
}
int ompi_mtl_mxm_del_comm(struct mca_mtl_base_module_t *mtl,
struct ompi_communicator_t *comm)
{
assert(mtl == &ompi_mtl_mxm.super);
if (NULL != ompi_mtl_mxm.mxm_context) {
mxm_mq_destroy((mxm_mq_h)comm->c_pml_comm);
}
return OMPI_SUCCESS;
}
int ompi_mtl_mxm_progress(void)
{
mxm_error_t err;
err = mxm_progress(ompi_mtl_mxm.mxm_context);
if ((MXM_OK != err) && (MXM_ERR_NO_PROGRESS != err) ) {
opal_show_help("help-mtl-mxm.txt", "errors during mxm_progress", true, mxm_error_string(err));
}
return 1;
}
#if MXM_API >= MXM_VERSION(2,0)
static void ompi_mtl_mxm_mem_release_cb(void *buf, size_t length,
void *cbdata, bool from_alloc)
{
mxm_mem_unmap(ompi_mtl_mxm.mxm_context, buf, length,
from_alloc ? MXM_MEM_UNMAP_MARK_INVALID : 0);
}
#endif
OBJ_CLASS_INSTANCE(
ompi_mtl_mxm_message_t,
opal_free_list_item_t,
NULL,
NULL);

View File

@@ -1,117 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MTL_MXM_H_HAS_BEEN_INCLUDED
#define MTL_MXM_H_HAS_BEEN_INCLUDED
#include <stdint.h>
#include <sys/types.h>
#include <unistd.h>
#include <mxm/api/mxm_api.h>
#ifndef MXM_VERSION
#define MXM_VERSION(major, minor) (((major)<<MXM_MAJOR_BIT)|((minor)<<MXM_MINOR_BIT))
#endif
#if MXM_API < MXM_VERSION(1,5)
#error "Unsupported MXM version, version 1.5 or above required"
#endif
#if MXM_API < MXM_VERSION(2,0)
#include <mxm/api/mxm_addr.h>
#endif
#include "ompi/mca/pml/pml.h"
#include "ompi/mca/mtl/mtl.h"
#include "ompi/mca/mtl/base/base.h"
#include "opal/class/opal_free_list.h"
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "opal/datatype/opal_convertor.h"
#include "mtl_mxm_debug.h"
BEGIN_C_DECLS
/* MTL interface functions */
extern int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t* mtl,
size_t nprocs, struct ompi_proc_t** procs);
extern int ompi_mtl_add_single_proc(struct mca_mtl_base_module_t *mtl,
struct ompi_proc_t* procs);
extern int ompi_mtl_mxm_del_procs(struct mca_mtl_base_module_t* mtl,
size_t nprocs, struct ompi_proc_t** procs);
extern int ompi_mtl_mxm_send(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t* comm, int dest, int tag,
struct opal_convertor_t *convertor,
mca_pml_base_send_mode_t mode);
extern int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t* comm, int dest,
int tag, struct opal_convertor_t *convertor,
mca_pml_base_send_mode_t mode, bool blocking,
mca_mtl_request_t * mtl_request);
extern int ompi_mtl_mxm_irecv(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t *comm, int src,
int tag, struct opal_convertor_t *convertor,
struct mca_mtl_request_t *mtl_request);
extern int ompi_mtl_mxm_iprobe(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t *comm, int src,
int tag, int *flag,
struct ompi_status_public_t *status);
extern int ompi_mtl_mxm_cancel(struct mca_mtl_base_module_t* mtl,
struct mca_mtl_request_t *mtl_request, int flag);
extern int ompi_mtl_mxm_imrecv(struct mca_mtl_base_module_t* mtl,
struct opal_convertor_t *convertor,
struct ompi_message_t **message,
struct mca_mtl_request_t *mtl_request);
extern int ompi_mtl_mxm_improbe(struct mca_mtl_base_module_t *mtl,
struct ompi_communicator_t *comm,
int src,
int tag,
int *matched,
struct ompi_message_t **message,
struct ompi_status_public_t *status);
extern int ompi_mtl_mxm_add_comm(struct mca_mtl_base_module_t *mtl,
struct ompi_communicator_t *comm);
extern int ompi_mtl_mxm_del_comm(struct mca_mtl_base_module_t *mtl,
struct ompi_communicator_t *comm);
extern int ompi_mtl_mxm_finalize(struct mca_mtl_base_module_t* mtl);
int ompi_mtl_mxm_module_init(void);
struct ompi_mtl_mxm_message_t {
opal_free_list_item_t super;
mxm_mq_h mq;
mxm_conn_h conn;
mxm_message_h mxm_msg;
mxm_tag_t tag;
mxm_tag_t tag_mask;
};
typedef struct ompi_mtl_mxm_message_t ompi_mtl_mxm_message_t;
OBJ_CLASS_DECLARATION(ompi_mtl_mxm_message_t);
END_C_DECLS
#endif

View file

@ -1,34 +0,0 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mtl_mxm.h"
#include "mtl_mxm_request.h"
int ompi_mtl_mxm_cancel(struct mca_mtl_base_module_t* mtl,
struct mca_mtl_request_t *mtl_request, int flag)
{
mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
mxm_error_t err;
#if MXM_API >= MXM_VERSION(2,0)
if (mtl_mxm_request->is_send) {
err = mxm_req_cancel_send(&mtl_mxm_request->mxm.send);
} else {
err = mxm_req_cancel_recv(&mtl_mxm_request->mxm.recv);
}
#else
err = mxm_req_cancel(&mtl_mxm_request->mxm.base);
#endif
if ((err != MXM_OK) && (err != MXM_ERR_NO_PROGRESS)) {
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}

View file

@ -1,316 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "opal/util/output.h"
#include "opal/util/show_help.h"
#include "ompi/proc/proc.h"
#include "opal/memoryhooks/memory.h"
#include "opal/mca/memory/base/base.h"
#include "ompi/runtime/mpiruntime.h"
#include "mtl_mxm.h"
#include "mtl_mxm_types.h"
#include "mtl_mxm_request.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
static int ompi_mtl_mxm_component_open(void);
static int ompi_mtl_mxm_component_query(mca_base_module_t **module, int *priority);
static int ompi_mtl_mxm_component_close(void);
static int ompi_mtl_mxm_component_register(void);
static int param_priority;
int mca_mtl_mxm_output = -1;
static mca_mtl_base_module_t
* ompi_mtl_mxm_component_init(bool enable_progress_threads,
bool enable_mpi_threads);
mca_mtl_mxm_component_t mca_mtl_mxm_component = {
{
/*
* First, the mca_base_component_t struct containing meta
* information about the component itself
*/
.mtl_version = {
MCA_MTL_BASE_VERSION_2_0_0,
.mca_component_name = "mxm",
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
OMPI_RELEASE_VERSION),
.mca_open_component = ompi_mtl_mxm_component_open,
.mca_close_component = ompi_mtl_mxm_component_close,
.mca_query_component = ompi_mtl_mxm_component_query,
.mca_register_component_params = ompi_mtl_mxm_component_register,
},
.mtl_data = {
/* The component is not checkpoint ready */
MCA_BASE_METADATA_PARAM_NONE
},
.mtl_init = ompi_mtl_mxm_component_init,
}
};
static int ompi_mtl_mxm_component_register(void)
{
mca_base_component_t *c;
#if MXM_API < MXM_VERSION(3,0)
unsigned long cur_ver;
long major, minor;
char* runtime_version;
#endif
c = &mca_mtl_mxm_component.super.mtl_version;
ompi_mtl_mxm.verbose = 0;
(void) mca_base_component_var_register(c, "verbose",
"Verbose level of the MXM component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_LOCAL,
&ompi_mtl_mxm.verbose);
#if MXM_API > MXM_VERSION(2,0)
ompi_mtl_mxm.mxm_np = 0;
#else
ompi_mtl_mxm.mxm_np = 128;
#endif
(void) mca_base_component_var_register(c, "np",
"[integer] Minimum number of MPI processes in a single job "
"required to activate the MXM transport",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_mtl_mxm.mxm_np);
ompi_mtl_mxm.compiletime_version = MXM_VERNO_STRING;
(void) mca_base_component_var_register(c,
MCA_COMPILETIME_VER,
"Version of the libmxm library with which Open MPI was compiled",
MCA_BASE_VAR_TYPE_VERSION_STRING,
NULL, 0, 0,
OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_mtl_mxm.compiletime_version);
#if MXM_API >= MXM_VERSION(3,0)
ompi_mtl_mxm.runtime_version = (char *)mxm_get_version_string();
#else
cur_ver = mxm_get_version();
major = (cur_ver >> MXM_MAJOR_BIT) & 0xff;
minor = (cur_ver >> MXM_MINOR_BIT) & 0xff;
asprintf(&runtime_version, "%ld.%ld", major, minor);
ompi_mtl_mxm.runtime_version = runtime_version;
#endif
(void) mca_base_component_var_register(c,
MCA_RUNTIME_VER,
"Version of the libmxm library with which Open MPI is running",
MCA_BASE_VAR_TYPE_VERSION_STRING,
NULL, 0, 0,
OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_mtl_mxm.runtime_version);
#if MXM_API < MXM_VERSION(3,0)
free(runtime_version);
#endif
/* set high enough to defeat ob1's default */
param_priority = 30;
(void) mca_base_component_var_register (c,
"priority", "Priority of the MXM MTL component",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&param_priority);
#if MXM_API >= MXM_VERSION(3,1)
{
unsigned long cur_ver = mxm_get_version();
ompi_mtl_mxm.bulk_connect = 0;
if (cur_ver < MXM_VERSION(3,2)) {
ompi_mtl_mxm.bulk_disconnect = 0;
} else {
ompi_mtl_mxm.bulk_disconnect = 1;
}
(void) mca_base_component_var_register(c, "bulk_connect",
"[integer] use bulk connect",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_mtl_mxm.bulk_connect);
(void) mca_base_component_var_register(c, "bulk_disconnect",
"[integer] use bulk disconnect",
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
OPAL_INFO_LVL_9,
MCA_BASE_VAR_SCOPE_READONLY,
&ompi_mtl_mxm.bulk_disconnect);
if (cur_ver < MXM_VERSION(3,2) &&
(ompi_mtl_mxm.bulk_connect || ompi_mtl_mxm.bulk_disconnect)) {
ompi_mtl_mxm.bulk_connect = 0;
ompi_mtl_mxm.bulk_disconnect = 0;
MXM_VERBOSE(1, "WARNING: OMPI is running with MXM version %s, which is older than 3.2, "
"so bulk connect/disconnect cannot work properly and will be turned off.",
ompi_mtl_mxm.runtime_version);
}
}
#endif
return OMPI_SUCCESS;
}
static int ompi_mtl_mxm_component_open(void)
{
mxm_error_t err;
unsigned long cur_ver;
int rc;
mca_mtl_mxm_output = opal_output_open(NULL);
opal_output_set_verbosity(mca_mtl_mxm_output, ompi_mtl_mxm.verbose);
cur_ver = mxm_get_version();
if (cur_ver != MXM_API) {
MXM_VERBOSE(1,
"WARNING: OMPI was compiled with MXM version %d.%d but version %ld.%ld detected.",
MXM_VERNO_MAJOR,
MXM_VERNO_MINOR,
(cur_ver >> MXM_MAJOR_BIT) & 0xff,
(cur_ver >> MXM_MINOR_BIT) & 0xff);
}
#if MXM_API >= MXM_VERSION(2,0)
(void)mca_base_framework_open(&opal_memory_base_framework, 0);
/* Register memory hooks */
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) &
opal_mem_hooks_support_level()))
{
setenv("MXM_MPI_MEM_ON_DEMAND_MAP", "y", 0);
MXM_VERBOSE(1, "Enabling on-demand memory mapping");
ompi_mtl_mxm.using_mem_hooks = 1;
} else {
MXM_VERBOSE(1, "Disabling on-demand memory mapping");
ompi_mtl_mxm.using_mem_hooks = 0;
}
setenv("MXM_MPI_SINGLE_THREAD", ompi_mpi_thread_multiple ? "n" : "y" , 0);
#endif
#if MXM_API >= MXM_VERSION(2,1)
if (MXM_OK != mxm_config_read_opts(&ompi_mtl_mxm.mxm_ctx_opts,
&ompi_mtl_mxm.mxm_ep_opts,
"MPI", NULL, 0))
#else
if ((MXM_OK != mxm_config_read_context_opts(&ompi_mtl_mxm.mxm_ctx_opts)) ||
(MXM_OK != mxm_config_read_ep_opts(&ompi_mtl_mxm.mxm_ep_opts)))
#endif
{
MXM_ERROR("Failed to parse MXM configuration");
return OPAL_ERR_BAD_PARAM;
}
err = mxm_init(ompi_mtl_mxm.mxm_ctx_opts, &ompi_mtl_mxm.mxm_context);
MXM_VERBOSE(1, "mxm component open");
if (MXM_OK != err) {
if (MXM_ERR_NO_DEVICE == err) {
MXM_VERBOSE(1, "No supported device found, disqualifying mxm");
} else {
opal_show_help("help-mtl-mxm.txt", "mxm init", true,
mxm_error_string(err));
}
return OPAL_ERR_NOT_AVAILABLE;
}
OBJ_CONSTRUCT(&mca_mtl_mxm_component.mxm_messages, opal_free_list_t);
rc = opal_free_list_init (&mca_mtl_mxm_component.mxm_messages,
sizeof(ompi_mtl_mxm_message_t),
opal_cache_line_size,
OBJ_CLASS(ompi_mtl_mxm_message_t),
0, opal_cache_line_size,
32 /* free list num */,
-1 /* free list max */,
32 /* free list inc */,
NULL, 0, NULL, NULL, NULL);
if (OMPI_SUCCESS != rc) {
opal_show_help("help-mtl-mxm.txt", "mxm init", true,
mxm_error_string(err));
return OPAL_ERR_NOT_AVAILABLE;
}
return OMPI_SUCCESS;
}
static int ompi_mtl_mxm_component_query(mca_base_module_t **module, int *priority)
{
/*
* if we get here it means that mxm is available so give high priority
*/
ompi_mpi_dynamics_disable("the MXM MTL does not support MPI dynamic process functionality");
*priority = param_priority;
*module = (mca_base_module_t *)&ompi_mtl_mxm.super;
return OMPI_SUCCESS;
}
static int ompi_mtl_mxm_component_close(void)
{
if (ompi_mtl_mxm.mxm_context != NULL) {
mxm_cleanup(ompi_mtl_mxm.mxm_context);
ompi_mtl_mxm.mxm_context = NULL;
OBJ_DESTRUCT(&mca_mtl_mxm_component.mxm_messages);
#if MXM_API >= MXM_VERSION(2,0)
mxm_config_free_ep_opts(ompi_mtl_mxm.mxm_ep_opts);
mxm_config_free_context_opts(ompi_mtl_mxm.mxm_ctx_opts);
mca_base_framework_close(&opal_memory_base_framework);
#else
mxm_config_free(ompi_mtl_mxm.mxm_ep_opts);
mxm_config_free(ompi_mtl_mxm.mxm_ctx_opts);
#endif
}
return OMPI_SUCCESS;
}
static mca_mtl_base_module_t*
ompi_mtl_mxm_component_init(bool enable_progress_threads,
bool enable_mpi_threads)
{
int rc;
rc = ompi_mtl_mxm_module_init();
if (OMPI_SUCCESS != rc) {
return NULL;
}
/* Calculate MTL constraints according to MXM types */
ompi_mtl_mxm.super.mtl_max_contextid = 1UL << (sizeof(mxm_ctxid_t) * 8);
ompi_mtl_mxm.super.mtl_max_tag = 1UL << (sizeof(mxm_tag_t) * 8 - 2);
ompi_mtl_mxm.super.mtl_request_size =
sizeof(mca_mtl_mxm_request_t) - sizeof(struct mca_mtl_request_t);
return &ompi_mtl_mxm.super;
}
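
The version handling above relies on MXM packing the major/minor numbers into a single word via MXM_VERSION. A minimal sketch of that arithmetic, assuming shift values MXM_MAJOR_BIT = 24 and MXM_MINOR_BIT = 16 (an assumption for illustration; the real values come from MXM's headers):

#include <stdio.h>

/* Assumed shift values -- the real ones come from MXM's headers */
#define MXM_MAJOR_BIT 24
#define MXM_MINOR_BIT 16
#define MXM_VERSION(major, minor) (((major) << MXM_MAJOR_BIT) | ((minor) << MXM_MINOR_BIT))

int main(void)
{
    unsigned long cur_ver = MXM_VERSION(3, 2);

    /* same extraction the component uses to build its runtime version string */
    printf("%lu.%lu\n", (cur_ver >> MXM_MAJOR_BIT) & 0xff,
           (cur_ver >> MXM_MINOR_BIT) & 0xff);   /* prints 3.2 */
    return 0;
}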

View file

@ -1,34 +0,0 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MTL_MXM_DEBUG_H
#define MTL_MXM_DEBUG_H
#pragma GCC system_header
#ifdef __BASE_FILE__
#define __MXM_FILE__ __BASE_FILE__
#else
#define __MXM_FILE__ __FILE__
#endif
#define MXM_VERBOSE(level, format, ...) \
opal_output_verbose(level, mca_mtl_mxm_output, "%s:%d - %s() " format, \
__MXM_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
#define MXM_ERROR(format, ... ) \
opal_output_verbose(0, mca_mtl_mxm_output, "Error: %s:%d - %s() " format, \
__MXM_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
#define MXM_MODULE_VERBOSE(mxm_module, level, format, ...) \
MXM_VERBOSE(level, "[%d] " format, (mxm_module)->rank, ## __VA_ARGS__)
extern int mca_mtl_mxm_output;
#endif

View file

@ -1,42 +0,0 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include <sys/time.h>
#include <time.h>
#include "ompi/types.h"
#include "mtl_mxm.h"
#include "mtl_mxm_types.h"
#include "mtl_mxm_endpoint.h"
/*
* Initialize state of the endpoint instance.
*
*/
static void mca_mtl_mxm_endpoint_construct(mca_mtl_mxm_endpoint_t* endpoint)
{
endpoint->mtl_mxm_module = NULL;
}
/*
* Destroy an endpoint
*
*/
static void mca_mtl_mxm_endpoint_destruct(mca_mtl_mxm_endpoint_t* endpoint)
{
}
OBJ_CLASS_INSTANCE(
mca_mtl_mxm_endpoint_t,
opal_list_item_t,
mca_mtl_mxm_endpoint_construct,
mca_mtl_mxm_endpoint_destruct);

View file

@ -1,41 +0,0 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MCA_MTL_MXM_ENDPOINT_H
#define MCA_MTL_MXM_ENDPOINT_H
#include "opal/class/opal_list.h"
#include "ompi/mca/mtl/mtl.h"
#include "mtl_mxm.h"
BEGIN_C_DECLS
OBJ_CLASS_DECLARATION(mca_mtl_mxm_endpoint_t);
/**
* An abstraction that represents a connection to an endpoint process.
* An instance of mca_mtl_mxm_endpoint_t is associated with each process
* and MTL pair at startup. However, connections to the endpoint
* are established dynamically on an as-needed basis.
*/
struct mca_mtl_mxm_endpoint_t {
opal_list_item_t super;
struct mca_mtl_mxm_module_t* mtl_mxm_module;
/**< MTL instance that created this connection */
mxm_conn_h mxm_conn;
/**< MXM Connection handle*/
};
typedef struct mca_mtl_mxm_endpoint_t mca_mtl_mxm_endpoint_t;
OBJ_CLASS_DECLARATION(mca_mtl_mxm_endpoint_t);
END_C_DECLS
#endif

View file

@ -1,115 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* Copyright (c) 2013 The University of Tennessee and The University
* of Tennessee Research Foundation. All rights
* reserved.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "mtl_mxm.h"
#include "mtl_mxm_types.h"
#include "ompi/message/message.h"
#include "ompi/communicator/communicator.h"
int ompi_mtl_mxm_iprobe(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t *comm, int src, int tag,
int *flag, struct ompi_status_public_t *status)
{
mxm_error_t err;
mxm_recv_req_t req;
req.base.state = MXM_REQ_NEW;
ompi_mtl_mxm_set_recv_envelope(&req, comm, src, tag);
err = mxm_req_probe(&req);
if (MXM_OK == err) {
*flag = 1;
if (MPI_STATUS_IGNORE != status) {
ompi_mtl_mxm_to_mpi_status(err, status);
status->MPI_SOURCE = req.completion.sender_imm;
status->MPI_TAG = req.completion.sender_tag;
status->_ucount = req.completion.sender_len;
}
return OMPI_SUCCESS;
} else if (MXM_ERR_NO_MESSAGE == err) {
*flag = 0;
return OMPI_SUCCESS;
} else {
return OMPI_ERROR;
}
}
int ompi_mtl_mxm_improbe(struct mca_mtl_base_module_t *mtl,
struct ompi_communicator_t *comm,
int src,
int tag,
int *matched,
struct ompi_message_t **message,
struct ompi_status_public_t *status)
{
mxm_error_t err;
mxm_recv_req_t req;
opal_free_list_item_t *item;
ompi_mtl_mxm_message_t *msgp;
item = opal_free_list_wait (&mca_mtl_mxm_component.mxm_messages);
if (OPAL_UNLIKELY(NULL == item)) {
return OMPI_ERR_OUT_OF_RESOURCE;
}
msgp = (ompi_mtl_mxm_message_t *) item;
req.base.state = MXM_REQ_NEW;
ompi_mtl_mxm_set_recv_envelope(&req, comm, src, tag);
msgp->mq = req.base.mq;
msgp->conn = req.base.conn;
msgp->tag = req.tag;
msgp->tag_mask = req.tag_mask;
err = mxm_req_mprobe(&req, &msgp->mxm_msg);
if (MXM_OK == err) {
if (MPI_STATUS_IGNORE != status) {
*matched = 1;
ompi_mtl_mxm_to_mpi_status(err, status);
status->MPI_SOURCE = req.completion.sender_imm;
status->MPI_TAG = req.completion.sender_tag;
status->_ucount = req.completion.sender_len;
} else {
*matched = 0;
*message = MPI_MESSAGE_NULL;
return OMPI_SUCCESS;
}
} else if (MXM_ERR_NO_MESSAGE == err) {
*matched = 0;
*message = MPI_MESSAGE_NULL;
return OMPI_SUCCESS;
} else {
return OMPI_ERROR;
}
(*message) = ompi_message_alloc();
if (OPAL_UNLIKELY(NULL == (*message))) {
*matched = 0;
*message = MPI_MESSAGE_NULL;
return OMPI_ERR_OUT_OF_RESOURCE;
}
(*message)->comm = comm;
(*message)->req_ptr = msgp;
(*message)->peer = status->MPI_SOURCE;
(*message)->count = status->_ucount;
return OMPI_SUCCESS;
}

View file

@ -1,197 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/message/message.h"
#include "opal/datatype/opal_convertor.h"
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
#include "opal/util/show_help.h"
#include "mtl_mxm.h"
#include "mtl_mxm_types.h"
#include "mtl_mxm_request.h"
static void ompi_mtl_mxm_recv_completion_cb(void *context)
{
mca_mtl_mxm_request_t *req = (mca_mtl_mxm_request_t *) context;
struct ompi_request_t *ompi_req = req->super.ompi_req;
mxm_recv_req_t *mxm_recv_req = &req->mxm.recv;
/* Set completion status and envelope */
ompi_mtl_mxm_to_mpi_status(mxm_recv_req->base.error, &ompi_req->req_status);
ompi_req->req_status.MPI_TAG = mxm_recv_req->completion.sender_tag;
ompi_req->req_status.MPI_SOURCE = mxm_recv_req->completion.sender_imm;
ompi_req->req_status._ucount = mxm_recv_req->completion.actual_len;
req->super.completion_callback(&req->super);
}
static size_t ompi_mtl_mxm_stream_unpack(void *buffer, size_t length,
size_t offset, void *context)
{
struct iovec iov;
uint32_t iov_count = 1;
mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t *) context;
opal_convertor_t *convertor = mtl_mxm_request->convertor;
iov.iov_len = length;
iov.iov_base = buffer;
opal_convertor_set_position(convertor, &offset);
opal_convertor_unpack(convertor, &iov, &iov_count, &length);
return length;
}
static inline __opal_attribute_always_inline__ int
ompi_mtl_mxm_choose_recv_datatype(mca_mtl_mxm_request_t *mtl_mxm_request)
{
void **buffer = &mtl_mxm_request->buf;
size_t *buffer_len = &mtl_mxm_request->length;
mxm_recv_req_t *mxm_recv_req = &mtl_mxm_request->mxm.recv;
opal_convertor_t *convertor = mtl_mxm_request->convertor;
opal_convertor_get_packed_size(convertor, buffer_len);
if (0 == *buffer_len) {
*buffer = NULL;
*buffer_len = 0;
mxm_recv_req->base.data_type = MXM_REQ_DATA_BUFFER;
return OMPI_SUCCESS;
}
if (opal_convertor_need_buffers(convertor)) {
mxm_recv_req->base.data_type = MXM_REQ_DATA_STREAM;
mxm_recv_req->base.data.stream.length = *buffer_len;
mxm_recv_req->base.data.stream.cb = ompi_mtl_mxm_stream_unpack;
return OMPI_SUCCESS;
}
mxm_recv_req->base.data_type = MXM_REQ_DATA_BUFFER;
*buffer = convertor->pBaseBuf +
convertor->use_desc->desc[convertor->use_desc->used].end_loop.first_elem_disp;
mxm_recv_req->base.data.buffer.ptr = *buffer;
mxm_recv_req->base.data.buffer.length = *buffer_len;
return OMPI_SUCCESS;
}
static inline __opal_attribute_always_inline__ int
ompi_mtl_mxm_recv_init(mca_mtl_mxm_request_t *mtl_mxm_request,
opal_convertor_t *convertor,
mxm_recv_req_t *mxm_recv_req)
{
int ret;
mtl_mxm_request->convertor = convertor;
ret = ompi_mtl_mxm_choose_recv_datatype(mtl_mxm_request);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
#if MXM_API >= MXM_VERSION(2,0)
mtl_mxm_request->is_send = 0;
#endif
mxm_recv_req->base.state = MXM_REQ_NEW;
#if MXM_API < MXM_VERSION(2,0)
mxm_recv_req->base.flags = 0;
#endif
mxm_recv_req->base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
mxm_recv_req->base.context = mtl_mxm_request;
mxm_recv_req->base.completed_cb = ompi_mtl_mxm_recv_completion_cb;
return OMPI_SUCCESS;
}
int ompi_mtl_mxm_irecv(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t *comm, int src, int tag,
struct opal_convertor_t *convertor,
struct mca_mtl_request_t *mtl_request)
{
int ret;
mxm_error_t err;
mxm_recv_req_t *mxm_recv_req;
mca_mtl_mxm_request_t *mtl_mxm_request;
mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
mxm_recv_req = &mtl_mxm_request->mxm.recv;
ompi_mtl_mxm_set_recv_envelope(mxm_recv_req, comm, src, tag);
/* prepare a receive request embedded in the MTL request */
ret = ompi_mtl_mxm_recv_init(mtl_mxm_request, convertor, mxm_recv_req);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
/* post-recv */
err = mxm_req_recv(mxm_recv_req);
if (OPAL_UNLIKELY(MXM_OK != err)) {
opal_show_help("help-mtl-mxm.txt", "error posting receive", true,
mxm_error_string(err), mtl_mxm_request->buf, mtl_mxm_request->length);
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
int ompi_mtl_mxm_imrecv(struct mca_mtl_base_module_t* mtl,
struct opal_convertor_t *convertor,
struct ompi_message_t **message,
struct mca_mtl_request_t *mtl_request)
{
int ret;
mxm_error_t err;
mxm_recv_req_t *mxm_recv_req;
mca_mtl_mxm_request_t *mtl_mxm_request;
ompi_mtl_mxm_message_t *msgp =
(ompi_mtl_mxm_message_t *) (*message)->req_ptr;
mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
mxm_recv_req = &mtl_mxm_request->mxm.recv;
/* prepare a receive request embedded in the MTL request */
ret = ompi_mtl_mxm_recv_init(mtl_mxm_request, convertor, mxm_recv_req);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
mxm_recv_req->tag = msgp->tag;
mxm_recv_req->tag_mask = msgp->tag_mask;
mxm_recv_req->base.mq = msgp->mq;
mxm_recv_req->base.conn = msgp->conn;
err = mxm_message_recv(mxm_recv_req, msgp->mxm_msg);
if (OPAL_UNLIKELY(MXM_OK != err)) {
opal_show_help("help-mtl-mxm.txt", "error posting message receive", true,
mxm_error_string(err), mtl_mxm_request->buf, mtl_mxm_request->length);
return OMPI_ERROR;
}
opal_free_list_return (&mca_mtl_mxm_component.mxm_messages, (opal_free_list_item_t *) msgp);
ompi_message_return(*message);
(*message) = MPI_MESSAGE_NULL;
return OMPI_SUCCESS;
}

View file

@ -1,35 +0,0 @@
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef OMPI_MTL_MXM_REQUEST_H
#define OMPI_MTL_MXM_REQUEST_H
#include "opal/datatype/opal_convertor.h"
#include "mtl_mxm.h"
struct mca_mtl_mxm_request_t {
struct mca_mtl_request_t super;
union {
mxm_req_base_t base;
mxm_send_req_t send;
mxm_recv_req_t recv;
} mxm;
#if MXM_API >= MXM_VERSION(2,0)
int is_send;
#endif
/* mxm_segment_t mxm_segment[1]; */
void *buf;
size_t length;
struct opal_convertor_t *convertor;
bool free_after;
};
typedef struct mca_mtl_mxm_request_t mca_mtl_mxm_request_t;
#endif

View file

@ -1,238 +0,0 @@
/*
 * Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "ompi_config.h"
#include "ompi/mca/pml/pml.h"
#include "opal/datatype/opal_convertor.h"
#include "opal/util/show_help.h"
#include "mtl_mxm.h"
#include "mtl_mxm_types.h"
#include "mtl_mxm_request.h"
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
static inline __opal_attribute_always_inline__
size_t ompi_mtl_mxm_stream_pack(opal_convertor_t *convertor, void *buffer,
size_t length, size_t offset)
{
struct iovec iov;
uint32_t iov_count = 1;
iov.iov_len = length;
iov.iov_base = buffer;
opal_convertor_set_position(convertor, &offset);
opal_convertor_pack(convertor, &iov, &iov_count, &length);
return length;
}
static size_t ompi_mtl_mxm_stream_isend(void *buffer, size_t length, size_t offset, void *context)
{
mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t *) context;
opal_convertor_t *convertor = mtl_mxm_request->convertor;
return ompi_mtl_mxm_stream_pack(convertor, buffer, length, offset);
}
static size_t ompi_mtl_mxm_stream_send(void *buffer, size_t length, size_t offset, void *context)
{
opal_convertor_t *convertor = (opal_convertor_t *) context;
return ompi_mtl_mxm_stream_pack(convertor, buffer, length, offset);
}
static inline __opal_attribute_always_inline__ int
ompi_mtl_mxm_choose_send_datatype(mxm_send_req_t *mxm_send_req,
opal_convertor_t *convertor,
mxm_stream_cb_t stream_cb)
{
struct iovec iov;
uint32_t iov_count = 1;
size_t *buffer_len = &mxm_send_req->base.data.buffer.length;
#if !(OPAL_ENABLE_HETEROGENEOUS_SUPPORT)
if (convertor->pDesc &&
opal_datatype_is_contiguous_memory_layout(convertor->pDesc,
convertor->count)) {
mxm_send_req->base.data.buffer.ptr = convertor->pBaseBuf;
mxm_send_req->base.data.buffer.length = convertor->local_size;
mxm_send_req->base.data_type = MXM_REQ_DATA_BUFFER;
return OMPI_SUCCESS;
}
#endif
opal_convertor_get_packed_size(convertor, buffer_len);
if (0 == *buffer_len) {
mxm_send_req->base.data.buffer.ptr = NULL;
mxm_send_req->base.data_type = MXM_REQ_DATA_BUFFER;
return OMPI_SUCCESS;
}
if (opal_convertor_need_buffers(convertor)) {
mxm_send_req->base.data_type = MXM_REQ_DATA_STREAM;
mxm_send_req->base.data.stream.length = *buffer_len;
mxm_send_req->base.data.stream.cb = stream_cb;
return OMPI_SUCCESS;
}
mxm_send_req->base.data_type = MXM_REQ_DATA_BUFFER;
iov.iov_base = NULL;
iov.iov_len = *buffer_len;
opal_convertor_pack(convertor, &iov, &iov_count, buffer_len);
mxm_send_req->base.data.buffer.ptr = iov.iov_base;
return OMPI_SUCCESS;
}
static void ompi_mtl_mxm_send_completion_cb(void *context)
{
mca_mtl_mxm_request_t *mtl_mxm_request = context;
ompi_mtl_mxm_to_mpi_status(mtl_mxm_request->mxm.base.error,
&mtl_mxm_request->super.ompi_req->req_status);
mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super);
}
static void ompi_mtl_mxm_send_progress_cb(void *user_data)
{
opal_progress();
}
int ompi_mtl_mxm_send(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t* comm, int dest, int tag,
struct opal_convertor_t *convertor,
mca_pml_base_send_mode_t mode)
{
mxm_send_req_t mxm_send_req;
mxm_wait_t wait;
mxm_error_t err;
int ret;
/* prepare local send request */
mxm_send_req.base.state = MXM_REQ_NEW;
mxm_send_req.base.mq = ompi_mtl_mxm_mq_lookup(comm);
mxm_send_req.base.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
mxm_send_req.base.context = convertor;
mxm_send_req.base.completed_cb = NULL;
ret = ompi_mtl_mxm_choose_send_datatype(&mxm_send_req, convertor,
ompi_mtl_mxm_stream_send);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
mxm_send_req.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
mxm_send_req.op.send.tag = tag;
mxm_send_req.op.send.imm_data = ompi_comm_rank(comm);
#if MXM_API < MXM_VERSION(2,0)
mxm_send_req.base.flags = MXM_REQ_FLAG_BLOCKING;
mxm_send_req.opcode = MXM_REQ_OP_SEND;
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
mxm_send_req.base.flags |= MXM_REQ_FLAG_SEND_SYNC;
}
#else
mxm_send_req.flags = MXM_REQ_SEND_FLAG_BLOCKING;
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
mxm_send_req.opcode = MXM_REQ_OP_SEND_SYNC;
} else {
mxm_send_req.opcode = MXM_REQ_OP_SEND;
}
#endif
/* post-send */
err = mxm_req_send(&mxm_send_req);
if (MXM_OK != err) {
opal_show_help("help-mtl-mxm.txt", "error posting send", true, 0, mxm_error_string(err));
return OMPI_ERROR;
}
/* wait for request completion */
wait.req = &mxm_send_req.base;
wait.state = MXM_REQ_COMPLETED;
wait.progress_cb = ompi_mtl_mxm_send_progress_cb;
wait.progress_arg = NULL;
mxm_wait(&wait);
return OMPI_SUCCESS;
}
int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl,
struct ompi_communicator_t* comm, int dest, int tag,
struct opal_convertor_t *convertor,
mca_pml_base_send_mode_t mode, bool blocking,
mca_mtl_request_t * mtl_request)
{
mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t *) mtl_request;
mxm_send_req_t *mxm_send_req;
mxm_error_t err;
int ret;
assert(mtl == &ompi_mtl_mxm.super);
mtl_mxm_request->convertor = convertor;
mxm_send_req = &mtl_mxm_request->mxm.send;
#if MXM_API >= MXM_VERSION(2,0)
mtl_mxm_request->is_send = 1;
#endif
/* prepare a send request embedded in the MTL request */
mxm_send_req->base.state = MXM_REQ_NEW;
mxm_send_req->base.mq = ompi_mtl_mxm_mq_lookup(comm);
mxm_send_req->base.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
ret = ompi_mtl_mxm_choose_send_datatype(mxm_send_req, convertor,
ompi_mtl_mxm_stream_isend);
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
return ret;
}
mtl_mxm_request->buf = mxm_send_req->base.data.buffer.ptr;
mtl_mxm_request->length = mxm_send_req->base.data.buffer.length;
mxm_send_req->base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
mxm_send_req->base.context = mtl_mxm_request;
mxm_send_req->base.completed_cb = ompi_mtl_mxm_send_completion_cb;
#if MXM_API < MXM_VERSION(2,0)
mxm_send_req->base.flags = 0;
mxm_send_req->opcode = MXM_REQ_OP_SEND;
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
mxm_send_req->base.flags |= MXM_REQ_FLAG_SEND_SYNC;
}
#else
#if defined(MXM_REQ_SEND_FLAG_REENTRANT)
mxm_send_req->flags = MXM_REQ_SEND_FLAG_REENTRANT;
#else
mxm_send_req->flags = 0;
#endif
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
mxm_send_req->opcode = MXM_REQ_OP_SEND_SYNC;
} else {
mxm_send_req->opcode = MXM_REQ_OP_SEND;
}
#endif
mxm_send_req->op.send.tag = tag;
mxm_send_req->op.send.imm_data = ompi_comm_rank(comm);
/* post-send */
err = mxm_req_send(mxm_send_req);
if (MXM_OK != err) {
opal_show_help("help-mtl-mxm.txt", "error posting send", true, 1, mxm_error_string(err));
return OMPI_ERROR;
}
return OMPI_SUCCESS;
}
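
A detail worth noting in the blocking ompi_mtl_mxm_send() above: mxm_wait() is handed ompi_mtl_mxm_send_progress_cb, a thin wrapper around opal_progress(), as its progress callback, so the rest of Open MPI keeps making progress while the caller waits for the MXM request to complete.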

View file

@ -1,123 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#ifndef MTL_MXM_TYPES_H_HAS_BEEN_INCLUDED
#define MTL_MXM_TYPES_H_HAS_BEEN_INCLUDED
#include "ompi_config.h"
#include "mtl_mxm.h"
#include "ompi/mca/mtl/mtl.h"
#include "ompi/mca/mtl/base/base.h"
#include "ompi/communicator/communicator.h"
#include "mtl_mxm_endpoint.h"
BEGIN_C_DECLS
/**
* MTL Module Interface
*/
typedef struct mca_mtl_mxm_module_t {
mca_mtl_base_module_t super; /**< base MTL interface */
int verbose;
int mxm_np;
mxm_h mxm_context;
mxm_ep_h ep;
mxm_context_opts_t *mxm_ctx_opts;
mxm_ep_opts_t *mxm_ep_opts;
#if MXM_API >= MXM_VERSION(2,0)
int using_mem_hooks;
#endif
#if MXM_API >= MXM_VERSION(3,1)
int bulk_connect; /* use bulk connect */
int bulk_disconnect; /* use bulk disconnect */
#endif
char* runtime_version;
char* compiletime_version;
} mca_mtl_mxm_module_t;
#if MXM_API < MXM_VERSION(2,0)
typedef struct ompi_mtl_mxm_ep_conn_info_t {
struct sockaddr_storage ptl_addr[MXM_PTL_LAST];
} ompi_mtl_mxm_ep_conn_info_t;
#endif
extern mca_mtl_mxm_module_t ompi_mtl_mxm;
typedef struct mca_mtl_mxm_component_t {
mca_mtl_base_component_2_0_0_t super; /**< base MTL component */
opal_free_list_t mxm_messages; /* will be used for MPI_Mprobe and MPI_Mrecv calls */
} mca_mtl_mxm_component_t;
OMPI_DECLSPEC mca_mtl_mxm_component_t mca_mtl_mxm_component;
static inline mxm_conn_h ompi_mtl_mxm_conn_lookup(struct ompi_communicator_t* comm, int rank) {
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup(comm, rank);
mca_mtl_mxm_endpoint_t *endpoint = (mca_mtl_mxm_endpoint_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
if (endpoint != NULL) {
return endpoint->mxm_conn;
}
MXM_VERBOSE(80, "First communication with [%s:%s]: set endpoint connection.",
ompi_proc->super.proc_hostname, OPAL_NAME_PRINT(ompi_proc->super.proc_name));
ompi_mtl_add_single_proc(ompi_mtl, ompi_proc);
endpoint = (mca_mtl_mxm_endpoint_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
return endpoint->mxm_conn;
}
static inline mxm_mq_h ompi_mtl_mxm_mq_lookup(struct ompi_communicator_t* comm) {
return (mxm_mq_h)comm->c_pml_comm;
}
static inline void ompi_mtl_mxm_to_mpi_status(mxm_error_t status, ompi_status_public_t *ompi_status) {
switch (status) {
case MXM_OK:
ompi_status->MPI_ERROR = OMPI_SUCCESS;
break;
case MXM_ERR_CANCELED:
ompi_status->_cancelled = true;
break;
case MXM_ERR_MESSAGE_TRUNCATED:
ompi_status->MPI_ERROR = MPI_ERR_TRUNCATE;
break;
default:
ompi_status->MPI_ERROR = MPI_ERR_INTERN;
break;
}
}
static inline void ompi_mtl_mxm_set_recv_envelope(mxm_recv_req_t *req,
struct ompi_communicator_t *comm,
int src, int tag) {
req->base.mq = (mxm_mq_h)comm->c_pml_comm;
req->base.conn = (src == MPI_ANY_SOURCE)
? NULL
: ompi_mtl_mxm_conn_lookup(comm, src);
if (tag == MPI_ANY_TAG) {
req->tag = 0;
req->tag_mask = 0x80000000U; /* MPI_ANY_TAG should not match against negative tags */
} else {
req->tag = tag;
req->tag_mask = 0xffffffffU;
}
}
END_C_DECLS
#endif
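
The MPI_ANY_TAG handling above is easier to check with concrete values. A minimal sketch, assuming MXM matches a posted request against an incoming tag when ((req->tag ^ incoming_tag) & req->tag_mask) == 0 (an assumption about libmxm's matching rule; the helper below is hypothetical):

#include <stdint.h>
#include <stdio.h>

/* hypothetical stand-in for the matching performed inside libmxm */
static int tag_matches(uint32_t req_tag, uint32_t tag_mask, uint32_t incoming)
{
    return ((req_tag ^ incoming) & tag_mask) == 0;
}

int main(void)
{
    /* MPI_ANY_TAG: tag = 0, mask = 0x80000000 keeps only the sign bit, so any
     * non-negative user tag matches while negative (reserved) tags do not */
    printf("%d\n", tag_matches(0, 0x80000000U, 42u));         /* 1 */
    printf("%d\n", tag_matches(0, 0x80000000U, 0x80000001u)); /* 0 */

    /* specific tag: the full mask requires an exact match */
    printf("%d\n", tag_matches(42, 0xffffffffU, 42u));        /* 1 */
    printf("%d\n", tag_matches(42, 0xffffffffU, 43u));        /* 0 */
    return 0;
}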

View file

@ -1,7 +0,0 @@
#
# owner/status file
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner: MELLANOX
status: active

View file

@ -12,7 +12,7 @@
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved.
 * Copyright (c) 2014 Intel, Inc. All rights reserved.
-* Copyright (c) 2015 Research Organization for Information Science
+* Copyright (c) 2015-2017 Research Organization for Information Science
 * and Technology (RIST). All rights reserved.
 * $COPYRIGHT$
 *
@ -123,7 +123,7 @@ opal_bitmap_set_bit(opal_bitmap_t *bm, int bit)
        out of range. We don't throw any error here, because this is
        valid and we simply expand the bitmap */
-    new_size = (int)(((size_t)index / bm->array_size + 1 ) * bm->array_size);
+    new_size = index + 1;
     if( new_size > bm->max_size )
         new_size = bm->max_size;
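
The second hunk changes the growth policy: the old code rounded the requested size up to a whole multiple of array_size, while the new code grows the bitmap to exactly the bit it needs. A worked comparison (illustrative only; the parameters mirror the opal_bitmap_t fields used above):

#include <stddef.h>
#include <stdio.h>

static int old_growth(size_t index, int array_size)
{
    return (int)((index / array_size + 1) * array_size); /* round up to a full chunk */
}

static int new_growth(size_t index)
{
    return (int)index + 1;                               /* exactly enough for bit 'index' */
}

int main(void)
{
    /* either result is then capped at bm->max_size, as the context lines show */
    printf("%d %d\n", old_growth(70, 64), new_growth(70)); /* prints: 128 71 */
    return 0;
}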

View file

@ -429,8 +429,10 @@ int mca_common_cuda_stage_one_init(void)
     if (true != stage_one_init_passed) {
         errmsg = opal_argv_join(errmsgs, '\n');
-        opal_show_help("help-mpi-common-cuda.txt", "dlopen failed", true,
-                       errmsg);
+        if (opal_warn_on_missing_libcuda) {
+            opal_show_help("help-mpi-common-cuda.txt", "dlopen failed", true,
+                           errmsg);
+        }
         opal_cuda_support = 0;
     }
     opal_argv_free(errmsgs);

View file

@ -166,7 +166,7 @@ The library attempted to open the following supporting CUDA libraries,
 but each of them failed. CUDA-aware support is disabled.
 %s
 If you are not interested in CUDA-aware support, then run with
---mca mpi_cuda_support 0 to suppress this message. If you are interested
+--mca opal_warn_on_missing_libcuda 0 to suppress this message. If you are interested
 in CUDA-aware support, then try setting LD_LIBRARY_PATH to the location
 of libcuda.so.1 to get past this issue.
 #

View file

@ -61,6 +61,7 @@ bool opal_timing_overhead = true;
 bool opal_built_with_cuda_support = OPAL_INT_TO_BOOL(OPAL_CUDA_SUPPORT);
 bool opal_cuda_support = false;
+bool opal_warn_on_missing_libcuda = true;
 #if OPAL_ENABLE_FT_CR == 1
 bool opal_base_distill_checkpoint_ready = false;
 #endif
@ -245,6 +246,16 @@ int opal_register_params(void)
         return ret;
     }
+    opal_warn_on_missing_libcuda = true;
+    ret = mca_base_var_register ("opal", "opal", NULL, "warn_on_missing_libcuda",
+                                 "Whether to print a message when CUDA support is enabled but libcuda is not found",
+                                 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
+                                 OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_ALL_EQ,
+                                 &opal_warn_on_missing_libcuda);
+    if (0 > ret) {
+        return ret;
+    }
+
     /* Leave pinned parameter */
     opal_leave_pinned = -1;
     ret = mca_base_var_register("ompi", "mpi", NULL, "leave_pinned",
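
Because the new variable is registered with MCA_BASE_VAR_FLAG_SETTABLE, it can be toggled per run rather than at build time; per the help-text hunk above, running with --mca opal_warn_on_missing_libcuda 0 silences the dlopen warning while leaving CUDA detection itself unchanged.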

View file

@ -48,6 +48,11 @@ OPAL_DECLSPEC extern bool opal_built_with_cuda_support;
 * */
 OPAL_DECLSPEC extern bool opal_cuda_support;
+/**
+ * * Whether we want to warn the user when libcuda is missing.
+ * */
+OPAL_DECLSPEC extern bool opal_warn_on_missing_libcuda;
+
 /**
 * Whether to use the "leave pinned" protocol or not (0 = no, 1 = yes,
 * -1 = determine at runtime).

View file

@ -1,6 +1,6 @@
 /* oshmem/include/shmem-compat.h. This file contains OpenSHMEM legacy API */
 /*
- * Copyright (c) 2014-2015 Mellanox Technologies, Inc.
+ * Copyright (c) 2014-2017 Mellanox Technologies, Inc.
  * All rights reserved.
  * $COPYRIGHT$
  *
@ -33,11 +33,6 @@ OSHMEM_DECLSPEC void* shmemalign(size_t align, size_t size);
 OSHMEM_DECLSPEC void* shrealloc(void *ptr, size_t size);
 OSHMEM_DECLSPEC void shfree(void* ptr);
-OSHMEM_DECLSPEC void shmem_char_put(char *target, const char *source, size_t len, int pe);
-OSHMEM_DECLSPEC void shmem_char_get(char *target, const char *source, size_t len, int pe);
-OSHMEM_DECLSPEC void shmem_put(void *target, const void *source, size_t len, int pe);
-OSHMEM_DECLSPEC void shmem_get(void *target, const void *source, size_t len, int pe);
 OSHMEM_DECLSPEC void globalexit(int status);
 #if defined(c_plusplus) || defined(__cplusplus)

View file

@ -72,10 +72,10 @@ int mca_scoll_basic_alltoall(struct oshmem_group_t *group,
         return rc;
     }
-    /* fence (which currently acts as quiet) is needed
-     * because scoll level barrier does not guarantee put completion
+    /* quiet is needed because scoll level barrier does not
+     * guarantee put completion
      */
-    MCA_SPML_CALL(fence());
+    MCA_SPML_CALL(quiet());
     /* Wait for operation completion */
     SCOLL_VERBOSE(14, "[#%d] Wait for operation completion", group->my_pe);

View file

@ -167,8 +167,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group,
            The root could leave the first barrier and in the second barrier it could get SHMEM_SYNC_WAIT value on
            remote node before the remote node receives its SHMEM_SYNC_RUN value in the first barrier
          */
-        /* TODO: actually it must be quiet */
-        MCA_SPML_CALL(fence());
+        MCA_SPML_CALL(quiet());
     }
     /* Wait for RUN signal */
     else {

View file

@ -146,10 +146,10 @@ static int _algorithm_central_counter(struct oshmem_group_t *group,
                 rc = MCA_SPML_CALL(put(target, nlong, (void *)source, pe_cur));
             }
         }
-        /* fence (which currently acts as quiet) is needed
-         * because scoll level barrier does not guarantee put completion
+        /* quiet is needed because scoll level barrier does not
+         * guarantee put completion
          */
-        MCA_SPML_CALL(fence());
+        MCA_SPML_CALL(quiet());
     }
     if (rc == OSHMEM_SUCCESS) {

View file

@ -153,11 +153,7 @@ int mca_spml_base_wait(void* addr, int cmp, void* value, int datatype)
  */
 int mca_spml_base_wait_nb(void* handle)
 {
-    /* TODO fence is a gag for more accurate code
-     * Use shmem_quiet() (or a function calling shmem_quiet()) or
-     * shmem_wait_nb() to force completion of transfers for non-blocking operations.
-     */
-    MCA_SPML_CALL(fence());
+    MCA_SPML_CALL(quiet());
     return OSHMEM_SUCCESS;
 }

View file

@ -16,4 +16,10 @@ AC_DEFUN([MCA_oshmem_spml_CONFIG],[
     # this is a direct callable component, so set that up.
     MCA_SETUP_DIRECT_CALL($1, $2)
+
+    if test -z "$MCA_$1_$2_DSO_COMPONENTS" && test -z "$MCA_$1_$2_STATIC_COMPONENTS"; then
+        OSHMEM_FOUND_WORKING_SPML=0
+    else
+        OSHMEM_FOUND_WORKING_SPML=1
+    fi
 ])
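
The OSHMEM_FOUND_WORKING_SPML flag defined here is presumably what the rest of the configury consults to skip building the Open SHMEM layer when no SPML component (neither ikrit/MXM nor UCX) survives selection; only the flag's definition appears in this hunk.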

View file

@ -168,6 +168,7 @@ mca_spml_ikrit_t mca_spml_ikrit = {
         mca_spml_ikrit_send,
         mca_spml_base_wait,
         mca_spml_base_wait_nb,
+        mca_spml_ikrit_fence, /* fence is implemented as quiet */
         mca_spml_ikrit_fence,
         mca_spml_ikrit_cache_mkeys,
         mca_spml_base_rmkey_free,

View file

@ -275,12 +275,19 @@ typedef int (*mca_spml_base_module_send_fn_t)(void *buf,
                                               mca_spml_base_put_mode_t mode);
 /**
- * Wait for completion of all outstanding put() requests
+ * Assures ordering of delivery of put() requests
  *
  * @return - OSHMEM_SUCCESS or failure status.
  */
 typedef int (*mca_spml_base_module_fence_fn_t)(void);
+/**
+ * Wait for completion of all outstanding put() requests
+ *
+ * @return - OSHMEM_SUCCESS or failure status.
+ */
+typedef int (*mca_spml_base_module_quiet_fn_t)(void);
 /**
  * Waits for completion of a non-blocking put or get issued by the calling PE.
  *
@ -321,6 +328,7 @@ struct mca_spml_base_module_1_0_0_t {
     mca_spml_base_module_wait_fn_t spml_wait;
     mca_spml_base_module_wait_nb_fn_t spml_wait_nb;
     mca_spml_base_module_fence_fn_t spml_fence;
+    mca_spml_base_module_quiet_fn_t spml_quiet;
     mca_spml_base_module_mkey_unpack_fn_t spml_rmkey_unpack;
     mca_spml_base_module_mkey_free_fn_t spml_rmkey_free;
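
The fence/quiet split documented above is easiest to see from the application side. A minimal sketch of the two guarantees in OpenSHMEM terms (illustrative code, not part of this commit):

#include <shmem.h>

/* fence: ordering only -- the two puts to the same PE are delivered in
 * order, but neither is necessarily complete when shmem_fence() returns */
void ordered_update(int *data, int *flag, int pe)
{
    shmem_int_p(data, 42, pe);
    shmem_fence();
    shmem_int_p(flag, 1, pe);
}

/* quiet: completion -- returns only once every outstanding put is remotely
 * complete, which is what the barrier and deregister paths below rely on */
void completed_update(int *data, int pe)
{
    shmem_int_p(data, 42, pe);
    shmem_quiet();
}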

View file

@ -60,8 +60,8 @@ mca_spml_ucx_t mca_spml_ucx = {
         mca_spml_ucx_send,
         mca_spml_base_wait,
         mca_spml_base_wait_nb,
-        mca_spml_ucx_quiet, /* At the moment fence is the same as quiet for
-                               every spml */
+        mca_spml_ucx_fence,
+        mca_spml_ucx_quiet,
         mca_spml_ucx_rmkey_unpack,
         mca_spml_ucx_rmkey_free,
         mca_spml_ucx_rmkey_ptr,
@ -520,7 +520,7 @@ int mca_spml_ucx_deregister(sshmem_mkey_t *mkeys)
     spml_ucx_mkey_t *ucx_mkey;
     map_segment_t *mem_seg;
-    MCA_SPML_CALL(fence());
+    MCA_SPML_CALL(quiet());
     if (!mkeys)
         return OSHMEM_SUCCESS;
@ -598,7 +598,7 @@ int mca_spml_ucx_fence(void)
 {
     ucs_status_t err;
-    err = ucp_worker_flush(mca_spml_ucx.ucp_worker);
+    err = ucp_worker_fence(mca_spml_ucx.ucp_worker);
     if (UCS_OK != err) {
         SPML_ERROR("fence failed: %s", ucs_status_string(err));
         oshmem_shmem_abort(-1);
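
With this change the two SPML entry points map onto distinct UCX worker operations: ucp_worker_fence() for ordering and ucp_worker_flush() for completion. A rough sketch of the resulting shape (a sketch under that assumption, not the commit's exact code; mca_spml_ucx.ucp_worker is the worker handle used above):

#include <ucp/api/ucp.h>

static int sketch_spml_ucx_fence(ucp_worker_h worker)
{
    /* orders later operations after earlier ones; does not wait for completion */
    return (UCS_OK == ucp_worker_fence(worker)) ? 0 : -1;
}

static int sketch_spml_ucx_quiet(ucp_worker_h worker)
{
    /* blocks until all outstanding communication on the worker is complete */
    return (UCS_OK == ucp_worker_flush(worker)) ? 0 : -1;
}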

View file

@ -36,7 +36,7 @@ void shmem_barrier(int PE_start, int logPE_stride, int PE_size, long *pSync)
 #if OSHMEM_SPEC_COMPAT == 1
     /* all outstanding puts must be completed */
-    shmem_fence();
+    shmem_quiet();
 #endif
     /* Create group basing PE_start, logPE_stride and PE_size */
@ -54,7 +54,7 @@ void shmem_barrier_all(void)
 #if OSHMEM_SPEC_COMPAT == 1
     /* all outstanding puts must be completed */
-    shmem_fence();
+    shmem_quiet();
 #endif
     if (mca_scoll_sync_array) {
Просмотреть файл

@ -23,5 +23,5 @@
 void shmem_quiet(void)
 {
-    MCA_SPML_CALL(fence());
+    MCA_SPML_CALL(quiet());
 }

View file

@ -30,5 +30,5 @@ SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void,
 void shmem_quiet_f(void)
 {
-    MCA_SPML_CALL(fence());
+    MCA_SPML_CALL(quiet());
 }