Merge remote-tracking branch 'wg/master' into topic/amo-non-blocking-ucp
Signed-off-by: Sergey Oblomov <sergeyo@mellanox.com>
Этот коммит содержится в:
Коммит
b668e19cd1
@ -12,7 +12,7 @@
|
||||
# Copyright (c) 2006-2016 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2012-2015 Los Alamos National Security, Inc. All rights reserved.
|
||||
# Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
# Copyright (c) 2017 Amazon.com, Inc. or its affiliates.
|
||||
# Copyright (c) 2017-2018 Amazon.com, Inc. or its affiliates.
|
||||
# All Rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
@ -22,6 +22,7 @@
|
||||
#
|
||||
|
||||
SUBDIRS = config contrib $(MCA_PROJECT_SUBDIRS) test
|
||||
DIST_SUBDIRS = config contrib $(MCA_PROJECT_DIST_SUBDIRS) test
|
||||
EXTRA_DIST = README INSTALL VERSION Doxyfile LICENSE autogen.pl README.JAVA.txt AUTHORS
|
||||
|
||||
include examples/Makefile.include
|
||||
|
3
NEWS
3
NEWS
@ -59,6 +59,9 @@ Master (not on release branches yet)
|
||||
------------------------------------
|
||||
|
||||
- Fix rank-by algorithms to properly rank by object and span
|
||||
- Do not build Open SHMEM layer when there are no SPMLs available.
|
||||
Currently, this means the Open SHMEM layer will only build if
|
||||
a MXM or UCX library is found.
|
||||
|
||||
3.1.0 -- May, 2018
|
||||
------------------
|
||||
|
@ -12,6 +12,8 @@ dnl Copyright (c) 2004-2005 The Regents of the University of California.
|
||||
dnl All rights reserved.
|
||||
dnl Copyright (c) 2010-2016 Cisco Systems, Inc. All rights reserved.
|
||||
dnl Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
|
||||
dnl Copyright (c) 2018 Amazon.com, Inc. or its affiliates.
|
||||
dnl All Rights reserved.
|
||||
dnl $COPYRIGHT$
|
||||
dnl
|
||||
dnl Additional copyrights may follow
|
||||
@ -225,16 +227,19 @@ AC_DEFUN([OPAL_MCA],[
|
||||
# now configure all the projects, frameworks, and components. Most
|
||||
# of the hard stuff is in here
|
||||
MCA_PROJECT_SUBDIRS=
|
||||
MCA_PROJECT_DIST_SUBDIRS=
|
||||
m4_foreach(mca_project, [mca_project_list],
|
||||
[# BWB: Until projects have separate configure scripts
|
||||
# and can skip running all of ORTE, just avoid recursing
|
||||
# into orte sub directory if orte disabled
|
||||
if (test "mca_project" = "ompi" && test "$enable_mpi" != "no") || test "mca_project" = "opal" || test "mca_project" = "orte" || test "mca_project" = "oshmem"; then
|
||||
MCA_PROJECT_SUBDIRS="$MCA_PROJECT_SUBDIRS mca_project"
|
||||
MCA_PROJECT_DIST_SUBDIRS="$MCA_PROJECT_DIST_SUBDIRS mca_project"
|
||||
fi
|
||||
MCA_CONFIGURE_PROJECT(mca_project)])
|
||||
|
||||
AC_SUBST(MCA_PROJECT_SUBDIRS)
|
||||
AC_SUBST(MCA_PROJECT_DIST_SUBDIRS)
|
||||
|
||||
m4_undefine([mca_component_configure_active])
|
||||
])
|
||||
|
@ -29,9 +29,9 @@ AC_DEFUN([OPAL_CC_HELPER],[
|
||||
|
||||
opal_prog_cc_c11_helper_tmp=0
|
||||
|
||||
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([$3],[$4])],[
|
||||
$2=yes
|
||||
opal_prog_cc_c11_helper_tmp=1], [$2=no])
|
||||
AC_LINK_IFELSE([AC_LANG_PROGRAM([$3],[$4])],[
|
||||
$2=yes
|
||||
opal_prog_cc_c11_helper_tmp=1], [$2=no])
|
||||
|
||||
AC_DEFINE_UNQUOTED([$5], [$opal_prog_cc_c11_helper_tmp], [$6])
|
||||
|
||||
|
@ -76,8 +76,10 @@ EOF
|
||||
|
||||
if test "$project_oshmem_amc" = "true" ; then
|
||||
echo "Build Open SHMEM support: yes"
|
||||
else
|
||||
elif test -z "$project_oshmem_amc" ; then
|
||||
echo "Build Open SHMEM support: no"
|
||||
else
|
||||
echo "Build Open SHMEM support: $project_oshmem_amc"
|
||||
fi
|
||||
|
||||
if test $WANT_DEBUG = 0 ; then
|
||||
|
@ -6,6 +6,8 @@ dnl Copyright (c) 2013-2014 Cisco Systems, Inc. All rights reserved.
|
||||
dnl Copyright (c) 2014 Intel, Inc. All rights reserved
|
||||
dnl Copyright (c) 2014-2015 Research Organization for Information Science
|
||||
dnl and Technology (RIST). All rights reserved.
|
||||
dnl Copyright (c) 2018 Amazon.com, Inc. or its affiliates.
|
||||
dnl All Rights reserved.
|
||||
dnl $COPYRIGHT$
|
||||
dnl
|
||||
dnl Additional copyrights may follow
|
||||
@ -25,28 +27,23 @@ AC_SUBST(OSHMEM_LIBSHMEM_EXTRA_LDFLAGS)
|
||||
AC_MSG_CHECKING([if want oshmem])
|
||||
AC_ARG_ENABLE([oshmem],
|
||||
[AC_HELP_STRING([--enable-oshmem],
|
||||
[Enable building the OpenSHMEM interface (available on Linux only, where it is enabled by default)])],
|
||||
[oshmem_arg_given=yes],
|
||||
[oshmem_arg_given=no])
|
||||
if test "$oshmem_arg_given" = "yes"; then
|
||||
if test "$enable_oshmem" = "yes"; then
|
||||
AC_MSG_RESULT([yes])
|
||||
if test "$opal_found_linux" != "yes"; then
|
||||
AC_MSG_WARN([OpenSHMEM support was requested, but currently])
|
||||
AC_MSG_WARN([only supports Linux.])
|
||||
AC_MSG_ERROR([Cannot continue])
|
||||
fi
|
||||
else
|
||||
AC_MSG_RESULT([no])
|
||||
fi
|
||||
else
|
||||
[Enable building the OpenSHMEM interface (available on Linux only, where it is enabled by default)])])
|
||||
if test "$enable_oshmem" = "no"; then
|
||||
AC_MSG_RESULT([no])
|
||||
elif test "$enable_oshmem" = ""; then
|
||||
if test "$opal_found_linux" = "yes"; then
|
||||
enable_oshmem=yes
|
||||
AC_MSG_RESULT([yes])
|
||||
else
|
||||
enable_oshmem=no
|
||||
AC_MSG_RESULT([not supported on this platform])
|
||||
fi
|
||||
else
|
||||
AC_MSG_RESULT([yes])
|
||||
if test "$opal_found_linux" != "yes"; then
|
||||
AC_MSG_WARN([OpenSHMEM support was requested, but currently])
|
||||
AC_MSG_WARN([only supports Linux.])
|
||||
AC_MSG_ERROR([Cannot continue])
|
||||
fi
|
||||
fi
|
||||
|
||||
#
|
||||
@ -56,7 +53,7 @@ AC_MSG_CHECKING([if want SGI/Quadrics compatibility mode])
|
||||
AC_ARG_ENABLE(oshmem-compat,
|
||||
AC_HELP_STRING([--enable-oshmem-compat],
|
||||
[enable compatibility mode (default: enabled)]))
|
||||
if test "$enable_oshmem" != "no" && test "$enable_oshmem_compat" != "no"; then
|
||||
if test "$enable_oshmem_compat" != "no"; then
|
||||
AC_MSG_RESULT([yes])
|
||||
OSHMEM_SPEC_COMPAT=1
|
||||
else
|
||||
@ -75,26 +72,21 @@ AC_MSG_CHECKING([if want OSHMEM API parameter checking])
|
||||
AC_ARG_WITH(oshmem-param-check,
|
||||
AC_HELP_STRING([--with-oshmem-param-check(=VALUE)],
|
||||
[behavior of OSHMEM API function parameter checking. Valid values are: always, never. If --with-oshmem-param-check is specified with no VALUE argument, it is equivalent to a VALUE of "always"; --without-oshmem-param-check is equivalent to "never" (default: always).]))
|
||||
if test "$enable_oshmem" != "no"; then
|
||||
if test "$with_oshmem_param_check" = "no" || \
|
||||
test "$with_oshmem_param_check" = "never"; then
|
||||
shmem_param_check=0
|
||||
AC_MSG_RESULT([never])
|
||||
elif test "$with_oshmem_param_check" = "yes" || \
|
||||
test "$with_oshmem_param_check" = "always" || \
|
||||
test -z "$with_oshmem_param_check"; then
|
||||
shmem_param_check=1
|
||||
AC_MSG_RESULT([always])
|
||||
else
|
||||
shmem_param_check=1
|
||||
AC_MSG_RESULT([unknown])
|
||||
AC_MSG_WARN([*** Unrecognized --with-oshmem-param-check value])
|
||||
AC_MSG_WARN([*** See "configure --help" output])
|
||||
AC_MSG_WARN([*** Defaulting to "always"])
|
||||
fi
|
||||
else
|
||||
if test "$with_oshmem_param_check" = "no" || \
|
||||
test "$with_oshmem_param_check" = "never"; then
|
||||
shmem_param_check=0
|
||||
AC_MSG_RESULT([no])
|
||||
AC_MSG_RESULT([never])
|
||||
elif test "$with_oshmem_param_check" = "yes" || \
|
||||
test "$with_oshmem_param_check" = "always" || \
|
||||
test -z "$with_oshmem_param_check"; then
|
||||
shmem_param_check=1
|
||||
AC_MSG_RESULT([always])
|
||||
else
|
||||
shmem_param_check=1
|
||||
AC_MSG_RESULT([unknown])
|
||||
AC_MSG_WARN([*** Unrecognized --with-oshmem-param-check value])
|
||||
AC_MSG_WARN([*** See "configure --help" output])
|
||||
AC_MSG_WARN([*** Defaulting to "always"])
|
||||
fi
|
||||
AC_DEFINE_UNQUOTED(OSHMEM_PARAM_CHECK, $shmem_param_check,
|
||||
[Whether we want to check OSHMEM parameters always or never])
|
||||
@ -132,7 +124,7 @@ AC_MSG_CHECKING([if want to build OSHMEM fortran bindings])
|
||||
AC_ARG_ENABLE(oshmem-fortran,
|
||||
AC_HELP_STRING([--enable-oshmem-fortran],
|
||||
[enable OSHMEM Fortran bindings (default: enabled if Fortran compiler found)]))
|
||||
if test "$enable_oshmem" != "no" && test "$enable_oshmem_fortran" != "no"; then
|
||||
if test "$enable_oshmem_fortran" != "no"; then
|
||||
# If no OMPI FORTRAN, bail
|
||||
AS_IF([test $OMPI_TRY_FORTRAN_BINDINGS -eq $OMPI_FORTRAN_NO_BINDINGS && \
|
||||
test "$enable_oshmem_fortran" = "yes"],
|
||||
|
34
configure.ac
34
configure.ac
@ -23,6 +23,8 @@
|
||||
# Copyright (c) 2014-2017 Research Organization for Information Science
|
||||
# and Technology (RIST). All rights reserved.
|
||||
# Copyright (c) 2016-2017 IBM Corporation. All rights reserved.
|
||||
# Copyright (c) 2018 Amazon.com, Inc. or its affiliates.
|
||||
# All Rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
@ -266,15 +268,12 @@ m4_ifdef([project_oshmem], [OSHMEM_CONFIGURE_OPTIONS])
|
||||
# Set up project specific AM_CONDITIONALs
|
||||
AS_IF([test "$enable_ompi" != "no"], [project_ompi_amc=true], [project_ompi_amc=false])
|
||||
m4_ifndef([project_ompi], [project_ompi_amc=false])
|
||||
AM_CONDITIONAL([PROJECT_OMPI], [test "$project_ompi_amc" = "true"])
|
||||
|
||||
AS_IF([test "$enable_orte" != "no"], [project_orte_amc=true], [project_orte_amc=false])
|
||||
m4_ifndef([project_orte], [project_orte_amc=false])
|
||||
AM_CONDITIONAL([PROJECT_ORTE], [test "$project_orte_amc" = "true"])
|
||||
|
||||
AS_IF([test "$enable_oshmem" != "no"], [project_oshmem_amc=true], [project_oshmem_amc=false])
|
||||
m4_ifndef([project_oshmem], [project_oshmem_amc=false])
|
||||
AM_CONDITIONAL([PROJECT_OSHMEM], [test "$project_oshmem_amc" = "true"])
|
||||
AS_IF([test "$enable_oshmem" != "no"], [project_oshmem_amc=true], [project_oshmem_amc="no (disabled)"])
|
||||
m4_ifndef([project_oshmem], [project_oshmem_amc="no (not available)"])
|
||||
|
||||
if test "$enable_binaries" = "no" && test "$enable_dist" = "yes"; then
|
||||
AC_MSG_WARN([--disable-binaries is incompatible with --enable dist])
|
||||
@ -1107,6 +1106,23 @@ OPAL_MCA
|
||||
|
||||
m4_ifdef([project_ompi], [OMPI_REQUIRE_ENDPOINT_TAG_FINI])
|
||||
|
||||
# Last minute disable of OpenSHMEM if we didn't find any oshmem SPMLs
|
||||
if test "$project_oshmem_amc" = "true" && test $OSHMEM_FOUND_WORKING_SPML -eq 0 ; then
|
||||
# We don't have an spml that will work, so oshmem wouldn't be able
|
||||
# to run an application. Therefore, don't build the oshmem layer.
|
||||
if test "$enable_oshmem" != "no" && test -n "$enable_oshmem"; then
|
||||
AC_MSG_WARN([No spml found, so OpenSHMEM layer will be non functional.])
|
||||
AC_MSG_ERROR([Aborting because OpenSHMEM requested, but can not build.])
|
||||
else
|
||||
AC_MSG_WARN([No spml found. Will not build OpenSHMEM layer.])
|
||||
project_oshmem_amc="false (no spml)"
|
||||
# now for the hard part, remove project from list that will
|
||||
# run. This is a hack, but it works as long as the project
|
||||
# remains named "oshmem".
|
||||
MCA_PROJECT_SUBDIRS=`echo "$MCA_PROJECT_SUBDIRS" | sed -e 's/oshmem//'`
|
||||
fi
|
||||
fi
|
||||
|
||||
# checkpoint results
|
||||
AC_CACHE_SAVE
|
||||
|
||||
@ -1344,6 +1360,14 @@ m4_ifdef([project_ompi],
|
||||
# Party on
|
||||
############################################################################
|
||||
|
||||
# set projects good/no good AM_CONDITIONALS. This is at the end so
|
||||
# that the OSHMEM/OMPI projects can be disabled, if needed, based on
|
||||
# MCA tests. If a project is to be disabled, also remove it from
|
||||
# MCA_PROJECT_SUBDIRS to actually disable building.
|
||||
AM_CONDITIONAL([PROJECT_OMPI], [test "$project_ompi_amc" = "true"])
|
||||
AM_CONDITIONAL([PROJECT_ORTE], [test "$project_orte_amc" = "true"])
|
||||
AM_CONDITIONAL([PROJECT_OSHMEM], [test "$project_oshmem_amc" = "true"])
|
||||
|
||||
AC_MSG_CHECKING([if libtool needs -no-undefined flag to build shared libraries])
|
||||
case "`uname`" in
|
||||
CYGWIN*|MINGW*|AIX*)
|
||||
|
@ -253,6 +253,7 @@ int ompi_coll_base_reduce_scatter_intra_ring(REDUCESCATTER_ARGS);
|
||||
int ompi_coll_base_reduce_scatter_block_basic_linear(REDUCESCATTERBLOCK_ARGS);
|
||||
int ompi_coll_base_reduce_scatter_block_intra_recursivedoubling(REDUCESCATTERBLOCK_ARGS);
|
||||
int ompi_coll_base_reduce_scatter_block_intra_recursivehalving(REDUCESCATTERBLOCK_ARGS);
|
||||
int ompi_coll_base_reduce_scatter_block_intra_butterfly(REDUCESCATTERBLOCK_ARGS);
|
||||
|
||||
/* Scan */
|
||||
int ompi_coll_base_scan_intra_recursivedoubling(SCAN_ARGS);
|
||||
|
@ -40,7 +40,6 @@
|
||||
#include "coll_base_topo.h"
|
||||
#include "coll_base_util.h"
|
||||
|
||||
|
||||
/*
|
||||
* ompi_reduce_scatter_block_basic_linear
|
||||
*
|
||||
@ -511,3 +510,408 @@ cleanup_and_return:
|
||||
free(tmprecv_raw);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * ompi_mirror_perm: Returns mirror permutation of nbits low-order bits
 *                   of x [*].
 * [*] Warren Jr., Henry S. Hacker's Delight (2ed). 2013.
 *     Chapter 7. Rearranging Bits and Bytes.
 *
 * Accepts:  x     - value whose low-order bits are to be reversed
 *           nbits - number of low-order bits that take part in the reversal
 * Returns:  the nbits low-order bits of x in reversed order; bits of x above
 *           position nbits - 1 do not influence the result.
 *           nbits <= 0 yields 0; nbits larger than the width of unsigned int
 *           is treated as the full width.
 *
 * The swap masks below are 32-bit constants, i.e. the routine assumes a
 * 32-bit unsigned int.
 */
static unsigned int ompi_mirror_perm(unsigned int x, int nbits)
{
    const int width = (int)(sizeof(x) * CHAR_BIT);

    /* Shifting by the full width (nbits == 0) or by a negative amount
     * (nbits > width) is undefined behavior, so handle both explicitly. */
    if (nbits <= 0) {
        return 0;
    }
    if (nbits > width) {
        nbits = width;
    }

    /* Reverse all bits: swap neighbours, then pairs, nibbles, bytes, halves */
    x = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1));
    x = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2));
    x = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4));
    x = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8));
    x = ((x >> 16) | (x << 16));

    /* The reversed low-order bits now sit at the top; move them back down */
    return x >> (width - nbits);
}
|
||||
|
||||
static int ompi_coll_base_reduce_scatter_block_intra_butterfly_pof2(
|
||||
const void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op, struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
/*
|
||||
* ompi_coll_base_reduce_scatter_block_intra_butterfly
|
||||
*
|
||||
* Function: Butterfly algorithm for reduce_scatter_block
|
||||
* Accepts: Same as MPI_Reduce_scatter_block
|
||||
* Returns: MPI_SUCCESS or error code
|
||||
*
|
||||
* Description: Implements butterfly algorithm for MPI_Reduce_scatter_block [*].
|
||||
* The algorithm can be used both by commutative and non-commutative
|
||||
* operations, for power-of-two and non-power-of-two number of processes.
|
||||
*
|
||||
* [*] J.L. Traff. An improved Algorithm for (non-commutative) Reduce-scatter
|
||||
* with an Application // Proc. of EuroPVM/MPI, 2005. -- pp. 129-137.
|
||||
*
|
||||
* Time complexity:
|
||||
* m\lambda + (\alpha + m\beta + m\gamma) +
|
||||
* + 2\log_2(p)\alpha + 2m(1-1/p)\beta + m(1-1/p)\gamma +
|
||||
* + 3(\alpha + m/p\beta) = O(m\lambda + log(p)\alpha + m\beta + m\gamma),
|
||||
* where m = rcount * comm_size, p = comm_size
|
||||
* Memory requirements (per process): 2 * rcount * comm_size * typesize
|
||||
*
|
||||
* Example: comm_size=6, nprocs_pof2=4, nprocs_rem=2, rcount=1, sbuf=[0,1,...,5]
|
||||
* Step 1. Reduce the number of processes to 4
|
||||
* rank 0: [0|1|2|3|4|5]: send to 1: vrank -1
|
||||
* rank 1: [0|1|2|3|4|5]: recv from 0, op: vrank 0: [0|2|4|6|8|10]
|
||||
* rank 2: [0|1|2|3|4|5]: send to 3: vrank -1
|
||||
* rank 3: [0|1|2|3|4|5]: recv from 2, op: vrank 1: [0|2|4|6|8|10]
|
||||
* rank 4: [0|1|2|3|4|5]: vrank 2: [0|1|2|3|4|5]
|
||||
* rank 5: [0|1|2|3|4|5]: vrank 3: [0|1|2|3|4|5]
|
||||
*
|
||||
* Step 2. Butterfly. Buffer of 6 elements is divided into 4 blocks.
|
||||
* Round 1 (mask=1, nblocks=2)
|
||||
* 0: vrank -1
|
||||
* 1: vrank 0 [0 2|4 6|8|10]: exch with 1: send [2,3], recv [0,1]: [0 4|8 12|*|*]
|
||||
* 2: vrank -1
|
||||
* 3: vrank 1 [0 2|4 6|8|10]: exch with 0: send [0,1], recv [2,3]: [**|**|16|20]
|
||||
* 4: vrank 2 [0 1|2 3|4|5] : exch with 3: send [2,3], recv [0,1]: [0 2|4 6|*|*]
|
||||
* 5: vrank 3 [0 1|2 3|4|5] : exch with 2: send [0,1], recv [2,3]: [**|**|8|10]
|
||||
*
|
||||
* Round 2 (mask=2, nblocks=1)
|
||||
* 0: vrank -1
|
||||
* 1: vrank 0 [0 4|8 12|*|*]: exch with 2: send [1], recv [0]: [0 6|**|*|*]
|
||||
* 2: vrank -1
|
||||
* 3: vrank 1 [**|**|16|20] : exch with 3: send [3], recv [2]: [**|**|24|*]
|
||||
* 4: vrank 2 [0 2|4 6|*|*] : exch with 0: send [0], recv [1]: [**|12 18|*|*]
|
||||
* 5: vrank 3 [**|**|8|10] : exch with 1: send [2], recv [3]: [**|**|*|30]
|
||||
*
|
||||
* Step 3. Exchange with remote process according to a mirror permutation:
|
||||
* mperm(0)=0, mperm(1)=2, mperm(2)=1, mperm(3)=3
|
||||
* 0: vrank -1: recv "0" from process 0
|
||||
* 1: vrank 0 [0 6|**|*|*]: send "0" to 0, copy "6" to rbuf (mperm(0)=0)
|
||||
* 2: vrank -1: recv result "12" from process 4
|
||||
* 3: vrank 1 [**|**|24|*]
|
||||
* 4: vrank 2 [**|12 18|*|*]: send "12" to 2, send "18" to 3, recv "24" from 3
|
||||
* 5: vrank 3 [**|**|*|30]: copy "30" to rbuf (mperm(3)=3)
|
||||
*/
|
||||
int
|
||||
ompi_coll_base_reduce_scatter_block_intra_butterfly(
|
||||
const void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op, struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
char *tmpbuf[2] = {NULL, NULL}, *psend, *precv;
|
||||
ptrdiff_t span, gap, totalcount, extent;
|
||||
int err = MPI_SUCCESS;
|
||||
int comm_size = ompi_comm_size(comm);
|
||||
int rank = ompi_comm_rank(comm);
|
||||
|
||||
OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
|
||||
"coll:base:reduce_scatter_block_intra_butterfly: rank %d/%d",
|
||||
rank, comm_size));
|
||||
if (rcount == 0 || comm_size < 2)
|
||||
return MPI_SUCCESS;
|
||||
|
||||
if (!(comm_size & (comm_size - 1))) {
|
||||
/* Special case: comm_size is a power of two */
|
||||
return ompi_coll_base_reduce_scatter_block_intra_butterfly_pof2(
|
||||
sbuf, rbuf, rcount, dtype, op, comm, module);
|
||||
}
|
||||
|
||||
totalcount = comm_size * rcount;
|
||||
ompi_datatype_type_extent(dtype, &extent);
|
||||
span = opal_datatype_span(&dtype->super, totalcount, &gap);
|
||||
tmpbuf[0] = malloc(span);
|
||||
tmpbuf[1] = malloc(span);
|
||||
if (NULL == tmpbuf[0] || NULL == tmpbuf[1]) {
|
||||
err = OMPI_ERR_OUT_OF_RESOURCE;
|
||||
goto cleanup_and_return;
|
||||
}
|
||||
psend = tmpbuf[0] - gap;
|
||||
precv = tmpbuf[1] - gap;
|
||||
|
||||
if (sbuf != MPI_IN_PLACE) {
|
||||
err = ompi_datatype_copy_content_same_ddt(dtype, totalcount, psend, (char *)sbuf);
|
||||
if (MPI_SUCCESS != err) { goto cleanup_and_return; }
|
||||
} else {
|
||||
err = ompi_datatype_copy_content_same_ddt(dtype, totalcount, psend, rbuf);
|
||||
if (MPI_SUCCESS != err) { goto cleanup_and_return; }
|
||||
}
|
||||
|
||||
/*
|
||||
* Step 1. Reduce the number of processes to the nearest lower power of two
|
||||
* p' = 2^{\floor{\log_2 p}} by removing r = p - p' processes.
|
||||
* In the first 2r processes (ranks 0 to 2r - 1), all the even ranks send
|
||||
* the input vector to their neighbor (rank + 1) and all the odd ranks recv
|
||||
* the input vector and perform local reduction.
|
||||
* The odd ranks (0 to 2r - 1) contain the reduction with the input
|
||||
* vector on their neighbors (the even ranks). The first r odd
|
||||
* processes and the p - 2r last processes are renumbered from
|
||||
* 0 to 2^{\floor{\log_2 p}} - 1. Even ranks do not participate in the
|
||||
* rest of the algorithm.
|
||||
*/
|
||||
|
||||
/* Find nearest power-of-two less than or equal to comm_size */
|
||||
int nprocs_pof2 = opal_next_poweroftwo(comm_size);
|
||||
nprocs_pof2 >>= 1;
|
||||
int nprocs_rem = comm_size - nprocs_pof2;
|
||||
int log2_size = opal_cube_dim(nprocs_pof2);
|
||||
|
||||
int vrank = -1;
|
||||
if (rank < 2 * nprocs_rem) {
|
||||
if ((rank % 2) == 0) {
|
||||
/* Even process */
|
||||
err = MCA_PML_CALL(send(psend, totalcount, dtype, rank + 1,
|
||||
MCA_COLL_BASE_TAG_REDUCE_SCATTER_BLOCK,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
if (OMPI_SUCCESS != err) { goto cleanup_and_return; }
|
||||
/* This process does not participate in the rest of the algorithm */
|
||||
vrank = -1;
|
||||
} else {
|
||||
/* Odd process */
|
||||
err = MCA_PML_CALL(recv(precv, totalcount, dtype, rank - 1,
|
||||
MCA_COLL_BASE_TAG_REDUCE_SCATTER_BLOCK,
|
||||
comm, MPI_STATUS_IGNORE));
|
||||
if (OMPI_SUCCESS != err) { goto cleanup_and_return; }
|
||||
ompi_op_reduce(op, precv, psend, totalcount, dtype);
|
||||
/* Adjust rank to be the bottom "remain" ranks */
|
||||
vrank = rank / 2;
|
||||
}
|
||||
} else {
|
||||
/* Adjust rank to show that the bottom "even remain" ranks dropped out */
|
||||
vrank = rank - nprocs_rem;
|
||||
}
|
||||
|
||||
if (vrank != -1) {
|
||||
/*
|
||||
* Now, psend vector of size rcount * comm_size elements is divided into
|
||||
* nprocs_pof2 blocks:
|
||||
* block 0 has 2*rcount elems (for process 0 and 1)
|
||||
* block 1 has 2*rcount elems (for process 2 and 3)
|
||||
* ...
|
||||
* block r-1 has 2*rcount elems (for process 2*(r-1) and 2*(r-1)+1)
|
||||
* block r has rcount elems (for process r+r)
|
||||
* block r+1 has rcount elems (for process r+r+1)
|
||||
* ...
|
||||
* block nprocs_pof2 - 1 has rcount elems (for process r + nprocs_pof2-1)
|
||||
*/
|
||||
int nblocks = nprocs_pof2, send_index = 0, recv_index = 0;
|
||||
for (int mask = 1; mask < nprocs_pof2; mask <<= 1) {
|
||||
int vpeer = vrank ^ mask;
|
||||
int peer = (vpeer < nprocs_rem) ? vpeer * 2 + 1 : vpeer + nprocs_rem;
|
||||
|
||||
nblocks /= 2;
|
||||
if ((vrank & mask) == 0) {
|
||||
/* Send the upper half of reduction buffer, recv the lower half */
|
||||
send_index += nblocks;
|
||||
} else {
|
||||
/* Send the upper half of reduction buffer, recv the lower half */
|
||||
recv_index += nblocks;
|
||||
}
|
||||
int send_count = rcount * ompi_range_sum(send_index,
|
||||
send_index + nblocks - 1, nprocs_rem - 1);
|
||||
int recv_count = rcount * ompi_range_sum(recv_index,
|
||||
recv_index + nblocks - 1, nprocs_rem - 1);
|
||||
ptrdiff_t sdispl = rcount * ((send_index <= nprocs_rem - 1) ?
|
||||
2 * send_index : nprocs_rem + send_index);
|
||||
ptrdiff_t rdispl = rcount * ((recv_index <= nprocs_rem - 1) ?
|
||||
2 * recv_index : nprocs_rem + recv_index);
|
||||
|
||||
err = ompi_coll_base_sendrecv(psend + (ptrdiff_t)sdispl * extent, send_count,
|
||||
dtype, peer, MCA_COLL_BASE_TAG_REDUCE_SCATTER_BLOCK,
|
||||
precv + (ptrdiff_t)rdispl * extent, recv_count,
|
||||
dtype, peer, MCA_COLL_BASE_TAG_REDUCE_SCATTER_BLOCK,
|
||||
comm, MPI_STATUS_IGNORE, rank);
|
||||
if (MPI_SUCCESS != err) { goto cleanup_and_return; }
|
||||
|
||||
if (vrank < vpeer) {
|
||||
/* precv = psend <op> precv */
|
||||
ompi_op_reduce(op, psend + (ptrdiff_t)rdispl * extent,
|
||||
precv + (ptrdiff_t)rdispl * extent, recv_count, dtype);
|
||||
char *p = psend;
|
||||
psend = precv;
|
||||
precv = p;
|
||||
} else {
|
||||
/* psend = precv <op> psend */
|
||||
ompi_op_reduce(op, precv + (ptrdiff_t)rdispl * extent,
|
||||
psend + (ptrdiff_t)rdispl * extent, recv_count, dtype);
|
||||
}
|
||||
send_index = recv_index;
|
||||
}
|
||||
/*
|
||||
* psend points to the result: [send_index, send_index + recv_count - 1]
|
||||
* Exchange results with remote process according to a mirror permutation.
|
||||
*/
|
||||
int vpeer = ompi_mirror_perm(vrank, log2_size);
|
||||
int peer = (vpeer < nprocs_rem) ? vpeer * 2 + 1 : vpeer + nprocs_rem;
|
||||
|
||||
if (vpeer < nprocs_rem) {
|
||||
/*
|
||||
* Process has two blocks: for excluded process and own.
|
||||
* Send result to the excluded process.
|
||||
*/
|
||||
ptrdiff_t sdispl = rcount * ((send_index <= nprocs_rem - 1) ?
|
||||
2 * send_index : nprocs_rem + send_index);
|
||||
err = MCA_PML_CALL(send(psend + (ptrdiff_t)sdispl * extent,
|
||||
rcount, dtype, peer - 1,
|
||||
MCA_COLL_BASE_TAG_REDUCE_SCATTER_BLOCK,
|
||||
MCA_PML_BASE_SEND_STANDARD, comm));
|
||||
if (MPI_SUCCESS != err) { goto cleanup_and_return; }
|
||||
}
|
||||
|
||||
/* Send result to a remote process according to a mirror permutation */
|
||||
ptrdiff_t sdispl = rcount * ((send_index <= nprocs_rem - 1) ?
|
||||
2 * send_index : nprocs_rem + send_index);
|
||||
/* If process has two blocks, then send the second block (own block) */
|
||||
if (vpeer < nprocs_rem)
|
||||
sdispl += rcount;
|
||||
if (vpeer != vrank) {
|
||||
err = ompi_coll_base_sendrecv(psend + (ptrdiff_t)sdispl * extent, rcount,
|
||||
dtype, peer, MCA_COLL_BASE_TAG_REDUCE_SCATTER_BLOCK,
|
||||
rbuf, rcount, dtype, peer,
|
||||
MCA_COLL_BASE_TAG_REDUCE_SCATTER_BLOCK,
|
||||
comm, MPI_STATUS_IGNORE, rank);
|
||||
if (MPI_SUCCESS != err) { goto cleanup_and_return; }
|
||||
} else {
|
||||
err = ompi_datatype_copy_content_same_ddt(dtype, rcount, rbuf,
|
||||
psend + (ptrdiff_t)sdispl * extent);
|
||||
if (MPI_SUCCESS != err) { goto cleanup_and_return; }
|
||||
}
|
||||
|
||||
} else {
|
||||
/* Excluded process: receive result */
|
||||
int vpeer = ompi_mirror_perm((rank + 1) / 2, log2_size);
|
||||
int peer = (vpeer < nprocs_rem) ? vpeer * 2 + 1 : vpeer + nprocs_rem;
|
||||
err = MCA_PML_CALL(recv(rbuf, rcount, dtype, peer,
|
||||
MCA_COLL_BASE_TAG_REDUCE_SCATTER_BLOCK, comm,
|
||||
MPI_STATUS_IGNORE));
|
||||
if (OMPI_SUCCESS != err) { goto cleanup_and_return; }
|
||||
}
|
||||
|
||||
cleanup_and_return:
|
||||
if (tmpbuf[0])
|
||||
free(tmpbuf[0]);
|
||||
if (tmpbuf[1])
|
||||
free(tmpbuf[1]);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* ompi_coll_base_reduce_scatter_block_intra_butterfly_pof2
|
||||
*
|
||||
* Function: Butterfly algorithm for reduce_scatter_block
|
||||
* Accepts: Same as MPI_Reduce_scatter_block
|
||||
* Returns: MPI_SUCCESS or error code
|
||||
* Limitations: Power-of-two number of processes.
|
||||
*
|
||||
* Description: Implements butterfly algorithm for MPI_Reduce_scatter_block [*].
|
||||
* The algorithm can be used both by commutative and non-commutative
|
||||
* operations, for power-of-two number of processes.
|
||||
*
|
||||
* [*] J.L. Traff. An improved Algorithm for (non-commutative) Reduce-scatter
|
||||
* with an Application // Proc. of EuroPVM/MPI, 2005. -- pp. 129-137.
|
||||
*
|
||||
* Time complexity:
|
||||
* m\lambda + 2\log_2(p)\alpha + 2m(1-1/p)\beta + m(1-1/p)\gamma + m/p\lambda =
|
||||
* = O(m\lambda + log(p)\alpha + m\beta + m\gamma),
|
||||
* where m = rcount * comm_size, p = comm_size
|
||||
* Memory requirements (per process): 2 * rcount * comm_size * typesize
|
||||
*
|
||||
* Example: comm_size=4, rcount=1, sbuf=[0,1,2,3]
|
||||
* Step 1. Permute the blocks according to a mirror permutation:
|
||||
* mperm(0)=0, mperm(1)=2, mperm(2)=1, mperm(3)=3
|
||||
* sbuf=[0|1|2|3] ==> psend=[0|2|1|3]
|
||||
*
|
||||
* Step 2. Butterfly
|
||||
* Round 1 (mask=1, nblocks=2)
|
||||
* 0: [0|2|1|3]: exch with 1: send [2,3], recv [0,1]: [0|4|*|*]
|
||||
* 1: [0|2|1|3]: exch with 0: send [0,1], recv [2,3]: [*|*|2|6]
|
||||
* 2: [0|2|1|3]: exch with 3: send [2,3], recv [0,1]: [0|4|*|*]
|
||||
* 3: [0|2|1|3]: exch with 2: send [0,1], recv [2,3]: [*|*|2|6]
|
||||
*
|
||||
* Round 2 (mask=2, nblocks=1)
|
||||
* 0: [0|4|*|*]: exch with 2: send [1], recv [0]: [0|*|*|*]
|
||||
* 1: [*|*|2|6]: exch with 3: send [3], recv [2]: [*|*|4|*]
|
||||
* 2: [0|4|*|*]: exch with 0: send [0], recv [1]: [*|8|*|*]
|
||||
* 3: [*|*|2|6]: exch with 1: send [2], recv [3]: [*|*|*|12]
|
||||
*
|
||||
* Step 3. Copy result to rbuf
|
||||
*/
|
||||
static int
|
||||
ompi_coll_base_reduce_scatter_block_intra_butterfly_pof2(
|
||||
const void *sbuf, void *rbuf, int rcount, struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op, struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
char *tmpbuf[2] = {NULL, NULL}, *psend, *precv;
|
||||
ptrdiff_t span, gap, totalcount, extent;
|
||||
int err = MPI_SUCCESS;
|
||||
int comm_size = ompi_comm_size(comm);
|
||||
int rank = ompi_comm_rank(comm);
|
||||
|
||||
if (rcount == 0 || comm_size < 2)
|
||||
return MPI_SUCCESS;
|
||||
|
||||
totalcount = comm_size * rcount;
|
||||
ompi_datatype_type_extent(dtype, &extent);
|
||||
span = opal_datatype_span(&dtype->super, totalcount, &gap);
|
||||
tmpbuf[0] = malloc(span);
|
||||
tmpbuf[1] = malloc(span);
|
||||
if (NULL == tmpbuf[0] || NULL == tmpbuf[1]) {
|
||||
err = OMPI_ERR_OUT_OF_RESOURCE;
|
||||
goto cleanup_and_return;
|
||||
}
|
||||
psend = tmpbuf[0] - gap;
|
||||
precv = tmpbuf[1] - gap;
|
||||
|
||||
/* Permute the blocks according to a mirror permutation */
|
||||
int log2_comm_size = opal_cube_dim(comm_size);
|
||||
char *pdata = (sbuf != MPI_IN_PLACE) ? (char *)sbuf : rbuf;
|
||||
for (int i = 0; i < comm_size; i++) {
|
||||
char *src = pdata + (ptrdiff_t)i * extent * rcount;
|
||||
char *dst = psend + (ptrdiff_t)ompi_mirror_perm(i, log2_comm_size) * extent * rcount;
|
||||
err = ompi_datatype_copy_content_same_ddt(dtype, rcount, dst, src);
|
||||
if (MPI_SUCCESS != err) { goto cleanup_and_return; }
|
||||
}
|
||||
|
||||
int nblocks = totalcount, send_index = 0, recv_index = 0;
|
||||
for (int mask = 1; mask < comm_size; mask <<= 1) {
|
||||
int peer = rank ^ mask;
|
||||
nblocks /= 2;
|
||||
|
||||
if ((rank & mask) == 0) {
|
||||
/* Send the upper half of reduction buffer, recv the lower half */
|
||||
send_index += nblocks;
|
||||
} else {
|
||||
/* Send the upper half of reduction buffer, recv the lower half */
|
||||
recv_index += nblocks;
|
||||
}
|
||||
err = ompi_coll_base_sendrecv(psend + (ptrdiff_t)send_index * extent,
|
||||
nblocks, dtype, peer,
|
||||
MCA_COLL_BASE_TAG_REDUCE_SCATTER_BLOCK,
|
||||
precv + (ptrdiff_t)recv_index * extent,
|
||||
nblocks, dtype, peer,
|
||||
MCA_COLL_BASE_TAG_REDUCE_SCATTER_BLOCK,
|
||||
comm, MPI_STATUS_IGNORE, rank);
|
||||
if (MPI_SUCCESS != err) { goto cleanup_and_return; }
|
||||
|
||||
if (rank < peer) {
|
||||
/* precv = psend <op> precv */
|
||||
ompi_op_reduce(op, psend + (ptrdiff_t)recv_index * extent,
|
||||
precv + (ptrdiff_t)recv_index * extent, nblocks, dtype);
|
||||
char *p = psend;
|
||||
psend = precv;
|
||||
precv = p;
|
||||
} else {
|
||||
/* psend = precv <op> psend */
|
||||
ompi_op_reduce(op, precv + (ptrdiff_t)recv_index * extent,
|
||||
psend + (ptrdiff_t)recv_index * extent, nblocks, dtype);
|
||||
}
|
||||
send_index = recv_index;
|
||||
}
|
||||
/* Copy the result to the rbuf */
|
||||
err = ompi_datatype_copy_content_same_ddt(dtype, rcount, rbuf,
|
||||
psend + (ptrdiff_t)recv_index * extent);
|
||||
if (MPI_SUCCESS != err) { goto cleanup_and_return; }
|
||||
|
||||
cleanup_and_return:
|
||||
if (tmpbuf[0])
|
||||
free(tmpbuf[0]);
|
||||
if (tmpbuf[1])
|
||||
free(tmpbuf[1]);
|
||||
return err;
|
||||
}
|
||||
|
@ -36,6 +36,7 @@ static mca_base_var_enum_value_t reduce_scatter_block_algorithms[] = {
|
||||
{1, "basic_linear"},
|
||||
{2, "recursive_doubling"},
|
||||
{3, "recursive_halving"},
|
||||
{4, "butterfly"},
|
||||
{0, NULL}
|
||||
};
|
||||
|
||||
@ -75,7 +76,8 @@ int ompi_coll_tuned_reduce_scatter_block_intra_check_forced_init (coll_tuned_for
|
||||
mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
|
||||
"reduce_scatter_block_algorithm",
|
||||
"Which reduce reduce_scatter_block algorithm is used. "
|
||||
"Can be locked down to choice of: 0 ignore, 1 basic_linear, 2 recursive_doubling",
|
||||
"Can be locked down to choice of: 0 ignore, 1 basic_linear, 2 recursive_doubling, "
|
||||
"3 recursive_halving, 4 butterfly",
|
||||
MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_5,
|
||||
MCA_BASE_VAR_SCOPE_ALL,
|
||||
@ -128,6 +130,8 @@ int ompi_coll_tuned_reduce_scatter_block_intra_do_this(const void *sbuf, void *r
|
||||
dtype, op, comm, module);
|
||||
case (3): return ompi_coll_base_reduce_scatter_block_intra_recursivehalving(sbuf, rbuf, rcount,
|
||||
dtype, op, comm, module);
|
||||
case (4): return ompi_coll_base_reduce_scatter_block_intra_butterfly(sbuf, rbuf, rcount, dtype, op, comm,
|
||||
module);
|
||||
} /* switch */
|
||||
OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:reduce_scatter_block_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
|
||||
algorithm, ompi_coll_tuned_forced_max_algorithms[REDUCESCATTERBLOCK]));
|
||||
|
@ -172,7 +172,7 @@ int mca_common_ompio_set_view (mca_io_ompio_file_t *fh,
|
||||
}
|
||||
}
|
||||
|
||||
if ( SIMPLE != mca_io_ompio_grouping_option || SIMPLE_PLUS != mca_io_ompio_grouping_option ) {
|
||||
if ( SIMPLE != mca_io_ompio_grouping_option && SIMPLE_PLUS != mca_io_ompio_grouping_option ) {
|
||||
|
||||
ret = mca_io_ompio_fview_based_grouping(fh,
|
||||
&num_groups,
|
||||
|
@ -1,51 +0,0 @@
|
||||
#
|
||||
# Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
# Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
AM_CPPFLAGS = $(mtl_mxm_CPPFLAGS)
|
||||
|
||||
dist_ompidata_DATA = help-mtl-mxm.txt
|
||||
|
||||
mtl_mxm_sources = \
|
||||
mtl_mxm.c \
|
||||
mtl_mxm.h \
|
||||
mtl_mxm_cancel.c \
|
||||
mtl_mxm_component.c \
|
||||
mtl_mxm_endpoint.c \
|
||||
mtl_mxm_endpoint.h \
|
||||
mtl_mxm_probe.c \
|
||||
mtl_mxm_recv.c \
|
||||
mtl_mxm_request.h \
|
||||
mtl_mxm_send.c \
|
||||
mtl_mxm_debug.h \
|
||||
mtl_mxm_types.h
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_ompi_mtl_mxm_DSO
|
||||
component_noinst =
|
||||
component_install = mca_mtl_mxm.la
|
||||
else
|
||||
component_noinst = libmca_mtl_mxm.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(ompilibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_mtl_mxm_la_SOURCES = $(mtl_mxm_sources)
|
||||
mca_mtl_mxm_la_LIBADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \
|
||||
$(mtl_mxm_LIBS)
|
||||
mca_mtl_mxm_la_LDFLAGS = -module -avoid-version $(mtl_mxm_LDFLAGS)
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_mtl_mxm_la_SOURCES = $(mtl_mxm_sources)
|
||||
libmca_mtl_mxm_la_LIBADD = $(mtl_mxm_LIBS)
|
||||
libmca_mtl_mxm_la_LDFLAGS = -module -avoid-version $(mtl_mxm_LDFLAGS)
|
@ -1,39 +0,0 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
# Copyright (c) 2013 Sandia National Laboratories. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_ompi_mtl_mxm_POST_CONFIG(will_build)
|
||||
# ----------------------------------------
|
||||
# Only require the tag if we're actually going to be built
|
||||
AC_DEFUN([MCA_ompi_mtl_mxm_POST_CONFIG], [
|
||||
AS_IF([test "$1" = "1"], [OMPI_REQUIRE_ENDPOINT_TAG([MTL])])
|
||||
])dnl
|
||||
|
||||
# MCA_ompi_mtl_mxm_CONFIG([action-if-can-compile],
|
||||
# [action-if-cant-compile])
|
||||
# ------------------------------------------------
|
||||
AC_DEFUN([MCA_ompi_mtl_mxm_CONFIG],[
|
||||
AC_CONFIG_FILES([ompi/mca/mtl/mxm/Makefile])
|
||||
|
||||
OMPI_CHECK_MXM([mtl_mxm],
|
||||
[mtl_mxm_happy="yes"],
|
||||
[mtl_mxm_happy="no"])
|
||||
|
||||
AS_IF([test "$mtl_mxm_happy" = "yes"],
|
||||
[$1],
|
||||
[$2])
|
||||
|
||||
# substitute in the things needed to build mxm
|
||||
AC_SUBST([mtl_mxm_CFLAGS])
|
||||
AC_SUBST([mtl_mxm_CPPFLAGS])
|
||||
AC_SUBST([mtl_mxm_LDFLAGS])
|
||||
AC_SUBST([mtl_mxm_LIBS])
|
||||
])dnl
|
||||
|
@ -1,67 +0,0 @@
|
||||
#
|
||||
# Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
|
||||
[no uuid present]
|
||||
Error obtaining unique transport key from ORTE (orte_precondition_transports %s
|
||||
the environment).
|
||||
|
||||
Local host: %s
|
||||
|
||||
[unable to create endpoint]
|
||||
MXM was unable to create an endpoint. Please make sure that the network link is
|
||||
active on the node and the hardware is functioning.
|
||||
|
||||
Error: %s
|
||||
|
||||
[unable to extract endpoint ptl address]
|
||||
MXM was unable to read settings for endpoint
|
||||
|
||||
PTL ID: %d
|
||||
Error: %s
|
||||
|
||||
[unable to extract endpoint address]
|
||||
MXM was unable to read settings for endpoint
|
||||
|
||||
Error: %s
|
||||
|
||||
[mxm mq create]
|
||||
Failed to create MQ for endpoint
|
||||
|
||||
Error: %s
|
||||
|
||||
[errors during mxm_progress]
|
||||
|
||||
Error %s occurred in attempting to make network progress (mxm_progress).
|
||||
|
||||
|
||||
[mxm init]
|
||||
Initialization of MXM library failed.
|
||||
|
||||
Error: %s
|
||||
|
||||
[error posting receive]
|
||||
Unable to post application receive buffer
|
||||
|
||||
Error: %s
|
||||
Buffer: %p
|
||||
Length: %d
|
||||
|
||||
[error posting message receive]
|
||||
Unable to post application receive buffer
|
||||
|
||||
Error: %s
|
||||
Buffer: %p
|
||||
Length: %d
|
||||
|
||||
[error posting send]
|
||||
Unable to post application send buffer
|
||||
|
||||
Error: %s
|
||||
|
@ -1,679 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (C) 2001-2011 Mellanox Technologies Ltd. ALL RIGHTS RESERVED.
|
||||
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "ompi/mca/mtl/mtl.h"
|
||||
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "opal/memoryhooks/memory.h"
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
|
||||
#include "mtl_mxm.h"
|
||||
#include "mtl_mxm_types.h"
|
||||
#include "mtl_mxm_endpoint.h"
|
||||
#include "mtl_mxm_request.h"
|
||||
|
||||
mca_mtl_mxm_module_t ompi_mtl_mxm = {
|
||||
{
|
||||
0, /* max context id */
|
||||
0, /* max tag value */
|
||||
0, /* request reserve space */
|
||||
0, /* flags */
|
||||
ompi_mtl_mxm_add_procs,
|
||||
ompi_mtl_mxm_del_procs,
|
||||
ompi_mtl_mxm_finalize,
|
||||
ompi_mtl_mxm_send,
|
||||
ompi_mtl_mxm_isend,
|
||||
ompi_mtl_mxm_irecv,
|
||||
ompi_mtl_mxm_iprobe,
|
||||
ompi_mtl_mxm_imrecv,
|
||||
ompi_mtl_mxm_improbe,
|
||||
ompi_mtl_mxm_cancel,
|
||||
ompi_mtl_mxm_add_comm,
|
||||
ompi_mtl_mxm_del_comm
|
||||
},
|
||||
0,
|
||||
0,
|
||||
NULL,
|
||||
NULL
|
||||
};
|
||||
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
static uint32_t ompi_mtl_mxm_get_job_id(void)
|
||||
{
|
||||
uint8_t unique_job_key[16];
|
||||
uint32_t job_key;
|
||||
unsigned long long *uu;
|
||||
char *generated_key;
|
||||
|
||||
uu = (unsigned long long *) unique_job_key;
|
||||
|
||||
generated_key = getenv(OPAL_MCA_PREFIX"orte_precondition_transports");
|
||||
memset(uu, 0, sizeof(unique_job_key));
|
||||
|
||||
if (!generated_key || (strlen(generated_key) != 33) || sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2) {
|
||||
opal_show_help("help-mtl-mxm.txt", "no uuid present", true,
|
||||
generated_key ? "could not be parsed from" :
|
||||
"not present in", ompi_process_info.nodename);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* decode OPAL_MCA_PREFIX"orte_precondition_transports" that looks as
|
||||
* 000003ca00000000-0000000100000000
|
||||
* jobfam-stepid
|
||||
* to get jobid coded with ORTE_CONSTRUCT_LOCAL_JOBID()
|
||||
*/
|
||||
#define GET_LOCAL_JOBID(local, job) \
|
||||
( ((local) & 0xffff0000) | ((job) & 0x0000ffff) )
|
||||
job_key = GET_LOCAL_JOBID((uu[0]>>(8 * sizeof(int))) << 16, uu[1]>>(8 * sizeof(int)));
|
||||
|
||||
return job_key;
|
||||
}
|
||||
#endif
|
||||
|
||||
int ompi_mtl_mxm_progress(void);
|
||||
#if MXM_API >= MXM_VERSION(2,0)
|
||||
static void ompi_mtl_mxm_mem_release_cb(void *buf, size_t length,
|
||||
void *cbdata, bool from_alloc);
|
||||
#endif
|
||||
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
/*
 * Ask MXM for the local address of PTL `ptlid` on the module's endpoint and
 * store it in ep_info->ptl_addr[ptlid].
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR after showing a help message.
 */
static int ompi_mtl_mxm_get_ep_address(ompi_mtl_mxm_ep_conn_info_t *ep_info, mxm_ptl_id_t ptlid)
{
    size_t len = sizeof(ep_info->ptl_addr[ptlid]);
    mxm_error_t status;

    status = mxm_ep_address(ompi_mtl_mxm.ep, ptlid,
                            (struct sockaddr *) &ep_info->ptl_addr[ptlid], &len);
    if (MXM_OK == status) {
        return OMPI_SUCCESS;
    }

    opal_show_help("help-mtl-mxm.txt", "unable to extract endpoint ptl address",
                   true, (int)ptlid, mxm_error_string(status));
    return OMPI_ERROR;
}
|
||||
#else
|
||||
static int ompi_mtl_mxm_get_ep_address(void **address_p, size_t *address_len_p)
|
||||
{
|
||||
mxm_error_t err;
|
||||
|
||||
*address_len_p = 0;
|
||||
err = mxm_ep_get_address(ompi_mtl_mxm.ep, NULL, address_len_p);
|
||||
if (err != MXM_ERR_BUFFER_TOO_SMALL) {
|
||||
MXM_ERROR("Failed to get ep address length");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
*address_p = malloc(*address_len_p);
|
||||
if (*address_p == NULL) {
|
||||
MXM_ERROR("Failed to allocate ep address buffer");
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
err = mxm_ep_get_address(ompi_mtl_mxm.ep, *address_p, address_len_p);
|
||||
if (MXM_OK != err) {
|
||||
opal_show_help("help-mtl-mxm.txt", "unable to extract endpoint address",
|
||||
true, mxm_error_string(err));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Larger of two values.  Function-like macro: an argument may be evaluated
 * twice, so pass only side-effect-free expressions. */
#define max(a,b) ((a)>(b)?(a):(b))
|
||||
|
||||
static mxm_error_t
|
||||
ompi_mtl_mxm_create_ep(mxm_h ctx, mxm_ep_h *ep, unsigned ptl_bitmap, int lr,
|
||||
uint32_t jobid, uint64_t mxlr, int nlps)
|
||||
{
|
||||
mxm_error_t err;
|
||||
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
ompi_mtl_mxm.mxm_ep_opts->job_id = jobid;
|
||||
ompi_mtl_mxm.mxm_ep_opts->local_rank = lr;
|
||||
ompi_mtl_mxm.mxm_ep_opts->num_local_procs = nlps;
|
||||
err = mxm_ep_create(ctx, ompi_mtl_mxm.mxm_ep_opts, ep);
|
||||
#else
|
||||
err = mxm_ep_create(ctx, ompi_mtl_mxm.mxm_ep_opts, ep);
|
||||
#endif
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Send the endpoint address via modex.  Some modex backends (for example
* ess/pmi) limit the size of a single value, so the address is sent in
* bounded-size parts.
|
||||
*
|
||||
*/
|
||||
static int ompi_mtl_mxm_send_ep_address(void *address, size_t address_len)
|
||||
{
|
||||
char *modex_component_name = mca_base_component_to_string(&mca_mtl_mxm_component.super.mtl_version);
|
||||
char *modex_name = malloc(strlen(modex_component_name) + 5);
|
||||
const size_t modex_max_size = 0x60;
|
||||
unsigned char *modex_buf_ptr;
|
||||
size_t modex_buf_size;
|
||||
size_t modex_cur_size;
|
||||
int modex_name_id = 0;
|
||||
int rc;
|
||||
|
||||
/* Send address length */
|
||||
sprintf(modex_name, "%s-len", modex_component_name);
|
||||
OPAL_MODEX_SEND_STRING(rc, OPAL_PMIX_GLOBAL,
|
||||
modex_name, &address_len, sizeof(address_len));
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
MXM_ERROR("failed to send address length");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
/* Send address, in parts.
|
||||
* modex name looks as mtl.mxm.1.5-18 where mtl.mxm.1.5 is the component and 18 is part index.
|
||||
*/
|
||||
modex_buf_size = address_len;
|
||||
modex_buf_ptr = address;
|
||||
while (modex_buf_size) {
|
||||
sprintf(modex_name, "%s-%d", modex_component_name, modex_name_id);
|
||||
modex_cur_size = (modex_buf_size < modex_max_size) ? modex_buf_size : modex_max_size;
|
||||
OPAL_MODEX_SEND_STRING(rc, OPAL_PMIX_GLOBAL,
|
||||
modex_name, modex_buf_ptr, modex_cur_size);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
MXM_ERROR("Open MPI couldn't distribute EP connection details");
|
||||
goto bail;
|
||||
}
|
||||
|
||||
modex_name_id++;
|
||||
modex_buf_ptr += modex_cur_size;
|
||||
modex_buf_size -= modex_cur_size;
|
||||
}
|
||||
|
||||
rc = OMPI_SUCCESS;
|
||||
|
||||
bail:
|
||||
free(modex_component_name);
|
||||
free(modex_name);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* Receive the endpoint address via modex
|
||||
*/
|
||||
/*
 * Retrieve the endpoint address that `source_proc` published through modex.
 *
 * The length is read from the key "<component>-len", then the address is
 * reassembled from the parts "<component>-0", "<component>-1", ...
 *
 * On success *address_p is a malloc'd buffer of *address_len_p bytes owned
 * by the caller.  On any failure *address_p is NULL and *address_len_p is 0.
 *
 * NOTE(review): the buffers handed back by OPAL_MODEX_RECV_STRING are not
 * released here (the original code did not release them either); confirm
 * who owns them.
 */
static int ompi_mtl_mxm_recv_ep_address(ompi_proc_t *source_proc, void **address_p,
                                        size_t *address_len_p)
{
    /* Room for '-', every decimal digit (and sign) of an int, and the NUL;
     * the fixed "-len" suffix fits as well. */
    enum { MODEX_SUFFIX_MAX = 3 * sizeof(int) + 4 };
    char *modex_component_name;
    char *modex_name = NULL;
    size_t modex_name_size;
    uint8_t *modex_buf_ptr;
    int32_t modex_cur_size;
    size_t modex_buf_size;
    size_t *address_len_buf_ptr;
    int modex_name_id = 0;
    int rc;

    *address_p = NULL;
    *address_len_p = 0;

    modex_component_name = mca_base_component_to_string(&mca_mtl_mxm_component.super.mtl_version);
    if (NULL == modex_component_name) {
        MXM_ERROR("Failed to build modex key prefix");
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    modex_name_size = strlen(modex_component_name) + MODEX_SUFFIX_MAX;
    modex_name = malloc(modex_name_size);
    if (NULL == modex_name) {
        MXM_ERROR("Failed to allocate modex key buffer");
        rc = OMPI_ERR_OUT_OF_RESOURCE;
        goto bail;
    }

    /* Receive address length */
    snprintf(modex_name, modex_name_size, "%s-len", modex_component_name);
    OPAL_MODEX_RECV_STRING(rc, modex_name, &source_proc->super.proc_name,
                           (uint8_t **)&address_len_buf_ptr,
                           &modex_cur_size);
    if (OMPI_SUCCESS != rc) {
        MXM_ERROR("Failed to receive ep address length");
        goto bail;
    }

    /* The length record must hold a whole size_t before it is read. */
    if (NULL == address_len_buf_ptr ||
        modex_cur_size < (int32_t)sizeof(*address_len_buf_ptr)) {
        MXM_ERROR("Received malformed ep address length");
        rc = OMPI_ERROR;
        goto bail;
    }

    /* Allocate buffer to hold the address */
    *address_len_p = *address_len_buf_ptr;
    *address_p = malloc(*address_len_p);
    if (*address_p == NULL) {
        MXM_ERROR("Failed to allocate modex receive buffer");
        rc = OMPI_ERR_OUT_OF_RESOURCE;
        goto bail;
    }

    /* Receive the data, in parts */
    modex_buf_size = 0;
    while (modex_buf_size < *address_len_p) {
        snprintf(modex_name, modex_name_size, "%s-%d", modex_component_name, modex_name_id);
        OPAL_MODEX_RECV_STRING(rc, modex_name, &source_proc->super.proc_name,
                               &modex_buf_ptr,
                               &modex_cur_size);
        if (OMPI_SUCCESS != rc) {
            MXM_ERROR("Open MPI couldn't distribute EP connection details");
            goto bail;
        }

        /* Never copy past the announced length, and refuse empty or
         * negative parts, which would make no progress or turn into a
         * huge memcpy size. */
        if (modex_cur_size <= 0 ||
            (size_t)modex_cur_size > *address_len_p - modex_buf_size) {
            MXM_ERROR("Received ep address part with invalid size");
            rc = OMPI_ERROR;
            goto bail;
        }

        memcpy((char*)(*address_p) + modex_buf_size, modex_buf_ptr, modex_cur_size);
        modex_buf_size += modex_cur_size;
        modex_name_id++;
    }

    rc = OMPI_SUCCESS;

bail:
    if (OMPI_SUCCESS != rc) {
        /* Leave the output parameters in a well-defined empty state. */
        free(*address_p);
        *address_p = NULL;
        *address_len_p = 0;
    }
    free(modex_component_name);
    free(modex_name);
    return rc;
}
|
||||
|
||||
int ompi_mtl_mxm_module_init(void)
|
||||
{
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
ompi_mtl_mxm_ep_conn_info_t ep_info;
|
||||
#endif
|
||||
void *ep_address;
|
||||
size_t ep_address_len;
|
||||
mxm_error_t err;
|
||||
uint32_t jobid;
|
||||
uint64_t mxlr;
|
||||
ompi_proc_t **procs;
|
||||
unsigned ptl_bitmap;
|
||||
size_t totps, proc;
|
||||
int lr, nlps;
|
||||
int rc;
|
||||
|
||||
mxlr = 0;
|
||||
lr = -1;
|
||||
jobid = 0;
|
||||
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
jobid = ompi_mtl_mxm_get_job_id();
|
||||
if (0 == jobid) {
|
||||
MXM_ERROR("Failed to generate jobid");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
#endif
|
||||
|
||||
totps = ompi_proc_world_size ();
|
||||
|
||||
if (totps < (size_t)ompi_mtl_mxm.mxm_np) {
|
||||
MXM_VERBOSE(1, "MXM support will be disabled because of total number "
|
||||
"of processes (%lu) is less than the minimum set by the "
|
||||
"mtl_mxm_np MCA parameter (%u)", totps, ompi_mtl_mxm.mxm_np);
|
||||
return OMPI_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
MXM_VERBOSE(1, "MXM support enabled");
|
||||
|
||||
if (ORTE_NODE_RANK_INVALID == (lr = ompi_process_info.my_node_rank)) {
|
||||
MXM_ERROR("Unable to obtain local node rank");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
nlps = ompi_process_info.num_local_peers + 1;
|
||||
|
||||
/* local procs are always allocated. if that ever changes this will need to
|
||||
* be modified. */
|
||||
procs = ompi_proc_get_allocated (&totps);
|
||||
if (NULL == procs) {
|
||||
MXM_ERROR("Unable to obtain process list");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
for (proc = 0; proc < totps; proc++) {
|
||||
if (OPAL_PROC_ON_LOCAL_NODE(procs[proc]->super.proc_flags)) {
|
||||
mxlr = max(mxlr, procs[proc]->super.proc_name.vpid);
|
||||
}
|
||||
}
|
||||
free(procs);
|
||||
|
||||
/* Setup the endpoint options and local addresses to bind to. */
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
ptl_bitmap = ompi_mtl_mxm.mxm_ctx_opts->ptl_bitmap;
|
||||
#else
|
||||
ptl_bitmap = 0;
|
||||
#endif
|
||||
|
||||
/* Open MXM endpoint */
|
||||
err = ompi_mtl_mxm_create_ep(ompi_mtl_mxm.mxm_context, &ompi_mtl_mxm.ep,
|
||||
ptl_bitmap, lr, jobid, mxlr, nlps);
|
||||
if (MXM_OK != err) {
|
||||
opal_show_help("help-mtl-mxm.txt", "unable to create endpoint", true,
|
||||
mxm_error_string(err));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get address for each PTL on this endpoint, and share it with other ranks.
|
||||
*/
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
if ((ptl_bitmap & MXM_BIT(MXM_PTL_SELF)) &&
|
||||
OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_SELF)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
if ((ptl_bitmap & MXM_BIT(MXM_PTL_RDMA)) &&
|
||||
OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_RDMA)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
if ((ptl_bitmap & MXM_BIT(MXM_PTL_SHM)) &&
|
||||
OMPI_SUCCESS != ompi_mtl_mxm_get_ep_address(&ep_info, MXM_PTL_SHM)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
ep_address = &ep_info;
|
||||
ep_address_len = sizeof(ep_info);
|
||||
#else
|
||||
rc = ompi_mtl_mxm_get_ep_address(&ep_address, &ep_address_len);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
#endif
|
||||
|
||||
rc = ompi_mtl_mxm_send_ep_address(ep_address, ep_address_len);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
MXM_ERROR("Modex session failed.");
|
||||
return rc;
|
||||
}
|
||||
|
||||
#if MXM_API >= MXM_VERSION(2,0)
|
||||
free(ep_address);
|
||||
#endif
|
||||
|
||||
/* Register the MXM progress function */
|
||||
opal_progress_register(ompi_mtl_mxm_progress);
|
||||
|
||||
ompi_mtl_mxm.super.mtl_flags |= MCA_MTL_BASE_FLAG_REQUIRE_WORLD;
|
||||
|
||||
|
||||
#if MXM_API >= MXM_VERSION(2,0)
|
||||
if (ompi_mtl_mxm.using_mem_hooks) {
|
||||
opal_mem_hooks_register_release(ompi_mtl_mxm_mem_release_cb, NULL);
|
||||
}
|
||||
#endif
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_mtl_mxm_finalize(struct mca_mtl_base_module_t* mtl)
|
||||
{
|
||||
#if MXM_API >= MXM_VERSION(2,0)
|
||||
if (ompi_mtl_mxm.using_mem_hooks) {
|
||||
opal_mem_hooks_unregister_release(ompi_mtl_mxm_mem_release_cb);
|
||||
}
|
||||
#endif
|
||||
opal_progress_unregister(ompi_mtl_mxm_progress);
|
||||
mxm_ep_destroy(ompi_mtl_mxm.ep);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
|
||||
struct ompi_proc_t** procs)
|
||||
{
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
ompi_mtl_mxm_ep_conn_info_t *ep_info;
|
||||
mxm_conn_req_t *conn_reqs;
|
||||
size_t ep_index = 0;
|
||||
#endif
|
||||
void *ep_address = NULL;
|
||||
size_t ep_address_len;
|
||||
mxm_error_t err;
|
||||
size_t i;
|
||||
int rc;
|
||||
mca_mtl_mxm_endpoint_t *endpoint;
|
||||
|
||||
assert(mtl == &ompi_mtl_mxm.super);
|
||||
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
/* Allocate connection requests */
|
||||
conn_reqs = calloc(nprocs, sizeof(mxm_conn_req_t));
|
||||
ep_info = calloc(nprocs, sizeof(ompi_mtl_mxm_ep_conn_info_t));
|
||||
if (NULL == conn_reqs || NULL == ep_info) {
|
||||
rc = OMPI_ERR_OUT_OF_RESOURCE;
|
||||
goto bail;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Get the EP connection requests for all the processes from modex */
|
||||
for (i = 0; i < nprocs; ++i) {
|
||||
if (NULL != procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]) {
|
||||
continue; /* already connected to this endpoint */
|
||||
}
|
||||
rc = ompi_mtl_mxm_recv_ep_address(procs[i], &ep_address, &ep_address_len);
|
||||
if (rc != OMPI_SUCCESS) {
|
||||
goto bail;
|
||||
}
|
||||
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
if (ep_address_len != sizeof(ep_info[i])) {
|
||||
MXM_ERROR("Invalid endpoint address length");
|
||||
free(ep_address);
|
||||
rc = OMPI_ERROR;
|
||||
goto bail;
|
||||
}
|
||||
|
||||
memcpy(&ep_info[i], ep_address, ep_address_len);
|
||||
free(ep_address);
|
||||
conn_reqs[ep_index].ptl_addr[MXM_PTL_SELF] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SELF]);
|
||||
conn_reqs[ep_index].ptl_addr[MXM_PTL_SHM] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_SHM]);
|
||||
conn_reqs[ep_index].ptl_addr[MXM_PTL_RDMA] = (struct sockaddr *)&(ep_info[i].ptl_addr[MXM_PTL_RDMA]);
|
||||
ep_index++;
|
||||
|
||||
#else
|
||||
endpoint = OBJ_NEW(mca_mtl_mxm_endpoint_t);
|
||||
endpoint->mtl_mxm_module = &ompi_mtl_mxm;
|
||||
err = mxm_ep_connect(ompi_mtl_mxm.ep, ep_address, &endpoint->mxm_conn);
|
||||
free(ep_address);
|
||||
if (err != MXM_OK) {
|
||||
MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
|
||||
rc = OMPI_ERROR;
|
||||
goto bail;
|
||||
}
|
||||
procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
/* Connect to remote peers */
|
||||
err = mxm_ep_connect(ompi_mtl_mxm.ep, conn_reqs, ep_index, -1);
|
||||
if (MXM_OK != err) {
|
||||
MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
|
||||
for (i = 0; i < ep_index; ++i) {
|
||||
if (MXM_OK != conn_reqs[i].error) {
|
||||
MXM_ERROR("MXM EP connect to %s error: %s\n",
|
||||
(NULL == procs[i]->super.proc_hostname) ?
|
||||
"unknown" : procs[i]->proc_hostname,
|
||||
mxm_error_string(conn_reqs[i].error));
|
||||
}
|
||||
}
|
||||
rc = OMPI_ERROR;
|
||||
goto bail;
|
||||
}
|
||||
|
||||
/* Save returned connections */
|
||||
for (i = 0; i < ep_index; ++i) {
|
||||
endpoint = OBJ_NEW(mca_mtl_mxm_endpoint_t);
|
||||
endpoint->mtl_mxm_module = &ompi_mtl_mxm;
|
||||
endpoint->mxm_conn = conn_reqs[i].conn;
|
||||
procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if MXM_API >= MXM_VERSION(3,1)
|
||||
if (ompi_mtl_mxm.bulk_connect) {
|
||||
mxm_ep_wireup(ompi_mtl_mxm.ep);
|
||||
}
|
||||
#endif
|
||||
|
||||
rc = OMPI_SUCCESS;
|
||||
|
||||
bail:
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
free(conn_reqs);
|
||||
free(ep_info);
|
||||
#endif
|
||||
return rc;
|
||||
}
|
||||
|
||||
int ompi_mtl_add_single_proc(struct mca_mtl_base_module_t *mtl,
|
||||
struct ompi_proc_t* procs)
|
||||
{
|
||||
void *ep_address = NULL;
|
||||
size_t ep_address_len;
|
||||
mxm_error_t err;
|
||||
int rc;
|
||||
mca_mtl_mxm_endpoint_t *endpoint;
|
||||
|
||||
assert(mtl == &ompi_mtl_mxm.super);
|
||||
|
||||
if (NULL != procs->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
rc = ompi_mtl_mxm_recv_ep_address(procs, &ep_address, &ep_address_len);
|
||||
if (rc != OMPI_SUCCESS) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
ompi_mtl_mxm_ep_conn_info_t ep_info;
|
||||
mxm_conn_req_t conn_req;
|
||||
|
||||
if (ep_address_len != sizeof(ep_info)) {
|
||||
MXM_ERROR("Invalid endpoint address length");
|
||||
free(ep_address);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
memcpy(&ep_info, ep_address, ep_address_len);
|
||||
free(ep_address);
|
||||
conn_req.ptl_addr[MXM_PTL_SELF] = (struct sockaddr *)&(ep_info.ptl_addr[MXM_PTL_SELF]);
|
||||
conn_req.ptl_addr[MXM_PTL_SHM] = (struct sockaddr *)&(ep_info.ptl_addr[MXM_PTL_SHM]);
|
||||
conn_req.ptl_addr[MXM_PTL_RDMA] = (struct sockaddr *)&(ep_info.ptl_addr[MXM_PTL_RDMA]);
|
||||
|
||||
/* Connect to remote peers */
|
||||
err = mxm_ep_connect(ompi_mtl_mxm.ep, conn_req, 1, -1);
|
||||
if (MXM_OK != err) {
|
||||
MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
|
||||
if (MXM_OK != conn_req.error) {
|
||||
MXM_ERROR("MXM EP connect to %s error: %s\n",
|
||||
(NULL == procs->super.proc_hostname) ?
|
||||
"unknown" : procs->proc_hostname,
|
||||
mxm_error_string(conn_reqs.error));
|
||||
}
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* Save returned connections */
|
||||
endpoint = OBJ_NEW(mca_mtl_mxm_endpoint_t);
|
||||
endpoint->mtl_mxm_module = &ompi_mtl_mxm;
|
||||
endpoint->mxm_conn = conn_reqs.conn;
|
||||
procs->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
|
||||
#else
|
||||
endpoint = OBJ_NEW(mca_mtl_mxm_endpoint_t);
|
||||
endpoint->mtl_mxm_module = &ompi_mtl_mxm;
|
||||
err = mxm_ep_connect(ompi_mtl_mxm.ep, ep_address, &endpoint->mxm_conn);
|
||||
free(ep_address);
|
||||
if (err != MXM_OK) {
|
||||
MXM_ERROR("MXM returned connect error: %s\n", mxm_error_string(err));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
procs->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
|
||||
#endif
|
||||
|
||||
#if MXM_API >= MXM_VERSION(3,1)
|
||||
if (ompi_mtl_mxm.bulk_connect) {
|
||||
mxm_ep_wireup(ompi_mtl_mxm.ep);
|
||||
}
|
||||
#endif
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_mtl_mxm_del_procs(struct mca_mtl_base_module_t *mtl, size_t nprocs,
|
||||
struct ompi_proc_t** procs)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
#if MXM_API >= MXM_VERSION(3,1)
|
||||
if (ompi_mtl_mxm.bulk_disconnect && ((int)nprocs) == ompi_proc_world_size ()) {
|
||||
mxm_ep_powerdown(ompi_mtl_mxm.ep);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* XXX: Directly accessing the obj_reference_count is an abstraction
|
||||
* violation of the object system. We know this needs to be fixed, but
|
||||
* are deferring the fix to a later time as it involves a design issue
|
||||
* in the way we handle endpoints as objects
|
||||
*/
|
||||
for (i = 0; i < nprocs; ++i) {
|
||||
mca_mtl_mxm_endpoint_t *endpoint = (mca_mtl_mxm_endpoint_t*)
|
||||
procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
|
||||
if (endpoint) {
|
||||
mxm_ep_disconnect(endpoint->mxm_conn);
|
||||
OBJ_RELEASE(endpoint);
|
||||
}
|
||||
}
|
||||
opal_pmix.fence(NULL, 0);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_mtl_mxm_add_comm(struct mca_mtl_base_module_t *mtl,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
mxm_error_t err;
|
||||
mxm_mq_h mq;
|
||||
|
||||
assert(mtl == &ompi_mtl_mxm.super);
|
||||
assert(NULL != ompi_mtl_mxm.mxm_context);
|
||||
|
||||
err = mxm_mq_create(ompi_mtl_mxm.mxm_context, comm->c_contextid, &mq);
|
||||
if (MXM_OK != err) {
|
||||
opal_show_help("help-mtl-mxm.txt", "mxm mq create", true, mxm_error_string(err));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
comm->c_pml_comm = (void*)mq;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_mtl_mxm_del_comm(struct mca_mtl_base_module_t *mtl,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
assert(mtl == &ompi_mtl_mxm.super);
|
||||
if (NULL != ompi_mtl_mxm.mxm_context) {
|
||||
mxm_mq_destroy((mxm_mq_h)comm->c_pml_comm);
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_mtl_mxm_progress(void)
|
||||
{
|
||||
mxm_error_t err;
|
||||
|
||||
err = mxm_progress(ompi_mtl_mxm.mxm_context);
|
||||
if ((MXM_OK != err) && (MXM_ERR_NO_PROGRESS != err) ) {
|
||||
opal_show_help("help-mtl-mxm.txt", "errors during mxm_progress", true, mxm_error_string(err));
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if MXM_API >= MXM_VERSION(2,0)
|
||||
static void ompi_mtl_mxm_mem_release_cb(void *buf, size_t length,
|
||||
void *cbdata, bool from_alloc)
|
||||
{
|
||||
mxm_mem_unmap(ompi_mtl_mxm.mxm_context, buf, length,
|
||||
from_alloc ? MXM_MEM_UNMAP_MARK_INVALID : 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
OBJ_CLASS_INSTANCE(
|
||||
ompi_mtl_mxm_message_t,
|
||||
opal_free_list_item_t,
|
||||
NULL,
|
||||
NULL);
|
@ -1,117 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MTL_MXM_H_HAS_BEEN_INCLUDED
|
||||
#define MTL_MXM_H_HAS_BEEN_INCLUDED
|
||||
|
||||
#include <stdint.h>
|
||||
#include <sys/types.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <mxm/api/mxm_api.h>
|
||||
#ifndef MXM_VERSION
|
||||
#define MXM_VERSION(major, minor) (((major)<<MXM_MAJOR_BIT)|((minor)<<MXM_MINOR_BIT))
|
||||
#endif
|
||||
|
||||
#if MXM_API < MXM_VERSION(1,5)
|
||||
#error "Unsupported MXM version, version 1.5 or above required"
|
||||
#endif
|
||||
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
#include <mxm/api/mxm_addr.h>
|
||||
#endif
|
||||
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "ompi/mca/mtl/mtl.h"
|
||||
#include "ompi/mca/mtl/base/base.h"
|
||||
#include "opal/class/opal_free_list.h"
|
||||
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
|
||||
#include "mtl_mxm_debug.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/* MTL interface functions */
|
||||
extern int ompi_mtl_mxm_add_procs(struct mca_mtl_base_module_t* mtl,
|
||||
size_t nprocs, struct ompi_proc_t** procs);
|
||||
extern int ompi_mtl_add_single_proc(struct mca_mtl_base_module_t *mtl,
|
||||
struct ompi_proc_t* procs);
|
||||
extern int ompi_mtl_mxm_del_procs(struct mca_mtl_base_module_t* mtl,
|
||||
size_t nprocs, struct ompi_proc_t** procs);
|
||||
|
||||
extern int ompi_mtl_mxm_send(struct mca_mtl_base_module_t* mtl,
|
||||
struct ompi_communicator_t* comm, int dest, int tag,
|
||||
struct opal_convertor_t *convertor,
|
||||
mca_pml_base_send_mode_t mode);
|
||||
|
||||
extern int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl,
|
||||
struct ompi_communicator_t* comm, int dest,
|
||||
int tag, struct opal_convertor_t *convertor,
|
||||
mca_pml_base_send_mode_t mode, bool blocking,
|
||||
mca_mtl_request_t * mtl_request);
|
||||
|
||||
extern int ompi_mtl_mxm_irecv(struct mca_mtl_base_module_t* mtl,
|
||||
struct ompi_communicator_t *comm, int src,
|
||||
int tag, struct opal_convertor_t *convertor,
|
||||
struct mca_mtl_request_t *mtl_request);
|
||||
|
||||
extern int ompi_mtl_mxm_iprobe(struct mca_mtl_base_module_t* mtl,
|
||||
struct ompi_communicator_t *comm, int src,
|
||||
int tag, int *flag,
|
||||
struct ompi_status_public_t *status);
|
||||
|
||||
extern int ompi_mtl_mxm_cancel(struct mca_mtl_base_module_t* mtl,
|
||||
struct mca_mtl_request_t *mtl_request, int flag);
|
||||
|
||||
extern int ompi_mtl_mxm_imrecv(struct mca_mtl_base_module_t* mtl,
|
||||
struct opal_convertor_t *convertor,
|
||||
struct ompi_message_t **message,
|
||||
struct mca_mtl_request_t *mtl_request);
|
||||
|
||||
extern int ompi_mtl_mxm_improbe(struct mca_mtl_base_module_t *mtl,
|
||||
struct ompi_communicator_t *comm,
|
||||
int src,
|
||||
int tag,
|
||||
int *matched,
|
||||
struct ompi_message_t **message,
|
||||
struct ompi_status_public_t *status);
|
||||
|
||||
extern int ompi_mtl_mxm_add_comm(struct mca_mtl_base_module_t *mtl,
|
||||
struct ompi_communicator_t *comm);
|
||||
|
||||
extern int ompi_mtl_mxm_del_comm(struct mca_mtl_base_module_t *mtl,
|
||||
struct ompi_communicator_t *comm);
|
||||
|
||||
extern int ompi_mtl_mxm_finalize(struct mca_mtl_base_module_t* mtl);
|
||||
|
||||
int ompi_mtl_mxm_module_init(void);
|
||||
|
||||
struct ompi_mtl_mxm_message_t {
|
||||
opal_free_list_item_t super;
|
||||
|
||||
mxm_mq_h mq;
|
||||
mxm_conn_h conn;
|
||||
mxm_message_h mxm_msg;
|
||||
|
||||
mxm_tag_t tag;
|
||||
mxm_tag_t tag_mask;
|
||||
};
|
||||
typedef struct ompi_mtl_mxm_message_t ompi_mtl_mxm_message_t;
|
||||
OBJ_CLASS_DECLARATION(ompi_mtl_mxm_message_t);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
@ -1,34 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "mtl_mxm.h"
|
||||
#include "mtl_mxm_request.h"
|
||||
|
||||
int ompi_mtl_mxm_cancel(struct mca_mtl_base_module_t* mtl,
|
||||
struct mca_mtl_request_t *mtl_request, int flag)
|
||||
{
|
||||
mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
|
||||
mxm_error_t err;
|
||||
|
||||
#if MXM_API >= MXM_VERSION(2,0)
|
||||
if (mtl_mxm_request->is_send) {
|
||||
err = mxm_req_cancel_send(&mtl_mxm_request->mxm.send);
|
||||
} else {
|
||||
err = mxm_req_cancel_recv(&mtl_mxm_request->mxm.recv);
|
||||
}
|
||||
#else
|
||||
err = mxm_req_cancel(&mtl_mxm_request->mxm.base);
|
||||
#endif
|
||||
if ((err != MXM_OK) && (err != MXM_ERR_NO_PROGRESS)) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
@ -1,316 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/show_help.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
#include "opal/memoryhooks/memory.h"
|
||||
#include "opal/mca/memory/base/base.h"
|
||||
#include "ompi/runtime/mpiruntime.h"
|
||||
|
||||
#include "mtl_mxm.h"
|
||||
#include "mtl_mxm_types.h"
|
||||
#include "mtl_mxm_request.h"
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
static int ompi_mtl_mxm_component_open(void);
|
||||
static int ompi_mtl_mxm_component_query(mca_base_module_t **module, int *priority);
|
||||
static int ompi_mtl_mxm_component_close(void);
|
||||
static int ompi_mtl_mxm_component_register(void);
|
||||
|
||||
static int param_priority;
|
||||
|
||||
int mca_mtl_mxm_output = -1;
|
||||
|
||||
|
||||
static mca_mtl_base_module_t
|
||||
* ompi_mtl_mxm_component_init(bool enable_progress_threads,
|
||||
bool enable_mpi_threads);
|
||||
|
||||
mca_mtl_mxm_component_t mca_mtl_mxm_component = {
|
||||
{
|
||||
/*
|
||||
* First, the mca_base_component_t struct containing meta
|
||||
* information about the component itself
|
||||
*/
|
||||
.mtl_version = {
|
||||
MCA_MTL_BASE_VERSION_2_0_0,
|
||||
.mca_component_name = "mxm",
|
||||
MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
|
||||
OMPI_RELEASE_VERSION),
|
||||
.mca_open_component = ompi_mtl_mxm_component_open,
|
||||
.mca_close_component = ompi_mtl_mxm_component_close,
|
||||
.mca_query_component = ompi_mtl_mxm_component_query,
|
||||
.mca_register_component_params = ompi_mtl_mxm_component_register,
|
||||
},
|
||||
.mtl_data = {
|
||||
/* The component is not checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_NONE
|
||||
},
|
||||
.mtl_init = ompi_mtl_mxm_component_init,
|
||||
}
|
||||
};
|
||||
|
||||
static int ompi_mtl_mxm_component_register(void)
|
||||
{
|
||||
mca_base_component_t*c;
|
||||
|
||||
#if MXM_API < MXM_VERSION(3,0)
|
||||
unsigned long cur_ver;
|
||||
long major, minor;
|
||||
char* runtime_version;
|
||||
#endif
|
||||
|
||||
c = &mca_mtl_mxm_component.super.mtl_version;
|
||||
|
||||
ompi_mtl_mxm.verbose = 0;
|
||||
(void) mca_base_component_var_register(c, "verbose",
|
||||
"Verbose level of the MXM component",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&ompi_mtl_mxm.verbose);
|
||||
|
||||
#if MXM_API > MXM_VERSION(2,0)
|
||||
ompi_mtl_mxm.mxm_np = 0;
|
||||
#else
|
||||
ompi_mtl_mxm.mxm_np = 128;
|
||||
#endif
|
||||
(void) mca_base_component_var_register(c, "np",
|
||||
"[integer] Minimal number of MPI processes in a single job "
|
||||
"required to activate the MXM transport",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL,0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&ompi_mtl_mxm.mxm_np);
|
||||
|
||||
ompi_mtl_mxm.compiletime_version = MXM_VERNO_STRING;
|
||||
(void) mca_base_component_var_register(c,
|
||||
MCA_COMPILETIME_VER,
|
||||
"Version of the libmxm library with which Open MPI was compiled",
|
||||
MCA_BASE_VAR_TYPE_VERSION_STRING,
|
||||
NULL, 0, 0,
|
||||
OPAL_INFO_LVL_3,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&ompi_mtl_mxm.compiletime_version);
|
||||
|
||||
#if MXM_API >= MXM_VERSION(3,0)
|
||||
ompi_mtl_mxm.runtime_version = (char *)mxm_get_version_string();
|
||||
#else
|
||||
cur_ver = mxm_get_version();
|
||||
major = (cur_ver >> MXM_MAJOR_BIT) & 0xff;
|
||||
minor = (cur_ver >> MXM_MINOR_BIT) & 0xff;
|
||||
asprintf(&runtime_version, "%ld.%ld", major, minor);
|
||||
ompi_mtl_mxm.runtime_version = runtime_version;
|
||||
#endif
|
||||
|
||||
(void) mca_base_component_var_register(c,
|
||||
MCA_RUNTIME_VER,
|
||||
"Version of the libmxm library with which Open MPI is running",
|
||||
MCA_BASE_VAR_TYPE_VERSION_STRING,
|
||||
NULL, 0, 0,
|
||||
OPAL_INFO_LVL_3,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&ompi_mtl_mxm.runtime_version);
|
||||
|
||||
#if MXM_API < MXM_VERSION(3,0)
|
||||
free(runtime_version);
|
||||
#endif
|
||||
|
||||
/* set high enought to defeat ob1's default */
|
||||
param_priority = 30;
|
||||
(void) mca_base_component_var_register (c,
|
||||
"priority", "Priority of the MXM MTL component",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
¶m_priority);
|
||||
|
||||
|
||||
#if MXM_API >= MXM_VERSION(3,1)
|
||||
{
|
||||
unsigned long cur_ver = mxm_get_version();
|
||||
|
||||
ompi_mtl_mxm.bulk_connect = 0;
|
||||
|
||||
if (cur_ver < MXM_VERSION(3,2)) {
|
||||
ompi_mtl_mxm.bulk_disconnect = 0;
|
||||
} else {
|
||||
ompi_mtl_mxm.bulk_disconnect = 1;
|
||||
}
|
||||
|
||||
(void) mca_base_component_var_register(c, "bulk_connect",
|
||||
"[integer] use bulk connect",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&ompi_mtl_mxm.bulk_connect);
|
||||
|
||||
(void) mca_base_component_var_register(c, "bulk_disconnect",
|
||||
"[integer] use bulk disconnect",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&ompi_mtl_mxm.bulk_disconnect);
|
||||
|
||||
if (cur_ver < MXM_VERSION(3,2) &&
|
||||
(ompi_mtl_mxm.bulk_connect || ompi_mtl_mxm.bulk_disconnect)) {
|
||||
ompi_mtl_mxm.bulk_connect = 0;
|
||||
ompi_mtl_mxm.bulk_disconnect = 0;
|
||||
|
||||
MXM_VERBOSE(1, "WARNING: OMPI runs with %s version of MXM that is less than 3.2, "
|
||||
"so bulk connect/disconnect cannot work properly and will be turn off.",
|
||||
ompi_mtl_mxm.runtime_version);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static int ompi_mtl_mxm_component_open(void)
|
||||
{
|
||||
mxm_error_t err;
|
||||
unsigned long cur_ver;
|
||||
int rc;
|
||||
|
||||
mca_mtl_mxm_output = opal_output_open(NULL);
|
||||
opal_output_set_verbosity(mca_mtl_mxm_output, ompi_mtl_mxm.verbose);
|
||||
cur_ver = mxm_get_version();
|
||||
if (cur_ver != MXM_API) {
|
||||
MXM_VERBOSE(1,
|
||||
"WARNING: OMPI was compiled with MXM version %d.%d but version %ld.%ld detected.",
|
||||
MXM_VERNO_MAJOR,
|
||||
MXM_VERNO_MINOR,
|
||||
(cur_ver >> MXM_MAJOR_BIT) & 0xff,
|
||||
(cur_ver >> MXM_MINOR_BIT) & 0xff);
|
||||
}
|
||||
|
||||
#if MXM_API >= MXM_VERSION(2,0)
|
||||
(void)mca_base_framework_open(&opal_memory_base_framework, 0);
|
||||
/* Register memory hooks */
|
||||
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
|
||||
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) &
|
||||
opal_mem_hooks_support_level()))
|
||||
{
|
||||
setenv("MXM_MPI_MEM_ON_DEMAND_MAP", "y", 0);
|
||||
MXM_VERBOSE(1, "Enabling on-demand memory mapping");
|
||||
ompi_mtl_mxm.using_mem_hooks = 1;
|
||||
} else {
|
||||
MXM_VERBOSE(1, "Disabling on-demand memory mapping");
|
||||
ompi_mtl_mxm.using_mem_hooks = 0;
|
||||
}
|
||||
setenv("MXM_MPI_SINGLE_THREAD", ompi_mpi_thread_multiple ? "n" : "y" , 0);
|
||||
#endif
|
||||
|
||||
#if MXM_API >= MXM_VERSION(2,1)
|
||||
if (MXM_OK != mxm_config_read_opts(&ompi_mtl_mxm.mxm_ctx_opts,
|
||||
&ompi_mtl_mxm.mxm_ep_opts,
|
||||
"MPI", NULL, 0))
|
||||
#else
|
||||
if ((MXM_OK != mxm_config_read_context_opts(&ompi_mtl_mxm.mxm_ctx_opts)) ||
|
||||
(MXM_OK != mxm_config_read_ep_opts(&ompi_mtl_mxm.mxm_ep_opts)))
|
||||
#endif
|
||||
{
|
||||
MXM_ERROR("Failed to parse MXM configuration");
|
||||
return OPAL_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
err = mxm_init(ompi_mtl_mxm.mxm_ctx_opts, &ompi_mtl_mxm.mxm_context);
|
||||
MXM_VERBOSE(1, "mxm component open");
|
||||
|
||||
if (MXM_OK != err) {
|
||||
if (MXM_ERR_NO_DEVICE == err) {
|
||||
MXM_VERBOSE(1, "No supported device found, disqualifying mxm");
|
||||
} else {
|
||||
opal_show_help("help-mtl-mxm.txt", "mxm init", true,
|
||||
mxm_error_string(err));
|
||||
}
|
||||
return OPAL_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
OBJ_CONSTRUCT(&mca_mtl_mxm_component.mxm_messages, opal_free_list_t);
|
||||
rc = opal_free_list_init (&mca_mtl_mxm_component.mxm_messages,
|
||||
sizeof(ompi_mtl_mxm_message_t),
|
||||
opal_cache_line_size,
|
||||
OBJ_CLASS(ompi_mtl_mxm_message_t),
|
||||
0, opal_cache_line_size,
|
||||
32 /* free list num */,
|
||||
-1 /* free list max */,
|
||||
32 /* free list inc */,
|
||||
NULL, 0, NULL, NULL, NULL);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
opal_show_help("help-mtl-mxm.txt", "mxm init", true,
|
||||
mxm_error_string(err));
|
||||
return OPAL_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Component query hook: disables MPI dynamic process support (unsupported
 * by this MTL) and hands out the MXM module together with the priority
 * configured through the "priority" MCA variable.
 */
static int ompi_mtl_mxm_component_query(mca_base_module_t **module, int *priority)
{
    ompi_mpi_dynamics_disable("the MXM MTL does not support MPI dynamic process functionality");

    *module   = (mca_base_module_t *) &ompi_mtl_mxm.super;
    *priority = param_priority;

    return OMPI_SUCCESS;
}
|
||||
|
||||
static int ompi_mtl_mxm_component_close(void)
|
||||
{
|
||||
if (ompi_mtl_mxm.mxm_context != NULL) {
|
||||
mxm_cleanup(ompi_mtl_mxm.mxm_context);
|
||||
ompi_mtl_mxm.mxm_context = NULL;
|
||||
OBJ_DESTRUCT(&mca_mtl_mxm_component.mxm_messages);
|
||||
#if MXM_API >= MXM_VERSION(2,0)
|
||||
mxm_config_free_ep_opts(ompi_mtl_mxm.mxm_ep_opts);
|
||||
mxm_config_free_context_opts(ompi_mtl_mxm.mxm_ctx_opts);
|
||||
mca_base_framework_close(&opal_memory_base_framework);
|
||||
#else
|
||||
mxm_config_free(ompi_mtl_mxm.mxm_ep_opts);
|
||||
mxm_config_free(ompi_mtl_mxm.mxm_ctx_opts);
|
||||
#endif
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static mca_mtl_base_module_t*
|
||||
ompi_mtl_mxm_component_init(bool enable_progress_threads,
|
||||
bool enable_mpi_threads)
|
||||
{
|
||||
int rc;
|
||||
|
||||
rc = ompi_mtl_mxm_module_init();
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Calculate MTL constraints according to MXM types */
|
||||
ompi_mtl_mxm.super.mtl_max_contextid = 1UL << (sizeof(mxm_ctxid_t) * 8);
|
||||
ompi_mtl_mxm.super.mtl_max_tag = 1UL << (sizeof(mxm_tag_t) * 8 - 2);
|
||||
ompi_mtl_mxm.super.mtl_request_size =
|
||||
sizeof(mca_mtl_mxm_request_t) - sizeof(struct mca_mtl_request_t);
|
||||
return &ompi_mtl_mxm.super;
|
||||
}
|
@ -1,34 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MTL_MXM_DEBUG_H
#define MTL_MXM_DEBUG_H
#pragma GCC system_header

/* Output stream id shared by all logging macros below. */
extern int mca_mtl_mxm_output;

/* File name shown in log lines: __BASE_FILE__ when the compiler provides
 * it, otherwise the current file. */
#if defined(__BASE_FILE__)
#  define __MXM_FILE__ __BASE_FILE__
#else
#  define __MXM_FILE__ __FILE__
#endif

/* Log "file:line - function() <message>" at the given verbosity level. */
#define MXM_VERBOSE(level, format, ...) \
    opal_output_verbose(level, mca_mtl_mxm_output, "%s:%d - %s() " format, \
                        __MXM_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)

/* Same as MXM_VERBOSE at level 0, prefixed with "Error: ". */
#define MXM_ERROR(format, ... ) \
    opal_output_verbose(0, mca_mtl_mxm_output, "Error: %s:%d - %s() " format, \
                        __MXM_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)

/* MXM_VERBOSE with the module's rank prepended as "[rank] ". */
#define MXM_MODULE_VERBOSE(mxm_module, level, format, ...) \
    MXM_VERBOSE(level, "[%d] " format, (mxm_module)->rank, ## __VA_ARGS__)

#endif
|
@ -1,42 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include <sys/time.h>
|
||||
#include <time.h>
|
||||
#include "ompi/types.h"
|
||||
|
||||
#include "mtl_mxm.h"
|
||||
#include "mtl_mxm_types.h"
|
||||
#include "mtl_mxm_endpoint.h"
|
||||
|
||||
/*
|
||||
* Initialize state of the endpoint instance.
|
||||
*
|
||||
*/
|
||||
|
||||
/* Constructor: a new endpoint is not yet attached to any MTL module. */
static void mca_mtl_mxm_endpoint_construct(mca_mtl_mxm_endpoint_t* endpoint)
{
    endpoint->mtl_mxm_module = NULL;
}
|
||||
|
||||
/*
|
||||
* Destroy a endpoint
|
||||
*
|
||||
*/
|
||||
|
||||
/* Destructor: intentionally empty, nothing is released here. */
static void mca_mtl_mxm_endpoint_destruct(mca_mtl_mxm_endpoint_t* endpoint)
{
}
|
||||
|
||||
/* Class descriptor: an endpoint is a list item with the hooks above. */
OBJ_CLASS_INSTANCE(mca_mtl_mxm_endpoint_t, opal_list_item_t,
                   mca_mtl_mxm_endpoint_construct,
                   mca_mtl_mxm_endpoint_destruct);
|
@ -1,41 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_MTL_MXM_ENDPOINT_H
|
||||
#define MCA_MTL_MXM_ENDPOINT_H
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "ompi/mca/mtl/mtl.h"
|
||||
#include "mtl_mxm.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_mtl_mxm_endpoint_t);
|
||||
|
||||
/**
|
||||
* An abstraction that represents a connection to a endpoint process.
|
||||
* An instance of mca_mtl_mxm_endpoint_t is associated w/ each process
|
||||
* and MTL pair at startup. However, connections to the endpoint
|
||||
* are established dynamically on an as-needed basis:
|
||||
*/
|
||||
|
||||
struct mca_mtl_mxm_endpoint_t {
|
||||
opal_list_item_t super;
|
||||
|
||||
struct mca_mtl_mxm_module_t* mtl_mxm_module;
|
||||
/**< MTL instance that created this connection */
|
||||
|
||||
mxm_conn_h mxm_conn;
|
||||
/**< MXM Connection handle*/
|
||||
};
|
||||
|
||||
typedef struct mca_mtl_mxm_endpoint_t mca_mtl_mxm_endpoint_t;
|
||||
OBJ_CLASS_DECLARATION(mca_mtl_mxm_endpoint);
|
||||
|
||||
END_C_DECLS
|
||||
#endif
|
@ -1,115 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* Copyright (c) 2013 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "mtl_mxm.h"
|
||||
#include "mtl_mxm_types.h"
|
||||
|
||||
#include "ompi/message/message.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
|
||||
int ompi_mtl_mxm_iprobe(struct mca_mtl_base_module_t* mtl,
|
||||
struct ompi_communicator_t *comm, int src, int tag,
|
||||
int *flag, struct ompi_status_public_t *status)
|
||||
{
|
||||
mxm_error_t err;
|
||||
mxm_recv_req_t req;
|
||||
|
||||
req.base.state = MXM_REQ_NEW;
|
||||
ompi_mtl_mxm_set_recv_envelope(&req, comm, src, tag);
|
||||
|
||||
err = mxm_req_probe(&req);
|
||||
if (MXM_OK == err) {
|
||||
*flag = 1;
|
||||
if (MPI_STATUS_IGNORE != status) {
|
||||
ompi_mtl_mxm_to_mpi_status(err, status);
|
||||
status->MPI_SOURCE = req.completion.sender_imm;
|
||||
status->MPI_TAG = req.completion.sender_tag;
|
||||
status->_ucount = req.completion.sender_len;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
} else if (MXM_ERR_NO_MESSAGE == err) {
|
||||
*flag = 0;
|
||||
return OMPI_SUCCESS;
|
||||
} else {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int ompi_mtl_mxm_improbe(struct mca_mtl_base_module_t *mtl,
|
||||
struct ompi_communicator_t *comm,
|
||||
int src,
|
||||
int tag,
|
||||
int *matched,
|
||||
struct ompi_message_t **message,
|
||||
struct ompi_status_public_t *status)
|
||||
{
|
||||
mxm_error_t err;
|
||||
mxm_recv_req_t req;
|
||||
|
||||
opal_free_list_item_t *item;
|
||||
ompi_mtl_mxm_message_t *msgp;
|
||||
|
||||
item = opal_free_list_wait (&mca_mtl_mxm_component.mxm_messages);
|
||||
if (OPAL_UNLIKELY(NULL == item)) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
msgp = (ompi_mtl_mxm_message_t *) item;
|
||||
|
||||
req.base.state = MXM_REQ_NEW;
|
||||
ompi_mtl_mxm_set_recv_envelope(&req, comm, src, tag);
|
||||
|
||||
msgp->mq = req.base.mq;
|
||||
msgp->conn = req.base.conn;
|
||||
msgp->tag = req.tag;
|
||||
msgp->tag_mask = req.tag_mask;
|
||||
|
||||
err = mxm_req_mprobe(&req, &msgp->mxm_msg);
|
||||
if (MXM_OK == err) {
|
||||
if (MPI_STATUS_IGNORE != status) {
|
||||
*matched = 1;
|
||||
ompi_mtl_mxm_to_mpi_status(err, status);
|
||||
status->MPI_SOURCE = req.completion.sender_imm;
|
||||
status->MPI_TAG = req.completion.sender_tag;
|
||||
status->_ucount = req.completion.sender_len;
|
||||
} else{
|
||||
*matched = 0;
|
||||
*message = MPI_MESSAGE_NULL;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
} else if (MXM_ERR_NO_MESSAGE == err) {
|
||||
*matched = 0;
|
||||
*message = MPI_MESSAGE_NULL;
|
||||
return OMPI_SUCCESS;
|
||||
} else {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
(*message) = ompi_message_alloc();
|
||||
if (OPAL_UNLIKELY(NULL == (*message))) {
|
||||
*matched = 0;
|
||||
*message = MPI_MESSAGE_NULL;
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
(*message)->comm = comm;
|
||||
(*message)->req_ptr = msgp;
|
||||
(*message)->peer = status->MPI_SOURCE;
|
||||
(*message)->count = status->_ucount;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
@ -1,197 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/message/message.h"
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "mtl_mxm.h"
|
||||
#include "mtl_mxm_types.h"
|
||||
#include "mtl_mxm_request.h"
|
||||
|
||||
static void ompi_mtl_mxm_recv_completion_cb(void *context)
|
||||
{
|
||||
mca_mtl_mxm_request_t *req = (mca_mtl_mxm_request_t *) context;
|
||||
struct ompi_request_t *ompi_req = req->super.ompi_req;
|
||||
mxm_recv_req_t *mxm_recv_req = &req->mxm.recv;
|
||||
|
||||
/* Set completion status and envelope */
|
||||
ompi_mtl_mxm_to_mpi_status(mxm_recv_req->base.error, &ompi_req->req_status);
|
||||
ompi_req->req_status.MPI_TAG = mxm_recv_req->completion.sender_tag;
|
||||
ompi_req->req_status.MPI_SOURCE = mxm_recv_req->completion.sender_imm;
|
||||
ompi_req->req_status._ucount = mxm_recv_req->completion.actual_len;
|
||||
|
||||
req->super.completion_callback(&req->super);
|
||||
}
|
||||
|
||||
static size_t ompi_mtl_mxm_stream_unpack(void *buffer, size_t length,
|
||||
size_t offset, void *context)
|
||||
{
|
||||
struct iovec iov;
|
||||
uint32_t iov_count = 1;
|
||||
|
||||
mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t *) context;
|
||||
opal_convertor_t *convertor = mtl_mxm_request->convertor;
|
||||
|
||||
iov.iov_len = length;
|
||||
iov.iov_base = buffer;
|
||||
|
||||
opal_convertor_set_position(convertor, &offset);
|
||||
opal_convertor_unpack(convertor, &iov, &iov_count, &length);
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
static inline __opal_attribute_always_inline__ int
|
||||
ompi_mtl_mxm_choose_recv_datatype(mca_mtl_mxm_request_t *mtl_mxm_request)
|
||||
{
|
||||
void **buffer = &mtl_mxm_request->buf;
|
||||
size_t *buffer_len = &mtl_mxm_request->length;
|
||||
|
||||
mxm_recv_req_t *mxm_recv_req = &mtl_mxm_request->mxm.recv;
|
||||
opal_convertor_t *convertor = mtl_mxm_request->convertor;
|
||||
|
||||
opal_convertor_get_packed_size(convertor, buffer_len);
|
||||
|
||||
if (0 == *buffer_len) {
|
||||
*buffer = NULL;
|
||||
*buffer_len = 0;
|
||||
|
||||
mxm_recv_req->base.data_type = MXM_REQ_DATA_BUFFER;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
if (opal_convertor_need_buffers(convertor)) {
|
||||
mxm_recv_req->base.data_type = MXM_REQ_DATA_STREAM;
|
||||
mxm_recv_req->base.data.stream.length = *buffer_len;
|
||||
mxm_recv_req->base.data.stream.cb = ompi_mtl_mxm_stream_unpack;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
mxm_recv_req->base.data_type = MXM_REQ_DATA_BUFFER;
|
||||
|
||||
*buffer = convertor->pBaseBuf +
|
||||
convertor->use_desc->desc[convertor->use_desc->used].end_loop.first_elem_disp;
|
||||
|
||||
mxm_recv_req->base.data.buffer.ptr = *buffer;
|
||||
mxm_recv_req->base.data.buffer.length = *buffer_len;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static inline __opal_attribute_always_inline__ int
|
||||
ompi_mtl_mxm_recv_init(mca_mtl_mxm_request_t *mtl_mxm_request,
|
||||
opal_convertor_t *convertor,
|
||||
mxm_recv_req_t *mxm_recv_req)
|
||||
{
|
||||
int ret;
|
||||
|
||||
mtl_mxm_request->convertor = convertor;
|
||||
ret = ompi_mtl_mxm_choose_recv_datatype(mtl_mxm_request);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if MXM_API >= MXM_VERSION(2,0)
|
||||
mtl_mxm_request->is_send = 0;
|
||||
#endif
|
||||
|
||||
mxm_recv_req->base.state = MXM_REQ_NEW;
|
||||
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
mxm_recv_req->base.flags = 0;
|
||||
#endif
|
||||
|
||||
mxm_recv_req->base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
|
||||
mxm_recv_req->base.context = mtl_mxm_request;
|
||||
mxm_recv_req->base.completed_cb = ompi_mtl_mxm_recv_completion_cb;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_mtl_mxm_irecv(struct mca_mtl_base_module_t* mtl,
|
||||
struct ompi_communicator_t *comm, int src, int tag,
|
||||
struct opal_convertor_t *convertor,
|
||||
struct mca_mtl_request_t *mtl_request)
|
||||
{
|
||||
int ret;
|
||||
mxm_error_t err;
|
||||
mxm_recv_req_t *mxm_recv_req;
|
||||
mca_mtl_mxm_request_t *mtl_mxm_request;
|
||||
|
||||
mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
|
||||
mxm_recv_req = &mtl_mxm_request->mxm.recv;
|
||||
|
||||
ompi_mtl_mxm_set_recv_envelope(mxm_recv_req, comm, src, tag);
|
||||
|
||||
/* prepare a receive request embedded in the MTL request */
|
||||
ret = ompi_mtl_mxm_recv_init(mtl_mxm_request, convertor, mxm_recv_req);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* post-recv */
|
||||
err = mxm_req_recv(mxm_recv_req);
|
||||
if (OPAL_UNLIKELY(MXM_OK != err)) {
|
||||
opal_show_help("help-mtl-mxm.txt", "error posting receive", true,
|
||||
mxm_error_string(err), mtl_mxm_request->buf, mtl_mxm_request->length);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_mtl_mxm_imrecv(struct mca_mtl_base_module_t* mtl,
|
||||
struct opal_convertor_t *convertor,
|
||||
struct ompi_message_t **message,
|
||||
struct mca_mtl_request_t *mtl_request)
|
||||
{
|
||||
int ret;
|
||||
mxm_error_t err;
|
||||
mxm_recv_req_t *mxm_recv_req;
|
||||
mca_mtl_mxm_request_t *mtl_mxm_request;
|
||||
|
||||
ompi_mtl_mxm_message_t *msgp =
|
||||
(ompi_mtl_mxm_message_t *) (*message)->req_ptr;
|
||||
|
||||
mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
|
||||
mxm_recv_req = &mtl_mxm_request->mxm.recv;
|
||||
|
||||
/* prepare a receive request embedded in the MTL request */
|
||||
ret = ompi_mtl_mxm_recv_init(mtl_mxm_request, convertor, mxm_recv_req);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
mxm_recv_req->tag = msgp->tag;
|
||||
mxm_recv_req->tag_mask = msgp->tag_mask;
|
||||
mxm_recv_req->base.mq = msgp->mq;
|
||||
mxm_recv_req->base.conn = msgp->conn;
|
||||
|
||||
err = mxm_message_recv(mxm_recv_req, msgp->mxm_msg);
|
||||
if (OPAL_UNLIKELY(MXM_OK != err)) {
|
||||
opal_show_help("help-mtl-mxm.txt", "error posting message receive", true,
|
||||
mxm_error_string(err), mtl_mxm_request->buf, mtl_mxm_request->length);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
opal_free_list_return (&mca_mtl_mxm_component.mxm_messages, (opal_free_list_item_t *) msgp);
|
||||
|
||||
ompi_message_return(*message);
|
||||
(*message) = MPI_MESSAGE_NULL;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
@ -1,35 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef OMPI_MTL_MXM_REQUEST_H
|
||||
#define OMPI_MTL_MXM_REQUEST_H
|
||||
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "mtl_mxm.h"
|
||||
|
||||
|
||||
struct mca_mtl_mxm_request_t {
|
||||
struct mca_mtl_request_t super;
|
||||
union {
|
||||
mxm_req_base_t base;
|
||||
mxm_send_req_t send;
|
||||
mxm_recv_req_t recv;
|
||||
} mxm;
|
||||
#if MXM_API >= MXM_VERSION(2,0)
|
||||
int is_send;
|
||||
#endif
|
||||
/* mxm_segment_t mxm_segment[1]; */
|
||||
void *buf;
|
||||
size_t length;
|
||||
struct opal_convertor_t *convertor;
|
||||
bool free_after;
|
||||
};
|
||||
typedef struct mca_mtl_mxm_request_t mca_mtl_mxm_request_t;
|
||||
|
||||
#endif
|
@ -1,238 +0,0 @@
|
||||
/* * Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "mtl_mxm.h"
|
||||
#include "mtl_mxm_types.h"
|
||||
#include "mtl_mxm_request.h"
|
||||
#include "ompi/mca/mtl/base/mtl_base_datatype.h"
|
||||
|
||||
static inline __opal_attribute_always_inline__
|
||||
size_t ompi_mtl_mxm_stream_pack(opal_convertor_t *convertor, void *buffer,
|
||||
size_t length, size_t offset)
|
||||
{
|
||||
struct iovec iov;
|
||||
uint32_t iov_count = 1;
|
||||
|
||||
iov.iov_len = length;
|
||||
iov.iov_base = buffer;
|
||||
|
||||
opal_convertor_set_position(convertor, &offset);
|
||||
opal_convertor_pack(convertor, &iov, &iov_count, &length);
|
||||
|
||||
return length;
|
||||
}
|
||||
|
||||
/*
 * Stream callback for non-blocking sends: context is the MTL request,
 * whose convertor supplies the data to pack.
 */
static size_t ompi_mtl_mxm_stream_isend(void *buffer, size_t length, size_t offset, void *context)
{
    mca_mtl_mxm_request_t *mtl_req = (mca_mtl_mxm_request_t *) context;

    return ompi_mtl_mxm_stream_pack(mtl_req->convertor, buffer, length, offset);
}
|
||||
|
||||
/*
 * Stream callback for blocking sends: context is the convertor itself.
 */
static size_t ompi_mtl_mxm_stream_send(void *buffer, size_t length, size_t offset, void *context)
{
    return ompi_mtl_mxm_stream_pack((opal_convertor_t *) context,
                                    buffer, length, offset);
}
|
||||
|
||||
static inline __opal_attribute_always_inline__ int
|
||||
ompi_mtl_mxm_choose_send_datatype(mxm_send_req_t *mxm_send_req,
|
||||
opal_convertor_t *convertor,
|
||||
mxm_stream_cb_t stream_cb)
|
||||
{
|
||||
struct iovec iov;
|
||||
uint32_t iov_count = 1;
|
||||
|
||||
size_t *buffer_len = &mxm_send_req->base.data.buffer.length;
|
||||
|
||||
#if !(OPAL_ENABLE_HETEROGENEOUS_SUPPORT)
|
||||
if (convertor->pDesc &&
|
||||
opal_datatype_is_contiguous_memory_layout(convertor->pDesc,
|
||||
convertor->count)) {
|
||||
mxm_send_req->base.data.buffer.ptr = convertor->pBaseBuf;
|
||||
mxm_send_req->base.data.buffer.length = convertor->local_size;
|
||||
mxm_send_req->base.data_type = MXM_REQ_DATA_BUFFER;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
opal_convertor_get_packed_size(convertor, buffer_len);
|
||||
if (0 == *buffer_len) {
|
||||
mxm_send_req->base.data.buffer.ptr = NULL;
|
||||
mxm_send_req->base.data_type = MXM_REQ_DATA_BUFFER;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
if (opal_convertor_need_buffers(convertor)) {
|
||||
mxm_send_req->base.data_type = MXM_REQ_DATA_STREAM;
|
||||
mxm_send_req->base.data.stream.length = *buffer_len;
|
||||
mxm_send_req->base.data.stream.cb = stream_cb;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
mxm_send_req->base.data_type = MXM_REQ_DATA_BUFFER;
|
||||
|
||||
iov.iov_base = NULL;
|
||||
iov.iov_len = *buffer_len;
|
||||
|
||||
opal_convertor_pack(convertor, &iov, &iov_count, buffer_len);
|
||||
mxm_send_req->base.data.buffer.ptr = iov.iov_base;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static void ompi_mtl_mxm_send_completion_cb(void *context)
|
||||
{
|
||||
mca_mtl_mxm_request_t *mtl_mxm_request = context;
|
||||
|
||||
ompi_mtl_mxm_to_mpi_status(mtl_mxm_request->mxm.base.error,
|
||||
&mtl_mxm_request->super.ompi_req->req_status);
|
||||
mtl_mxm_request->super.completion_callback(&mtl_mxm_request->super);
|
||||
}
|
||||
|
||||
/*
 * Progress hook handed to mxm_wait() by the blocking send: drives the
 * Open MPI progress engine.  user_data is unused.
 */
static void ompi_mtl_mxm_send_progress_cb(void *user_data)
{
    opal_progress();
}
|
||||
|
||||
int ompi_mtl_mxm_send(struct mca_mtl_base_module_t* mtl,
|
||||
struct ompi_communicator_t* comm, int dest, int tag,
|
||||
struct opal_convertor_t *convertor,
|
||||
mca_pml_base_send_mode_t mode)
|
||||
{
|
||||
mxm_send_req_t mxm_send_req;
|
||||
mxm_wait_t wait;
|
||||
mxm_error_t err;
|
||||
int ret;
|
||||
|
||||
/* prepare local send request */
|
||||
mxm_send_req.base.state = MXM_REQ_NEW;
|
||||
mxm_send_req.base.mq = ompi_mtl_mxm_mq_lookup(comm);
|
||||
mxm_send_req.base.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
|
||||
mxm_send_req.base.context = convertor;
|
||||
mxm_send_req.base.completed_cb = NULL;
|
||||
|
||||
ret = ompi_mtl_mxm_choose_send_datatype(&mxm_send_req, convertor,
|
||||
ompi_mtl_mxm_stream_send);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
mxm_send_req.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
|
||||
mxm_send_req.op.send.tag = tag;
|
||||
mxm_send_req.op.send.imm_data = ompi_comm_rank(comm);
|
||||
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
mxm_send_req.base.flags = MXM_REQ_FLAG_BLOCKING;
|
||||
mxm_send_req.opcode = MXM_REQ_OP_SEND;
|
||||
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
|
||||
mxm_send_req.base.flags |= MXM_REQ_FLAG_SEND_SYNC;
|
||||
}
|
||||
#else
|
||||
mxm_send_req.flags = MXM_REQ_SEND_FLAG_BLOCKING;
|
||||
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
|
||||
mxm_send_req.opcode = MXM_REQ_OP_SEND_SYNC;
|
||||
} else {
|
||||
mxm_send_req.opcode = MXM_REQ_OP_SEND;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* post-send */
|
||||
err = mxm_req_send(&mxm_send_req);
|
||||
if (MXM_OK != err) {
|
||||
opal_show_help("help-mtl-mxm.txt", "error posting send", true, 0, mxm_error_string(err));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* wait for request completion */
|
||||
wait.req = &mxm_send_req.base;
|
||||
wait.state = MXM_REQ_COMPLETED;
|
||||
wait.progress_cb = ompi_mtl_mxm_send_progress_cb;
|
||||
wait.progress_arg = NULL;
|
||||
mxm_wait(&wait);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_mtl_mxm_isend(struct mca_mtl_base_module_t* mtl,
|
||||
struct ompi_communicator_t* comm, int dest, int tag,
|
||||
struct opal_convertor_t *convertor,
|
||||
mca_pml_base_send_mode_t mode, bool blocking,
|
||||
mca_mtl_request_t * mtl_request)
|
||||
{
|
||||
mca_mtl_mxm_request_t *mtl_mxm_request = (mca_mtl_mxm_request_t *) mtl_request;
|
||||
mxm_send_req_t *mxm_send_req;
|
||||
mxm_error_t err;
|
||||
int ret;
|
||||
|
||||
assert(mtl == &ompi_mtl_mxm.super);
|
||||
|
||||
mtl_mxm_request->convertor = convertor;
|
||||
|
||||
mxm_send_req = &mtl_mxm_request->mxm.send;
|
||||
#if MXM_API >= MXM_VERSION(2,0)
|
||||
mtl_mxm_request->is_send = 1;
|
||||
#endif
|
||||
|
||||
/* prepare a send request embedded in the MTL request */
|
||||
mxm_send_req->base.state = MXM_REQ_NEW;
|
||||
mxm_send_req->base.mq = ompi_mtl_mxm_mq_lookup(comm);
|
||||
mxm_send_req->base.conn = ompi_mtl_mxm_conn_lookup(comm, dest);
|
||||
|
||||
ret = ompi_mtl_mxm_choose_send_datatype(mxm_send_req, convertor,
|
||||
ompi_mtl_mxm_stream_isend);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
mtl_mxm_request->buf = mxm_send_req->base.data.buffer.ptr;
|
||||
mtl_mxm_request->length = mxm_send_req->base.data.buffer.length;
|
||||
|
||||
mxm_send_req->base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
|
||||
mxm_send_req->base.context = mtl_mxm_request;
|
||||
mxm_send_req->base.completed_cb = ompi_mtl_mxm_send_completion_cb;
|
||||
|
||||
#if MXM_API < MXM_VERSION(2,0)
|
||||
mxm_send_req->base.flags = 0;
|
||||
mxm_send_req->opcode = MXM_REQ_OP_SEND;
|
||||
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
|
||||
mxm_send_req->base.flags |= MXM_REQ_FLAG_SEND_SYNC;
|
||||
}
|
||||
#else
|
||||
#if defined(MXM_REQ_SEND_FLAG_REENTRANT)
|
||||
mxm_send_req->flags = MXM_REQ_SEND_FLAG_REENTRANT;
|
||||
#else
|
||||
mxm_send_req->flags = 0;
|
||||
#endif
|
||||
if (mode == MCA_PML_BASE_SEND_SYNCHRONOUS) {
|
||||
mxm_send_req->opcode = MXM_REQ_OP_SEND_SYNC;
|
||||
} else {
|
||||
mxm_send_req->opcode = MXM_REQ_OP_SEND;
|
||||
}
|
||||
#endif
|
||||
mxm_send_req->op.send.tag = tag;
|
||||
mxm_send_req->op.send.imm_data = ompi_comm_rank(comm);
|
||||
|
||||
/* post-send */
|
||||
err = mxm_req_send(mxm_send_req);
|
||||
if (MXM_OK != err) {
|
||||
opal_show_help("help-mtl-mxm.txt", "error posting send", true, 1, mxm_error_string(err));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
@ -1,123 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (C) Mellanox Technologies Ltd. 2001-2011. ALL RIGHTS RESERVED.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MTL_MXM_TYPES_H_HAS_BEEN_INCLUDED
|
||||
#define MTL_MXM_TYPES_H_HAS_BEEN_INCLUDED
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "mtl_mxm.h"
|
||||
|
||||
#include "ompi/mca/mtl/mtl.h"
|
||||
#include "ompi/mca/mtl/base/base.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "mtl_mxm_endpoint.h"
|
||||
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/**
|
||||
* MTL Module Interface
|
||||
*/
|
||||
typedef struct mca_mtl_mxm_module_t {
|
||||
mca_mtl_base_module_t super; /**< base MTL interface */
|
||||
int verbose;
|
||||
int mxm_np;
|
||||
mxm_h mxm_context;
|
||||
mxm_ep_h ep;
|
||||
mxm_context_opts_t *mxm_ctx_opts;
|
||||
mxm_ep_opts_t *mxm_ep_opts;
|
||||
#if MXM_API >= MXM_VERSION(2,0)
|
||||
int using_mem_hooks;
|
||||
#endif
|
||||
#if MXM_API >= MXM_VERSION(3,1)
|
||||
int bulk_connect; /* use bulk connect */
|
||||
int bulk_disconnect; /* use bulk disconnect */
|
||||
#endif
|
||||
char* runtime_version;
|
||||
char* compiletime_version;
|
||||
} mca_mtl_mxm_module_t;
|
||||
|
||||
|
||||
#if MXM_API < MXM_VERSION(2,0)
/*
 * Only compiled for MXM API versions older than 2.0: a table of
 * MXM_PTL_LAST socket-address slots, indexed by PTL.
 */
typedef struct ompi_mtl_mxm_ep_conn_info_t {
    struct sockaddr_storage ptl_addr[MXM_PTL_LAST];
} ompi_mtl_mxm_ep_conn_info_t;
#endif
|
||||
|
||||
/* The MXM MTL module object; this header only declares it. */
extern mca_mtl_mxm_module_t ompi_mtl_mxm;
|
||||
|
||||
typedef struct mca_mtl_mxm_component_t {
|
||||
mca_mtl_base_component_2_0_0_t super; /**< base MTL component */
|
||||
opal_free_list_t mxm_messages; /* will be used for MPI_Mprobe and MPI_Mrecv calls */
|
||||
} mca_mtl_mxm_component_t;
|
||||
|
||||
|
||||
/* The MXM MTL component object, declared with OMPI_DECLSPEC. */
OMPI_DECLSPEC mca_mtl_mxm_component_t mca_mtl_mxm_component;
|
||||
|
||||
|
||||
static inline mxm_conn_h ompi_mtl_mxm_conn_lookup(struct ompi_communicator_t* comm, int rank) {
|
||||
ompi_proc_t* ompi_proc = ompi_comm_peer_lookup(comm, rank);
|
||||
mca_mtl_mxm_endpoint_t *endpoint = (mca_mtl_mxm_endpoint_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
|
||||
|
||||
if (endpoint != NULL) {
|
||||
return endpoint->mxm_conn;
|
||||
}
|
||||
|
||||
MXM_VERBOSE(80, "First communication with [%s:%s]: set endpoint connection.",
|
||||
ompi_proc->super.proc_hostname, OPAL_NAME_PRINT(ompi_proc->super.proc_name));
|
||||
ompi_mtl_add_single_proc(ompi_mtl, ompi_proc);
|
||||
endpoint = (mca_mtl_mxm_endpoint_t*) ompi_proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL];
|
||||
|
||||
return endpoint->mxm_conn;
|
||||
}
|
||||
|
||||
static inline mxm_mq_h ompi_mtl_mxm_mq_lookup(struct ompi_communicator_t* comm) {
|
||||
return (mxm_mq_h)comm->c_pml_comm;
|
||||
}
|
||||
|
||||
/**
 * Translate an MXM completion status into an MPI status object.
 *
 *  - MXM_OK                    -> MPI_ERROR = OMPI_SUCCESS
 *  - MXM_ERR_CANCELED          -> _cancelled = true (MPI_ERROR is left untouched)
 *  - MXM_ERR_MESSAGE_TRUNCATED -> MPI_ERROR = MPI_ERR_TRUNCATE
 *  - anything else             -> MPI_ERROR = MPI_ERR_INTERN
 */
static inline void ompi_mtl_mxm_to_mpi_status(mxm_error_t status, ompi_status_public_t *ompi_status) {
    if (MXM_OK == status) {
        ompi_status->MPI_ERROR = OMPI_SUCCESS;
    } else if (MXM_ERR_CANCELED == status) {
        ompi_status->_cancelled = true;
    } else if (MXM_ERR_MESSAGE_TRUNCATED == status) {
        ompi_status->MPI_ERROR = MPI_ERR_TRUNCATE;
    } else {
        ompi_status->MPI_ERROR = MPI_ERR_INTERN;
    }
}
|
||||
|
||||
/**
 * Fill in the matching envelope (queue, connection, tag and tag mask) of
 * an MXM receive request.
 *
 * @param req   receive request to fill in.
 * @param comm  communicator whose c_pml_comm is used as the MXM queue.
 * @param src   source rank, or MPI_ANY_SOURCE for a NULL connection.
 * @param tag   tag to match, or MPI_ANY_TAG.
 */
static inline void ompi_mtl_mxm_set_recv_envelope(mxm_recv_req_t *req,
                                                  struct ompi_communicator_t *comm,
                                                  int src, int tag) {
    req->base.mq = (mxm_mq_h) comm->c_pml_comm;

    if (MPI_ANY_SOURCE == src) {
        req->base.conn = NULL;
    } else {
        req->base.conn = ompi_mtl_mxm_conn_lookup(comm, src);
    }

    if (MPI_ANY_TAG != tag) {
        /* exact match on all tag bits */
        req->tag      = tag;
        req->tag_mask = 0xffffffffU;
    } else {
        /* only the sign bit is compared: MPI_ANY_TAG should not match against negative tags */
        req->tag      = 0;
        req->tag_mask = 0x80000000U;
    }
}
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
||||
|
@ -1,7 +0,0 @@
|
||||
#
|
||||
# owner/status file
|
||||
# owner: institution that is responsible for this package
|
||||
# status: e.g. active, maintenance, unmaintained
|
||||
#
|
||||
owner: MELLANOX
|
||||
status: active
|
@ -12,7 +12,7 @@
|
||||
* Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2010-2012 Oak Ridge National Labs. All rights reserved.
|
||||
* Copyright (c) 2014 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* Copyright (c) 2015-2017 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -123,7 +123,7 @@ opal_bitmap_set_bit(opal_bitmap_t *bm, int bit)
|
||||
out of range. We don't throw any error here, because this is
|
||||
valid and we simply expand the bitmap */
|
||||
|
||||
new_size = (int)(((size_t)index / bm->array_size + 1 ) * bm->array_size);
|
||||
new_size = index + 1;
|
||||
if( new_size > bm->max_size )
|
||||
new_size = bm->max_size;
|
||||
|
||||
|
@ -429,8 +429,10 @@ int mca_common_cuda_stage_one_init(void)
|
||||
|
||||
if (true != stage_one_init_passed) {
|
||||
errmsg = opal_argv_join(errmsgs, '\n');
|
||||
opal_show_help("help-mpi-common-cuda.txt", "dlopen failed", true,
|
||||
errmsg);
|
||||
if (opal_warn_on_missing_libcuda) {
|
||||
opal_show_help("help-mpi-common-cuda.txt", "dlopen failed", true,
|
||||
errmsg);
|
||||
}
|
||||
opal_cuda_support = 0;
|
||||
}
|
||||
opal_argv_free(errmsgs);
|
||||
|
@ -166,7 +166,7 @@ The library attempted to open the following supporting CUDA libraries,
|
||||
but each of them failed. CUDA-aware support is disabled.
|
||||
%s
|
||||
If you are not interested in CUDA-aware support, then run with
|
||||
--mca mpi_cuda_support 0 to suppress this message. If you are interested
|
||||
--mca opal_warn_on_missing_libcuda 0 to suppress this message. If you are interested
|
||||
in CUDA-aware support, then try setting LD_LIBRARY_PATH to the location
|
||||
of libcuda.so.1 to get past this issue.
|
||||
#
|
||||
|
@ -61,6 +61,7 @@ bool opal_timing_overhead = true;
|
||||
|
||||
bool opal_built_with_cuda_support = OPAL_INT_TO_BOOL(OPAL_CUDA_SUPPORT);
|
||||
bool opal_cuda_support = false;
|
||||
bool opal_warn_on_missing_libcuda = true;
|
||||
#if OPAL_ENABLE_FT_CR == 1
|
||||
bool opal_base_distill_checkpoint_ready = false;
|
||||
#endif
|
||||
@ -245,6 +246,16 @@ int opal_register_params(void)
|
||||
return ret;
|
||||
}
|
||||
|
||||
opal_warn_on_missing_libcuda = true;
|
||||
ret = mca_base_var_register ("opal", "opal", NULL, "warn_on_missing_libcuda",
|
||||
"Whether to print a message when CUDA support is enabled but libcuda is not found",
|
||||
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
|
||||
OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_ALL_EQ,
|
||||
&opal_warn_on_missing_libcuda);
|
||||
if (0 > ret) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Leave pinned parameter */
|
||||
opal_leave_pinned = -1;
|
||||
ret = mca_base_var_register("ompi", "mpi", NULL, "leave_pinned",
|
||||
|
@ -48,6 +48,11 @@ OPAL_DECLSPEC extern bool opal_built_with_cuda_support;
|
||||
* */
|
||||
OPAL_DECLSPEC extern bool opal_cuda_support;
|
||||
|
||||
/**
|
||||
* * Whether we want to warn the user when libcuda is missing.
|
||||
* */
|
||||
OPAL_DECLSPEC extern bool opal_warn_on_missing_libcuda;
|
||||
|
||||
/**
|
||||
* Whether to use the "leave pinned" protocol or not (0 = no, 1 = yes,
|
||||
* -1 = determine at runtime).
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* oshmem/include/shmem-compat.h. This file contains OpenSHMEM legacy API */
|
||||
/*
|
||||
* Copyright (c) 2014-2015 Mellanox Technologies, Inc.
|
||||
* Copyright (c) 2014-2017 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -33,11 +33,6 @@ OSHMEM_DECLSPEC void* shmemalign(size_t align, size_t size);
|
||||
OSHMEM_DECLSPEC void* shrealloc(void *ptr, size_t size);
|
||||
OSHMEM_DECLSPEC void shfree(void* ptr);
|
||||
|
||||
OSHMEM_DECLSPEC void shmem_char_put(char *target, const char *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_char_get(char *target, const char *source, size_t len, int pe);
|
||||
|
||||
OSHMEM_DECLSPEC void shmem_put(void *target, const void *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void shmem_get(void *target, const void *source, size_t len, int pe);
|
||||
OSHMEM_DECLSPEC void globalexit(int status);
|
||||
|
||||
#if defined(c_plusplus) || defined(__cplusplus)
|
||||
|
@ -72,10 +72,10 @@ int mca_scoll_basic_alltoall(struct oshmem_group_t *group,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* fence (which currently acts as quiet) is needed
|
||||
* because scoll level barrier does not guarantee put completion
|
||||
/* quiet is needed because scoll level barrier does not
|
||||
* guarantee put completion
|
||||
*/
|
||||
MCA_SPML_CALL(fence());
|
||||
MCA_SPML_CALL(quiet());
|
||||
|
||||
/* Wait for operation completion */
|
||||
SCOLL_VERBOSE(14, "[#%d] Wait for operation completion", group->my_pe);
|
||||
|
@ -167,8 +167,7 @@ static int _algorithm_central_counter(struct oshmem_group_t *group,
|
||||
The root could leave the first barrier and in the second barrier it could get SHMEM_SYNC_WAIT value on
|
||||
remote node before the remote node receives its SHMEM_SYNC_RUN value in the first barrier
|
||||
*/
|
||||
/* TODO: actually it must be quiet */
|
||||
MCA_SPML_CALL(fence());
|
||||
MCA_SPML_CALL(quiet());
|
||||
}
|
||||
/* Wait for RUN signal */
|
||||
else {
|
||||
|
@ -146,10 +146,10 @@ static int _algorithm_central_counter(struct oshmem_group_t *group,
|
||||
rc = MCA_SPML_CALL(put(target, nlong, (void *)source, pe_cur));
|
||||
}
|
||||
}
|
||||
/* fence (which currently acts as quiet) is needed
|
||||
* because scoll level barrier does not guarantee put completion
|
||||
/* quiet is needed because scoll level barrier does not
|
||||
* guarantee put completion
|
||||
*/
|
||||
MCA_SPML_CALL(fence());
|
||||
MCA_SPML_CALL(quiet());
|
||||
}
|
||||
|
||||
if (rc == OSHMEM_SUCCESS) {
|
||||
|
@ -153,11 +153,7 @@ int mca_spml_base_wait(void* addr, int cmp, void* value, int datatype)
|
||||
*/
|
||||
int mca_spml_base_wait_nb(void* handle)
|
||||
{
|
||||
/* TODO fence is a gag for more accurate code
|
||||
* Use shmem_quiet() (or a function calling shmem_quiet()) or
|
||||
* shmem_wait_nb() to force completion of transfers for non-blocking operations.
|
||||
*/
|
||||
MCA_SPML_CALL(fence());
|
||||
MCA_SPML_CALL(quiet());
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
@ -16,4 +16,10 @@ AC_DEFUN([MCA_oshmem_spml_CONFIG],[
|
||||
|
||||
# this is a direct callable component, so set that up.
|
||||
MCA_SETUP_DIRECT_CALL($1, $2)
|
||||
|
||||
if test -z "$MCA_$1_$2_DSO_COMPONENTS" && test -z "$MCA_$1_$2_STATIC_COMPONENTS"; then
|
||||
OSHMEM_FOUND_WORKING_SPML=0
|
||||
else
|
||||
OSHMEM_FOUND_WORKING_SPML=1
|
||||
fi
|
||||
])
|
||||
|
@ -168,6 +168,7 @@ mca_spml_ikrit_t mca_spml_ikrit = {
|
||||
mca_spml_ikrit_send,
|
||||
mca_spml_base_wait,
|
||||
mca_spml_base_wait_nb,
|
||||
mca_spml_ikrit_fence, /* fence is implemented as quiet */
|
||||
mca_spml_ikrit_fence,
|
||||
mca_spml_ikrit_cache_mkeys,
|
||||
mca_spml_base_rmkey_free,
|
||||
|
@ -275,12 +275,19 @@ typedef int (*mca_spml_base_module_send_fn_t)(void *buf,
|
||||
mca_spml_base_put_mode_t mode);
|
||||
|
||||
/**
|
||||
* Wait for completion of all outstanding put() requests
|
||||
* Assures ordering of delivery of put() requests
|
||||
*
|
||||
* @return - OSHMEM_SUCCESS or failure status.
|
||||
*/
|
||||
typedef int (*mca_spml_base_module_fence_fn_t)(void);
|
||||
|
||||
/**
|
||||
* Wait for completion of all outstanding put() requests
|
||||
*
|
||||
* @return - OSHMEM_SUCCESS or failure status.
|
||||
*/
|
||||
typedef int (*mca_spml_base_module_quiet_fn_t)(void);
|
||||
|
||||
/**
|
||||
* Waits for completion of a non-blocking put or get issued by the calling PE.
|
||||
*
|
||||
@ -321,6 +328,7 @@ struct mca_spml_base_module_1_0_0_t {
|
||||
mca_spml_base_module_wait_fn_t spml_wait;
|
||||
mca_spml_base_module_wait_nb_fn_t spml_wait_nb;
|
||||
mca_spml_base_module_fence_fn_t spml_fence;
|
||||
mca_spml_base_module_quiet_fn_t spml_quiet;
|
||||
|
||||
mca_spml_base_module_mkey_unpack_fn_t spml_rmkey_unpack;
|
||||
mca_spml_base_module_mkey_free_fn_t spml_rmkey_free;
|
||||
|
@ -60,8 +60,8 @@ mca_spml_ucx_t mca_spml_ucx = {
|
||||
mca_spml_ucx_send,
|
||||
mca_spml_base_wait,
|
||||
mca_spml_base_wait_nb,
|
||||
mca_spml_ucx_quiet, /* At the moment fence is the same as quite for
|
||||
every spml */
|
||||
mca_spml_ucx_fence,
|
||||
mca_spml_ucx_quiet,
|
||||
mca_spml_ucx_rmkey_unpack,
|
||||
mca_spml_ucx_rmkey_free,
|
||||
mca_spml_ucx_rmkey_ptr,
|
||||
@ -520,7 +520,7 @@ int mca_spml_ucx_deregister(sshmem_mkey_t *mkeys)
|
||||
spml_ucx_mkey_t *ucx_mkey;
|
||||
map_segment_t *mem_seg;
|
||||
|
||||
MCA_SPML_CALL(fence());
|
||||
MCA_SPML_CALL(quiet());
|
||||
if (!mkeys)
|
||||
return OSHMEM_SUCCESS;
|
||||
|
||||
@ -598,7 +598,7 @@ int mca_spml_ucx_fence(void)
|
||||
{
|
||||
ucs_status_t err;
|
||||
|
||||
err = ucp_worker_flush(mca_spml_ucx.ucp_worker);
|
||||
err = ucp_worker_fence(mca_spml_ucx.ucp_worker);
|
||||
if (UCS_OK != err) {
|
||||
SPML_ERROR("fence failed: %s", ucs_status_string(err));
|
||||
oshmem_shmem_abort(-1);
|
||||
|
@ -36,7 +36,7 @@ void shmem_barrier(int PE_start, int logPE_stride, int PE_size, long *pSync)
|
||||
|
||||
#if OSHMEM_SPEC_COMPAT == 1
|
||||
/* all outstanding puts must be completed */
|
||||
shmem_fence();
|
||||
shmem_quiet();
|
||||
#endif
|
||||
|
||||
/* Create group based on PE_start, logPE_stride and PE_size */
|
||||
@ -54,7 +54,7 @@ void shmem_barrier_all(void)
|
||||
|
||||
#if OSHMEM_SPEC_COMPAT == 1
|
||||
/* all outstanding puts must be completed */
|
||||
shmem_fence();
|
||||
shmem_quiet();
|
||||
#endif
|
||||
|
||||
if (mca_scoll_sync_array) {
|
||||
|
@ -23,5 +23,5 @@
|
||||
void shmem_quiet(void)
|
||||
{
|
||||
|
||||
MCA_SPML_CALL(fence());
|
||||
MCA_SPML_CALL(quiet());
|
||||
}
|
||||
|
@ -30,5 +30,5 @@ SHMEM_GENERATE_FORTRAN_BINDINGS_SUB (void,
|
||||
|
||||
void shmem_quiet_f(void)
|
||||
{
|
||||
MCA_SPML_CALL(fence());
|
||||
MCA_SPML_CALL(quiet());
|
||||
}
|
||||
|
Загрузка…
x
Ссылка в новой задаче
Block a user