COLL: removed FCA component
- removed FCA collectives from coll/scoll Signed-off-by: Sergey Oblomov <sergeyo@mellanox.com>
Этот коммит содержится в:
родитель
acc2a706d3
Коммит
0759bb8561
@ -1,70 +0,0 @@
|
||||
dnl -*- shell-script -*-
|
||||
dnl
|
||||
dnl Copyright (c) 2011 Mellanox Technologies. All rights reserved.
|
||||
dnl Copyright (c) 2013 Cisco Systems, Inc. All rights reserved.
|
||||
dnl Copyright (c) 2015 Research Organization for Information Science
|
||||
dnl and Technology (RIST). All rights reserved.
|
||||
dnl $COPYRIGHT$
|
||||
dnl
|
||||
dnl Additional copyrights may follow
|
||||
dnl
|
||||
dnl $HEADER$
|
||||
dnl
|
||||
|
||||
# OMPI_CHECK_FCA(prefix, [action-if-found], [action-if-not-found])
|
||||
# --------------------------------------------------------
|
||||
# check if fca support can be found. sets prefix_{CPPFLAGS,
|
||||
# LDFLAGS, LIBS} as needed and runs action-if-found if there is
|
||||
# support, otherwise executes action-if-not-found
|
||||
AC_DEFUN([OMPI_CHECK_FCA],[
|
||||
OPAL_VAR_SCOPE_PUSH([ompi_check_fca_libs ompi_check_fca_happy CPPFLAGS_save LDFLAGS_save LIBS_save])
|
||||
|
||||
AC_ARG_WITH([fca],
|
||||
[AC_HELP_STRING([--with-fca(=DIR)],
|
||||
[Build fca (Mellanox Fabric Collective Accelerator) support, optionally adding
|
||||
DIR/include and DIR/lib or DIR/lib64 to the search path for headers and libraries])])
|
||||
|
||||
AS_IF([test "$with_fca" != "no"],
|
||||
[ompi_check_fca_libs=fca
|
||||
AS_IF([test ! -z "$with_fca" && test "$with_fca" != "yes"],
|
||||
[ompi_check_fca_dir=$with_fca
|
||||
AC_SUBST([coll_fca_HOME], "$ompi_check_fca_dir")],
|
||||
[AC_SUBST([coll_fca_HOME], "/")])
|
||||
|
||||
CPPFLAGS_save=$CPPFLAGS
|
||||
LDFLAGS_save=$LDFLAGS
|
||||
LIBS_save=$LIBS
|
||||
|
||||
|
||||
OPAL_LOG_MSG([$1_CPPFLAGS : $$1_CPPFLAGS], 1)
|
||||
OPAL_LOG_MSG([$1_LDFLAGS : $$1_LDFLAGS], 1)
|
||||
OPAL_LOG_MSG([$1_LIBS : $$1_LIBS], 1)
|
||||
|
||||
OPAL_CHECK_PACKAGE([$1],
|
||||
[fca/fca_api.h],
|
||||
[$ompi_check_fca_libs],
|
||||
[fca_get_version],
|
||||
[],
|
||||
[$ompi_check_fca_dir],
|
||||
[],
|
||||
[ompi_check_fca_happy="yes"],
|
||||
[ompi_check_fca_happy="no"])
|
||||
|
||||
CPPFLAGS=$CPPFLAGS_save
|
||||
LDFLAGS=$LDFLAGS_save
|
||||
LIBS=$LIBS_save],
|
||||
[ompi_check_fca_happy="no"])
|
||||
|
||||
AS_IF([test "$ompi_check_fca_happy" = "yes" && test "$enable_progress_threads" = "yes"],
|
||||
[AC_MSG_WARN([fca driver does not currently support progress threads. Disabling FCA.])
|
||||
ompi_check_fca_happy="no"])
|
||||
|
||||
AS_IF([test "$ompi_check_fca_happy" = "yes"],
|
||||
[$2],
|
||||
[AS_IF([test ! -z "$with_fca" && test "$with_fca" != "no"],
|
||||
[AC_MSG_ERROR([FCA support requested but not found. Aborting])])
|
||||
$3])
|
||||
|
||||
OPAL_VAR_SCOPE_POP
|
||||
])
|
||||
|
@ -1,48 +0,0 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
#
|
||||
# Copyright (c) 2011 Mellanox Technologies. All rights reserved.
|
||||
# Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
#
|
||||
|
||||
AM_CPPFLAGS = $(coll_fca_CPPFLAGS) -DCOLL_FCA_HOME=\"$(coll_fca_HOME)\" $(coll_fca_extra_CPPFLAGS)
|
||||
|
||||
#dist_ompidata_DATA = help-coll-fca.txt
|
||||
coll_fca_sources = \
|
||||
coll_fca.h \
|
||||
coll_fca_debug.h \
|
||||
coll_fca_api.h \
|
||||
coll_fca_convertor.h \
|
||||
coll_fca_module.c \
|
||||
coll_fca_component.c \
|
||||
coll_fca_ops.c
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
if MCA_BUILD_ompi_coll_fca_DSO
|
||||
component_noinst =
|
||||
component_install = mca_coll_fca.la
|
||||
else
|
||||
component_noinst = libmca_coll_fca.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(ompilibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_coll_fca_la_SOURCES = $(coll_fca_sources)
|
||||
mca_coll_fca_la_LIBADD = $(top_builddir)/ompi/lib@OMPI_LIBMPI_NAME@.la \
|
||||
$(coll_fca_LIBS)
|
||||
mca_coll_fca_la_LDFLAGS = -module -avoid-version $(coll_fca_LDFLAGS)
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_coll_fca_la_SOURCES =$(coll_fca_sources)
|
||||
libmca_coll_fca_la_LIBADD = $(coll_fca_LIBS)
|
||||
libmca_coll_fca_la_LDFLAGS = -module -avoid-version $(coll_fca_LDFLAGS)
|
@ -1,360 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2011 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_COLL_FCA_H
|
||||
#define MCA_COLL_FCA_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "mpi.h"
|
||||
#include "ompi/mca/mca.h"
|
||||
#include "ompi/mca/coll/coll.h"
|
||||
#include "ompi/request/request.h"
|
||||
#include "ompi/mca/pml/pml.h"
|
||||
#include "ompi/mca/coll/base/coll_tags.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "ompi/op/op.h"
|
||||
#include "orte/runtime/orte_globals.h"
|
||||
#include "coll_fca_api.h"
|
||||
#include "coll_fca_debug.h"
|
||||
|
||||
|
||||
#ifdef OMPI_DATATYPE_MAX_PREDEFINED
|
||||
|
||||
#include "opal/datatype/opal_convertor.h"
|
||||
#define FCA_CONVERTOR_T opal_convertor_t
|
||||
#define FCA_CONVERTOR_COPY_AND_PREPARE_FOR_SEND opal_convertor_copy_and_prepare_for_send
|
||||
#define FCA_CONVERTOR_COPY_AND_PREPARE_FOR_RECV opal_convertor_copy_and_prepare_for_recv
|
||||
#define FCA_CONVERTOR_CONVERTOR_GET_PACKED_SIZE opal_convertor_get_packed_size
|
||||
#define FCA_CONVERTOR_CONVERTOR_UNPACK opal_convertor_unpack
|
||||
#define FCA_CONVERTOR_CONVERTOR_PACK opal_convertor_pack
|
||||
|
||||
#define FCA_DT_MAX_PREDEFINED OMPI_DATATYPE_MAX_PREDEFINED
|
||||
#define FCA_DT_EXTENT ompi_datatype_type_extent
|
||||
#define FCA_DT_GET_TRUE_EXTENT ompi_datatype_get_true_extent
|
||||
#define FCA_DT_IS_CONTIGUOUS_MEMORY_LAYOUT(__dtype, __count) \
|
||||
ompi_datatype_is_contiguous_memory_layout(__dtype, __count)
|
||||
#else
|
||||
|
||||
#include "ompi/datatype/convertor.h"
|
||||
#define FCA_CONVERTOR_T ompi_convertor_t
|
||||
#define FCA_CONVERTOR_COPY_AND_PREPARE_FOR_SEND ompi_convertor_copy_and_prepare_for_send
|
||||
#define FCA_CONVERTOR_COPY_AND_PREPARE_FOR_RECV ompi_convertor_copy_and_prepare_for_recv
|
||||
#define FCA_CONVERTOR_CONVERTOR_GET_PACKED_SIZE ompi_convertor_get_packed_size
|
||||
#define FCA_CONVERTOR_CONVERTOR_UNPACK ompi_convertor_unpack
|
||||
#define FCA_CONVERTOR_CONVERTOR_PACK ompi_convertor_pack
|
||||
|
||||
#define FCA_DT_MAX_PREDEFINED DT_MAX_PREDEFINED
|
||||
#define FCA_DT_EXTENT ompi_ddt_type_extent
|
||||
#define FCA_DT_GET_TRUE_EXTENT ompi_ddt_get_true_extent
|
||||
#define FCA_DT_IS_CONTIGUOUS_MEMORY_LAYOUT(__dtype, __count) \
|
||||
ompi_ddt_is_contiguous_memory_layout(__dtype, __count)
|
||||
#endif
|
||||
|
||||
BEGIN_C_DECLS
|
||||
|
||||
/*
|
||||
* FCA library functions.
|
||||
* Used to load the library dynamically.
|
||||
*/
|
||||
|
||||
int modular_pow(uint64_t base, uint64_t exponent, uint64_t modulus);
|
||||
|
||||
/**
|
||||
* FCA data type information
|
||||
*/
|
||||
struct mca_coll_fca_dtype_info_t {
|
||||
ompi_datatype_t *mpi_dtype;
|
||||
size_t mpi_dtype_extent;
|
||||
int fca_dtype; /* -1 if invalid */
|
||||
size_t fca_dtype_extent;
|
||||
|
||||
};
|
||||
typedef struct mca_coll_fca_dtype_info_t mca_coll_fca_dtype_info_t;
|
||||
|
||||
/**
|
||||
* FCA operator information
|
||||
*/
|
||||
struct mca_coll_fca_op_info_t {
|
||||
ompi_op_t *mpi_op;
|
||||
int fca_op; /* -1 if invalid */
|
||||
};
|
||||
typedef struct mca_coll_fca_op_info_t mca_coll_fca_op_info_t;
|
||||
|
||||
#define FCA_MAX_OPS 32 /* Must be large enough to hold all FCA-supported operators */
|
||||
|
||||
/**
|
||||
* Globally exported structure
|
||||
*/
|
||||
struct mca_coll_fca_component_t {
|
||||
/** Base coll component */
|
||||
mca_coll_base_component_2_0_0_t super;
|
||||
|
||||
/** MCA parameter: Priority of this component */
|
||||
int fca_priority;
|
||||
|
||||
/** MCA parameter: Verbose level of this component */
|
||||
int fca_verbose;
|
||||
|
||||
/** MCA parameter: Comm_mLid */
|
||||
char *fca_comm_mlid;
|
||||
|
||||
/** MCA parameter: Comm_mGid */
|
||||
char *fca_comm_mgid;
|
||||
|
||||
/** MCA parameter: FCA_Mlid */
|
||||
char *fca_fmm_mlid;
|
||||
|
||||
/** MCA parameter: Path to fca spec file */
|
||||
char* fca_spec_file;
|
||||
|
||||
/** MCA parameter: FCA device */
|
||||
char* fca_dev;
|
||||
|
||||
/** MCA parameter: Enable FCA */
|
||||
int fca_enable;
|
||||
|
||||
/** MCA parameter: Enable FCA Barrier */
|
||||
int fca_enable_barrier;
|
||||
|
||||
/** MCA parameter: Enable FCA Bcast */
|
||||
int fca_enable_bcast;
|
||||
|
||||
/** MCA parameter: Enable FCA Reduce */
|
||||
int fca_enable_reduce;
|
||||
|
||||
/** MCA parameter: Enable FCA Reduce_Scatter */
|
||||
int fca_enable_reduce_scatter;
|
||||
|
||||
/** MCA parameter: Enable FCA Allreduce */
|
||||
int fca_enable_allreduce;
|
||||
|
||||
/** MCA parameter: Enable FCA Allgather */
|
||||
int fca_enable_allgather;
|
||||
|
||||
/** MCA parameter: Enable FCA Allgatherv */
|
||||
int fca_enable_allgatherv;
|
||||
|
||||
/** MCA parameter: Enable FCA Gather */
|
||||
int fca_enable_gather;
|
||||
|
||||
/** MCA parameter: Enable FCA Gatherv */
|
||||
int fca_enable_gatherv;
|
||||
|
||||
/** MCA parameter: Enable FCA AlltoAll */
|
||||
int fca_enable_alltoall;
|
||||
|
||||
/** MCA parameter: Enable FCA AlltoAllv */
|
||||
int fca_enable_alltoallv;
|
||||
|
||||
/** MCA parameter: Enable FCA AlltoAllw */
|
||||
int fca_enable_alltoallw;
|
||||
|
||||
/** MCA parameter: FCA NP */
|
||||
int fca_np;
|
||||
|
||||
/** MCA parameter: Enable FCA comm cache */
|
||||
int fca_enable_cache;
|
||||
|
||||
/** MCA parameter: Enable parallel hash calc */
|
||||
int fca_parallel_hash_calc;
|
||||
|
||||
/* r/o MCA parameter: compiletime libfca version */
|
||||
char* compiletime_version;
|
||||
|
||||
/* r/o MCA parameter: runtime libfca version */
|
||||
char* runtime_version;
|
||||
|
||||
/** Some statistics counters */
|
||||
double fca_total_work_time;
|
||||
double fca_work_time_parallel;
|
||||
double fca_work_time_sequency;
|
||||
int fca_cache_hit;
|
||||
int fca_cache_miss;
|
||||
int fca_hash_hit;
|
||||
int fca_hash_miss;
|
||||
int fca_max_deep_in_cache;
|
||||
|
||||
/** MCA parameter: Enable hash for cache */
|
||||
int fca_enable_hash;
|
||||
|
||||
/* FCA global stuff */
|
||||
fca_t *fca_context; /* FCA context handle */
|
||||
mca_coll_fca_dtype_info_t fca_dtypes[FCA_DT_MAX_PREDEFINED]; /* FCA dtype translation */
|
||||
mca_coll_fca_op_info_t fca_reduce_ops[FCA_MAX_OPS]; /* FCA op translation */
|
||||
|
||||
/* communicator cache */
|
||||
opal_list_t c_cache;
|
||||
|
||||
/* pointer to primes */
|
||||
int *fca_primes;
|
||||
|
||||
/** MCA parameter hash table size*/
|
||||
int fca_hash_size;
|
||||
|
||||
/** MCA parameter hash table size*/
|
||||
int fca_number_of_primes;
|
||||
|
||||
/* Pointer to hash */
|
||||
opal_list_t **fca_hash;
|
||||
|
||||
};
|
||||
typedef struct mca_coll_fca_component_t mca_coll_fca_component_t;
|
||||
|
||||
OMPI_MODULE_DECLSPEC extern mca_coll_fca_component_t mca_coll_fca_component;
|
||||
|
||||
struct mca_coll_fca_comm_wrap_t {
|
||||
opal_object_t super;
|
||||
fca_comm_t *fca_comm;
|
||||
int comm_id, rank;
|
||||
};
|
||||
typedef struct mca_coll_fca_comm_wrap_t mca_coll_fca_comm_wrap_t;
|
||||
OBJ_CLASS_DECLARATION(mca_coll_fca_comm_wrap_t);
|
||||
|
||||
struct mca_coll_fca_c_cache_item_t {
|
||||
opal_list_item_t super;
|
||||
int size;
|
||||
ompi_communicator_t *comm;
|
||||
mca_coll_fca_comm_wrap_t *fca_comm_wrap;
|
||||
};
|
||||
typedef struct mca_coll_fca_c_cache_item_t mca_coll_fca_c_cache_item_t;
|
||||
OBJ_CLASS_DECLARATION(mca_coll_fca_c_cache_item_t);
|
||||
|
||||
|
||||
/**
|
||||
* FCA enabled communicator
|
||||
*/
|
||||
struct mca_coll_fca_module_t {
|
||||
mca_coll_base_module_t super;
|
||||
|
||||
MPI_Comm comm;
|
||||
int rank;
|
||||
int local_proc_idx;
|
||||
int num_local_procs;
|
||||
int *local_ranks;
|
||||
fca_comm_t *fca_comm;
|
||||
fca_comm_desc_t fca_comm_desc;
|
||||
fca_comm_caps_t fca_comm_caps;
|
||||
|
||||
/* Saved handlers - for fallback */
|
||||
mca_coll_base_module_reduce_fn_t previous_reduce;
|
||||
mca_coll_base_module_t *previous_reduce_module;
|
||||
mca_coll_base_module_allreduce_fn_t previous_allreduce;
|
||||
mca_coll_base_module_t *previous_allreduce_module;
|
||||
mca_coll_base_module_bcast_fn_t previous_bcast;
|
||||
mca_coll_base_module_t *previous_bcast_module;
|
||||
mca_coll_base_module_barrier_fn_t previous_barrier;
|
||||
mca_coll_base_module_t *previous_barrier_module;
|
||||
mca_coll_base_module_allgather_fn_t previous_allgather;
|
||||
mca_coll_base_module_t *previous_allgather_module;
|
||||
mca_coll_base_module_allgatherv_fn_t previous_allgatherv;
|
||||
mca_coll_base_module_t *previous_allgatherv_module;
|
||||
mca_coll_base_module_alltoall_fn_t previous_alltoall;
|
||||
mca_coll_base_module_t *previous_alltoall_module;
|
||||
mca_coll_base_module_alltoallv_fn_t previous_alltoallv;
|
||||
mca_coll_base_module_t *previous_alltoallv_module;
|
||||
mca_coll_base_module_alltoallw_fn_t previous_alltoallw;
|
||||
mca_coll_base_module_t *previous_alltoallw_module;
|
||||
mca_coll_base_module_gather_fn_t previous_gather;
|
||||
mca_coll_base_module_t *previous_gather_module;
|
||||
mca_coll_base_module_gatherv_fn_t previous_gatherv;
|
||||
mca_coll_base_module_t *previous_gatherv_module;
|
||||
mca_coll_base_module_reduce_scatter_fn_t previous_reduce_scatter;
|
||||
mca_coll_base_module_t *previous_reduce_scatter_module;
|
||||
#if OMPI_FCA_VERSION >= 30
|
||||
mca_coll_base_module_scatter_fn_t previous_scatter;
|
||||
mca_coll_base_module_t *previous_scatter_module;
|
||||
#endif
|
||||
};
|
||||
typedef struct mca_coll_fca_module_t mca_coll_fca_module_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_coll_fca_module_t);
|
||||
|
||||
|
||||
/* API functions */
|
||||
int mca_coll_fca_init_query(bool enable_progress_threads, bool enable_mpi_threads);
|
||||
mca_coll_base_module_t *mca_coll_fca_comm_query(struct ompi_communicator_t *comm, int *priority);
|
||||
int mca_coll_fca_get_fca_lib(struct ompi_communicator_t *comm);
|
||||
|
||||
|
||||
/* Collective functions */
|
||||
int mca_coll_fca_allreduce(const void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype, struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
int mca_coll_fca_bcast(void *buff, int count, struct ompi_datatype_t *datatype,
|
||||
int root, struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_fca_reduce(const void *sbuf, void* rbuf, int count,
|
||||
struct ompi_datatype_t *dtype, struct ompi_op_t *op,
|
||||
int root, struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_fca_barrier(struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_fca_allgather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount, struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_fca_allgatherv(const void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, const int *rcounts, const int *disps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_fca_alltoall(const void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_fca_alltoallv(const void *sbuf, const int *scounts, const int *sdisps,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, const int *rcounts, const int *rdisps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_fca_alltoallw(const void *sbuf, const int *scounts, const int *sdisps,
|
||||
struct ompi_datatype_t * const *sdtypes,
|
||||
void *rbuf, const int *rcounts, const int *rdisps,
|
||||
struct ompi_datatype_t * const *rdtypes,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_fca_gather(const void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
int root, struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_fca_gatherv(const void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, const int *rcounts, const int *disps,
|
||||
struct ompi_datatype_t *rdtype, int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
int mca_coll_fca_reduce_scatter(const void *sbuf, void *rbuf, const int *rcounts,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module);
|
||||
|
||||
END_C_DECLS
|
||||
|
||||
#endif
|
@ -1,117 +0,0 @@
|
||||
/**
|
||||
Copyright (c) 2011 Mellanox Technologies. All rights reserved.
|
||||
Copyright (c) 2015 Research Organization for Information Science
|
||||
and Technology (RIST). All rights reserved.
|
||||
$COPYRIGHT$
|
||||
|
||||
Additional copyrights may follow
|
||||
|
||||
$HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include <fca/fca_api.h>
|
||||
#include <fca/config/fca_parse_specfile.h>
|
||||
|
||||
#ifndef FCA_API
|
||||
#define OMPI_FCA_VERSION 12
|
||||
#else
|
||||
#define OMPI_FCA_VERSION FCA_API
|
||||
#endif
|
||||
|
||||
/*
|
||||
* FCA API compatibility layer.
|
||||
* MPI build must define an FCA version macro.
|
||||
*/
|
||||
|
||||
#define OMPI_FCA_BARRIER 1
|
||||
#define OMPI_FCA_BCAST 1
|
||||
#define OMPI_FCA_REDUCE 1
|
||||
#define OMPI_FCA_ALLREDUCE 1
|
||||
|
||||
#define OMPI_FCA_REDUCE_SCATTER 0
|
||||
#define OMPI_FCA_GATHER 0
|
||||
#define OMPI_FCA_GATHERV 0
|
||||
#define OMPI_FCA_ALLTOALL 0
|
||||
#define OMPI_FCA_ALLTOALLV 0
|
||||
#define OMPI_FCA_ALLTOALLW 0
|
||||
|
||||
|
||||
#if OMPI_FCA_VERSION == 12
|
||||
|
||||
#define OMPI_FCA_ALLGATHER 0
|
||||
|
||||
#define FCA_API_ABI_MAJOR 1
|
||||
#define FCA_API_ABI_MINOR 2
|
||||
#define FCA_MAJOR_BIT 24ul
|
||||
#define FCA_MINOR_BIT 16ul
|
||||
#define EUSEMPI 287
|
||||
|
||||
static inline void mca_coll_fca_get_bcast_root(int root_rank, int *local_ranks,
|
||||
int num_local_ranks,
|
||||
fca_bcast_spec_t *spec)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < num_local_ranks; ++i) {
|
||||
if (local_ranks[i] == root_rank) {
|
||||
spec->root_indx = i;
|
||||
return;
|
||||
}
|
||||
}
|
||||
spec->root_indx = -1;
|
||||
}
|
||||
|
||||
static inline void mca_coll_fca_get_reduce_root(int root_rank, int my_rank,
|
||||
fca_reduce_spec_t *spec)
|
||||
{
|
||||
spec->is_root = root_rank == my_rank;
|
||||
}
|
||||
|
||||
#elif OMPI_FCA_VERSION >= 20
|
||||
|
||||
#define OMPI_FCA_ALLGATHER 1
|
||||
#define OMPI_FCA_ALLGATHERV 1
|
||||
#define OMPI_FCA_PROGRESS 1
|
||||
|
||||
static inline int mca_coll_fca_comm_init(fca_t *fca_context, int rank, int comm_size,
|
||||
int local_proc_idx, int num_local_procs,
|
||||
fca_comm_desc_t *comm_desc,
|
||||
fca_comm_t **fca_comm
|
||||
#if OMPI_FCA_VERSION >= 30
|
||||
, void *comm_init_data
|
||||
#endif
|
||||
)
|
||||
{
|
||||
fca_comm_init_spec_t spec;
|
||||
|
||||
spec.rank = rank;
|
||||
spec.size = comm_size;
|
||||
spec.desc = *comm_desc;
|
||||
spec.proc_idx = local_proc_idx;
|
||||
spec.num_procs = num_local_procs;
|
||||
#if OMPI_FCA_VERSION >= 30
|
||||
spec.comm_init_data = comm_init_data;
|
||||
#endif
|
||||
return fca_comm_init(fca_context, &spec, fca_comm);
|
||||
}
|
||||
|
||||
static inline void mca_coll_fca_get_bcast_root(int root_rank, int *local_ranks,
|
||||
int num_local_ranks,
|
||||
fca_bcast_spec_t *spec)
|
||||
{
|
||||
spec->root = root_rank;
|
||||
}
|
||||
|
||||
static inline void mca_coll_fca_get_reduce_root(int root_rank, int my_rank,
|
||||
fca_reduce_spec_t *spec)
|
||||
{
|
||||
spec->root = root_rank;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#error "FCA API version is unsupported"
|
||||
|
||||
#endif
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
@ -1,86 +0,0 @@
|
||||
#ifndef MCA_COLL_FCA_CONVERTOR_H
|
||||
#define MCA_COLL_FCA_CONVERTOR_H
|
||||
|
||||
|
||||
|
||||
|
||||
enum {
|
||||
MCA_COLL_CONVERTOR_NULL = 0,
|
||||
MCA_COLL_FCA_CONV_SEND,
|
||||
MCA_COLL_FCA_CONV_RECV
|
||||
};
|
||||
|
||||
|
||||
struct mca_coll_fca_convertor {
|
||||
int type;
|
||||
FCA_CONVERTOR_T ompic;
|
||||
size_t size;
|
||||
void *buf;
|
||||
};
|
||||
|
||||
#define MCA_COLL_FCA_DECLARE_CONVERTOR(__name) \
|
||||
struct mca_coll_fca_convertor __name = {MCA_COLL_CONVERTOR_NULL}
|
||||
|
||||
|
||||
static inline void mca_coll_fca_convertor_set(struct mca_coll_fca_convertor *conv,
|
||||
struct ompi_datatype_t *datatype,
|
||||
void *buffer, int count)
|
||||
{
|
||||
if (conv->type == MCA_COLL_FCA_CONV_SEND) {
|
||||
FCA_CONVERTOR_COPY_AND_PREPARE_FOR_SEND(ompi_mpi_local_convertor,
|
||||
&datatype->super, count,
|
||||
buffer, 0, &conv->ompic);
|
||||
} else if (conv->type == MCA_COLL_FCA_CONV_RECV) {
|
||||
FCA_CONVERTOR_COPY_AND_PREPARE_FOR_RECV(ompi_mpi_local_convertor,
|
||||
&datatype->super, count,
|
||||
buffer, 0, &conv->ompic);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void mca_coll_fca_convertor_create(struct mca_coll_fca_convertor *conv,
|
||||
struct ompi_datatype_t *datatype,
|
||||
int count, void *buffer, int type,
|
||||
void **tmpbuf, size_t *size)
|
||||
{
|
||||
OBJ_CONSTRUCT(&conv->ompic, FCA_CONVERTOR_T);
|
||||
conv->type = type;
|
||||
mca_coll_fca_convertor_set(conv, datatype, buffer, count);
|
||||
FCA_CONVERTOR_CONVERTOR_GET_PACKED_SIZE(&conv->ompic, &conv->size);
|
||||
conv->buf = malloc(conv->size);
|
||||
*tmpbuf = conv->buf;
|
||||
*size = conv->size;
|
||||
}
|
||||
|
||||
static inline int mca_coll_fca_convertor_valid(struct mca_coll_fca_convertor *conv)
|
||||
{
|
||||
return conv->type != MCA_COLL_CONVERTOR_NULL;
|
||||
}
|
||||
|
||||
static inline void mca_coll_fca_convertor_destroy(struct mca_coll_fca_convertor *conv)
|
||||
{
|
||||
if (mca_coll_fca_convertor_valid(conv)) {
|
||||
free(conv->buf);
|
||||
OBJ_DESTRUCT(&conv->ompic);
|
||||
}
|
||||
}
|
||||
|
||||
static inline int32_t mca_coll_fca_convertor_process(struct mca_coll_fca_convertor *conv,
|
||||
size_t offset)
|
||||
{
|
||||
struct iovec invec;
|
||||
unsigned iov_count;
|
||||
size_t size;
|
||||
|
||||
iov_count = 1;
|
||||
invec.iov_base = (char*)conv->buf + offset;
|
||||
invec.iov_len = conv->size;
|
||||
size = conv->size;
|
||||
|
||||
if (conv->type == MCA_COLL_FCA_CONV_SEND) {
|
||||
return FCA_CONVERTOR_CONVERTOR_PACK(&conv->ompic, &invec, &iov_count, &size);
|
||||
} else if (conv->type == MCA_COLL_FCA_CONV_RECV) {
|
||||
return FCA_CONVERTOR_CONVERTOR_UNPACK(&conv->ompic, &invec, &iov_count, &size);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
#endif
|
@ -1,34 +0,0 @@
|
||||
/**
|
||||
Copyright (c) 2011 Mellanox Technologies. All rights reserved.
|
||||
$COPYRIGHT$
|
||||
|
||||
Additional copyrights may follow
|
||||
|
||||
$HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_COLL_FCA_DEBUG_H
|
||||
#define MCA_COLL_FCA_DEBUG_H
|
||||
#pragma GCC system_header
|
||||
|
||||
#ifdef __BASE_FILE__
|
||||
#define __FCA_FILE__ __BASE_FILE__
|
||||
#else
|
||||
#define __FCA_FILE__ __FILE__
|
||||
#endif
|
||||
|
||||
#define FCA_VERBOSE(level, format, ...) \
|
||||
opal_output_verbose(level, mca_coll_fca_output, "%s:%d - %s() " format, \
|
||||
__FCA_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
|
||||
|
||||
#define FCA_ERROR(format, ... ) \
|
||||
opal_output_verbose(0, mca_coll_fca_output, "Error: %s:%d - %s() " format, \
|
||||
__FCA_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
|
||||
|
||||
|
||||
#define FCA_MODULE_VERBOSE(fca_module, level, format, ...) \
|
||||
FCA_VERBOSE(level, "[%p:%d] " format, (void*)(fca_module)->comm, (fca_module)->rank, ## __VA_ARGS__)
|
||||
|
||||
extern int mca_coll_fca_output;
|
||||
|
||||
#endif
|
@ -1,695 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2011 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2017 The University of Tennessee and The University
|
||||
* of Tennessee Research Foundation. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "coll_fca.h"
|
||||
|
||||
/*
|
||||
* Initial query function that is invoked during MPI_INIT, allowing
|
||||
* this module to indicate what level of thread support it provides.
|
||||
*/
|
||||
|
||||
int modular_pow(uint64_t base, uint64_t exponent, uint64_t modulus)
|
||||
{
|
||||
int result = 1;
|
||||
while(exponent>0)
|
||||
{
|
||||
if((exponent % 2) == 1)
|
||||
result = (result * base) % modulus;
|
||||
exponent = exponent >> 1;
|
||||
base = (base * base) % modulus;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
int mca_coll_fca_init_query(bool enable_progress_threads,
|
||||
bool enable_mpi_threads)
|
||||
{
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static inline ompi_proc_t* __local_rank_lookup(ompi_communicator_t *comm, int rank)
|
||||
{
|
||||
return ompi_group_peer_lookup(comm->c_local_group, rank);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fills local rank information in fca_module.
|
||||
*/
|
||||
static int __get_local_ranks(mca_coll_fca_module_t *fca_module)
|
||||
{
|
||||
ompi_communicator_t *comm = fca_module->comm;
|
||||
ompi_proc_t* proc;
|
||||
int i, rank;
|
||||
|
||||
/* Count the local ranks */
|
||||
fca_module->num_local_procs = 0;
|
||||
for (rank = 0; rank < ompi_comm_size(comm); ++rank) {
|
||||
proc = __local_rank_lookup(comm, rank);
|
||||
if (OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) {
|
||||
if (rank == fca_module->rank) {
|
||||
fca_module->local_proc_idx = fca_module->num_local_procs;
|
||||
}
|
||||
++fca_module->num_local_procs;
|
||||
}
|
||||
}
|
||||
|
||||
/* Make a list of local ranks */
|
||||
fca_module->local_ranks = calloc(fca_module->num_local_procs,
|
||||
sizeof *fca_module->local_ranks);
|
||||
if (!fca_module->local_ranks) {
|
||||
FCA_ERROR("Failed to allocate memory for %d local ranks",
|
||||
fca_module->num_local_procs);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
for (rank = 0; rank < ompi_comm_size(comm); ++rank) {
|
||||
proc = __local_rank_lookup(comm, rank);
|
||||
if (OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) {
|
||||
fca_module->local_ranks[i++] = rank;
|
||||
}
|
||||
}
|
||||
|
||||
FCA_MODULE_VERBOSE(fca_module, 3, "i am %d/%d", fca_module->local_proc_idx,
|
||||
fca_module->num_local_procs);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static int __fca_comm_new(mca_coll_fca_module_t *fca_module)
|
||||
{
|
||||
ompi_communicator_t *comm = fca_module->comm;
|
||||
fca_comm_new_spec_t spec = {0,};
|
||||
int info_size, all_info_size;
|
||||
void *all_info = NULL;
|
||||
void *my_info = NULL;
|
||||
int *rcounts = NULL;
|
||||
int *displs = NULL;
|
||||
int i, rc, ret, comm_size = ompi_comm_size(fca_module->comm);
|
||||
|
||||
/* call fca_get_rank_info() on node managers only*/
|
||||
if (fca_module->local_proc_idx == 0) {
|
||||
#if OMPI_FCA_VERSION >= 30
|
||||
my_info = fca_get_rank_info(mca_coll_fca_component.fca_context,
|
||||
fca_module->rank, &info_size);
|
||||
#else
|
||||
my_info = fca_get_rank_info(mca_coll_fca_component.fca_context,
|
||||
&info_size);
|
||||
#endif
|
||||
if (!my_info) {
|
||||
FCA_ERROR("fca_get_rank_info returned NULL");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
} else {
|
||||
info_size = 0;
|
||||
}
|
||||
FCA_MODULE_VERBOSE(fca_module, 1, "Info size: %d", info_size);
|
||||
|
||||
/* Allocate gather buffer on the root rank */
|
||||
if (fca_module->rank == 0) {
|
||||
rcounts = calloc(comm_size, sizeof *rcounts);
|
||||
}
|
||||
|
||||
/* Get all rank info sizes using MPI_Gather */
|
||||
rc = comm->c_coll->coll_gather(&info_size, 1, MPI_INT, rcounts, 1, MPI_INT, 0,
|
||||
comm, comm->c_coll->coll_gather_module);
|
||||
if (rc != OMPI_SUCCESS)
|
||||
return rc;
|
||||
|
||||
/* Allocate buffer for gathering rank information on rank0 */
|
||||
if (fca_module->rank == 0) {
|
||||
all_info_size = 0;
|
||||
displs = calloc(comm_size, sizeof *displs);
|
||||
|
||||
for (i = 0; i < comm_size; ++i) {
|
||||
displs[i] = all_info_size;
|
||||
all_info_size += rcounts[i];
|
||||
|
||||
if (rcounts[i] > 0)
|
||||
++spec.rank_count;
|
||||
|
||||
FCA_MODULE_VERBOSE(fca_module, 1, "gatherv: rcounts[%d]=%d displs[%d]=%d",
|
||||
i, rcounts[i], i, displs[i]);
|
||||
}
|
||||
|
||||
FCA_MODULE_VERBOSE(fca_module, 1, "Total rank_info size: %d", all_info_size);
|
||||
all_info = calloc(all_info_size, 1);
|
||||
}
|
||||
|
||||
/* Send all node managers information to rank0 using MPI_Gatherv */
|
||||
rc = comm->c_coll->coll_gatherv(my_info, info_size, MPI_BYTE,
|
||||
all_info, rcounts, displs, MPI_BYTE, 0,
|
||||
comm, comm->c_coll->coll_gather_module);
|
||||
if (rc != OMPI_SUCCESS) {
|
||||
FCA_ERROR("Failed to gather rank information to rank0: %d", rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Rank0 calls fca_comm_new() and fills fca_comm_spec filed */
|
||||
if (fca_module->rank == 0) {
|
||||
spec.rank_info = all_info;
|
||||
spec.is_comm_world = comm == MPI_COMM_WORLD;
|
||||
|
||||
free(displs);
|
||||
free(rcounts);
|
||||
#if OMPI_FCA_VERSION >= 30
|
||||
spec.comm_size = comm_size;
|
||||
|
||||
spec.comm_init_data = NULL;
|
||||
spec.comm_init_data_size = 0;
|
||||
#endif
|
||||
FCA_MODULE_VERBOSE(fca_module, 1, "starting fca_comm_new(), rank_count: %d",
|
||||
spec.rank_count);
|
||||
|
||||
ret = fca_comm_new(mca_coll_fca_component.fca_context,
|
||||
&spec, &fca_module->fca_comm_desc);
|
||||
|
||||
free(all_info);
|
||||
}
|
||||
|
||||
/* Broadcast return value from rank0 to all other ranks */
|
||||
rc = fca_module->previous_bcast(&ret, 1, MPI_INT, 0, comm,
|
||||
fca_module->previous_bcast_module);
|
||||
if (rc != OMPI_SUCCESS) {
|
||||
FCA_ERROR("Failed to broadcast comm_new return value from rank0: %d", rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Examine comm_new return value */
|
||||
if (ret < 0) {
|
||||
FCA_ERROR("COMM_NEW failed: %s", fca_strerror(ret));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
#if OMPI_FCA_VERSION >= 30
|
||||
/* Send comm_ini_data_size to all ranks in comm */
|
||||
rc = fca_module->previous_bcast(&spec.comm_init_data_size, 1, MPI_INT,
|
||||
0, comm, fca_module->previous_bcast_module);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
FCA_ERROR("Failed to broadcast comm init data size value from rank0: %d", rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (0 != fca_module->rank &&
|
||||
NULL == (spec.comm_init_data = calloc(1, spec.comm_init_data_size))) {
|
||||
FCA_ERROR("Failed to allocate memory for comm init data.");
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* Send comm_ini_data to all ranks in comm */
|
||||
rc = fca_module->previous_scatter(spec.comm_init_data, spec.comm_init_data_size, MPI_BYTE,
|
||||
spec.comm_init_data, spec.comm_init_data_size, MPI_BYTE,
|
||||
0, comm, fca_module->previous_scatter_module);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
FCA_ERROR("Failed to scatter comm_init sizes return value from rank0: %d", rc);
|
||||
return rc;
|
||||
}
|
||||
#endif
|
||||
/* Release allocate rank_info on node managers */
|
||||
if (fca_module->local_proc_idx == 0) {
|
||||
fca_free_rank_info(my_info);
|
||||
}
|
||||
|
||||
/* Pass fca_comm_desc to all ranks using MPI_Bcast */
|
||||
rc = fca_module->previous_bcast(&fca_module->fca_comm_desc,
|
||||
sizeof(fca_module->fca_comm_desc), MPI_BYTE, 0,
|
||||
comm, fca_module->previous_bcast_module);
|
||||
if (rc != OMPI_SUCCESS) {
|
||||
FCA_ERROR("Failed to broadcast comm_desc from rank0: %d", rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
FCA_MODULE_VERBOSE(fca_module, 1, "Received FCA communicator spec, comm_id %d",
|
||||
fca_module->fca_comm_desc.comm_id);
|
||||
|
||||
#if OMPI_FCA_VERSION >= 30
|
||||
/* allocate comm_init_spec */
|
||||
FCA_MODULE_VERBOSE(fca_module, 1, "Starting COMM_INIT comm_id %d proc_idx %d num_procs %d",
|
||||
fca_module->fca_comm_desc.comm_id, fca_module->local_proc_idx,
|
||||
fca_module->num_local_procs);
|
||||
|
||||
ret = mca_coll_fca_comm_init(mca_coll_fca_component.fca_context,
|
||||
fca_module->rank, comm_size,
|
||||
fca_module->local_proc_idx, fca_module->num_local_procs,
|
||||
&fca_module->fca_comm_desc, &fca_module->fca_comm,
|
||||
spec.comm_init_data);
|
||||
if (ret < 0) {
|
||||
FCA_ERROR("COMM_INIT failed: %s", fca_strerror(ret));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
if (0 != fca_module->rank) {
|
||||
free(spec.comm_init_data);
|
||||
}
|
||||
#endif
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static int __create_fca_comm(mca_coll_fca_module_t *fca_module)
|
||||
{
|
||||
int rc, ret;
|
||||
int result = MPI_UNEQUAL;
|
||||
mca_coll_fca_c_cache_item_t *c_item = NULL, *c_item_new = NULL;
|
||||
mca_coll_fca_component_t *cm = &mca_coll_fca_component;
|
||||
int comm_size = ompi_comm_size(fca_module->comm);
|
||||
ompi_communicator_t *comm = fca_module->comm;
|
||||
opal_list_t *c_cache;
|
||||
struct timeval start, end, seq_start, seq_end, par_start, par_end;
|
||||
int act_deep;
|
||||
|
||||
|
||||
if(mca_coll_fca_component.fca_verbose == 10) {
|
||||
gettimeofday(&start, NULL);
|
||||
}
|
||||
|
||||
if(mca_coll_fca_component.fca_enable_cache) {
|
||||
|
||||
c_cache = &cm->c_cache;
|
||||
|
||||
if(mca_coll_fca_component.fca_enable_hash){
|
||||
|
||||
int grank = ORTE_PROC_MY_NAME->vpid;
|
||||
int hash_index, part_of_hash_index;
|
||||
|
||||
if(mca_coll_fca_component.fca_parallel_hash_calc == 1) {
|
||||
|
||||
if(mca_coll_fca_component.fca_verbose == 10){
|
||||
gettimeofday(&par_start, NULL);
|
||||
}
|
||||
|
||||
part_of_hash_index = modular_pow(cm->fca_primes[grank % cm->fca_number_of_primes], grank, cm->fca_hash_size);
|
||||
rc = comm->c_coll->coll_allreduce(&part_of_hash_index, &hash_index, 1, MPI_INT, MPI_SUM, comm, comm->c_coll->coll_allreduce_module);
|
||||
if (rc != OMPI_SUCCESS) {
|
||||
FCA_ERROR("Failed to reduce hash_index: %d", rc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
if(mca_coll_fca_component.fca_verbose == 10){
|
||||
gettimeofday(&par_end, NULL);
|
||||
mca_coll_fca_component.fca_work_time_parallel =+
|
||||
par_end.tv_sec - par_start.tv_sec + 1e-6 * (par_end.tv_usec - par_start.tv_usec);
|
||||
}
|
||||
}else{
|
||||
|
||||
if(mca_coll_fca_component.fca_verbose == 10){
|
||||
gettimeofday(&seq_start, NULL);
|
||||
}
|
||||
|
||||
hash_index = 0;
|
||||
int q_counter = 0;
|
||||
int q_comm_size = ompi_comm_size (comm);
|
||||
|
||||
for(q_counter = 0; q_counter < q_comm_size; q_counter++)
|
||||
{
|
||||
hash_index += modular_pow(cm->fca_primes[q_counter % cm->fca_number_of_primes], q_counter, cm->fca_hash_size);
|
||||
}
|
||||
|
||||
if(mca_coll_fca_component.fca_verbose == 10){
|
||||
gettimeofday(&seq_end, NULL);
|
||||
mca_coll_fca_component.fca_work_time_sequency =+
|
||||
seq_end.tv_sec - seq_start.tv_sec + 1e-6 * (seq_end.tv_usec - seq_start.tv_usec);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if(cm->fca_hash[hash_index % cm->fca_hash_size] != NULL)
|
||||
{
|
||||
c_cache = cm->fca_hash[hash_index % cm->fca_hash_size];
|
||||
if(mca_coll_fca_component.fca_verbose == 10) {
|
||||
gettimeofday(&end, NULL);
|
||||
mca_coll_fca_component.fca_total_work_time =+
|
||||
end.tv_sec - start.tv_sec + 1e-6 * (end.tv_usec - start.tv_usec);
|
||||
mca_coll_fca_component.fca_hash_hit += 1;
|
||||
}
|
||||
}else
|
||||
{
|
||||
if(mca_coll_fca_component.fca_verbose == 10) {
|
||||
mca_coll_fca_component.fca_hash_miss += 1;
|
||||
}
|
||||
c_cache = OBJ_NEW(opal_list_t);
|
||||
cm->fca_hash[hash_index % cm->fca_hash_size] = c_cache;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
act_deep = 0;
|
||||
for( c_item = (mca_coll_fca_c_cache_item_t *)opal_list_get_first(c_cache);
|
||||
c_item != (mca_coll_fca_c_cache_item_t *)opal_list_get_end(c_cache);
|
||||
c_item = (mca_coll_fca_c_cache_item_t *)opal_list_get_next((opal_list_item_t *) c_item)){
|
||||
act_deep++;
|
||||
/* first check the size */
|
||||
if( c_item && (comm_size == c_item->size)) {
|
||||
/* then we have a potential cache hit */
|
||||
ompi_comm_compare(comm, c_item->comm, &result);
|
||||
if( MPI_CONGRUENT == result) {
|
||||
/* cache hit! Return the context and be done with it */
|
||||
/* first bump the score */
|
||||
ret = fca_comm_get_caps(c_item->fca_comm_wrap->fca_comm, &fca_module->fca_comm_caps);
|
||||
if (ret < 0) {
|
||||
FCA_ERROR("GET_COMM_CAPS failed: %s", fca_strerror(ret));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
fca_module->fca_comm = c_item->fca_comm_wrap->fca_comm;
|
||||
|
||||
if(mca_coll_fca_component.fca_verbose == 10) {
|
||||
gettimeofday(&end, NULL);
|
||||
|
||||
mca_coll_fca_component.fca_total_work_time =+
|
||||
end.tv_sec - start.tv_sec + 1e-6 * (end.tv_usec - start.tv_usec);
|
||||
|
||||
mca_coll_fca_component.fca_cache_hit += 1;
|
||||
|
||||
if(act_deep>mca_coll_fca_component.fca_max_deep_in_cache)
|
||||
mca_coll_fca_component.fca_max_deep_in_cache = act_deep;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
rc = __fca_comm_new(fca_module);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
#if OMPI_FCA_VERSION < 30
|
||||
/* allocate comm_init_spec */
|
||||
FCA_MODULE_VERBOSE(fca_module, 1, "Starting COMM_INIT comm_id %d proc_idx %d num_procs %d",
|
||||
fca_module->fca_comm_desc.comm_id, fca_module->local_proc_idx,
|
||||
fca_module->num_local_procs);
|
||||
|
||||
ret = mca_coll_fca_comm_init(mca_coll_fca_component.fca_context,
|
||||
fca_module->rank, ompi_comm_size(fca_module->comm),
|
||||
fca_module->local_proc_idx, fca_module->num_local_procs,
|
||||
&fca_module->fca_comm_desc, &fca_module->fca_comm);
|
||||
if (ret < 0) {
|
||||
FCA_ERROR("COMM_INIT failed: %s", fca_strerror(ret));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
#endif
|
||||
/* get communicator capabilities */
|
||||
ret = fca_comm_get_caps(fca_module->fca_comm, &fca_module->fca_comm_caps);
|
||||
if (ret < 0) {
|
||||
FCA_ERROR("GET_COMM_CAPS failed: %s", fca_strerror(ret));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* by this point every rank in the communicator is set up */
|
||||
FCA_MODULE_VERBOSE(fca_module, 1, "Initialized FCA communicator, comm_id %d",
|
||||
fca_module->fca_comm_desc.comm_id);
|
||||
if(mca_coll_fca_component.fca_enable_cache) {
|
||||
|
||||
c_item_new = OBJ_NEW(mca_coll_fca_c_cache_item_t);
|
||||
c_item_new->fca_comm_wrap = OBJ_NEW(mca_coll_fca_comm_wrap_t);
|
||||
|
||||
OBJ_RETAIN(comm);
|
||||
|
||||
c_item_new->size = comm_size;
|
||||
c_item_new->comm = comm;
|
||||
c_item_new->fca_comm_wrap->fca_comm = fca_module->fca_comm;
|
||||
c_item_new->fca_comm_wrap->rank = fca_module->rank;
|
||||
c_item_new->fca_comm_wrap->comm_id = fca_module->fca_comm_desc.comm_id;
|
||||
|
||||
opal_list_append(c_cache,(opal_list_item_t *) c_item_new);
|
||||
}
|
||||
|
||||
if(mca_coll_fca_component.fca_verbose == 10) {
|
||||
|
||||
gettimeofday(&end, NULL);
|
||||
|
||||
mca_coll_fca_component.fca_total_work_time =+
|
||||
end.tv_sec - start.tv_sec + 1e-6 * (end.tv_usec - start.tv_usec);
|
||||
|
||||
mca_coll_fca_component.fca_cache_miss += 1;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static void __destroy_fca_comm(mca_coll_fca_module_t *fca_module)
|
||||
{
|
||||
int ret;
|
||||
|
||||
fca_comm_destroy(fca_module->fca_comm);
|
||||
if (fca_module->rank == 0) {
|
||||
ret = fca_comm_end(mca_coll_fca_component.fca_context,
|
||||
fca_module->fca_comm_desc.comm_id);
|
||||
if (ret < 0) {
|
||||
FCA_ERROR("COMM_END failed: %s", fca_strerror(ret));
|
||||
}
|
||||
}
|
||||
|
||||
FCA_MODULE_VERBOSE(fca_module, 1, "Destroyed FCA communicator, comm_id %d",
|
||||
fca_module->fca_comm_desc.comm_id);
|
||||
}
|
||||
|
||||
#define FCA_SAVE_PREV_COLL_API(__api) do {\
|
||||
fca_module->previous_ ## __api = comm->c_coll->coll_ ## __api;\
|
||||
fca_module->previous_ ## __api ## _module = comm->c_coll->coll_ ## __api ## _module;\
|
||||
if (!comm->c_coll->coll_ ## __api || !comm->c_coll->coll_ ## __api ## _module) {\
|
||||
FCA_VERBOSE(1, "(%d/%s): no underlying " # __api"; disqualifying myself",\
|
||||
comm->c_contextid, comm->c_name);\
|
||||
return OMPI_ERROR;\
|
||||
}\
|
||||
OBJ_RETAIN(fca_module->previous_ ## __api ## _module);\
|
||||
} while(0)
|
||||
|
||||
static int __save_coll_handlers(mca_coll_fca_module_t *fca_module)
|
||||
{
|
||||
ompi_communicator_t *comm = fca_module->comm;
|
||||
|
||||
FCA_SAVE_PREV_COLL_API(barrier);
|
||||
FCA_SAVE_PREV_COLL_API(bcast);
|
||||
FCA_SAVE_PREV_COLL_API(reduce);
|
||||
FCA_SAVE_PREV_COLL_API(allreduce);
|
||||
FCA_SAVE_PREV_COLL_API(allgather);
|
||||
FCA_SAVE_PREV_COLL_API(allgatherv);
|
||||
FCA_SAVE_PREV_COLL_API(gather);
|
||||
FCA_SAVE_PREV_COLL_API(gatherv);
|
||||
FCA_SAVE_PREV_COLL_API(alltoall);
|
||||
FCA_SAVE_PREV_COLL_API(alltoallv);
|
||||
FCA_SAVE_PREV_COLL_API(alltoallw);
|
||||
FCA_SAVE_PREV_COLL_API(reduce_scatter);
|
||||
#if OMPI_FCA_VERSION >= 30
|
||||
FCA_SAVE_PREV_COLL_API(scatter);
|
||||
#endif
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize module on the communicator
|
||||
*/
|
||||
static int mca_coll_fca_module_enable(mca_coll_base_module_t *module,
|
||||
struct ompi_communicator_t *comm)
|
||||
{
|
||||
|
||||
mca_coll_fca_module_t *fca_module = (mca_coll_fca_module_t*) module;
|
||||
int rc;
|
||||
|
||||
fca_module->comm = comm;
|
||||
fca_module->rank = ompi_comm_rank(comm);
|
||||
|
||||
rc = mca_coll_fca_get_fca_lib(comm);
|
||||
if (rc != OMPI_SUCCESS)
|
||||
return rc;
|
||||
|
||||
rc = __save_coll_handlers(fca_module);
|
||||
if (rc != OMPI_SUCCESS)
|
||||
return rc;
|
||||
|
||||
rc = __get_local_ranks(fca_module);
|
||||
if (rc != OMPI_SUCCESS)
|
||||
return rc;
|
||||
|
||||
rc = __create_fca_comm(fca_module);
|
||||
if (rc != OMPI_SUCCESS)
|
||||
return rc;
|
||||
|
||||
FCA_MODULE_VERBOSE(fca_module, 1, "FCA Module initialized");
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
static int mca_coll_fca_ft_event(int state)
|
||||
{
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static void mca_coll_fca_module_clear(mca_coll_fca_module_t *fca_module)
|
||||
{
|
||||
fca_module->num_local_procs = 0;
|
||||
fca_module->local_ranks = NULL;
|
||||
fca_module->fca_comm = NULL;
|
||||
|
||||
fca_module->previous_barrier = NULL;
|
||||
fca_module->previous_bcast = NULL;
|
||||
fca_module->previous_reduce = NULL;
|
||||
fca_module->previous_allreduce = NULL;
|
||||
fca_module->previous_allgather = NULL;
|
||||
fca_module->previous_allgatherv = NULL;
|
||||
fca_module->previous_gather = NULL;
|
||||
fca_module->previous_gatherv = NULL;
|
||||
fca_module->previous_alltoall = NULL;
|
||||
fca_module->previous_alltoallv = NULL;
|
||||
fca_module->previous_alltoallw = NULL;
|
||||
fca_module->previous_reduce_scatter = NULL;
|
||||
}
|
||||
|
||||
static void mca_coll_fca_module_construct(mca_coll_fca_module_t *fca_module)
|
||||
{
|
||||
FCA_VERBOSE(5, "==>");
|
||||
mca_coll_fca_module_clear(fca_module);
|
||||
}
|
||||
|
||||
static void mca_coll_fca_module_destruct(mca_coll_fca_module_t *fca_module)
|
||||
{
|
||||
FCA_VERBOSE(5, "==>");
|
||||
if(mca_coll_fca_component.fca_enable_cache == 0) {
|
||||
|
||||
if (fca_module->fca_comm) {
|
||||
__destroy_fca_comm(fca_module);
|
||||
}
|
||||
}
|
||||
|
||||
OBJ_RELEASE(fca_module->previous_barrier_module);
|
||||
OBJ_RELEASE(fca_module->previous_bcast_module);
|
||||
OBJ_RELEASE(fca_module->previous_reduce_module);
|
||||
OBJ_RELEASE(fca_module->previous_allreduce_module);
|
||||
OBJ_RELEASE(fca_module->previous_allgather_module);
|
||||
OBJ_RELEASE(fca_module->previous_allgatherv_module);
|
||||
OBJ_RELEASE(fca_module->previous_gather_module);
|
||||
OBJ_RELEASE(fca_module->previous_gatherv_module);
|
||||
OBJ_RELEASE(fca_module->previous_alltoall_module);
|
||||
OBJ_RELEASE(fca_module->previous_alltoallv_module);
|
||||
OBJ_RELEASE(fca_module->previous_alltoallw_module);
|
||||
OBJ_RELEASE(fca_module->previous_reduce_scatter_module);
|
||||
#if OMPI_FCA_VERSION >= 30
|
||||
OBJ_RELEASE(fca_module->previous_scatter_module);
|
||||
#endif
|
||||
|
||||
free(fca_module->local_ranks);
|
||||
mca_coll_fca_module_clear(fca_module);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Invoked when there's a new communicator that has been created.
|
||||
* Look at the communicator and decide which set of functions and
|
||||
* priority we want to return.
|
||||
*/
|
||||
mca_coll_base_module_t *
|
||||
mca_coll_fca_comm_query(struct ompi_communicator_t *comm, int *priority)
|
||||
{
|
||||
mca_coll_base_module_t *module;
|
||||
int size = ompi_comm_size(comm);
|
||||
int local_peers = 0;
|
||||
mca_coll_fca_module_t *fca_module;
|
||||
|
||||
*priority = 0;
|
||||
module = NULL;
|
||||
|
||||
if (!mca_coll_fca_component.fca_enable)
|
||||
goto exit;
|
||||
|
||||
if (size < mca_coll_fca_component.fca_np)
|
||||
goto exit;
|
||||
|
||||
if (!ompi_group_have_remote_peers(comm->c_local_group) || OMPI_COMM_IS_INTER(comm))
|
||||
goto exit;
|
||||
|
||||
fca_module = OBJ_NEW(mca_coll_fca_module_t);
|
||||
if (!fca_module)
|
||||
goto exit;
|
||||
|
||||
fca_module->super.coll_module_enable = mca_coll_fca_module_enable;
|
||||
fca_module->super.ft_event = mca_coll_fca_ft_event;
|
||||
fca_module->super.coll_allgather = mca_coll_fca_component.fca_enable_allgather? mca_coll_fca_allgather : NULL;
|
||||
fca_module->super.coll_allgatherv = mca_coll_fca_component.fca_enable_allgatherv? mca_coll_fca_allgatherv : NULL;
|
||||
fca_module->super.coll_allreduce = mca_coll_fca_component.fca_enable_allreduce? mca_coll_fca_allreduce : NULL;
|
||||
fca_module->super.coll_alltoall = mca_coll_fca_component.fca_enable_alltoall? mca_coll_fca_alltoall : NULL;
|
||||
fca_module->super.coll_alltoallv = mca_coll_fca_component.fca_enable_alltoallv? mca_coll_fca_alltoallv : NULL;
|
||||
fca_module->super.coll_alltoallw = mca_coll_fca_component.fca_enable_alltoallw? mca_coll_fca_alltoallw : NULL;
|
||||
fca_module->super.coll_barrier = mca_coll_fca_component.fca_enable_barrier? mca_coll_fca_barrier : NULL;
|
||||
fca_module->super.coll_bcast = mca_coll_fca_component.fca_enable_bcast? mca_coll_fca_bcast : NULL;
|
||||
fca_module->super.coll_exscan = NULL;
|
||||
fca_module->super.coll_gather = mca_coll_fca_component.fca_enable_gather? mca_coll_fca_gather : NULL;
|
||||
fca_module->super.coll_gatherv = mca_coll_fca_component.fca_enable_gatherv? mca_coll_fca_gatherv : NULL;
|
||||
fca_module->super.coll_reduce = mca_coll_fca_component.fca_enable_reduce? mca_coll_fca_reduce : NULL;
|
||||
fca_module->super.coll_reduce_scatter = mca_coll_fca_component.fca_enable_reduce_scatter? mca_coll_fca_reduce_scatter : NULL;
|
||||
fca_module->super.coll_scan = NULL;
|
||||
fca_module->super.coll_scatter = NULL;
|
||||
fca_module->super.coll_scatterv = NULL;
|
||||
|
||||
*priority = mca_coll_fca_component.fca_priority;
|
||||
module = &fca_module->super;
|
||||
|
||||
exit:
|
||||
FCA_VERBOSE(4, "Query FCA module for comm %p size %d rank %d local_peers=%d: priority=%d %s",
|
||||
(void *)comm, size, ompi_comm_rank(comm), local_peers,
|
||||
*priority, module ? "enabled" : "disabled");
|
||||
return module;
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_coll_fca_module_t,
|
||||
mca_coll_base_module_t,
|
||||
mca_coll_fca_module_construct,
|
||||
mca_coll_fca_module_destruct);
|
||||
|
||||
|
||||
static void mca_coll_fca_comm_wrap_constructor(mca_coll_fca_comm_wrap_t *item) {
|
||||
item->fca_comm = NULL;
|
||||
item->comm_id = -1;
|
||||
item->rank = -1;
|
||||
}
|
||||
|
||||
static void mca_coll_fca_comm_wrap_destruct(mca_coll_fca_comm_wrap_t *item) {
|
||||
|
||||
int ret;
|
||||
|
||||
if(item->fca_comm != NULL)
|
||||
{
|
||||
fca_comm_destroy(item->fca_comm);
|
||||
if (item->rank == 0) {
|
||||
ret = fca_comm_end(mca_coll_fca_component.fca_context,
|
||||
item->comm_id);
|
||||
if (ret < 0) {
|
||||
FCA_ERROR("COMM_END failed: %s", fca_strerror(ret));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_coll_fca_comm_wrap_t,
|
||||
opal_object_t,
|
||||
mca_coll_fca_comm_wrap_constructor,
|
||||
mca_coll_fca_comm_wrap_destruct);
|
||||
|
||||
|
||||
static void mca_coll_fca_c_cache_item_construct(mca_coll_fca_c_cache_item_t *item) {
|
||||
item->comm = NULL;
|
||||
item->size = -1;
|
||||
/* item->fca_comm_wrap = OBJ_NEW(mca_coll_fca_comm_wrap_t); */
|
||||
item->fca_comm_wrap = NULL;
|
||||
}
|
||||
|
||||
static void mca_coll_fca_c_cache_item_destruct(mca_coll_fca_c_cache_item_t *item) {
|
||||
if(item->fca_comm_wrap != NULL) {
|
||||
OBJ_RELEASE(item->fca_comm_wrap);
|
||||
/* OBJ_RELEASE(item->comm); */
|
||||
}
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_coll_fca_c_cache_item_t,
|
||||
opal_list_item_t,
|
||||
mca_coll_fca_c_cache_item_construct,
|
||||
mca_coll_fca_c_cache_item_destruct);
|
@ -1,633 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2011 Mellanox Technologies. All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2018 Cisco Systems, Inc. All rights reserved
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifdef HAVE_ALLOCA_H
|
||||
#include <alloca.h>
|
||||
#endif
|
||||
|
||||
#include "ompi_config.h"
|
||||
#include "ompi/constants.h"
|
||||
#include "coll_fca.h"
|
||||
#include "coll_fca_convertor.h"
|
||||
|
||||
|
||||
static mca_coll_fca_dtype_info_t* mca_coll_fca_get_dtype(ompi_datatype_t *dtype)
|
||||
{
|
||||
mca_coll_fca_dtype_info_t *dtype_info;
|
||||
ptrdiff_t lb, extent;
|
||||
int id = dtype->id;
|
||||
int fca_dtype;
|
||||
|
||||
if (id < 0 || id >= FCA_DT_MAX_PREDEFINED) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Different dtype structures may have the same id. In that case, we assume
|
||||
* they are aliases.
|
||||
*/
|
||||
dtype_info = &mca_coll_fca_component.fca_dtypes[id];
|
||||
if (dtype_info->mpi_dtype->id == id) {
|
||||
return dtype_info;
|
||||
}
|
||||
|
||||
/* assert we don't overwrite another datatype */
|
||||
assert(dtype_info->mpi_dtype == MPI_DATATYPE_NULL);
|
||||
|
||||
fca_dtype = fca_translate_mpi_dtype(dtype->name);
|
||||
if (fca_dtype < 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
FCA_DT_GET_TRUE_EXTENT(dtype, &lb, &extent);
|
||||
dtype_info->mpi_dtype = dtype;
|
||||
dtype_info->mpi_dtype_extent = extent;
|
||||
dtype_info->fca_dtype = fca_dtype;
|
||||
dtype_info->fca_dtype_extent = fca_get_dtype_size(fca_dtype);
|
||||
FCA_VERBOSE(2, "Added new dtype[%d]: %s fca id: %d, mpi size: %lu, fca size: %lu",
|
||||
id, dtype->name, dtype_info->fca_dtype, dtype_info->mpi_dtype_extent,
|
||||
dtype_info->fca_dtype_extent);
|
||||
return dtype_info;
|
||||
}
|
||||
|
||||
static mca_coll_fca_op_info_t *mca_coll_fca_get_op(ompi_op_t *op)
|
||||
{
|
||||
mca_coll_fca_op_info_t *op_info;
|
||||
int i, fca_op;
|
||||
|
||||
/*
|
||||
* Find 'op' in the array by exhaustive search. We assume all valid ops are
|
||||
* in the beginning. If we stumble on a MPI_OP_NULL, we try to resolve the FCA
|
||||
* operation code and store it in the array.
|
||||
*/
|
||||
op_info = mca_coll_fca_component.fca_reduce_ops;
|
||||
for (i = 0; i < FCA_MAX_OPS; ++i, ++op_info) {
|
||||
if (op_info->mpi_op == op) {
|
||||
return op_info;
|
||||
} else if (op_info->mpi_op == MPI_OP_NULL) {
|
||||
fca_op = fca_translate_mpi_op(op->o_name);
|
||||
if (fca_op < 0)
|
||||
return NULL;
|
||||
op_info->mpi_op = op;
|
||||
op_info->fca_op = fca_op;
|
||||
FCA_VERBOSE(2, "Added new op[%d]: %s fca id: %d", i, op->o_name, fca_op);
|
||||
return op_info;
|
||||
}
|
||||
}
|
||||
/* assert the array does not overflow */
|
||||
/*assert(mca_coll_fca_component.fca_reduce_ops[FCA_MAX_OPS - 1] == MPI_OP_NULL);*/
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* If "datatype" is contiguous when it appears "count" times, return 1 and
|
||||
* set "*size" to the total buffer size, and *gap to the gap before the data.
|
||||
* Otherwise return 0.
|
||||
*/
|
||||
static inline int mca_coll_fca_array_size(ompi_datatype_t *dtype, int count,
|
||||
size_t *gap, size_t *size)
|
||||
{
|
||||
ptrdiff_t true_lb, true_extent;
|
||||
|
||||
if (FCA_DT_IS_CONTIGUOUS_MEMORY_LAYOUT(dtype, count)) {
|
||||
FCA_DT_GET_TRUE_EXTENT(dtype, &true_lb, &true_extent);
|
||||
*gap = true_lb;
|
||||
*size = true_extent * count;
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
static int mca_coll_fca_fill_reduce_spec(int count, ompi_datatype_t *dtype,
|
||||
ompi_op_t *op, fca_reduce_spec_t *spec,
|
||||
int max_fca_payload)
|
||||
{
|
||||
mca_coll_fca_dtype_info_t *dtype_info;
|
||||
mca_coll_fca_op_info_t *op_info;
|
||||
|
||||
/* Check dtype */
|
||||
dtype_info = mca_coll_fca_get_dtype(dtype);
|
||||
if (!dtype_info) {
|
||||
FCA_VERBOSE(10, "Unsupported dtype: %s", dtype->name);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* Check FCA size */
|
||||
if ((int)(dtype_info->fca_dtype_extent * count) > max_fca_payload) {
|
||||
FCA_VERBOSE(10, "Unsupported buffer size: %lu", dtype_info->fca_dtype_extent * count);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* Check operation */
|
||||
op_info = mca_coll_fca_get_op(op);
|
||||
if (!op_info) {
|
||||
FCA_VERBOSE(10, "Unsupported op: %s", op->o_name);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* Fill spec */
|
||||
spec->dtype = dtype_info->fca_dtype;
|
||||
spec->op = op_info->fca_op;
|
||||
spec->length = count;
|
||||
spec->buf_size = dtype_info->mpi_dtype_extent * count;
|
||||
if (MPI_IN_PLACE == spec->sbuf) {
|
||||
FCA_VERBOSE(10, "Using MPI_IN_PLACE for sbuf");
|
||||
spec->sbuf = spec->rbuf;
|
||||
} else if (MPI_IN_PLACE == spec->rbuf) {
|
||||
FCA_VERBOSE(10, "Using MPI_IN_PLACE for rbuf");
|
||||
spec->rbuf = spec->sbuf;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Function: - barrier
|
||||
* Returns: - MPI_SUCCESS or error code
|
||||
*/
|
||||
int mca_coll_fca_barrier(struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_fca_module_t *fca_module = (mca_coll_fca_module_t*)module;
|
||||
int ret;
|
||||
|
||||
FCA_VERBOSE(5,"Using FCA Barrier");
|
||||
if (OPAL_UNLIKELY(ompi_mpi_state >= OMPI_MPI_STATE_FINALIZE_STARTED)) {
|
||||
FCA_VERBOSE(5, "In finalize, reverting to previous barrier");
|
||||
goto orig_barrier;
|
||||
}
|
||||
ret = fca_do_barrier(fca_module->fca_comm);
|
||||
if (ret < 0) {
|
||||
if (ret == -EUSEMPI) {
|
||||
goto orig_barrier;
|
||||
}
|
||||
FCA_ERROR("Barrier failed: %s", fca_strerror(ret));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
orig_barrier:
|
||||
return fca_module->previous_barrier(comm, fca_module->previous_barrier_module);
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Function: - broadcast
|
||||
* Accepts: - same arguments as MPI_Bcast()
|
||||
* Returns: - MPI_SUCCESS or error code
|
||||
*/
|
||||
int mca_coll_fca_bcast(void *buff, int count, struct ompi_datatype_t *datatype,
|
||||
int root, struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_fca_module_t *fca_module = (mca_coll_fca_module_t*)module;
|
||||
MCA_COLL_FCA_DECLARE_CONVERTOR(conv);
|
||||
fca_bcast_spec_t spec;
|
||||
size_t gap, size;
|
||||
int ret;
|
||||
|
||||
FCA_VERBOSE(5, "[%d] Calling mca_coll_fca_bcast, root=%d, count=%d",
|
||||
ompi_comm_rank(comm), root, count);
|
||||
|
||||
/* Setup exchange buffer */
|
||||
spec.root = root;
|
||||
if (mca_coll_fca_array_size(datatype, count, &gap, &size)) {
|
||||
spec.buf = (char*)buff + gap;
|
||||
} else {
|
||||
mca_coll_fca_convertor_create(&conv, datatype, count, buff,
|
||||
(root == fca_module->rank)
|
||||
? MCA_COLL_FCA_CONV_SEND
|
||||
: MCA_COLL_FCA_CONV_RECV,
|
||||
&spec.buf, &size);
|
||||
}
|
||||
|
||||
/* Check that operation size does not exceed limit */
|
||||
spec.size = size;
|
||||
if (spec.size > fca_module->fca_comm_caps.max_payload) {
|
||||
FCA_VERBOSE(5, "Unsupported bcast operation size %d, using fallback",
|
||||
spec.size);
|
||||
if (spec.buf != buff) {
|
||||
mca_coll_fca_convertor_destroy(&conv);
|
||||
}
|
||||
goto orig_bcast;
|
||||
}
|
||||
|
||||
/* Sender may pack data */
|
||||
if (spec.buf != buff && root == fca_module->rank) {
|
||||
mca_coll_fca_convertor_process(&conv, 0);
|
||||
}
|
||||
|
||||
/* Call FCA Bcast */
|
||||
FCA_VERBOSE(5, "Using FCA Bcast");
|
||||
ret = fca_do_bcast(fca_module->fca_comm, &spec);
|
||||
|
||||
/* Destroy convertor if operation failed */
|
||||
if (ret < 0) {
|
||||
mca_coll_fca_convertor_destroy(&conv);
|
||||
if (ret == -EUSEMPI) {
|
||||
goto orig_bcast;
|
||||
}
|
||||
FCA_ERROR("Bcast failed: %s", fca_strerror(ret));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* Unpack data and clean up convertor */
|
||||
if (mca_coll_fca_convertor_valid(&conv)) {
|
||||
if (root != fca_module->rank) {
|
||||
mca_coll_fca_convertor_process(&conv, 0);
|
||||
}
|
||||
mca_coll_fca_convertor_destroy(&conv);
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
orig_bcast:
|
||||
return fca_module->previous_bcast(buff, count, datatype, root, comm,
|
||||
fca_module->previous_bcast_module);
|
||||
}
|
||||
|
||||
/*
|
||||
* Reduce
|
||||
*
|
||||
* Function: - reduce
|
||||
* Accepts: - same as MPI_Reduce()
|
||||
* Returns: - MPI_SUCCESS or error code
|
||||
*/
|
||||
int mca_coll_fca_reduce(const void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype, struct ompi_op_t *op,
|
||||
int root, struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_fca_module_t *fca_module = (mca_coll_fca_module_t*)module;
|
||||
fca_reduce_spec_t spec;
|
||||
int ret;
|
||||
|
||||
mca_coll_fca_get_reduce_root(root, fca_module->rank, &spec);
|
||||
spec.sbuf = (void *)sbuf;
|
||||
spec.rbuf = rbuf;
|
||||
if (mca_coll_fca_fill_reduce_spec(count, dtype, op, &spec,
|
||||
fca_module->fca_comm_caps.max_payload)
|
||||
!= OMPI_SUCCESS) {
|
||||
FCA_VERBOSE(5, "Unsupported reduce operation %s, using fallback\n", op->o_name);
|
||||
goto orig_reduce;
|
||||
}
|
||||
|
||||
FCA_VERBOSE(5,"Using FCA Reduce");
|
||||
ret = fca_do_reduce(fca_module->fca_comm, &spec);
|
||||
if (ret < 0) {
|
||||
if (ret == -EUSEMPI) {
|
||||
goto orig_reduce;
|
||||
}
|
||||
FCA_ERROR("Reduce failed: %s", fca_strerror(ret));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
orig_reduce:
|
||||
return fca_module->previous_reduce(sbuf, rbuf, count, dtype, op, root,
|
||||
comm, fca_module->previous_reduce_module);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allreduce
|
||||
*
|
||||
* Function: - allreduce
|
||||
* Accepts: - same as MPI_Allreduce()
|
||||
* Returns: - MPI_SUCCESS or error code
|
||||
*/
|
||||
int mca_coll_fca_allreduce(const void *sbuf, void *rbuf, int count,
|
||||
struct ompi_datatype_t *dtype, struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_fca_module_t *fca_module = (mca_coll_fca_module_t*)module;
|
||||
fca_reduce_spec_t spec;
|
||||
int ret;
|
||||
|
||||
spec.sbuf = (void *)sbuf;
|
||||
spec.rbuf = rbuf;
|
||||
if (mca_coll_fca_fill_reduce_spec(count, dtype, op, &spec,
|
||||
fca_module->fca_comm_caps.max_payload)
|
||||
!= OMPI_SUCCESS) {
|
||||
FCA_VERBOSE(5, "Unsupported allreduce operation %s, using fallback\n", op->o_name);
|
||||
goto orig_allreduce;
|
||||
}
|
||||
|
||||
FCA_VERBOSE(5,"Using FCA Allreduce");
|
||||
ret = fca_do_all_reduce(fca_module->fca_comm, &spec);
|
||||
if (ret < 0) {
|
||||
if (ret == -EUSEMPI) {
|
||||
goto orig_allreduce;
|
||||
}
|
||||
FCA_ERROR("Allreduce failed: %s", fca_strerror(ret));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
orig_allreduce:
|
||||
return fca_module->previous_allreduce(sbuf, rbuf, count, dtype, op, comm,
|
||||
fca_module->previous_allreduce_module);
|
||||
}
|
||||
|
||||
/*
|
||||
* Prepare a send buffer for allgather/allgatherv, handle packing and MPI_IN_PLACE.
|
||||
*/
|
||||
static size_t __setup_gather_sendbuf(void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
struct mca_coll_fca_convertor *sconv,
|
||||
void **real_sendbuf)
|
||||
{
|
||||
size_t gap, ssize;
|
||||
|
||||
if (mca_coll_fca_array_size(sdtype, scount, &gap, &ssize)) {
|
||||
*real_sendbuf = (char*) sbuf + gap;
|
||||
} else {
|
||||
FCA_VERBOSE(5, "Packing send buffer");
|
||||
mca_coll_fca_convertor_create(sconv, sdtype, scount, sbuf,
|
||||
MCA_COLL_FCA_CONV_SEND, real_sendbuf,
|
||||
&ssize);
|
||||
mca_coll_fca_convertor_process(sconv, 0);
|
||||
}
|
||||
return ssize;
|
||||
}
|
||||
|
||||
static size_t __setup_gather_sendbuf_inplace(void *inplace_sbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct mca_coll_fca_convertor *sconv,
|
||||
void **real_sendbuf)
|
||||
{
|
||||
size_t gap, ssize;
|
||||
|
||||
if (mca_coll_fca_array_size(rdtype, rcount, &gap, &ssize)) {
|
||||
*real_sendbuf = (char*) inplace_sbuf + gap;
|
||||
} else {
|
||||
FCA_VERBOSE(5, "Packing send buffer");
|
||||
mca_coll_fca_convertor_create(sconv, rdtype, rcount, inplace_sbuf,
|
||||
MCA_COLL_FCA_CONV_SEND, real_sendbuf,
|
||||
&ssize);
|
||||
mca_coll_fca_convertor_process(sconv, 0);
|
||||
}
|
||||
return ssize;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allgather
|
||||
*
|
||||
* Function: - allgather
|
||||
* Accepts: - same as MPI_Allgather()
|
||||
* Returns: - MPI_SUCCESS or error code
|
||||
*/
|
||||
int mca_coll_fca_allgather(const void *sbuf, int scount, struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount, struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_fca_module_t *fca_module = (mca_coll_fca_module_t*)module;
|
||||
#if OMPI_FCA_ALLGATHER == 1
|
||||
MCA_COLL_FCA_DECLARE_CONVERTOR(sconv);
|
||||
MCA_COLL_FCA_DECLARE_CONVERTOR(rconv);
|
||||
fca_gather_spec_t spec = {0,};
|
||||
size_t rgap, rsize;
|
||||
ptrdiff_t rdtype_extent;
|
||||
ssize_t total_rcount;
|
||||
int ret;
|
||||
|
||||
/* Setup send buffer */
|
||||
if(sbuf == MPI_IN_PLACE ) {
|
||||
FCA_DT_EXTENT(rdtype, &rdtype_extent);
|
||||
spec.size = __setup_gather_sendbuf_inplace(
|
||||
(char *)rbuf + rcount * fca_module->rank * rdtype_extent,
|
||||
rcount, rdtype, &sconv, &spec.sbuf);
|
||||
} else {
|
||||
spec.size = __setup_gather_sendbuf((void *)sbuf, scount, sdtype, &sconv, &spec.sbuf);
|
||||
}
|
||||
|
||||
/* Setup recv buffer */
|
||||
total_rcount = ompi_comm_size(comm) * rcount;
|
||||
if (mca_coll_fca_array_size(rdtype, total_rcount, &rgap, &rsize) && rgap == 0) {
|
||||
spec.rbuf = rbuf;
|
||||
} else {
|
||||
mca_coll_fca_convertor_create(&rconv, rdtype, total_rcount, rbuf,
|
||||
MCA_COLL_FCA_CONV_RECV, &spec.rbuf, &rsize);
|
||||
}
|
||||
|
||||
|
||||
/* Call FCA Allgather */
|
||||
FCA_VERBOSE(5,"Using FCA Allgather size");
|
||||
ret = fca_do_allgather(fca_module->fca_comm, &spec);
|
||||
|
||||
/* Destroy convertors if operation failed */
|
||||
if (ret < 0) {
|
||||
mca_coll_fca_convertor_destroy(&sconv);
|
||||
mca_coll_fca_convertor_destroy(&rconv);
|
||||
if (ret == -EUSEMPI) {
|
||||
goto orig_allgather;
|
||||
}
|
||||
FCA_ERROR("Allgather failed: %s", fca_strerror(ret));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* Unpack data and clean up convertor */
|
||||
mca_coll_fca_convertor_destroy(&sconv);
|
||||
if (mca_coll_fca_convertor_valid(&rconv)) {
|
||||
FCA_VERBOSE(5, "Unpacking Allgather receive buffer");
|
||||
mca_coll_fca_convertor_process(&rconv, 0);
|
||||
mca_coll_fca_convertor_destroy(&rconv);
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
orig_allgather:
|
||||
#endif
|
||||
return fca_module->previous_allgather(sbuf, scount, sdtype, rbuf, rcount, rdtype,
|
||||
comm, fca_module->previous_allgather_module);
|
||||
}
|
||||
|
||||
int mca_coll_fca_allgatherv(const void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, const int *rcounts, const int *disps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_fca_module_t *fca_module = (mca_coll_fca_module_t*)module;
|
||||
#if OMPI_FCA_ALLGATHER == 1
|
||||
MCA_COLL_FCA_DECLARE_CONVERTOR(sconv);
|
||||
MCA_COLL_FCA_DECLARE_CONVERTOR(rconv);
|
||||
fca_gatherv_spec_t spec;
|
||||
size_t rgap, rsize;
|
||||
int sum_rcounts;
|
||||
ptrdiff_t rdtype_extent;
|
||||
int comm_size;
|
||||
int relemsize;
|
||||
size_t displ;
|
||||
int i, ret;
|
||||
|
||||
comm_size = ompi_comm_size(fca_module->comm);
|
||||
FCA_DT_EXTENT(rdtype, &rdtype_extent);
|
||||
|
||||
/* Setup send buffer */
|
||||
if(sbuf == MPI_IN_PLACE ) {
|
||||
spec.sendsize = __setup_gather_sendbuf_inplace(
|
||||
(char *)rbuf + disps[fca_module->rank] * rdtype_extent,
|
||||
rcounts[fca_module->rank], rdtype, &sconv, &spec.sbuf);
|
||||
} else {
|
||||
spec.sendsize = __setup_gather_sendbuf((void *)sbuf, scount, sdtype, &sconv, &spec.sbuf);
|
||||
}
|
||||
|
||||
/* Allocate alternative recvsizes/displs on the stack, which will be in bytes */
|
||||
spec.recvsizes = alloca(sizeof *spec.recvsizes * comm_size);
|
||||
spec.displs = alloca(sizeof *spec.displs * comm_size);
|
||||
|
||||
/* Calculate the size of receive buffer */
|
||||
sum_rcounts = 0;
|
||||
for (i = 0; i < comm_size; ++i) {
|
||||
sum_rcounts += rcounts[i];
|
||||
}
|
||||
|
||||
/* convert MPI counts which depend on dtype) to FCA sizes (which are in bytes) */
|
||||
if (mca_coll_fca_array_size(rdtype, sum_rcounts, &rgap, &rsize) && rgap == 0) {
|
||||
spec.rbuf = rbuf;
|
||||
for (i = 0; i < comm_size; ++i) {
|
||||
spec.recvsizes[i] = rcounts[i] * rdtype_extent;
|
||||
spec.displs[i] = disps[i] * rdtype_extent;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* Reorder and remove gaps in displs - we want to allocate as little memory
|
||||
* as possible, and we should unpack one-by-one anyway.
|
||||
*/
|
||||
FCA_VERBOSE(5, "Reordering AllgatherV displacements");
|
||||
mca_coll_fca_convertor_create(&rconv, rdtype, sum_rcounts, rbuf,
|
||||
MCA_COLL_FCA_CONV_RECV, &spec.rbuf, &rsize);
|
||||
assert(rsize % sum_rcounts == 0);
|
||||
relemsize = rsize / sum_rcounts;
|
||||
|
||||
displ = 0;
|
||||
for (i = 0; i < comm_size; ++i) {
|
||||
spec.recvsizes[i] = rcounts[i] * relemsize;
|
||||
spec.displs[i] = displ;
|
||||
displ += spec.recvsizes[i];
|
||||
}
|
||||
assert(displ == rsize);
|
||||
}
|
||||
|
||||
/* Call FCA AllgatherV */
|
||||
FCA_VERBOSE(5,"Using FCA Allgatherv");
|
||||
ret = fca_do_allgatherv(fca_module->fca_comm, &spec);
|
||||
|
||||
/* Destroy convertors if operation failed */
|
||||
if (ret < 0) {
|
||||
mca_coll_fca_convertor_destroy(&sconv);
|
||||
mca_coll_fca_convertor_destroy(&rconv);
|
||||
if (ret == -EUSEMPI) {
|
||||
goto orig_allgatherv;
|
||||
}
|
||||
FCA_ERROR("Allgatherv failed: %s", fca_strerror(ret));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* Unpack data and clean up convertor */
|
||||
mca_coll_fca_convertor_destroy(&sconv);
|
||||
if (mca_coll_fca_convertor_valid(&rconv)) {
|
||||
FCA_VERBOSE(5, "Unpacking AllgatherV receive buffer rdtype_extent=%ld",
|
||||
rdtype_extent);
|
||||
for (i = 0; i < comm_size; ++i) {
|
||||
mca_coll_fca_convertor_set(&rconv, rdtype,
|
||||
(char*)rbuf + disps[i] * rdtype_extent,
|
||||
rcounts[i]);
|
||||
mca_coll_fca_convertor_process(&rconv, spec.displs[i]);
|
||||
}
|
||||
mca_coll_fca_convertor_destroy(&rconv);
|
||||
}
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
orig_allgatherv:
|
||||
#endif
|
||||
return fca_module->previous_allgatherv(sbuf, scount, sdtype, rbuf, rcounts,
|
||||
disps, rdtype, comm,
|
||||
fca_module->previous_allgatherv_module);
|
||||
}
|
||||
|
||||
int mca_coll_fca_alltoall(const void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_fca_module_t *fca_module = (mca_coll_fca_module_t*)module;
|
||||
/* not implemented yet */
|
||||
return fca_module->previous_alltoall(sbuf, scount, sdtype, rbuf, rcount, rdtype,
|
||||
comm, fca_module->previous_alltoall_module);
|
||||
}
|
||||
|
||||
int mca_coll_fca_alltoallv(const void *sbuf, const int *scounts, const int *sdisps,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, const int *rcounts, const int *rdisps,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_fca_module_t *fca_module = (mca_coll_fca_module_t*)module;
|
||||
/* not implemented yet */
|
||||
return fca_module->previous_alltoallv(sbuf, scounts, sdisps, sdtype, rbuf, rcounts, rdisps, rdtype,
|
||||
comm, fca_module->previous_alltoallv_module);
|
||||
}
|
||||
|
||||
int mca_coll_fca_alltoallw(const void *sbuf, const int *scounts, const int *sdisps,
|
||||
struct ompi_datatype_t * const *sdtypes,
|
||||
void *rbuf, const int *rcounts, const int *rdisps,
|
||||
struct ompi_datatype_t * const *rdtypes,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_fca_module_t *fca_module = (mca_coll_fca_module_t*)module;
|
||||
/* not implemented yet */
|
||||
return fca_module->previous_alltoallw(sbuf, scounts, sdisps, sdtypes, rbuf, rcounts, rdisps, rdtypes,
|
||||
comm, fca_module->previous_alltoallw_module);
|
||||
}
|
||||
|
||||
int mca_coll_fca_gather(const void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, int rcount,
|
||||
struct ompi_datatype_t *rdtype,
|
||||
int root, struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_fca_module_t *fca_module = (mca_coll_fca_module_t*)module;
|
||||
/* not implemented yet */
|
||||
return fca_module->previous_gather(sbuf, scount, sdtype, rbuf, rcount, rdtype, root,
|
||||
comm, fca_module->previous_gather_module);
|
||||
}
|
||||
|
||||
int mca_coll_fca_gatherv(const void *sbuf, int scount,
|
||||
struct ompi_datatype_t *sdtype,
|
||||
void *rbuf, const int *rcounts, const int *disps,
|
||||
struct ompi_datatype_t *rdtype, int root,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_fca_module_t *fca_module = (mca_coll_fca_module_t*)module;
|
||||
/* not implemented yet */
|
||||
return fca_module->previous_gatherv(sbuf, scount, sdtype, rbuf, rcounts, disps, rdtype, root,
|
||||
comm, fca_module->previous_gatherv_module);
|
||||
}
|
||||
|
||||
int mca_coll_fca_reduce_scatter(const void *sbuf, void *rbuf, const int *rcounts,
|
||||
struct ompi_datatype_t *dtype,
|
||||
struct ompi_op_t *op,
|
||||
struct ompi_communicator_t *comm,
|
||||
mca_coll_base_module_t *module)
|
||||
{
|
||||
mca_coll_fca_module_t *fca_module = (mca_coll_fca_module_t*)module;
|
||||
/* not implemented yet */
|
||||
return fca_module->previous_reduce_scatter(sbuf, rbuf, rcounts, dtype, op,
|
||||
comm, fca_module->previous_reduce_scatter_module);
|
||||
}
|
@ -1,33 +0,0 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
#
|
||||
# Copyright (c) 2011 Mellanox Technologies. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
|
||||
# MCA_coll_fca_CONFIG([action-if-can-compile],
|
||||
# [action-if-cant-compile])
|
||||
# ------------------------------------------------
|
||||
AC_DEFUN([MCA_ompi_coll_fca_CONFIG],[
|
||||
AC_CONFIG_FILES([ompi/mca/coll/fca/Makefile])
|
||||
|
||||
OMPI_CHECK_FCA([coll_fca],
|
||||
[coll_fca_happy="yes"],
|
||||
[coll_fca_happy="no"])
|
||||
|
||||
AS_IF([test "$coll_fca_happy" = "yes"],
|
||||
[$1],
|
||||
[$2])
|
||||
|
||||
# substitute in the things needed to build fca
|
||||
AC_SUBST([coll_fca_CFLAGS])
|
||||
AC_SUBST([coll_fca_CPPFLAGS])
|
||||
AC_SUBST([coll_fca_LDFLAGS])
|
||||
AC_SUBST([coll_fca_LIBS])
|
||||
])dnl
|
||||
|
@ -1,7 +0,0 @@
|
||||
#
|
||||
# owner/status file
|
||||
# owner: institution that is responsible for this package
|
||||
# status: e.g. active, maintenance, unmaintained
|
||||
#
|
||||
owner: MELLANOX
|
||||
status: active
|
@ -1,44 +0,0 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
#
|
||||
# Copyright (c) 2013-2015 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2017 IBM Corporation. All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
#
|
||||
|
||||
dist_oshmemdata_DATA = \
|
||||
help-oshmem-scoll-fca.txt
|
||||
|
||||
AM_CPPFLAGS = $(coll_fca_CPPFLAGS) -DCOLL_FCA_HOME=\"$(coll_fca_HOME)\" $(coll_fca_extra_CPPFLAGS)
|
||||
scoll_fca_sources = \
|
||||
scoll_fca.h \
|
||||
scoll_fca_debug.h \
|
||||
scoll_fca_api.h \
|
||||
scoll_fca_module.c \
|
||||
scoll_fca_component.c \
|
||||
scoll_fca_ops.c
|
||||
if MCA_BUILD_oshmem_scoll_fca_DSO
|
||||
component_noinst =
|
||||
component_install = mca_scoll_fca.la
|
||||
else
|
||||
component_noinst = libmca_scoll_fca.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
mcacomponentdir = $(oshmemlibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_scoll_fca_la_SOURCES = $(scoll_fca_sources)
|
||||
mca_scoll_fca_la_LIBADD = $(top_builddir)/oshmem/liboshmem.la \
|
||||
$(scoll_fca_LIBS)
|
||||
mca_scoll_fca_la_LDFLAGS = -module -avoid-version $(scoll_fca_LDFLAGS)
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_scoll_fca_la_SOURCES =$(scoll_fca_sources)
|
||||
libmca_scoll_fca_la_LIBADD = $(scoll_fca_LIBS)
|
||||
libmca_scoll_fca_la_LDFLAGS = -module -avoid-version $(scoll_fca_LDFLAGS)
|
@ -1,40 +0,0 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
#
|
||||
# Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2015 Research Organization for Information Science
|
||||
# and Technology (RIST). All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
|
||||
# MCA_oshmem_scoll_fca_CONFIG([action-if-can-compile],
|
||||
# [action-if-cant-compile])
|
||||
# ------------------------------------------------
|
||||
AC_DEFUN([MCA_oshmem_scoll_fca_CONFIG],[
|
||||
AC_CONFIG_FILES([oshmem/mca/scoll/fca/Makefile])
|
||||
|
||||
OMPI_CHECK_FCA([scoll_fca],
|
||||
[scoll_fca_happy="yes"],
|
||||
[scoll_fca_happy="no"])
|
||||
|
||||
AS_IF([test "$scoll_fca_happy" = "yes"],
|
||||
[scoll_fca_WRAPPER_EXTRA_LDFLAGS="$scoll_fca_LDFLAGS"
|
||||
scoll_fca_CPPFLAGS="$scoll_fca_CPPFLAGS"
|
||||
scoll_fca_WRAPPER_EXTRA_LIBS="$scoll_fca_LIBS"
|
||||
$1],
|
||||
[$2])
|
||||
|
||||
# substitute in the things needed to build fca
|
||||
AC_SUBST([scoll_fca_CFLAGS])
|
||||
AC_SUBST([scoll_fca_CPPFLAGS])
|
||||
AC_SUBST([scoll_fca_LDFLAGS])
|
||||
AC_SUBST([scoll_fca_LIBS])
|
||||
AC_SUBST(scoll_fca_HOME, "$ompi_check_fca_dir")
|
||||
])dnl
|
||||
|
@ -1,16 +0,0 @@
|
||||
#
|
||||
# Copyright (c) 2015 Mellanox Technologies, Inc.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
[module_enable:fatal]
|
||||
scoll:fca module reports issue during module enabling phase.
|
||||
Try to use scoll:fca component with anoter one
|
||||
for example scoll:basic
|
||||
|
||||
Error: %s
|
||||
#
|
@ -1,137 +0,0 @@
|
||||
/**
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_SCOLL_FCA_H
|
||||
#define MCA_SCOLL_FCA_H
|
||||
#include "oshmem_config.h"
|
||||
#include "oshmem/constants.h"
|
||||
#include "shmem.h"
|
||||
#include "oshmem/mca/mca.h"
|
||||
#include "oshmem/mca/scoll/scoll.h"
|
||||
#include "oshmem/util/oshmem_util.h"
|
||||
#include "scoll_fca_api.h"
|
||||
#include "scoll_fca_debug.h"
|
||||
|
||||
BEGIN_C_DECLS
|
||||
struct mca_scoll_fca_component_t {
|
||||
/** Base coll component */
|
||||
mca_scoll_base_component_1_0_0_t super;
|
||||
|
||||
/** MCA parameter: Priority of this component */
|
||||
int fca_priority;
|
||||
|
||||
/** MCA parameter: Verbose level of this component */
|
||||
int fca_verbose;
|
||||
|
||||
/** MCA parameter: Path to fca spec file */
|
||||
char* fca_spec_file;
|
||||
|
||||
/** MCA parameter: FCA device */
|
||||
char* fca_dev;
|
||||
|
||||
/** MCA parameter: Enable FCA */
|
||||
int fca_enable;
|
||||
|
||||
/** MCA parameter: Enable FCA Barrier */
|
||||
int fca_enable_barrier;
|
||||
|
||||
/** MCA parameter: Enable FCA Bcast */
|
||||
int fca_enable_bcast;
|
||||
|
||||
/** MCA parameter: Enable FCA Allreduce */
|
||||
int fca_enable_allreduce;
|
||||
|
||||
/** MCA parameter: Enable FCA Allgather */
|
||||
int fca_enable_allgather;
|
||||
|
||||
/** MCA parameter: Enable FCA Allgatherv */
|
||||
int fca_enable_allgatherv;
|
||||
|
||||
/** MCA parameter: FCA NP */
|
||||
int fca_np;
|
||||
|
||||
/* FCA global stuff */
|
||||
fca_t *fca_context; /* FCA context handle */
|
||||
|
||||
/*These vars are used as symmetric objects during __fca_comm_new. The proper amount of memory
|
||||
is allocated only once during fca_comm_query*/
|
||||
int *ret;
|
||||
int *rcounts;
|
||||
void *my_info_exchangeable;
|
||||
void *fca_comm_desc_exchangeable;
|
||||
};
|
||||
typedef struct mca_scoll_fca_component_t mca_scoll_fca_component_t;
|
||||
|
||||
OSHMEM_MODULE_DECLSPEC extern mca_scoll_fca_component_t mca_scoll_fca_component;
|
||||
|
||||
struct mca_scoll_fca_module_t {
|
||||
mca_scoll_base_module_t super;
|
||||
struct oshmem_group_t *comm;
|
||||
int rank;
|
||||
int local_proc_idx;
|
||||
int num_local_procs;
|
||||
int *local_ranks;
|
||||
fca_comm_t *fca_comm;
|
||||
fca_comm_desc_t fca_comm_desc;
|
||||
fca_comm_caps_t fca_comm_caps;
|
||||
|
||||
/* Saved handlers - for fallback */
|
||||
mca_scoll_base_module_barrier_fn_t previous_barrier;
|
||||
mca_scoll_base_module_t *previous_barrier_module;
|
||||
mca_scoll_base_module_broadcast_fn_t previous_broadcast;
|
||||
mca_scoll_base_module_t *previous_broadcast_module;
|
||||
mca_scoll_base_module_collect_fn_t previous_collect;
|
||||
mca_scoll_base_module_t *previous_collect_module;
|
||||
mca_scoll_base_module_reduce_fn_t previous_reduce;
|
||||
mca_scoll_base_module_t *previous_reduce_module;
|
||||
mca_scoll_base_module_alltoall_fn_t previous_alltoall;
|
||||
mca_scoll_base_module_t *previous_alltoall_module;
|
||||
};
|
||||
typedef struct mca_scoll_fca_module_t mca_scoll_fca_module_t;
|
||||
OBJ_CLASS_DECLARATION(mca_scoll_fca_module_t);
|
||||
|
||||
/* API functions */
|
||||
int mca_scoll_fca_init_query(bool enable_progress_threads,
|
||||
bool enable_mpi_threads);
|
||||
mca_scoll_base_module_t *mca_scoll_fca_comm_query(struct oshmem_group_t *comm,
|
||||
int *priority);
|
||||
int mca_scoll_fca_get_fca_lib(struct oshmem_group_t *comm);
|
||||
|
||||
int mca_scoll_fca_barrier(struct oshmem_group_t *group,
|
||||
long *pSync,
|
||||
int algorithm_type);
|
||||
int mca_scoll_fca_broadcast(struct oshmem_group_t *group,
|
||||
int PE_root,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
bool nlong_type,
|
||||
int algorithm_type);
|
||||
int mca_scoll_fca_collect(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
bool nlong_type,
|
||||
int algorithm_type);
|
||||
int mca_scoll_fca_reduce(struct oshmem_group_t *group,
|
||||
struct oshmem_op_t *op,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
void *pWrk,
|
||||
int algorithm_type);
|
||||
OBJ_CLASS_DECLARATION(mca_coll_fca_module_t);
|
||||
END_C_DECLS
|
||||
#endif
|
@ -1,81 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#include <fca/fca_api.h>
|
||||
#include <fca/fca_version.h>
|
||||
#include <fca/config/fca_parse_specfile.h>
|
||||
|
||||
#ifndef FCA_API
|
||||
#define OSHMEM_FCA_VERSION 12
|
||||
#else
|
||||
#define OSHMEM_FCA_VERSION FCA_API
|
||||
#endif
|
||||
|
||||
/*
|
||||
* * FCA API compatibility layer.
|
||||
* * MPI build must define an FCA version macro.
|
||||
* */
|
||||
|
||||
#define OSHMEM_FCA_BARRIER 1
|
||||
#define OSHMEM_FCA_BCAST 1
|
||||
#define OSHMEM_FCA_ALLREDUCE 1
|
||||
|
||||
#if OSHMEM_FCA_VERSION == 12
|
||||
|
||||
#define OSHMEM_FCA_ALLGATHER 0
|
||||
#define FCA_MAJOR_BIT 24ul
|
||||
#define FCA_MINOR_BIT 16ul
|
||||
#define EUSESHMEM 287
|
||||
|
||||
static inline int mca_scoll_fca_comm_init(fca_t *fca_context,
|
||||
int rank,
|
||||
int comm_size,
|
||||
int local_proc_idx,
|
||||
int num_local_procs,
|
||||
fca_comm_desc_t *comm_desc,
|
||||
fca_comm_t **fca_comm)
|
||||
{
|
||||
return fca_comm_init(fca_context,
|
||||
local_proc_idx,
|
||||
num_local_procs,
|
||||
comm_size,
|
||||
comm_desc,
|
||||
fca_comm);
|
||||
}
|
||||
#elif OSHMEM_FCA_VERSION >= 20
|
||||
|
||||
#define OSHMEM_FCA_ALLGATHER 1
|
||||
#define OSHMEM_FCA_ALLGATHERV 1
|
||||
|
||||
#define OSHMEM_FCA_PROGRESS 1
|
||||
#define EUSESHMEM 287
|
||||
|
||||
static inline int mca_scoll_fca_comm_init(fca_t *fca_context, int rank, int comm_size,
|
||||
int local_proc_idx, int num_local_procs,
|
||||
fca_comm_desc_t *comm_desc,
|
||||
fca_comm_t **fca_comm)
|
||||
{
|
||||
fca_comm_init_spec_t spec;
|
||||
|
||||
spec.rank = rank;
|
||||
spec.size = comm_size;
|
||||
spec.desc = *comm_desc;
|
||||
spec.proc_idx = local_proc_idx;
|
||||
spec.num_procs = num_local_procs;
|
||||
return fca_comm_init(fca_context, &spec, fca_comm);
|
||||
}
|
||||
#else
|
||||
|
||||
#error "FCA API version is unsupported"
|
||||
|
||||
#endif
|
@ -1,355 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
#define _GNU_SOURCE
|
||||
#include <stdio.h>
|
||||
|
||||
#include <dlfcn.h>
|
||||
#include <libgen.h>
|
||||
|
||||
#include "oshmem_config.h"
|
||||
|
||||
#ifdef HAVE_SYS_TYPES_H
|
||||
#include <sys/types.h>
|
||||
#endif
|
||||
#ifdef HAVE_SYS_STAT_H
|
||||
#include <sys/stat.h>
|
||||
#endif
|
||||
#ifdef HAVE_UNISTD_H
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
#include "opal/mca/installdirs/installdirs.h"
|
||||
|
||||
|
||||
#include "scoll_fca.h"
|
||||
|
||||
#include "opal/runtime/opal_progress.h"
|
||||
#include "opal/util/printf.h"
|
||||
#include "oshmem/proc/proc.h"
|
||||
#include "oshmem/mca/memheap/memheap.h"
|
||||
/*
|
||||
* * Public string showing the coll ompi_fca component version number
|
||||
* */
|
||||
const char *mca_scoll_fca_component_version_string =
|
||||
"Open SHMEM FCA collective MCA component version " OSHMEM_VERSION;
|
||||
|
||||
/*
|
||||
* * Global variable
|
||||
* */
|
||||
int mca_scoll_fca_output = -1;
|
||||
|
||||
/*
|
||||
* * Instantiate the public struct with all of our public information
|
||||
* * and pointers to our public functions in it
|
||||
* */
|
||||
static int fca_open(void);
|
||||
static int fca_close(void);
|
||||
static int fca_register(void);
|
||||
|
||||
mca_scoll_fca_component_t mca_scoll_fca_component = {
|
||||
|
||||
/* First, the mca_component_t struct containing meta information
|
||||
* about the component itfca */
|
||||
{
|
||||
.scoll_version = {
|
||||
MCA_SCOLL_BASE_VERSION_2_0_0,
|
||||
|
||||
/* Component name and version */
|
||||
.mca_component_name = "fca",
|
||||
MCA_BASE_MAKE_VERSION(component, OSHMEM_MAJOR_VERSION, OSHMEM_MINOR_VERSION,
|
||||
OSHMEM_RELEASE_VERSION),
|
||||
|
||||
/* Component open and close functions */
|
||||
.mca_open_component = fca_open,
|
||||
.mca_close_component = fca_close,
|
||||
.mca_register_component_params = fca_register,
|
||||
},
|
||||
.scoll_data = {
|
||||
/* The component is not checkpoint ready */
|
||||
MCA_BASE_METADATA_PARAM_NONE
|
||||
},
|
||||
|
||||
/* Initialization / querying functions */
|
||||
|
||||
.scoll_init = mca_scoll_fca_init_query,
|
||||
.scoll_query = mca_scoll_fca_comm_query,
|
||||
}
|
||||
};
|
||||
|
||||
#define FCA_API_CLEAR_MICRO(__x) ((__x>>FCA_MINOR_BIT)<<FCA_MINOR_BIT)
|
||||
|
||||
/**
|
||||
* * Called from FCA blocking functions to progress MPI
|
||||
* */
|
||||
static void mca_scoll_fca_progress_cb(void *arg)
|
||||
{
|
||||
opal_progress();
|
||||
}
|
||||
|
||||
static int mca_scoll_fca_mpi_progress_cb(void)
|
||||
{
|
||||
#if OSHMEM_FCA_PROGRESS == 1
|
||||
if (!mca_scoll_fca_component.fca_context)
|
||||
return 0;
|
||||
|
||||
fca_progress(mca_scoll_fca_component.fca_context);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
/**
|
||||
* * Initialize translation tables for FCA datatypes and operations
|
||||
* */
|
||||
/*static void mca_scoll_fca_init_fca_translations(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < FCA_DT_MAX_PREDEFINED; ++i) {
|
||||
mca_coll_fca_component.fca_dtypes[i].mpi_dtype = MPI_DATATYPE_NULL;
|
||||
mca_coll_fca_component.fca_dtypes[i].fca_dtype = -1;
|
||||
mca_coll_fca_component.fca_dtypes[i].fca_dtype_extent = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < FCA_MAX_OPS; ++i) {
|
||||
mca_coll_fca_component.fca_reduce_ops[i].mpi_op = MPI_OP_NULL;
|
||||
mca_coll_fca_component.fca_reduce_ops[i].fca_op = -1;
|
||||
}
|
||||
}*/
|
||||
|
||||
int mca_scoll_fca_get_fca_lib(struct oshmem_group_t *group)
|
||||
{
|
||||
struct fca_init_spec *spec;
|
||||
int ret;
|
||||
unsigned long fca_ver, major, minor, detected_ver;
|
||||
char x[3];
|
||||
|
||||
if (mca_scoll_fca_component.fca_context)
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
fca_ver = FCA_API_CLEAR_MICRO(fca_get_version());
|
||||
major = (fca_ver >> FCA_MAJOR_BIT);
|
||||
minor = (fca_ver >> FCA_MINOR_BIT) & 0xf;
|
||||
sprintf(x, "%ld%ld", major, minor);
|
||||
detected_ver = atol(x);
|
||||
|
||||
if (detected_ver != OSHMEM_FCA_VERSION) {
|
||||
FCA_ERROR("Unsupported FCA version: %s, please update FCA to v%d, now v%ld",
|
||||
fca_get_version_string(), OSHMEM_FCA_VERSION, fca_ver);
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
spec = fca_parse_spec_file(mca_scoll_fca_component.fca_spec_file);
|
||||
if (!spec) {
|
||||
FCA_ERROR("Failed to parse FCA spec file `%s'",
|
||||
mca_scoll_fca_component.fca_spec_file);
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
spec->job_id = oshmem_proc_local()->super.proc_name.jobid;
|
||||
spec->rank_id = oshmem_proc_pe(oshmem_proc_local());
|
||||
spec->progress.func = mca_scoll_fca_progress_cb;
|
||||
spec->progress.arg = NULL;
|
||||
|
||||
ret = fca_init(spec, &mca_scoll_fca_component.fca_context);
|
||||
if (ret < 0) {
|
||||
FCA_ERROR("Failed to initialize FCA: %s", fca_strerror(ret));
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
fca_free_init_spec(spec);
|
||||
|
||||
opal_progress_register(mca_scoll_fca_mpi_progress_cb);
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
static void mca_scoll_fca_close_fca_lib(void)
|
||||
{
|
||||
opal_progress_unregister(mca_scoll_fca_mpi_progress_cb);
|
||||
fca_cleanup(mca_scoll_fca_component.fca_context);
|
||||
mca_scoll_fca_component.fca_context = NULL;
|
||||
free(mca_scoll_fca_component.fca_spec_file);
|
||||
}
|
||||
|
||||
static char *mca_scoll_fca_check_file(char *file)
|
||||
{
|
||||
struct stat s;
|
||||
int rc;
|
||||
|
||||
if (NULL == file) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
rc = stat(file, &s);
|
||||
if (0 != rc || !S_ISREG(s.st_mode)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* It exists and is a file -- good enough */
|
||||
return file;
|
||||
|
||||
|
||||
}
|
||||
|
||||
static char *mca_scoll_fca_get_spec_file(void)
|
||||
{
|
||||
char *file;
|
||||
opal_asprintf(&file, "%s/etc/fca_mpi_spec.ini", COLL_FCA_HOME);
|
||||
if (NULL == mca_scoll_fca_check_file(file)) {
|
||||
free(file);
|
||||
opal_asprintf(&file, "%s/../fca/etc/fca_mpi_spec.ini", opal_install_dirs.prefix);
|
||||
if (NULL == mca_scoll_fca_check_file(file)) {
|
||||
free(file);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return file;
|
||||
}
|
||||
|
||||
static int fca_register(void)
|
||||
{
|
||||
mca_base_component_t *c;
|
||||
|
||||
FCA_VERBOSE(2, "==>");
|
||||
|
||||
c = &mca_scoll_fca_component.super.scoll_version;
|
||||
|
||||
mca_scoll_fca_component.fca_priority = 80;
|
||||
(void) mca_base_component_var_register(c,
|
||||
"priority",
|
||||
"Priority of the scoll:fca component",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_scoll_fca_component.fca_priority);
|
||||
|
||||
mca_scoll_fca_component.fca_verbose = 0;
|
||||
(void) mca_base_component_var_register(c,
|
||||
"verbose",
|
||||
"Verbose level of the fca coll component",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_scoll_fca_component.fca_verbose);
|
||||
|
||||
mca_scoll_fca_component.fca_enable = 1;
|
||||
(void) mca_base_component_var_register(c,
|
||||
"enable",
|
||||
"[1|0|] Enable/Disable Fabric Collective Accelerator",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_scoll_fca_component.fca_enable);
|
||||
|
||||
mca_scoll_fca_component.fca_spec_file = mca_scoll_fca_get_spec_file();
|
||||
(void) mca_base_component_var_register(c,
|
||||
"spec_file",
|
||||
"Path to the FCA configuration file fca_mpi_spec.ini",
|
||||
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_scoll_fca_component.fca_spec_file);
|
||||
|
||||
mca_scoll_fca_component.fca_np = 64;
|
||||
(void) mca_base_component_var_register(c,
|
||||
"np",
|
||||
"[integer] Minimal allowed job's NP to activate FCA",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_scoll_fca_component.fca_np);
|
||||
|
||||
mca_scoll_fca_component.fca_enable_barrier = OSHMEM_FCA_BARRIER;
|
||||
(void) mca_base_component_var_register(c,
|
||||
"enable_barrier",
|
||||
"[1|0|] Enable/Disable FCA Barrier support",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_scoll_fca_component.fca_enable_barrier);
|
||||
|
||||
mca_scoll_fca_component.fca_enable_bcast = OSHMEM_FCA_BCAST;
|
||||
(void) mca_base_component_var_register(c,
|
||||
"enable_bcast",
|
||||
"[1|0|] Enable/Disable FCA Bcast support",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_scoll_fca_component.fca_enable_bcast);
|
||||
|
||||
mca_scoll_fca_component.fca_enable_allreduce = OSHMEM_FCA_ALLREDUCE;
|
||||
(void) mca_base_component_var_register(c,
|
||||
"enable_allreduce",
|
||||
"[1|0|] Enable/Disable FCA Allreduce support",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_scoll_fca_component.fca_enable_allreduce);
|
||||
|
||||
mca_scoll_fca_component.fca_enable_allgather = OSHMEM_FCA_ALLGATHER;
|
||||
(void) mca_base_component_var_register(c,
|
||||
"enable_allgather",
|
||||
"[1|0|] Enable/Disable FCA Allgather support",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_scoll_fca_component.fca_enable_allgather);
|
||||
|
||||
mca_scoll_fca_component.fca_enable_allgatherv = OSHMEM_FCA_ALLGATHERV;
|
||||
(void) mca_base_component_var_register(c,
|
||||
"enable_allgatherv",
|
||||
"[1|0|] Enable/Disable FCA Allgatherv support",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
|
||||
OPAL_INFO_LVL_9,
|
||||
MCA_BASE_VAR_SCOPE_READONLY,
|
||||
&mca_scoll_fca_component.fca_enable_allgatherv);
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
static int fca_open(void)
|
||||
{
|
||||
FCA_VERBOSE(2, "==>");
|
||||
|
||||
mca_scoll_fca_output = opal_output_open(NULL );
|
||||
opal_output_set_verbosity(mca_scoll_fca_output,
|
||||
mca_scoll_fca_component.fca_verbose);
|
||||
mca_scoll_fca_component.fca_context = NULL;
|
||||
mca_scoll_fca_component.ret = NULL;
|
||||
mca_scoll_fca_component.rcounts = NULL;
|
||||
mca_scoll_fca_component.fca_comm_desc_exchangeable = NULL;
|
||||
mca_scoll_fca_component.my_info_exchangeable = NULL;
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
static int fca_close(void)
|
||||
{
|
||||
FCA_VERBOSE(2, "==>");
|
||||
|
||||
if (!mca_scoll_fca_component.fca_context)
|
||||
return OSHMEM_SUCCESS;
|
||||
|
||||
mca_scoll_fca_close_fca_lib();
|
||||
|
||||
if (NULL != mca_scoll_fca_component.ret)
|
||||
MCA_MEMHEAP_CALL(private_free(mca_scoll_fca_component.ret));
|
||||
|
||||
if (NULL != mca_scoll_fca_component.rcounts)
|
||||
MCA_MEMHEAP_CALL(private_free(mca_scoll_fca_component.rcounts));
|
||||
|
||||
if (NULL != mca_scoll_fca_component.fca_comm_desc_exchangeable)
|
||||
MCA_MEMHEAP_CALL(private_free(mca_scoll_fca_component.fca_comm_desc_exchangeable));
|
||||
|
||||
if (NULL != mca_scoll_fca_component.my_info_exchangeable)
|
||||
MCA_MEMHEAP_CALL(private_free(mca_scoll_fca_component.my_info_exchangeable));
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
@ -1,35 +0,0 @@
|
||||
/**
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
* */
|
||||
|
||||
#ifndef MCA_SCOLL_FCA_DEBUG_H
|
||||
#define MCA_SCOLL_FCA_DEBUG_H
|
||||
#pragma GCC system_header
|
||||
|
||||
#ifdef __BASE_FILE__
|
||||
#define __FCA_FILE__ __BASE_FILE__
|
||||
#else
|
||||
#define __FCA_FILE__ __FILE__
|
||||
#endif
|
||||
|
||||
#define FCA_VERBOSE(level, format, ...) \
|
||||
opal_output_verbose(level, mca_scoll_fca_output, "%s:%d - %s() " format, \
|
||||
__FCA_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
|
||||
|
||||
#define FCA_ERROR(format, ... ) \
|
||||
opal_output_verbose(0, mca_scoll_fca_output, "Error: %s:%d - %s() " format, \
|
||||
__FCA_FILE__, __LINE__, __FUNCTION__, ## __VA_ARGS__)
|
||||
|
||||
#define FCA_MODULE_VERBOSE(fca_module, level, format, ...) \
|
||||
FCA_VERBOSE(level, "[%p:%d] " format, (void*)(fca_module)->comm, (fca_module)->rank, ## __VA_ARGS__)
|
||||
|
||||
extern int mca_scoll_fca_output;
|
||||
|
||||
#endif
|
||||
|
@ -1,571 +0,0 @@
|
||||
/**
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include "scoll_fca.h"
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include "opal/util/show_help.h"
|
||||
|
||||
#include "oshmem/constants.h"
|
||||
#include "oshmem/mca/scoll/scoll.h"
|
||||
#include "oshmem/mca/scoll/base/base.h"
|
||||
#include "oshmem/proc/proc.h"
|
||||
#include "oshmem/mca/spml/spml.h"
|
||||
#include "oshmem/mca/memheap/memheap.h"
|
||||
#include "oshmem/runtime/runtime.h"
|
||||
|
||||
/*
|
||||
* * Initial query function that is invoked during MPI_INIT, allowing
|
||||
* * this module to indicate what level of thread support it provides.
|
||||
* */
|
||||
|
||||
static const int root_id = 0;
|
||||
|
||||
#define __INTERNAL_BARRIER_FROM_SCOLL_BASIC 1
|
||||
static int _internal_barrier(mca_scoll_fca_module_t *fca_module)
|
||||
{
|
||||
#if !__INTERNAL_BARRIER_FROM_SCOLL_BASIC
|
||||
struct oshmem_group_t *group = fca_module->comm;
|
||||
int rc = OSHMEM_SUCCESS;
|
||||
int root_id = 0;
|
||||
int PE_root = oshmem_proc_pe(group->proc_array[root_id]);
|
||||
int i = 0;
|
||||
|
||||
if (PE_root != group->my_pe)
|
||||
{
|
||||
rc = MCA_SPML_CALL(send(NULL, 0, PE_root, MCA_SPML_BASE_PUT_STANDARD));
|
||||
if (OSHMEM_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = MCA_SPML_CALL(recv(NULL, 0, PE_root));
|
||||
if (OSHMEM_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* The root collects and broadcasts the messages. */
|
||||
|
||||
else
|
||||
{
|
||||
int pe_cur = 0;
|
||||
|
||||
for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS); i++)
|
||||
{
|
||||
pe_cur = oshmem_proc_pe(group->proc_array[i]);
|
||||
if (pe_cur != PE_root)
|
||||
{
|
||||
rc = MCA_SPML_CALL(recv(NULL, 0, SHMEM_ANY_SOURCE));
|
||||
}
|
||||
if (OSHMEM_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; (i < group->proc_count) && (rc == OSHMEM_SUCCESS); i++)
|
||||
{
|
||||
pe_cur = oshmem_proc_pe(group->proc_array[i]);
|
||||
if (pe_cur != PE_root)
|
||||
{
|
||||
rc = MCA_SPML_CALL(send(NULL, 0, pe_cur, MCA_SPML_BASE_PUT_STANDARD));
|
||||
}
|
||||
if (OSHMEM_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
#else
|
||||
long pSync = _SHMEM_SYNC_VALUE;
|
||||
/*we use 4th algorithm for barrier from scoll/basic. It does not use pSync,
|
||||
* so we pass to that function just regular long value in order to meet function defenition requirements*/
|
||||
return fca_module->previous_barrier(fca_module->comm,
|
||||
&pSync,
|
||||
SCOLL_ALG_BARRIER_BASIC);
|
||||
#endif
|
||||
}
|
||||
|
||||
int mca_scoll_fca_init_query(bool enable_progress_threads,
|
||||
bool enable_mpi_threads)
|
||||
{
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
static int have_remote_peers(struct oshmem_group_t *group,
|
||||
size_t size,
|
||||
int *local_peers)
|
||||
{
|
||||
struct ompi_proc_t *proc;
|
||||
size_t i;
|
||||
int ret;
|
||||
|
||||
*local_peers = 0;
|
||||
ret = 0;
|
||||
for (i = 0; i < size; ++i) {
|
||||
proc = group->proc_array[i];
|
||||
if (OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) {
|
||||
++*local_peers;
|
||||
} else {
|
||||
ret = 1;
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* * Fills local rank information in fca_module.
|
||||
* */
|
||||
|
||||
static int _get_local_ranks(mca_scoll_fca_module_t *fca_module)
|
||||
{
|
||||
struct oshmem_group_t *comm = fca_module->comm;
|
||||
ompi_proc_t* proc;
|
||||
int i, rank;
|
||||
|
||||
/* Count the local ranks */
|
||||
fca_module->num_local_procs = 0;
|
||||
for (rank = 0; rank < comm->proc_count; ++rank) {
|
||||
proc = comm->proc_array[rank];
|
||||
if (OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) {
|
||||
if (proc->super.proc_name.vpid == (uint32_t) fca_module->rank) {
|
||||
fca_module->local_proc_idx = fca_module->num_local_procs;
|
||||
}
|
||||
++fca_module->num_local_procs;
|
||||
}
|
||||
}
|
||||
/* Make a list of local ranks */
|
||||
fca_module->local_ranks = calloc(fca_module->num_local_procs,
|
||||
sizeof *fca_module->local_ranks);
|
||||
if (!fca_module->local_ranks) {
|
||||
FCA_ERROR("Failed to allocate memory for %d local ranks",
|
||||
fca_module->num_local_procs);
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
for (rank = 0; rank < comm->proc_count; ++rank) {
|
||||
proc = comm->proc_array[rank];
|
||||
if (OPAL_PROC_ON_LOCAL_NODE(proc->super.proc_flags)) {
|
||||
fca_module->local_ranks[i++] = rank;
|
||||
}
|
||||
}
|
||||
|
||||
FCA_MODULE_VERBOSE(fca_module,
|
||||
3,
|
||||
"i am %d/%d",
|
||||
fca_module->local_proc_idx, fca_module->num_local_procs);
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
static int _fca_comm_new(mca_scoll_fca_module_t *fca_module)
|
||||
{
|
||||
struct oshmem_group_t *comm = fca_module->comm;
|
||||
fca_comm_new_spec_t spec;
|
||||
int info_size = 0, all_info_size = 0;
|
||||
void *all_info = NULL, *my_info = NULL;
|
||||
int *disps = NULL;
|
||||
int i;
|
||||
const int root_pe = oshmem_proc_pe(comm->proc_array[root_id]);
|
||||
const int my_id = oshmem_proc_group_find_id(comm, comm->my_pe);
|
||||
/* call fca_get_rank_info() on node managers only*/
|
||||
|
||||
if (fca_module->local_proc_idx == 0) {
|
||||
my_info = fca_get_rank_info(mca_scoll_fca_component.fca_context,
|
||||
&info_size);
|
||||
if (!my_info) {
|
||||
FCA_ERROR("fca_get_rank_info returned NULL");
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
} else {
|
||||
info_size = 0;
|
||||
}
|
||||
|
||||
FCA_MODULE_VERBOSE(fca_module, 1, "Info size: %d", info_size);
|
||||
for (i = 0; i < comm->proc_count; i++) {
|
||||
mca_scoll_fca_component.rcounts[i] = -1;
|
||||
}
|
||||
_internal_barrier(fca_module);
|
||||
MCA_SPML_CALL(put(oshmem_ctx_default, (void *)&mca_scoll_fca_component.rcounts[my_id], (size_t)sizeof(info_size), (void *)&info_size, root_pe));
|
||||
|
||||
if (root_pe == comm->my_pe) {
|
||||
int value = -1;
|
||||
for (i = 0; i < comm->proc_count; i++) {
|
||||
MCA_SPML_CALL(wait((void *)&mca_scoll_fca_component.rcounts[i], SHMEM_CMP_NE, &value, SHMEM_INT));
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate buffer for gathering rank information on rank0 */
|
||||
if (root_pe == comm->my_pe) {
|
||||
all_info_size = 0;
|
||||
disps = calloc(comm->proc_count, sizeof *disps);
|
||||
for (i = 0; i < comm->proc_count; ++i) {
|
||||
disps[i] = all_info_size;
|
||||
all_info_size += mca_scoll_fca_component.rcounts[i];
|
||||
}
|
||||
all_info = NULL;
|
||||
FCA_MODULE_VERBOSE(fca_module,
|
||||
1,
|
||||
"Total rank_info size: %d",
|
||||
all_info_size);
|
||||
all_info = malloc(all_info_size);
|
||||
memset(all_info, 0, all_info_size);
|
||||
}
|
||||
|
||||
if (my_info) {
|
||||
memcpy(mca_scoll_fca_component.my_info_exchangeable,
|
||||
my_info,
|
||||
info_size);
|
||||
}
|
||||
_internal_barrier(fca_module);
|
||||
if (root_pe == comm->my_pe) {
|
||||
for (i = 0; i < comm->proc_count; i++) {
|
||||
if (mca_scoll_fca_component.rcounts[i] > 0) {
|
||||
MCA_SPML_CALL(get(oshmem_ctx_default, (void *)mca_scoll_fca_component.my_info_exchangeable, mca_scoll_fca_component.rcounts[i], (void*)(((char*)all_info)+disps[i]),comm->proc_array[i]->super.proc_name.vpid));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Rank0 calls fca_comm_new() and fills fca_comm_spec filed */
|
||||
if (root_pe == comm->my_pe) {
|
||||
spec.rank_info = all_info;
|
||||
spec.is_comm_world = comm == oshmem_group_all;
|
||||
spec.rank_count = 0;
|
||||
for (i = 0; i < comm->proc_count; ++i) {
|
||||
FCA_MODULE_VERBOSE(fca_module,
|
||||
1,
|
||||
"rcounts[%d]=%d disps[%d]=%d",
|
||||
i, mca_scoll_fca_component.rcounts[i], i, disps[i]);
|
||||
if (mca_scoll_fca_component.rcounts[i] > 0)
|
||||
++spec.rank_count;
|
||||
}
|
||||
|
||||
FCA_MODULE_VERBOSE(fca_module,
|
||||
1,
|
||||
"starting fca_comm_new(), rank_count: %d",
|
||||
spec.rank_count);
|
||||
|
||||
*mca_scoll_fca_component.ret =
|
||||
fca_comm_new(mca_scoll_fca_component.fca_context,
|
||||
&spec,
|
||||
&fca_module->fca_comm_desc);
|
||||
|
||||
free(disps);
|
||||
free(all_info);
|
||||
}
|
||||
|
||||
_internal_barrier(fca_module);
|
||||
|
||||
if (root_pe != comm->my_pe) {
|
||||
MCA_SPML_CALL(get(oshmem_ctx_default, (void *)mca_scoll_fca_component.ret,sizeof(int), (void *)mca_scoll_fca_component.ret, root_pe));
|
||||
}
|
||||
|
||||
/* Examine comm_new return value */
|
||||
_internal_barrier(fca_module);
|
||||
if (*mca_scoll_fca_component.ret < 0) {
|
||||
FCA_ERROR("rank %i: COMM_NEW failed: %s",
|
||||
fca_module->rank, fca_strerror(*mca_scoll_fca_component.ret));
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
/* Release allocate rank_info on node managers */
|
||||
if (fca_module->local_proc_idx == 0) {
|
||||
fca_free_rank_info(my_info);
|
||||
}
|
||||
|
||||
{
|
||||
if (root_pe == comm->my_pe) {
|
||||
memcpy(mca_scoll_fca_component.fca_comm_desc_exchangeable,
|
||||
&fca_module->fca_comm_desc,
|
||||
sizeof(fca_module->fca_comm_desc));
|
||||
}
|
||||
|
||||
_internal_barrier(fca_module);
|
||||
if (root_pe != comm->my_pe) {
|
||||
MCA_SPML_CALL(get(oshmem_ctx_default, (void *)mca_scoll_fca_component.fca_comm_desc_exchangeable, sizeof(fca_module->fca_comm_desc), (void *)&fca_module->fca_comm_desc, root_pe));
|
||||
}
|
||||
|
||||
_internal_barrier(fca_module);
|
||||
|
||||
}
|
||||
FCA_MODULE_VERBOSE(fca_module,
|
||||
1,
|
||||
"Received FCA communicator spec, comm_id %d",
|
||||
fca_module->fca_comm_desc.comm_id);
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
static int _create_fca_comm(mca_scoll_fca_module_t *fca_module)
|
||||
{
|
||||
int comm_size;
|
||||
int rc, ret;
|
||||
|
||||
rc = _fca_comm_new(fca_module);
|
||||
if (rc != OSHMEM_SUCCESS)
|
||||
return rc;
|
||||
|
||||
/* allocate comm_init_spec */
|
||||
FCA_MODULE_VERBOSE(fca_module,
|
||||
1,
|
||||
"Starting COMM_INIT comm_id %d proc_idx %d num_procs %d",
|
||||
fca_module->fca_comm_desc.comm_id, fca_module->local_proc_idx, fca_module->num_local_procs);
|
||||
|
||||
comm_size = fca_module->comm->proc_count;
|
||||
ret = mca_scoll_fca_comm_init(mca_scoll_fca_component.fca_context,
|
||||
oshmem_proc_group_find_id(fca_module->comm,
|
||||
fca_module->rank),
|
||||
comm_size,
|
||||
fca_module->local_proc_idx,
|
||||
fca_module->num_local_procs,
|
||||
&fca_module->fca_comm_desc,
|
||||
&fca_module->fca_comm);
|
||||
if (ret < 0) {
|
||||
FCA_ERROR("COMM_INIT failed: %s", fca_strerror(ret));
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
/* get communicator capabilities */
|
||||
ret = fca_comm_get_caps(fca_module->fca_comm, &fca_module->fca_comm_caps);
|
||||
if (ret < 0) {
|
||||
FCA_ERROR("GET_COMM_CAPS failed: %s", fca_strerror(ret));
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
|
||||
/* by this point every rank in the communicator is set up */
|
||||
FCA_MODULE_VERBOSE(fca_module,
|
||||
1,
|
||||
"Initialized FCA communicator, comm_id %d",
|
||||
fca_module->fca_comm_desc.comm_id);
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
static void _destroy_fca_comm(mca_scoll_fca_module_t *fca_module)
|
||||
{
|
||||
int ret;
|
||||
struct oshmem_group_t *comm = fca_module->comm;
|
||||
const int root_pe = oshmem_proc_pe(comm->proc_array[root_id]);
|
||||
|
||||
fca_comm_destroy(fca_module->fca_comm);
|
||||
if (comm->my_pe == root_pe && mca_scoll_fca_component.fca_context) {
|
||||
ret = fca_comm_end(mca_scoll_fca_component.fca_context,
|
||||
fca_module->fca_comm_desc.comm_id);
|
||||
if (ret < 0) {
|
||||
FCA_ERROR("COMM_END failed: %s", fca_strerror(ret));
|
||||
}
|
||||
}
|
||||
|
||||
FCA_MODULE_VERBOSE(fca_module,
|
||||
1,
|
||||
"Destroyed FCA communicator, comm_id %d",
|
||||
fca_module->fca_comm_desc.comm_id);
|
||||
}
|
||||
|
||||
#define FCA_SAVE_PREV_SCOLL_API(__api) do {\
|
||||
fca_module->previous_ ## __api = comm->g_scoll.scoll_ ## __api;\
|
||||
fca_module->previous_ ## __api ## _module = comm->g_scoll.scoll_ ## __api ## _module;\
|
||||
if (!comm->g_scoll.scoll_ ## __api || !comm->g_scoll.scoll_ ## __api ## _module) {\
|
||||
FCA_VERBOSE(1, "no underlying " # __api"; disqualifying myself");\
|
||||
return OSHMEM_ERROR;\
|
||||
}\
|
||||
OBJ_RETAIN(fca_module->previous_ ## __api ## _module);\
|
||||
} while(0)
|
||||
|
||||
static int _save_coll_handlers(mca_scoll_fca_module_t *fca_module)
|
||||
{
|
||||
struct oshmem_group_t *comm = fca_module->comm;
|
||||
|
||||
FCA_SAVE_PREV_SCOLL_API(barrier);
|
||||
FCA_SAVE_PREV_SCOLL_API(broadcast);
|
||||
FCA_SAVE_PREV_SCOLL_API(collect);
|
||||
FCA_SAVE_PREV_SCOLL_API(reduce);
|
||||
FCA_SAVE_PREV_SCOLL_API(alltoall);
|
||||
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* * Initialize module on the communicator
|
||||
* */
|
||||
static int mca_scoll_fca_module_enable(mca_scoll_base_module_t *module,
|
||||
struct oshmem_group_t *comm)
|
||||
{
|
||||
|
||||
mca_scoll_fca_module_t *fca_module = (mca_scoll_fca_module_t*) module;
|
||||
int rc;
|
||||
|
||||
fca_module->comm = comm;
|
||||
fca_module->rank = comm->my_pe;
|
||||
|
||||
rc = mca_scoll_fca_get_fca_lib(comm);
|
||||
if (rc != OSHMEM_SUCCESS)
|
||||
goto exit_fatal;
|
||||
|
||||
rc = _save_coll_handlers(fca_module);
|
||||
if (rc != OSHMEM_SUCCESS)
|
||||
goto exit_fatal;
|
||||
|
||||
rc = _get_local_ranks(fca_module);
|
||||
if (rc != OSHMEM_SUCCESS)
|
||||
goto exit_fatal;
|
||||
|
||||
rc = _create_fca_comm(fca_module);
|
||||
if (rc != OSHMEM_SUCCESS)
|
||||
goto exit_fatal;
|
||||
|
||||
FCA_MODULE_VERBOSE(fca_module, 1, "FCA Module initialized");
|
||||
return OMPI_SUCCESS;
|
||||
|
||||
exit_fatal:
|
||||
/* it is possible that other pe(s) succesfully enabled fca.
|
||||
* So differnt frameworks will be used for collective ops
|
||||
*/
|
||||
FCA_ERROR("FCA module enable failed - aborting to prevent inconsistent application state");
|
||||
/* There's no modules available */
|
||||
opal_show_help("help-oshmem-scoll-fca.txt",
|
||||
"module_enable:fatal", true,
|
||||
"FCA module enable failed - aborting to prevent inconsistent application state");
|
||||
oshmem_shmem_abort(-1);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
static void mca_scoll_fca_module_clear(mca_scoll_fca_module_t *fca_module)
|
||||
{
|
||||
fca_module->num_local_procs = 0;
|
||||
fca_module->local_ranks = NULL;
|
||||
fca_module->fca_comm = NULL;
|
||||
|
||||
fca_module->previous_barrier = NULL;
|
||||
fca_module->previous_broadcast = NULL;
|
||||
fca_module->previous_collect = NULL;
|
||||
fca_module->previous_reduce = NULL;
|
||||
fca_module->previous_alltoall = NULL;
|
||||
}
|
||||
|
||||
static void mca_scoll_fca_module_construct(mca_scoll_fca_module_t *fca_module)
|
||||
{
|
||||
FCA_VERBOSE(5, "==>");
|
||||
mca_scoll_fca_module_clear(fca_module);
|
||||
}
|
||||
|
||||
static void mca_scoll_fca_module_destruct(mca_scoll_fca_module_t *fca_module)
|
||||
{
|
||||
FCA_VERBOSE(5, "==>");
|
||||
OBJ_RELEASE(fca_module->previous_barrier_module);
|
||||
OBJ_RELEASE(fca_module->previous_broadcast_module);
|
||||
OBJ_RELEASE(fca_module->previous_collect_module);
|
||||
OBJ_RELEASE(fca_module->previous_reduce_module);
|
||||
OBJ_RELEASE(fca_module->previous_alltoall_module);
|
||||
if (fca_module->fca_comm)
|
||||
_destroy_fca_comm(fca_module);
|
||||
free(fca_module->local_ranks);
|
||||
mca_scoll_fca_module_clear(fca_module);
|
||||
}
|
||||
|
||||
/*
|
||||
* * Invoked when there's a new communicator that has been created.
|
||||
* * Look at the communicator and decide which set of functions and
|
||||
* * priority we want to return.
|
||||
* */
|
||||
mca_scoll_base_module_t *
|
||||
mca_scoll_fca_comm_query(struct oshmem_group_t *comm, int *priority)
|
||||
{
|
||||
mca_scoll_base_module_t *module;
|
||||
int size = comm->proc_count;
|
||||
int local_peers = 0;
|
||||
|
||||
mca_scoll_fca_module_t *fca_module;
|
||||
|
||||
*priority = 0;
|
||||
module = NULL;
|
||||
|
||||
if (!mca_scoll_fca_component.fca_enable) {
|
||||
FCA_VERBOSE(20, "FCA is disable on user request => exiting");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (mca_memheap.memheap_component == NULL ) {
|
||||
FCA_VERBOSE(20, "No memheap => exiting");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (NULL == mca_scoll_fca_component.ret) {
|
||||
MCA_MEMHEAP_CALL(private_alloc(sizeof(int),(void **)&mca_scoll_fca_component.ret));
|
||||
MCA_MEMHEAP_CALL(private_alloc(oshmem_group_all->proc_count*sizeof(*mca_scoll_fca_component.rcounts), (void **)&mca_scoll_fca_component.rcounts ));
|
||||
MCA_MEMHEAP_CALL(private_alloc(/*info_size*/20,&mca_scoll_fca_component.my_info_exchangeable));
|
||||
MCA_MEMHEAP_CALL(private_alloc(sizeof(fca_comm_desc_t), &mca_scoll_fca_component.fca_comm_desc_exchangeable));
|
||||
}
|
||||
if (size < mca_scoll_fca_component.fca_np) {
|
||||
FCA_VERBOSE(20,
|
||||
"size(%d) < fca_np(%d)",
|
||||
size, mca_scoll_fca_component.fca_np);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (size < 2) {
|
||||
FCA_VERBOSE(20, "size(%d) < 2", size);
|
||||
goto exit;
|
||||
}
|
||||
|
||||
if (!have_remote_peers(comm,
|
||||
size,
|
||||
&local_peers) /* || OMPI_COMM_IS_INTER(comm)*/) {
|
||||
FCA_VERBOSE(1,
|
||||
"all peers in group are on the same node, fca disabled\n");
|
||||
goto exit;
|
||||
}
|
||||
|
||||
fca_module = OBJ_NEW(mca_scoll_fca_module_t);
|
||||
if (!fca_module) {
|
||||
goto exit_fatal;
|
||||
}
|
||||
fca_module->super.scoll_module_enable = mca_scoll_fca_module_enable;
|
||||
fca_module->super.scoll_collect =
|
||||
mca_scoll_fca_component.fca_enable_allgather ?
|
||||
mca_scoll_fca_collect : NULL;
|
||||
fca_module->super.scoll_reduce =
|
||||
mca_scoll_fca_component.fca_enable_allreduce ?
|
||||
mca_scoll_fca_reduce : NULL;
|
||||
fca_module->super.scoll_barrier =
|
||||
mca_scoll_fca_component.fca_enable_barrier ? mca_scoll_fca_barrier :
|
||||
NULL;
|
||||
fca_module->super.scoll_broadcast =
|
||||
mca_scoll_fca_component.fca_enable_bcast ? mca_scoll_fca_broadcast :
|
||||
NULL;
|
||||
fca_module->super.scoll_alltoall = NULL;
|
||||
|
||||
*priority = mca_scoll_fca_component.fca_priority;
|
||||
module = &fca_module->super;
|
||||
|
||||
exit:
|
||||
FCA_VERBOSE(4,
|
||||
"Query FCA module for comm %p size %d rank %d local_peers=%d: priority=%d %s",
|
||||
(void *)comm, size, comm->my_pe, local_peers, *priority, module ? "enabled" : "disabled");
|
||||
return module;
|
||||
|
||||
exit_fatal:
|
||||
/* it is possible that other pe(s) succesfully initialized fca.
|
||||
* So differnt frameworks will be used for collective ops
|
||||
*/
|
||||
FCA_ERROR("FCA module query failed - aborting");
|
||||
oshmem_shmem_abort(-1);
|
||||
return NULL ;
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_scoll_fca_module_t,
|
||||
mca_scoll_base_module_t,
|
||||
mca_scoll_fca_module_construct,
|
||||
mca_scoll_fca_module_destruct);
|
||||
|
@ -1,302 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2013 Mellanox Technologies, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2015 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifdef HAVE_ALLOCA_H
|
||||
#include <alloca.h>
|
||||
#endif
|
||||
|
||||
#include "oshmem_config.h"
|
||||
#include "oshmem/constants.h"
|
||||
#include "scoll_fca.h"
|
||||
#include <stdio.h>
|
||||
#include "oshmem/proc/proc.h"
|
||||
#include "oshmem/op/op.h"
|
||||
int mca_scoll_fca_barrier(struct oshmem_group_t *group, long *pSync, int alg)
|
||||
{
|
||||
mca_scoll_fca_module_t *fca_module =
|
||||
(mca_scoll_fca_module_t *) group->g_scoll.scoll_barrier_module;
|
||||
int rc;
|
||||
|
||||
FCA_VERBOSE(5, "Using FCA Barrier");
|
||||
rc = fca_do_barrier(fca_module->fca_comm);
|
||||
if (rc < 0) {
|
||||
if (rc == -EUSESHMEM) {
|
||||
FCA_VERBOSE(5, "FCA Barrier failed, using original barrier");
|
||||
goto orig_barrier;
|
||||
}
|
||||
FCA_ERROR("Barrier failed: %s", fca_strerror(rc));
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
return OSHMEM_SUCCESS;
|
||||
orig_barrier:
|
||||
PREVIOUS_SCOLL_FN(fca_module, barrier, group,
|
||||
pSync,
|
||||
SCOLL_DEFAULT_ALG);
|
||||
return rc;
|
||||
}
|
||||
|
||||
int mca_scoll_fca_broadcast(struct oshmem_group_t *group,
|
||||
int PE_root,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
bool nlong_type,
|
||||
int alg)
|
||||
{
|
||||
mca_scoll_fca_module_t *fca_module =
|
||||
(mca_scoll_fca_module_t *) group->g_scoll.scoll_broadcast_module;
|
||||
fca_bcast_spec_t spec;
|
||||
int rc;
|
||||
|
||||
FCA_VERBOSE(5, "rank %i, DOING FCA BCAST\n", group->my_pe);
|
||||
spec.root = oshmem_proc_group_find_id(group, PE_root);
|
||||
if (group->my_pe == PE_root)
|
||||
spec.buf = (void *) source;
|
||||
else
|
||||
spec.buf = target;
|
||||
spec.size = nlong;
|
||||
if (spec.size > fca_module->fca_comm_caps.max_payload) {
|
||||
FCA_VERBOSE(5,
|
||||
"Unsupported bcast operation size %d, using fallback",
|
||||
spec.size);
|
||||
goto orig_bcast;
|
||||
}
|
||||
rc = fca_do_bcast(fca_module->fca_comm, &spec);
|
||||
if (rc < 0) {
|
||||
if (rc == -EUSESHMEM) {
|
||||
FCA_VERBOSE(5, "FCA Broadcast failed, using original Broadcast");
|
||||
goto orig_bcast;
|
||||
}
|
||||
FCA_ERROR("Bcast failed: %s", fca_strerror(rc));
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
return OSHMEM_SUCCESS;
|
||||
orig_bcast:
|
||||
PREVIOUS_SCOLL_FN(fca_module, broadcast, group,
|
||||
PE_root,
|
||||
target,
|
||||
source,
|
||||
nlong,
|
||||
pSync,
|
||||
SCOLL_DEFAULT_ALG);
|
||||
return rc;
|
||||
}
|
||||
|
||||
int mca_scoll_fca_collect(struct oshmem_group_t *group,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
bool nlong_type,
|
||||
int alg)
|
||||
{
|
||||
int rc, i;
|
||||
mca_scoll_fca_module_t *fca_module =
|
||||
(mca_scoll_fca_module_t *) group->g_scoll.scoll_collect_module;
|
||||
|
||||
FCA_VERBOSE(5,
|
||||
"rank %i, DOING FCA_COLLECT, nlong_type = %i\n",
|
||||
group->my_pe, (int)nlong_type);
|
||||
#if OSHMEM_FCA_ALLGATHER
|
||||
if (nlong_type == true) {
|
||||
fca_gather_spec_t spec = {0,};
|
||||
spec.size = (int)nlong;
|
||||
spec.sbuf = (void *)source;
|
||||
spec.rbuf = target;
|
||||
rc = fca_do_allgather(fca_module->fca_comm, &spec);
|
||||
if (rc < 0) {
|
||||
if (rc == -EUSESHMEM) {
|
||||
FCA_VERBOSE(5,"FCA Fcollect(allgather) failed, using original Fcollect");
|
||||
goto orig_collect;
|
||||
}
|
||||
FCA_ERROR("Fcollect(allgather) failed: %s", fca_strerror(rc));
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t *sendcounts = (size_t *)malloc(group->proc_count*sizeof(size_t));
|
||||
mca_scoll_fca_collect(group,sendcounts,(void *)&nlong,sizeof(size_t),pSync,true,SCOLL_DEFAULT_ALG);
|
||||
fca_gatherv_spec_t spec;
|
||||
spec.sendsize = (int)nlong;
|
||||
spec.sbuf = (void *)source;
|
||||
spec.rbuf = target;
|
||||
spec.recvsizes = alloca(sizeof(*spec.recvsizes) * group->proc_count);
|
||||
spec.displs = alloca(sizeof(*spec.displs) * group->proc_count);
|
||||
for (i=0; i<group->proc_count; i++) {
|
||||
spec.recvsizes[i] = (int)sendcounts[i];
|
||||
}
|
||||
spec.displs[0] = 0;
|
||||
for (i=1; i<group->proc_count; i++) {
|
||||
spec.displs[i] = spec.displs[i-1]+spec.recvsizes[i-1];
|
||||
}
|
||||
rc = fca_do_allgatherv(fca_module->fca_comm, &spec);
|
||||
if (rc < 0) {
|
||||
if (rc == -EUSESHMEM) {
|
||||
FCA_VERBOSE(5,"FCA Collect(allgatherv) failed, using original Collect");
|
||||
goto orig_collect;
|
||||
}
|
||||
FCA_ERROR("Collect(allgatherv) failed: %s", fca_strerror(rc));
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
free(sendcounts);
|
||||
return OSHMEM_SUCCESS;
|
||||
}
|
||||
orig_collect:
|
||||
#endif
|
||||
PREVIOUS_SCOLL_FN(fca_module, collect, group,
|
||||
target,
|
||||
source,
|
||||
nlong,
|
||||
pSync,
|
||||
nlong_type,
|
||||
SCOLL_DEFAULT_ALG);
|
||||
return rc;
|
||||
}
|
||||
|
||||
#define FCA_DTYPE_8_SIGNED 1
|
||||
#define FCA_DTYPE_16_SIGNED 2
|
||||
#define FCA_DTYPE_32_SIGNED 3
|
||||
#define FCA_DTYPE_64_SIGNED 4
|
||||
#define FCA_DTYPE_32_FLOAT 9
|
||||
#define FCA_DTYPE_64_FLOAT 10
|
||||
#define UNSUPPORTED_OP -1
|
||||
|
||||
static bool if_floating_type(oshmem_op_t *op)
|
||||
{
|
||||
if ((op->dt == OSHMEM_OP_TYPE_FLOAT) || (op->dt == OSHMEM_OP_TYPE_DOUBLE)
|
||||
|| (op->dt == OSHMEM_OP_TYPE_LDOUBLE))
|
||||
return true;
|
||||
else
|
||||
return false;
|
||||
}
|
||||
static int shmem_dtype_to_fca_dtype(oshmem_op_t *op)
|
||||
{
|
||||
if ((op->dt == OSHMEM_OP_TYPE_FCOMPLEX)
|
||||
|| (op->dt == OSHMEM_OP_TYPE_DCOMPLEX)) {
|
||||
return UNSUPPORTED_OP;
|
||||
}
|
||||
switch (op->dt_size * 8) {
|
||||
case 64:
|
||||
if (if_floating_type(op))
|
||||
return FCA_DTYPE_64_FLOAT;
|
||||
else
|
||||
return FCA_DTYPE_64_SIGNED;
|
||||
break;
|
||||
case 32:
|
||||
if (if_floating_type(op))
|
||||
return FCA_DTYPE_32_FLOAT;
|
||||
else
|
||||
return FCA_DTYPE_32_SIGNED;
|
||||
break;
|
||||
case 16:
|
||||
if (OPAL_UNLIKELY(if_floating_type(op)))
|
||||
return UNSUPPORTED_OP;
|
||||
else
|
||||
return FCA_DTYPE_16_SIGNED;
|
||||
break;
|
||||
case 8:
|
||||
if (OPAL_UNLIKELY(if_floating_type(op)))
|
||||
return UNSUPPORTED_OP;
|
||||
else
|
||||
return FCA_DTYPE_8_SIGNED;
|
||||
break;
|
||||
default:
|
||||
return UNSUPPORTED_OP;
|
||||
}
|
||||
}
|
||||
|
||||
static int shmem_op_to_fca_op(oshmem_op_t *op)
|
||||
{
|
||||
switch (op->op) {
|
||||
case OSHMEM_OP_AND:
|
||||
return FCA_OP_BAND;
|
||||
break;
|
||||
case OSHMEM_OP_OR:
|
||||
return FCA_OP_BOR;
|
||||
break;
|
||||
case OSHMEM_OP_XOR:
|
||||
return FCA_OP_BXOR;
|
||||
case OSHMEM_OP_MAX:
|
||||
return FCA_OP_MAX;
|
||||
break;
|
||||
case OSHMEM_OP_MIN:
|
||||
return FCA_OP_MIN;
|
||||
break;
|
||||
case OSHMEM_OP_SUM:
|
||||
return FCA_OP_SUM;
|
||||
break;
|
||||
case OSHMEM_OP_PROD:
|
||||
return FCA_OP_PROD;
|
||||
break;
|
||||
default:
|
||||
return UNSUPPORTED_OP;
|
||||
}
|
||||
}
|
||||
int mca_scoll_fca_reduce(struct oshmem_group_t *group,
|
||||
struct oshmem_op_t *op,
|
||||
void *target,
|
||||
const void *source,
|
||||
size_t nlong,
|
||||
long *pSync,
|
||||
void *pWrk,
|
||||
int alg)
|
||||
{
|
||||
mca_scoll_fca_module_t *fca_module =
|
||||
(mca_scoll_fca_module_t *) group->g_scoll.scoll_reduce_module;
|
||||
int fca_dtype;
|
||||
int fca_op;
|
||||
int rc;
|
||||
fca_reduce_spec_t spec;
|
||||
|
||||
FCA_VERBOSE(5, "rank %i, DOING FCA_REDUCE\n", group->my_pe);
|
||||
if ((fca_dtype = shmem_dtype_to_fca_dtype(op)) < 0) {
|
||||
FCA_VERBOSE(5,
|
||||
"SHMEM_DATA_TYPE = %i is unsupported in the current version of FCA library; using original reduce",
|
||||
op->dt);
|
||||
goto orig_reduce;
|
||||
}
|
||||
if ((fca_op = shmem_op_to_fca_op(op)) < 0) {
|
||||
FCA_VERBOSE(5,
|
||||
"SHMEM_OPERATION_TYPE = %i is unsupported; using original reduce",
|
||||
op->op);
|
||||
goto orig_reduce;
|
||||
}
|
||||
spec.sbuf = (void *) source;
|
||||
spec.rbuf = target;
|
||||
spec.dtype = (enum fca_reduce_dtype_t) fca_dtype;
|
||||
spec.op = (enum fca_reduce_op_t) fca_op;
|
||||
spec.length = (int) (nlong / op->dt_size);
|
||||
rc = fca_do_all_reduce(fca_module->fca_comm, &spec);
|
||||
if (rc < 0) {
|
||||
if (rc == -EUSESHMEM) {
|
||||
FCA_VERBOSE(5,
|
||||
"FCA Reduce(allreduce) failed, using original Reduce");
|
||||
goto orig_reduce;
|
||||
}
|
||||
FCA_ERROR("Reduce (allreduce) failed: %s", fca_strerror(rc));
|
||||
return OSHMEM_ERROR;
|
||||
}
|
||||
return OSHMEM_SUCCESS;
|
||||
orig_reduce:
|
||||
PREVIOUS_SCOLL_FN(fca_module, reduce, group,
|
||||
op,
|
||||
target,
|
||||
source,
|
||||
nlong,
|
||||
pSync,
|
||||
pWrk,
|
||||
SCOLL_DEFAULT_ALG);
|
||||
return rc;
|
||||
}
|
Загрузка…
x
Ссылка в новой задаче
Block a user