Initial support for Cray's uGNI interface (XE-6/XK-6)
This commit was SVN r25608.
Этот коммит содержится в:
родитель
de8d3a4f79
Коммит
e03d23d96e
95
ompi/config/ompi_check_ugni.m4
Обычный файл
95
ompi/config/ompi_check_ugni.m4
Обычный файл
@ -0,0 +1,95 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2006 QLogic Corp. All rights reserved.
|
||||
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# OMPI_CHECK_UGNI(prefix, [action-if-found], [action-if-not-found])
|
||||
# --------------------------------------------------------
|
||||
# check if GNI support can be found. sets prefix_{CPPFLAGS,
|
||||
# LDFLAGS, LIBS} as needed and runs action-if-found if there is
|
||||
# support, otherwise executes action-if-not-found
|
||||
#
|
||||
# NOTES
|
||||
# on Cray XE6 systems, the GNI development header (gni_pub.h) is in a
|
||||
# completely different place than the ugni library (libugni).
|
||||
#
|
||||
# EXAMPLE CONFIGURE USAGE:
|
||||
# --with-ugni=/base/path/to/libugni --with-ugni-includedir=/path/to/gni_pub.h
|
||||
#
|
||||
# --with-ugni=/opt/cray/ugni/default --with-ugni-includedir=/opt/cray/gni-headers/default/include
|
||||
|
||||
AC_DEFUN([OMPI_CHECK_UGNI], [
|
||||
AC_ARG_WITH([ugni], [
|
||||
AC_HELP_STRING([--with-ugni(=DIR)],
|
||||
[Build GNI (Cray Gemini) support, optionally adding DIR/include, DIR/lib, and DIR/lib64 to the search path for headers and libraries])])
|
||||
|
||||
dnl does the path exist?
|
||||
OMPI_CHECK_WITHDIR([ugni], [$with_ugni], [.])
|
||||
|
||||
AC_ARG_WITH([ugni-libdir], [
|
||||
AC_HELP_STRING([--with-ugni-libdir=DIR], [
|
||||
Search for GNI (Cray Gemini) libraries in DIR])])
|
||||
OMPI_CHECK_WITHDIR([ugni-libdir], [$with_ugni_libdir], [libugni.*])
|
||||
|
||||
AC_ARG_WITH([ugni-includedir], [
|
||||
AC_HELP_STRING([--with-ugni-includedir=DIR], [
|
||||
Search for GNI (Cray Gemini) headers in DIR])])
|
||||
OMPI_CHECK_WITHDIR([ugni-includedir], [$with_ugni_includedir], [gni_pub.h])
|
||||
|
||||
AS_IF([test "$with_ugni_includedir" != "" -a "$with_ugni_includedir" != "yes" -a "$with_ugni_includedir" != "no"],
|
||||
[$1_CPPFLAGS="$$1_CPPFLAGS -I$with_ugni_includedir"])
|
||||
|
||||
ompi_check_ugni_$1_save_CPPFLAGS="$CPPFLAGS"
|
||||
ompi_check_ugni_$1_save_LDFLAGS="$LDFLAGS"
|
||||
ompi_check_ugni_$1_save_LIBS="$LIBS"
|
||||
|
||||
AS_IF([test "$with_ugni" != "no"], [
|
||||
AS_IF([test ! -z "$with_ugni" -a "$with_ugni" != "yes"], [
|
||||
ompi_check_ugni_dir="$with_ugni"])
|
||||
AS_IF([test ! -z "$with_ugni_libdir" -a "$with_ugni_libdir" != "yes"], [
|
||||
ompi_check_ugni_libdir="$with_ugni_libdir"])
|
||||
|
||||
OMPI_CHECK_PACKAGE([$1],
|
||||
[ugni.h],
|
||||
[ugni],
|
||||
[GNI_CdmCreate],
|
||||
[],
|
||||
[$ompi_check_ugni_dir],
|
||||
[$ompi_check_ugni_libdir],
|
||||
[ompi_check_ugni_happy="yes"],
|
||||
[ompi_check_ugni_happy="no"])],
|
||||
[ompi_check_ugni_happy="no"])
|
||||
|
||||
CPPFLAGS="$ompi_check_ugni_$1_save_CPPFLAGS"
|
||||
LDFLAGS="$ompi_check_ugni_$1_save_LDFLAGS"
|
||||
LIBS="$ompi_check_ugni_$1_save_LIBS"
|
||||
|
||||
dnl XXX not sure if this is true, but will assume so...
|
||||
AS_IF([test "$ompi_check_ugni_happy" = "yes" -a "$enable_progress_threads" = "yes"],
|
||||
[AC_MSG_WARN([GNI driver does not currently support progress threads. Disabling.])
|
||||
ompi_check_ugni_happy="no"])
|
||||
|
||||
AS_IF([test "$ompi_check_ugni_happy" = "yes"],
|
||||
[$2],
|
||||
[AS_IF([test ! -z "$with_ugni" -a "$with_ugni" != "no"],
|
||||
[AC_MSG_ERROR([GNI support requested but not found. Cannot continue.])])
|
||||
$3])
|
||||
])
|
52
ompi/mca/btl/ugni/Makefile.am
Обычный файл
52
ompi/mca/btl/ugni/Makefile.am
Обычный файл
@ -0,0 +1,52 @@
|
||||
# -*- indent-tabs-mode:nil -*-
|
||||
#
|
||||
# Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
AM_CPPFLAGS = $(btl_ugni_CPPFLAGS)
|
||||
|
||||
if MCA_BUILD_ompi_btl_ugni_DSO
|
||||
component_noinst =
|
||||
component_install = mca_btl_ugni.la
|
||||
else
|
||||
component_noinst = libmca_btl_ugni.la
|
||||
component_install =
|
||||
endif
|
||||
|
||||
ugni_SOURCES = \
|
||||
btl_ugni_component.c \
|
||||
btl_ugni_module.c \
|
||||
btl_ugni_add_procs.c \
|
||||
btl_ugni_endpoint.h \
|
||||
btl_ugni_endpoint.c \
|
||||
btl_ugni_frag.c \
|
||||
btl_ugni_frag.h \
|
||||
btl_ugni_rdma.h \
|
||||
btl_ugni_send.c \
|
||||
btl_ugni_sendi.c \
|
||||
btl_ugni_put.c \
|
||||
btl_ugni_get.c \
|
||||
btl_ugni.h
|
||||
|
||||
mcacomponentdir = $(pkglibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_btl_ugni_la_SOURCES = $(ugni_SOURCES)
|
||||
nodist_mca_btl_ugni_la_SOURCES = $(ugni_nodist_SOURCES)
|
||||
mca_btl_ugni_la_LIBADD = $(btl_ugni_LIBS)
|
||||
mca_btl_ugni_la_LDFLAGS = -module -avoid-version $(btl_ugni_LDFLAGS)
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
libmca_btl_ugni_la_SOURCES = $(ugni_SOURCES)
|
||||
nodist_libmca_btl_ugni_la_SOURCES = $(ugni_nodist_SOURCES)
|
||||
libmca_btl_ugni_la_LIBADD = $(btl_ugni_LIBS)
|
||||
libmca_btl_ugni_la_LDFLAGS = -module -avoid-version $(btl_ugni_LDFLAGS)
|
248
ompi/mca/btl/ugni/btl_ugni.h
Обычный файл
248
ompi/mca/btl/ugni/btl_ugni.h
Обычный файл
@ -0,0 +1,248 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/*
|
||||
* The ugni btl is implemented with native Cray Gemini.
|
||||
*
|
||||
* Known issues with ugni:
|
||||
* -
|
||||
*/
|
||||
|
||||
#ifndef MCA_BTL_UGNI_H
|
||||
#define MCA_BTL_UGNI_H
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "ompi/mca/mpool/mpool.h"
|
||||
#include "ompi/mca/mpool/base/base.h"
|
||||
#include "ompi/mca/mpool/rdma/mpool_rdma.h"
|
||||
#include "ompi/runtime/ompi_module_exchange.h"
|
||||
#include "opal/util/output.h"
|
||||
#include "opal_stdint.h"
|
||||
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "ompi/mca/btl/base/base.h"
|
||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||
#include "ompi/class/ompi_free_list.h"
|
||||
|
||||
#include "ompi/mca/common/ugni/common_ugni.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdint.h>
|
||||
#include <sys/types.h>
|
||||
#include <assert.h>
|
||||
#include <sys/time.h>
|
||||
#include <gni_pub.h>
|
||||
|
||||
/* datagram message ids */
|
||||
#define MCA_BTL_UGNI_CONNECT_WILDCARD_ID 0x6b69726b00000000ull
|
||||
#define MCA_BTL_UGNI_CONNECT_DIRECTED_ID 0x6b61686e00000000ull
|
||||
#define MCA_BTL_UGNI_DATAGRAM_MASK 0xffffffff00000000ull
|
||||
|
||||
typedef enum {
|
||||
MCA_BTL_UGNI_TAG_SEND,
|
||||
MCA_BTL_UGNI_TAG_DISCONNECT,
|
||||
MCA_BTL_UGNI_TAG_PUT_INIT,
|
||||
MCA_BTL_UGNI_TAG_PUT_COMPLETE
|
||||
} mca_btl_ugni_smsg_tag_t;
|
||||
|
||||
/* Maximum number of outstanding eager messages */
|
||||
extern int mca_btl_ugni_smsg_max_credits;
|
||||
extern int mca_btl_ugni_smsg_mbox_size;
|
||||
|
||||
struct mca_btl_ugni_module_t {
|
||||
mca_btl_base_module_t super;
|
||||
|
||||
ompi_common_ugni_device_t *device;
|
||||
|
||||
size_t endpoint_count;
|
||||
struct mca_btl_base_endpoint_t **endpoints;
|
||||
|
||||
opal_list_t failed_frags;
|
||||
|
||||
mca_mpool_base_module_t *smsg_mpool;
|
||||
ompi_free_list_t smsg_mboxes;
|
||||
|
||||
gni_ep_handle_t wildcard_ep;
|
||||
gni_smsg_attr_t wc_remote_attr, wc_local_attr;
|
||||
|
||||
gni_cq_handle_t bte_local_cq;
|
||||
gni_cq_handle_t smsg_remote_cq;
|
||||
};
|
||||
typedef struct mca_btl_ugni_module_t mca_btl_ugni_module_t;
|
||||
|
||||
struct mca_btl_ugni_component_t {
|
||||
/* base BTL component */
|
||||
mca_btl_base_component_2_0_0_t super;
|
||||
|
||||
/* maximum supported btls. hardcoded to 1 for now */
|
||||
uint32_t ugni_max_btls;
|
||||
/* Maximum number of entries a completion queue can hold */
|
||||
uint32_t cq_size;
|
||||
|
||||
/* number of ugni modules */
|
||||
uint32_t ugni_num_btls;
|
||||
/* ugni modules */
|
||||
mca_btl_ugni_module_t *modules;
|
||||
|
||||
/* eager send limit in bytes */
|
||||
/* used as the threshold for switching from SMSG */
|
||||
size_t eager_limit;
|
||||
|
||||
/* After this message size switch to BTE protocols */
|
||||
size_t btl_fma_limit;
|
||||
/* Switch to put when trying to GET at or above this size */
|
||||
size_t btl_get_limit;
|
||||
|
||||
/* eager fragment list */
|
||||
ompi_free_list_t ugni_frags_eager;
|
||||
/* RDMA fragment list */
|
||||
ompi_free_list_t ugni_frags_rdma;
|
||||
|
||||
/* initial free list size */
|
||||
int ugni_free_list_num;
|
||||
/* maximum free list size */
|
||||
int ugni_free_list_max;
|
||||
/* free list increment */
|
||||
int ugni_free_list_inc;
|
||||
|
||||
/* number of times to retry a post */
|
||||
int rdma_max_retries;
|
||||
};
|
||||
typedef struct mca_btl_ugni_component_t mca_btl_ugni_component_t;
|
||||
|
||||
int mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
|
||||
ompi_common_ugni_device_t *device);
|
||||
|
||||
/**
|
||||
* BML->BTL notification of change in the process list.
|
||||
*
|
||||
* location: btl_ugni_add_procs.c
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param nprocs (IN) Number of processes
|
||||
* @param procs (IN) Array of processes
|
||||
* @param peers (OUT) Array of mca_btl_base_endpoint_t structures filled in by the BTL.
|
||||
* @param reachable (OUT) Bitmask indicating set of peer processes that are reachable by this BTL.
|
||||
* @return OMPI_SUCCESS or error status on failure.
|
||||
*/
|
||||
int
|
||||
mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl,
|
||||
size_t nprocs,
|
||||
struct ompi_proc_t **procs,
|
||||
struct mca_btl_base_endpoint_t **peers,
|
||||
opal_bitmap_t *reachable);
|
||||
|
||||
/**
|
||||
* Notification of change to the process list.
|
||||
*
|
||||
* location: btl_ugni_add_procs.c
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param nprocs (IN) Number of processes
|
||||
* @param proc (IN) Set of processes
|
||||
* @param peer (IN) Set of peer addressing information.
|
||||
* @return Status indicating if cleanup was successful
|
||||
*/
|
||||
int
|
||||
mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl,
|
||||
size_t nprocs,
|
||||
struct ompi_proc_t **procs,
|
||||
struct mca_btl_base_endpoint_t **peers);
|
||||
|
||||
/**
|
||||
* Initiate an asynchronous send.
|
||||
*
|
||||
* location: btl_ugni_send.c
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param btl_peer (IN) BTL addressing information
|
||||
* @param descriptor (IN) Description of the data to be transferred
|
||||
* @param tag (IN) The tag value used to notify the peer.
|
||||
*/
|
||||
int
|
||||
mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *btl_peer,
|
||||
struct mca_btl_base_descriptor_t *descriptor,
|
||||
mca_btl_base_tag_t tag);
|
||||
|
||||
/**
|
||||
* Initiate an immediate blocking send.
|
||||
*
|
||||
* location: btl_ugni_sendi.c
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param convertor (IN) Data type convertor
|
||||
* @param header (IN) Pointer to header.
|
||||
* @param header_size (IN) Size of header.
|
||||
* @param payload_size (IN) Size of payload (from convertor).
|
||||
* @param order (IN) The ordering tag (may be MCA_BTL_NO_ORDER)
|
||||
* @param flags (IN) Flags.
|
||||
* @param tag (IN) The tag value used to notify the peer.
|
||||
* @param descriptor (OUT) The descriptor to be returned if the message cannot be sent immediately
|
||||
*/
|
||||
int
|
||||
mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct opal_convertor_t *convertor,
|
||||
void *header, size_t header_size,
|
||||
size_t payload_size, uint8_t order,
|
||||
uint32_t flags, mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t **descriptor);
|
||||
|
||||
/**
|
||||
* Initiate a get operation.
|
||||
*
|
||||
* location: btl_ugni_get.c
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param descriptor (IN) Description of the data to be transferred
|
||||
*/
|
||||
int
|
||||
mca_btl_ugni_get (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct mca_btl_base_descriptor_t *des);
|
||||
|
||||
/**
|
||||
* Initiate a put operation.
|
||||
*
|
||||
* location: btl_ugni_put.c
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param descriptor (IN) Description of the data to be transferred
|
||||
*/
|
||||
int
|
||||
mca_btl_ugni_put (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct mca_btl_base_descriptor_t *des);
|
||||
|
||||
mca_btl_base_descriptor_t *
|
||||
mca_btl_ugni_alloc(struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
uint8_t order, size_t size, uint32_t flags);
|
||||
|
||||
struct mca_btl_ugni_reg_t {
|
||||
mca_mpool_base_registration_t base;
|
||||
gni_mem_handle_t memory_hdl;
|
||||
void *buffer;
|
||||
size_t size;
|
||||
};
|
||||
typedef struct mca_btl_ugni_reg_t mca_btl_ugni_reg_t;
|
||||
|
||||
/* Global structures */
|
||||
|
||||
OMPI_MODULE_DECLSPEC extern mca_btl_ugni_component_t mca_btl_ugni_component;
|
||||
OMPI_MODULE_DECLSPEC extern mca_btl_ugni_module_t mca_btl_ugni_module;
|
||||
|
||||
#endif
|
131
ompi/mca/btl/ugni/btl_ugni_add_procs.c
Обычный файл
131
ompi/mca/btl/ugni/btl_ugni_add_procs.c
Обычный файл
@ -0,0 +1,131 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "btl_ugni.h"
|
||||
#include "btl_ugni_frag.h"
|
||||
#include "btl_ugni_endpoint.h"
|
||||
|
||||
int mca_btl_ugni_add_procs(struct mca_btl_base_module_t* btl,
|
||||
size_t nprocs,
|
||||
struct ompi_proc_t **procs,
|
||||
struct mca_btl_base_endpoint_t **peers,
|
||||
opal_bitmap_t *reachable) {
|
||||
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
|
||||
size_t ntotal_procs;
|
||||
size_t i;
|
||||
int rc;
|
||||
|
||||
|
||||
if (NULL == ugni_module->endpoints) {
|
||||
(void) ompi_proc_world (&ntotal_procs);
|
||||
|
||||
ugni_module->endpoints = calloc (ntotal_procs, sizeof (mca_btl_base_endpoint_t *));
|
||||
|
||||
if (OPAL_UNLIKELY(NULL == ugni_module->endpoints)) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
rc = ompi_free_list_init_new (&mca_btl_ugni_component.ugni_frags_eager,
|
||||
sizeof (mca_btl_ugni_base_frag_t),
|
||||
opal_cache_line_size, OBJ_CLASS(mca_btl_ugni_base_frag_t),
|
||||
sizeof (mca_btl_ugni_frag_hdr_t) + mca_btl_ugni_component.eager_limit,
|
||||
opal_cache_line_size,
|
||||
mca_btl_ugni_component.ugni_free_list_num,
|
||||
mca_btl_ugni_component.ugni_free_list_max,
|
||||
mca_btl_ugni_component.ugni_free_list_inc,
|
||||
NULL);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = ompi_free_list_init_new (&mca_btl_ugni_component.ugni_frags_rdma,
|
||||
sizeof (mca_btl_ugni_rdma_frag_t),
|
||||
opal_cache_line_size, OBJ_CLASS(mca_btl_ugni_rdma_frag_t),
|
||||
0, opal_cache_line_size,
|
||||
mca_btl_ugni_component.ugni_free_list_num,
|
||||
mca_btl_ugni_component.ugni_free_list_max,
|
||||
mca_btl_ugni_component.ugni_free_list_inc,
|
||||
NULL);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0 ; i < nprocs ; ++i) {
|
||||
struct ompi_proc_t *ompi_proc = procs[i];
|
||||
uint32_t rem_rank = ompi_proc->proc_name.vpid;
|
||||
|
||||
if (OPAL_PROC_ON_LOCAL_NODE(ompi_proc->proc_flags)) {
|
||||
/* ignore local procs */
|
||||
peers[i] = NULL;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Create and Init endpoints */
|
||||
rc = mca_btl_ugni_init_ep (peers + i, (mca_btl_ugni_module_t *) btl, ompi_proc);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
BTL_ERROR(("btl/ugni error initializing endpoint"));
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Set the reachable bit */
|
||||
rc = opal_bitmap_set_bit (reachable, i);
|
||||
|
||||
/* Store a reference to this peer */
|
||||
ugni_module->endpoints[rem_rank] = peers[i];
|
||||
}
|
||||
|
||||
ugni_module->endpoint_count += nprocs;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl,
|
||||
size_t nprocs, struct ompi_proc_t **procs,
|
||||
struct mca_btl_base_endpoint_t **peers) {
|
||||
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
|
||||
size_t i;
|
||||
|
||||
/* NTH: this function destroys the endpoint list which will cause bad
|
||||
things to happen if the caller only wants to delete a few procs. */
|
||||
|
||||
for (i = 0 ; i < nprocs ; ++i) {
|
||||
struct ompi_proc_t *ompi_proc = procs[i];
|
||||
uint32_t rem_rank = ompi_proc->proc_name.vpid;
|
||||
|
||||
if (ugni_module->endpoints[rem_rank]) {
|
||||
mca_btl_ugni_release_ep (ugni_module->endpoints[rem_rank]);
|
||||
}
|
||||
|
||||
ugni_module->endpoints[rem_rank] = NULL;
|
||||
}
|
||||
|
||||
ugni_module->endpoint_count -= nprocs;
|
||||
|
||||
if (0 == ugni_module->endpoint_count) {
|
||||
free (ugni_module->endpoints);
|
||||
ugni_module->endpoints = NULL;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
627
ompi/mca/btl/ugni/btl_ugni_component.c
Обычный файл
627
ompi/mca/btl/ugni/btl_ugni_component.c
Обычный файл
@ -0,0 +1,627 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "btl_ugni.h"
|
||||
#include "btl_ugni_endpoint.h"
|
||||
#include "btl_ugni_frag.h"
|
||||
#include "btl_ugni_rdma.h"
|
||||
|
||||
#include "opal/mca/base/mca_base_param.h"
|
||||
#include "opal/memoryhooks/memory.h"
|
||||
#include "ompi/runtime/params.h"
|
||||
|
||||
int mca_btl_ugni_smsg_max_credits = 32;
|
||||
int mca_btl_ugni_smsg_mbox_size;
|
||||
|
||||
static int btl_ugni_component_register(void);
|
||||
static int btl_ugni_component_open(void);
|
||||
static int btl_ugni_component_close(void);
|
||||
static mca_btl_base_module_t **mca_btl_ugni_component_init(int *, bool, bool);
|
||||
static int mca_btl_ugni_component_progress(void);
|
||||
|
||||
mca_btl_ugni_component_t mca_btl_ugni_component = {
|
||||
{
|
||||
/* First, the mca_base_component_t struct containing meta information
|
||||
about the component itself */
|
||||
|
||||
{
|
||||
MCA_BTL_BASE_VERSION_2_0_0,
|
||||
|
||||
"ugni", /* MCA component name */
|
||||
OMPI_MAJOR_VERSION, /* MCA component major version */
|
||||
OMPI_MINOR_VERSION, /* MCA component minor version */
|
||||
OMPI_RELEASE_VERSION, /* MCA component release version */
|
||||
btl_ugni_component_open, /* component open */
|
||||
btl_ugni_component_close, /* component close */
|
||||
NULL, /* component query */
|
||||
btl_ugni_component_register, /* component register */
|
||||
},
|
||||
{
|
||||
MCA_BASE_METADATA_PARAM_CHECKPOINT
|
||||
},
|
||||
mca_btl_ugni_component_init,
|
||||
mca_btl_ugni_component_progress,
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Register a string MCA parameter named btl_ugni_<param_name> and return
 * its current value.
 *
 * @param param_name    (IN) Parameter name (without the btl/ugni prefix)
 * @param default_value (IN) Value registered as the default
 * @return The value stored by mca_base_param_lookup_string, or NULL if the
 *         lookup did not store one.
 */
static inline char *
mca_btl_ugni_param_register_string(const char *param_name,
                                   const char *default_value)
{
    /* start from NULL so a failed lookup cannot return an indeterminate pointer */
    char *param_value = NULL;
    int id = mca_base_param_register_string("btl", "ugni", param_name, NULL,
                                            default_value);

    /* pass the address of the output variable (the scraped text had the
     * "&para" of "&param_value" collapsed into a pilcrow character) */
    mca_base_param_lookup_string(id, &param_value);

    return param_value;
}
|
||||
|
||||
/**
 * Register an integer MCA parameter named btl_ugni_<param_name> and return
 * its current value.
 *
 * @param param_name (IN) Parameter name (without the btl/ugni prefix)
 * @param value      (IN) Value registered as the default
 * @return The looked-up value; the default is returned unchanged if the
 *         lookup does not overwrite it.
 */
static inline int
mca_btl_ugni_param_register_int (const char *param_name, int value)
{
    int current = value;
    const int index = mca_base_param_register_int("btl", "ugni", param_name,
                                                  NULL, value);

    mca_base_param_lookup_int(index, &current);

    return current;
}
|
||||
|
||||
static int
|
||||
btl_ugni_component_register(void)
|
||||
{
|
||||
mca_btl_ugni_component.ugni_free_list_num =
|
||||
mca_btl_ugni_param_register_int("free_list_num", 8);
|
||||
mca_btl_ugni_component.ugni_free_list_max =
|
||||
mca_btl_ugni_param_register_int("free_list_max", -1);
|
||||
mca_btl_ugni_component.ugni_free_list_inc =
|
||||
mca_btl_ugni_param_register_int("free_list_inc", 64);
|
||||
|
||||
mca_btl_ugni_component.cq_size =
|
||||
mca_btl_ugni_param_register_int("cq_size", 25000);
|
||||
|
||||
mca_btl_ugni_component.btl_fma_limit =
|
||||
mca_btl_ugni_param_register_int("fma_limit", 4 * 1024);
|
||||
|
||||
mca_btl_ugni_component.btl_get_limit =
|
||||
mca_btl_ugni_param_register_int("get_limit", 8 * 1024);
|
||||
|
||||
mca_btl_ugni_component.rdma_max_retries =
|
||||
mca_btl_ugni_param_register_int("rdma_max_retries", 8);
|
||||
|
||||
mca_btl_ugni_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;
|
||||
|
||||
/* smsg threshold */
|
||||
mca_btl_ugni_module.super.btl_eager_limit = 0; /* set dynamically in module_init */
|
||||
mca_btl_ugni_module.super.btl_rndv_eager_limit = 8 * 1024;
|
||||
mca_btl_ugni_module.super.btl_rdma_pipeline_frag_size = 2 * 1024 * 1024;
|
||||
mca_btl_ugni_module.super.btl_max_send_size = 0; /* set this later */
|
||||
mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = 0; /* set this later */
|
||||
|
||||
/* threshold for put */
|
||||
mca_btl_ugni_module.super.btl_min_rdma_pipeline_size = 0;
|
||||
|
||||
mca_btl_ugni_module.super.btl_flags = MCA_BTL_FLAGS_SEND |
|
||||
MCA_BTL_FLAGS_RDMA |
|
||||
MCA_BTL_FLAGS_RDMA_MATCHED;
|
||||
|
||||
mca_btl_ugni_module.super.btl_bandwidth = 40000; /* Mbs */
|
||||
mca_btl_ugni_module.super.btl_latency = 2; /* Microsecs */
|
||||
|
||||
/* Call the BTL based to register its MCA params */
|
||||
mca_btl_base_param_register(&mca_btl_ugni_component.super.btl_version,
|
||||
&mca_btl_ugni_module.super);
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static int
|
||||
btl_ugni_component_open(void)
|
||||
{
|
||||
mca_btl_ugni_component.ugni_num_btls = 0;
|
||||
mca_btl_ugni_component.modules = NULL;
|
||||
|
||||
OBJ_CONSTRUCT(&mca_btl_ugni_component.ugni_frags_eager, ompi_free_list_t);
|
||||
OBJ_CONSTRUCT(&mca_btl_ugni_component.ugni_frags_rdma, ompi_free_list_t);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/*
|
||||
* component cleanup - calls ompi_common_ugni_fini and destructs the fragment free lists
|
||||
*/
|
||||
static int
|
||||
btl_ugni_component_close(void)
|
||||
{
|
||||
ompi_common_ugni_fini ();
|
||||
|
||||
OBJ_DESTRUCT(&mca_btl_ugni_component.ugni_frags_eager);
|
||||
OBJ_DESTRUCT(&mca_btl_ugni_component.ugni_frags_rdma);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static void mca_btl_ugni_autoset_leave_pinned (void) {
|
||||
mca_base_param_source_t source;
|
||||
int index, rc, value;
|
||||
|
||||
/* If we have a memory manager available, and
|
||||
mpi_leave_pinned==-1, then unless the user explicitly set
|
||||
mpi_leave_pinned_pipeline==0, then set mpi_leave_pinned to 1.
|
||||
We have a memory manager if we have both FREE and MUNMAP
|
||||
support */
|
||||
value = opal_mem_hooks_support_level();
|
||||
if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) ==
|
||||
((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) & value)) {
|
||||
rc = 0;
|
||||
index = mca_base_param_find("mpi", NULL, "leave_pinned");
|
||||
if (index >= 0) {
|
||||
if (OPAL_SUCCESS == mca_base_param_lookup_int(index, &value) &&
|
||||
-1 == value) {
|
||||
++rc;
|
||||
}
|
||||
}
|
||||
index = mca_base_param_find("mpi", NULL, "leave_pinned_pipeline");
|
||||
if (index >= 0) {
|
||||
if (OPAL_SUCCESS == mca_base_param_lookup_int(index, &value) &&
|
||||
OPAL_SUCCESS == mca_base_param_lookup_source(index, &source,
|
||||
NULL)) {
|
||||
if (0 == value && MCA_BASE_PARAM_SOURCE_DEFAULT == source) {
|
||||
++rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* If we were good on both parameters, then set leave_pinned=1 */
|
||||
if (2 == rc) {
|
||||
ompi_mpi_leave_pinned = 1;
|
||||
ompi_mpi_leave_pinned_pipeline = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int mca_btl_ugni_smsg_setup (void) {
|
||||
gni_smsg_attr_t tmp_smsg_attrib;
|
||||
unsigned int mbox_size;
|
||||
int rc;
|
||||
|
||||
/* calculate mailbox size */
|
||||
tmp_smsg_attrib.msg_type = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
|
||||
tmp_smsg_attrib.msg_maxsize = mca_btl_ugni_component.eager_limit + sizeof (mca_btl_ugni_frag_hdr_t);
|
||||
tmp_smsg_attrib.mbox_maxcredit = mca_btl_ugni_smsg_max_credits;
|
||||
|
||||
rc = GNI_SmsgBufferSizeNeeded (&tmp_smsg_attrib, &mbox_size);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
BTL_ERROR(("error in GNI_SmsgBufferSizeNeeded"));
|
||||
return ompi_common_rc_ugni_to_ompi (rc);
|
||||
}
|
||||
|
||||
mca_btl_ugni_smsg_mbox_size = ((mbox_size + opal_cache_line_size - 1)/opal_cache_line_size) * opal_cache_line_size;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static mca_btl_base_module_t **
|
||||
mca_btl_ugni_component_init (int *num_btl_modules,
|
||||
bool enable_progress_threads,
|
||||
bool enable_mpi_threads)
|
||||
{
|
||||
struct mca_btl_base_module_t **base_modules;
|
||||
mca_btl_ugni_module_t *ugni_modules;
|
||||
unsigned int i;
|
||||
size_t nprocs;
|
||||
int rc;
|
||||
|
||||
/* Initialize ugni library and create communication domain */
|
||||
rc = ompi_common_ugni_init();
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Create and initialize modules
|
||||
* Create one module per device
|
||||
* One btl == One module
|
||||
*/
|
||||
/* Manju: I should set this automatically, not hardcoded */
|
||||
mca_btl_ugni_component.ugni_num_btls = ompi_common_ugni_module.device_count;
|
||||
|
||||
BTL_VERBOSE(("btl/ugni initializing"));
|
||||
|
||||
ugni_modules = mca_btl_ugni_component.modules = (mca_btl_ugni_module_t *)
|
||||
calloc (mca_btl_ugni_component.ugni_num_btls,
|
||||
sizeof (mca_btl_ugni_module_t));
|
||||
|
||||
if (OPAL_UNLIKELY(NULL == mca_btl_ugni_component.modules)) {
|
||||
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
base_modules = (struct mca_btl_base_module_t **)
|
||||
calloc (mca_btl_ugni_component.ugni_num_btls,
|
||||
sizeof (struct mca_btl_base_module_t *));
|
||||
if (OPAL_UNLIKELY(NULL == base_modules)) {
|
||||
BTL_ERROR(("Malloc failed : %s:%d", __FILE__, __LINE__));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
mca_btl_ugni_autoset_leave_pinned ();
|
||||
|
||||
(void) ompi_proc_world (&nprocs);
|
||||
|
||||
if (0 == mca_btl_ugni_component.eager_limit) {
|
||||
/* auto-set the eager limit based on the number of ranks */
|
||||
if (nprocs <= 1024) {
|
||||
mca_btl_ugni_component.eager_limit = 1024;
|
||||
} else if (nprocs <= 16384) {
|
||||
mca_btl_ugni_component.eager_limit = 512;
|
||||
} else {
|
||||
mca_btl_ugni_component.eager_limit = 256;
|
||||
}
|
||||
}
|
||||
|
||||
rc = mca_btl_ugni_smsg_setup ();
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (i = 0 ; i < mca_btl_ugni_component.ugni_num_btls ; ++i) {
|
||||
mca_btl_ugni_module_t *ugni_module = ugni_modules + i;
|
||||
|
||||
rc = mca_btl_ugni_module_init (ugni_module,
|
||||
ompi_common_ugni_module.devices + i);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
BTL_ERROR(("Failed to initialize uGNI module @ %s:%d", __FILE__,
|
||||
__LINE__));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
base_modules[i] = (mca_btl_base_module_t *) ugni_module;
|
||||
}
|
||||
|
||||
*num_btl_modules = mca_btl_ugni_component.ugni_num_btls;
|
||||
|
||||
/* XXX TODO remove before release */
|
||||
signal (SIGSEGV, SIG_DFL);
|
||||
|
||||
BTL_VERBOSE(("btl/ugni done initializing modules"));
|
||||
|
||||
return base_modules;
|
||||
}
|
||||
|
||||
static inline void mca_btl_ugni_callback_reverse_get (mca_btl_base_module_t *btl,
|
||||
mca_btl_base_endpoint_t *ep,
|
||||
mca_btl_base_descriptor_t *des,
|
||||
int rc)
|
||||
{
|
||||
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
|
||||
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) des;
|
||||
uint32_t msg_id = ORTE_PROC_MY_NAME->vpid;
|
||||
|
||||
BTL_VERBOSE(("reverse get (put) for rem_ctx %p complete", des->des_cbdata));
|
||||
|
||||
/* tell peer the put is complete */
|
||||
rc = GNI_SmsgSendWTag (frag->endpoint->common->ep_handle, &des->des_cbdata, sizeof (void *),
|
||||
NULL, 0, msg_id, MCA_BTL_UGNI_TAG_PUT_COMPLETE);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
/* turn off btl ownership for now */
|
||||
des->des_flags &= ~MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;
|
||||
opal_list_append (&ugni_module->failed_frags, (opal_list_item_t *) des);
|
||||
} else {
|
||||
des->des_flags |= MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;
|
||||
}
|
||||
}
|
||||
|
||||
static inline int mca_btl_ugni_start_progress_reverse_get (mca_btl_base_endpoint_t *ep,
|
||||
mca_btl_base_segment_t *segments,
|
||||
void *rem_ctx)
|
||||
{
|
||||
mca_btl_ugni_base_frag_t *frag;
|
||||
int rc;
|
||||
|
||||
BTL_VERBOSE(("starting reverse get (put) for remote ctx: %p", rem_ctx));
|
||||
|
||||
MCA_BTL_UGNI_FRAG_ALLOC_RDMA(ep->btl, frag, rc);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
BTL_ERROR(("error allocating rdma frag for reverse get"));
|
||||
return rc;
|
||||
}
|
||||
|
||||
frag->base.des_cbfunc = mca_btl_ugni_callback_reverse_get;
|
||||
frag->base.des_cbdata = rem_ctx;
|
||||
frag->endpoint = ep;
|
||||
|
||||
memmove (&frag->segments, segments, 2 * sizeof (segments[0]));
|
||||
|
||||
frag->base.des_src = frag->segments;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = frag->segments + 1;
|
||||
frag->base.des_dst_cnt = 1;
|
||||
|
||||
rc = mca_btl_ugni_put (&ep->btl->super, ep, &frag->base);
|
||||
assert (OMPI_SUCCESS == rc);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static inline int
|
||||
mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
|
||||
{
|
||||
mca_btl_active_message_callback_t *reg;
|
||||
mca_btl_ugni_base_frag_t frag;
|
||||
mca_btl_base_segment_t *segments;
|
||||
mca_btl_ugni_frag_hdr_t *hdr;
|
||||
uintptr_t data_ptr;
|
||||
int tries = 3;
|
||||
int count = 0;
|
||||
int rc;
|
||||
|
||||
do {
|
||||
uint8_t tag = GNI_SMSG_ANY_TAG;
|
||||
|
||||
rc = GNI_SmsgGetNextWTag (ep->common->ep_handle, (void **) &data_ptr, &tag);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_VERBOSE(("no smsg message waiting. rc = %d", rc));
|
||||
continue;
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(0 == data_ptr)) {
|
||||
BTL_ERROR(("null data ptr!"));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
count++;
|
||||
|
||||
BTL_VERBOSE(("got smsg fragment. tag = %d\n", tag));
|
||||
|
||||
switch (tag) {
|
||||
case MCA_BTL_UGNI_TAG_SEND:
|
||||
hdr = (mca_btl_ugni_frag_hdr_t *) data_ptr;
|
||||
|
||||
BTL_VERBOSE(("received smsg fragment. hdr = {len = %u, tag = %d}",
|
||||
(unsigned int) hdr->len, hdr->tag));
|
||||
|
||||
reg = mca_btl_base_active_message_trigger + hdr->tag;
|
||||
frag.base.des_dst = frag.segments;
|
||||
frag.base.des_dst_cnt = 1;
|
||||
|
||||
frag.segments[0].seg_addr.pval = (void *)(data_ptr + sizeof (*hdr));
|
||||
frag.segments[0].seg_len = hdr->len;
|
||||
|
||||
reg->cbfunc(&ep->btl->super, hdr->tag, &(frag.base), reg->cbdata);
|
||||
|
||||
break;
|
||||
case MCA_BTL_UGNI_TAG_DISCONNECT:
|
||||
/* remote endpoint has disconnected */
|
||||
rc = GNI_SmsgRelease (ep->common->ep_handle);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
BTL_ERROR(("Smsg release failed!"));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
mca_btl_ugni_ep_disconnect (ep, false);
|
||||
|
||||
return count;
|
||||
case MCA_BTL_UGNI_TAG_PUT_INIT:
|
||||
segments = (mca_btl_base_segment_t *) data_ptr;
|
||||
|
||||
mca_btl_ugni_start_progress_reverse_get (ep, segments,
|
||||
((void **)(segments + 2))[0]);
|
||||
|
||||
break;
|
||||
case MCA_BTL_UGNI_TAG_PUT_COMPLETE:
|
||||
mca_btl_ugni_post_frag_complete (((void **)data_ptr)[0], OMPI_SUCCESS);
|
||||
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
/* BTL_ERROR(("unknown tag %d\n", tag)); */
|
||||
}
|
||||
|
||||
rc = GNI_SmsgRelease (ep->common->ep_handle);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
BTL_ERROR(("Smsg release failed!"));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
} while (tries--);
|
||||
|
||||
/* finished processing events */
|
||||
return count;
|
||||
}
|
||||
|
||||
static inline int
|
||||
mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *btl)
|
||||
{
|
||||
uint32_t remote_addr, remote_id;
|
||||
uint64_t datagram_id;
|
||||
mca_btl_base_endpoint_t *ep;
|
||||
gni_ep_handle_t handle;
|
||||
gni_post_state_t post_state;
|
||||
int rc, count;
|
||||
|
||||
count = 0;
|
||||
|
||||
post_state = GNI_POST_PENDING;
|
||||
rc = GNI_PostDataProbeById (btl->device->dev_handle, &datagram_id);
|
||||
if (OPAL_LIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) ==
|
||||
MCA_BTL_UGNI_CONNECT_WILDCARD_ID) {
|
||||
handle = btl->wildcard_ep;
|
||||
} else {
|
||||
handle =
|
||||
btl->endpoints[(uint32_t)(datagram_id & 0xffffffffull)]->common->ep_handle;
|
||||
}
|
||||
|
||||
/* wait for the incoming datagram to complete (in case it isn't) */
|
||||
rc = GNI_EpPostDataWaitById (handle, datagram_id, -1, &post_state,
|
||||
&remote_addr, &remote_id);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", rc));
|
||||
return ompi_common_rc_ugni_to_ompi (rc);
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("got a datagram completion: id = %" PRIx64 ", state = %d, "
|
||||
"peer = %d", datagram_id, post_state, remote_id));
|
||||
|
||||
ep = btl->endpoints[remote_id];
|
||||
|
||||
OPAL_THREAD_LOCK(&ep->common->lock);
|
||||
|
||||
/* NTH: TODO -- error handling */
|
||||
(void) mca_btl_ugni_ep_connect_progress (ep);
|
||||
|
||||
if (ep->smsgs_waiting && OMPI_COMMON_UGNI_CONNECTED == MCA_BTL_UGNI_EP_STATE(ep)) {
|
||||
/* process messages waiting in the endpoint's smsg mailbox */
|
||||
while ((rc = mca_btl_ugni_smsg_process (ep) > 0)) count += rc;
|
||||
ep->smsgs_waiting = false;
|
||||
}
|
||||
|
||||
OPAL_THREAD_UNLOCK(&ep->common->lock);
|
||||
|
||||
if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) ==
|
||||
MCA_BTL_UGNI_CONNECT_WILDCARD_ID) {
|
||||
mca_btl_ugni_wildcard_ep_post (btl);
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static inline int
|
||||
mca_btl_ugni_handle_smsg_overrun (mca_btl_ugni_module_t *btl)
|
||||
{
|
||||
gni_cq_entry_t event_data;
|
||||
unsigned int ep_index;
|
||||
int count, rc;
|
||||
|
||||
BTL_VERBOSE(("btl/ugni_component detect SMSG CQ overrun. "
|
||||
"processing message backlog..."));
|
||||
|
||||
/* we don't know which endpoint lost an smsg completion. clear the
|
||||
smsg cq and check all mailboxes */
|
||||
|
||||
/* clear out remote cq */
|
||||
do {
|
||||
rc = GNI_CqGetEvent (btl->smsg_remote_cq, &event_data);
|
||||
} while (GNI_RC_SUCCESS == rc);
|
||||
|
||||
count = 0;
|
||||
|
||||
for (ep_index = 0 ; ep_index < btl->endpoint_count ; ++ep_index) {
|
||||
mca_btl_base_endpoint_t *ep = btl->endpoints[ep_index];
|
||||
|
||||
if (NULL == ep || OMPI_COMMON_UGNI_CONNECTED != MCA_BTL_UGNI_EP_STATE(ep)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
do {
|
||||
/* clear out smsg mailbox */
|
||||
rc = mca_btl_ugni_smsg_process (ep);
|
||||
if (rc > 0)
|
||||
count += rc;
|
||||
} while (rc > 0);
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static inline int
|
||||
mca_btl_ugni_progress_smsg (mca_btl_ugni_module_t *btl)
|
||||
{
|
||||
mca_btl_base_endpoint_t *ep;
|
||||
gni_cq_entry_t event_data;
|
||||
int rc;
|
||||
|
||||
rc = GNI_CqGetEvent (btl->smsg_remote_cq, &event_data);
|
||||
if (GNI_RC_NOT_DONE == rc) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc || !GNI_CQ_STATUS_OK(event_data) ||
|
||||
GNI_CQ_OVERRUN(event_data))) {
|
||||
if (GNI_RC_ERROR_RESOURCE == rc ||
|
||||
(GNI_RC_SUCCESS == rc && GNI_CQ_OVERRUN(event_data))) {
|
||||
/* recover from smsg cq overrun */
|
||||
return mca_btl_ugni_handle_smsg_overrun (btl);
|
||||
}
|
||||
|
||||
BTL_ERROR(("unhandled error in GNI_CqGetEvent"));
|
||||
|
||||
/* unhandled error: crash */
|
||||
assert (0);
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("REMOTE CQ: Got event 0x%" PRIx64 ". msg id = %" PRIu64
|
||||
". ok = %d, type = %" PRIu64 "\n", (uint64_t) event_data,
|
||||
GNI_CQ_GET_MSG_ID(event_data), GNI_CQ_STATUS_OK(event_data),
|
||||
GNI_CQ_GET_TYPE(event_data)));
|
||||
|
||||
/* we could check the message type here but it seems to always be a POST */
|
||||
|
||||
ep = btl->endpoints[GNI_CQ_GET_MSG_ID(event_data)];
|
||||
if (OPAL_UNLIKELY(OMPI_COMMON_UGNI_CONNECTED != MCA_BTL_UGNI_EP_STATE(ep))) {
|
||||
/* due to the nature of datagrams we may get a smsg completion before
|
||||
we get mailbox info from the peer */
|
||||
BTL_VERBOSE(("event occurred on an unconnected endpoint! ep state = %d", MCA_BTL_UGNI_EP_STATE(ep)));
|
||||
|
||||
/* flag the endpoint as having messages waiting */
|
||||
ep->smsgs_waiting = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
return mca_btl_ugni_smsg_process (ep);
|
||||
}
|
||||
|
||||
static inline int
|
||||
mca_btl_ugni_progress_bte (mca_btl_ugni_module_t *btl)
|
||||
{
|
||||
(void) ompi_common_ugni_process_completed_post (btl->device, btl->bte_local_cq);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
mca_btl_ugni_retry_failed (mca_btl_ugni_module_t *btl)
|
||||
{
|
||||
int count = opal_list_get_size (&btl->failed_frags);
|
||||
opal_list_item_t *item;
|
||||
|
||||
while (count-- && NULL != (item = opal_list_remove_first (&btl->failed_frags))) {
|
||||
fprintf (stderr, "retrying frag %p\n", (void *) item);
|
||||
mca_btl_ugni_post_frag_complete ((void *) item, OMPI_SUCCESS);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
mca_btl_ugni_component_progress (void)
|
||||
{
|
||||
mca_btl_ugni_module_t *btl;
|
||||
unsigned int i, j, k;
|
||||
int count;
|
||||
|
||||
count = ompi_common_ugni_progress ();
|
||||
|
||||
for (i = 0 ; i < mca_btl_ugni_component.ugni_num_btls ; ++i) {
|
||||
btl = mca_btl_ugni_component.modules + i;
|
||||
|
||||
mca_btl_ugni_retry_failed (btl);
|
||||
|
||||
count += mca_btl_ugni_progress_datagram (btl);
|
||||
for (j = 0 ; j < 2 ; ++j) {
|
||||
for (k = 0 ; k < 5 ; ++k) {
|
||||
count += mca_btl_ugni_progress_smsg (btl);
|
||||
}
|
||||
|
||||
count += mca_btl_ugni_progress_bte (btl);
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
200
ompi/mca/btl/ugni/btl_ugni_endpoint.c
Обычный файл
200
ompi/mca/btl/ugni/btl_ugni_endpoint.c
Обычный файл
@ -0,0 +1,200 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "btl_ugni.h"
|
||||
#include "btl_ugni_endpoint.h"
|
||||
#include "btl_ugni_frag.h"
|
||||
|
||||
static void mca_btl_ugni_ep_construct (mca_btl_base_endpoint_t *ep);
|
||||
static void mca_btl_ugni_ep_destruct (mca_btl_base_endpoint_t *ep);
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_btl_base_endpoint_t, opal_object_t,
|
||||
mca_btl_ugni_ep_construct, mca_btl_ugni_ep_destruct);
|
||||
|
||||
/*
 * Endpoint constructor: builds the pending-send list and puts every other
 * member into a known state.  The pointer members are cleared so that code
 * inspecting an endpoint before it is fully set up never reads an
 * indeterminate value.
 */
static void mca_btl_ugni_ep_construct (mca_btl_base_endpoint_t *ep)
{
    OBJ_CONSTRUCT(&ep->pending_list, opal_list_t);
    ep->common = NULL;
    ep->btl = NULL;
    ep->mailbox = NULL;
    ep->smsgs_waiting = false;
}
|
||||
|
||||
/*
 * Endpoint destructor: tears down the pending-send list built by the
 * constructor.  No other member is released here.
 */
static void mca_btl_ugni_ep_destruct (mca_btl_base_endpoint_t *ep)
{
    opal_list_t *const pending = &ep->pending_list;

    OBJ_DESTRUCT(pending);
}
|
||||
|
||||
/*
 * Mailbox constructor: records the free-list item's buffer and fills in
 * the SMSG attributes that describe this mailbox to the peer.
 */
static void mca_btl_ugni_smsg_mbox_construct (mca_btl_ugni_smsg_mbox_t *mbox) {
    struct mca_btl_ugni_reg_t *registration =
        (struct mca_btl_ugni_reg_t *) mbox->super.registration;
    gni_smsg_attr_t *attr = &mbox->smsg_attrib;

    mbox->buffer = mbox->super.ptr;

    /* initialize mailbox attributes */
    attr->msg_type       = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
    /* largest message: eager payload plus the BTL fragment header */
    attr->msg_maxsize    = mca_btl_ugni_component.eager_limit +
        sizeof (mca_btl_ugni_frag_hdr_t);
    attr->mbox_maxcredit = mca_btl_ugni_smsg_max_credits;
    attr->mbox_offset    = 0; /* autoselect */
    attr->msg_buffer     = mbox->buffer;
    attr->buff_size      = mca_btl_ugni_smsg_mbox_size;
    attr->mem_hndl       = registration->memory_hdl;
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_btl_ugni_smsg_mbox_t, ompi_free_list_item_t,
|
||||
mca_btl_ugni_smsg_mbox_construct, NULL);
|
||||
|
||||
/*
 * Take a mailbox from the module's free list, attach it to the endpoint
 * and zero its buffer.
 *
 * Returns OMPI_ERR_OUT_OF_RESOURCE if the free list is exhausted,
 * otherwise the status reported by the free-list get.
 */
static inline int mca_btl_ugni_ep_smsg_get_mbox (mca_btl_base_endpoint_t *ep) {
    ompi_free_list_item_t *list_item;
    int status;

    OMPI_FREE_LIST_GET(&ep->btl->smsg_mboxes, list_item, status);
    if (OPAL_UNLIKELY(NULL == list_item)) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    ep->mailbox = (mca_btl_ugni_smsg_mbox_t *) list_item;

    /* per ugni spec we need to zero mailbox data before connecting */
    memset (ep->mailbox->buffer, 0, mca_btl_ugni_smsg_mbox_size);

    return status;
}
|
||||
|
||||
/*
 * Disconnect an endpoint.
 *
 * Nothing is done for an endpoint still in the INIT state.  Otherwise a
 * one-byte DISCONNECT message is optionally sent to a connected peer, the
 * endpoint is marked BOUND and unbound, and its mailbox (if any) goes back
 * to the module's free list.  Failures along the way are logged only.
 *
 * @param ep              endpoint to disconnect
 * @param send_disconnect notify the peer when the endpoint is connected
 * @return always OMPI_SUCCESS
 *
 * NOTE(review): this takes ep->common->lock; confirm that no caller already
 * holds it, or that the lock is recursive.
 */
int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect) {
    uint32_t msg_id = ORTE_PROC_MY_NAME->vpid;
    /* the byte's value carries no meaning, but it must not be sent
       uninitialized */
    char msg = 0;
    int rc;

    OPAL_THREAD_LOCK(&ep->common->lock);

    do {
        if (OMPI_COMMON_UGNI_INIT == MCA_BTL_UGNI_EP_STATE(ep)) {
            /* nothing to do */
            break;
        }

        if (OMPI_COMMON_UGNI_CONNECTED == MCA_BTL_UGNI_EP_STATE(ep) && send_disconnect) {
            rc = GNI_SmsgSendWTag (ep->common->ep_handle, &msg, 1, NULL, 0, msg_id,
                                   MCA_BTL_UGNI_TAG_DISCONNECT);
            if (GNI_RC_SUCCESS != rc) {
                BTL_VERBOSE(("btl/ugni could not send close message"));
            }

            /* we might want to wait for local completion here (do we even care) */
        }

        ep->common->state = OMPI_COMMON_UGNI_BOUND;

        /* drop the lock before we unbind */
        OPAL_THREAD_UNLOCK(&ep->common->lock);
        rc = ompi_common_ugni_endpoint_unbind (ep->common);
        OPAL_THREAD_LOCK(&ep->common->lock);
        if (OMPI_SUCCESS != rc) {
            BTL_VERBOSE(("btl/ugni error unbinding ugni endpoint"));
        }

        /* an endpoint that never obtained a mailbox has nothing to return */
        if (NULL != ep->mailbox) {
            OMPI_FREE_LIST_RETURN(&ep->btl->smsg_mboxes, ((ompi_free_list_item_t *) ep->mailbox));
            ep->mailbox = NULL;
        }
    } while (0);

    OPAL_THREAD_UNLOCK(&ep->common->lock);

    return OMPI_SUCCESS;
}
|
||||
|
||||
/*
 * Begin connecting an endpoint: bind it to the remote address, mark it
 * CONNECTING, obtain a mailbox and clear the remote SMSG attributes.
 *
 * The endpoint lock is released around the bind call and re-acquired
 * afterwards.
 *
 * Returns OMPI_SUCCESS, or the error from the bind or mailbox step.
 */
static inline int mca_btl_ugni_ep_connect_start (mca_btl_base_endpoint_t *ep) {
    int status;

    BTL_VERBOSE(("initiaiting connection to remote peer with address: %u id: %u",
                 ep->common->ep_rem_addr, ep->common->ep_rem_id));

    /* bind endpoint to remote address */
    OPAL_THREAD_UNLOCK(&ep->common->lock);
    status = ompi_common_ugni_endpoint_bind (ep->common);
    OPAL_THREAD_LOCK(&ep->common->lock);

    if (OPAL_LIKELY(OMPI_SUCCESS == status)) {
        MCA_BTL_UGNI_EP_STATE(ep) = OMPI_COMMON_UGNI_CONNECTING;

        /* build connection data */
        status = mca_btl_ugni_ep_smsg_get_mbox (ep);
    }

    if (OPAL_UNLIKELY(OMPI_SUCCESS != status)) {
        return status;
    }

    /* zeroed attributes mark the peer's mailbox info as not yet received */
    memset (&ep->remote_smsg_attrib, 0, sizeof (ep->remote_smsg_attrib));

    BTL_VERBOSE(("btl/ugni connection to remote peer initiated"));

    return OMPI_SUCCESS;
}
|
||||
|
||||
/*
 * Complete a connection once the peer's mailbox attributes are known:
 * initialize SMSG on the endpoint, mark it CONNECTED and post every send
 * queued on the pending list.
 *
 * Returns OMPI_SUCCESS, or the translated error from GNI_SmsgInit.  Results
 * of the individual pending sends are ignored.
 */
static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) {
    gni_smsg_attr_t *remote = &ep->remote_smsg_attrib;
    gni_smsg_attr_t *local = &ep->mailbox->smsg_attrib;
    opal_list_item_t *queued;
    int rc;

    BTL_VERBOSE(("finishing connection. remote attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
                 "mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
                 "msg_maxsize = %d", remote->msg_type, remote->msg_buffer,
                 remote->buff_size, remote->mem_hndl.qword1,
                 remote->mem_hndl.qword2, remote->mbox_offset,
                 remote->mbox_maxcredit, remote->msg_maxsize));

    BTL_VERBOSE(("finishing connection. local attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
                 "mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
                 "msg_maxsize = %d", local->msg_type, local->msg_buffer,
                 local->buff_size, local->mem_hndl.qword1,
                 local->mem_hndl.qword2, local->mbox_offset,
                 local->mbox_maxcredit, local->msg_maxsize));

    rc = GNI_SmsgInit (ep->common->ep_handle, local, remote);
    if (GNI_RC_SUCCESS != rc) {
        BTL_ERROR(("error initializing SMSG protocol. rc = %d", rc));
        return ompi_common_rc_ugni_to_ompi (rc);
    }

    BTL_VERBOSE(("endpoint connected. posting %u sends", (unsigned int) opal_list_get_size (&ep->pending_list)));

    MCA_BTL_UGNI_EP_STATE(ep) = OMPI_COMMON_UGNI_CONNECTED;

    /* post pending sends */
    for (queued = opal_list_remove_first (&ep->pending_list) ;
         NULL != queued ;
         queued = opal_list_remove_first (&ep->pending_list)) {
        mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) queued;

        (void) mca_btl_ugni_send (&ep->btl->super, ep, &frag->base, frag->tag);
    }

    return OMPI_SUCCESS;
}
|
||||
|
||||
/*
 * Advance an endpoint's connection state machine by one step.
 *
 * Returns OMPI_SUCCESS if the endpoint is (or has just become) connected,
 * OMPI_ERR_RESOURCE_BUSY if a directed datagram was posted because the
 * peer's mailbox attributes are still missing, or the error from starting
 * or finishing the connection.
 */
int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep) {
    if (OMPI_COMMON_UGNI_CONNECTED == MCA_BTL_UGNI_EP_STATE(ep)) {
        return OMPI_SUCCESS;
    }

    /* any state before CONNECTING still needs the connection started */
    if (OMPI_COMMON_UGNI_CONNECTING > ep->common->state) {
        const int start_rc = mca_btl_ugni_ep_connect_start (ep);

        if (OMPI_SUCCESS != start_rc) {
            return start_rc;
        }
    }

    /* peer attributes known: the connection can be completed */
    if (GNI_SMSG_TYPE_INVALID != ep->remote_smsg_attrib.msg_type) {
        return mca_btl_ugni_ep_connect_finish (ep);
    }

    /* otherwise exchange mailbox attributes with a directed datagram */
    (void) mca_btl_ugni_directed_ep_post (ep);

    return OMPI_ERR_RESOURCE_BUSY;
}
|
135
ompi/mca/btl/ugni/btl_ugni_endpoint.h
Обычный файл
135
ompi/mca/btl/ugni/btl_ugni_endpoint.h
Обычный файл
@ -0,0 +1,135 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef MCA_BTL_UGNI_ENDPOINT_H
|
||||
#define MCA_BTL_UGNI_ENDPOINT_H
|
||||
|
||||
#include "btl_ugni.h"
|
||||
|
||||
struct mca_btl_ugni_smsg_mbox_t {
|
||||
ompi_free_list_item_t super;
|
||||
|
||||
void *buffer;
|
||||
gni_smsg_attr_t smsg_attrib;
|
||||
};
|
||||
typedef struct mca_btl_ugni_smsg_mbox_t mca_btl_ugni_smsg_mbox_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_btl_ugni_smsg_mbox_t);
|
||||
|
||||
struct mca_btl_base_endpoint_t {
|
||||
opal_object_t super;
|
||||
|
||||
ompi_common_ugni_endpoint_t *common;
|
||||
|
||||
mca_btl_ugni_module_t *btl;
|
||||
|
||||
gni_smsg_attr_t remote_smsg_attrib;
|
||||
|
||||
mca_btl_ugni_smsg_mbox_t *mailbox;
|
||||
|
||||
opal_list_t pending_list;
|
||||
|
||||
/* true if a frag was received before the connection was complete */
|
||||
bool smsgs_waiting;
|
||||
};
|
||||
typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
|
||||
|
||||
#define MCA_BTL_UGNI_EP_STATE(ep) ((ep)->common->state)
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_btl_base_endpoint_t);
|
||||
|
||||
int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep);
|
||||
int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect);
|
||||
|
||||
static inline int mca_btl_ugni_init_ep (mca_btl_base_endpoint_t **ep,
|
||||
mca_btl_ugni_module_t *btl,
|
||||
ompi_proc_t *peer_proc) {
|
||||
mca_btl_base_endpoint_t *endpoint;
|
||||
int rc;
|
||||
|
||||
endpoint = OBJ_NEW(mca_btl_base_endpoint_t);
|
||||
assert (endpoint != NULL);
|
||||
|
||||
rc = ompi_common_ugni_endpoint_for_proc (btl->device, peer_proc, &endpoint->common);
|
||||
if (OMPI_SUCCESS != rc) {
|
||||
assert (0);
|
||||
return rc;
|
||||
}
|
||||
|
||||
endpoint->btl = btl;
|
||||
|
||||
*ep = endpoint;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/*
 * Disconnect an endpoint (without notifying the peer), hand its common
 * endpoint back to the common layer and drop the endpoint object.
 */
static inline void mca_btl_ugni_release_ep (mca_btl_base_endpoint_t *ep) {
    int rc;

    rc = mca_btl_ugni_ep_disconnect (ep, false);
    /* log only when the disconnect actually failed; the original test was
       inverted and reported an error on success */
    if (OMPI_SUCCESS != rc) {
        BTL_VERBOSE(("btl/ugni error disconnecting endpoint"));
    }

    ompi_common_ugni_endpoint_return (ep->common);

    OBJ_RELEASE(ep);
}
|
||||
|
||||
/*
 * Check whether an endpoint can be used, starting the connection if it has
 * not been started yet.
 *
 * Returns OMPI_SUCCESS for a connected endpoint (checked without taking the
 * lock).  Under the lock: an INIT endpoint gets its connection progressed
 * and yields either that call's error or OMPI_ERR_RESOURCE_BUSY; a
 * CONNECTING endpoint yields OMPI_ERR_RESOURCE_BUSY; any other state yields
 * OMPI_SUCCESS.
 */
static inline int mca_btl_ugni_check_endpoint_state (mca_btl_base_endpoint_t *ep) {
    int rc;

    /* fast path */
    if (OPAL_LIKELY(OMPI_COMMON_UGNI_CONNECTED == ep->common->state)) {
        return OMPI_SUCCESS;
    }

    OPAL_THREAD_LOCK(&ep->common->lock);

    if (OMPI_COMMON_UGNI_INIT == ep->common->state) {
        rc = mca_btl_ugni_ep_connect_progress (ep);
        if (OMPI_SUCCESS == rc) {
            /* even a successful step is reported as busy to the caller */
            rc = OMPI_ERR_RESOURCE_BUSY;
        }
    } else if (OMPI_COMMON_UGNI_CONNECTING == ep->common->state) {
        rc = OMPI_ERR_RESOURCE_BUSY;
    } else {
        rc = OMPI_SUCCESS;
    }

    OPAL_THREAD_UNLOCK(&ep->common->lock);

    return rc;
}
|
||||
|
||||
/*
 * Post the wildcard connection datagram on the module's wildcard endpoint.
 * The local attribute block is zeroed before it is sent; the peer's
 * attributes are received into wc_remote_attr.  The datagram id combines
 * the wildcard id with this process's vpid.
 *
 * Returns the GNI result translated to an OMPI code.
 */
static inline int mca_btl_ugni_wildcard_ep_post (mca_btl_ugni_module_t *ugni_module) {
    const size_t local_len  = sizeof (ugni_module->wc_local_attr);
    const size_t remote_len = sizeof (ugni_module->wc_remote_attr);
    int gni_rc;

    memset (&ugni_module->wc_local_attr, 0, local_len);

    gni_rc = GNI_EpPostDataWId (ugni_module->wildcard_ep,
                                &ugni_module->wc_local_attr, local_len,
                                &ugni_module->wc_remote_attr, remote_len,
                                MCA_BTL_UGNI_CONNECT_WILDCARD_ID | ORTE_PROC_MY_NAME->vpid);

    return ompi_common_rc_ugni_to_ompi (gni_rc);
}
|
||||
|
||||
/*
 * Post a directed connection datagram on the endpoint: the local mailbox
 * attributes are sent and the peer's attributes are received into
 * remote_smsg_attrib.  The datagram id combines the directed id with the
 * remote endpoint id.
 *
 * Returns the GNI result translated to an OMPI code.
 */
static inline int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep) {
    gni_smsg_attr_t *send_attr = &ep->mailbox->smsg_attrib;
    gni_smsg_attr_t *recv_attr = &ep->remote_smsg_attrib;
    int gni_rc;

    gni_rc = GNI_EpPostDataWId (ep->common->ep_handle,
                                send_attr, sizeof (*send_attr),
                                recv_attr, sizeof (*recv_attr),
                                MCA_BTL_UGNI_CONNECT_DIRECTED_ID | ep->common->ep_rem_id);

    return ompi_common_rc_ugni_to_ompi (gni_rc);
}
|
||||
|
||||
#endif /* MCA_BTL_UGNI_ENDPOINT_H */
|
41
ompi/mca/btl/ugni/btl_ugni_frag.c
Обычный файл
41
ompi/mca/btl/ugni/btl_ugni_frag.c
Обычный файл
@ -0,0 +1,41 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "btl_ugni.h"
|
||||
#include "btl_ugni_frag.h"
|
||||
|
||||
/*
 * Eager frag constructor: allocates a zeroed buffer holding the fragment
 * header followed by eager_limit bytes of payload, and points the first
 * segment at the payload.
 *
 * If the allocation fails both hdr and the segment address are left NULL
 * (a constructor has no way to report the error).  The registration
 * pointer is cleared because MCA_BTL_UGNI_FRAG_RETURN inspects it.
 */
static inline void mca_btl_ugni_frag_constructor (mca_btl_ugni_base_frag_t *frag)
{
    /* send memory does not need to be registered so we do not need a mpool */
    frag->hdr = (mca_btl_ugni_frag_hdr_t *) calloc (1, sizeof (mca_btl_ugni_frag_hdr_t) + mca_btl_ugni_component.eager_limit);
    /* payload starts right behind the header; avoid pointer arithmetic on NULL */
    frag->segments[0].seg_addr.pval = (NULL != frag->hdr) ? (void *) (frag->hdr + 1) : NULL;
    frag->registration = NULL;
}
|
||||
|
||||
/*
 * Eager frag destructor: releases the header/payload buffer allocated by
 * the constructor.
 */
static inline void mca_btl_ugni_frag_destructor (mca_btl_ugni_base_frag_t *frag)
{
    /* free() accepts NULL, so no explicit check is needed */
    free (frag->hdr);
}
|
||||
|
||||
/*
 * RDMA frag constructor: RDMA frags carry no buffer of their own, so the
 * header and first segment address are NULL.  The registration pointer is
 * cleared as well because MCA_BTL_UGNI_FRAG_RETURN inspects it.
 */
static inline void mca_btl_ugni_rdma_frag_constructor (mca_btl_ugni_base_frag_t *frag)
{
    /* we don't need any buffer memory for rdma frags */
    frag->hdr = NULL;
    frag->segments[0].seg_addr.pval = NULL;
    frag->registration = NULL;
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_btl_ugni_base_frag_t, mca_btl_base_descriptor_t,
|
||||
mca_btl_ugni_frag_constructor, mca_btl_ugni_frag_destructor);
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_btl_ugni_rdma_frag_t, mca_btl_base_descriptor_t,
|
||||
mca_btl_ugni_rdma_frag_constructor, NULL);
|
74
ompi/mca/btl/ugni/btl_ugni_frag.h
Обычный файл
74
ompi/mca/btl/ugni/btl_ugni_frag.h
Обычный файл
@ -0,0 +1,74 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#if !defined(MCA_BTL_UGNI_FRAG_H)
|
||||
#define MCA_BTL_UGNI_FRAG_H
|
||||
|
||||
#include "btl_ugni.h"
|
||||
#include "btl_ugni_endpoint.h"
|
||||
|
||||
struct mca_btl_ugni_frag_hdr_t {
|
||||
size_t len;
|
||||
mca_btl_base_tag_t tag;
|
||||
};
|
||||
typedef struct mca_btl_ugni_frag_hdr_t mca_btl_ugni_frag_hdr_t;
|
||||
|
||||
struct mca_btl_ugni_base_frag_t {
|
||||
mca_btl_base_descriptor_t base;
|
||||
mca_btl_base_segment_t segments[2];
|
||||
mca_btl_ugni_frag_hdr_t *hdr;
|
||||
mca_btl_base_tag_t tag;
|
||||
ompi_common_ugni_post_desc_t post_desc;
|
||||
mca_btl_base_endpoint_t *endpoint;
|
||||
mca_btl_ugni_reg_t *registration;
|
||||
ompi_free_list_t *my_list;
|
||||
mca_btl_ugni_module_t *btl;
|
||||
int tries;
|
||||
};
|
||||
|
||||
typedef struct mca_btl_ugni_base_frag_t mca_btl_ugni_base_frag_t;
|
||||
typedef struct mca_btl_ugni_base_frag_t mca_btl_ugni_rdma_frag_t;
|
||||
|
||||
/* Recover the enclosing frag from a pointer to its embedded post_desc member. */
#define MCA_BTL_UGNI_DESC_TO_FRAG(desc)                                     \
    ((mca_btl_ugni_base_frag_t *)                                           \
     ((uintptr_t) (desc) - offsetof (mca_btl_ugni_base_frag_t, post_desc)))
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_btl_ugni_base_frag_t);
|
||||
OBJ_CLASS_DECLARATION(mca_btl_ugni_rdma_frag_t);
|
||||
|
||||
/*
 * Take a frag from the component's eager free list.
 *
 * module: module recorded in the frag
 * frag:   lvalue receiving the frag, or NULL if the list is exhausted
 * rc:     lvalue receiving the free-list status
 *
 * The bookkeeping fields are only written when a frag was obtained, so a
 * caller's NULL check is reached instead of a NULL dereference.
 */
#define MCA_BTL_UGNI_FRAG_ALLOC_EAGER(module, frag, rc)                         \
    do {                                                                        \
        ompi_free_list_item_t *ugni_item_;                                      \
        OMPI_FREE_LIST_GET(&mca_btl_ugni_component.ugni_frags_eager, ugni_item_, rc); \
        (frag) = (mca_btl_ugni_base_frag_t *) ugni_item_;                       \
        if (NULL != (frag)) {                                                   \
            (frag)->my_list = &mca_btl_ugni_component.ugni_frags_eager;         \
            (frag)->btl = (module);                                             \
        }                                                                       \
    } while (0)
|
||||
|
||||
/*
 * Take a frag from the component's RDMA free list.
 *
 * module: module recorded in the frag
 * frag:   lvalue receiving the frag, or NULL if the list is exhausted
 * rc:     lvalue receiving the free-list status
 *
 * The bookkeeping fields are only written when a frag was obtained, so a
 * caller's NULL check is reached instead of a NULL dereference.
 */
#define MCA_BTL_UGNI_FRAG_ALLOC_RDMA(module, frag, rc)                          \
    do {                                                                        \
        ompi_free_list_item_t *ugni_item_;                                      \
        OMPI_FREE_LIST_GET(&mca_btl_ugni_component.ugni_frags_rdma, ugni_item_, rc); \
        (frag) = (mca_btl_ugni_base_frag_t *) ugni_item_;                       \
        if (NULL != (frag)) {                                                   \
            (frag)->my_list = &mca_btl_ugni_component.ugni_frags_rdma;          \
            (frag)->btl = (module);                                             \
        }                                                                       \
    } while (0)
|
||||
|
||||
/*
 * Give a frag back to the free list it came from, deregistering its memory
 * first if a registration is attached.
 *
 * NOTE(review): the expansion ends in "while (0);" -- the trailing
 * semicolon makes the macro unsafe in an unbraced if/else.  It is kept
 * because callers outside this header may rely on it.
 */
#define MCA_BTL_UGNI_FRAG_RETURN(frag)                                          \
    do {                                                                        \
        if (OPAL_UNLIKELY(NULL != (frag)->registration)) {                      \
            (frag)->btl->super.btl_mpool->mpool_deregister((frag)->btl->super.btl_mpool, \
                                                           &(frag)->registration->base); \
            (frag)->registration = NULL;                                        \
        }                                                                       \
        OMPI_FREE_LIST_RETURN((frag)->my_list, (ompi_free_list_item_t *)(frag)); \
    } while (0);
|
||||
|
||||
#endif /* MCA_BTL_UGNI_FRAG_H */
|
65
ompi/mca/btl/ugni/btl_ugni_get.c
Обычный файл
65
ompi/mca/btl/ugni/btl_ugni_get.c
Обычный файл
@ -0,0 +1,65 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "btl_ugni_rdma.h"
|
||||
|
||||
/**
|
||||
* Initiate a get operation.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param descriptor (IN) Description of the data to be transferred
|
||||
*/
|
||||
int mca_btl_ugni_get (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct mca_btl_base_descriptor_t *des) {
|
||||
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) des;
|
||||
gni_mem_handle_t lcl_hdl, rem_hdl;
|
||||
void *lcl_buffer, *rem_buffer;
|
||||
size_t size;
|
||||
int rc;
|
||||
|
||||
BTL_VERBOSE(("Using RDMA Get"));
|
||||
|
||||
/* Check if endpoint is connected */
|
||||
rc = mca_btl_ugni_check_endpoint_state(endpoint);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc))
|
||||
/* we should already be connected by this point (we got a smsg send) */
|
||||
return rc;
|
||||
|
||||
/* Get remote memory handle */
|
||||
rem_buffer = (void *)(des->des_src->seg_addr.pval);
|
||||
size = des->des_src->seg_len;
|
||||
memcpy (&rem_hdl, (void *) des->des_src->seg_key.key64, sizeof (rem_hdl));
|
||||
|
||||
/* Get local memory handle */
|
||||
lcl_buffer = (void *)(des->des_dst->seg_addr.pval);
|
||||
memcpy (&lcl_hdl, (void *) des->des_dst->seg_key.key64, sizeof (lcl_hdl));
|
||||
|
||||
if (OPAL_UNLIKELY(((uintptr_t)rem_buffer & 0x3) || ((uintptr_t)lcl_buffer & 0x3) ||
|
||||
size & 0x3 || size > mca_btl_ugni_component.btl_get_limit)) {
|
||||
/* switch to put */
|
||||
return mca_btl_ugni_start_reverse_get (btl, frag);
|
||||
}
|
||||
|
||||
frag->tries = 0;
|
||||
|
||||
if (size < mca_btl_ugni_component.btl_fma_limit) {
|
||||
rc = post_fma_descriptor (frag, GNI_POST_FMA_GET, endpoint, size,
|
||||
lcl_buffer, lcl_hdl, rem_buffer, rem_hdl);
|
||||
} else {
|
||||
rc = post_bte_descriptor (frag, GNI_POST_RDMA_GET, endpoint, size,
|
||||
lcl_buffer, lcl_hdl, rem_buffer, rem_hdl);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
518
ompi/mca/btl/ugni/btl_ugni_module.c
Обычный файл
518
ompi/mca/btl/ugni/btl_ugni_module.c
Обычный файл
@ -0,0 +1,518 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "ompi/constants.h"
|
||||
#include "ompi/communicator/communicator.h"
|
||||
#include "opal/util/show_help.h"
|
||||
#include "opal/align.h"
|
||||
#include "ompi/mca/btl/base/base.h"
|
||||
#include "ompi/mca/dpm/dpm.h"
|
||||
#include "orte/util/proc_info.h"
|
||||
#include "ompi/mca/btl/btl.h"
|
||||
#include "ompi/mca/btl/base/btl_base_error.h"
|
||||
|
||||
#include "btl_ugni.h"
|
||||
#include "btl_ugni_frag.h"
|
||||
#include "btl_ugni_endpoint.h"
|
||||
|
||||
#include <unistd.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/mman.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
|
||||
static int
|
||||
mca_btl_ugni_free (struct mca_btl_base_module_t *btl,
|
||||
mca_btl_base_descriptor_t *des);
|
||||
|
||||
static int
|
||||
mca_btl_ugni_module_finalize (struct mca_btl_base_module_t* btl);
|
||||
|
||||
static struct mca_btl_base_descriptor_t *
|
||||
mca_btl_ugni_prepare_src (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
mca_mpool_base_registration_t *registration,
|
||||
struct opal_convertor_t *convertor,
|
||||
uint8_t order, size_t reserve, size_t *size,
|
||||
uint32_t flags);
|
||||
|
||||
static mca_btl_base_descriptor_t *
|
||||
mca_btl_ugni_prepare_dst (mca_btl_base_module_t *btl,
|
||||
mca_btl_base_endpoint_t *endpoint,
|
||||
mca_mpool_base_registration_t *registration,
|
||||
opal_convertor_t *convertor, uint8_t order,
|
||||
size_t reserve, size_t *size, uint32_t flags);
|
||||
|
||||
mca_btl_ugni_module_t mca_btl_ugni_module = {
|
||||
{
|
||||
/* .btl_component = */ &mca_btl_ugni_component.super,
|
||||
|
||||
/* these are set in component_register */
|
||||
/* .btl_eager_limit = */ 0,
|
||||
/* .btl_rndv_eager_limit = */ 0,
|
||||
/* .btl_max_send_size = */ 0,
|
||||
/* .btl_rdma_pipeline_send_length = */ 0,
|
||||
/* .btl_rdma_pipeline_frag_size = */ 0,
|
||||
/* .btl_min_rdma_pipeline_size = */ 0,
|
||||
/* .btl_exclusivity = */ 0,
|
||||
/* .btl_latency = */ 0,
|
||||
/* .btl_bandwidth = */ 0,
|
||||
/* .btl_flags = */ 0,
|
||||
|
||||
/* member functions */
|
||||
mca_btl_ugni_add_procs,
|
||||
mca_btl_ugni_del_procs,
|
||||
NULL, /* register */
|
||||
mca_btl_ugni_module_finalize,
|
||||
mca_btl_ugni_alloc,
|
||||
mca_btl_ugni_free,
|
||||
mca_btl_ugni_prepare_src,
|
||||
mca_btl_ugni_prepare_dst,
|
||||
mca_btl_ugni_send,
|
||||
mca_btl_ugni_sendi,
|
||||
mca_btl_ugni_put,
|
||||
mca_btl_ugni_get,
|
||||
NULL, /* mca_btl_base_dump, */
|
||||
NULL, /* mpool */
|
||||
NULL, /* mca_btl_ugni_register_error_cb - error callback registration */
|
||||
NULL, /* mca_btl_ugni_ft_event */
|
||||
}
|
||||
};
|
||||
|
||||
static int ugni_reg_mem (void *reg_data, void *base, size_t size,
|
||||
mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
mca_btl_ugni_module_t *btl = (mca_btl_ugni_module_t *) reg_data;
|
||||
mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *) reg;
|
||||
int rc;
|
||||
|
||||
rc = GNI_MemRegister (btl->device->dev_handle, (uint64_t)base,
|
||||
size, NULL, GNI_MEM_READWRITE, -1,
|
||||
&(ugni_reg->memory_hdl));
|
||||
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
ugni_reg->buffer = base;
|
||||
ugni_reg->size = size;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static int ugni_reg_smsg_mem (void *reg_data, void *base, size_t size,
|
||||
mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
mca_btl_ugni_module_t *btl = (mca_btl_ugni_module_t *) reg_data;
|
||||
mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *) reg;
|
||||
int rc;
|
||||
|
||||
rc = GNI_MemRegister (btl->device->dev_handle, (uint64_t)base,
|
||||
size, btl->smsg_remote_cq, GNI_MEM_READWRITE |
|
||||
GNI_MEM_USE_GART, -1, &(ugni_reg->memory_hdl));
|
||||
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
ugni_reg->buffer = base;
|
||||
ugni_reg->size = size;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static int
|
||||
ugni_dereg_mem (void *reg_data, mca_mpool_base_registration_t *reg)
|
||||
{
|
||||
mca_btl_ugni_module_t *btl = (mca_btl_ugni_module_t *) reg_data;
|
||||
mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *)reg;
|
||||
int rc;
|
||||
|
||||
rc = GNI_MemDeregister (btl->device->dev_handle, &ugni_reg->memory_hdl);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
ugni_reg->buffer = NULL;
|
||||
ugni_reg->size = 0;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static int
|
||||
mca_btl_ugni_module_setup_mpools (mca_btl_ugni_module_t *ugni_module)
|
||||
{
|
||||
struct mca_mpool_base_resources_t mpool_resources;
|
||||
int mbox_increment, rc;
|
||||
size_t nprocs;
|
||||
|
||||
(void) ompi_proc_world (&nprocs);
|
||||
|
||||
mpool_resources.reg_data = (void *) ugni_module;
|
||||
mpool_resources.sizeof_reg = sizeof (mca_btl_ugni_reg_t);
|
||||
mpool_resources.register_mem = ugni_reg_mem;
|
||||
mpool_resources.deregister_mem = ugni_dereg_mem;
|
||||
ugni_module->super.btl_mpool =
|
||||
mca_mpool_base_module_create("rdma", ugni_module->device,
|
||||
&mpool_resources);
|
||||
if (NULL == ugni_module->super.btl_mpool) {
|
||||
BTL_ERROR(("error creating mpool"));
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
mpool_resources.register_mem = ugni_reg_smsg_mem;
|
||||
|
||||
ugni_module->smsg_mpool =
|
||||
mca_mpool_base_module_create("rdma", ugni_module->device,
|
||||
&mpool_resources);
|
||||
|
||||
OBJ_CONSTRUCT(&ugni_module->smsg_mboxes, ompi_free_list_t);
|
||||
|
||||
mbox_increment = nprocs;
|
||||
|
||||
if (nprocs * mca_btl_ugni_smsg_mbox_size > 2 * 1024 * 1024) {
|
||||
/* allocate at most 2 MB at a time */
|
||||
mbox_increment = (int) (2.0 * 1024.0 * 1024.0 / (float)mca_btl_ugni_smsg_mbox_size);
|
||||
}
|
||||
|
||||
if (nprocs < 1024) {
|
||||
mbox_increment = nprocs / 2;
|
||||
} else if (nprocs < 16384) {
|
||||
mbox_increment = nprocs / 10;
|
||||
} else {
|
||||
mbox_increment = nprocs / 40;
|
||||
}
|
||||
|
||||
rc = ompi_free_list_init_new (&ugni_module->smsg_mboxes,
|
||||
sizeof (mca_btl_ugni_smsg_mbox_t), 64,
|
||||
OBJ_CLASS(mca_btl_ugni_smsg_mbox_t),
|
||||
mca_btl_ugni_smsg_mbox_size,
|
||||
opal_cache_line_size, 0,
|
||||
nprocs, mbox_increment,
|
||||
ugni_module->smsg_mpool);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int
|
||||
mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
|
||||
ompi_common_ugni_device_t *dev)
|
||||
{
|
||||
int rc;
|
||||
|
||||
BTL_VERBOSE(("binding module %p to device %p", (void *) ugni_module,
|
||||
(void *) dev));
|
||||
|
||||
/* copy module defaults (and function pointers) */
|
||||
memmove (ugni_module, &mca_btl_ugni_module, sizeof (mca_btl_ugni_module));
|
||||
|
||||
OBJ_CONSTRUCT(&ugni_module->failed_frags, opal_list_t);
|
||||
|
||||
/* module settings */
|
||||
ugni_module->super.btl_eager_limit = mca_btl_ugni_component.eager_limit;
|
||||
|
||||
ugni_module->super.btl_max_send_size = ugni_module->super.btl_eager_limit;
|
||||
ugni_module->super.btl_rdma_pipeline_send_length = ugni_module->super.btl_eager_limit;
|
||||
|
||||
ugni_module->device = dev;
|
||||
|
||||
/* create wildcard endpoint to listen for connections.
|
||||
* there is no need to bind this endpoint. */
|
||||
rc = GNI_EpCreate (ugni_module->device->dev_handle, NULL,
|
||||
&ugni_module->wildcard_ep);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
BTL_ERROR(("error creating wildcard ugni endpoint"));
|
||||
return ompi_common_rc_ugni_to_ompi (rc);
|
||||
}
|
||||
|
||||
/* post wildcard datagram */
|
||||
rc = mca_btl_ugni_wildcard_ep_post (ugni_module);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
BTL_ERROR(("error posting wildcard datagram"));
|
||||
return rc;
|
||||
}
|
||||
|
||||
ugni_module->endpoints = NULL;
|
||||
|
||||
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.cq_size,
|
||||
0, GNI_CQ_NOBLOCK, NULL, NULL, &ugni_module->bte_local_cq);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_ERROR(("error creating local BTE CQ"));
|
||||
return ompi_common_rc_ugni_to_ompi (rc);
|
||||
}
|
||||
|
||||
/* the smsg_remote_cq must be created before we setup the smsg mpool */
|
||||
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.cq_size,
|
||||
0, GNI_CQ_NOBLOCK, NULL, NULL, &ugni_module->smsg_remote_cq);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_ERROR(("error creating remote SMSG CQ"));
|
||||
return ompi_common_rc_ugni_to_ompi (rc);
|
||||
}
|
||||
|
||||
/* create rdma and smsg mpools */
|
||||
rc = mca_btl_ugni_module_setup_mpools (ugni_module);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
BTL_ERROR(("error setting up module mpools"));
|
||||
return rc;
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static int
|
||||
mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
|
||||
{
|
||||
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *)btl;
|
||||
size_t ntotal_procs, i;
|
||||
int rc;
|
||||
|
||||
/* close all open connections and release endpoints */
|
||||
if (NULL != ugni_module->endpoints) {
|
||||
(void) ompi_proc_world (&ntotal_procs);
|
||||
|
||||
for (i = 0 ; i < ntotal_procs ; ++i) {
|
||||
if (ugni_module->endpoints[i]) {
|
||||
mca_btl_ugni_release_ep (ugni_module->endpoints[i]);
|
||||
}
|
||||
|
||||
ugni_module->endpoints[i] = NULL;
|
||||
}
|
||||
|
||||
ugni_module->endpoint_count = 0;
|
||||
ugni_module->endpoints = NULL;
|
||||
}
|
||||
|
||||
/* destroy all cqs */
|
||||
rc = GNI_CqDestroy (ugni_module->bte_local_cq);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_ERROR(("error tearing down local BTE CQ"));
|
||||
}
|
||||
|
||||
rc = GNI_CqDestroy (ugni_module->smsg_remote_cq);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_ERROR(("error tearing down remote SMSG CQ"));
|
||||
}
|
||||
|
||||
/* cancel wildcard post */
|
||||
rc = GNI_EpPostDataCancelById (ugni_module->wildcard_ep,
|
||||
MCA_BTL_UGNI_CONNECT_WILDCARD_ID |
|
||||
ORTE_PROC_MY_NAME->vpid);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_VERBOSE(("btl/ugni error cancelling wildcard post"));
|
||||
}
|
||||
|
||||
/* tear down wildcard endpoint */
|
||||
rc = GNI_EpDestroy (ugni_module->wildcard_ep);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_VERBOSE(("btl/ugni error destroying endpoint"));
|
||||
}
|
||||
|
||||
(void) mca_mpool_base_module_destroy (ugni_module->smsg_mpool);
|
||||
ugni_module->smsg_mpool = NULL;
|
||||
|
||||
(void) mca_mpool_base_module_destroy (ugni_module->super.btl_mpool);
|
||||
ugni_module->super.btl_mpool = NULL;
|
||||
|
||||
OBJ_DESTRUCT(&ugni_module->failed_frags);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
|
||||
mca_btl_base_descriptor_t *
|
||||
mca_btl_ugni_alloc(struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
uint8_t order, size_t size, uint32_t flags)
|
||||
{
|
||||
mca_btl_ugni_base_frag_t *frag = NULL;
|
||||
int rc = OMPI_SUCCESS;
|
||||
|
||||
if (size <= mca_btl_ugni_component.eager_limit) {
|
||||
MCA_BTL_UGNI_FRAG_ALLOC_EAGER((mca_btl_ugni_module_t *) btl, frag, rc);
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("btl/ugni_module allocated frag of size: %u, flags: %x. frag = %p",
|
||||
(unsigned int)size, flags, (void *) frag));
|
||||
|
||||
if (OPAL_LIKELY(NULL != frag)) {
|
||||
frag->base.des_flags = flags;
|
||||
frag->base.order = order;
|
||||
frag->base.des_src = frag->segments;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.des_dst = frag->segments;
|
||||
frag->base.des_dst_cnt = 1;
|
||||
|
||||
frag->segments[0].seg_len = size;
|
||||
}
|
||||
|
||||
return (mca_btl_base_descriptor_t *) frag;
|
||||
}
|
||||
|
||||
static int
|
||||
mca_btl_ugni_free (struct mca_btl_base_module_t *btl,
|
||||
mca_btl_base_descriptor_t *des)
|
||||
{
|
||||
MCA_BTL_UGNI_FRAG_RETURN((mca_btl_ugni_base_frag_t *) des);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static struct mca_btl_base_descriptor_t *
|
||||
mca_btl_ugni_prepare_src (struct mca_btl_base_module_t *btl,
|
||||
mca_btl_base_endpoint_t *endpoint,
|
||||
mca_mpool_base_registration_t *registration,
|
||||
struct opal_convertor_t *convertor,
|
||||
uint8_t order, size_t reserve, size_t *size,
|
||||
uint32_t flags)
|
||||
{
|
||||
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
|
||||
mca_btl_ugni_base_frag_t *frag = NULL;
|
||||
void *data_ptr;
|
||||
int rc;
|
||||
|
||||
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
||||
|
||||
if (OPAL_LIKELY(reserve)) {
|
||||
MCA_BTL_UGNI_FRAG_ALLOC_EAGER(ugni_module, frag, rc);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return NULL;
|
||||
}
|
||||
if ((*size + reserve) > mca_btl_ugni_component.eager_limit) {
|
||||
*size = mca_btl_ugni_component.eager_limit - reserve;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("preparing src for send fragment. size = %u",
|
||||
(unsigned int)(*size + reserve)));
|
||||
|
||||
if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) {
|
||||
/* non-contiguous data requires using the convertor */
|
||||
uint32_t iov_count = 1;
|
||||
struct iovec iov;
|
||||
|
||||
iov.iov_len = mca_btl_ugni_component.eager_limit - reserve;
|
||||
iov.iov_base =
|
||||
(IOVBASE_TYPE *)(((uintptr_t)(frag->segments[0].seg_addr.pval)) +
|
||||
reserve);
|
||||
|
||||
rc = opal_convertor_pack (convertor, &iov, &iov_count, size);
|
||||
if (OPAL_UNLIKELY(rc < 0)) {
|
||||
MCA_BTL_UGNI_FRAG_RETURN(frag);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->segments[0].seg_len = reserve + *size;
|
||||
}
|
||||
else {
|
||||
memmove ((void *)((uintptr_t)frag->segments[0].seg_addr.pval + reserve),
|
||||
data_ptr, *size);
|
||||
frag->segments[0].seg_len = reserve + *size;
|
||||
}
|
||||
} else {
|
||||
MCA_BTL_UGNI_FRAG_ALLOC_RDMA(ugni_module, frag, rc);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* For medium message use FMA protocols and for large message
|
||||
* use BTE protocols
|
||||
*/
|
||||
/* No need to register while using FMA Put (registration is
|
||||
* non-null in get-- is this always true?) */
|
||||
if (*size >= mca_btl_ugni_component.btl_fma_limit || (flags & MCA_BTL_DES_FLAGS_GET)) {
|
||||
if (NULL == registration) {
|
||||
rc = ugni_module->super.btl_mpool->mpool_register(ugni_module->super.btl_mpool,
|
||||
data_ptr, *size, 0,
|
||||
®istration);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
BTL_ERROR(("btl/ugni error registering source memory"));
|
||||
MCA_BTL_UGNI_FRAG_RETURN(frag);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->registration = (mca_btl_ugni_reg_t*)registration;
|
||||
}
|
||||
|
||||
memcpy ((void *) frag->segments[0].seg_key.key64,
|
||||
(void *)&((mca_btl_ugni_reg_t *)registration)->memory_hdl,
|
||||
sizeof (((mca_btl_ugni_reg_t *)registration)->memory_hdl));
|
||||
} else {
|
||||
memset ((void *) frag->segments[0].seg_key.key64, 0,
|
||||
sizeof (frag->segments[0].seg_key.key64));
|
||||
}
|
||||
|
||||
frag->segments[0].seg_len = *size;
|
||||
frag->segments[0].seg_addr.pval = data_ptr;
|
||||
}
|
||||
|
||||
frag->base.des_src = frag->segments;
|
||||
frag->base.des_src_cnt = 1;
|
||||
frag->base.order = order;
|
||||
frag->base.des_flags = flags;
|
||||
frag->endpoint = endpoint;
|
||||
|
||||
return &frag->base;
|
||||
}
|
||||
|
||||
static mca_btl_base_descriptor_t *
|
||||
mca_btl_ugni_prepare_dst (mca_btl_base_module_t *btl,
|
||||
mca_btl_base_endpoint_t *endpoint,
|
||||
mca_mpool_base_registration_t *registration,
|
||||
opal_convertor_t *convertor, uint8_t order,
|
||||
size_t reserve, size_t *size, uint32_t flags)
|
||||
{
|
||||
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
|
||||
mca_btl_ugni_base_frag_t *frag;
|
||||
void *data_ptr;
|
||||
int rc;
|
||||
|
||||
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
||||
|
||||
/* no alignment restrictions on put */
|
||||
MCA_BTL_UGNI_FRAG_ALLOC_RDMA(ugni_module, frag, rc);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* always need to register the buffer for put/get (even for fma) */
|
||||
if (NULL == registration) {
|
||||
rc = ugni_module->super.btl_mpool->mpool_register(ugni_module->super.btl_mpool,
|
||||
data_ptr, *size, 0,
|
||||
®istration);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
MCA_BTL_UGNI_FRAG_RETURN(frag);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
frag->registration = (mca_btl_ugni_reg_t*) registration;
|
||||
}
|
||||
|
||||
memcpy ((void *) frag->segments[0].seg_key.key64,
|
||||
(void *)&((mca_btl_ugni_reg_t *)registration)->memory_hdl,
|
||||
sizeof (((mca_btl_ugni_reg_t *)registration)->memory_hdl));
|
||||
|
||||
frag->segments[0].seg_len = *size;
|
||||
frag->segments[0].seg_addr.pval = data_ptr;
|
||||
|
||||
frag->base.des_dst = frag->segments;
|
||||
frag->base.des_dst_cnt = 1;
|
||||
frag->base.order = order;
|
||||
frag->base.des_flags = flags;
|
||||
frag->endpoint = endpoint;
|
||||
|
||||
return (struct mca_btl_base_descriptor_t *) frag;
|
||||
}
|
65
ompi/mca/btl/ugni/btl_ugni_put.c
Обычный файл
65
ompi/mca/btl/ugni/btl_ugni_put.c
Обычный файл
@ -0,0 +1,65 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
#include "ompi_config.h"
|
||||
#include "opal/include/opal_stdint.h"
|
||||
|
||||
#include "btl_ugni_rdma.h"
|
||||
#include "opal/util/opal_sos.h"
|
||||
|
||||
/**
|
||||
* Initiate a put operation.
|
||||
*
|
||||
* @param btl (IN) BTL module
|
||||
* @param endpoint (IN) BTL addressing information
|
||||
* @param descriptor (IN) Description of the data to be transferred
|
||||
*/
|
||||
int mca_btl_ugni_put (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct mca_btl_base_descriptor_t *des) {
|
||||
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) des;
|
||||
gni_mem_handle_t lcl_hdl, rem_hdl;
|
||||
void *lcl_buffer, *rem_buffer;
|
||||
size_t size;
|
||||
int rc;
|
||||
|
||||
BTL_VERBOSE(("Using RDMA Put"));
|
||||
|
||||
/* Check if endpoint is connected */
|
||||
rc = mca_btl_ugni_check_endpoint_state(endpoint);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
/* we should already be connected by this point (we got an rc send) */
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Get local memory handle */
|
||||
lcl_buffer = (void*)(des->des_src->seg_addr.pval);
|
||||
size = des->des_src->seg_len;
|
||||
memcpy (&lcl_hdl, (void *) des->des_src->seg_key.key64,
|
||||
sizeof(gni_mem_handle_t));
|
||||
|
||||
/* Get remote memory handle */
|
||||
rem_buffer = (void*)(des->des_dst->seg_addr.pval);
|
||||
memcpy (&rem_hdl, (void *) des->des_dst->seg_key.key64,
|
||||
sizeof(gni_mem_handle_t));
|
||||
|
||||
frag->tries = 0;
|
||||
|
||||
if (size < mca_btl_ugni_component.btl_fma_limit) {
|
||||
rc = post_fma_descriptor (frag, GNI_POST_FMA_PUT, endpoint, size,
|
||||
lcl_buffer, lcl_hdl, rem_buffer, rem_hdl);
|
||||
} else {
|
||||
rc = post_bte_descriptor (frag, GNI_POST_RDMA_PUT, endpoint, size,
|
||||
lcl_buffer, lcl_hdl, rem_buffer, rem_hdl);
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
128
ompi/mca/btl/ugni/btl_ugni_rdma.h
Обычный файл
128
ompi/mca/btl/ugni/btl_ugni_rdma.h
Обычный файл
@ -0,0 +1,128 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#if !defined(MCA_BTL_UGNI_RDMA_H)
|
||||
#define MCA_BTL_UGNI_RDMA_H
|
||||
|
||||
#include "btl_ugni.h"
|
||||
#include "btl_ugni_frag.h"
|
||||
#include "btl_ugni_endpoint.h"
|
||||
|
||||
static inline void
|
||||
mca_btl_ugni_post_frag_complete (ompi_common_ugni_post_desc_t *desc, int rc) {
|
||||
mca_btl_ugni_base_frag_t *frag = MCA_BTL_UGNI_DESC_TO_FRAG(desc);
|
||||
|
||||
/* always call put/get callback */
|
||||
frag->base.des_cbfunc(&frag->btl->super, frag->endpoint, &frag->base, rc);
|
||||
|
||||
if (OPAL_LIKELY(frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) {
|
||||
MCA_BTL_UGNI_FRAG_RETURN(frag);
|
||||
}
|
||||
}
|
||||
|
||||
static inline int init_gni_post_desc(mca_btl_ugni_base_frag_t *frag,
|
||||
mca_btl_base_endpoint_t *ep,
|
||||
gni_post_type_t op_type,
|
||||
uint64_t lcl_addr,
|
||||
gni_mem_handle_t *lcl_mdh,
|
||||
uint64_t rem_addr,
|
||||
gni_mem_handle_t *rem_mdh,
|
||||
uint64_t bufsize,
|
||||
gni_cq_handle_t cq_hndl) {
|
||||
frag->post_desc.base.type = op_type;
|
||||
frag->post_desc.base.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
|
||||
frag->post_desc.base.dlvr_mode = GNI_DLVMODE_PERFORMANCE;
|
||||
frag->post_desc.base.local_addr = (uint64_t) lcl_addr;
|
||||
frag->post_desc.base.local_mem_hndl = *lcl_mdh;
|
||||
frag->post_desc.base.remote_addr = (uint64_t) rem_addr;
|
||||
frag->post_desc.base.remote_mem_hndl = *rem_mdh;
|
||||
frag->post_desc.base.length = bufsize;
|
||||
frag->post_desc.base.rdma_mode = 0;
|
||||
frag->post_desc.base.src_cq_hndl = cq_hndl;
|
||||
|
||||
frag->post_desc.cbfunc = mca_btl_ugni_post_frag_complete;
|
||||
frag->post_desc.endpoint = ep->common;
|
||||
|
||||
frag->post_desc.tries = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Set up frag's post descriptor (with a source CQ handle of 0) and post it
 * with GNI_PostFma on the endpoint's handle.
 *
 * Returns the GNI_PostFma result when it is GNI_RC_SUCCESS; any failure is
 * logged and reported as OMPI_ERR_OUT_OF_RESOURCE.
 */
static inline int post_fma_descriptor (mca_btl_ugni_base_frag_t *frag, gni_post_type_t op_type,
                                       struct mca_btl_base_endpoint_t *endpoint,
                                       size_t size, void *lcl_buffer, gni_mem_handle_t lcl_hdl,
                                       void *rem_buffer, gni_mem_handle_t rem_hdl)
{
    int gni_rc;

    /* Post descriptor */
    init_gni_post_desc (frag, endpoint, op_type, (uint64_t)lcl_buffer,
                        &lcl_hdl, (uint64_t)rem_buffer, &rem_hdl,
                        size, 0);

    gni_rc = GNI_PostFma (endpoint->common->ep_handle, &frag->post_desc.base);
    if (GNI_RC_SUCCESS == gni_rc) {
        return gni_rc;
    }

    BTL_ERROR(("GNI_PostFma failed with rc = %d", gni_rc));
    assert(gni_rc < 4);

    /* every failure is reported as a resource shortage instead of being
     * translated with ompi_common_rc_ugni_to_ompi */
    return OMPI_ERR_OUT_OF_RESOURCE;
}
|
||||
|
||||
/*
 * Set up frag's post descriptor with the module's bte_local_cq as source CQ
 * and post it with GNI_PostRdma on the endpoint's handle.
 *
 * Returns the GNI_PostRdma result when it is GNI_RC_SUCCESS; a failure is
 * logged with the GNI status and returned translated through
 * ompi_common_rc_ugni_to_ompi.
 */
static inline int post_bte_descriptor (mca_btl_ugni_base_frag_t *frag, gni_post_type_t op_type,
                                       struct mca_btl_base_endpoint_t *endpoint,
                                       size_t size, void *lcl_buffer, gni_mem_handle_t lcl_hdl,
                                       void *rem_buffer, gni_mem_handle_t rem_hdl) {
    int rc;

    /* Post descriptor */
    init_gni_post_desc (frag, endpoint, op_type, (uint64_t)lcl_buffer,
                        &lcl_hdl, (uint64_t)rem_buffer, &rem_hdl,
                        size, endpoint->btl->bte_local_cq);

    rc = GNI_PostRdma (endpoint->common->ep_handle, &frag->post_desc.base);
    if (GNI_RC_SUCCESS != rc) {
        /* log the raw GNI status before it is translated; the message names
         * GNI_PostRdma, so the OMPI code would be misleading here */
        BTL_ERROR(("GNI_PostRdma failed with rc = %d", rc));
        assert(rc < 4);
        rc = ompi_common_rc_ugni_to_ompi (rc);
    }

    return rc;
}
|
||||
|
||||
static inline int mca_btl_ugni_start_reverse_get (struct mca_btl_base_module_t *btl,
|
||||
mca_btl_ugni_base_frag_t *frag) {
|
||||
/* off alignment/off size. switch to put */
|
||||
mca_btl_base_segment_t segments[2];
|
||||
uint32_t msg_id = ORTE_PROC_MY_NAME->vpid;
|
||||
void *post_desc_ptr = &(frag->post_desc);
|
||||
int rc;
|
||||
|
||||
segments[0] = frag->base.des_src[0];
|
||||
segments[1] = frag->base.des_dst[0];
|
||||
|
||||
rc = GNI_SmsgSendWTag (frag->endpoint->common->ep_handle, segments,
|
||||
sizeof (segments), &post_desc_ptr, sizeof (void *),
|
||||
msg_id, MCA_BTL_UGNI_TAG_PUT_INIT);
|
||||
if (OPAL_UNLIKELY(rc == GNI_RC_NOT_DONE)) {
|
||||
BTL_ERROR(("GNI_SmsgSendWTag failed with rc = %d", rc));
|
||||
/* send this smsg packet later */
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
/* todo -- on failure try again */
|
||||
assert (GNI_RC_SUCCESS == rc);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
#endif /* MCA_BTL_UGNI_RDMA_H */
|
67
ompi/mca/btl/ugni/btl_ugni_send.c
Обычный файл
67
ompi/mca/btl/ugni/btl_ugni_send.c
Обычный файл
@ -0,0 +1,67 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "btl_ugni.h"
|
||||
#include "btl_ugni_frag.h"
|
||||
#include "btl_ugni_endpoint.h"
|
||||
|
||||
int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *btl_peer,
|
||||
struct mca_btl_base_descriptor_t *descriptor,
|
||||
mca_btl_base_tag_t tag)
|
||||
{
|
||||
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) descriptor;
|
||||
int rc;
|
||||
|
||||
BTL_VERBOSE(("btl/ugni sending descriptor %p from %d -> %d. length = %d", (void *)descriptor,
|
||||
ORTE_PROC_MY_NAME->vpid, btl_peer->common->ep_rem_id, frag->segments[0].seg_len));
|
||||
|
||||
rc = mca_btl_ugni_check_endpoint_state (btl_peer);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
frag->tag = tag;
|
||||
descriptor->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
|
||||
|
||||
opal_list_append (&btl_peer->pending_list, (opal_list_item_t *) frag);
|
||||
/* connection started and request queued or something bad happened */
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
frag->hdr->tag = tag;
|
||||
frag->hdr->len = frag->segments[0].seg_len;
|
||||
|
||||
/* check endpoint state */
|
||||
rc = GNI_SmsgSendWTag (btl_peer->common->ep_handle, frag->hdr,
|
||||
descriptor->des_src->seg_len + sizeof (frag->hdr[0]),
|
||||
NULL, 0, -1, MCA_BTL_UGNI_TAG_SEND);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
BTL_VERBOSE(("GNI_SmsgSendWTag failed with rc = %d", rc));
|
||||
|
||||
if (OPAL_LIKELY(GNI_RC_NOT_DONE == rc)) {
|
||||
BTL_VERBOSE(("out of credits"));
|
||||
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
if (MCA_BTL_DES_SEND_ALWAYS_CALLBACK & frag->base.des_flags) {
|
||||
/* completion callback */
|
||||
frag->base.des_cbfunc(&btl_peer->btl->super, btl_peer, &frag->base, OMPI_SUCCESS);
|
||||
}
|
||||
|
||||
if (descriptor->des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP) {
|
||||
MCA_BTL_UGNI_FRAG_RETURN (frag);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
95
ompi/mca/btl/ugni/btl_ugni_sendi.c
Обычный файл
95
ompi/mca/btl/ugni/btl_ugni_sendi.c
Обычный файл
@ -0,0 +1,95 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "btl_ugni.h"
|
||||
#include "btl_ugni_frag.h"
|
||||
#include "btl_ugni_endpoint.h"
|
||||
|
||||
int mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct opal_convertor_t *convertor,
|
||||
void *header, size_t header_size,
|
||||
size_t payload_size, uint8_t order,
|
||||
uint32_t flags, mca_btl_base_tag_t tag,
|
||||
mca_btl_base_descriptor_t **descriptor)
|
||||
{
|
||||
size_t length = header_size + payload_size;
|
||||
uint32_t msg_id = ORTE_PROC_MY_NAME->vpid;
|
||||
mca_btl_ugni_base_frag_t *frag;
|
||||
uint32_t iov_count = 1;
|
||||
void *data_ptr = NULL;
|
||||
struct iovec iov;
|
||||
size_t max_data;
|
||||
int rc;
|
||||
|
||||
assert (length < mca_btl_ugni_component.eager_limit);
|
||||
assert (0 == (flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK));
|
||||
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != mca_btl_ugni_check_endpoint_state (endpoint))) {
|
||||
/* can't complete inline send if the endpoint is not already connected */
|
||||
/* go ahead and start the connection */
|
||||
*descriptor = mca_btl_ugni_alloc (btl, endpoint, order, length, flags);
|
||||
|
||||
return OMPI_ERR_RESOURCE_BUSY;
|
||||
}
|
||||
|
||||
MCA_BTL_UGNI_FRAG_ALLOC_EAGER((mca_btl_ugni_module_t *) btl, frag, rc);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
*descriptor = NULL;
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("btl/ugni sending inline descriptor %p from %d -> %d. length = %u", (void *) frag,
|
||||
ORTE_PROC_MY_NAME->vpid, endpoint->common->ep_rem_id, (unsigned int) length));
|
||||
|
||||
/* write match header (with MPI comm/tag/etc. info) */
|
||||
memcpy (frag->segments[0].seg_addr.pval, header, header_size);
|
||||
|
||||
frag->hdr->tag = tag;
|
||||
frag->hdr->len = length;
|
||||
|
||||
/*
|
||||
We can add MEMCHECKER calls before and after the packing.
|
||||
*/
|
||||
if (OPAL_UNLIKELY(payload_size && opal_convertor_need_buffers (convertor))) {
|
||||
/* pack the data into the supplied buffer */
|
||||
iov.iov_base = (IOVBASE_TYPE *)((uintptr_t)frag->segments[0].seg_addr.pval + header_size);
|
||||
iov.iov_len = max_data = payload_size;
|
||||
|
||||
(void) opal_convertor_pack (convertor, &iov, &iov_count, &max_data);
|
||||
|
||||
assert (max_data == payload_size);
|
||||
|
||||
header_size += payload_size;
|
||||
payload_size = 0;
|
||||
} else if (payload_size) {
|
||||
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
||||
}
|
||||
|
||||
header_size += sizeof (frag->hdr[0]);
|
||||
|
||||
/* check endpoint state */
|
||||
rc = GNI_SmsgSendWTag (endpoint->common->ep_handle, frag->hdr, header_size,
|
||||
data_ptr, payload_size, msg_id,
|
||||
MCA_BTL_UGNI_TAG_SEND);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
BTL_VERBOSE(("GNI_SmsgSendWTag failed with rc = %d", rc));
|
||||
MCA_BTL_UGNI_FRAG_RETURN (frag);
|
||||
*descriptor = NULL;
|
||||
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
MCA_BTL_UGNI_FRAG_RETURN (frag);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
56
ompi/mca/btl/ugni/configure.m4
Обычный файл
56
ompi/mca/btl/ugni/configure.m4
Обычный файл
@ -0,0 +1,56 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2006 QLogic Corp. All rights reserved.
|
||||
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_ompi_btl_ugni_CONFIG([action-if-can-compile], [action-if-cant-compile])
# --------------------------------------------------------
# check if GNI support can be found (via OMPI_CHECK_UGNI). sets
# btl_ugni_{CPPFLAGS, LDFLAGS, LIBS} as needed and runs
# action-if-can-compile if there is support, otherwise executes
# action-if-cant-compile
|
||||
#
|
||||
# NOTES
|
||||
# on Cray XE6 systems, the GNI development header (gni_pub.h) is in a
|
||||
# completely different place than the ugni library (libugni).
|
||||
#
|
||||
# EXAMPLE CONFIGURE USAGE:
|
||||
# --with-ugni=/base/path/to/libugni --with-ugni-includedir=/path/to/gni_pub.h
|
||||
#
|
||||
# --with-ugni=/opt/cray/ugni/default --with-ugni-includedir=/opt/cray/gni-headers/default/include
|
||||
|
||||
AC_DEFUN([MCA_ompi_btl_ugni_CONFIG],[
    AC_CONFIG_FILES([ompi/mca/btl/ugni/Makefile])

    # probe for uGNI and record the outcome in btl_ugni_happy
    OMPI_CHECK_UGNI([btl_ugni],
                    [btl_ugni_happy="yes"],
                    [btl_ugni_happy="no"])

    # on success export the link flags to the wrapper compilers and run
    # the first action; otherwise run the second action
    AS_IF([test "$btl_ugni_happy" = "yes"],
          [btl_ugni_WRAPPER_EXTRA_LDFLAGS="$btl_ugni_LDFLAGS"
           btl_ugni_WRAPPER_EXTRA_LIBS="$btl_ugni_LIBS"
           $1],
          [$2])

    # substitute in the things needed to build ugni
    AC_SUBST([btl_ugni_CPPFLAGS])
    AC_SUBST([btl_ugni_LDFLAGS])
    AC_SUBST([btl_ugni_LIBS])
])dnl
|
43
ompi/mca/common/ugni/Makefile.am
Обычный файл
43
ompi/mca/common/ugni/Makefile.am
Обычный файл
@ -0,0 +1,43 @@
|
||||
# -*- indent-tabs-mode:nil -*-
|
||||
#
|
||||
# Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
AM_CPPFLAGS = $(common_ugni_CPPFLAGS)

if MCA_BUILD_ompi_common_ugni_DSO
component_noinst =
component_install = mca_common_ugni.la
else
component_noinst = libmca_common_ugni.la
component_install =
endif

headers = common_ugni.h \
          common_ugni_ep.h

# The headers are listed with the sources so that automake includes them in
# the distribution; they are otherwise ignored when building.
ugni_SOURCES = $(headers) \
               common_ugni.c \
               common_ugni_ep.c

# DSO build
mcacomponentdir = $(pkglibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_common_ugni_la_SOURCES = $(ugni_SOURCES)
nodist_mca_common_ugni_la_SOURCES = $(ugni_nodist_SOURCES)
mca_common_ugni_la_LIBADD = $(common_ugni_LIBS)
mca_common_ugni_la_LDFLAGS = -module -avoid-version $(common_ugni_LDFLAGS)

# static build
noinst_LTLIBRARIES = $(component_noinst)
libmca_common_ugni_la_SOURCES = $(ugni_SOURCES)
nodist_libmca_common_ugni_la_SOURCES = $(ugni_nodist_SOURCES)
libmca_common_ugni_la_LIBADD = $(common_ugni_LIBS)
libmca_common_ugni_la_LDFLAGS = -module -avoid-version $(common_ugni_LDFLAGS)
|
318
ompi/mca/common/ugni/common_ugni.c
Обычный файл
318
ompi/mca/common/ugni/common_ugni.c
Обычный файл
@ -0,0 +1,318 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
|
||||
#include "common_ugni.h"
|
||||
|
||||
#include "ompi/proc/proc.h"
|
||||
|
||||
/* NTH: we need some options from the btl */
|
||||
#include "ompi/mca/btl/ugni/btl_ugni.h"
|
||||
|
||||
static int ompi_common_ugni_module_ref_count = 0;
|
||||
ompi_common_ugni_module_t ompi_common_ugni_module;
|
||||
|
||||
mca_base_component_t ompi_common_ugni_component = {
|
||||
MCA_BASE_VERSION_2_0_0,
|
||||
"common",
|
||||
MCA_BASE_VERSION_2_0_0,
|
||||
"ugni",
|
||||
MCA_BASE_VERSION_2_0_0,
|
||||
NULL,
|
||||
NULL
|
||||
};
|
||||
|
||||
static inline int
|
||||
get_ptag(uint8_t *out_ptag)
|
||||
{
|
||||
/* TODO no need for tmp */
|
||||
char *ptr;
|
||||
uint8_t tmp_ptag;
|
||||
|
||||
if (NULL == (ptr = getenv("PMI_GNI_PTAG"))) {
|
||||
/* TODO add err msg - better rc? */
|
||||
return OMPI_ERR_NOT_FOUND;
|
||||
}
|
||||
errno = 0;
|
||||
tmp_ptag = (uint8_t)strtoul (ptr, (char **)NULL, 10);
|
||||
if (0 != errno) {
|
||||
/* TODO add err msg - better rc? */
|
||||
return OMPI_ERR_VALUE_OUT_OF_BOUNDS;
|
||||
}
|
||||
*out_ptag = tmp_ptag;
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static inline int get_cookie (uint32_t *out_cookie)
|
||||
{
|
||||
/* TODO no need for tmp */
|
||||
char *ptr;
|
||||
uint32_t tmp_cookie;
|
||||
|
||||
if (NULL == (ptr = getenv("PMI_GNI_COOKIE"))) {
|
||||
/* TODO add err msg - better rc? */
|
||||
return OMPI_ERR_NOT_FOUND;
|
||||
}
|
||||
errno = 0;
|
||||
tmp_cookie = (uint32_t) strtoul (ptr, NULL, 10);
|
||||
if (0 != errno) {
|
||||
/* TODO add err msg - better rc? */
|
||||
return OMPI_ERR_VALUE_OUT_OF_BOUNDS;
|
||||
}
|
||||
|
||||
*out_cookie = tmp_cookie;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
ompi_common_ugni_get_nic_address(int device_id)
|
||||
{
|
||||
unsigned int address, cpu_id;
|
||||
gni_return_t status;
|
||||
int i, alps_dev_id = -1;
|
||||
char *token,*p_ptr;
|
||||
|
||||
p_ptr = getenv("PMI_GNI_DEV_ID");
|
||||
if (!p_ptr) {
|
||||
status = GNI_CdmGetNicAddress(device_id, &address, &cpu_id);
|
||||
if(status != GNI_RC_SUCCESS) {
|
||||
opal_output (0, "FAILED:GNI_CdmGetNicAddress returned error %d", status);
|
||||
return (unsigned int)-1;
|
||||
}
|
||||
return address;
|
||||
}
|
||||
|
||||
while (NULL != (token = strtok(p_ptr, ":"))) {
|
||||
alps_dev_id = atoi(token);
|
||||
if (alps_dev_id == device_id) {
|
||||
break;
|
||||
}
|
||||
p_ptr = NULL;
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(-1 == alps_dev_id)) {
|
||||
return (unsigned int)-1;
|
||||
}
|
||||
|
||||
p_ptr = getenv("PMI_GNI_LOC_ADDR");
|
||||
if (OPAL_UNLIKELY(NULL == p_ptr)) {
|
||||
return (unsigned int)-1;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
while (NULL != (token = strtok(p_ptr, ":"))) {
|
||||
if (i == alps_dev_id) {
|
||||
return strtoul (token, NULL, 10);
|
||||
}
|
||||
p_ptr = NULL;
|
||||
++i;
|
||||
}
|
||||
|
||||
return (unsigned int)-1;
|
||||
}
|
||||
|
||||
/**
 * Initialize one uGNI device: look up its NIC address, attach it to the
 * communication domain, create its local completion queue and allocate the
 * (initially empty) endpoint table.
 *
 * @param[in,out] device           device structure to fill in
 * @param[in]     comm_world_size  number of slots in the endpoint table
 * @param[in]     device_id        minor number of the Gemini NIC
 *
 * @returns OMPI_SUCCESS on success or an OMPI error code. Resources acquired
 *          before a failure are not released here.
 */
static int ompi_common_ugni_device_init (ompi_common_ugni_device_t *device,
                                         int comm_world_size, int device_id)
{
    int rc;

    /* Create a NIC Address */
    device->dev_id = device_id; /* Minor number of the Gemini NIC */

    device->dev_addr = ompi_common_ugni_get_nic_address (device->dev_id);
    if (OPAL_UNLIKELY((uint32_t) -1 == device->dev_addr)) {
        /* the address is published in the modex; do not publish garbage */
        OPAL_OUTPUT((0, "Error: could not determine NIC address of device %d",
                     device_id));
        return OMPI_ERR_NOT_FOUND;
    }

    OPAL_OUTPUT((-1, "Got NIC Addr: 0x%08x, device ID: %d", device->dev_addr, device->dev_id));

    /* Attach device to the communication domain */
    rc = GNI_CdmAttach (ompi_common_ugni_module.cd_handle, device->dev_id,
                        &device->dev_pe_addr, &device->dev_handle);
    if (GNI_RC_SUCCESS != rc) {
        OPAL_OUTPUT((0, "Error: Attaching device to communication domain %d\n", rc));
        return ompi_common_rc_ugni_to_ompi (rc);
    }

    /* Create a completion queue to attach to endpoints */
    rc = GNI_CqCreate (device->dev_handle, ompi_common_ugni_module.local_cq_size,
                       0, GNI_CQ_NOBLOCK, NULL, NULL, &device->dev_local_cq);
    if (GNI_RC_SUCCESS != rc) {
        OPAL_OUTPUT((0, "Error creating SMSG local CQ. rc = %d", rc));
        return ompi_common_rc_ugni_to_ompi (rc);
    }

    /* one endpoint slot per peer, all NULL until an endpoint is requested */
    device->dev_eps = calloc (comm_world_size, sizeof (ompi_common_ugni_endpoint_t *));
    if (NULL == device->dev_eps) {
        OPAL_OUTPUT((0, "Error allocating space for endpoint pointers"));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    return OMPI_SUCCESS;
}
|
||||
|
||||
/**
 * Release the resources held by a device: the endpoint table and the local
 * completion queue. A failure to destroy the queue is only logged.
 *
 * @param[in,out] dev  device to tear down
 *
 * @returns OMPI_SUCCESS always.
 */
static int ompi_common_ugni_device_fini (ompi_common_ugni_device_t *dev)
{
    int cq_rc;

    /* free(NULL) is a no-op, so the table can be released unconditionally */
    free (dev->dev_eps);
    dev->dev_eps = NULL;

    cq_rc = GNI_CqDestroy (dev->dev_local_cq);
    if (GNI_RC_SUCCESS != cq_rc) {
        OPAL_OUTPUT((-1, "btl/ugni error destroying cq. rc = %d", cq_rc));
    }

    return OMPI_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* Send local device information and other information
|
||||
* required for setup
|
||||
*/
|
||||
static int ompi_common_ugni_send_modex (int my_rank)
|
||||
{
|
||||
uint32_t modex_size, total_msg_size, msg_offset;
|
||||
struct ompi_common_ugni_modex_t modex;
|
||||
char *modex_msg;
|
||||
int rc, i;
|
||||
|
||||
modex_size = sizeof (struct ompi_common_ugni_modex_t);
|
||||
total_msg_size = ompi_common_ugni_module.device_count * modex_size;
|
||||
|
||||
modex_msg = (char *) malloc (total_msg_size);
|
||||
if (NULL == modex_msg) {
|
||||
OPAL_OUTPUT((-1, "Error allocating memory for modex @ %s:%d",
|
||||
__FILE__, __LINE__));
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* pack modex for all available devices */
|
||||
for (i = 0, msg_offset = 0; i < ompi_common_ugni_module.device_count ; ++i) {
|
||||
ompi_common_ugni_device_t *dev = ompi_common_ugni_module.devices + i;
|
||||
|
||||
modex.addr = dev->dev_addr;
|
||||
modex.id = my_rank;
|
||||
|
||||
memcpy ((void *)((uintptr_t) modex_msg + msg_offset),
|
||||
(void *)&modex, modex_size);
|
||||
|
||||
msg_offset += modex_size;
|
||||
}
|
||||
|
||||
rc = ompi_modex_send(&ompi_common_ugni_component,
|
||||
modex_msg, total_msg_size);
|
||||
|
||||
free(modex_msg);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
int ompi_common_ugni_fini (void)
|
||||
{
|
||||
int i, rc;
|
||||
|
||||
if (0 == ompi_common_ugni_module_ref_count) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
if (1 == ompi_common_ugni_module_ref_count) {
|
||||
/* tear down component */
|
||||
if (ompi_common_ugni_module.devices) {
|
||||
/* finalize devices */
|
||||
for (i = 0 ; i < ompi_common_ugni_module.device_count ; ++i) {
|
||||
ompi_common_ugni_device_fini (ompi_common_ugni_module.devices + i);
|
||||
}
|
||||
|
||||
free (ompi_common_ugni_module.devices);
|
||||
ompi_common_ugni_module.devices = NULL;
|
||||
}
|
||||
|
||||
/* finally, tear down the communication domain */
|
||||
rc = GNI_CdmDestroy (ompi_common_ugni_module.cd_handle);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
OPAL_OUTPUT((-1, "error destroying cdm"));
|
||||
}
|
||||
}
|
||||
|
||||
ompi_common_ugni_module_ref_count--;
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
int ompi_common_ugni_init (void)
|
||||
{
|
||||
int modes, rc, my_rank, i;
|
||||
size_t comm_world_size;
|
||||
ompi_proc_t *my_proc;
|
||||
|
||||
ompi_common_ugni_module_ref_count ++;
|
||||
|
||||
if (ompi_common_ugni_module_ref_count > 1) {
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
my_proc = ompi_proc_local ();
|
||||
my_rank = my_proc->proc_name.vpid;
|
||||
|
||||
/* pull settings from ugni btl */
|
||||
ompi_common_ugni_module.rdma_max_retries =
|
||||
mca_btl_ugni_component.rdma_max_retries;
|
||||
ompi_common_ugni_module.local_cq_size =
|
||||
mca_btl_ugni_component.cq_size;
|
||||
|
||||
(void) ompi_proc_world (&comm_world_size);
|
||||
|
||||
/* Create a communication domain */
|
||||
modes = GNI_CDM_MODE_FORK_FULLCOPY | GNI_CDM_MODE_CACHED_AMO_ENABLED |
|
||||
GNI_CDM_MODE_DUAL_EVENTS | GNI_CDM_MODE_ERR_NO_KILL |
|
||||
GNI_CDM_MODE_FAST_DATAGRAM_POLL;
|
||||
|
||||
/* collect uGNI information */
|
||||
rc = get_ptag(&ompi_common_ugni_module.ptag);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = get_cookie(&ompi_common_ugni_module.cookie);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* create a communication domain */
|
||||
rc = GNI_CdmCreate (my_rank, ompi_common_ugni_module.ptag,
|
||||
ompi_common_ugni_module.cookie, modes,
|
||||
&ompi_common_ugni_module.cd_handle);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
OPAL_OUTPUT((0, "Error: Creating communication domain %d\n",rc));
|
||||
return ompi_common_rc_ugni_to_ompi (rc);
|
||||
}
|
||||
|
||||
/* setup uGNI devices. we only support one device atm */
|
||||
ompi_common_ugni_module.device_count = 1;
|
||||
ompi_common_ugni_module.devices = calloc (ompi_common_ugni_module.device_count,
|
||||
sizeof (ompi_common_ugni_device_t));
|
||||
|
||||
for (i = 0 ; i < ompi_common_ugni_module.device_count ; ++i) {
|
||||
rc = ompi_common_ugni_device_init (ompi_common_ugni_module.devices + i,
|
||||
comm_world_size, i);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
OPAL_OUTPUT((-1, "error initializing uGNI device"));
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* send ugni modex */
|
||||
ompi_common_ugni_send_modex (my_rank);
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
193
ompi/mca/common/ugni/common_ugni.h
Обычный файл
193
ompi/mca/common/ugni/common_ugni.h
Обычный файл
@ -0,0 +1,193 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "ompi_config.h"
|
||||
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/include/opal/prefetch.h"
|
||||
#include "opal_stdint.h"
|
||||
|
||||
#include "ompi/include/ompi/constants.h"
|
||||
#include "ompi/runtime/ompi_module_exchange.h"
|
||||
#include "ompi/proc/proc.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdint.h>
|
||||
#include <sys/types.h>
|
||||
#include <assert.h>
|
||||
#include <sys/time.h>
|
||||
#include <gni_pub.h>
|
||||
|
||||
#include "common_ugni_ep.h"
|
||||
|
||||
#if !defined(MPI_COMMON_UGNI_H)
|
||||
#define MPI_COMMON_UGNI_H
|
||||
|
||||
/* One modex record, published per device by each process. */
typedef struct ompi_common_ugni_modex_t {
    /* NIC address of the publishing device */
    uint32_t addr;
    /* rank of the publishing process */
    int      id;
} ompi_common_ugni_modex_t;
|
||||
|
||||
struct ompi_common_ugni_device_t {
|
||||
opal_object_t super;
|
||||
|
||||
gni_nic_handle_t dev_handle;
|
||||
|
||||
/* Minor number of the Gemini NIC */
|
||||
int32_t dev_id;
|
||||
uint32_t dev_pe_addr;
|
||||
uint32_t dev_addr;
|
||||
uint32_t dev_cpu_id;
|
||||
|
||||
gni_cq_handle_t dev_local_cq;
|
||||
|
||||
size_t dev_ep_count;
|
||||
ompi_common_ugni_endpoint_t **dev_eps;
|
||||
};
|
||||
typedef struct ompi_common_ugni_device_t ompi_common_ugni_device_t;
|
||||
|
||||
struct ompi_common_ugni_module_t {
|
||||
/* protection tag */
|
||||
uint8_t ptag;
|
||||
|
||||
/* unique id for this process assigned by the system */
|
||||
uint32_t cookie;
|
||||
|
||||
/* communication domain handle */
|
||||
gni_cdm_handle_t cd_handle;
|
||||
|
||||
/* device count. to be used if we have more than 1 common per ugni device */
|
||||
int device_count;
|
||||
ompi_common_ugni_device_t *devices;
|
||||
|
||||
int local_cq_size;
|
||||
|
||||
int rdma_max_retries;
|
||||
};
|
||||
typedef struct ompi_common_ugni_module_t ompi_common_ugni_module_t;
|
||||
|
||||
struct ompi_common_ugni_post_desc_t {
|
||||
gni_post_descriptor_t base;
|
||||
|
||||
ompi_common_ugni_endpoint_t *endpoint;
|
||||
int tries;
|
||||
|
||||
/* NTH: callback function for this post. this may change in the future */
|
||||
void (*cbfunc) (struct ompi_common_ugni_post_desc_t *, int);
|
||||
};
|
||||
typedef struct ompi_common_ugni_post_desc_t ompi_common_ugni_post_desc_t;
|
||||
|
||||
extern ompi_common_ugni_module_t ompi_common_ugni_module;
|
||||
extern mca_base_component_t ompi_common_ugni_component;
|
||||
|
||||
static inline int
|
||||
ompi_common_rc_ugni_to_ompi (gni_return_t rc)
|
||||
{
|
||||
int codes[] = {OMPI_SUCCESS,
|
||||
OMPI_ERR_RESOURCE_BUSY,
|
||||
OMPI_ERR_BAD_PARAM,
|
||||
OMPI_ERR_OUT_OF_RESOURCE,
|
||||
OMPI_ERR_TIMEOUT,
|
||||
OMPI_ERR_PERM,
|
||||
OMPI_ERROR,
|
||||
OMPI_ERR_BAD_PARAM,
|
||||
OMPI_ERR_BAD_PARAM,
|
||||
OMPI_ERR_NOT_FOUND,
|
||||
OMPI_ERR_VALUE_OUT_OF_BOUNDS,
|
||||
OMPI_ERROR,
|
||||
OMPI_ERR_NOT_SUPPORTED,
|
||||
OMPI_ERR_OUT_OF_RESOURCE};
|
||||
return codes[rc];
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize uGNI communication domain and device(s).
|
||||
*/
|
||||
int ompi_common_ugni_init (void);
|
||||
|
||||
/*
|
||||
* Finalize uGNI communication domain and device(s).
|
||||
*/
|
||||
int ompi_common_ugni_fini (void);
|
||||
|
||||
static inline int
|
||||
ompi_common_ugni_process_completed_post (ompi_common_ugni_device_t *dev,
|
||||
gni_cq_handle_t cq_handle) {
|
||||
ompi_common_ugni_post_desc_t *desc;
|
||||
gni_return_t rc = GNI_RC_NOT_DONE;
|
||||
gni_cq_entry_t event_data = 0;
|
||||
uint32_t recoverable = 1;
|
||||
|
||||
rc = GNI_CqGetEvent (cq_handle, &event_data);
|
||||
if (GNI_RC_NOT_DONE == rc || GNI_CQ_GET_TYPE(event_data) != GNI_CQ_EVENT_TYPE_POST) {
|
||||
/* ignore smsg completion */
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY((GNI_RC_SUCCESS != rc && !event_data) || GNI_CQ_OVERRUN(event_data))) {
|
||||
/* TODO -- need to handle overrun -- how do we do this without an event?
|
||||
will the event eventually come back? Ask Cray */
|
||||
OPAL_OUTPUT((-1, "post error! cq overrun = %d", (int)GNI_CQ_OVERRUN(event_data)));
|
||||
assert (GNI_RC_SUCCESS == rc);
|
||||
return ompi_common_rc_ugni_to_ompi (rc);
|
||||
}
|
||||
|
||||
rc = GNI_GetCompleted (cq_handle, event_data, (gni_post_descriptor_t **) &desc);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
OPAL_OUTPUT((-1, "Error in GNI_GetComplete %s", gni_err_str[rc]));
|
||||
return ompi_common_rc_ugni_to_ompi (rc);
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(!GNI_CQ_STATUS_OK(event_data))) {
|
||||
(void) GNI_CqErrorRecoverable (event_data, &recoverable);
|
||||
|
||||
if (OPAL_UNLIKELY(!recoverable ||
|
||||
++desc->tries >= ompi_common_ugni_module.rdma_max_retries)) {
|
||||
OPAL_OUTPUT((-1, "giving up on descriptor %p", (void *) desc));
|
||||
/* give up */
|
||||
desc->cbfunc (desc, OMPI_ERROR);
|
||||
|
||||
return OMPI_ERROR;
|
||||
}
|
||||
|
||||
/* repost transaction */
|
||||
if (GNI_POST_RDMA_PUT == desc->base.type ||
|
||||
GNI_POST_RDMA_GET == desc->base.type) {
|
||||
rc = GNI_PostRdma (desc->endpoint->ep_handle, &desc->base);
|
||||
} else {
|
||||
rc = GNI_PostFma (desc->endpoint->ep_handle, &desc->base);
|
||||
}
|
||||
|
||||
return ompi_common_rc_ugni_to_ompi (rc);
|
||||
}
|
||||
|
||||
desc->cbfunc (desc, OMPI_SUCCESS);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline int ompi_common_ugni_progress (void) {
|
||||
ompi_common_ugni_device_t *dev;
|
||||
int count, i;
|
||||
|
||||
for (i = 0, count = 0 ; i < ompi_common_ugni_module.device_count ; ++i) {
|
||||
dev = ompi_common_ugni_module.devices + i;
|
||||
/* progress fma transactions (ignore local smsg) */
|
||||
count += ompi_common_ugni_process_completed_post (dev, dev->dev_local_cq);
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
#endif /* MPI_COMMON_UGNI_H */
|
147
ompi/mca/common/ugni/common_ugni_ep.c
Обычный файл
147
ompi/mca/common/ugni/common_ugni_ep.c
Обычный файл
@ -0,0 +1,147 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "common_ugni.h"
|
||||
|
||||
/* Constructor: a new endpoint starts unbound, with no bind references.
 * The device pointer and the remote address/id are left for the creator to
 * fill in. */
static void ompi_common_ugni_ep_construct (ompi_common_ugni_endpoint_t *ep)
{
    ep->bind_count = 0;
    ep->state      = OMPI_COMMON_UGNI_INIT;
    OBJ_CONSTRUCT(&ep->lock, opal_mutex_t);
}

/* Destructor: destroy the lock, release the uGNI handle if one is bound and
 * clear this endpoint's slot in the owning device's endpoint table. */
static void ompi_common_ugni_ep_destruct (ompi_common_ugni_endpoint_t *ep)
{
    ompi_common_ugni_device_t *owner;

    OBJ_DESTRUCT(&ep->lock);
    ompi_common_ugni_endpoint_unbind (ep);

    owner = ep->dev;
    owner->dev_eps[ep->ep_rem_id] = NULL;
}

OBJ_CLASS_INSTANCE(ompi_common_ugni_endpoint_t, opal_object_t,
                   ompi_common_ugni_ep_construct, ompi_common_ugni_ep_destruct);
|
||||
|
||||
int ompi_common_ugni_endpoint_for_proc (ompi_common_ugni_device_t *dev, ompi_proc_t *peer_proc,
|
||||
ompi_common_ugni_endpoint_t **ep)
|
||||
{
|
||||
ompi_common_ugni_endpoint_t *endpoint;
|
||||
ompi_common_ugni_modex_t *modex;
|
||||
size_t msg_size;
|
||||
int rem_id, rc;
|
||||
|
||||
assert (NULL != dev && NULL != ep && peer_proc);
|
||||
|
||||
rem_id = peer_proc->proc_name.vpid;;
|
||||
|
||||
if (NULL == dev->dev_eps[rem_id]) {
|
||||
endpoint = OBJ_NEW(ompi_common_ugni_endpoint_t);
|
||||
if (OPAL_UNLIKELY(NULL == endpoint)) {
|
||||
assert (0);
|
||||
return OMPI_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* Receive the modex */
|
||||
rc = ompi_modex_recv(&ompi_common_ugni_component,
|
||||
peer_proc, (void *)&modex, &msg_size);
|
||||
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
|
||||
OPAL_OUTPUT((-1, "btl/ugni error receiving modex"));
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* these should be the same */
|
||||
assert (rem_id == modex->id);
|
||||
|
||||
endpoint->ep_rem_addr = modex->addr;
|
||||
endpoint->ep_rem_id = modex->id;
|
||||
|
||||
endpoint->dev = dev;
|
||||
|
||||
*ep = endpoint;
|
||||
|
||||
dev->dev_eps[rem_id] = endpoint;
|
||||
} else {
|
||||
OBJ_RETAIN(dev->dev_eps[rem_id]);
|
||||
*ep = dev->dev_eps[rem_id];
|
||||
}
|
||||
|
||||
return OMPI_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * Give back a reference obtained from ompi_common_ugni_endpoint_for_proc().
 * The endpoint is destroyed when its last reference is released, so the
 * caller must not use it afterwards.
 *
 * @param[in] ep  endpoint to release (must not be NULL)
 */
void ompi_common_ugni_endpoint_return (ompi_common_ugni_endpoint_t *ep)
{
    assert(NULL != ep);

    /* drops one reference */
    OBJ_RELEASE(ep);
}
|
||||
|
||||
/**
 * Create a uGNI endpoint handle for this endpoint and bind it to the remote
 * peer. Does nothing if the endpoint is already bound (or further along).
 *
 * @param[in,out] ep  endpoint to bind
 *
 * @returns OMPI_SUCCESS if the endpoint is bound on return,
 *          OPAL_ERR_BAD_PARAM if ep is NULL, otherwise the translated uGNI
 *          error. On failure no uGNI handle is left allocated.
 */
int ompi_common_ugni_endpoint_bind (ompi_common_ugni_endpoint_t *ep)
{
    int rc;

    assert (NULL != ep);
    if (OPAL_UNLIKELY(NULL == ep)) {
        return OPAL_ERR_BAD_PARAM;
    }

    /* fast path: already bound, no need to take the lock */
    if (OPAL_LIKELY(OMPI_COMMON_UGNI_BOUND <= ep->state)) {
        return OMPI_SUCCESS;
    }

    OPAL_THREAD_LOCK(&ep->lock);

    /* Check again now that the lock is held: another thread may have bound
     * the endpoint between the test above and acquiring the lock, and a
     * second GNI_EpCreate would overwrite (and leak) its handle. */
    if (OMPI_COMMON_UGNI_BOUND <= ep->state) {
        OPAL_THREAD_UNLOCK(&ep->lock);
        return OMPI_SUCCESS;
    }

    /* create a uGNI endpoint handle and bind it to the remote peer */
    rc = GNI_EpCreate (ep->dev->dev_handle, ep->dev->dev_local_cq,
                       &ep->ep_handle);
    if (GNI_RC_SUCCESS == rc) {
        rc = GNI_EpBind (ep->ep_handle, ep->ep_rem_addr, ep->ep_rem_id);
        if (GNI_RC_SUCCESS == rc) {
            ep->state = OMPI_COMMON_UGNI_BOUND;
        } else {
            /* do not leak the handle that was just created */
            (void) GNI_EpDestroy (ep->ep_handle);
        }
    }

    OPAL_THREAD_UNLOCK(&ep->lock);

    return ompi_common_rc_ugni_to_ompi (rc);
}
|
||||
|
||||
/**
 * Unbind and destroy the uGNI endpoint handle of this endpoint.
 *
 * Does nothing when the bind count is zero. Failures reported by uGNI are
 * logged but do not stop the teardown.
 *
 * NOTE(review): nothing in this file increments bind_count, so unless a user
 * of this component does so this function is always a no-op and a bound
 * handle is never released -- confirm against the callers.
 *
 * @param[in,out] ep  endpoint to unbind
 *
 * @returns OMPI_SUCCESS always.
 */
int ompi_common_ugni_endpoint_unbind (ompi_common_ugni_endpoint_t *ep)
{
    int rc;

    if (0 == ep->bind_count) {
        return OMPI_SUCCESS;
    }

    assert (OMPI_COMMON_UGNI_BOUND == ep->state);

    rc = GNI_EpUnbind (ep->ep_handle);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        OPAL_OUTPUT((-1, "common/ugni error unbinding endpoint. rc = %d", rc));
    }

    /* capture the result: the check below used to re-test the stale value
     * left over from GNI_EpUnbind */
    rc = GNI_EpDestroy (ep->ep_handle);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        OPAL_OUTPUT((-1, "common/ugni error destroying endpoint. rc = %d", rc));
    }

    ep->state = OMPI_COMMON_UGNI_INIT;
    ep->bind_count--;

    return OMPI_SUCCESS;
}
|
||||
|
72
ompi/mca/common/ugni/common_ugni_ep.h
Обычный файл
72
ompi/mca/common/ugni/common_ugni_ep.h
Обычный файл
@ -0,0 +1,72 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#if !defined(MPI_COMMON_UGNI_EP_H)
|
||||
#define MPI_COMMON_UGNI_EP_H
|
||||
|
||||
/* Endpoint life-cycle states. The numeric order matters: code compares
 * states with <=, e.g. "at least bound". */
typedef enum ompi_common_ugni_endpoint_state_t {
    OMPI_COMMON_UGNI_INIT       = 0,  /* no uGNI handle bound */
    OMPI_COMMON_UGNI_BOUND      = 1,  /* handle created and bound to the peer */
    OMPI_COMMON_UGNI_CONNECTING = 2,
    OMPI_COMMON_UGNI_CONNECTED  = 3
} ompi_common_ugni_endpoint_state_t;
|
||||
|
||||
struct ompi_common_ugni_device_t;
|
||||
|
||||
struct ompi_common_ugni_endpoint_t {
|
||||
opal_object_t super;
|
||||
gni_ep_handle_t ep_handle; /**< uGNI handle for this endpoint */
|
||||
ompi_common_ugni_endpoint_state_t state; /**< bind/connection state */
|
||||
uint32_t ep_rem_addr, ep_rem_id; /**< remote information */
|
||||
struct ompi_common_ugni_device_t *dev; /**< device this endpoint is using */
|
||||
opal_mutex_t lock;
|
||||
int bind_count; /**< bind reference count */
|
||||
};
|
||||
typedef struct ompi_common_ugni_endpoint_t ompi_common_ugni_endpoint_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(ompi_common_ugni_endpoint_t);
|
||||
|
||||
/*
|
||||
* Get (and retain) a reference to an endpoint to peer_proc. This endpoint
|
||||
* needs to be returned with ompi_common_ugni_endpoint_return.
|
||||
*
|
||||
* @param[IN] dev uGNI device this endpoint should be bound to.
|
||||
* @param[IN] peer_proc remote peer the endpoint will be connected to.
|
||||
* @param[OUT] ep uGNI endpoint for the peer
|
||||
*/
|
||||
int ompi_common_ugni_endpoint_for_proc (struct ompi_common_ugni_device_t *dev, ompi_proc_t *peer_proc,
|
||||
ompi_common_ugni_endpoint_t **ep);
|
||||
|
||||
/*
|
||||
* Allocate and bind a uGNI endpoint handle to the remote peer.
|
||||
*
|
||||
* @param[IN] ep uGNI endpoint to bind
|
||||
*/
|
||||
int ompi_common_ugni_endpoint_bind (ompi_common_ugni_endpoint_t *ep);
|
||||
|
||||
/*
|
||||
* Unbind and free the uGNI endpoint handle associated with this endpoint.
|
||||
*
|
||||
* @param[IN] ep uGNI endpoint to unbind
|
||||
*/
|
||||
int ompi_common_ugni_endpoint_unbind (ompi_common_ugni_endpoint_t *ep);
|
||||
|
||||
/*
|
||||
* Return (and possibly free) an endpoint. The endpoint may not be used
|
||||
* once it is returned.
|
||||
*
|
||||
* @param[IN] ep uGNI endpoint to return
|
||||
*/
|
||||
void ompi_common_ugni_endpoint_return (ompi_common_ugni_endpoint_t *ep);
|
||||
|
||||
#endif /* MPI_COMMON_UGNI_EP_H */
|
56
ompi/mca/common/ugni/configure.m4
Обычный файл
56
ompi/mca/common/ugni/configure.m4
Обычный файл
@ -0,0 +1,56 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2006 QLogic Corp. All rights reserved.
|
||||
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# MCA_ompi_common_ugni_CONFIG([action-if-found], [action-if-not-found])
|
||||
# --------------------------------------------------------
|
||||
# check if GNI support can be found. sets prefix_{CPPFLAGS,
|
||||
# LDFLAGS, LIBS} as needed and runs action-if-found if there is
|
||||
# support, otherwise executes action-if-not-found
|
||||
#
|
||||
# NOTES
|
||||
# on Cray XE6 systems, the GNI development header (gni_pub.h) is in a
|
||||
# completely different place than the ugni library (libugni).
|
||||
#
|
||||
# EXAMPLE CONFIGURE USAGE:
|
||||
# --with-ugni=/base/path/to/libugni --with-ugni-includedir=/path/to/gni_pub.h
|
||||
#
|
||||
# --with-ugni=/opt/cray/ugni/default --with-ugni-includedir=/opt/cray/gni-headers/default/include
|
||||
|
||||
AC_DEFUN([MCA_ompi_common_ugni_CONFIG],[
    AC_CONFIG_FILES([ompi/mca/common/ugni/Makefile])

    # probe for uGNI and record the outcome
    OMPI_CHECK_UGNI([common_ugni],
                    [common_ugni_happy=yes],
                    [common_ugni_happy=no])

    # when uGNI is usable, hand its link flags to the wrapper compilers
    # before running the caller's success action
    AS_IF([test "x$common_ugni_happy" = "xyes"],
          [common_ugni_WRAPPER_EXTRA_LDFLAGS="$common_ugni_LDFLAGS"
           common_ugni_WRAPPER_EXTRA_LIBS="$common_ugni_LIBS"
           $1],
          [$2])

    # substitute in the things needed to build ugni
    AC_SUBST([common_ugni_CPPFLAGS])
    AC_SUBST([common_ugni_LDFLAGS])
    AC_SUBST([common_ugni_LIBS])
])dnl
|
Загрузка…
x
Ссылка в новой задаче
Block a user