1
1

btl/ugni: improve multi-threaded performance

This commit updates the ugni btl to make use of multiple device
contexts to improve the multi-threaded RMA performance. This commit
contains the following:

 - Cleanup the endpoint structure by removing unnecessary fields. The
   structure now also contains all the fields originally handled by the
   common/ugni endpoint.

 - Clean up the fragment allocation code to remove the need to
   initialize the my_list member of the fragment structure. This
   member is not initialized by the free list initializer function.

 - Remove the (now unused) common/ugni component. btl/ugni no longer
   needs the component. common/ugni was originally split out of
   btl/ugni to support bcol/ugni. As that component no longer exists
   there is no reason to keep this component.

 - Create wrappers for the ugni functionality required by
   btl/ugni. This was done to ease supporting multiple device
   contexts. The wrappers are thread safe and currently use a spin
   lock instead of a mutex. This produces better performance when
   using multiple threads spread over multiple cores. In the future
   this lock may be replaced by another serialization mechanism. The
   wrappers are located in a new file: btl_ugni_device.h.

 - Remove unnecessary device locking from serial parts of the ugni
   btl. This includes the first add-procs and module finalize.

 - Clean up fragment wait list code by moving enqueue into common
   function.

 - Expose the communication domain flags as an MCA variable. The
   defaults have been updated to reflect the recommended setting for
   knl and haswell.

 - Avoid allocating fragments for communication with already
   overloaded peers.

 - Allocate RDMA endpoints dynamically. This is needed to support
   spreading RMA operations across multiple contexts.

 - Add support for spreading RMA communication over multiple ugni
   device contexts. This should greatly improve the threading
   performance when communicating with multiple peers. By default the
   number of virtual devices depends on 1) whether
   opal_using_threads() is set, 2) how many local processes are in the
   job, and 3) how many bits are available in the pid. The last is
   used to ensure that each CDM is created with a unique id.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
Этот коммит содержится в:
Nathan Hjelm 2017-03-12 22:37:35 -06:00
родитель 12bf38a25c
Коммит d5cdeb81d0
27 изменённых файлов: 1938 добавлений и 1604 удалений

Просмотреть файл

@ -1,6 +1,6 @@
# -*- indent-tabs-mode:nil -*-
#
# Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
# Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
#
@ -40,14 +40,15 @@ ugni_SOURCES = \
btl_ugni_smsg.c \
btl_ugni_progress_thread.c \
btl_ugni_prepare.h \
btl_ugni_atomic.c
btl_ugni_atomic.c \
btl_ugni_init.c \
btl_ugni_device.h
mcacomponentdir = $(opallibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_btl_ugni_la_SOURCES = $(ugni_SOURCES)
nodist_mca_btl_ugni_la_SOURCES = $(ugni_nodist_SOURCES)
mca_btl_ugni_la_LIBADD = $(btl_ugni_LIBS) \
$(OPAL_TOP_BUILDDIR)/opal/mca/common/ugni/lib@OPAL_LIB_PREFIX@mca_common_ugni.la
mca_btl_ugni_la_LIBADD = $(btl_ugni_LIBS)
mca_btl_ugni_la_LDFLAGS = -module -avoid-version $(btl_ugni_LDFLAGS)
noinst_LTLIBRARIES = $(component_noinst)

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
@ -34,7 +34,6 @@
#include "opal/mca/btl/base/btl_base_error.h"
#include "opal/class/opal_hash_table.h"
#include "opal/class/opal_free_list.h"
#include "opal/mca/common/ugni/common_ugni.h"
#include <errno.h>
#include <stdint.h>
@ -48,6 +47,23 @@
#define MCA_BTL_UGNI_CONNECT_DIRECTED_ID 0x8000000000000000ull
#define MCA_BTL_UGNI_DATAGRAM_MASK 0x8000000000000000ull
/** maximum number of supported virtual devices */
#define MCA_BTL_UGNI_MAX_DEV_HANDLES 128
/** number of rdma completion queue items to remove per progress loop */
#define MCA_BTL_UGNI_COMPLETIONS_PER_LOOP 16
/**
* Modex data
*/
struct mca_btl_ugni_modex_t {
/** GNI NIC address */
uint32_t addr;
/** CDM identifier (base) */
int id;
};
typedef struct mca_btl_ugni_modex_t mca_btl_ugni_modex_t;
/* ompi and smsg endpoint attributes */
typedef struct mca_btl_ugni_endpoint_attr_t {
opal_process_name_t proc_name;
@ -61,12 +77,73 @@ enum {
MCA_BTL_UGNI_RCACHE_GRDMA
};
enum mca_btl_ugni_free_list_id_t {
/* eager fragment list (registered) */
MCA_BTL_UGNI_LIST_EAGER_SEND,
MCA_BTL_UGNI_LIST_EAGER_RECV,
/* SMSG fragment list (unregistered) */
MCA_BTL_UGNI_LIST_SMSG,
/* RDMA fragment list */
MCA_BTL_UGNI_LIST_RDMA,
MCA_BTL_UGNI_LIST_RDMA_INT,
MCA_BTL_UGNI_LIST_MAX,
};
struct mca_btl_ugni_cq_t {
/** ugni CQ handle */
gni_cq_handle_t gni_handle;
/** number of completions expected on the CQ */
int32_t active_operations;
};
typedef struct mca_btl_ugni_cq_t mca_btl_ugni_cq_t;
/**
* GNI virtual device
*/
struct mca_btl_ugni_device_t {
/** Communication domain handle */
gni_cdm_handle_t dev_cd_handle;
/** protection for ugni access */
volatile int32_t lock;
/** Index of device in module devices array */
int dev_index;
/** number of SMSG connections */
volatile int32_t smsg_connections;
/** uGNI device handle */
gni_nic_handle_t dev_handle;
/** uGNI rdma completion queue */
mca_btl_ugni_cq_t dev_rdma_local_cq;
/** local rdma completion queue (async) */
mca_btl_ugni_cq_t dev_rdma_local_irq_cq;
/** local SMSG completion queue */
mca_btl_ugni_cq_t dev_smsg_local_cq;
/** IRQ memory handle for this device */
gni_mem_handle_t smsg_irq_mhndl;
/** RDMA endpoint free list */
opal_free_list_t endpoints;
/** post descriptors pending resources */
opal_list_t pending_post;
};
typedef struct mca_btl_ugni_device_t mca_btl_ugni_device_t;
typedef intptr_t (*mca_btl_ugni_device_serialize_fn_t) (mca_btl_ugni_device_t *device, void *arg);
typedef struct mca_btl_ugni_module_t {
mca_btl_base_module_t super;
bool initialized;
opal_common_ugni_device_t *device;
mca_btl_ugni_device_t devices[MCA_BTL_UGNI_MAX_DEV_HANDLES];
opal_mutex_t endpoint_lock;
size_t endpoint_count;
@ -82,9 +159,6 @@ typedef struct mca_btl_ugni_module_t {
opal_mutex_t eager_get_pending_lock;
opal_list_t eager_get_pending;
opal_mutex_t pending_descriptors_lock;
opal_list_t pending_descriptors;
opal_free_list_t post_descriptors;
mca_mpool_base_module_t *mpool;
@ -95,23 +169,11 @@ typedef struct mca_btl_ugni_module_t {
struct mca_btl_ugni_endpoint_attr_t wc_remote_attr, wc_local_attr;
gni_cq_handle_t rdma_local_cq;
gni_cq_handle_t smsg_remote_cq;
gni_cq_handle_t smsg_local_cq;
gni_cq_handle_t smsg_remote_irq_cq;
gni_cq_handle_t rdma_local_irq_cq;
/* eager fragment list (registered) */
opal_free_list_t eager_frags_send;
opal_free_list_t eager_frags_recv;
/* SMSG fragment list (unregistered) */
opal_free_list_t smsg_frags;
/* RDMA fragment list */
opal_free_list_t rdma_frags;
opal_free_list_t rdma_int_frags;
/** fragment free lists (see enum mca_btl_ugni_free_list_id_t) */
opal_free_list_t frags_lists[MCA_BTL_UGNI_LIST_MAX];
/* lock for this list */
opal_mutex_t ep_wait_list_lock;
@ -197,10 +259,62 @@ typedef struct mca_btl_ugni_component_t {
/* Indicate whether progress thread allowed */
bool progress_thread_enabled;
/** Number of ugni device contexts to create per GNI device */
int virtual_device_count;
/** Protection tag */
uint8_t ptag;
/** Unique id for this process assigned by the system */
uint32_t cookie;
/** Starting value of communication identifier */
uint32_t cdm_id_base;
/** GNI CDM flags */
uint32_t cdm_flags;
/** NIC address */
uint32_t dev_addr;
} mca_btl_ugni_component_t;
int mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
opal_common_ugni_device_t *device);
/* Global structures */
OPAL_MODULE_DECLSPEC extern mca_btl_ugni_component_t mca_btl_ugni_component;
OPAL_MODULE_DECLSPEC extern mca_btl_ugni_module_t mca_btl_ugni_module;
/**
* Get a virtual device for communication
*/
static inline mca_btl_ugni_device_t *mca_btl_ugni_ep_get_device (mca_btl_ugni_module_t *ugni_module)
{
static volatile uint32_t device_index = (uint32_t) 0;
uint32_t dev_index;
/* don't really care if the device index is atomically updated */
dev_index = (device_index++) & (mca_btl_ugni_component.virtual_device_count - 1);
return ugni_module->devices + dev_index;
}
static inline int mca_btl_rc_ugni_to_opal (gni_return_t rc)
{
static int codes[] = {OPAL_SUCCESS,
OPAL_ERR_RESOURCE_BUSY,
OPAL_ERR_BAD_PARAM,
OPAL_ERR_OUT_OF_RESOURCE,
OPAL_ERR_TIMEOUT,
OPAL_ERR_PERM,
OPAL_ERROR,
OPAL_ERR_BAD_PARAM,
OPAL_ERR_BAD_PARAM,
OPAL_ERR_NOT_FOUND,
OPAL_ERR_VALUE_OUT_OF_BOUNDS,
OPAL_ERROR,
OPAL_ERR_NOT_SUPPORTED,
OPAL_ERR_OUT_OF_RESOURCE};
return codes[rc];
}
/**
* BML->BTL notification of change in the process list.
@ -324,10 +438,32 @@ typedef struct mca_btl_ugni_reg_t {
mca_btl_base_registration_handle_t handle;
} mca_btl_ugni_reg_t;
/* Global structures */
/**
* Initialize uGNI support.
*/
int mca_btl_ugni_init (void);
OPAL_MODULE_DECLSPEC extern mca_btl_ugni_component_t mca_btl_ugni_component;
OPAL_MODULE_DECLSPEC extern mca_btl_ugni_module_t mca_btl_ugni_module;
/**
* Finalize uGNI support.
*/
int mca_btl_ugni_fini (void);
int mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module);
/**
* Initialize a virtual device for device index 0.
*
* @param[inout] device Device to initialize
* @param[in] virtual_device_id Virtual device identifier (up to max handles)
*/
int mca_btl_ugni_device_init (mca_btl_ugni_device_t *device, int virtual_device_id);
/**
* Finalize a virtual device.
*
* @param[in] device Device to finalize
*/
int mca_btl_ugni_device_fini (mca_btl_ugni_device_t *dev);
/* Get a unique 64-bit id for the process name */
static inline uint64_t mca_btl_ugni_proc_name_to_id (opal_process_name_t name) {
@ -338,6 +474,57 @@ static inline uint64_t mca_btl_ugni_proc_name_to_id (opal_process_name_t name) {
int mca_btl_ugni_spawn_progress_thread(struct mca_btl_base_module_t* btl);
int mca_btl_ugni_kill_progress_thread(void);
/**
* Try to lock a uGNI device for exclusive access
*/
static inline int mca_btl_ugni_device_trylock (mca_btl_ugni_device_t *device)
{
/* checking the lock non-atomically first can reduce the number of
* unnecessary atomic operations. */
return (device->lock || opal_atomic_swap_32 (&device->lock, 1));
}
/**
* Lock a uGNI device for exclusive access
*/
static inline void mca_btl_ugni_device_lock (mca_btl_ugni_device_t *device)
{
while (mca_btl_ugni_device_trylock (device));
}
/**
* Release exclusive access to the device
*/
static inline void mca_btl_ugni_device_unlock (mca_btl_ugni_device_t *device)
{
opal_atomic_wmb ();
device->lock = 0;
}
/**
* Serialize an operation on a uGNI device
*
* @params[in] device ugni device
* @params[in] fn function to serialize
* @params[in] arg function argument
*/
static inline intptr_t mca_btl_ugni_device_serialize (mca_btl_ugni_device_t *device,
mca_btl_ugni_device_serialize_fn_t fn, void *arg)
{
intptr_t rc;
if (!opal_using_threads ()) {
return fn (device, arg);
}
/* NTH: for now the device is just protected by a spin lock but this will change in the future */
mca_btl_ugni_device_lock (device);
rc = fn (device, arg);
mca_btl_ugni_device_unlock (device);
return rc;
}
/** Number of times the progress thread has woken up */
extern unsigned int mca_btl_ugni_progress_thread_wakeups;

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
@ -20,7 +20,7 @@
#include "opal/include/opal/align.h"
#include "opal/mca/pmix/pmix.h"
#define INITIAL_GNI_EPS 10000
#define INITIAL_GNI_EPS 1024
static int
mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module);
@ -50,7 +50,7 @@ int mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl, size_t nprocs,
/* NTH: might want to vary this size based off the universe size (if
* one exists). the table is only used for connection lookup and
* endpoint removal. */
rc = opal_hash_table_init (&ugni_module->id_to_endpoint, 512);
rc = opal_hash_table_init (&ugni_module->id_to_endpoint, INITIAL_GNI_EPS);
if (OPAL_SUCCESS != rc) {
BTL_ERROR(("error initializing the endpoint hash. rc = %d", rc));
return rc;
@ -58,93 +58,63 @@ int mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl, size_t nprocs,
}
for (size_t i = 0 ; i < nprocs ; ++i) {
struct opal_proc_t *opal_proc = procs[i];
uint64_t proc_id = mca_btl_ugni_proc_name_to_id(opal_proc->proc_name);
/* check for an existing endpoint */
OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) (peers + i))) {
if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) {
ugni_module->nlocal_procs++;
/* ugni is allowed on local processes to provide support for network
* atomic operations */
}
/* Create and Init endpoints */
rc = mca_btl_ugni_init_ep (ugni_module, peers + i, (mca_btl_ugni_module_t *) btl, opal_proc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
BTL_ERROR(("btl/ugni error initializing endpoint"));
return rc;
}
/* go ahead and connect the local endpoint for RDMA/CQ write */
if (opal_proc == opal_proc_local_get ()) {
ugni_module->local_ep = peers[i];
}
/* Add this endpoint to the pointer array. */
BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) peers[i]));
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, peers[i]);
++ugni_module->endpoint_count;
peers[i] = mca_btl_ugni_get_ep (btl, procs[i]);
if (NULL == peers[i]) {
continue;
}
if (procs[i] == opal_proc_local_get ()) {
ugni_module->local_ep = peers[i];
}
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
/* Set the reachable bit if necessary */
if (reachable) {
rc = opal_bitmap_set_bit (reachable, i);
(void) opal_bitmap_set_bit (reachable, i);
}
}
mca_btl_ugni_module_set_max_reg (ugni_module, ugni_module->nlocal_procs);
if (false == ugni_module->initialized) {
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.local_cq_size,
0, GNI_CQ_NOBLOCK, NULL, NULL, &ugni_module->rdma_local_cq);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating local BTE/FMA CQ"));
return opal_common_rc_ugni_to_opal (rc);
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
mca_btl_ugni_device_t *device = ugni_module->devices + i;
rc = GNI_CqCreate (device->dev_handle, mca_btl_ugni_component.local_cq_size, 0,
GNI_CQ_NOBLOCK, NULL, NULL, &device->dev_rdma_local_cq.gni_handle);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating local BTE/FMA CQ"));
return mca_btl_rc_ugni_to_opal (rc);
}
rc = GNI_CqCreate (device->dev_handle, mca_btl_ugni_component.local_cq_size,
0, GNI_CQ_NOBLOCK, NULL, NULL, &device->dev_smsg_local_cq.gni_handle);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating local SMSG CQ"));
return mca_btl_rc_ugni_to_opal (rc);
}
if (mca_btl_ugni_component.progress_thread_enabled) {
rc = GNI_CqCreate (device->dev_handle, mca_btl_ugni_component.local_cq_size,
0, GNI_CQ_BLOCKING, NULL, NULL, &device->dev_rdma_local_irq_cq.gni_handle);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating local BTE/FMA CQ"));
return mca_btl_rc_ugni_to_opal (rc);
}
}
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.local_cq_size,
0, GNI_CQ_NOBLOCK, NULL, NULL, &ugni_module->smsg_local_cq);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating local SMSG CQ"));
return opal_common_rc_ugni_to_opal (rc);
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.remote_cq_size,
rc = GNI_CqCreate (ugni_module->devices[0].dev_handle, mca_btl_ugni_component.remote_cq_size,
0, GNI_CQ_NOBLOCK, NULL, NULL, &ugni_module->smsg_remote_cq);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating remote SMSG CQ"));
return opal_common_rc_ugni_to_opal (rc);
return mca_btl_rc_ugni_to_opal (rc);
}
if (mca_btl_ugni_component.progress_thread_enabled) {
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.local_cq_size,
0, GNI_CQ_BLOCKING, NULL, NULL, &ugni_module->rdma_local_irq_cq);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating local BTE/FMA CQ"));
return opal_common_rc_ugni_to_opal (rc);
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.remote_cq_size,
rc = GNI_CqCreate (ugni_module->devices[0].dev_handle, mca_btl_ugni_component.remote_cq_size,
0, GNI_CQ_BLOCKING, NULL, NULL, &ugni_module->smsg_remote_irq_cq);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating remote SMSG CQ"));
return opal_common_rc_ugni_to_opal (rc);
return mca_btl_rc_ugni_to_opal (rc);
}
}
@ -175,15 +145,13 @@ int mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl, size_t nprocs,
return OPAL_ERR_OUT_OF_RESOURCE;
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_MemRegister(ugni_module->device->dev_handle,
rc = GNI_MemRegister(ugni_module->devices[0].dev_handle,
(unsigned long)mmap_start_addr,
4096,
ugni_module->smsg_remote_irq_cq,
GNI_MEM_READWRITE,
-1,
&ugni_module->device->smsg_irq_mhndl);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
&ugni_module->devices[0].smsg_irq_mhndl);
mca_btl_ugni_spawn_progress_thread(btl);
}
@ -198,18 +166,10 @@ int mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl,
size_t nprocs, struct opal_proc_t **procs,
struct mca_btl_base_endpoint_t **peers) {
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
size_t i;
int rc;
while (ugni_module->active_send_count) {
/* ensure all sends are complete before removing and procs */
rc = mca_btl_ugni_progress_local_smsg (ugni_module);
if (OPAL_SUCCESS != rc) {
break;
}
}
OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
for (i = 0 ; i < nprocs ; ++i) {
for (size_t i = 0 ; i < nprocs ; ++i) {
struct opal_proc_t *opal_proc = procs[i];
uint64_t proc_id = mca_btl_ugni_proc_name_to_id(opal_proc->proc_name);
mca_btl_base_endpoint_t *ep = NULL;
@ -224,10 +184,18 @@ int mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl,
--ugni_module->endpoint_count;
}
if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) {
--ugni_module->nlocal_procs;
}
/* remove the endpoint from the hash table */
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, NULL);
}
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
mca_btl_ugni_module_set_max_reg (ugni_module, ugni_module->nlocal_procs);
return OPAL_SUCCESS;
}
@ -244,9 +212,12 @@ struct mca_btl_base_endpoint_t *mca_btl_ugni_get_ep (struct mca_btl_base_module_
do {
rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) &ep);
if (OPAL_SUCCESS == rc) {
BTL_VERBOSE(("returning existing endpoint for proc %s", OPAL_NAME_PRINT(proc->proc_name)));
break;
}
BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) proc));
/* Create and Init endpoints */
rc = mca_btl_ugni_init_ep (ugni_module, &ep, ugni_module, proc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
@ -254,8 +225,13 @@ struct mca_btl_base_endpoint_t *mca_btl_ugni_get_ep (struct mca_btl_base_module_
break;
}
/* Add this endpoint to the pointer array. */
BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) ep));
/* ugni is allowed on local processes to provide support for network atomic operations */
if (OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) {
++ugni_module->nlocal_procs;
}
++ugni_module->endpoint_count;
/* add this endpoint to the connection lookup table */
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, ep);
} while (0);
@ -269,10 +245,8 @@ static int ugni_reg_mem (void *reg_data, void *base, size_t size,
mca_rcache_base_registration_t *reg)
{
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) reg_data;
mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *) reg;
gni_cq_handle_t cq = NULL;
gni_return_t rc;
int flags;
gni_cq_handle_t cq = 0;
int flags, rc;
if (ugni_module->reg_count >= ugni_module->reg_max) {
return OPAL_ERR_OUT_OF_RESOURCE;
@ -293,37 +267,26 @@ static int ugni_reg_mem (void *reg_data, void *base, size_t size,
cq = ugni_module->smsg_remote_cq;
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_MemRegister (ugni_module->device->dev_handle, (uint64_t) base,
size, cq, flags, -1, &(ugni_reg->handle.gni_handle));
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
rc = mca_btl_ugni_reg_mem (ugni_module, base, size, (mca_btl_ugni_reg_t *) reg, cq, flags);
if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
opal_atomic_add_32(&ugni_module->reg_count,1);
}
opal_atomic_add_32(&ugni_module->reg_count,1);
return OPAL_SUCCESS;
return rc;
}
static int
ugni_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg)
{
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) reg_data;
mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *)reg;
gni_return_t rc;
int rc;
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_MemDeregister (ugni_module->device->dev_handle, &ugni_reg->handle.gni_handle);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
return OPAL_ERROR;
rc = mca_btl_ugni_dereg_mem (ugni_module, (mca_btl_ugni_reg_t *) reg);
if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
opal_atomic_add_32(&ugni_module->reg_count,-1);
}
opal_atomic_add_32(&ugni_module->reg_count,-1);
return OPAL_SUCCESS;
return rc;
}
static int
@ -356,7 +319,7 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
return rc;
}
rc = opal_free_list_init (&ugni_module->smsg_frags,
rc = opal_free_list_init (ugni_module->frags_lists + MCA_BTL_UGNI_LIST_SMSG,
sizeof (mca_btl_ugni_smsg_frag_t),
opal_cache_line_size, OBJ_CLASS(mca_btl_ugni_smsg_frag_t),
mca_btl_ugni_component.ugni_smsg_limit,
@ -365,13 +328,13 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
mca_btl_ugni_component.ugni_free_list_max,
mca_btl_ugni_component.ugni_free_list_inc,
NULL, 0, NULL, (opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
(void *) ugni_module);
(void *) (intptr_t) MCA_BTL_UGNI_LIST_SMSG);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("error creating smsg fragment free list"));
return rc;
}
rc = opal_free_list_init (&ugni_module->rdma_frags,
rc = opal_free_list_init (ugni_module->frags_lists + MCA_BTL_UGNI_LIST_RDMA,
sizeof (mca_btl_ugni_rdma_frag_t), 64,
OBJ_CLASS(mca_btl_ugni_rdma_frag_t),
0, opal_cache_line_size,
@ -379,17 +342,17 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
mca_btl_ugni_component.ugni_free_list_max,
mca_btl_ugni_component.ugni_free_list_inc,
NULL, 0, NULL, (opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
(void *) ugni_module);
(void *) (intptr_t) MCA_BTL_UGNI_LIST_RDMA);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
}
rc = opal_free_list_init (&ugni_module->rdma_int_frags,
rc = opal_free_list_init (ugni_module->frags_lists + MCA_BTL_UGNI_LIST_RDMA_INT,
sizeof (mca_btl_ugni_rdma_frag_t), 8,
OBJ_CLASS(mca_btl_ugni_rdma_frag_t),
0, opal_cache_line_size, 0, -1, 64,
NULL, 0, NULL, (opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
(void *) ugni_module);
(void *) (intptr_t) MCA_BTL_UGNI_LIST_RDMA_INT);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
}
@ -419,14 +382,14 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
}
ugni_module->rcache =
mca_rcache_base_module_create (rcache_name, ugni_module->device, &rcache_resources.base);
mca_rcache_base_module_create (rcache_name, ugni_module->devices, &rcache_resources.base);
if (NULL == ugni_module->rcache) {
BTL_ERROR(("error creating registration cache"));
return OPAL_ERROR;
}
rc = opal_free_list_init (&ugni_module->eager_frags_send,
rc = opal_free_list_init (ugni_module->frags_lists + MCA_BTL_UGNI_LIST_EAGER_SEND,
sizeof (mca_btl_ugni_eager_frag_t), 8,
OBJ_CLASS(mca_btl_ugni_eager_frag_t),
ugni_module->super.btl_eager_limit, 64,
@ -435,13 +398,13 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
mca_btl_ugni_component.ugni_eager_inc,
ugni_module->super.btl_mpool, 0, ugni_module->rcache,
(opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
(void *) ugni_module);
(void *) (intptr_t) MCA_BTL_UGNI_LIST_EAGER_SEND);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("error creating eager send fragment free list"));
return rc;
}
rc = opal_free_list_init (&ugni_module->eager_frags_recv,
rc = opal_free_list_init (ugni_module->frags_lists + MCA_BTL_UGNI_LIST_EAGER_RECV,
sizeof (mca_btl_ugni_eager_frag_t), 8,
OBJ_CLASS(mca_btl_ugni_eager_frag_t),
ugni_module->super.btl_eager_limit, 64,
@ -450,7 +413,7 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
mca_btl_ugni_component.ugni_eager_inc,
ugni_module->super.btl_mpool, 0, ugni_module->rcache,
(opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
(void *) ugni_module);
(void *) (intptr_t) MCA_BTL_UGNI_LIST_EAGER_RECV);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("error creating eager receive fragment free list"));
return rc;
@ -503,14 +466,22 @@ mca_btl_ugni_module_set_max_reg (mca_btl_ugni_module_t *ugni_module, int nlocal_
gni_return_t grc;
int fuzz = 20;
grc = GNI_GetJobResInfo (ugni_module->device->dev_id, opal_common_ugni_module.ptag,
grc = GNI_GetJobResInfo (0, mca_btl_ugni_component.ptag,
GNI_JOB_RES_MDD, &res_des);
if (GNI_RC_SUCCESS == grc) {
ugni_module->reg_max = (res_des.limit - fuzz) / nlocal_procs;
if (nlocal_procs) {
ugni_module->reg_max = (res_des.limit - fuzz) / nlocal_procs;
} else {
ugni_module->reg_max = 0;
}
}
#else
/* no way to determine the maximum registration count */
ugni_module->reg_max = 1200 / nlocal_procs;
if (nlocal_procs) {
ugni_module->reg_max = 1200 / nlocal_procs;
} else {
ugni_module->reg_max = 0;
}
#endif
} else if (-1 == mca_btl_ugni_component.max_mem_reg) {
ugni_module->reg_max = INT_MAX;
@ -557,7 +528,7 @@ static int mca_btl_ugni_smsg_setup (int nprocs)
grc = GNI_SmsgBufferSizeNeeded (&tmp_smsg_attrib, &mbox_size);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
BTL_ERROR(("error in GNI_SmsgBufferSizeNeeded"));
return opal_common_rc_ugni_to_opal (grc);
return mca_btl_rc_ugni_to_opal (grc);
}
mca_btl_ugni_component.smsg_mbox_size = OPAL_ALIGN(mbox_size, 64, unsigned int);

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
@ -95,34 +95,23 @@ int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
return OPAL_ERR_NOT_SUPPORTED;
}
rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
}
mca_btl_ugni_alloc_post_descriptor (endpoint, NULL, cbfunc, cbcontext, cbdata, &post_desc);
post_desc = mca_btl_ugni_alloc_post_descriptor (endpoint, NULL, cbfunc, cbcontext, cbdata);
if (OPAL_UNLIKELY(NULL == post_desc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, 0, dummy, remote_address,
init_gni_post_desc (post_desc, order, GNI_POST_AMO, 0, dummy, remote_address,
remote_handle->gni_handle, size, 0);
post_desc->desc.base.amo_cmd = gni_op;
post_desc->desc.amo_cmd = gni_op;
post_desc->desc.base.first_operand = operand;
post_desc->desc.first_operand = operand;
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
rc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base);
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
if (GNI_RC_ILLEGAL_OP == rc) {
return OPAL_ERR_NOT_SUPPORTED;
}
return OPAL_ERR_OUT_OF_RESOURCE;
rc = mca_btl_ugni_endpoint_post_fma (endpoint, post_desc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
mca_btl_ugni_return_post_descriptor (post_desc);
}
return OPAL_SUCCESS;
return rc;
}
int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
@ -147,35 +136,24 @@ int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_en
return OPAL_ERR_NOT_SUPPORTED;
}
rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
}
mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata, &post_desc);
post_desc = mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata);
if (OPAL_UNLIKELY(NULL == post_desc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle,
init_gni_post_desc (post_desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle,
remote_address, remote_handle->gni_handle, size, 0);
post_desc->desc.base.amo_cmd = gni_op;
post_desc->desc.amo_cmd = gni_op;
post_desc->desc.base.first_operand = operand;
post_desc->desc.first_operand = operand;
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
rc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base);
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
if (GNI_RC_ILLEGAL_OP == rc) {
return OPAL_ERR_NOT_SUPPORTED;
}
return OPAL_ERR_OUT_OF_RESOURCE;
rc = mca_btl_ugni_endpoint_post_fma (endpoint, post_desc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
mca_btl_ugni_return_post_descriptor (post_desc);
}
return OPAL_SUCCESS;
return rc;
}
int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
@ -190,31 +168,23 @@ int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_
gni_op = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? GNI_FMA_ATOMIC2_CSWAP_S : GNI_FMA_ATOMIC_CSWAP;
size = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 4 : 8;
rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
}
mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata, &post_desc);
post_desc = mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata);
if (OPAL_UNLIKELY(NULL == post_desc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle,
init_gni_post_desc (post_desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle,
remote_address, remote_handle->gni_handle, size, 0);
post_desc->desc.base.amo_cmd = gni_op;
post_desc->desc.amo_cmd = gni_op;
post_desc->desc.base.first_operand = compare;
post_desc->desc.base.second_operand = value;
post_desc->desc.first_operand = compare;
post_desc->desc.second_operand = value;
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
rc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base);
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
return OPAL_ERR_OUT_OF_RESOURCE;
rc = mca_btl_ugni_endpoint_post_fma (endpoint, post_desc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
mca_btl_ugni_return_post_descriptor (post_desc);
}
return OPAL_SUCCESS;
return rc;
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -73,7 +73,7 @@ mca_base_var_enum_value_flag_t cdm_flags[] = {
{.flag = GNI_CDM_MODE_CACHED_AMO_ENABLED, .string = "cached-amo-enabled", .conflicting_flag = 0},
{.flag = GNI_CDM_MODE_CQ_NIC_LOCAL_PLACEMENT, .string = "cq-nic-placement", .conflicting_flag = 0},
{.flag = GNI_CDM_MODE_FMA_SMALL_WINDOW, .string = "fma-small-window", .conflicting_flag = 0},
{}
{.string = NULL}
};
static inline int mca_btl_ugni_get_stat (const mca_base_pvar_t *pvar, void *value, void *obj)
@ -81,8 +81,10 @@ static inline int mca_btl_ugni_get_stat (const mca_base_pvar_t *pvar, void *valu
gni_statistic_t statistic = (gni_statistic_t) (intptr_t) pvar->ctx;
gni_return_t rc = GNI_RC_SUCCESS;
rc = GNI_GetNicStat (mca_btl_ugni_component.modules[0].device.dev_handle, statistic,
value);
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
rc = GNI_GetNicStat (mca_btl_ugni_component.modules[0].devices[i].dev_handle, statistic,
((unsigned int *) value) + i);
}
return mca_btl_rc_ugni_to_opal (rc);
}
@ -221,6 +223,31 @@ static int btl_ugni_component_register(void)
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mbox_increment);
/* communication domain flags */
rc = mca_base_var_enum_create_flag ("btl_ugni_cdm_flags", cdm_flags, (mca_base_var_enum_flag_t **) &new_enum);
if (OPAL_SUCCESS != rc) {
return rc;
}
mca_btl_ugni_component.cdm_flags = GNI_CDM_MODE_FORK_PARTCOPY | GNI_CDM_MODE_ERR_NO_KILL | GNI_CDM_MODE_FAST_DATAGRAM_POLL |
GNI_CDM_MODE_MDD_SHARED | GNI_CDM_MODE_FMA_SHARED | GNI_CDM_MODE_FMA_SMALL_WINDOW;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"cdm_flags", "Flags to set when creating a communication domain "
" (default: fork-fullcopy,cached-amo-enabled,err-no-kill,fast-datagram-poll,"
"fma-shared,fma-small-window)",
MCA_BASE_VAR_TYPE_UNSIGNED_INT, new_enum, 0,
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.cdm_flags);
OBJ_RELEASE(new_enum);
mca_btl_ugni_component.virtual_device_count = 0;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"virtual_device_count", "Number of virtual devices to create. Higher numbers may "
"result in better performance when using threads. (default: auto, max: 8)",
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.virtual_device_count);
/* determine if there are get alignment restrictions */
GNI_GetDeviceType (&device_type);
@ -242,12 +269,9 @@ static int btl_ugni_component_register(void)
}
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"smsg_page_size", "Page size to use for SMSG "
"mailbox allocation (default: detect)",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.smsg_page_size);
"smsg_page_size", "Page size to use for SMSG mailbox allocation (default: detect)",
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.smsg_page_size);
mca_btl_ugni_component.progress_thread_requested = 0;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
@ -300,9 +324,10 @@ static int btl_ugni_component_register(void)
return rc;
}
mca_btl_ugni_component.rcache_type = MCA_BTL_UGNI_RCACHE_UDREG;
/* NTH: there are known *serious* performance issues with udreg. if they are ever resolved it is the preferred rcache */
mca_btl_ugni_component.rcache_type = MCA_BTL_UGNI_RCACHE_GRDMA;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"rcache", "registration cache to use", MCA_BASE_VAR_TYPE_INT, new_enum,
"rcache", "registration cache to use (default: grdma)", MCA_BASE_VAR_TYPE_INT, new_enum,
0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.rcache_type);
OBJ_RELEASE(new_enum);
@ -390,7 +415,7 @@ btl_ugni_component_open(void)
static int
btl_ugni_component_close(void)
{
opal_common_ugni_fini ();
mca_btl_ugni_fini ();
if (mca_btl_ugni_component.modules) {
free (mca_btl_ugni_component.modules);
@ -407,7 +432,6 @@ mca_btl_ugni_component_init (int *num_btl_modules,
{
struct mca_btl_base_module_t **base_modules;
mca_btl_ugni_module_t *ugni_modules;
unsigned int i;
int rc;
if (16384 < mca_btl_ugni_component.ugni_smsg_limit) {
@ -425,19 +449,18 @@ mca_btl_ugni_component_init (int *num_btl_modules,
}
/* Initialize ugni library and create communication domain */
rc = opal_common_ugni_init();
rc = mca_btl_ugni_init();
if (OPAL_SUCCESS != rc) {
return NULL;
}
/* Create and initialize one module per uGNI device */
mca_btl_ugni_component.ugni_num_btls = opal_common_ugni_module.device_count;
/* For now only create a single BTL module */
mca_btl_ugni_component.ugni_num_btls = 1;
BTL_VERBOSE(("btl/ugni initializing"));
ugni_modules = mca_btl_ugni_component.modules = (mca_btl_ugni_module_t *)
calloc (mca_btl_ugni_component.ugni_num_btls,
sizeof (mca_btl_ugni_module_t));
calloc (mca_btl_ugni_component.ugni_num_btls, sizeof (mca_btl_ugni_module_t));
if (OPAL_UNLIKELY(NULL == mca_btl_ugni_component.modules)) {
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
@ -460,20 +483,15 @@ mca_btl_ugni_component_init (int *num_btl_modules,
mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = mca_btl_ugni_module.super.btl_eager_limit;
for (i = 0 ; i < mca_btl_ugni_component.ugni_num_btls ; ++i) {
mca_btl_ugni_module_t *ugni_module = ugni_modules + i;
rc = mca_btl_ugni_module_init (ugni_module,
opal_common_ugni_module.devices + i);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("Failed to initialize uGNI module @ %s:%d", __FILE__,
__LINE__));
return NULL;
}
base_modules[i] = (mca_btl_base_module_t *) ugni_module;
rc = mca_btl_ugni_module_init (ugni_modules);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("Failed to initialize uGNI module @ %s:%d", __FILE__,
__LINE__));
return NULL;
}
*base_modules = (mca_btl_base_module_t *) ugni_modules;
*num_btl_modules = mca_btl_ugni_component.ugni_num_btls;
BTL_VERBOSE(("btl/ugni done initializing %d module(s)", *num_btl_modules));
@ -482,80 +500,47 @@ mca_btl_ugni_component_init (int *num_btl_modules,
}
static inline int
mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device)
{
uint64_t datagram_id, data, proc_id;
uint32_t remote_addr, remote_id;
mca_btl_base_endpoint_t *ep;
gni_post_state_t post_state;
gni_ep_handle_t handle;
gni_return_t grc;
int count = 0, rc;
/* check for datagram completion */
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock); /* TODO: may not need lock for this function */
grc = GNI_PostDataProbeById (ugni_module->device->dev_handle, &datagram_id);
if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) {
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
return 0;
rc = mca_btl_ugni_get_datagram (ugni_module, device, &handle, &ep);
if (1 != rc) {
return rc;
}
data = datagram_id & ~(MCA_BTL_UGNI_DATAGRAM_MASK);
BTL_VERBOSE(("datgram_id: %" PRIx64 ", mask: %" PRIx64, datagram_id, (uint64_t) (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK)));
if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID) {
ep = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&ugni_module->endpoints, data);
handle = ep->smsg_ep_handle;
} else {
handle = ugni_module->wildcard_ep;
}
/* wait for the incoming datagram to complete (in case it isn't) */
grc = GNI_EpPostDataWaitById (handle, datagram_id, -1, &post_state,
&remote_addr, &remote_id);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != grc) {
BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", grc));
return opal_common_rc_ugni_to_opal (grc);
}
BTL_VERBOSE(("remote datagram completion on handle %p", handle));
/* if this is a wildcard endpoint lookup the remote peer by the proc id we received */
if (handle == ugni_module->wildcard_ep) {
proc_id = mca_btl_ugni_proc_name_to_id (ugni_module->wc_remote_attr.proc_name);
struct opal_proc_t *remote_proc = opal_proc_for_name (ugni_module->wc_remote_attr.proc_name);
BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc id: %" PRIx64,
proc_id));
BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc: %s",
OPAL_NAME_PRINT(ugni_module->wc_remote_attr.proc_name)));
OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) &ep);
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
/* check if the endpoint is known */
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == ep)) {
struct opal_proc_t *remote_proc = opal_proc_for_name (ugni_module->wc_remote_attr.proc_name);
BTL_VERBOSE(("Got connection request from an unknown peer {jobid = 0x%x, vid = 0x%x}",
ugni_module->wc_remote_attr.proc_name.jobid, ugni_module->wc_remote_attr.proc_name.vpid));
ep = mca_btl_ugni_get_ep (&ugni_module->super, remote_proc);
if (OPAL_UNLIKELY(NULL == ep)) {
return rc;
}
ep = mca_btl_ugni_get_ep (&ugni_module->super, remote_proc);
if (OPAL_UNLIKELY(NULL == ep)) {
/* there is no way to recover from this error so just abort() */
BTL_ERROR(("could not find/allocate a btl endpoint for peer %s",
OPAL_NAME_PRINT(ugni_module->wc_remote_attr.proc_name)));
abort ();
return OPAL_ERR_NOT_FOUND;
}
} else {
BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));
}
/* should not have gotten a NULL endpoint */
assert (NULL != ep);
BTL_VERBOSE(("got a datagram completion: id = %" PRIx64 ", state = %d, "
"data = 0x%" PRIx64 ", ep = %p, remote id: %d", datagram_id, post_state,
data, (void *) ep, remote_id));
BTL_VERBOSE(("got a datagram completion: ep = %p. wc = %d", (void *) ep, handle == ugni_module->wildcard_ep));
/* NTH: TODO -- error handling */
opal_mutex_lock (&ep->lock);
if (handle != ugni_module->wildcard_ep) {
/* directed post complete */
BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));
ep->dg_posted = false;
}
@ -579,106 +564,106 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
static inline void btl_ugni_dump_post_desc (mca_btl_ugni_post_descriptor_t *desc)
{
fprintf (stderr, "desc->desc.base.post_id = %" PRIx64 "\n", desc->desc.base.post_id);
fprintf (stderr, "desc->desc.base.status = %" PRIx64 "\n", desc->desc.base.status);
fprintf (stderr, "desc->desc.base.cq_mode_complete = %hu\n", desc->desc.base.cq_mode_complete);
fprintf (stderr, "desc->desc.base.type = %d\n", desc->desc.base.type);
fprintf (stderr, "desc->desc.base.cq_mode = %hu\n", desc->desc.base.cq_mode);
fprintf (stderr, "desc->desc.base.dlvr_mode = %hu\n", desc->desc.base.dlvr_mode);
fprintf (stderr, "desc->desc.base.local_addr = %" PRIx64 "\n", desc->desc.base.local_addr);
fprintf (stderr, "desc->desc.base.local_mem_hndl = {%" PRIx64 ", %" PRIx64 "}\n", desc->desc.base.local_mem_hndl.qword1,
desc->desc.base.local_mem_hndl.qword2);
fprintf (stderr, "desc->desc.base.remote_addr = %" PRIx64 "\n", desc->desc.base.remote_addr);
fprintf (stderr, "desc->desc.base.remote_mem_hndl = {%" PRIx64 ", %" PRIx64 "}\n", desc->desc.base.remote_mem_hndl.qword1,
desc->desc.base.remote_mem_hndl.qword2);
fprintf (stderr, "desc->desc.base.length = %" PRIu64 "\n", desc->desc.base.length);
fprintf (stderr, "desc->desc.base.rdma_mode = %hu\n", desc->desc.base.rdma_mode);
fprintf (stderr, "desc->desc.base.amo_cmd = %d\n", desc->desc.base.amo_cmd);
fprintf (stderr, "desc->desc.post_id = %" PRIx64 "\n", desc->desc.post_id);
fprintf (stderr, "desc->desc.status = %" PRIx64 "\n", desc->desc.status);
fprintf (stderr, "desc->desc.cq_mode_complete = %hu\n", desc->desc.cq_mode_complete);
fprintf (stderr, "desc->desc.type = %d\n", desc->desc.type);
fprintf (stderr, "desc->desc.cq_mode = %hu\n", desc->desc.cq_mode);
fprintf (stderr, "desc->desc.dlvr_mode = %hu\n", desc->desc.dlvr_mode);
fprintf (stderr, "desc->desc.local_addr = %" PRIx64 "\n", desc->desc.local_addr);
fprintf (stderr, "desc->desc.local_mem_hndl = {%" PRIx64 ", %" PRIx64 "}\n", desc->desc.local_mem_hndl.qword1,
desc->desc.local_mem_hndl.qword2);
fprintf (stderr, "desc->desc.remote_addr = %" PRIx64 "\n", desc->desc.remote_addr);
fprintf (stderr, "desc->desc.remote_mem_hndl = {%" PRIx64 ", %" PRIx64 "}\n", desc->desc.remote_mem_hndl.qword1,
desc->desc.remote_mem_hndl.qword2);
fprintf (stderr, "desc->desc.length = %" PRIu64 "\n", desc->desc.length);
fprintf (stderr, "desc->desc.rdma_mode = %hu\n", desc->desc.rdma_mode);
fprintf (stderr, "desc->desc.amo_cmd = %d\n", desc->desc.amo_cmd);
}
#endif
static inline int mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module, int which_cq)
{
mca_btl_ugni_post_descriptor_t *post_desc = NULL;
gni_cq_entry_t event_data = 0;
gni_post_descriptor_t *desc;
uint32_t recoverable = 1;
gni_return_t grc;
gni_cq_handle_t the_cq;
the_cq = (which_cq == 0) ? ugni_module->rdma_local_cq : ugni_module->rdma_local_irq_cq;
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
grc = GNI_CqGetEvent (the_cq, &event_data);
if (GNI_RC_NOT_DONE == grc) {
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
return 0;
}
if (OPAL_UNLIKELY((GNI_RC_SUCCESS != grc && !event_data) || GNI_CQ_OVERRUN(event_data))) {
/* TODO -- need to handle overrun -- how do we do this without an event?
will the event eventually come back? Ask Cray */
BTL_ERROR(("unhandled post error! ugni rc = %d %s", grc, gni_err_str[grc]));
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
return opal_common_rc_ugni_to_opal (grc);
}
grc = GNI_GetCompleted (the_cq, event_data, &desc);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc && GNI_RC_TRANSACTION_ERROR != grc)) {
BTL_ERROR(("Error in GNI_GetComplete %s", gni_err_str[grc]));
return opal_common_rc_ugni_to_opal (grc);
}
post_desc = MCA_BTL_UGNI_DESC_TO_PDESC(desc);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc || !GNI_CQ_STATUS_OK(event_data))) {
(void) GNI_CqErrorRecoverable (event_data, &recoverable);
if (OPAL_UNLIKELY(++post_desc->desc.tries >= mca_btl_ugni_component.rdma_max_retries ||
!recoverable)) {
char char_buffer[1024];
GNI_CqErrorStr (event_data, char_buffer, 1024);
/* give up */
BTL_ERROR(("giving up on desciptor %p, recoverable %d: %s", (void *) post_desc,
recoverable, char_buffer));
#if OPAL_ENABLE_DEBUG
btl_ugni_dump_post_desc (post_desc);
#endif
mca_btl_ugni_post_desc_complete (ugni_module, post_desc, OPAL_ERROR);
return OPAL_ERROR;
}
mca_btl_ugni_repost (ugni_module, post_desc);
return 0;
}
mca_btl_ugni_post_desc_complete (ugni_module, post_desc, opal_common_rc_ugni_to_opal (grc));
return 1;
}
static inline int
mca_btl_ugni_post_pending (mca_btl_ugni_module_t *ugni_module)
mca_btl_ugni_post_pending (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device)
{
int count = opal_list_get_size (&ugni_module->pending_descriptors);
int i;
int pending_post_count = opal_list_get_size (&device->pending_post);
mca_btl_ugni_post_descriptor_t *post_desc;
int rc;
for (i = 0 ; i < count ; ++i) {
OPAL_THREAD_LOCK(&ugni_module->pending_descriptors_lock);
mca_btl_ugni_post_descriptor_t *post_desc =
(mca_btl_ugni_post_descriptor_t *) opal_list_remove_first (&ugni_module->pending_descriptors);
OPAL_THREAD_UNLOCK(&ugni_module->pending_descriptors_lock);
/* check if there are any posts pending resources */
if (OPAL_LIKELY(0 == pending_post_count)) {
return 0;
}
if (OPAL_SUCCESS != mca_btl_ugni_repost (ugni_module, post_desc)) {
BTL_VERBOSE(("progressing %d pending FMA/RDMA operations", pending_post_count));
for (int i = 0 ; i < pending_post_count ; ++i) {
mca_btl_ugni_device_lock (device);
post_desc = (mca_btl_ugni_post_descriptor_t *) opal_list_remove_first (&device->pending_post);
mca_btl_ugni_device_unlock (device);
if (NULL == post_desc) {
break;
}
rc = mca_btl_ugni_repost (ugni_module, post_desc);
if (OPAL_SUCCESS != rc) {
mca_btl_ugni_device_lock (device);
opal_list_prepend (&device->pending_post, (opal_list_item_t *) post_desc);
mca_btl_ugni_device_unlock (device);
break;
}
}
return i;
return 1;
}
static inline int mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device,
mca_btl_ugni_cq_t *cq)
{
mca_btl_ugni_post_descriptor_t *post_desc[MCA_BTL_UGNI_COMPLETIONS_PER_LOOP];
gni_cq_entry_t event_data[MCA_BTL_UGNI_COMPLETIONS_PER_LOOP];
int rc;
rc = mca_btl_ugni_cq_get_completed_desc (device, cq, event_data, post_desc, MCA_BTL_UGNI_COMPLETIONS_PER_LOOP);
if (0 >= rc) {
return rc;
}
BTL_VERBOSE(("got %d completed rdma descriptors", rc));
for (int i = 0 ; i < rc ; ++i) {
BTL_VERBOSE(("post descriptor %p complete. GNI_CQ_STATUS_OK(): %d", post_desc[i],
GNI_CQ_STATUS_OK(event_data[i])));
if (OPAL_UNLIKELY(!GNI_CQ_STATUS_OK(event_data[i]))) {
uint32_t recoverable = 1;
(void) GNI_CqErrorRecoverable (event_data[i], &recoverable);
if (OPAL_UNLIKELY(++post_desc[i]->tries >= mca_btl_ugni_component.rdma_max_retries ||
!recoverable)) {
char char_buffer[1024];
GNI_CqErrorStr (event_data[i], char_buffer, 1024);
/* give up */
BTL_ERROR(("giving up on desciptor %p, recoverable %d: %s", (void *) post_desc[i],
recoverable, char_buffer));
#if OPAL_ENABLE_DEBUG
btl_ugni_dump_post_desc (post_desc[i]);
#endif
mca_btl_ugni_post_desc_complete (ugni_module, post_desc[i], OPAL_ERROR);
return OPAL_ERROR;
}
mca_btl_ugni_repost (ugni_module, post_desc[i]);
return 0;
}
mca_btl_ugni_post_desc_complete (ugni_module, post_desc[i], OPAL_SUCCESS);
}
/* should be resources to progress the pending post list */
(void) mca_btl_ugni_post_pending (ugni_module, device);
return rc;
}
static inline int
@ -692,9 +677,14 @@ mca_btl_ugni_progress_wait_list (mca_btl_ugni_module_t *ugni_module)
return 0;
}
/* check the count before taking the lock to avoid unnecessary locking */
count = opal_list_get_size(&ugni_module->ep_wait_list);
if (0 == count) {
return 0;
}
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
count = opal_list_get_size(&ugni_module->ep_wait_list);
do {
endpoint = (mca_btl_base_endpoint_t *) opal_list_remove_first (&ugni_module->ep_wait_list);
if (endpoint != NULL) {
@ -714,35 +704,34 @@ mca_btl_ugni_progress_wait_list (mca_btl_ugni_module_t *ugni_module)
static int mca_btl_ugni_component_progress (void)
{
mca_btl_ugni_module_t *ugni_module;
static int64_t call_count = 0;
int64_t cur_call_count = OPAL_THREAD_ADD64(&call_count, 1);
unsigned int i;
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_component.modules;
static volatile int32_t call_count = 0;
int32_t current_call;
int count = 0;
for (i = 0 ; i < mca_btl_ugni_component.ugni_num_btls ; ++i) {
ugni_module = mca_btl_ugni_component.modules + i;
current_call = OPAL_THREAD_ADD32(&call_count, 1);
if ((cur_call_count & 0x7) == 0) {
count += mca_btl_ugni_progress_datagram (ugni_module);
}
count += mca_btl_ugni_progress_remote_smsg (ugni_module);
if (ugni_module->connected_peer_count) {
if ((current_call & 0x7) == 0) {
count += mca_btl_ugni_progress_datagram (ugni_module, ugni_module->devices);
}
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
mca_btl_ugni_device_t *device = ugni_module->devices + i;
if (device->smsg_connections) {
count += mca_btl_ugni_progress_local_smsg (ugni_module, device);
mca_btl_ugni_progress_wait_list (ugni_module);
count += mca_btl_ugni_progress_local_smsg (ugni_module);
count += mca_btl_ugni_progress_remote_smsg (ugni_module);
}
if (ugni_module->active_rdma_count) {
count += mca_btl_ugni_progress_rdma (ugni_module, 0);
if (device->dev_rdma_local_cq.active_operations) {
count += mca_btl_ugni_progress_rdma (ugni_module, device, &device->dev_rdma_local_cq);
}
if (mca_btl_ugni_component.progress_thread_enabled) {
count += mca_btl_ugni_progress_rdma (ugni_module, 1);
if (mca_btl_ugni_component.progress_thread_enabled && device->dev_rdma_local_irq_cq.active_operations) {
count += mca_btl_ugni_progress_rdma (ugni_module, device, &device->dev_rdma_local_irq_cq);
}
/* post pending after progressing rdma */
mca_btl_ugni_post_pending (ugni_module);
}
return count;

430
opal/mca/btl/ugni/btl_ugni_device.h Обычный файл
Просмотреть файл

@ -0,0 +1,430 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file This file contains wrappers for uGNI functionality. These wrappers are thread-safe
* and intended to provide a way to measure various different ways to handle mutual exclusion
* into the uGNI library (which is not thread safe). These functions are all defined to be
* inline to limit the cost to non-threaded users.
*/
#if !defined(BTL_UGNI_DEVICE_H)
#define BTL_UGNI_DEVICE_H
#include "btl_ugni_endpoint.h"
#include "btl_ugni_frag.h"
/* helper functions */
typedef struct mca_btl_ugni_smsg_send_wtag_arg_t {
gni_ep_handle_t ep_handle;
void *hdr;
size_t hdr_len;
void *payload;
size_t payload_len;
uint32_t msg_id;
int tag;
} mca_btl_ugni_smsg_send_wtag_arg_t;
static inline int mca_btl_ugni_smsg_send_wtag_device (mca_btl_ugni_device_t *device, void *arg)
{
mca_btl_ugni_smsg_send_wtag_arg_t *args = (mca_btl_ugni_smsg_send_wtag_arg_t *) arg;
gni_return_t grc;
grc = GNI_SmsgSendWTag (args->ep_handle, args->hdr, args->hdr_len, args->payload,
args->payload_len, args->msg_id, args->tag);
device->dev_smsg_local_cq.active_operations += (GNI_RC_SUCCESS == grc);
return grc;
}
typedef struct mca_btl_ugni_smsg_get_next_wtag_arg_t {
gni_ep_handle_t ep_handle;
uintptr_t *data_ptr;
uint8_t *tag;
} mca_btl_ugni_smsg_get_next_wtag_arg_t;
static inline intptr_t mca_btl_ugni_smsg_get_next_wtag_device (mca_btl_ugni_device_t *device, void *arg)
{
mca_btl_ugni_smsg_get_next_wtag_arg_t *args = (mca_btl_ugni_smsg_get_next_wtag_arg_t *) arg;
return GNI_SmsgGetNextWTag(args->ep_handle, (void **) args->data_ptr, args->tag);
}
static inline intptr_t mca_btl_ugni_smsg_release_device (mca_btl_ugni_device_t *device, void *arg)
{
mca_btl_ugni_endpoint_handle_t *ep_handle = (mca_btl_ugni_endpoint_handle_t *) arg;
return GNI_SmsgRelease (ep_handle->gni_handle);
}
static inline intptr_t mca_btl_ugni_cq_clear_device (mca_btl_ugni_device_t *device, void *arg)
{
gni_cq_handle_t cq = (gni_cq_handle_t) (intptr_t) arg;
gni_cq_entry_t event_data;
int rc;
do {
rc = GNI_CqGetEvent (cq, &event_data);
} while (GNI_RC_NOT_DONE != rc);
return OPAL_SUCCESS;
}
typedef struct mca_btl_ugni_cq_get_event_args_t {
mca_btl_ugni_cq_t *cq;
gni_cq_entry_t *event_data;
} mca_btl_ugni_cq_get_event_args_t;
static inline intptr_t mca_btl_ugni_cq_get_event_device (mca_btl_ugni_device_t *device, void *arg)
{
mca_btl_ugni_cq_get_event_args_t *args = (mca_btl_ugni_cq_get_event_args_t *) arg;
gni_return_t rc;
rc = GNI_CqGetEvent (args->cq->gni_handle, args->event_data);
args->cq->active_operations -= GNI_RC_NOT_DONE != rc;
return rc;
}
typedef struct mca_btl_ugni_gni_cq_get_event_args_t {
gni_cq_handle_t cq;
gni_cq_entry_t *event_data;
} mca_btl_ugni_gni_cq_get_event_args_t;
static inline intptr_t mca_btl_ugni_gni_cq_get_event_device (mca_btl_ugni_device_t *device, void *arg)
{
mca_btl_ugni_gni_cq_get_event_args_t *args = (mca_btl_ugni_gni_cq_get_event_args_t *) arg;
return GNI_CqGetEvent (args->cq, args->event_data);
}
static inline intptr_t mca_btl_ugni_post_fma_device (mca_btl_ugni_device_t *device, void *arg)
{
mca_btl_ugni_post_descriptor_t *desc = (mca_btl_ugni_post_descriptor_t *) arg;
bool ep_handle_allocated = false;
int rc;
if (NULL == desc->ep_handle) {
desc->ep_handle = mca_btl_ugni_ep_get_rdma (desc->endpoint, device);
if (OPAL_UNLIKELY(NULL == desc->ep_handle)) {
return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
}
ep_handle_allocated = true;
}
BTL_VERBOSE(("Posting FMA descriptor %p with op_type %d, amo %d, ep_handle %p, remote_addr 0x%lx, "
"length %lu", desc, desc->desc.type, desc->desc.amo_cmd, desc->ep_handle,
desc->desc.remote_addr, desc->desc.length));
rc = GNI_PostFma (desc->ep_handle->gni_handle, &desc->desc);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
if (ep_handle_allocated) {
/* only return the endpoint handle if we allocated it. if we didn't allocate the
* handle this call was likely made from repost() */
mca_btl_ugni_ep_return_rdma (desc->ep_handle);
desc->ep_handle = NULL;
}
} else {
++device->dev_rdma_local_cq.active_operations;
}
return mca_btl_rc_ugni_to_opal (rc);
}
static inline intptr_t mca_btl_ugni_post_rdma_device (mca_btl_ugni_device_t *device, void *arg)
{
mca_btl_ugni_post_descriptor_t *desc = (mca_btl_ugni_post_descriptor_t *) arg;
bool ep_handle_allocated = false;
int rc;
if (NULL == desc->ep_handle) {
desc->ep_handle = mca_btl_ugni_ep_get_rdma (desc->endpoint, device);
if (OPAL_UNLIKELY(NULL == desc->ep_handle)) {
return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
}
ep_handle_allocated = true;
}
/* pick the appropriate CQ */
desc->cq = mca_btl_ugni_component.progress_thread_enabled ? &device->dev_rdma_local_irq_cq :
&device->dev_rdma_local_cq;
desc->desc.src_cq_hndl = desc->cq->gni_handle;
BTL_VERBOSE(("Posting RDMA descriptor %p with op_type %d, ep_handle %p, remote_addr 0x%lx, "
"length %lu", desc, desc->desc.type, desc->ep_handle, desc->desc.remote_addr,
desc->desc.length));
rc = GNI_PostRdma (desc->ep_handle->gni_handle, &desc->desc);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
if (ep_handle_allocated) {
/* only return the endpoint handle if we allocated it. if we didn't allocate the
* handle this call was likely made from repost() */
mca_btl_ugni_ep_return_rdma (desc->ep_handle);
desc->ep_handle = NULL;
}
} else {
++desc->cq->active_operations;
}
return mca_btl_rc_ugni_to_opal (rc);
}
static inline intptr_t mca_btl_ugni_post_cqwrite_device (mca_btl_ugni_device_t *device, void *arg)
{
mca_btl_ugni_post_descriptor_t *desc = (mca_btl_ugni_post_descriptor_t *) arg;
int rc;
desc->ep_handle = mca_btl_ugni_ep_get_rdma (desc->endpoint, device);
if (OPAL_UNLIKELY(NULL == desc->ep_handle)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
desc->desc.src_cq_hndl = device->dev_rdma_local_cq.gni_handle;
rc = GNI_PostCqWrite (desc->ep_handle->gni_handle, &desc->desc);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
mca_btl_ugni_ep_return_rdma (desc->ep_handle);
desc->ep_handle = NULL;
}
return mca_btl_rc_ugni_to_opal (rc);
}
typedef struct mca_btl_ugni_cq_get_completed_desc_arg_t {
mca_btl_ugni_cq_t *cq;
gni_cq_entry_t *event_data;
mca_btl_ugni_post_descriptor_t **post_desc;
int count;
} mca_btl_ugni_cq_get_completed_desc_arg_t;
static inline intptr_t mca_btl_ugni_cq_get_completed_desc_device (mca_btl_ugni_device_t *device, void *arg0)
{
mca_btl_ugni_cq_get_completed_desc_arg_t *args = (mca_btl_ugni_cq_get_completed_desc_arg_t *) arg0;
mca_btl_ugni_cq_t *cq = args->cq;
gni_post_descriptor_t *desc;
int rc;
for (int i = 0 ; i < args->count ; ++i) {
rc = GNI_CqGetEvent (cq->gni_handle, args->event_data + i);
if (GNI_RC_NOT_DONE == rc) {
return i;
}
if (OPAL_UNLIKELY((GNI_RC_SUCCESS != rc && !args->event_data[i]) || GNI_CQ_OVERRUN(args->event_data[i]))) {
/* TODO -- need to handle overrun -- how do we do this without an event?
will the event eventually come back? Ask Cray */
BTL_ERROR(("unhandled post error! ugni rc = %d %s", rc, gni_err_str[rc]));
return mca_btl_rc_ugni_to_opal (rc);
}
rc = GNI_GetCompleted (cq->gni_handle, args->event_data[i], &desc);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc && GNI_RC_TRANSACTION_ERROR != rc)) {
BTL_ERROR(("Error in GNI_GetComplete %s", gni_err_str[rc]));
return mca_btl_rc_ugni_to_opal (rc);
}
args->post_desc[i] = MCA_BTL_UGNI_DESC_TO_PDESC(desc);
/* return the endpoint handle while we have the lock. see the explanation in
* the documentation for mca_btl_ugni_ep_return_rdma() */
if (OPAL_LIKELY(GNI_CQ_STATUS_OK(args->event_data[i]))) {
/* the operation completed successfully. return the endpoint handle now. otherwise
* we may still need the endpoint handle to start the repost(). */
mca_btl_ugni_ep_return_rdma (args->post_desc[i]->ep_handle);
args->post_desc[i]->ep_handle = NULL;
}
--cq->active_operations;
}
return args->count;
}
typedef struct mca_btl_ugni_get_datagram_args_t {
mca_btl_ugni_module_t *ugni_module;
gni_ep_handle_t *handle;
mca_btl_base_endpoint_t **ep;
} mca_btl_ugni_get_datagram_args_t;
static inline intptr_t mca_btl_ugni_get_datagram_device (mca_btl_ugni_device_t *device, void *arg0)
{
mca_btl_ugni_get_datagram_args_t *args = (mca_btl_ugni_get_datagram_args_t *) arg0;
uint32_t remote_addr, remote_id;
uint64_t datagram_id;
gni_post_state_t post_state;
gni_return_t grc;
uint64_t data;
grc = GNI_PostDataProbeById (device->dev_handle, &datagram_id);
if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) {
return 0;
}
data = datagram_id & ~(MCA_BTL_UGNI_DATAGRAM_MASK);
BTL_VERBOSE(("rc: %d, datgram_id: %" PRIx64 ", mask: %" PRIx64, grc, datagram_id, (uint64_t) (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK)));
if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID) {
*(args->ep) = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&args->ugni_module->endpoints, data);
*(args->handle) = (*args->ep)->smsg_ep_handle->gni_handle;
} else {
*(args->handle) = args->ugni_module->wildcard_ep;
}
/* wait for the incoming datagram to complete (in case it isn't) */
grc = GNI_EpPostDataWaitById (*args->handle, datagram_id, -1, &post_state,
&remote_addr, &remote_id);
if (GNI_RC_SUCCESS != grc) {
BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", grc));
return mca_btl_rc_ugni_to_opal (grc);
}
BTL_VERBOSE(("handled datagram completion. post_state: %d, remote_addr: %u, remote_id: %u, directed?: %d",
post_state, remote_addr, remote_id, (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID));
return 1;
}
typedef struct mca_btl_ugni_reg_mem_args_t {
mca_btl_ugni_module_t *ugni_module;
void *base;
size_t size;
mca_btl_ugni_reg_t *ugni_reg;
gni_cq_handle_t cq;
int flags;
} mca_btl_ugni_reg_mem_args_t;
static intptr_t mca_btl_ugni_reg_mem_device (mca_btl_ugni_device_t *device, void *arg)
{
mca_btl_ugni_reg_mem_args_t *args = (mca_btl_ugni_reg_mem_args_t *) arg;
gni_return_t rc;
rc = GNI_MemRegister (device->dev_handle, (uint64_t) args->base, args->size, args->cq,
args->flags, -1, &args->ugni_reg->handle.gni_handle);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
return OPAL_SUCCESS;
}
typedef struct mca_btl_ugni_dereg_mem_arg_t {
mca_btl_ugni_module_t *ugni_module;
mca_btl_ugni_reg_t *ugni_reg;
} mca_btl_ugni_dereg_mem_arg_t;
static intptr_t mca_btl_ugni_dereg_mem_device (mca_btl_ugni_device_t *device, void *arg)
{
mca_btl_ugni_dereg_mem_arg_t *args = (mca_btl_ugni_dereg_mem_arg_t *) arg;
gni_return_t rc;
rc = GNI_MemDeregister (device->dev_handle, &args->ugni_reg->handle.gni_handle);
return mca_btl_rc_ugni_to_opal (rc);
}
/* multi-thread safe interface to uGNI */
static inline int mca_btl_ugni_endpoint_smsg_send_wtag (mca_btl_base_endpoint_t *endpoint, void *hdr, size_t hdr_len,
void *payload, size_t payload_len, uint32_t msg_id, int tag)
{
mca_btl_ugni_smsg_send_wtag_arg_t args = {.ep_handle = endpoint->smsg_ep_handle->gni_handle,
.hdr = hdr, .hdr_len = hdr_len, .payload = payload,
.payload_len = payload_len, .msg_id = msg_id,
.tag = tag};
mca_btl_ugni_device_t *device = endpoint->smsg_ep_handle->device;
return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_smsg_send_wtag_device, &args);
}
/**
 * @brief thread-safe wrapper fetching the next SMSG message and its tag
 *
 * @param[in]  ep_handle  bound SMSG endpoint handle
 * @param[out] data_ptr   receives a pointer to the message data
 * @param[out] tag        receives the message tag
 *
 * Serializes mca_btl_ugni_smsg_get_next_wtag_device() on the handle's device.
 */
static inline int mca_btl_ugni_smsg_get_next_wtag (mca_btl_ugni_endpoint_handle_t *ep_handle, uintptr_t *data_ptr, uint8_t *tag)
{
    mca_btl_ugni_smsg_get_next_wtag_arg_t get_args = {
        .ep_handle = ep_handle->gni_handle,
        .data_ptr = data_ptr,
        .tag = tag,
    };

    return (int) mca_btl_ugni_device_serialize (ep_handle->device,
                                                (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_smsg_get_next_wtag_device,
                                                &get_args);
}
/**
 * @brief thread-safe SMSG release wrapper
 *
 * @param[in] ep_handle  bound SMSG endpoint handle
 *
 * Serializes mca_btl_ugni_smsg_release_device() on the handle's device; the
 * endpoint handle itself is passed through as the callback argument.
 */
static inline int mca_btl_ugni_smsg_release (mca_btl_ugni_endpoint_handle_t *ep_handle)
{
    return (int) mca_btl_ugni_device_serialize (ep_handle->device,
                                                (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_smsg_release_device,
                                                ep_handle);
}
/**
 * @brief thread-safe wrapper that drains a raw gni completion queue
 *
 * @param[in] device  device to serialize on
 * @param[in] cq      raw gni completion queue handle
 *
 * The cq handle is smuggled through the void * callback argument; the
 * callback's return value is intentionally discarded.
 */
static inline void mca_btl_ugni_cq_clear (mca_btl_ugni_device_t *device, gni_cq_handle_t cq)
{
    void *cq_arg = (void *) (intptr_t) cq;

    (void) mca_btl_ugni_device_serialize (device,
                                          (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_cq_clear_device,
                                          cq_arg);
}
/**
 * @brief thread-safe wrapper fetching one event from a btl cq wrapper object
 *
 * @param[in]  device      device to serialize on
 * @param[in]  cq          btl completion queue wrapper
 * @param[out] event_data  receives the gni event entry
 */
static inline int mca_btl_ugni_cq_get_event (mca_btl_ugni_device_t *device, mca_btl_ugni_cq_t *cq, gni_cq_entry_t *event_data)
{
    mca_btl_ugni_cq_get_event_args_t event_args = {
        .cq = cq,
        .event_data = event_data,
    };

    return (int) mca_btl_ugni_device_serialize (device,
                                                (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_cq_get_event_device,
                                                &event_args);
}
/**
 * @brief thread-safe wrapper fetching one event from a raw gni cq handle
 *
 * @param[in]  device      device to serialize on
 * @param[in]  cq          raw gni completion queue handle
 * @param[out] event_data  receives the gni event entry
 *
 * Raw-handle counterpart of mca_btl_ugni_cq_get_event().
 */
static inline int mca_btl_ugni_gni_cq_get_event (mca_btl_ugni_device_t *device, gni_cq_handle_t cq, gni_cq_entry_t *event_data)
{
    mca_btl_ugni_gni_cq_get_event_args_t event_args = {
        .cq = cq,
        .event_data = event_data,
    };

    return (int) mca_btl_ugni_device_serialize (device,
                                                (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_gni_cq_get_event_device,
                                                &event_args);
}
/**
 * @brief thread-safe FMA post on behalf of an endpoint
 *
 * @param[in] endpoint  btl endpoint the descriptor targets
 * @param[in] desc      post descriptor to hand to the device callback
 *
 * If the descriptor already carries an endpoint handle the post is serialized
 * on that handle's device; otherwise a device is picked via
 * mca_btl_ugni_ep_get_device().
 */
static inline int mca_btl_ugni_endpoint_post_fma (mca_btl_ugni_endpoint_t *endpoint, mca_btl_ugni_post_descriptor_t *desc)
{
    mca_btl_ugni_device_t *device;

    if (NULL != desc->ep_handle) {
        device = desc->ep_handle->device;
    } else {
        device = mca_btl_ugni_ep_get_device (mca_btl_ugni_ep_btl (endpoint));
    }

    return (int) mca_btl_ugni_device_serialize (device,
                                                (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_post_fma_device,
                                                desc);
}
/**
 * @brief thread-safe RDMA post on behalf of an endpoint
 *
 * @param[in] endpoint  btl endpoint the descriptor targets
 * @param[in] desc      post descriptor to hand to the device callback
 *
 * Device selection mirrors mca_btl_ugni_endpoint_post_fma(): prefer the
 * device of the descriptor's endpoint handle, else ask the module.
 */
static inline int mca_btl_ugni_endpoint_post_rdma (mca_btl_ugni_endpoint_t *endpoint, mca_btl_ugni_post_descriptor_t *desc)
{
    mca_btl_ugni_device_t *device;

    if (NULL != desc->ep_handle) {
        device = desc->ep_handle->device;
    } else {
        device = mca_btl_ugni_ep_get_device (mca_btl_ugni_ep_btl (endpoint));
    }

    return (int) mca_btl_ugni_device_serialize (device,
                                                (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_post_rdma_device,
                                                desc);
}
/**
 * @brief thread-safe CQ-write post on behalf of an endpoint
 *
 * @param[in] endpoint  btl endpoint the descriptor targets
 * @param[in] desc      post descriptor to hand to the device callback
 *
 * Always serialized on the module's first device (devices array base).
 */
static inline int mca_btl_ugni_endpoint_post_cqwrite (mca_btl_ugni_endpoint_t *endpoint, mca_btl_ugni_post_descriptor_t *desc)
{
    mca_btl_ugni_device_t *first_device = mca_btl_ugni_ep_btl (endpoint)->devices;

    return (int) mca_btl_ugni_device_serialize (first_device,
                                                (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_post_cqwrite_device,
                                                desc);
}
/**
 * @brief thread-safe wrapper reaping completed post descriptors from a cq
 *
 * @param[in]  device      device to serialize on
 * @param[in]  cq          btl completion queue wrapper to poll
 * @param[out] event_data  receives gni event entries
 * @param[out] post_desc   receives pointers to completed descriptors
 * @param[in]  count       maximum number of descriptors to reap
 */
static inline int mca_btl_ugni_cq_get_completed_desc (mca_btl_ugni_device_t *device, mca_btl_ugni_cq_t *cq,
                                                      gni_cq_entry_t *event_data, mca_btl_ugni_post_descriptor_t **post_desc,
                                                      int count)
{
    mca_btl_ugni_cq_get_completed_desc_arg_t desc_args = {
        .cq = cq,
        .event_data = event_data,
        .post_desc = post_desc,
        .count = count,
    };

    return (int) mca_btl_ugni_device_serialize (device,
                                                (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_cq_get_completed_desc_device,
                                                &desc_args);
}
/**
 * @brief thread-safe wrapper checking for a completed connection datagram
 *
 * @param[in]  ugni_module  btl module owning the datagram endpoints
 * @param[in]  device       device to serialize on
 * @param[in]  gni_handle   gni endpoint handle to test for datagram completion
 * @param[out] ep           receives the endpoint associated with the datagram
 */
static inline int mca_btl_ugni_get_datagram (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device, gni_ep_handle_t *gni_handle,
                                             mca_btl_base_endpoint_t **ep)
{
    mca_btl_ugni_get_datagram_args_t dg_args = {
        .ugni_module = ugni_module,
        .ep = ep,
        .handle = gni_handle,
    };

    return (int) mca_btl_ugni_device_serialize (device,
                                                (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_get_datagram_device,
                                                &dg_args);
}
/**
 * @brief thread-safe memory registration
 *
 * @param[in] ugni_module  btl module whose first device performs the registration
 * @param[in] base         base address of the region to register
 * @param[in] size         length of the region in bytes
 * @param[in] ugni_reg     registration object that receives the gni memory handle
 * @param[in] cq           completion queue to associate with the registration
 * @param[in] flags        flags forwarded to GNI_MemRegister()
 *
 * Registration is always serialized on the module's first device.
 */
static inline int mca_btl_ugni_reg_mem (mca_btl_ugni_module_t *ugni_module, void *base, size_t size, mca_btl_ugni_reg_t *ugni_reg,
                                        gni_cq_handle_t cq, int flags)
{
    mca_btl_ugni_reg_mem_args_t reg_args = {
        .ugni_module = ugni_module,
        .base = base,
        .size = size,
        .ugni_reg = ugni_reg,
        .cq = cq,
        .flags = flags,
    };

    return (int) mca_btl_ugni_device_serialize (ugni_module->devices,
                                                (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_reg_mem_device,
                                                &reg_args);
}
/**
 * @brief thread-safe memory deregistration
 *
 * @param[in] ugni_module  btl module whose first device performs the deregistration
 * @param[in] ugni_reg     registration whose gni memory handle should be released
 *
 * Like mca_btl_ugni_reg_mem(), always serialized on the module's first device.
 */
static inline int mca_btl_ugni_dereg_mem (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_reg_t *ugni_reg)
{
    mca_btl_ugni_dereg_mem_arg_t dereg_args = {
        .ugni_module = ugni_module,
        .ugni_reg = ugni_reg,
    };

    return (int) mca_btl_ugni_device_serialize (ugni_module->devices,
                                                (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_dereg_mem_device,
                                                &dereg_args);
}
#endif /* BTL_UGNI_DEVICE_H */

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011-2013 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -12,6 +12,7 @@
#include "btl_ugni_endpoint.h"
#include "btl_ugni_smsg.h"
#include "opal/mca/pmix/pmix.h"
static void mca_btl_ugni_ep_construct (mca_btl_base_endpoint_t *ep)
{
@ -24,15 +25,94 @@ static void mca_btl_ugni_ep_destruct (mca_btl_base_endpoint_t *ep)
{
OBJ_DESTRUCT(&ep->frag_wait_list);
OBJ_DESTRUCT(&ep->lock);
free (ep->remote_attr);
}
OBJ_CLASS_INSTANCE(mca_btl_ugni_endpoint_t, opal_list_item_t,
mca_btl_ugni_ep_construct, mca_btl_ugni_ep_destruct);
static int mca_btl_ugni_endpoint_get_modex (mca_btl_base_endpoint_t *ep)
{
mca_btl_ugni_modex_t *modex;
size_t msg_size;
int rc;
assert (NULL != ep && NULL != ep->peer_proc);
/* Receive the modex */
OPAL_MODEX_RECV(rc, &mca_btl_ugni_component.super.btl_version,
&ep->peer_proc->proc_name, (void **)&modex, &msg_size);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("error receiving modex"));
return rc;
}
ep->ep_rem_addr = modex->addr;
ep->ep_rem_id = modex->id;
BTL_VERBOSE(("received modex for ep %p. addr: %d, id: %d", ep, ep->ep_rem_addr, ep->ep_rem_id));
free (modex);
return OPAL_SUCCESS;
}
int mca_btl_ugni_init_ep (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_endpoint_t **ep,
mca_btl_ugni_module_t *btl, opal_proc_t *peer_proc)
{
mca_btl_ugni_endpoint_t *endpoint;
int rc;
endpoint = OBJ_NEW(mca_btl_ugni_endpoint_t);
assert (endpoint != NULL);
endpoint->smsg_progressing = 0;
endpoint->state = MCA_BTL_UGNI_EP_STATE_INIT;
endpoint->peer_proc = peer_proc;
/* get the modex info for this endpoint and setup a ugni endpoint. this call may lead
* to re-entry through opal_progress(). */
rc = mca_btl_ugni_endpoint_get_modex (endpoint);
if (OPAL_SUCCESS != rc) {
assert (0);
return rc;
}
/* add this endpoint to the pointer array */
endpoint->index = opal_pointer_array_add (&ugni_module->endpoints, endpoint);
*ep = endpoint;
return OPAL_SUCCESS;
}
void mca_btl_ugni_release_ep (mca_btl_ugni_endpoint_t *ep)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
int rc;
opal_mutex_lock (&ep->lock);
rc = mca_btl_ugni_ep_disconnect (ep, false);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_VERBOSE(("btl/ugni error disconnecting endpoint"));
}
/* TODO -- Clear space at the end of the endpoint array */
opal_pointer_array_set_item (&ugni_module->endpoints, ep->index, NULL);
opal_mutex_unlock (&ep->lock);
OBJ_RELEASE(ep);
}
static inline int mca_btl_ugni_ep_smsg_get_mbox (mca_btl_base_endpoint_t *ep) {
mca_btl_ugni_module_t *ugni_module = ep->btl;
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
opal_free_list_item_t *mbox;
assert (NULL == ep->mailbox);
mbox = opal_free_list_get (&ugni_module->smsg_mboxes);
if (OPAL_UNLIKELY(NULL == mbox)) {
return OPAL_ERR_OUT_OF_RESOURCE;
@ -47,61 +127,103 @@ static inline int mca_btl_ugni_ep_smsg_get_mbox (mca_btl_base_endpoint_t *ep) {
return OPAL_SUCCESS;
}
int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect) {
gni_return_t rc;
static int mca_btl_ugni_ep_send_disconnect (mca_btl_base_endpoint_t *ep)
{
int rc;
do {
rc = mca_btl_ugni_endpoint_smsg_send_wtag (ep, NULL, 0, NULL, 0, -1, MCA_BTL_UGNI_TAG_DISCONNECT);
if (OPAL_LIKELY(GNI_RC_NOT_DONE != rc)) {
break;
}
/* most likely got here because we are out of credits. check the remote CQ to get credit return */
(void) mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_ep_btl (ep));
} while (1);
return mca_btl_rc_ugni_to_opal (rc);
}
int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
mca_btl_ugni_device_t *device;
int rc;
if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
/* nothing to do */
return OPAL_SUCCESS;
}
if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state && send_disconnect) {
OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
rc = GNI_SmsgSendWTag (ep->smsg_ep_handle, NULL, 0, NULL, 0, -1,
MCA_BTL_UGNI_TAG_DISCONNECT);
OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);
if (GNI_RC_SUCCESS != rc) {
BTL_VERBOSE(("btl/ugni could not send close message"));
device = ep->smsg_ep_handle->device;
while (device->dev_smsg_local_cq.active_operations) {
/* ensure all sends are complete before removing any procs */
rc = mca_btl_ugni_progress_local_smsg (ugni_module, device);
if (OPAL_SUCCESS != rc) {
break;
}
/* we might want to wait for local completion here (do we even care), yes we do */
/* TODO: FIX FIX FIX */
}
/* TODO: FIX GROSS */
OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
(void) opal_common_ugni_ep_destroy (&ep->smsg_ep_handle);
(void) opal_common_ugni_ep_destroy (&ep->rdma_ep_handle);
OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);
if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state && send_disconnect) {
rc = mca_btl_ugni_ep_send_disconnect (ep);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_VERBOSE(("could not send disconnect message to peer"));
}
/* wait for the disconnect message to go */
do {
/* ensure all sends are complete before removing any procs */
rc = mca_btl_ugni_progress_local_smsg (ugni_module, device);
if (OPAL_SUCCESS != rc) {
break;
}
} while (device->dev_smsg_local_cq.active_operations);
(void) opal_atomic_add_32 (&ep->smsg_ep_handle->device->smsg_connections, -1);
}
mca_btl_ugni_device_lock (device);
/* NTH: this call may not need the device lock. seems to work without it but
* the lock is here to be safe. */
(void) mca_btl_ugni_ep_handle_destroy (ep->smsg_ep_handle);
ep->smsg_ep_handle = NULL;
mca_btl_ugni_device_unlock (device);
if (ep->mailbox) {
opal_free_list_return (&ep->btl->smsg_mboxes, ((opal_free_list_item_t *) ep->mailbox));
opal_free_list_return (&ugni_module->smsg_mboxes, ((opal_free_list_item_t *) ep->mailbox));
ep->mailbox = NULL;
}
ep->state = MCA_BTL_UGNI_EP_STATE_INIT;
(void) opal_atomic_add_64 (&ep->btl->connected_peer_count, -1);
return OPAL_SUCCESS;
}
static inline int mca_btl_ugni_ep_connect_start (mca_btl_base_endpoint_t *ep) {
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
mca_btl_ugni_device_t *device = ugni_module->devices;
int rc;
rc = mca_btl_ugni_ep_connect_rdma (ep);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
/* protect against re-entry from opal_progress */
if (OPAL_UNLIKELY(MCA_BTL_UGNI_EP_STATE_CONNECTING == ep->state)) {
return OPAL_ERR_RESOURCE_BUSY;
}
BTL_VERBOSE(("initiaiting connection to remote peer with address: %u id: %u proc: %p",
ep->common->ep_rem_addr, ep->common->ep_rem_id, (void *)ep->peer_proc));
ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTING;
BTL_VERBOSE(("initiating connection to remote peer with address: %u id: %u proc: %p",
ep->ep_rem_addr, ep->ep_rem_id, (void *)ep->peer_proc));
/* bind endpoint to remote address */
/* we bind two endpoints to seperate out local smsg completion and local fma completion */
rc = opal_common_ugni_ep_create (ep->common, ep->btl->smsg_local_cq, &ep->smsg_ep_handle);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
mca_btl_ugni_device_lock (device);
ep->smsg_ep_handle = mca_btl_ugni_ep_handle_create (ep, device->dev_smsg_local_cq.gni_handle, device);
mca_btl_ugni_device_unlock (device);
if (OPAL_UNLIKELY(NULL == ep->smsg_ep_handle)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
/* build connection data */
@ -110,9 +232,10 @@ static inline int mca_btl_ugni_ep_connect_start (mca_btl_base_endpoint_t *ep) {
return rc;
}
ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTING;
memset (&ep->remote_attr, 0, sizeof (ep->remote_attr));
ep->remote_attr = calloc (1, sizeof (*ep->remote_attr));
if (OPAL_UNLIKELY(NULL == ep->remote_attr)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
BTL_VERBOSE(("btl/ugni connection to remote peer initiated"));
@ -120,15 +243,16 @@ static inline int mca_btl_ugni_ep_connect_start (mca_btl_base_endpoint_t *ep) {
}
static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) {
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
gni_return_t grc;
int rc;
BTL_VERBOSE(("finishing connection. remote attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
"mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
"msg_maxsize = %d", ep->remote_attr.smsg_attr.msg_type, ep->remote_attr.smsg_attr.msg_buffer,
ep->remote_attr.smsg_attr.buff_size, ep->remote_attr.smsg_attr.mem_hndl.qword1,
ep->remote_attr.smsg_attr.mem_hndl.qword2, ep->remote_attr.smsg_attr.mbox_offset,
ep->remote_attr.smsg_attr.mbox_maxcredit, ep->remote_attr.smsg_attr.msg_maxsize));
"msg_maxsize = %d", ep->remote_attr->smsg_attr.msg_type, ep->remote_attr->smsg_attr.msg_buffer,
ep->remote_attr->smsg_attr.buff_size, ep->remote_attr->smsg_attr.mem_hndl.qword1,
ep->remote_attr->smsg_attr.mem_hndl.qword2, ep->remote_attr->smsg_attr.mbox_offset,
ep->remote_attr->smsg_attr.mbox_maxcredit, ep->remote_attr->smsg_attr.msg_maxsize));
BTL_VERBOSE(("finishing connection. local attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
"mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
@ -137,54 +261,78 @@ static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) {
ep->mailbox->attr.smsg_attr.mem_hndl.qword2, ep->mailbox->attr.smsg_attr.mbox_offset,
ep->mailbox->attr.smsg_attr.mbox_maxcredit, ep->mailbox->attr.smsg_attr.msg_maxsize));
grc = GNI_SmsgInit (ep->smsg_ep_handle, &ep->mailbox->attr.smsg_attr, &ep->remote_attr.smsg_attr);
grc = GNI_SmsgInit (ep->smsg_ep_handle->gni_handle, &ep->mailbox->attr.smsg_attr,
&ep->remote_attr->smsg_attr);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
BTL_ERROR(("error initializing SMSG protocol. rc = %d", grc));
return opal_common_rc_ugni_to_opal (grc);
return mca_btl_rc_ugni_to_opal (grc);
}
/* set the local event data to the local index and the remote event data to my
* index on the remote peer. This makes lookup of endpoints on completion take
* a single lookup in the endpoints array. we will not be able to change the
* remote peer's index in the endpoint's array after this point. */
GNI_EpSetEventData (ep->rdma_ep_handle, ep->index, ep->remote_attr.index);
GNI_EpSetEventData (ep->smsg_ep_handle, ep->index, ep->remote_attr.index);
GNI_EpSetEventData (ep->smsg_ep_handle->gni_handle, ep->index, ep->remote_attr->index);
ep->rmt_irq_mem_hndl = ep->remote_attr.rmt_irq_mem_hndl;
ep->rmt_irq_mem_hndl = ep->remote_attr->rmt_irq_mem_hndl;
ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTED;
(void) opal_atomic_add_64 (&ep->btl->connected_peer_count, 1);
(void) opal_atomic_add_32 (&ep->smsg_ep_handle->device->smsg_connections, 1);
/* send all pending messages */
BTL_VERBOSE(("endpoint connected. posting %u sends", (unsigned int) opal_list_get_size (&ep->frag_wait_list)));
rc = mca_btl_ugni_progress_send_wait_list (ep);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
OPAL_THREAD_LOCK(&ep->btl->ep_wait_list_lock);
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
if (false == ep->wait_listed) {
opal_list_append (&ep->btl->ep_wait_list, &ep->super);
opal_list_append (&ugni_module->ep_wait_list, &ep->super);
ep->wait_listed = true;
}
OPAL_THREAD_UNLOCK(&ep->btl->ep_wait_list_lock);
OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
}
free (ep->remote_attr);
ep->remote_attr = NULL;
return OPAL_SUCCESS;
}
static inline int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep) {
static int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
mca_btl_ugni_device_t *device = ep->smsg_ep_handle->device;
gni_return_t rc;
BTL_VERBOSE(("posting directed datagram to remote id: %d for endpoint %p", ep->common->ep_rem_id, (void *)ep));
ep->mailbox->attr.rmt_irq_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl;
BTL_VERBOSE(("posting directed datagram to remote id: %d for endpoint %p", ep->ep_rem_id, (void *)ep));
/* the irq cq is associated with only the first device */
ep->mailbox->attr.rmt_irq_mem_hndl = ugni_module->devices->smsg_irq_mhndl;
rc = GNI_EpPostDataWId (ep->smsg_ep_handle, &ep->mailbox->attr, sizeof (ep->mailbox->attr),
&ep->remote_attr, sizeof (ep->remote_attr),
rc = GNI_EpPostDataWId (ep->smsg_ep_handle->gni_handle, &ep->mailbox->attr, sizeof (ep->mailbox->attr),
ep->remote_attr, sizeof (*ep->remote_attr),
MCA_BTL_UGNI_CONNECT_DIRECTED_ID | ep->index);
return opal_common_rc_ugni_to_opal (rc);
return mca_btl_rc_ugni_to_opal (rc);
}
int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep) {
int mca_btl_ugni_wildcard_ep_post (mca_btl_ugni_module_t *ugni_module)
{
gni_return_t rc;
BTL_VERBOSE(("posting wildcard datagram"));
memset (&ugni_module->wc_local_attr, 0, sizeof (ugni_module->wc_local_attr));
memset (&ugni_module->wc_remote_attr, 0, sizeof (ugni_module->wc_remote_attr));
rc = GNI_EpPostDataWId (ugni_module->wildcard_ep, &ugni_module->wc_local_attr,
sizeof (ugni_module->wc_local_attr), &ugni_module->wc_remote_attr,
sizeof (ugni_module->wc_remote_attr), MCA_BTL_UGNI_CONNECT_WILDCARD_ID);
return mca_btl_rc_ugni_to_opal (rc);
}
int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep)
{
int rc;
BTL_VERBOSE(("progressing connection for endpoint %p with state %d", (void *)ep, ep->state));
@ -193,14 +341,17 @@ int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep) {
return OPAL_SUCCESS;
}
if (MCA_BTL_UGNI_EP_STATE_RDMA >= ep->state) {
if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
rc = mca_btl_ugni_ep_connect_start (ep);
if (OPAL_SUCCESS != rc) {
return rc;
}
}
if (GNI_SMSG_TYPE_INVALID == ep->remote_attr.smsg_attr.msg_type) {
BTL_VERBOSE(("ep->remote_attr->smsg_attr = {.msg_type = %d, .msg_buffer = 0x%lx}", ep->remote_attr->smsg_attr.msg_type,
ep->remote_attr->smsg_attr.msg_buffer));
if (GNI_SMSG_TYPE_INVALID == ep->remote_attr->smsg_attr.msg_type) {
/* use datagram to exchange connection information with the remote peer */
if (!ep->dg_posted) {
rc = mca_btl_ugni_directed_ep_post (ep);
@ -217,3 +368,77 @@ int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep) {
return mca_btl_ugni_ep_connect_finish (ep);
}
int mca_btl_ugni_endpoint_handle_init_rdma (opal_free_list_item_t *item, void *ctx)
{
mca_btl_ugni_endpoint_handle_t *handle = (mca_btl_ugni_endpoint_handle_t *) item;
mca_btl_ugni_device_t *device = (mca_btl_ugni_device_t *) ctx;
gni_return_t grc;
grc = GNI_EpCreate (device->dev_handle, device->dev_rdma_local_cq.gni_handle, &handle->gni_handle);
handle->device = device;
return mca_btl_rc_ugni_to_opal (grc);
}
static void mca_btl_ugni_endpoint_handle_construct (mca_btl_ugni_endpoint_handle_t *handle)
{
handle->gni_handle = 0;
}
static void mca_btl_ugni_endpoint_handle_destruct (mca_btl_ugni_endpoint_handle_t *handle)
{
if (handle->gni_handle) {
GNI_EpDestroy (handle->gni_handle);
handle->gni_handle = 0;
}
}
OBJ_CLASS_INSTANCE(mca_btl_ugni_endpoint_handle_t, opal_object_t,
mca_btl_ugni_endpoint_handle_construct,
mca_btl_ugni_endpoint_handle_destruct);
mca_btl_ugni_endpoint_handle_t *mca_btl_ugni_ep_handle_create (mca_btl_ugni_endpoint_t *ep, gni_cq_handle_t cq,
mca_btl_ugni_device_t *device)
{
mca_btl_ugni_endpoint_handle_t *ep_handle;
gni_return_t grc;
ep_handle = OBJ_NEW(mca_btl_ugni_endpoint_handle_t);
if (OPAL_UNLIKELY(NULL == ep_handle)) {
return NULL;
}
ep_handle->device = device;
/* create a uGNI endpoint handle and bind it to the remote peer */
grc = GNI_EpCreate (device->dev_handle, cq, &ep_handle->gni_handle);
if (OPAL_LIKELY(GNI_RC_SUCCESS == grc)) {
grc = GNI_EpBind (ep_handle->gni_handle, ep->ep_rem_addr, ep->ep_rem_id);
}
if (GNI_RC_SUCCESS != grc) {
OBJ_RELEASE(ep_handle);
ep_handle = NULL;
}
return ep_handle;
}
int mca_btl_ugni_ep_handle_destroy (mca_btl_ugni_endpoint_handle_t *ep_handle)
{
int rc;
if (NULL == ep_handle || 0 == ep_handle->gni_handle) {
return OPAL_SUCCESS;
}
/* TODO: need to fix, may be outstanding tx's, etc. */
rc = GNI_EpUnbind (ep_handle->gni_handle);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
/* should warn */
}
OBJ_RELEASE(ep_handle);
return OPAL_SUCCESS;
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -17,15 +17,22 @@
enum mca_btl_ugni_endpoint_state_t {
MCA_BTL_UGNI_EP_STATE_INIT = 0,
MCA_BTL_UGNI_EP_STATE_START,
MCA_BTL_UGNI_EP_STATE_RDMA,
MCA_BTL_UGNI_EP_STATE_CONNECTING,
MCA_BTL_UGNI_EP_STATE_CONNECTED
MCA_BTL_UGNI_EP_STATE_CONNECTED,
};
typedef enum mca_btl_ugni_endpoint_state_t mca_btl_ugni_endpoint_state_t;
struct mca_btl_ugni_smsg_mbox_t;
struct mca_btl_ugni_endpoint_handle_t {
opal_free_list_item_t super;
mca_btl_ugni_device_t *device;
gni_ep_handle_t gni_handle;
};
typedef struct mca_btl_ugni_endpoint_handle_t mca_btl_ugni_endpoint_handle_t;
OBJ_CLASS_DECLARATION(mca_btl_ugni_endpoint_handle_t);
typedef struct mca_btl_base_endpoint_t {
opal_list_item_t super;
@ -37,24 +44,34 @@ typedef struct mca_btl_base_endpoint_t {
opal_recursive_mutex_t lock;
mca_btl_ugni_endpoint_state_t state;
opal_common_ugni_endpoint_t *common;
/** Remote NIC address */
uint32_t ep_rem_addr;
mca_btl_ugni_module_t *btl;
/** Remote CDM identifier (base) */
uint32_t ep_rem_id;
gni_ep_handle_t smsg_ep_handle;
gni_ep_handle_t rdma_ep_handle;
/** endpoint to use for SMSG messages */
mca_btl_ugni_endpoint_handle_t *smsg_ep_handle;
mca_btl_ugni_endpoint_attr_t remote_attr; /* TODO: UGH, remove this */
/** temporary space to store the remote SMSG attributes */
mca_btl_ugni_endpoint_attr_t *remote_attr;
/** SMSG mailbox assigned to this endpoint */
struct mca_btl_ugni_smsg_mbox_t *mailbox;
gni_mem_handle_t rmt_irq_mem_hndl;
/** Remote IRQ handle (for async completion) */
gni_mem_handle_t rmt_irq_mem_hndl;
/** frags waiting for SMSG credits */
opal_list_t frag_wait_list;
/** endpoint is currently wait-listed for SMSG progress */
bool wait_listed;
/** protect against race on connection */
bool dg_posted;
/** protect against re-entry to SMSG */
int32_t smsg_progressing;
int index;
@ -65,49 +82,10 @@ OBJ_CLASS_DECLARATION(mca_btl_ugni_endpoint_t);
int mca_btl_ugni_ep_connect_progress (mca_btl_ugni_endpoint_t *ep);
int mca_btl_ugni_ep_disconnect (mca_btl_ugni_endpoint_t *ep, bool send_disconnect);
static inline int mca_btl_ugni_init_ep (mca_btl_ugni_module_t *ugni_module,
mca_btl_ugni_endpoint_t **ep,
mca_btl_ugni_module_t *btl,
opal_proc_t *peer_proc) {
mca_btl_ugni_endpoint_t *endpoint;
endpoint = OBJ_NEW(mca_btl_ugni_endpoint_t);
assert (endpoint != NULL);
endpoint->smsg_progressing = 0;
endpoint->state = MCA_BTL_UGNI_EP_STATE_INIT;
endpoint->btl = btl;
endpoint->peer_proc = peer_proc;
endpoint->index = opal_pointer_array_add (&ugni_module->endpoints, endpoint);
*ep = endpoint;
return OPAL_SUCCESS;
}
static inline void mca_btl_ugni_release_ep (mca_btl_ugni_endpoint_t *ep) {
int rc;
if (ep->common) {
opal_mutex_lock (&ep->lock);
rc = mca_btl_ugni_ep_disconnect (ep, false);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_VERBOSE(("btl/ugni error disconnecting endpoint"));
}
/* TODO -- Clear space at the end of the endpoint array */
opal_pointer_array_set_item (&ep->btl->endpoints, ep->index, NULL);
opal_mutex_unlock (&ep->lock);
opal_common_ugni_endpoint_return (ep->common);
}
OBJ_RELEASE(ep);
}
int mca_btl_ugni_wildcard_ep_post (mca_btl_ugni_module_t *ugni_module);
void mca_btl_ugni_release_ep (mca_btl_ugni_endpoint_t *ep);
int mca_btl_ugni_init_ep (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_endpoint_t **ep,
mca_btl_ugni_module_t *btl, opal_proc_t *peer_proc);
static inline int mca_btl_ugni_check_endpoint_state (mca_btl_ugni_endpoint_t *ep) {
int rc;
@ -120,8 +98,6 @@ static inline int mca_btl_ugni_check_endpoint_state (mca_btl_ugni_endpoint_t *ep
switch (ep->state) {
case MCA_BTL_UGNI_EP_STATE_INIT:
case MCA_BTL_UGNI_EP_STATE_RDMA:
case MCA_BTL_UGNI_EP_STATE_START:
rc = mca_btl_ugni_ep_connect_progress (ep);
if (OPAL_SUCCESS != rc) {
break;
@ -138,63 +114,91 @@ static inline int mca_btl_ugni_check_endpoint_state (mca_btl_ugni_endpoint_t *ep
return rc;
}
static inline int mca_btl_ugni_ep_connect_rdma (mca_btl_base_endpoint_t *ep) {
int rc;
if (ep->state >= MCA_BTL_UGNI_EP_STATE_RDMA) {
return OPAL_SUCCESS;
}
/* protect against re-entry from opal_progress */
if (OPAL_UNLIKELY(MCA_BTL_UGNI_EP_STATE_START == ep->state)) {
return OPAL_ERR_RESOURCE_BUSY;
}
ep->state = MCA_BTL_UGNI_EP_STATE_START;
/* get the modex info for this endpoint and setup a ugni endpoint. this call may lead
* to re-entry through opal_progress(). */
rc = opal_common_ugni_endpoint_for_proc (ep->btl->device, ep->peer_proc, &ep->common);
if (OPAL_SUCCESS != rc) {
assert (0);
return rc;
}
/* bind endpoint to remote address */
rc = opal_common_ugni_ep_create (ep->common, ep->btl->rdma_local_cq, &ep->rdma_ep_handle);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
}
ep->state = MCA_BTL_UGNI_EP_STATE_RDMA;
return OPAL_SUCCESS;
/**
* Accessor function for endpoint btl
*
* @param[in] ep endpoint to query
*
* This helper function exists to make it easy to switch between using a single
* and multiple ugni modules. Currently there is only one so we just use the
* pointer in the component structure. This saves 4-8 bytes in the endpoint
* structure.
*/
static inline mca_btl_ugni_module_t *mca_btl_ugni_ep_btl (mca_btl_ugni_endpoint_t *ep)
{
/* there is only one ugni module at this time. if that changes add a btl pointer back
* to the endpoint structure. */
return mca_btl_ugni_component.modules;
}
static inline int mca_btl_ugni_check_endpoint_state_rdma (mca_btl_base_endpoint_t *ep) {
int rc;
if (OPAL_LIKELY(MCA_BTL_UGNI_EP_STATE_INIT < ep->state)) {
return OPAL_SUCCESS;
/**
* Allocate and bind a uGNI endpoint handle to the remote peer.
*
* @param[in] ep BTL endpoint
* @param[in] cq completion queue
* @param[out] ep_handle uGNI endpoint handle
*/
mca_btl_ugni_endpoint_handle_t *mca_btl_ugni_ep_handle_create (mca_btl_ugni_endpoint_t *ep, gni_cq_handle_t cq,
mca_btl_ugni_device_t *device);
/**
* Unbind and free the uGNI endpoint handle.
*
* @param[in] ep_handle uGNI endpoint handle to unbind and release
*/
int mca_btl_ugni_ep_handle_destroy (mca_btl_ugni_endpoint_handle_t *ep_handle);
/**
* Free list initialization function for endpoint handles (DO NOT CALL outside free list)
*
* @param[in] item Free list item to initialize
* @param[in] ctx Free list context
*
* @returns OPAL_SUCCESS on success
* @returns OPAL error code on error
*/
int mca_btl_ugni_endpoint_handle_init_rdma (opal_free_list_item_t *item, void *ctx);
/**
* @brief get an endpoint handle from a device's free list
*
* @param[in] ep btl endpoint
* @param[in] device btl device to use
*
* This function MUST be called with the device lock held. This was done over using
* the atomic free list to avoid unnecessary atomics in the critical path.
*/
static inline mca_btl_ugni_endpoint_handle_t *
mca_btl_ugni_ep_get_rdma (mca_btl_ugni_endpoint_t *ep, mca_btl_ugni_device_t *device)
{
mca_btl_ugni_endpoint_handle_t *ep_handle;
gni_return_t grc;
ep_handle = (mca_btl_ugni_endpoint_handle_t *) opal_free_list_get_st (&device->endpoints);
if (OPAL_UNLIKELY(NULL == ep_handle)) {
return NULL;
}
grc = GNI_EpBind (ep_handle->gni_handle, ep->ep_rem_addr, ep->ep_rem_id | device->dev_index);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
opal_free_list_return_st (&device->endpoints, &ep_handle->super);
ep_handle = NULL;
}
opal_mutex_lock (&ep->lock);
rc = mca_btl_ugni_ep_connect_rdma (ep);
opal_mutex_unlock (&ep->lock);
return rc;
return ep_handle;
}
static inline int mca_btl_ugni_wildcard_ep_post (mca_btl_ugni_module_t *ugni_module) {
gni_return_t rc;
BTL_VERBOSE(("posting wildcard datagram"));
memset (&ugni_module->wc_local_attr, 0, sizeof (ugni_module->wc_local_attr));
memset (&ugni_module->wc_remote_attr, 0, sizeof (ugni_module->wc_remote_attr));
rc = GNI_EpPostDataWId (ugni_module->wildcard_ep, &ugni_module->wc_local_attr,
sizeof (ugni_module->wc_local_attr), &ugni_module->wc_remote_attr,
sizeof (ugni_module->wc_remote_attr), MCA_BTL_UGNI_CONNECT_WILDCARD_ID);
return opal_common_rc_ugni_to_opal (rc);
/**
* @brief return an endpoint handle to a device's free list
*
* @param[in] ep_handle endpoint handle to return
*
* This function MUST be called with the device lock held. This was done over using
* the atomic free list to avoid unnecessary atomics in the critical path. If
*/
static inline void mca_btl_ugni_ep_return_rdma (mca_btl_ugni_endpoint_handle_t *ep_handle)
{
(void) GNI_EpUnbind (ep_handle->gni_handle);
opal_free_list_return_st (&ep_handle->device->endpoints, &ep_handle->super);
}
#endif /* MCA_BTL_UGNI_ENDPOINT_H */

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -38,11 +38,25 @@ OBJ_CLASS_INSTANCE(mca_btl_ugni_rdma_frag_t, mca_btl_base_descriptor_t,
OBJ_CLASS_INSTANCE(mca_btl_ugni_eager_frag_t, mca_btl_base_descriptor_t,
mca_btl_ugni_eager_frag_constructor, NULL);
OBJ_CLASS_INSTANCE(mca_btl_ugni_post_descriptor_t, opal_free_list_item_t,
NULL, NULL);
int mca_btl_ugni_frag_init (mca_btl_ugni_base_frag_t *frag, mca_btl_ugni_module_t *ugni_module)
static void mca_btl_ugni_post_descriptor_constructor (mca_btl_ugni_post_descriptor_t *desc)
{
desc->cq = NULL;
desc->ep_handle = NULL;
}
OBJ_CLASS_INSTANCE(mca_btl_ugni_post_descriptor_t, opal_free_list_item_t,
mca_btl_ugni_post_descriptor_constructor, NULL);
int mca_btl_ugni_frag_init (mca_btl_ugni_base_frag_t *frag, void *id)
{
/* NTH: the id is a combination of the module id and the free list id. for now there
* is only ever one module so the module id is ignored. if this changes the code
* here and btl_ugni_add_procs.c (opal_free_list_init calls) needs to be updated */
intptr_t free_list_id = (intptr_t) id & 0xff;
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_component.modules;
frag->msg_id = opal_pointer_array_add (&ugni_module->pending_smsg_frags_bb, (void *) frag);
frag->my_list = ugni_module->frags_lists + free_list_id;
return OPAL_SUCCESS;
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2013 The University of Tennessee and The University
@ -72,7 +72,7 @@ typedef struct mca_btl_ugni_base_frag_t {
uint16_t flags;
mca_btl_ugni_frag_hdr_t hdr;
mca_btl_base_segment_t segments[2];
opal_common_ugni_post_desc_t post_desc;
gni_post_descriptor_t post_desc;
mca_btl_base_endpoint_t *endpoint;
mca_btl_ugni_reg_t *registration;
opal_free_list_t *my_list;
@ -88,12 +88,15 @@ typedef struct mca_btl_ugni_base_frag_t mca_btl_ugni_eager_frag_t;
typedef struct mca_btl_ugni_post_descriptor_t {
opal_free_list_item_t super;
opal_common_ugni_post_desc_t desc;
gni_post_descriptor_t desc;
mca_btl_ugni_endpoint_handle_t *ep_handle;
mca_btl_base_endpoint_t *endpoint;
mca_btl_base_registration_handle_t *local_handle;
mca_btl_base_rdma_completion_fn_t cbfunc;
mca_btl_ugni_cq_t *cq;
void *cbdata;
void *ctx;
int tries;
} mca_btl_ugni_post_descriptor_t;
OBJ_CLASS_DECLARATION(mca_btl_ugni_post_descriptor_t);
@ -101,26 +104,38 @@ OBJ_CLASS_DECLARATION(mca_btl_ugni_post_descriptor_t);
#define MCA_BTL_UGNI_DESC_TO_PDESC(desc) \
((mca_btl_ugni_post_descriptor_t *)((uintptr_t) (desc) - offsetof (mca_btl_ugni_post_descriptor_t, desc)))
static inline void mca_btl_ugni_alloc_post_descriptor (mca_btl_base_endpoint_t *endpoint, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata,
mca_btl_ugni_post_descriptor_t **desc)
static inline mca_btl_ugni_post_descriptor_t *
mca_btl_ugni_alloc_post_descriptor (mca_btl_base_endpoint_t *endpoint, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
*desc = (mca_btl_ugni_post_descriptor_t *) opal_free_list_get (&endpoint->btl->post_descriptors);
if (NULL != *desc) {
(*desc)->cbfunc = cbfunc;
(*desc)->ctx = cbcontext;
(*desc)->cbdata = cbdata;
(*desc)->local_handle = local_handle;
(*desc)->endpoint = endpoint;
(void) OPAL_THREAD_ADD64(&endpoint->btl->active_rdma_count, 1);
/* mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (endpoint); */
mca_btl_ugni_post_descriptor_t *desc;
desc = OBJ_NEW(mca_btl_ugni_post_descriptor_t);
/* (mca_btl_ugni_post_descriptor_t *) opal_free_list_get (&ugni_module->post_descriptors); */
if (OPAL_UNLIKELY(NULL != desc)) {
desc->cbfunc = cbfunc;
desc->ctx = cbcontext;
desc->cbdata = cbdata;
desc->local_handle = local_handle;
desc->endpoint = endpoint;
}
return desc;
}
static inline void mca_btl_ugni_return_post_descriptor (mca_btl_ugni_module_t *module,
mca_btl_ugni_post_descriptor_t *desc)
static inline void mca_btl_ugni_return_post_descriptor (mca_btl_ugni_post_descriptor_t *desc)
{
(void) OPAL_THREAD_ADD64(&module->active_rdma_count, -1);
opal_free_list_return (&module->post_descriptors, &desc->super);
/* mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (desc->endpoint); */
if (NULL != desc->ep_handle) {
mca_btl_ugni_ep_return_rdma (desc->ep_handle);
/* desc->ep_handle = NULL; */
}
/* desc->cq = NULL; */
/* opal_free_list_return (&ugni_module->post_descriptors, &desc->super); */
free (desc);
}
static inline void mca_btl_ugni_post_desc_complete (mca_btl_ugni_module_t *module, mca_btl_ugni_post_descriptor_t *desc, int rc)
@ -129,40 +144,38 @@ static inline void mca_btl_ugni_post_desc_complete (mca_btl_ugni_module_t *modul
if (NULL != desc->cbfunc) {
/* call the user's callback function */
desc->cbfunc (&module->super, desc->endpoint, (void *)(intptr_t) desc->desc.base.local_addr,
desc->cbfunc (&module->super, desc->endpoint, (void *)(intptr_t) desc->desc.local_addr,
desc->local_handle, desc->ctx, desc->cbdata, rc);
}
/* the descriptor is no longer needed */
mca_btl_ugni_return_post_descriptor (module, desc);
mca_btl_ugni_return_post_descriptor (desc);
}
OBJ_CLASS_DECLARATION(mca_btl_ugni_smsg_frag_t);
OBJ_CLASS_DECLARATION(mca_btl_ugni_rdma_frag_t);
OBJ_CLASS_DECLARATION(mca_btl_ugni_eager_frag_t);
int mca_btl_ugni_frag_init (mca_btl_ugni_base_frag_t *frag, mca_btl_ugni_module_t *ugni_module);
int mca_btl_ugni_frag_init (mca_btl_ugni_base_frag_t *frag, void *id);
static inline int mca_btl_ugni_frag_alloc (mca_btl_base_endpoint_t *ep,
opal_free_list_t *list,
mca_btl_ugni_base_frag_t **frag)
static inline mca_btl_ugni_base_frag_t *mca_btl_ugni_frag_alloc (mca_btl_base_endpoint_t *ep,
opal_free_list_t *list)
{
*frag = (mca_btl_ugni_base_frag_t *) opal_free_list_get (list);
if (OPAL_LIKELY(NULL != *frag)) {
(*frag)->my_list = list;
(*frag)->endpoint = ep;
(*frag)->ref_cnt = 1;
return OPAL_SUCCESS;
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) opal_free_list_get (list);
if (OPAL_LIKELY(NULL != frag)) {
frag->endpoint = ep;
frag->ref_cnt = 1;
}
return OPAL_ERR_OUT_OF_RESOURCE;
return frag;
}
static inline int mca_btl_ugni_frag_return (mca_btl_ugni_base_frag_t *frag)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (frag->endpoint);
if (frag->registration) {
frag->endpoint->btl->rcache->rcache_deregister (frag->endpoint->btl->rcache,
(mca_rcache_base_registration_t *) frag->registration);
ugni_module->rcache->rcache_deregister (ugni_module->rcache,
(mca_rcache_base_registration_t *) frag->registration);
frag->registration = NULL;
}
@ -174,6 +187,7 @@ static inline int mca_btl_ugni_frag_return (mca_btl_ugni_base_frag_t *frag)
}
static inline bool mca_btl_ugni_frag_del_ref (mca_btl_ugni_base_frag_t *frag, int rc) {
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (frag->endpoint);
int32_t ref_cnt;
opal_atomic_mb ();
@ -186,7 +200,7 @@ static inline bool mca_btl_ugni_frag_del_ref (mca_btl_ugni_base_frag_t *frag, in
/* call callback if specified */
if (frag->base.des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
frag->base.des_cbfunc(&frag->endpoint->btl->super, frag->endpoint, &frag->base, rc);
frag->base.des_cbfunc(&ugni_module->super, frag->endpoint, &frag->base, rc);
}
if (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP) {
@ -208,15 +222,38 @@ static inline bool mca_btl_ugni_frag_check_complete (mca_btl_ugni_base_frag_t *f
return !!(MCA_BTL_UGNI_FRAG_COMPLETE & frag->flags);
}
#define MCA_BTL_UGNI_FRAG_ALLOC_SMSG(ep, frag) \
mca_btl_ugni_frag_alloc((ep), &(ep)->btl->smsg_frags, &(frag))
#define MCA_BTL_UGNI_FRAG_ALLOC_RDMA(ep, frag) \
mca_btl_ugni_frag_alloc((ep), &(ep)->btl->rdma_frags, &(frag))
#define MCA_BTL_UGNI_FRAG_ALLOC_RDMA_INT(ep, frag) \
mca_btl_ugni_frag_alloc((ep), &(ep)->btl->rdma_int_frags, &(frag))
#define MCA_BTL_UGNI_FRAG_ALLOC_EAGER_SEND(ep, frag) \
mca_btl_ugni_frag_alloc((ep), &(ep)->btl->eager_frags_send, &(frag))
#define MCA_BTL_UGNI_FRAG_ALLOC_EAGER_RECV(ep, frag) \
mca_btl_ugni_frag_alloc((ep), &(ep)->btl->eager_frags_recv, &(frag))
void mca_btl_ugni_wait_list_append (mca_btl_ugni_module_t *ugni_module, mca_btl_base_endpoint_t *endpoint,
mca_btl_ugni_base_frag_t *frag);
/* Allocate a fragment for an SMSG send to @ep. Returns NULL when the SMSG
 * free list is exhausted. */
static inline mca_btl_ugni_base_frag_t *mca_btl_ugni_frag_alloc_smsg (mca_btl_base_endpoint_t *ep)
{
    return mca_btl_ugni_frag_alloc (ep, mca_btl_ugni_ep_btl (ep)->frags_lists + MCA_BTL_UGNI_LIST_SMSG);
}
/* Allocate an RDMA fragment for @ep. Returns NULL when the RDMA free list
 * is exhausted. */
static inline mca_btl_ugni_base_frag_t *mca_btl_ugni_frag_alloc_rdma (mca_btl_base_endpoint_t *ep)
{
    return mca_btl_ugni_frag_alloc (ep, mca_btl_ugni_ep_btl (ep)->frags_lists + MCA_BTL_UGNI_LIST_RDMA);
}
/* Allocate an internal RDMA fragment for @ep. Returns NULL when the internal
 * RDMA free list is exhausted. */
static inline mca_btl_ugni_base_frag_t *mca_btl_ugni_frag_alloc_rdma_int (mca_btl_base_endpoint_t *ep)
{
    return mca_btl_ugni_frag_alloc (ep, mca_btl_ugni_ep_btl (ep)->frags_lists + MCA_BTL_UGNI_LIST_RDMA_INT);
}
/* Allocate an eager-send fragment for @ep. Returns NULL when the eager-send
 * free list is exhausted. */
static inline mca_btl_ugni_base_frag_t *mca_btl_ugni_frag_alloc_eager_send (mca_btl_base_endpoint_t *ep)
{
    return mca_btl_ugni_frag_alloc (ep, mca_btl_ugni_ep_btl (ep)->frags_lists + MCA_BTL_UGNI_LIST_EAGER_SEND);
}
/* Allocate an eager-receive fragment for @ep. Returns NULL when the
 * eager-receive free list is exhausted. */
static inline mca_btl_ugni_base_frag_t *mca_btl_ugni_frag_alloc_eager_recv (mca_btl_base_endpoint_t *ep)
{
    return mca_btl_ugni_frag_alloc (ep, mca_btl_ugni_ep_btl (ep)->frags_lists + MCA_BTL_UGNI_LIST_EAGER_RECV);
}
#endif /* MCA_BTL_UGNI_FRAG_H */

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -37,11 +37,8 @@ int mca_btl_ugni_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t
return OPAL_ERR_NOT_AVAILABLE;
}
BTL_VERBOSE(("Using RDMA/FMA Get from local address %p to remote address %" PRIx64,
local_address, remote_address));
/* cause endpoint to bind if it isn't already (bind is sufficient for rdma) */
(void) mca_btl_ugni_check_endpoint_state_rdma (endpoint);
BTL_VERBOSE(("Using RDMA/FMA Get %lu bytes to local address %p to remote address %" PRIx64,
(unsigned long) size, local_address, remote_address));
return mca_btl_ugni_post (endpoint, true, size, local_address, remote_address, local_handle,
remote_handle, order, cbfunc, cbcontext, cbdata);
@ -110,13 +107,15 @@ static void mca_btl_ugni_callback_eager_get (struct mca_btl_base_module_t *btl,
}
reg = mca_btl_base_active_message_trigger + tag;
reg->cbfunc(&frag->endpoint->btl->super, tag, &(tmp.base), reg->cbdata);
reg->cbfunc(&ugni_module->super, tag, &(tmp.base), reg->cbdata);
/* fill in the response header */
frag->hdr.rdma.ctx = frag->hdr.eager.ctx;
frag->flags = MCA_BTL_UGNI_FRAG_RESPONSE;
frag->ref_cnt = 1;
frag->ref_cnt = 1;
/* once complete use this fragment for a pending eager get if any exist */
frag->base.des_cbfunc = mca_btl_ugni_callback_eager_get_progress_pending;
@ -125,16 +124,7 @@ static void mca_btl_ugni_callback_eager_get (struct mca_btl_base_module_t *btl,
NULL, 0, MCA_BTL_UGNI_TAG_RDMA_COMPLETE);
if (OPAL_UNLIKELY(0 > rc)) {
/* queue fragment */
OPAL_THREAD_LOCK(&endpoint->lock);
if (false == endpoint->wait_listed) {
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
opal_list_append (&ugni_module->ep_wait_list, &endpoint->super);
OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
endpoint->wait_listed = true;
}
opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) frag);
OPAL_THREAD_UNLOCK(&endpoint->lock);
mca_btl_ugni_wait_list_append (ugni_module, endpoint, frag);
}
}
@ -142,7 +132,7 @@ int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *endpoint,
mca_btl_ugni_eager_ex_frag_hdr_t hdr,
mca_btl_ugni_base_frag_t *frag)
{
mca_btl_ugni_module_t *ugni_module = endpoint->btl;
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (endpoint);
size_t size;
int rc;
@ -151,10 +141,10 @@ int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *endpoint,
do {
if (NULL == frag) {
/* try to allocate a registered buffer */
rc = MCA_BTL_UGNI_FRAG_ALLOC_EAGER_RECV(endpoint, frag);
frag = mca_btl_ugni_frag_alloc_eager_recv (endpoint);
if (OPAL_UNLIKELY(NULL == frag)) {
/* no registered buffers available. try again later */
(void) MCA_BTL_UGNI_FRAG_ALLOC_RDMA_INT(endpoint, frag);
frag = mca_btl_ugni_frag_alloc_rdma_int (endpoint);
/* not much can be done if a small fragment can not be allocated. abort! */
assert (NULL != frag);

306
opal/mca/btl/ugni/btl_ugni_init.c Обычный файл
Просмотреть файл

@ -0,0 +1,306 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "btl_ugni.h"
#include "btl_ugni_endpoint.h"
#include "opal/class/opal_list.h"
#include "opal/dss/dss.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/util/bit_ops.h"
/**
 * Read the protection tag assigned by ALPS/PMI from the environment.
 *
 * @param[out] out_ptag  the parsed protection tag (only written on success)
 *
 * @returns OPAL_SUCCESS on success, OPAL_ERR_NOT_FOUND if PMI_GNI_PTAG is
 *          not set, or OPAL_ERR_VALUE_OUT_OF_BOUNDS if it does not parse as
 *          an integer in [0, 255].
 */
static inline int get_ptag(uint8_t *out_ptag)
{
    char *ptr, *endp;
    unsigned long tmp_ptag;

    if (NULL == (ptr = getenv("PMI_GNI_PTAG"))) {
        /* TODO add err msg - better rc? */
        return OPAL_ERR_NOT_FOUND;
    }

    errno = 0;
    tmp_ptag = strtoul (ptr, &endp, 10);
    /* strtoul only sets errno on overflow (ERANGE); additionally reject
     * empty or trailing-garbage input and values that do not fit in a
     * uint8_t (the original code silently truncated) */
    if (0 != errno || endp == ptr || '\0' != *endp || tmp_ptag > UINT8_MAX) {
        /* TODO add err msg - better rc? */
        return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
    }

    *out_ptag = (uint8_t) tmp_ptag;
    return OPAL_SUCCESS;
}
/**
 * Read the cookie assigned by ALPS/PMI from the environment.
 *
 * @param[out] out_cookie  the parsed cookie (only written on success)
 *
 * @returns OPAL_SUCCESS on success, OPAL_ERR_NOT_FOUND if PMI_GNI_COOKIE is
 *          not set, or OPAL_ERR_VALUE_OUT_OF_BOUNDS if it does not parse as
 *          an unsigned 32-bit integer.
 */
static inline int get_cookie (uint32_t *out_cookie)
{
    char *ptr, *endp;
    unsigned long tmp_cookie;

    if (NULL == (ptr = getenv("PMI_GNI_COOKIE"))) {
        /* TODO add err msg - better rc? */
        return OPAL_ERR_NOT_FOUND;
    }

    errno = 0;
    tmp_cookie = strtoul (ptr, &endp, 10);
    /* strtoul only sets errno on overflow (ERANGE); additionally reject
     * empty or trailing-garbage input and values wider than 32 bits */
    if (0 != errno || endp == ptr || '\0' != *endp || tmp_cookie > UINT32_MAX) {
        /* TODO add err msg - better rc? */
        return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
    }

    *out_cookie = (uint32_t) tmp_cookie;
    return OPAL_SUCCESS;
}
/**
 * Determine the NIC address for the given device.
 *
 * Under ALPS the device ids and NIC addresses are published in the
 * PMI_GNI_DEV_ID and PMI_GNI_LOC_ADDR environment variables as ':'
 * separated lists; otherwise the address is queried from the driver via
 * GNI_CdmGetNicAddress().
 *
 * @param device_id  uGNI device index to look up
 *
 * @returns the NIC address, or (unsigned int) -1 on any failure
 */
static unsigned int mca_btl_ugni_get_nic_address(int device_id)
{
    unsigned int address, cpu_id;
    unsigned int nic_addr = (unsigned int) -1;
    gni_return_t status;
    int i, alps_dev_id = -1;
    char *token, *p_ptr, *buffer, *saveptr;

    p_ptr = getenv("PMI_GNI_DEV_ID");
    if (!p_ptr) {
        /* no ALPS information available -- ask the driver directly */
        status = GNI_CdmGetNicAddress(device_id, &address, &cpu_id);
        if (status != GNI_RC_SUCCESS) {
            opal_output (0, "FAILED:GNI_CdmGetNicAddress returned error %d", status);
            return (unsigned int)-1;
        }
        return address;
    }

    /* strtok() modifies its argument in place. running it directly on the
     * getenv() buffer (as the original code did) corrupts the environment
     * for any later reader, so parse a private copy and use the reentrant
     * strtok_r() */
    buffer = strdup (p_ptr);
    if (OPAL_UNLIKELY(NULL == buffer)) {
        return (unsigned int)-1;
    }

    for (token = strtok_r (buffer, ":", &saveptr) ; NULL != token ;
         token = strtok_r (NULL, ":", &saveptr)) {
        alps_dev_id = atoi(token);
        if (alps_dev_id == device_id) {
            break;
        }
    }

    free (buffer);

    if (OPAL_UNLIKELY(-1 == alps_dev_id)) {
        return (unsigned int)-1;
    }

    p_ptr = getenv("PMI_GNI_LOC_ADDR");
    if (OPAL_UNLIKELY(NULL == p_ptr)) {
        return (unsigned int)-1;
    }

    buffer = strdup (p_ptr);
    if (OPAL_UNLIKELY(NULL == buffer)) {
        return (unsigned int)-1;
    }

    /* the NIC address is the list entry at index alps_dev_id */
    i = 0;
    for (token = strtok_r (buffer, ":", &saveptr) ; NULL != token ;
         token = strtok_r (NULL, ":", &saveptr), ++i) {
        if (i == alps_dev_id) {
            nic_addr = (unsigned int) strtoul (token, NULL, 10);
            break;
        }
    }

    free (buffer);

    return nic_addr;
}
/**
 * Initialize one virtual device: endpoint free list, pending-post list,
 * communication domain, and NIC attachment.
 *
 * @param device             device structure to initialize
 * @param virtual_device_id  index of this virtual device (ORed into the cdm id)
 *
 * @returns OPAL_SUCCESS on success or an OPAL error code. On failure all
 *          resources created so far are released (the original code leaked
 *          the constructed lists -- and the cdm on attach failure).
 */
int mca_btl_ugni_device_init (mca_btl_ugni_device_t *device, int virtual_device_id)
{
    uint32_t dev_pe_addr;
    int rc;

    OBJ_CONSTRUCT(&device->endpoints, opal_free_list_t);
    OBJ_CONSTRUCT(&device->pending_post, opal_list_t);

    rc = opal_free_list_init (&device->endpoints, sizeof (mca_btl_ugni_endpoint_handle_t),
                              8, OBJ_CLASS(mca_btl_ugni_endpoint_handle_t), 0, 8, 0,
                              mca_btl_ugni_component.local_cq_size, 16,
                              NULL, 0, NULL, mca_btl_ugni_endpoint_handle_init_rdma,
                              (void *) device);
    if (OPAL_SUCCESS != rc) {
        OBJ_DESTRUCT(&device->endpoints);
        OBJ_DESTRUCT(&device->pending_post);
        return rc;
    }

    /* create a communication domain */
    rc = GNI_CdmCreate (mca_btl_ugni_component.cdm_id_base | virtual_device_id, mca_btl_ugni_component.ptag,
                        mca_btl_ugni_component.cookie, mca_btl_ugni_component.cdm_flags, &device->dev_cd_handle);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        /* this REALLY is an error but under alps + mapn we may not get any credentials */
        BTL_VERBOSE(("Error: Creating communication domain %d for virtual device %d", rc, virtual_device_id));
        OBJ_DESTRUCT(&device->endpoints);
        OBJ_DESTRUCT(&device->pending_post);
        return mca_btl_rc_ugni_to_opal (rc);
    }

    device->dev_index = virtual_device_id;

    /* Create a NIC address */
    OPAL_OUTPUT((-1, "Got NIC Addr: 0x%08x, CPU ID: %d", mca_btl_ugni_component.dev_addr, 0));

    /* Attach device to the communication domain */
    rc = GNI_CdmAttach (device->dev_cd_handle, 0, &dev_pe_addr, &device->dev_handle);
    if (GNI_RC_SUCCESS != rc) {
        BTL_VERBOSE(("Error: Attaching to communication domain. rc = %d, virtual device = %d", rc, virtual_device_id));
        /* tear down everything created so far */
        (void) GNI_CdmDestroy (device->dev_cd_handle);
        OBJ_DESTRUCT(&device->endpoints);
        OBJ_DESTRUCT(&device->pending_post);
        return mca_btl_rc_ugni_to_opal (rc);
    }

    /* completion queues are created lazily; start with everything cleared */
    device->lock = 0;
    device->dev_rdma_local_cq.gni_handle = 0;
    device->dev_rdma_local_cq.active_operations = 0;
    device->dev_rdma_local_irq_cq.gni_handle = 0;
    device->dev_rdma_local_irq_cq.active_operations = 0;
    device->dev_smsg_local_cq.gni_handle = 0;
    device->dev_smsg_local_cq.active_operations = 0;

    return OPAL_SUCCESS;
}
/* Release all resources associated with a single virtual device. Inverse of
 * mca_btl_ugni_device_init(). Always returns OPAL_SUCCESS; a failure to
 * destroy the communication domain is only logged. */
int mca_btl_ugni_device_fini (mca_btl_ugni_device_t *dev)
{
    gni_cq_handle_t *cq_handles[] = {&dev->dev_rdma_local_cq.gni_handle,
                                     &dev->dev_rdma_local_irq_cq.gni_handle,
                                     &dev->dev_smsg_local_cq.gni_handle};
    int rc;

    OBJ_DESTRUCT(&dev->endpoints);
    OBJ_DESTRUCT(&dev->pending_post);

    /* destroy whichever completion queues were actually created */
    for (size_t i = 0 ; i < sizeof (cq_handles) / sizeof (cq_handles[0]) ; ++i) {
        if (0 != *cq_handles[i]) {
            GNI_CqDestroy (*cq_handles[i]);
            *cq_handles[i] = 0;
        }
    }

    rc = GNI_CdmDestroy (dev->dev_cd_handle);
    if (GNI_RC_SUCCESS != rc) {
        BTL_VERBOSE(("error destroying cdm handle"));
    }

    return OPAL_SUCCESS;
}
/*
* Send local device information and other information
* required for setup
*/
/*
 * Publish this process' uGNI connection information (NIC address and base
 * cdm id) via the modex so peers can set up connections.
 */
static int mca_btl_ugni_send_modex (void)
{
    const uint32_t payload_size = sizeof (struct mca_btl_ugni_modex_t);
    struct mca_btl_ugni_modex_t modex;
    char *payload;
    int rc;

    modex.addr = mca_btl_ugni_component.dev_addr;
    modex.id = mca_btl_ugni_component.cdm_id_base;

    payload = (char *) malloc (payload_size);
    if (NULL == payload) {
        OPAL_OUTPUT((-1, "Error allocating memory for modex @ %s:%d",
                     __FILE__, __LINE__));
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    BTL_VERBOSE(("sending modex. addr: %d, id: %d", modex.addr, modex.id));

    memcpy ((void *) payload, (void *) &modex, payload_size);

    /*
     * need global for edge cases like MPI_Comm_spawn support with
     * new ranks started on the same nodes as the spawnee ranks, etc.
     */
    OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
                    &mca_btl_ugni_component.super.btl_version,
                    payload, payload_size);

    free (payload);

    return rc;
}
/* Component-level teardown counterpart to mca_btl_ugni_init(). Currently a
 * no-op: per-device resources are released in mca_btl_ugni_device_fini(). */
int mca_btl_ugni_fini (void)
{
    return OPAL_SUCCESS;
}
/**
 * One-time component initialization: choose the virtual device count, derive
 * a node-unique cdm id base from the pid, collect the ptag/cookie/NIC
 * address from the PMI environment, and publish the modex.
 *
 * @returns OPAL_SUCCESS on success or an OPAL error code (ptag/cookie lookup
 *          or modex send failure -- the original code silently discarded the
 *          modex result).
 */
int mca_btl_ugni_init (void)
{
    int32_t pid_max = 32768;
    int rc, bit;
    FILE *fh;

    if (0 == mca_btl_ugni_component.virtual_device_count) {
        /* XXX -- TODO -- might want to improve this logic. One option would be to
         * compare the number of local peers vs the number of cores or hyperthreads
         * on the node. */
        if (!opal_using_threads() || opal_process_info.num_local_peers >= 255) {
            /* there is probably no benefit to using multiple device contexts when not
             * using threads. */
            mca_btl_ugni_component.virtual_device_count = 1;
        } else if (opal_process_info.num_local_peers >= 127) {
            mca_btl_ugni_component.virtual_device_count = 2;
        } else if (opal_process_info.num_local_peers >= 63) {
            mca_btl_ugni_component.virtual_device_count = 4;
        } else if (opal_process_info.num_local_peers >= 31) {
            mca_btl_ugni_component.virtual_device_count = 8;
        } else {
            mca_btl_ugni_component.virtual_device_count = 16;
        }
    } else if (MCA_BTL_UGNI_MAX_DEV_HANDLES < mca_btl_ugni_component.virtual_device_count) {
        mca_btl_ugni_component.virtual_device_count = MCA_BTL_UGNI_MAX_DEV_HANDLES;
    }

    fh = fopen ("/proc/sys/kernel/pid_max", "r");
    if (NULL != fh) {
        /* keep the conservative default if the value can not be read */
        if (1 != fscanf (fh, "%d", &pid_max)) {
            pid_max = 32768;
        }
        fclose (fh);
    }

    /* Use pid to generate the cdm_id. Although its not stated in the uGNI
     * documentation, the cdm_id only needs to be unique within a node for a
     * given ptag/cookie tuple */
    bit = opal_hibit (pid_max, 31);
    if (bit >= 31) {
        mca_btl_ugni_component.virtual_device_count = 1;
        mca_btl_ugni_component.cdm_id_base = getpid();
    } else if (bit >= 30 && mca_btl_ugni_component.virtual_device_count > 2) {
        mca_btl_ugni_component.virtual_device_count = 2;
        mca_btl_ugni_component.cdm_id_base = getpid() << 1;
    } else {
        mca_btl_ugni_component.cdm_id_base = getpid() << 8;
    }

    /* collect uGNI information */
    rc = get_ptag(&mca_btl_ugni_component.ptag);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        return rc;
    }

    rc = get_cookie(&mca_btl_ugni_component.cookie);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        return rc;
    }

    /* get the device address of the NIC */
    mca_btl_ugni_component.dev_addr = mca_btl_ugni_get_nic_address (0);

    /* send ugni modex; propagate any failure instead of ignoring it */
    return mca_btl_ugni_send_modex ();
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
@ -62,22 +62,18 @@ mca_btl_ugni_module_t mca_btl_ugni_module = {
};
int
mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
opal_common_ugni_device_t *dev)
mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module)
{
int rc;
BTL_VERBOSE(("binding module %p to device %p", (void *) ugni_module,
(void *) dev));
BTL_VERBOSE(("binding module %p to device 0", (void *) ugni_module));
/* copy module defaults (and function pointers) */
memmove (ugni_module, &mca_btl_ugni_module, sizeof (mca_btl_ugni_module));
ugni_module->initialized = false;
ugni_module->nlocal_procs = 0;
ugni_module->active_send_count = 0;
ugni_module->connected_peer_count = 0;
ugni_module->active_rdma_count = 0;
OBJ_CONSTRUCT(&ugni_module->failed_frags, opal_list_t);
OBJ_CONSTRUCT(&ugni_module->failed_frags_lock, opal_mutex_t);
@ -85,11 +81,10 @@ mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
OBJ_CONSTRUCT(&ugni_module->eager_get_pending, opal_list_t);
OBJ_CONSTRUCT(&ugni_module->eager_get_pending_lock,opal_mutex_t);
OBJ_CONSTRUCT(&ugni_module->eager_frags_send, opal_free_list_t);
OBJ_CONSTRUCT(&ugni_module->eager_frags_recv, opal_free_list_t);
OBJ_CONSTRUCT(&ugni_module->smsg_frags, opal_free_list_t);
OBJ_CONSTRUCT(&ugni_module->rdma_frags, opal_free_list_t);
OBJ_CONSTRUCT(&ugni_module->rdma_int_frags, opal_free_list_t);
for (int i = 0 ; i < MCA_BTL_UGNI_LIST_MAX ; ++i) {
OBJ_CONSTRUCT(ugni_module->frags_lists + i, opal_free_list_t);
}
OBJ_CONSTRUCT(&ugni_module->pending_smsg_frags_bb, opal_pointer_array_t);
OBJ_CONSTRUCT(&ugni_module->ep_wait_list_lock,opal_mutex_t);
OBJ_CONSTRUCT(&ugni_module->ep_wait_list, opal_list_t);
@ -97,22 +92,26 @@ mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
OBJ_CONSTRUCT(&ugni_module->endpoints, opal_pointer_array_t);
OBJ_CONSTRUCT(&ugni_module->id_to_endpoint, opal_hash_table_t);
OBJ_CONSTRUCT(&ugni_module->smsg_mboxes, opal_free_list_t);
OBJ_CONSTRUCT(&ugni_module->pending_descriptors, opal_list_t);
OBJ_CONSTRUCT(&ugni_module->eager_get_pending, opal_list_t);
OBJ_CONSTRUCT(&ugni_module->post_descriptors, opal_free_list_t);
ugni_module->device = dev;
dev->btl_ctx = (void *) ugni_module;
/* set up virtual device handles */
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
rc = mca_btl_ugni_device_init (ugni_module->devices + i, i);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_VERBOSE(("error initializing uGNI device handle"));
return rc;
}
}
/* create wildcard endpoint to listen for connections.
* there is no need to bind this endpoint. */
OPAL_THREAD_LOCK(&dev->dev_lock);
rc = GNI_EpCreate (ugni_module->device->dev_handle, NULL,
/* create wildcard endpoint on first device to listen for connections.
* there is no need to bind this endpoint. We are single threaded
* here so there is no need for a device lock. */
rc = GNI_EpCreate (ugni_module->devices[0].dev_handle, NULL,
&ugni_module->wildcard_ep);
OPAL_THREAD_UNLOCK(&dev->dev_lock);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("error creating wildcard ugni endpoint"));
return opal_common_rc_ugni_to_opal (rc);
return mca_btl_rc_ugni_to_opal (rc);
}
/* post wildcard datagram */
@ -133,16 +132,8 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
uint64_t key;
int rc;
while (ugni_module->active_send_count) {
/* ensure all sends are complete before closing the module */
rc = mca_btl_ugni_progress_local_smsg (ugni_module);
if (OPAL_SUCCESS != rc) {
break;
}
}
/* close all open connections and release endpoints */
if (ugni_module->initialized) {
/* close all open connections and release endpoints */
OPAL_HASH_TABLE_FOREACH(key, uint64, ep, &ugni_module->id_to_endpoint) {
if (NULL != ep) {
mca_btl_ugni_release_ep (ep);
@ -154,28 +145,12 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
}
/* destroy all cqs */
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqDestroy (ugni_module->rdma_local_cq);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error tearing down local BTE/FMA CQ - %s",gni_err_str[rc]));
}
rc = GNI_CqDestroy (ugni_module->smsg_local_cq);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error tearing down TX SMSG CQ - %s",gni_err_str[rc]));
}
rc = GNI_CqDestroy (ugni_module->smsg_remote_cq);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error tearing down RX SMSG CQ - %s",gni_err_str[rc]));
}
if (mca_btl_ugni_component.progress_thread_enabled) {
rc = GNI_CqDestroy (ugni_module->rdma_local_irq_cq);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error tearing down local BTE/FMA CQ - %s",gni_err_str[rc]));
}
rc = GNI_CqDestroy (ugni_module->smsg_remote_irq_cq);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error tearing down remote SMSG CQ - %s",gni_err_str[rc]));
@ -195,14 +170,12 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
if (GNI_RC_SUCCESS != rc) {
BTL_VERBOSE(("btl/ugni error destroying endpoint - %s",gni_err_str[rc]));
}
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
}
OBJ_DESTRUCT(&ugni_module->eager_frags_send);
OBJ_DESTRUCT(&ugni_module->eager_frags_recv);
OBJ_DESTRUCT(&ugni_module->smsg_frags);
OBJ_DESTRUCT(&ugni_module->rdma_frags);
OBJ_DESTRUCT(&ugni_module->rdma_int_frags);
for (int i = 0 ; i < MCA_BTL_UGNI_LIST_MAX ; ++i) {
OBJ_DESTRUCT(ugni_module->frags_lists + i);
}
OBJ_DESTRUCT(&ugni_module->ep_wait_list);
OBJ_DESTRUCT(&ugni_module->smsg_mboxes);
OBJ_DESTRUCT(&ugni_module->pending_smsg_frags_bb);
@ -217,6 +190,10 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
mca_rcache_base_module_destroy (ugni_module->rcache);
}
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
mca_btl_ugni_device_fini (ugni_module->devices + i);
}
ugni_module->initialized = false;
return OPAL_SUCCESS;
@ -230,10 +207,17 @@ mca_btl_ugni_alloc(struct mca_btl_base_module_t *btl,
{
mca_btl_ugni_base_frag_t *frag = NULL;
if (size <= mca_btl_ugni_component.smsg_max_data) {
(void) MCA_BTL_UGNI_FRAG_ALLOC_SMSG(endpoint, frag);
/* do not allocate a fragment unless the wait list is relatively small. this
* reduces the potential for resource exhaustion. note the wait list only exists
* because we have no way to notify the sender that credits are available. */
if (OPAL_UNLIKELY(opal_list_get_size (&endpoint->frag_wait_list) > 32)) {
return NULL;
}
if (size <= mca_btl_ugni_component.smsg_max_data) {
frag = mca_btl_ugni_frag_alloc_smsg (endpoint);
} else if (size <= btl->btl_eager_limit) {
(void) MCA_BTL_UGNI_FRAG_ALLOC_EAGER_SEND(endpoint, frag);
frag = mca_btl_ugni_frag_alloc_eager_send (endpoint);
}
if (OPAL_UNLIKELY(NULL == frag)) {
@ -284,6 +268,13 @@ mca_btl_ugni_prepare_src (struct mca_btl_base_module_t *btl,
uint8_t order, size_t reserve, size_t *size,
uint32_t flags)
{
/* do not allocate a fragment unless the wait list is relatively small. this
* reduces the potential for resource exhaustion. note the wait list only exists
* because we have no way to notify the sender that credits are available. */
if (OPAL_UNLIKELY(opal_list_get_size (&endpoint->frag_wait_list) > 32)) {
return NULL;
}
return mca_btl_ugni_prepare_src_send (btl, endpoint, convertor,
order, reserve, size, flags);
}

Просмотреть файл

@ -26,7 +26,7 @@ mca_btl_ugni_prepare_src_send_nodata (struct mca_btl_base_module_t *btl,
{
mca_btl_ugni_base_frag_t *frag = NULL;
(void) MCA_BTL_UGNI_FRAG_ALLOC_RDMA(endpoint, frag);
frag = mca_btl_ugni_frag_alloc_rdma (endpoint);
if (OPAL_UNLIKELY(NULL == frag)) {
return NULL;
}
@ -65,8 +65,7 @@ mca_btl_ugni_prepare_src_send_inplace (struct mca_btl_base_module_t *btl,
opal_convertor_get_current_pointer (convertor, &data_ptr);
(void) MCA_BTL_UGNI_FRAG_ALLOC_RDMA(endpoint, frag);
frag = mca_btl_ugni_frag_alloc_rdma (endpoint);
if (OPAL_UNLIKELY(NULL == frag)) {
return NULL;
}
@ -123,7 +122,7 @@ mca_btl_ugni_prepare_src_send_buffered (struct mca_btl_base_module_t *btl,
int rc;
if (OPAL_UNLIKELY(true == use_eager_get)) {
(void) MCA_BTL_UGNI_FRAG_ALLOC_EAGER_SEND(endpoint, frag);
frag = mca_btl_ugni_frag_alloc_eager_send (endpoint);
if (OPAL_UNLIKELY(NULL == frag)) {
return NULL;
}
@ -136,7 +135,7 @@ mca_btl_ugni_prepare_src_send_buffered (struct mca_btl_base_module_t *btl,
frag->hdr_size = reserve + sizeof (frag->hdr.eager);
frag->segments[0].seg_addr.pval = frag->hdr.eager_ex.pml_header;
} else {
(void) MCA_BTL_UGNI_FRAG_ALLOC_SMSG(endpoint, frag);
frag = mca_btl_ugni_frag_alloc_smsg (endpoint);
if (OPAL_UNLIKELY(NULL == frag)) {
return NULL;
}
@ -186,8 +185,8 @@ mca_btl_ugni_prepare_src_send (struct mca_btl_base_module_t *btl,
opal_convertor_get_current_pointer (convertor, &data_ptr);
send_in_place = !(opal_convertor_need_buffers(convertor) ||
(use_eager_get && ((uintptr_t)data_ptr & 3)));
send_in_place = (btl->btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) && !(opal_convertor_need_buffers(convertor) ||
(use_eager_get && ((uintptr_t)data_ptr & 3)));
if (send_in_place) {
return mca_btl_ugni_prepare_src_send_inplace (btl, endpoint, convertor, order,

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -29,17 +29,19 @@ static void *mca_btl_ugni_prog_thread_fn(void * data)
{
uint32_t which;
gni_return_t status;
gni_cq_handle_t cq_vec[2];
gni_cq_handle_t cq_vec[1 + MCA_BTL_UGNI_MAX_DEV_HANDLES];
struct mca_btl_ugni_module_t *btl = (mca_btl_ugni_module_t *)data;
int cq_count = 1 + mca_btl_ugni_component.virtual_device_count;
/*
* need to block signals
*/
cq_vec[0] = btl->smsg_remote_irq_cq;
cq_vec[1] = btl->rdma_local_irq_cq;
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
cq_vec[i + 1] = btl->devices[i].dev_rdma_local_irq_cq.gni_handle;
}
while (stop_progress_thread == 0) {
@ -48,7 +50,7 @@ static void *mca_btl_ugni_prog_thread_fn(void * data)
*/
status = GNI_CqVectorMonitor(cq_vec,
2,
cq_count,
-1,
&which);
@ -106,8 +108,8 @@ int mca_btl_ugni_kill_progress_thread(void)
*/
ret = mca_btl_ugni_post_cqwrite (mca_btl_ugni_component.modules[0].local_ep,
mca_btl_ugni_component.modules[0].rdma_local_cq,
mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl,
&mca_btl_ugni_component.modules[0].devices[0].dev_rdma_local_cq,
mca_btl_ugni_component.modules[0].devices[0].smsg_irq_mhndl,
0xdead, NULL, NULL, NULL);
/*
* TODO: if error returned, need to kill off thread manually

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -19,11 +19,8 @@ int mca_btl_ugni_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
BTL_VERBOSE(("Using RDMA/FMA Put from local address %p to remote address %" PRIx64,
local_address, remote_address));
/* cause endpoint to bind if it isn't already (bind is sufficient for rdma) */
(void) mca_btl_ugni_check_endpoint_state_rdma (endpoint);
BTL_VERBOSE(("Using RDMA/FMA Put %lu bytes from local address %p to remote address %" PRIx64,
(unsigned long) size, local_address, remote_address));
return mca_btl_ugni_post (endpoint, false, size, local_address, remote_address, local_handle,
remote_handle, order, cbfunc, cbcontext, cbdata);

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -15,12 +15,13 @@
#include "btl_ugni.h"
#include "btl_ugni_frag.h"
#include "btl_ugni_device.h"
int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *ep,
mca_btl_ugni_eager_ex_frag_hdr_t hdr,
mca_btl_ugni_base_frag_t *frag);
static inline void init_gni_post_desc (opal_common_ugni_post_desc_t *post_desc,
static inline void init_gni_post_desc (mca_btl_ugni_post_descriptor_t *post_desc,
int order, gni_post_type_t op_type,
uint64_t lcl_addr,
gni_mem_handle_t lcl_mdh,
@ -28,20 +29,20 @@ static inline void init_gni_post_desc (opal_common_ugni_post_desc_t *post_desc,
gni_mem_handle_t rem_mdh,
uint64_t bufsize,
gni_cq_handle_t cq_hndl) {
post_desc->base.type = op_type;
post_desc->base.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
post_desc->desc.type = op_type;
post_desc->desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
if (MCA_BTL_NO_ORDER == order) {
post_desc->base.dlvr_mode = GNI_DLVMODE_PERFORMANCE;
post_desc->desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE;
} else {
post_desc->base.dlvr_mode = GNI_DLVMODE_NO_ADAPT;
post_desc->desc.dlvr_mode = GNI_DLVMODE_NO_ADAPT;
}
post_desc->base.local_addr = (uint64_t) lcl_addr;
post_desc->base.local_mem_hndl = lcl_mdh;
post_desc->base.remote_addr = (uint64_t) rem_addr;
post_desc->base.remote_mem_hndl = rem_mdh;
post_desc->base.length = bufsize;
post_desc->base.rdma_mode = 0;
post_desc->base.src_cq_hndl = cq_hndl;
post_desc->desc.local_addr = (uint64_t) lcl_addr;
post_desc->desc.local_mem_hndl = lcl_mdh;
post_desc->desc.remote_addr = (uint64_t) rem_addr;
post_desc->desc.remote_mem_hndl = rem_mdh;
post_desc->desc.length = bufsize;
post_desc->desc.rdma_mode = 0;
post_desc->desc.src_cq_hndl = cq_hndl;
post_desc->tries = 0;
}
@ -54,38 +55,28 @@ static inline int mca_btl_ugni_post_fma (struct mca_btl_base_endpoint_t *endpoin
{
mca_btl_ugni_post_descriptor_t *post_desc;
gni_mem_handle_t local_gni_handle = {0, 0};
gni_return_t grc;
int rc;
if (local_handle) {
local_gni_handle = local_handle->gni_handle;
}
mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata, &post_desc);
post_desc = mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata);
if (OPAL_UNLIKELY(NULL == post_desc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
/* Post descriptor (CQ is ignored for FMA transactions) -- The CQ associated with the endpoint
* is used. */
init_gni_post_desc (&post_desc->desc, order, op_type, (intptr_t) local_address, local_gni_handle,
init_gni_post_desc (post_desc, order, op_type, (intptr_t) local_address, local_gni_handle,
remote_address, remote_handle->gni_handle, size, 0);
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
grc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base);
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
if (GNI_RC_ALIGNMENT_ERROR == grc) {
BTL_VERBOSE(("GNI_PostFma failed with an alignment error"));
return OPAL_ERR_NOT_AVAILABLE;
}
BTL_VERBOSE(("GNI_PostFma failed with gni rc: %d", grc));
return OPAL_ERR_OUT_OF_RESOURCE;
rc = mca_btl_ugni_endpoint_post_fma (endpoint, post_desc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
mca_btl_ugni_return_post_descriptor (post_desc);
}
return OPAL_SUCCESS;
return rc;
}
static inline int mca_btl_ugni_post_bte (mca_btl_base_endpoint_t *endpoint, gni_post_type_t op_type,
@ -96,70 +87,53 @@ static inline int mca_btl_ugni_post_bte (mca_btl_base_endpoint_t *endpoint, gni_
void *cbcontext, void *cbdata)
{
mca_btl_ugni_post_descriptor_t *post_desc;
gni_cq_handle_t cq_handle = endpoint->btl->rdma_local_cq;
gni_return_t status;
int rc;
mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata, &post_desc);
post_desc = mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata);
if (OPAL_UNLIKELY(NULL == post_desc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
if (mca_btl_ugni_component.progress_thread_enabled) {
cq_handle = endpoint->btl->rdma_local_irq_cq;
}
/* Post descriptor */
init_gni_post_desc (&post_desc->desc, order, op_type, (intptr_t) local_address, local_handle->gni_handle,
remote_address, remote_handle->gni_handle, size, cq_handle);
init_gni_post_desc (post_desc, order, op_type, (intptr_t) local_address, local_handle->gni_handle,
remote_address, remote_handle->gni_handle, size, 0);
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
status = GNI_PostRdma (endpoint->rdma_ep_handle, &post_desc->desc.base);
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != status)) {
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
if (GNI_RC_ALIGNMENT_ERROR == status) {
BTL_VERBOSE(("GNI_PostRdma failed with an alignment error"));
return OPAL_ERR_NOT_AVAILABLE;
}
BTL_VERBOSE(("GNI_PostRdma failed with gni rc: %d", status));
return OPAL_ERR_OUT_OF_RESOURCE;
rc = mca_btl_ugni_endpoint_post_rdma (endpoint, post_desc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
mca_btl_ugni_return_post_descriptor (post_desc);
}
return OPAL_SUCCESS;
return rc;
}
static inline int mca_btl_ugni_post_cqwrite (mca_btl_base_endpoint_t *endpoint, gni_cq_handle_t cq_handle,
static inline int mca_btl_ugni_post_cqwrite (mca_btl_base_endpoint_t *endpoint, mca_btl_ugni_cq_t *cq,
gni_mem_handle_t irq_mhndl, uint64_t value,
mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata)
{
mca_btl_ugni_post_descriptor_t *post_desc;
gni_return_t grc;
int rc;
mca_btl_ugni_alloc_post_descriptor (endpoint, NULL, cbfunc, cbcontext, cbdata, &post_desc);
post_desc = mca_btl_ugni_alloc_post_descriptor (endpoint, NULL, cbfunc, cbcontext, cbdata);
if (OPAL_UNLIKELY(NULL == post_desc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
post_desc->desc.base.type = GNI_POST_CQWRITE;
post_desc->desc.base.cqwrite_value = value; /* up to 48 bytes here, not used for now */
post_desc->desc.base.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
post_desc->desc.base.dlvr_mode = GNI_DLVMODE_IN_ORDER;
post_desc->desc.base.src_cq_hndl = cq_handle;
post_desc->desc.base.remote_mem_hndl = irq_mhndl;
post_desc->desc.tries = 0;
post_desc->desc.type = GNI_POST_CQWRITE;
post_desc->desc.cqwrite_value = value; /* up to 48 bytes here, not used for now */
post_desc->desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
post_desc->desc.dlvr_mode = GNI_DLVMODE_IN_ORDER;
post_desc->desc.src_cq_hndl = cq->gni_handle;
post_desc->desc.remote_mem_hndl = irq_mhndl;
post_desc->tries = 0;
post_desc->cq = cq;
OPAL_THREAD_LOCK(&endpoint->common->dev->dev_lock);
grc = GNI_PostCqWrite(endpoint->rdma_ep_handle, &post_desc->desc.base);
OPAL_THREAD_UNLOCK(&endpoint->common->dev->dev_lock);
if (GNI_RC_SUCCESS != grc) { /* errors for PostCqWrite treated as non-fatal */
BTL_VERBOSE(("GNI_PostCqWrite returned error - %s", gni_err_str[grc]));
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
rc = mca_btl_ugni_endpoint_post_cqwrite (endpoint, post_desc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { /* errors for PostCqWrite treated as non-fatal */
mca_btl_ugni_return_post_descriptor (post_desc);
}
return opal_common_rc_ugni_to_opal (grc);
return rc;
}
static inline int mca_btl_ugni_post (mca_btl_base_endpoint_t *endpoint, int get, size_t size,
@ -183,27 +157,11 @@ static inline int mca_btl_ugni_post (mca_btl_base_endpoint_t *endpoint, int get,
static inline int mca_btl_ugni_repost (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_post_descriptor_t *post_desc)
{
gni_return_t grc;
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
if (GNI_POST_RDMA_PUT == post_desc->desc.base.type ||
GNI_POST_RDMA_GET == post_desc->desc.base.type) {
grc = GNI_PostRdma (post_desc->endpoint->rdma_ep_handle, &post_desc->desc.base);
} else {
grc = GNI_PostFma (post_desc->endpoint->rdma_ep_handle, &post_desc->desc.base);
}
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
/* NTH: Should we even retry these? When this code was written there was no indication
* whether an error in post is recoverable. Clobber this code and the associated data
* structures if post errors are not recoverable. */
OPAL_THREAD_LOCK(&ugni_module->pending_descriptors_lock);
opal_list_append (&ugni_module->pending_descriptors, (opal_list_item_t *) post_desc);
OPAL_THREAD_UNLOCK(&ugni_module->pending_descriptors_lock);
if (GNI_POST_RDMA_PUT == post_desc->desc.type || GNI_POST_RDMA_GET == post_desc->desc.type) {
return mca_btl_ugni_endpoint_post_rdma (post_desc->endpoint, post_desc);
}
return opal_common_rc_ugni_to_opal (grc);
return mca_btl_ugni_endpoint_post_fma (post_desc->endpoint, post_desc);
}
#endif /* MCA_BTL_UGNI_RDMA_H */

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
@ -17,6 +17,30 @@
#include "btl_ugni_smsg.h"
#include "btl_ugni_prepare.h"
void mca_btl_ugni_wait_list_append (mca_btl_ugni_module_t *ugni_module, mca_btl_base_endpoint_t *endpoint,
mca_btl_ugni_base_frag_t *frag)
{
BTL_VERBOSE(("wait-listing fragment %p to %s. endpoint state %d\n", frag, OPAL_NAME_PRINT(endpoint->peer_proc->proc_name), endpoint->state));
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
/* queue up request */
OPAL_THREAD_LOCK(&endpoint->lock);
opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) frag);
OPAL_THREAD_UNLOCK(&endpoint->lock);
if (false == endpoint->wait_listed && MCA_BTL_UGNI_EP_STATE_CONNECTED == endpoint->state) {
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
if (false == endpoint->wait_listed) {
opal_list_append (&ugni_module->ep_wait_list, &endpoint->super);
endpoint->wait_listed = true;
}
OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
}
}
int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
struct mca_btl_base_descriptor_t *descriptor,
@ -30,18 +54,15 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
/* tag and len are at the same location in eager and smsg frag hdrs */
frag->hdr.send.lag = (tag << 24) | size;
BTL_VERBOSE(("btl/ugni sending descriptor %p from %d -> %d. length = %" PRIu64, (void *)descriptor,
OPAL_PROC_MY_NAME.vpid, endpoint->peer_proc->proc_name.vpid, size));
rc = mca_btl_ugni_check_endpoint_state (endpoint);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
OPAL_THREAD_LOCK(&endpoint->lock);
opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) frag);
OPAL_THREAD_UNLOCK(&endpoint->lock);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || opal_list_get_size (&endpoint->frag_wait_list))) {
mca_btl_ugni_wait_list_append (ugni_module, endpoint, frag);
return OPAL_SUCCESS;
}
BTL_VERBOSE(("btl/ugni sending descriptor %p from %d -> %d. length = %" PRIu64, (void *)descriptor,
OPAL_PROC_MY_NAME.vpid, endpoint->common->ep_rem_id, size));
/* add a reference to prevent the fragment from being returned until after the
* completion flag is checked. */
++frag->ref_cnt;
@ -61,7 +82,7 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
frag->flags &= ~MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
if (call_callback) {
frag->base.des_cbfunc(&frag->endpoint->btl->super, frag->endpoint, &frag->base, rc);
frag->base.des_cbfunc(&ugni_module->super, frag->endpoint, &frag->base, rc);
}
(void) mca_btl_ugni_frag_del_ref (frag, OPAL_SUCCESS);
@ -77,18 +98,7 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
if (OPAL_UNLIKELY(OPAL_ERR_OUT_OF_RESOURCE == rc)) {
/* queue up request */
if (false == endpoint->wait_listed) {
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
if (false == endpoint->wait_listed) {
opal_list_append (&ugni_module->ep_wait_list, &endpoint->super);
endpoint->wait_listed = true;
}
OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
}
OPAL_THREAD_LOCK(&endpoint->lock);
opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) frag);
OPAL_THREAD_UNLOCK(&endpoint->lock);
mca_btl_ugni_wait_list_append (ugni_module, endpoint, frag);
rc = OPAL_SUCCESS;
}
@ -109,7 +119,8 @@ int mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
int rc;
do {
if (OPAL_UNLIKELY(OPAL_SUCCESS != mca_btl_ugni_check_endpoint_state (endpoint))) {
if (OPAL_UNLIKELY(OPAL_SUCCESS != mca_btl_ugni_check_endpoint_state (endpoint) ||
opal_list_get_size (&endpoint->frag_wait_list))) {
break;
}

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -28,7 +28,7 @@ static void mca_btl_ugni_smsg_mbox_construct (mca_btl_ugni_smsg_mbox_t *mbox) {
mbox->attr.smsg_attr.buff_size = mca_btl_ugni_component.smsg_mbox_size;
mbox->attr.smsg_attr.mem_hndl = ugni_reg->handle.gni_handle;
mbox->attr.proc_name = OPAL_PROC_MY_NAME;
mbox->attr.rmt_irq_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl;
mbox->attr.rmt_irq_mem_hndl = mca_btl_ugni_component.modules[0].devices[0].smsg_irq_mhndl;
}
OBJ_CLASS_INSTANCE(mca_btl_ugni_smsg_mbox_t, opal_free_list_item_t,
@ -39,11 +39,13 @@ int mca_btl_ugni_smsg_init (mca_btl_ugni_module_t *ugni_module)
{
gni_return_t rc;
rc = GNI_SmsgSetMaxRetrans (ugni_module->device->dev_handle,
mca_btl_ugni_component.smsg_max_retries);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error setting maximum SMSG retries %s",gni_err_str[rc]));
return opal_common_rc_ugni_to_opal (rc);
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
rc = GNI_SmsgSetMaxRetrans (ugni_module->devices[i].dev_handle,
mca_btl_ugni_component.smsg_max_retries);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error setting maximum SMSG retries %s",gni_err_str[rc]));
return mca_btl_rc_ugni_to_opal (rc);
}
}
return OPAL_SUCCESS;
@ -52,6 +54,7 @@ int mca_btl_ugni_smsg_init (mca_btl_ugni_module_t *ugni_module)
/* progress */
int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
mca_btl_active_message_callback_t *reg;
mca_btl_ugni_base_frag_t frag;
mca_btl_base_segment_t seg;
@ -70,27 +73,20 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
do {
uint8_t tag = GNI_SMSG_ANY_TAG;
OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
rc = GNI_SmsgGetNextWTag (ep->smsg_ep_handle, (void **) &data_ptr, &tag);
OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);
if (GNI_RC_NOT_DONE == rc) {
BTL_VERBOSE(("no smsg message waiting. rc = %s", gni_err_str[rc]));
rc = mca_btl_ugni_smsg_get_next_wtag (ep->smsg_ep_handle, &data_ptr, &tag);
if (GNI_RC_SUCCESS != rc) {
if (OPAL_LIKELY(GNI_RC_NOT_DONE == rc)) {
BTL_VERBOSE(("no smsg message waiting. rc = %s", gni_err_str[rc]));
ep->smsg_progressing = 0;
ep->smsg_progressing = 0;
return count;
}
return count;
}
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
BTL_ERROR(("GNI_SmsgGetNextWTag returned error %s", gni_err_str[rc]));
BTL_ERROR(("unhandled GNI_SmsgGetNextWTag error"));
return OPAL_ERROR;
}
if (OPAL_UNLIKELY(0 == data_ptr)) {
BTL_ERROR(("null data ptr!"));
assert (0);
return OPAL_ERROR;
}
assert (0 != data_ptr);
count++;
@ -114,7 +110,7 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
assert (NULL != reg->cbfunc);
reg->cbfunc(&ep->btl->super, tag, &(frag.base), reg->cbdata);
reg->cbfunc(&ugni_module->super, tag, &(frag.base), reg->cbdata);
break;
case MCA_BTL_UGNI_TAG_GET_INIT:
@ -141,16 +137,14 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
break;
}
OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
rc = GNI_SmsgRelease (ep->smsg_ep_handle);
OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);
rc = mca_btl_ugni_smsg_release (ep->smsg_ep_handle);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
BTL_ERROR(("Smsg release failed! rc = %d", rc));
return OPAL_ERROR;
}
} while (!disconnect);
ep->smsg_progressing = false;
ep->smsg_progressing = 0;
/* disconnect if we get here */
opal_mutex_lock (&ep->lock);
@ -165,7 +159,6 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
static inline int
mca_btl_ugni_handle_remote_smsg_overrun (mca_btl_ugni_module_t *btl)
{
gni_cq_entry_t event_data;
size_t endpoint_count;
unsigned int ep_index;
int count, rc;
@ -177,11 +170,7 @@ mca_btl_ugni_handle_remote_smsg_overrun (mca_btl_ugni_module_t *btl)
smsg remote cq and check all mailboxes */
/* clear out remote cq */
do {
OPAL_THREAD_LOCK(&btl->device->dev_lock);
rc = GNI_CqGetEvent (btl->smsg_remote_cq, &event_data);
OPAL_THREAD_UNLOCK(&btl->device->dev_lock);
} while (GNI_RC_NOT_DONE != rc);
mca_btl_ugni_cq_clear (btl->devices, btl->smsg_remote_cq);
endpoint_count = opal_pointer_array_get_size (&btl->endpoints);
@ -212,9 +201,7 @@ int mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_module_t *btl)
gni_return_t grc;
uint64_t inst_id;
OPAL_THREAD_LOCK(&btl->device->dev_lock);
grc = GNI_CqGetEvent (btl->smsg_remote_cq, &event_data);
OPAL_THREAD_UNLOCK(&btl->device->dev_lock);
grc = mca_btl_ugni_gni_cq_get_event (btl->devices, btl->smsg_remote_cq, &event_data);
if (GNI_RC_NOT_DONE == grc) {
return 0;
}
@ -231,12 +218,12 @@ int mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_module_t *btl)
/* unhandled error: crash */
assert (0);
return opal_common_rc_ugni_to_opal (grc);
return mca_btl_rc_ugni_to_opal (grc);
}
BTL_VERBOSE(("REMOTE CQ: Got event 0x%" PRIx64 ". msg id = %" PRIu64
". ok = %d, type = %" PRIu64 "\n", (uint64_t) event_data,
GNI_CQ_GET_MSG_ID(event_data), GNI_CQ_STATUS_OK(event_data),
". ok = %d, type = %" PRIu64, (uint64_t) event_data,
GNI_CQ_GET_INST_ID(event_data), GNI_CQ_STATUS_OK(event_data),
GNI_CQ_GET_TYPE(event_data)));
inst_id = GNI_CQ_GET_INST_ID(event_data);

Просмотреть файл

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -36,20 +36,18 @@ int mca_btl_ugni_smsg_init (mca_btl_ugni_module_t *ugni_module);
int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep);
int mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_module_t *btl);
static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_module)
static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device)
{
mca_btl_ugni_base_frag_t *frag;
gni_cq_entry_t event_data;
gni_return_t grc;
/* nothing to do */
if (0 == ugni_module->active_send_count) {
if (0 == device->dev_smsg_local_cq.active_operations) {
return OPAL_SUCCESS;
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
grc = GNI_CqGetEvent (ugni_module->smsg_local_cq, &event_data);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
grc = mca_btl_ugni_cq_get_event (device, &device->dev_smsg_local_cq, &event_data);
if (GNI_RC_NOT_DONE == grc) {
return OPAL_SUCCESS;
}
@ -59,7 +57,7 @@ static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_
will the event eventually come back? Ask Cray */
BTL_ERROR(("post error! cq overrun = %d", (int)GNI_CQ_OVERRUN(event_data)));
assert (0);
return opal_common_rc_ugni_to_opal (grc);
return mca_btl_rc_ugni_to_opal (grc);
}
assert (GNI_CQ_GET_TYPE(event_data) == GNI_CQ_EVENT_TYPE_SMSG);
@ -71,8 +69,6 @@ static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_
return OPAL_ERROR;
}
opal_atomic_add_32(&ugni_module->active_send_count,-1);
frag->flags |= MCA_BTL_UGNI_FRAG_SMSG_COMPLETE;
if (!(frag->flags & MCA_BTL_UGNI_FRAG_IGNORE)) {
@ -87,26 +83,22 @@ static inline int opal_mca_btl_ugni_smsg_send (mca_btl_ugni_base_frag_t *frag,
void *payload, size_t payload_len,
mca_btl_ugni_smsg_tag_t tag)
{
mca_btl_base_endpoint_t *endpoint = frag->endpoint;
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (endpoint);
gni_return_t grc;
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
grc = GNI_SmsgSendWTag (frag->endpoint->smsg_ep_handle, hdr, hdr_len,
payload, payload_len, frag->msg_id, tag);
OPAL_THREAD_UNLOCK(&frag->endpoint->common->dev->dev_lock);
grc = mca_btl_ugni_endpoint_smsg_send_wtag (endpoint, hdr, hdr_len, payload, payload_len,
frag->msg_id, tag);
if (OPAL_LIKELY(GNI_RC_SUCCESS == grc)) {
/* increment the active send counter */
opal_atomic_add_32(&frag->endpoint->btl->active_send_count,1);
if (mca_btl_ugni_component.progress_thread_enabled) {
if (frag->base.des_flags & MCA_BTL_DES_FLAGS_SIGNAL) {
/* errors for PostCqWrite treated as non-fatal */
(void) mca_btl_ugni_post_cqwrite (frag->endpoint, frag->endpoint->btl->rdma_local_cq,
frag->endpoint->rmt_irq_mem_hndl, 0xdead, NULL, NULL, NULL);
(void) mca_btl_ugni_post_cqwrite (endpoint, &ugni_module->devices[0].dev_rdma_local_cq,
endpoint->rmt_irq_mem_hndl, 0xdead, NULL, NULL, NULL);
}
}
(void) mca_btl_ugni_progress_local_smsg ((mca_btl_ugni_module_t *) frag->endpoint->btl);
(void) mca_btl_ugni_progress_local_smsg (ugni_module, endpoint->smsg_ep_handle->device);
return OPAL_SUCCESS;
}

Просмотреть файл

@ -1,67 +0,0 @@
# -*- indent-tabs-mode:nil -*-
#
# Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
#
# Additional copyrights may follow
#
# $HEADER$
#
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
AM_CPPFLAGS = $(common_ugni_CPPFLAGS)
component_noinst = lib@OPAL_LIB_PREFIX@mca_common_ugni_noinst.la
component_install = lib@OPAL_LIB_PREFIX@mca_common_ugni.la
if MCA_BUILD_opal_common_ugni_DSO
lib_LTLIBRARIES = $(component_install)
else
noinst_LTLIBRARIES = $(component_noinst)
endif
headers = common_ugni.h \
common_ugni_ep.h
ugni_SOURCES = common_ugni.c \
common_ugni_ep.c
#mcacomponentdir = $(opallibdir)
lib@OPAL_LIB_PREFIX@mca_common_ugni_la_SOURCES = $(headers) $(ugni_SOURCES)
nodist_lib@OPAL_LIB_PREFIX@mca_common_ugni_la_SOURCES = $(ugni_nodist_SOURCES)
lib@OPAL_LIB_PREFIX@mca_common_ugni_la_LIBADD = $(common_ugni_LIBS)
lib@OPAL_LIB_PREFIX@mca_common_ugni_la_LDFLAGS = \
-version-info $(libmca_opal_common_ugni_so_version) \
$(common_ugni_LDFLAGS)
lib@OPAL_LIB_PREFIX@mca_common_ugni_noinst_la_SOURCES = \
$(headers) $(ugni_SOURCES)
nodist_lib@OPAL_LIB_PREFIX@mca_common_ugni_noinst_la_SOURCES = \
$(ugni_nodist_SOURCES)
lib@OPAL_LIB_PREFIX@mca_common_ugni_noinst_la_LIBADD = $(common_ugni_LIBS)
lib@OPAL_LIB_PREFIX@mca_common_ugni_noinst_la_LDFLAGS = \
-module -avoid-version $(common_ugni_LDFLAGS)
# These two rules will sym link the "noinst" libtool library filename
# to the installable libtool library filename in the case where we are
# compiling this component statically (case 2), described above).
V=0
OMPI_V_LN_SCOMP = $(ompi__v_LN_SCOMP_$V)
ompi__v_LN_SCOMP_ = $(ompi__v_LN_SCOMP_$AM_DEFAULT_VERBOSITY)
ompi__v_LN_SCOMP_0 = @echo " LN_S " `basename $(component_install)`;
all-local:
$(OMPI_V_LN_SCOMP) if test -z "$(lib_LTLIBRARIES)"; then \
rm -f "$(component_install)"; \
$(LN_S) "$(component_noinst)" "$(component_install)"; \
fi
clean-local:
if test -z "$(mcacomponent_LTLIBRARIES)"; then \
rm -f "$(component_install)"; \
fi

Просмотреть файл

@ -1,301 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "common_ugni.h"
#include "opal/class/opal_list.h"
#include "opal/dss/dss.h"
#include "opal/mca/pmix/pmix.h"
/* NTH: we need some options from the btl */
#include "opal/mca/btl/ugni/btl_ugni.h"
static int opal_common_ugni_module_ref_count = 0;
opal_common_ugni_module_t opal_common_ugni_module = {0};
mca_base_component_t opal_common_ugni_component = {
OPAL_MCA_BASE_VERSION_2_1_0("common", 1, 0, 0),
.mca_component_name = "ugni",
.mca_component_major_version = 1,
.mca_component_minor_version = 0,
.mca_component_release_version = 0,
};
static inline int
get_ptag(uint8_t *out_ptag)
{
/* TODO no need for tmp */
char *ptr;
uint8_t tmp_ptag;
if (NULL == (ptr = getenv("PMI_GNI_PTAG"))) {
/* TODO add err msg - better rc? */
return OPAL_ERR_NOT_FOUND;
}
errno = 0;
tmp_ptag = (uint8_t)strtoul (ptr, (char **)NULL, 10);
if (0 != errno) {
/* TODO add err msg - better rc? */
return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
}
*out_ptag = tmp_ptag;
return OPAL_SUCCESS;
}
static inline int get_cookie (uint32_t *out_cookie)
{
/* TODO no need for tmp */
char *ptr;
uint32_t tmp_cookie;
if (NULL == (ptr = getenv("PMI_GNI_COOKIE"))) {
/* TODO add err msg - better rc? */
return OPAL_ERR_NOT_FOUND;
}
errno = 0;
tmp_cookie = (uint32_t) strtoul (ptr, NULL, 10);
if (0 != errno) {
/* TODO add err msg - better rc? */
return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
}
*out_cookie = tmp_cookie;
return OPAL_SUCCESS;
}
static unsigned int
opal_common_ugni_get_nic_address(int device_id)
{
unsigned int address, cpu_id;
gni_return_t status;
int i, alps_dev_id = -1;
char *token,*p_ptr;
p_ptr = getenv("PMI_GNI_DEV_ID");
if (!p_ptr) {
status = GNI_CdmGetNicAddress(device_id, &address, &cpu_id);
if(status != GNI_RC_SUCCESS) {
opal_output (0, "FAILED:GNI_CdmGetNicAddress returned error %d", status);
return (unsigned int)-1;
}
return address;
}
while (NULL != (token = strtok(p_ptr, ":"))) {
alps_dev_id = atoi(token);
if (alps_dev_id == device_id) {
break;
}
p_ptr = NULL;
}
if (OPAL_UNLIKELY(-1 == alps_dev_id)) {
return (unsigned int)-1;
}
p_ptr = getenv("PMI_GNI_LOC_ADDR");
if (OPAL_UNLIKELY(NULL == p_ptr)) {
return (unsigned int)-1;
}
i = 0;
while (NULL != (token = strtok(p_ptr, ":"))) {
if (i == alps_dev_id) {
return strtoul (token, NULL, 10);
}
p_ptr = NULL;
++i;
}
return (unsigned int)-1;
}
static int opal_common_ugni_device_init (opal_common_ugni_device_t *device,
int device_id)
{
int rc;
/* Create a NIC Adress */
device->dev_id = device_id; /* Minor number of the Gemini NIC */
device->dev_addr = opal_common_ugni_get_nic_address (device->dev_id);
OPAL_OUTPUT((-1, "Got NIC Addr: 0x%08x, CPU ID: %d", device->dev_addr, device->dev_id));
OBJ_CONSTRUCT(&device->dev_lock,opal_mutex_t);
/* Attach device to the communication domain */
rc = GNI_CdmAttach (opal_common_ugni_module.cd_handle, device->dev_id,
&device->dev_pe_addr, &device->dev_handle);
if (GNI_RC_SUCCESS != rc) {
OPAL_OUTPUT((0, "Error: Creating communication domain %d\n", rc));
return opal_common_rc_ugni_to_opal (rc);
}
return OPAL_SUCCESS;
}
static int opal_common_ugni_device_fini (opal_common_ugni_device_t *dev)
{
return OPAL_SUCCESS;
}
/*
* Send local device information and other information
* required for setup
*/
static int opal_common_ugni_send_modex (int my_cdm_id)
{
uint32_t modex_size, total_msg_size, msg_offset;
struct opal_common_ugni_modex_t modex;
char *modex_msg;
int rc, i;
modex_size = sizeof (struct opal_common_ugni_modex_t);
total_msg_size = opal_common_ugni_module.device_count * modex_size;
modex_msg = (char *) malloc (total_msg_size);
if (NULL == modex_msg) {
OPAL_OUTPUT((-1, "Error allocating memory for modex @ %s:%d",
__FILE__, __LINE__));
return OPAL_ERR_OUT_OF_RESOURCE;
}
/* pack modex for all available devices */
for (i = 0, msg_offset = 0; i < opal_common_ugni_module.device_count ; ++i) {
opal_common_ugni_device_t *dev = opal_common_ugni_module.devices + i;
modex.addr = dev->dev_addr;
modex.id = my_cdm_id;
memcpy ((void *)((uintptr_t) modex_msg + msg_offset),
(void *)&modex, modex_size);
msg_offset += modex_size;
}
/*
* need global for edge cases like MPI_Comm_spawn support with
* new ranks started on the same nodes as the spawnee ranks, etc.
*/
OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
&opal_common_ugni_component,
modex_msg, total_msg_size);
free(modex_msg);
return rc;
}
int opal_common_ugni_fini (void)
{
int i, rc;
if (0 == opal_common_ugni_module_ref_count) {
return OPAL_SUCCESS;
}
if (1 == opal_common_ugni_module_ref_count) {
/* tear down component */
if (opal_common_ugni_module.devices) {
/* finalize devices */
for (i = 0 ; i < opal_common_ugni_module.device_count ; ++i) {
opal_common_ugni_device_fini (opal_common_ugni_module.devices + i);
}
free (opal_common_ugni_module.devices);
opal_common_ugni_module.devices = NULL;
}
/* finally, tear down the communication domain */
rc = GNI_CdmDestroy (opal_common_ugni_module.cd_handle);
if (GNI_RC_SUCCESS != rc) {
OPAL_OUTPUT((-1, "error destroying cdm"));
}
}
opal_common_ugni_module_ref_count--;
return OPAL_SUCCESS;
}
int opal_common_ugni_init (void)
{
int modes, rc, i;
uint32_t my_cdm_id;
opal_common_ugni_module_ref_count ++;
if (opal_common_ugni_module_ref_count > 1) {
return OPAL_SUCCESS;
}
/* use pid for my_cdm_id. Although its not stated in the uGNI
documentation, the cdm_id only needs to be unique
within a node for a given ptag/cookie tuple */
my_cdm_id = getpid(); /*TODO: eventually need something else for thread-hot support */
/* pull settings from ugni btl */
opal_common_ugni_module.rdma_max_retries =
mca_btl_ugni_component.rdma_max_retries;
/* Create a communication domain */
modes = GNI_CDM_MODE_FORK_FULLCOPY | GNI_CDM_MODE_CACHED_AMO_ENABLED |
GNI_CDM_MODE_ERR_NO_KILL | GNI_CDM_MODE_FAST_DATAGRAM_POLL |
GNI_CDM_MODE_FMA_SHARED;
/* collect uGNI information */
rc = get_ptag(&opal_common_ugni_module.ptag);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
}
rc = get_cookie(&opal_common_ugni_module.cookie);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
}
/* create a communication domain */
rc = GNI_CdmCreate (my_cdm_id, opal_common_ugni_module.ptag,
opal_common_ugni_module.cookie, modes,
&opal_common_ugni_module.cd_handle);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
OPAL_OUTPUT((0, "Error: Creating communication domain %d\n",rc));
return opal_common_rc_ugni_to_opal (rc);
}
/* setup uGNI devices. we only support one device atm */
opal_common_ugni_module.device_count = 1;
opal_common_ugni_module.devices = calloc (opal_common_ugni_module.device_count,
sizeof (opal_common_ugni_device_t));
for (i = 0 ; i < opal_common_ugni_module.device_count ; ++i) {
rc = opal_common_ugni_device_init (opal_common_ugni_module.devices + i, i);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
OPAL_OUTPUT((-1, "error initializing uGNI device"));
return rc;
}
}
/* send ugni modex */
opal_common_ugni_send_modex (my_cdm_id);
return OPAL_SUCCESS;
}

Просмотреть файл

@ -1,117 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "opal/util/output.h"
#include "opal/util/proc.h"
#include "opal/class/opal_list.h"
#include "opal/include/opal/prefetch.h"
#include "opal_stdint.h"
#include <errno.h>
#include <stdint.h>
#include <sys/types.h>
#include <assert.h>
#include <sys/time.h>
#include <gni_pub.h>
#include "common_ugni_ep.h"
#if !defined(MPI_COMMON_UGNI_H)
#define MPI_COMMON_UGNI_H
struct opal_common_ugni_modex_t {
uint32_t addr;
int id;
gni_mem_handle_t irq_memhndl;
};
typedef struct opal_common_ugni_modex_t opal_common_ugni_modex_t;
struct opal_common_ugni_device_t {
opal_object_t super;
gni_nic_handle_t dev_handle;
/* Minor number of the Gemini NIC */
int32_t dev_id;
uint32_t dev_pe_addr;
uint32_t dev_addr;
uint32_t dev_cpu_id;
size_t dev_ep_count;
opal_mutex_t dev_lock;
gni_mem_handle_t smsg_irq_mhndl;
void *btl_ctx;
};
typedef struct opal_common_ugni_device_t opal_common_ugni_device_t;
struct opal_common_ugni_module_t {
/* protection tag */
uint8_t ptag;
/* unique id for this process assigned by the system */
uint32_t cookie;
/* communication domain handle */
gni_cdm_handle_t cd_handle;
/* device count. to be used if we have more than 1 common per ugni device */
int device_count;
opal_common_ugni_device_t *devices;
int rdma_max_retries;
};
typedef struct opal_common_ugni_module_t opal_common_ugni_module_t;
/* Wrapper around a uGNI post descriptor carrying the extra state needed when
 * the completion is reaped from the CQ. The gni_post_descriptor_t is placed
 * first, presumably so the wrapper can be recovered by casting the pointer
 * uGNI returns -- confirm against the completion-handling code. */
struct opal_common_ugni_post_desc_t {
gni_post_descriptor_t base;           /* descriptor handed to GNI_Post* */
opal_common_ugni_endpoint_t *endpoint; /* endpoint the descriptor was posted on */
int tries;                            /* NOTE(review): looks like a (re)post attempt counter, cf. rdma_max_retries -- confirm */
};
typedef struct opal_common_ugni_post_desc_t opal_common_ugni_post_desc_t;
extern opal_common_ugni_module_t opal_common_ugni_module;
extern mca_base_component_t opal_common_ugni_component;
/**
 * Translate a uGNI return code into the corresponding OPAL error constant.
 *
 * @param[in] rc  return code from a GNI_* call (GNI_RC_SUCCESS == 0)
 *
 * @returns the matching OPAL_* code, or OPAL_ERROR for any uGNI return
 *          value outside the translation table
 */
static inline int
opal_common_rc_ugni_to_opal (gni_return_t rc)
{
    /* table is indexed directly by the gni_return_t value */
    static const int codes[] = {OPAL_SUCCESS,
                                OPAL_ERR_RESOURCE_BUSY,
                                OPAL_ERR_BAD_PARAM,
                                OPAL_ERR_OUT_OF_RESOURCE,
                                OPAL_ERR_TIMEOUT,
                                OPAL_ERR_PERM,
                                OPAL_ERROR,
                                OPAL_ERR_BAD_PARAM,
                                OPAL_ERR_BAD_PARAM,
                                OPAL_ERR_NOT_FOUND,
                                OPAL_ERR_VALUE_OUT_OF_BOUNDS,
                                OPAL_ERROR,
                                OPAL_ERR_NOT_SUPPORTED,
                                OPAL_ERR_OUT_OF_RESOURCE};

    /* guard against return codes newer than this table; the original
     * unconditionally indexed codes[rc], which is an out-of-bounds read
     * for any unexpected value */
    if ((int) rc < 0 || (size_t) rc >= sizeof (codes) / sizeof (codes[0])) {
        return OPAL_ERROR;
    }

    return codes[rc];
}
/*
* Initialize uGNI communication domain and device(s).
*/
int opal_common_ugni_init (void);
/*
* Finalize uGNI communication domain and device(s).
*/
int opal_common_ugni_fini (void);
#endif /* MPI_COMMON_UGNI_H */

Просмотреть файл

@ -1,118 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "common_ugni.h"
#include "opal/mca/pmix/pmix.h"
OBJ_CLASS_INSTANCE(opal_common_ugni_endpoint_t, opal_object_t, NULL, NULL);
/**
 * Get a new common endpoint object describing the connection to peer_proc.
 * The remote address/id/IRQ handle are read from the peer's modex data. The
 * returned object must be released with opal_common_ugni_endpoint_return().
 *
 * @param[in]  dev        uGNI device the endpoint will use
 * @param[in]  peer_proc  remote process to connect to
 * @param[out] ep         new endpoint object on success
 *
 * @returns OPAL_SUCCESS, OPAL_ERR_OUT_OF_RESOURCE, or the modex-recv error
 */
int opal_common_ugni_endpoint_for_proc (opal_common_ugni_device_t *dev, opal_proc_t *peer_proc,
                                        opal_common_ugni_endpoint_t **ep)
{
    opal_common_ugni_endpoint_t *endpoint;
    opal_common_ugni_modex_t *modex;
    size_t msg_size;
    int rc;

    assert (NULL != dev && NULL != ep && peer_proc);

    endpoint = OBJ_NEW(opal_common_ugni_endpoint_t);
    if (OPAL_UNLIKELY(NULL == endpoint)) {
        assert (0);
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* Receive the modex */
    OPAL_MODEX_RECV(rc, &opal_common_ugni_component,
                    &peer_proc->proc_name, (void **)&modex, &msg_size);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        OPAL_OUTPUT((-1, "btl/ugni error receiving modex"));
        /* bug fix: release the endpoint allocated above; it was leaked on
         * this error path in the original code */
        OBJ_RELEASE(endpoint);
        return rc;
    }

    endpoint->ep_rem_addr = modex->addr;
    endpoint->ep_rem_id   = modex->id;
    endpoint->ep_rem_irq_memhndl = modex->irq_memhndl;

    endpoint->dev = dev;

    *ep = endpoint;

    /* modex data was allocated by OPAL_MODEX_RECV; caller owns it */
    free (modex);

    return OPAL_SUCCESS;
}
/**
 * Return an endpoint obtained from opal_common_ugni_endpoint_for_proc().
 * Drops one reference; the object is destroyed once the last reference
 * has been returned, after which it must not be used.
 *
 * @param[in] endpoint  endpoint to return (must not be NULL)
 */
void opal_common_ugni_endpoint_return (opal_common_ugni_endpoint_t *endpoint)
{
    assert(NULL != endpoint);

    OBJ_RELEASE(endpoint);
}
/*
 * Create a uGNI endpoint handle on cep's device and bind it to the remote
 * address/id obtained from the peer's modex. Each GNI_Ep* call is wrapped in
 * the device lock -- presumably because uGNI NIC handles are not thread
 * safe; confirm against the uGNI documentation before changing the locking.
 */
int opal_common_ugni_ep_create (opal_common_ugni_endpoint_t *cep, gni_cq_handle_t cq,
gni_ep_handle_t *ep_handle)
{
gni_return_t grc;
if (OPAL_UNLIKELY(NULL == cep)) {
assert (0);
return OPAL_ERR_BAD_PARAM;
}
/* create a uGNI endpoint handle and bind it to the remote peer */
OPAL_THREAD_LOCK(&cep->dev->dev_lock);
grc = GNI_EpCreate (cep->dev->dev_handle, cq, ep_handle);
OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
return opal_common_rc_ugni_to_opal (grc);
}
OPAL_THREAD_LOCK(&cep->dev->dev_lock);
grc = GNI_EpBind (*ep_handle, cep->ep_rem_addr, cep->ep_rem_id);
OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);
if (GNI_RC_SUCCESS != grc) {
/* bind failed: tear down the endpoint handle created above and report
 * the bind error (the destroy result is intentionally ignored here) */
OPAL_THREAD_LOCK(&cep->dev->dev_lock);
GNI_EpDestroy (*ep_handle);
OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);
return opal_common_rc_ugni_to_opal (grc);
}
return OPAL_SUCCESS;
}
/**
 * Unbind and destroy a uGNI endpoint handle, then zero the caller's handle.
 * A NULL pointer or an already-zeroed handle is a no-op.
 *
 * @param[in,out] ep  endpoint handle to destroy; set to 0 on return
 *
 * @returns OPAL_SUCCESS (errors from uGNI are currently not propagated)
 */
int opal_common_ugni_ep_destroy (gni_ep_handle_t *ep)
{
    gni_return_t rc;

    if (NULL == ep || 0 == *ep) {
        return OPAL_SUCCESS;
    }

    /* TODO: need to fix, may be outstanding tx's, etc. */
    rc = GNI_EpUnbind (*ep);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        /* should warn */
    }

    /* bug fix: capture the destroy result; the original discarded it and
     * re-tested the stale unbind return code below */
    rc = GNI_EpDestroy (*ep);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        /* should warn */
    }

    *ep = 0;

    return OPAL_SUCCESS;
}

Просмотреть файл

@ -1,63 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#if !defined(MPI_COMMON_UGNI_EP_H)
#define MPI_COMMON_UGNI_EP_H
struct opal_common_ugni_device_t;
/* Reference-counted connection data for one remote peer. Obtained with
 * opal_common_ugni_endpoint_for_proc() and released with
 * opal_common_ugni_endpoint_return(); the remote fields are filled from the
 * peer's modex message. */
struct opal_common_ugni_endpoint_t {
opal_object_t super; /* OPAL base class; enables OBJ_RETAIN/OBJ_RELEASE */
uint32_t ep_rem_addr, ep_rem_id; /**< remote information */
gni_mem_handle_t ep_rem_irq_memhndl; /**< remote IRQ memory handle from the modex */
struct opal_common_ugni_device_t *dev; /**< device this endpoint is using */
};
typedef struct opal_common_ugni_endpoint_t opal_common_ugni_endpoint_t;
OBJ_CLASS_DECLARATION(opal_common_ugni_endpoint_t);
/*
* Get (and retain) a reference to an endpoint to peer_proc. This endpoint
* needs to be returned with opal_common_ugni_endpoint_return.
*
* @param[IN] dev uGNI device this endpoint should be bound to.
* @param[IN] peer_proc remote peer the endpoint will be connected to.
* @param[OUT] ep uGNI endpoint for the peer
*/
int opal_common_ugni_endpoint_for_proc (struct opal_common_ugni_device_t *dev, opal_proc_t *peer_proc,
opal_common_ugni_endpoint_t **ep);
/*
* Allocate and bind a uGNI endpoint handle to the remote peer.
*
* @param[IN] cep common endpoint
* @param[IN] cq completion queue
* @param[OUT] ep_handle uGNI endpoint handle
*/
int opal_common_ugni_ep_create (opal_common_ugni_endpoint_t *cep, gni_cq_handle_t cq, gni_ep_handle_t *ep_handle);
/*
* Unbind and free the uGNI endpoint handle.
*
* @param[IN] ep_handle uGNI endpoint handle to unbind and release
*/
int opal_common_ugni_ep_destroy (gni_ep_handle_t *ep_handle);
/*
* Return (and possibly free) a common endpoint. The endpoint may not be used
* once it is returned.
*
* @param[IN] ep uGNI endpoint to return
*/
void opal_common_ugni_endpoint_return (opal_common_ugni_endpoint_t *ep);
#endif /* MPI_COMMON_UGNI_EP_H */

Просмотреть файл

@ -1,54 +0,0 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2006 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2006 QLogic Corp. All rights reserved.
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011 Los Alamos National Security, LLC.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# OPAL_CHECK_UGNI(prefix, [action-if-found], [action-if-not-found])
# --------------------------------------------------------
# check if GNI support can be found. sets prefix_{CPPFLAGS,
# LDFLAGS, LIBS} as needed and runs action-if-found if there is
# support, otherwise executes action-if-not-found
#
# NOTES
# on Cray XE6 systems, the GNI development header (gni_pub.h) is in a
# completely different place than the ugni library (libugni).
#
# EXAMPLE CONFIGURE USAGE:
# --with-ugni=/base/path/to/libugni --with-ugni-includedir=/path/to/gni_pub.h
#
# --with-ugni=/opt/cray/ugni/default --with-ugni-includedir=/opt/cray/gni-headers/default/include
# MCA_opal_common_ugni_CONFIG([action-if-can-compile], [action-if-cant])
# ----------------------------------------------------------------------
# Entry point invoked by the OPAL MCA configure machinery for the
# common/ugni component. Probes for uGNI support via OPAL_CHECK_UGNI and
# runs $1 on success, $2 on failure.
AC_DEFUN([MCA_opal_common_ugni_CONFIG],[
AC_CONFIG_FILES([opal/mca/common/ugni/Makefile])
OPAL_CHECK_UGNI([common_ugni],
[common_ugni_happy="yes"],
[common_ugni_happy="no"])
AS_IF([test "$common_ugni_happy" = "yes"],
[$1],
[$2])
# substitute in the things needed to build ugni
AC_SUBST([common_ugni_CPPFLAGS])
AC_SUBST([common_ugni_LDFLAGS])
AC_SUBST([common_ugni_LIBS])
])dnl

Просмотреть файл

@ -1,7 +0,0 @@
#
# owner/status file
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner: LANL
status: active