Merge pull request #3149 from hjelmn/btl_ugni_2_0
Improve multi-threaded RMA performance of the ugni btl
Этот коммит содержится в:
Коммит
9410574253
@ -1,6 +1,6 @@
|
||||
# -*- indent-tabs-mode:nil -*-
|
||||
#
|
||||
# Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
|
||||
# Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
#
|
||||
@ -40,14 +40,15 @@ ugni_SOURCES = \
|
||||
btl_ugni_smsg.c \
|
||||
btl_ugni_progress_thread.c \
|
||||
btl_ugni_prepare.h \
|
||||
btl_ugni_atomic.c
|
||||
btl_ugni_atomic.c \
|
||||
btl_ugni_init.c \
|
||||
btl_ugni_device.h
|
||||
|
||||
mcacomponentdir = $(opallibdir)
|
||||
mcacomponent_LTLIBRARIES = $(component_install)
|
||||
mca_btl_ugni_la_SOURCES = $(ugni_SOURCES)
|
||||
nodist_mca_btl_ugni_la_SOURCES = $(ugni_nodist_SOURCES)
|
||||
mca_btl_ugni_la_LIBADD = $(btl_ugni_LIBS) \
|
||||
$(OPAL_TOP_BUILDDIR)/opal/mca/common/ugni/lib@OPAL_LIB_PREFIX@mca_common_ugni.la
|
||||
mca_btl_ugni_la_LIBADD = $(btl_ugni_LIBS)
|
||||
mca_btl_ugni_la_LDFLAGS = -module -avoid-version $(btl_ugni_LDFLAGS)
|
||||
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
@ -34,7 +34,6 @@
|
||||
#include "opal/mca/btl/base/btl_base_error.h"
|
||||
#include "opal/class/opal_hash_table.h"
|
||||
#include "opal/class/opal_free_list.h"
|
||||
#include "opal/mca/common/ugni/common_ugni.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdint.h>
|
||||
@ -48,6 +47,23 @@
|
||||
#define MCA_BTL_UGNI_CONNECT_DIRECTED_ID 0x8000000000000000ull
|
||||
#define MCA_BTL_UGNI_DATAGRAM_MASK 0x8000000000000000ull
|
||||
|
||||
/** maximum number of supported virtual devices */
|
||||
#define MCA_BTL_UGNI_MAX_DEV_HANDLES 128
|
||||
|
||||
/** number of rdma completion queue items to remove per progress loop */
|
||||
#define MCA_BTL_UGNI_COMPLETIONS_PER_LOOP 16
|
||||
|
||||
/**
 * Modex data
 *
 * Carries the two values declared below: the GNI NIC address and the
 * base CDM identifier.
 */
struct mca_btl_ugni_modex_t {
    /** GNI NIC address */
    uint32_t addr;
    /** CDM identifier (base) */
    int id;
};
typedef struct mca_btl_ugni_modex_t mca_btl_ugni_modex_t;
|
||||
|
||||
/* ompi and smsg endpoint attributes */
|
||||
typedef struct mca_btl_ugni_endpoint_attr_t {
|
||||
opal_process_name_t proc_name;
|
||||
@ -61,12 +77,73 @@ enum {
|
||||
MCA_BTL_UGNI_RCACHE_GRDMA
|
||||
};
|
||||
|
||||
/**
 * Identifiers for the fragment free lists. The values are consecutive
 * starting at zero; MCA_BTL_UGNI_LIST_MAX is the number of lists and is
 * used to size the module's frags_lists array.
 */
enum mca_btl_ugni_free_list_id_t {
    /* eager fragment list (registered) */
    MCA_BTL_UGNI_LIST_EAGER_SEND,
    MCA_BTL_UGNI_LIST_EAGER_RECV,
    /* SMSG fragment list (unregistered) */
    MCA_BTL_UGNI_LIST_SMSG,
    /* RDMA fragment list */
    MCA_BTL_UGNI_LIST_RDMA,
    MCA_BTL_UGNI_LIST_RDMA_INT,
    /* number of list identifiers (not a list itself) */
    MCA_BTL_UGNI_LIST_MAX,
};
|
||||
|
||||
struct mca_btl_ugni_cq_t {
|
||||
/** ugni CQ handle */
|
||||
gni_cq_handle_t gni_handle;
|
||||
/** number of completions expected on the CQ */
|
||||
int32_t active_operations;
|
||||
};
|
||||
typedef struct mca_btl_ugni_cq_t mca_btl_ugni_cq_t;
|
||||
|
||||
/**
|
||||
* GNI virtual device
|
||||
*/
|
||||
struct mca_btl_ugni_device_t {
|
||||
/** Communication domain handle */
|
||||
gni_cdm_handle_t dev_cd_handle;
|
||||
|
||||
/** protection for ugni access */
|
||||
volatile int32_t lock;
|
||||
|
||||
/** Index of device in module devices array */
|
||||
int dev_index;
|
||||
|
||||
/** number of SMSG connections */
|
||||
volatile int32_t smsg_connections;
|
||||
|
||||
/** uGNI device handle */
|
||||
gni_nic_handle_t dev_handle;
|
||||
|
||||
/** uGNI rdma completion queue */
|
||||
mca_btl_ugni_cq_t dev_rdma_local_cq;
|
||||
|
||||
/** local rdma completion queue (async) */
|
||||
mca_btl_ugni_cq_t dev_rdma_local_irq_cq;
|
||||
|
||||
/** local SMSG completion queue */
|
||||
mca_btl_ugni_cq_t dev_smsg_local_cq;
|
||||
|
||||
/** IRQ memory handle for this device */
|
||||
gni_mem_handle_t smsg_irq_mhndl;
|
||||
|
||||
/** RDMA endpoint free list */
|
||||
opal_free_list_t endpoints;
|
||||
|
||||
/** post descriptors pending resources */
|
||||
opal_list_t pending_post;
|
||||
};
|
||||
typedef struct mca_btl_ugni_device_t mca_btl_ugni_device_t;
|
||||
|
||||
typedef intptr_t (*mca_btl_ugni_device_serialize_fn_t) (mca_btl_ugni_device_t *device, void *arg);
|
||||
|
||||
typedef struct mca_btl_ugni_module_t {
|
||||
mca_btl_base_module_t super;
|
||||
|
||||
bool initialized;
|
||||
|
||||
opal_common_ugni_device_t *device;
|
||||
mca_btl_ugni_device_t devices[MCA_BTL_UGNI_MAX_DEV_HANDLES];
|
||||
|
||||
opal_mutex_t endpoint_lock;
|
||||
size_t endpoint_count;
|
||||
@ -82,9 +159,6 @@ typedef struct mca_btl_ugni_module_t {
|
||||
opal_mutex_t eager_get_pending_lock;
|
||||
opal_list_t eager_get_pending;
|
||||
|
||||
opal_mutex_t pending_descriptors_lock;
|
||||
opal_list_t pending_descriptors;
|
||||
|
||||
opal_free_list_t post_descriptors;
|
||||
|
||||
mca_mpool_base_module_t *mpool;
|
||||
@ -95,23 +169,11 @@ typedef struct mca_btl_ugni_module_t {
|
||||
|
||||
struct mca_btl_ugni_endpoint_attr_t wc_remote_attr, wc_local_attr;
|
||||
|
||||
gni_cq_handle_t rdma_local_cq;
|
||||
gni_cq_handle_t smsg_remote_cq;
|
||||
gni_cq_handle_t smsg_local_cq;
|
||||
gni_cq_handle_t smsg_remote_irq_cq;
|
||||
gni_cq_handle_t rdma_local_irq_cq;
|
||||
|
||||
/* eager fragment list (registered) */
|
||||
opal_free_list_t eager_frags_send;
|
||||
opal_free_list_t eager_frags_recv;
|
||||
|
||||
/* SMSG fragment list (unregistered) */
|
||||
opal_free_list_t smsg_frags;
|
||||
|
||||
/* RDMA fragment list */
|
||||
opal_free_list_t rdma_frags;
|
||||
opal_free_list_t rdma_int_frags;
|
||||
|
||||
/** fragment free lists (see enum mca_btl_ugni_free_list_id_t) */
|
||||
opal_free_list_t frags_lists[MCA_BTL_UGNI_LIST_MAX];
|
||||
|
||||
/* lock for this list */
|
||||
opal_mutex_t ep_wait_list_lock;
|
||||
@ -197,10 +259,62 @@ typedef struct mca_btl_ugni_component_t {
|
||||
/* Indicate whether progress thread allowed */
|
||||
bool progress_thread_enabled;
|
||||
|
||||
/** Number of ugni device contexts to create per GNI device */
|
||||
int virtual_device_count;
|
||||
|
||||
/** Protection tag */
|
||||
uint8_t ptag;
|
||||
|
||||
/** Unique id for this process assigned by the system */
|
||||
uint32_t cookie;
|
||||
|
||||
/** Starting value of communication identifier */
|
||||
uint32_t cdm_id_base;
|
||||
|
||||
/** GNI CDM flags */
|
||||
uint32_t cdm_flags;
|
||||
|
||||
/** NIC address */
|
||||
uint32_t dev_addr;
|
||||
} mca_btl_ugni_component_t;
|
||||
|
||||
int mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
|
||||
opal_common_ugni_device_t *device);
|
||||
/* Global structures */
|
||||
|
||||
OPAL_MODULE_DECLSPEC extern mca_btl_ugni_component_t mca_btl_ugni_component;
|
||||
OPAL_MODULE_DECLSPEC extern mca_btl_ugni_module_t mca_btl_ugni_module;
|
||||
|
||||
/**
|
||||
* Get a virtual device for communication
|
||||
*/
|
||||
static inline mca_btl_ugni_device_t *mca_btl_ugni_ep_get_device (mca_btl_ugni_module_t *ugni_module)
|
||||
{
|
||||
static volatile uint32_t device_index = (uint32_t) 0;
|
||||
uint32_t dev_index;
|
||||
|
||||
/* don't really care if the device index is atomically updated */
|
||||
dev_index = (device_index++) & (mca_btl_ugni_component.virtual_device_count - 1);
|
||||
|
||||
return ugni_module->devices + dev_index;
|
||||
}
|
||||
|
||||
/**
 * Convert a uGNI return code to an OPAL error code
 *
 * @param[in] rc  uGNI return code
 *
 * @returns the translation table entry indexed by the numeric value of rc,
 *          or OPAL_ERROR if rc lies outside the table
 */
static inline int mca_btl_rc_ugni_to_opal (gni_return_t rc)
{
    /* indexed by the numeric value of the gni_return_t code */
    static const int codes[] = {OPAL_SUCCESS,
                                OPAL_ERR_RESOURCE_BUSY,
                                OPAL_ERR_BAD_PARAM,
                                OPAL_ERR_OUT_OF_RESOURCE,
                                OPAL_ERR_TIMEOUT,
                                OPAL_ERR_PERM,
                                OPAL_ERROR,
                                OPAL_ERR_BAD_PARAM,
                                OPAL_ERR_BAD_PARAM,
                                OPAL_ERR_NOT_FOUND,
                                OPAL_ERR_VALUE_OUT_OF_BOUNDS,
                                OPAL_ERROR,
                                OPAL_ERR_NOT_SUPPORTED,
                                OPAL_ERR_OUT_OF_RESOURCE};
    const int ncodes = (int) (sizeof (codes) / sizeof (codes[0]));
    const int idx = (int) rc;

    /* an unknown code must not read past the end of the table */
    if (idx < 0 || idx >= ncodes) {
        return OPAL_ERROR;
    }

    return codes[idx];
}
|
||||
|
||||
/**
|
||||
* BML->BTL notification of change in the process list.
|
||||
@ -324,10 +438,32 @@ typedef struct mca_btl_ugni_reg_t {
|
||||
mca_btl_base_registration_handle_t handle;
|
||||
} mca_btl_ugni_reg_t;
|
||||
|
||||
/* Global structures */
|
||||
/**
|
||||
* Initialize uGNI support.
|
||||
*/
|
||||
int mca_btl_ugni_init (void);
|
||||
|
||||
OPAL_MODULE_DECLSPEC extern mca_btl_ugni_component_t mca_btl_ugni_component;
|
||||
OPAL_MODULE_DECLSPEC extern mca_btl_ugni_module_t mca_btl_ugni_module;
|
||||
/**
|
||||
* Finalize uGNI support.
|
||||
*/
|
||||
int mca_btl_ugni_fini (void);
|
||||
|
||||
int mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module);
|
||||
|
||||
/**
|
||||
* Initialize a virtual device for device index 0.
|
||||
*
|
||||
* @param[inout] device Device to initialize
|
||||
* @param[in] virtual_device_id Virtual device identifier (up to max handles)
|
||||
*/
|
||||
int mca_btl_ugni_device_init (mca_btl_ugni_device_t *device, int virtual_device_id);
|
||||
|
||||
/**
|
||||
* Finalize a virtual device.
|
||||
*
|
||||
* @param[in] dev Device to finalize
|
||||
*/
|
||||
int mca_btl_ugni_device_fini (mca_btl_ugni_device_t *dev);
|
||||
|
||||
/* Get a unique 64-bit id for the process name */
|
||||
static inline uint64_t mca_btl_ugni_proc_name_to_id (opal_process_name_t name) {
|
||||
@ -338,6 +474,57 @@ static inline uint64_t mca_btl_ugni_proc_name_to_id (opal_process_name_t name) {
|
||||
int mca_btl_ugni_spawn_progress_thread(struct mca_btl_base_module_t* btl);
|
||||
int mca_btl_ugni_kill_progress_thread(void);
|
||||
|
||||
/**
|
||||
* Try to lock a uGNI device for exclusive access
|
||||
*/
|
||||
static inline int mca_btl_ugni_device_trylock (mca_btl_ugni_device_t *device)
|
||||
{
|
||||
/* checking the lock non-atomically first can reduce the number of
|
||||
* unnecessary atomic operations. */
|
||||
return (device->lock || opal_atomic_swap_32 (&device->lock, 1));
|
||||
}
|
||||
|
||||
/**
|
||||
* Lock a uGNI device for exclusive access
|
||||
*/
|
||||
static inline void mca_btl_ugni_device_lock (mca_btl_ugni_device_t *device)
|
||||
{
|
||||
while (mca_btl_ugni_device_trylock (device));
|
||||
}
|
||||
|
||||
/**
|
||||
* Release exclusive access to the device
|
||||
*/
|
||||
static inline void mca_btl_ugni_device_unlock (mca_btl_ugni_device_t *device)
|
||||
{
|
||||
opal_atomic_wmb ();
|
||||
device->lock = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Serialize an operation on a uGNI device
|
||||
*
|
||||
* @params[in] device ugni device
|
||||
* @params[in] fn function to serialize
|
||||
* @params[in] arg function argument
|
||||
*/
|
||||
static inline intptr_t mca_btl_ugni_device_serialize (mca_btl_ugni_device_t *device,
|
||||
mca_btl_ugni_device_serialize_fn_t fn, void *arg)
|
||||
{
|
||||
intptr_t rc;
|
||||
|
||||
if (!opal_using_threads ()) {
|
||||
return fn (device, arg);
|
||||
}
|
||||
|
||||
/* NTH: for now the device is just protected by a spin lock but this will change in the future */
|
||||
mca_btl_ugni_device_lock (device);
|
||||
rc = fn (device, arg);
|
||||
mca_btl_ugni_device_unlock (device);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
||||
/** Number of times the progress thread has woken up */
|
||||
extern unsigned int mca_btl_ugni_progress_thread_wakeups;
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
@ -20,7 +20,7 @@
|
||||
#include "opal/include/opal/align.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
|
||||
#define INITIAL_GNI_EPS 10000
|
||||
#define INITIAL_GNI_EPS 1024
|
||||
|
||||
static int
|
||||
mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module);
|
||||
@ -50,7 +50,7 @@ int mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl, size_t nprocs,
|
||||
/* NTH: might want to vary this size based off the universe size (if
|
||||
* one exists). the table is only used for connection lookup and
|
||||
* endpoint removal. */
|
||||
rc = opal_hash_table_init (&ugni_module->id_to_endpoint, 512);
|
||||
rc = opal_hash_table_init (&ugni_module->id_to_endpoint, INITIAL_GNI_EPS);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
BTL_ERROR(("error initializing the endpoint hash. rc = %d", rc));
|
||||
return rc;
|
||||
@ -58,93 +58,63 @@ int mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl, size_t nprocs,
|
||||
}
|
||||
|
||||
for (size_t i = 0 ; i < nprocs ; ++i) {
|
||||
struct opal_proc_t *opal_proc = procs[i];
|
||||
uint64_t proc_id = mca_btl_ugni_proc_name_to_id(opal_proc->proc_name);
|
||||
|
||||
/* check for an existing endpoint */
|
||||
OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
|
||||
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) (peers + i))) {
|
||||
if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) {
|
||||
ugni_module->nlocal_procs++;
|
||||
|
||||
/* ugni is allowed on local processes to provide support for network
|
||||
* atomic operations */
|
||||
}
|
||||
|
||||
/* Create and Init endpoints */
|
||||
rc = mca_btl_ugni_init_ep (ugni_module, peers + i, (mca_btl_ugni_module_t *) btl, opal_proc);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
|
||||
BTL_ERROR(("btl/ugni error initializing endpoint"));
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* go ahead and connect the local endpoint for RDMA/CQ write */
|
||||
if (opal_proc == opal_proc_local_get ()) {
|
||||
ugni_module->local_ep = peers[i];
|
||||
}
|
||||
|
||||
/* Add this endpoint to the pointer array. */
|
||||
BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) peers[i]));
|
||||
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, peers[i]);
|
||||
|
||||
++ugni_module->endpoint_count;
|
||||
peers[i] = mca_btl_ugni_get_ep (btl, procs[i]);
|
||||
if (NULL == peers[i]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (procs[i] == opal_proc_local_get ()) {
|
||||
ugni_module->local_ep = peers[i];
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
|
||||
|
||||
/* Set the reachable bit if necessary */
|
||||
if (reachable) {
|
||||
rc = opal_bitmap_set_bit (reachable, i);
|
||||
(void) opal_bitmap_set_bit (reachable, i);
|
||||
}
|
||||
}
|
||||
|
||||
mca_btl_ugni_module_set_max_reg (ugni_module, ugni_module->nlocal_procs);
|
||||
|
||||
if (false == ugni_module->initialized) {
|
||||
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
||||
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.local_cq_size,
|
||||
0, GNI_CQ_NOBLOCK, NULL, NULL, &ugni_module->rdma_local_cq);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_ERROR(("error creating local BTE/FMA CQ"));
|
||||
return opal_common_rc_ugni_to_opal (rc);
|
||||
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
|
||||
mca_btl_ugni_device_t *device = ugni_module->devices + i;
|
||||
rc = GNI_CqCreate (device->dev_handle, mca_btl_ugni_component.local_cq_size, 0,
|
||||
GNI_CQ_NOBLOCK, NULL, NULL, &device->dev_rdma_local_cq.gni_handle);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_ERROR(("error creating local BTE/FMA CQ"));
|
||||
return mca_btl_rc_ugni_to_opal (rc);
|
||||
}
|
||||
|
||||
rc = GNI_CqCreate (device->dev_handle, mca_btl_ugni_component.local_cq_size,
|
||||
0, GNI_CQ_NOBLOCK, NULL, NULL, &device->dev_smsg_local_cq.gni_handle);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_ERROR(("error creating local SMSG CQ"));
|
||||
return mca_btl_rc_ugni_to_opal (rc);
|
||||
}
|
||||
|
||||
if (mca_btl_ugni_component.progress_thread_enabled) {
|
||||
rc = GNI_CqCreate (device->dev_handle, mca_btl_ugni_component.local_cq_size,
|
||||
0, GNI_CQ_BLOCKING, NULL, NULL, &device->dev_rdma_local_irq_cq.gni_handle);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_ERROR(("error creating local BTE/FMA CQ"));
|
||||
return mca_btl_rc_ugni_to_opal (rc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
||||
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.local_cq_size,
|
||||
0, GNI_CQ_NOBLOCK, NULL, NULL, &ugni_module->smsg_local_cq);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_ERROR(("error creating local SMSG CQ"));
|
||||
return opal_common_rc_ugni_to_opal (rc);
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
||||
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.remote_cq_size,
|
||||
rc = GNI_CqCreate (ugni_module->devices[0].dev_handle, mca_btl_ugni_component.remote_cq_size,
|
||||
0, GNI_CQ_NOBLOCK, NULL, NULL, &ugni_module->smsg_remote_cq);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_ERROR(("error creating remote SMSG CQ"));
|
||||
return opal_common_rc_ugni_to_opal (rc);
|
||||
return mca_btl_rc_ugni_to_opal (rc);
|
||||
}
|
||||
|
||||
if (mca_btl_ugni_component.progress_thread_enabled) {
|
||||
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
||||
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.local_cq_size,
|
||||
0, GNI_CQ_BLOCKING, NULL, NULL, &ugni_module->rdma_local_irq_cq);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_ERROR(("error creating local BTE/FMA CQ"));
|
||||
return opal_common_rc_ugni_to_opal (rc);
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
||||
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.remote_cq_size,
|
||||
rc = GNI_CqCreate (ugni_module->devices[0].dev_handle, mca_btl_ugni_component.remote_cq_size,
|
||||
0, GNI_CQ_BLOCKING, NULL, NULL, &ugni_module->smsg_remote_irq_cq);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_ERROR(("error creating remote SMSG CQ"));
|
||||
return opal_common_rc_ugni_to_opal (rc);
|
||||
return mca_btl_rc_ugni_to_opal (rc);
|
||||
}
|
||||
}
|
||||
|
||||
@ -175,15 +145,13 @@ int mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl, size_t nprocs,
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
||||
rc = GNI_MemRegister(ugni_module->device->dev_handle,
|
||||
rc = GNI_MemRegister(ugni_module->devices[0].dev_handle,
|
||||
(unsigned long)mmap_start_addr,
|
||||
4096,
|
||||
ugni_module->smsg_remote_irq_cq,
|
||||
GNI_MEM_READWRITE,
|
||||
-1,
|
||||
&ugni_module->device->smsg_irq_mhndl);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
&ugni_module->devices[0].smsg_irq_mhndl);
|
||||
|
||||
mca_btl_ugni_spawn_progress_thread(btl);
|
||||
}
|
||||
@ -198,18 +166,10 @@ int mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl,
|
||||
size_t nprocs, struct opal_proc_t **procs,
|
||||
struct mca_btl_base_endpoint_t **peers) {
|
||||
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
|
||||
size_t i;
|
||||
int rc;
|
||||
|
||||
while (ugni_module->active_send_count) {
|
||||
/* ensure all sends are complete before removing any procs */
|
||||
rc = mca_btl_ugni_progress_local_smsg (ugni_module);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
|
||||
|
||||
for (i = 0 ; i < nprocs ; ++i) {
|
||||
for (size_t i = 0 ; i < nprocs ; ++i) {
|
||||
struct opal_proc_t *opal_proc = procs[i];
|
||||
uint64_t proc_id = mca_btl_ugni_proc_name_to_id(opal_proc->proc_name);
|
||||
mca_btl_base_endpoint_t *ep = NULL;
|
||||
@ -224,10 +184,18 @@ int mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl,
|
||||
--ugni_module->endpoint_count;
|
||||
}
|
||||
|
||||
if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) {
|
||||
--ugni_module->nlocal_procs;
|
||||
}
|
||||
|
||||
/* remove the endpoint from the hash table */
|
||||
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, NULL);
|
||||
}
|
||||
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
|
||||
|
||||
mca_btl_ugni_module_set_max_reg (ugni_module, ugni_module->nlocal_procs);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
@ -244,9 +212,12 @@ struct mca_btl_base_endpoint_t *mca_btl_ugni_get_ep (struct mca_btl_base_module_
|
||||
do {
|
||||
rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) &ep);
|
||||
if (OPAL_SUCCESS == rc) {
|
||||
BTL_VERBOSE(("returning existing endpoint for proc %s", OPAL_NAME_PRINT(proc->proc_name)));
|
||||
break;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) proc));
|
||||
|
||||
/* Create and Init endpoints */
|
||||
rc = mca_btl_ugni_init_ep (ugni_module, &ep, ugni_module, proc);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
@ -254,8 +225,13 @@ struct mca_btl_base_endpoint_t *mca_btl_ugni_get_ep (struct mca_btl_base_module_
|
||||
break;
|
||||
}
|
||||
|
||||
/* Add this endpoint to the pointer array. */
|
||||
BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) ep));
|
||||
/* ugni is allowed on local processes to provide support for network atomic operations */
|
||||
if (OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) {
|
||||
++ugni_module->nlocal_procs;
|
||||
}
|
||||
++ugni_module->endpoint_count;
|
||||
|
||||
/* add this endpoint to the connection lookup table */
|
||||
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, ep);
|
||||
} while (0);
|
||||
|
||||
@ -269,10 +245,8 @@ static int ugni_reg_mem (void *reg_data, void *base, size_t size,
|
||||
mca_rcache_base_registration_t *reg)
|
||||
{
|
||||
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) reg_data;
|
||||
mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *) reg;
|
||||
gni_cq_handle_t cq = NULL;
|
||||
gni_return_t rc;
|
||||
int flags;
|
||||
gni_cq_handle_t cq = 0;
|
||||
int flags, rc;
|
||||
|
||||
if (ugni_module->reg_count >= ugni_module->reg_max) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
@ -293,37 +267,26 @@ static int ugni_reg_mem (void *reg_data, void *base, size_t size,
|
||||
cq = ugni_module->smsg_remote_cq;
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
||||
rc = GNI_MemRegister (ugni_module->device->dev_handle, (uint64_t) base,
|
||||
size, cq, flags, -1, &(ugni_reg->handle.gni_handle));
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
rc = mca_btl_ugni_reg_mem (ugni_module, base, size, (mca_btl_ugni_reg_t *) reg, cq, flags);
|
||||
if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
|
||||
opal_atomic_add_32(&ugni_module->reg_count,1);
|
||||
}
|
||||
|
||||
opal_atomic_add_32(&ugni_module->reg_count,1);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int
|
||||
ugni_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg)
|
||||
{
|
||||
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) reg_data;
|
||||
mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *)reg;
|
||||
gni_return_t rc;
|
||||
int rc;
|
||||
|
||||
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
||||
rc = GNI_MemDeregister (ugni_module->device->dev_handle, &ugni_reg->handle.gni_handle);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
return OPAL_ERROR;
|
||||
rc = mca_btl_ugni_dereg_mem (ugni_module, (mca_btl_ugni_reg_t *) reg);
|
||||
if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
|
||||
opal_atomic_add_32(&ugni_module->reg_count,-1);
|
||||
}
|
||||
|
||||
opal_atomic_add_32(&ugni_module->reg_count,-1);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int
|
||||
@ -356,7 +319,7 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = opal_free_list_init (&ugni_module->smsg_frags,
|
||||
rc = opal_free_list_init (ugni_module->frags_lists + MCA_BTL_UGNI_LIST_SMSG,
|
||||
sizeof (mca_btl_ugni_smsg_frag_t),
|
||||
opal_cache_line_size, OBJ_CLASS(mca_btl_ugni_smsg_frag_t),
|
||||
mca_btl_ugni_component.ugni_smsg_limit,
|
||||
@ -365,13 +328,13 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
|
||||
mca_btl_ugni_component.ugni_free_list_max,
|
||||
mca_btl_ugni_component.ugni_free_list_inc,
|
||||
NULL, 0, NULL, (opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
|
||||
(void *) ugni_module);
|
||||
(void *) (intptr_t) MCA_BTL_UGNI_LIST_SMSG);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
BTL_ERROR(("error creating smsg fragment free list"));
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = opal_free_list_init (&ugni_module->rdma_frags,
|
||||
rc = opal_free_list_init (ugni_module->frags_lists + MCA_BTL_UGNI_LIST_RDMA,
|
||||
sizeof (mca_btl_ugni_rdma_frag_t), 64,
|
||||
OBJ_CLASS(mca_btl_ugni_rdma_frag_t),
|
||||
0, opal_cache_line_size,
|
||||
@ -379,17 +342,17 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
|
||||
mca_btl_ugni_component.ugni_free_list_max,
|
||||
mca_btl_ugni_component.ugni_free_list_inc,
|
||||
NULL, 0, NULL, (opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
|
||||
(void *) ugni_module);
|
||||
(void *) (intptr_t) MCA_BTL_UGNI_LIST_RDMA);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = opal_free_list_init (&ugni_module->rdma_int_frags,
|
||||
rc = opal_free_list_init (ugni_module->frags_lists + MCA_BTL_UGNI_LIST_RDMA_INT,
|
||||
sizeof (mca_btl_ugni_rdma_frag_t), 8,
|
||||
OBJ_CLASS(mca_btl_ugni_rdma_frag_t),
|
||||
0, opal_cache_line_size, 0, -1, 64,
|
||||
NULL, 0, NULL, (opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
|
||||
(void *) ugni_module);
|
||||
(void *) (intptr_t) MCA_BTL_UGNI_LIST_RDMA_INT);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
return rc;
|
||||
}
|
||||
@ -419,14 +382,14 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
|
||||
}
|
||||
|
||||
ugni_module->rcache =
|
||||
mca_rcache_base_module_create (rcache_name, ugni_module->device, &rcache_resources.base);
|
||||
mca_rcache_base_module_create (rcache_name, ugni_module->devices, &rcache_resources.base);
|
||||
|
||||
if (NULL == ugni_module->rcache) {
|
||||
BTL_ERROR(("error creating registration cache"));
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
|
||||
rc = opal_free_list_init (&ugni_module->eager_frags_send,
|
||||
rc = opal_free_list_init (ugni_module->frags_lists + MCA_BTL_UGNI_LIST_EAGER_SEND,
|
||||
sizeof (mca_btl_ugni_eager_frag_t), 8,
|
||||
OBJ_CLASS(mca_btl_ugni_eager_frag_t),
|
||||
ugni_module->super.btl_eager_limit, 64,
|
||||
@ -435,13 +398,13 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
|
||||
mca_btl_ugni_component.ugni_eager_inc,
|
||||
ugni_module->super.btl_mpool, 0, ugni_module->rcache,
|
||||
(opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
|
||||
(void *) ugni_module);
|
||||
(void *) (intptr_t) MCA_BTL_UGNI_LIST_EAGER_SEND);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
BTL_ERROR(("error creating eager send fragment free list"));
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = opal_free_list_init (&ugni_module->eager_frags_recv,
|
||||
rc = opal_free_list_init (ugni_module->frags_lists + MCA_BTL_UGNI_LIST_EAGER_RECV,
|
||||
sizeof (mca_btl_ugni_eager_frag_t), 8,
|
||||
OBJ_CLASS(mca_btl_ugni_eager_frag_t),
|
||||
ugni_module->super.btl_eager_limit, 64,
|
||||
@ -450,7 +413,7 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
|
||||
mca_btl_ugni_component.ugni_eager_inc,
|
||||
ugni_module->super.btl_mpool, 0, ugni_module->rcache,
|
||||
(opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
|
||||
(void *) ugni_module);
|
||||
(void *) (intptr_t) MCA_BTL_UGNI_LIST_EAGER_RECV);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
BTL_ERROR(("error creating eager receive fragment free list"));
|
||||
return rc;
|
||||
@ -503,14 +466,22 @@ mca_btl_ugni_module_set_max_reg (mca_btl_ugni_module_t *ugni_module, int nlocal_
|
||||
gni_return_t grc;
|
||||
int fuzz = 20;
|
||||
|
||||
grc = GNI_GetJobResInfo (ugni_module->device->dev_id, opal_common_ugni_module.ptag,
|
||||
grc = GNI_GetJobResInfo (0, mca_btl_ugni_component.ptag,
|
||||
GNI_JOB_RES_MDD, &res_des);
|
||||
if (GNI_RC_SUCCESS == grc) {
|
||||
ugni_module->reg_max = (res_des.limit - fuzz) / nlocal_procs;
|
||||
if (nlocal_procs) {
|
||||
ugni_module->reg_max = (res_des.limit - fuzz) / nlocal_procs;
|
||||
} else {
|
||||
ugni_module->reg_max = 0;
|
||||
}
|
||||
}
|
||||
#else
|
||||
/* no way to determine the maximum registration count */
|
||||
ugni_module->reg_max = 1200 / nlocal_procs;
|
||||
if (nlocal_procs) {
|
||||
ugni_module->reg_max = 1200 / nlocal_procs;
|
||||
} else {
|
||||
ugni_module->reg_max = 0;
|
||||
}
|
||||
#endif
|
||||
} else if (-1 == mca_btl_ugni_component.max_mem_reg) {
|
||||
ugni_module->reg_max = INT_MAX;
|
||||
@ -557,7 +528,7 @@ static int mca_btl_ugni_smsg_setup (int nprocs)
|
||||
grc = GNI_SmsgBufferSizeNeeded (&tmp_smsg_attrib, &mbox_size);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
|
||||
BTL_ERROR(("error in GNI_SmsgBufferSizeNeeded"));
|
||||
return opal_common_rc_ugni_to_opal (grc);
|
||||
return mca_btl_rc_ugni_to_opal (grc);
|
||||
}
|
||||
|
||||
mca_btl_ugni_component.smsg_mbox_size = OPAL_ALIGN(mbox_size, 64, unsigned int);
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
@ -95,34 +95,23 @@ int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
|
||||
return OPAL_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
mca_btl_ugni_alloc_post_descriptor (endpoint, NULL, cbfunc, cbcontext, cbdata, &post_desc);
|
||||
post_desc = mca_btl_ugni_alloc_post_descriptor (endpoint, NULL, cbfunc, cbcontext, cbdata);
|
||||
if (OPAL_UNLIKELY(NULL == post_desc)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, 0, dummy, remote_address,
|
||||
init_gni_post_desc (post_desc, order, GNI_POST_AMO, 0, dummy, remote_address,
|
||||
remote_handle->gni_handle, size, 0);
|
||||
post_desc->desc.base.amo_cmd = gni_op;
|
||||
post_desc->desc.amo_cmd = gni_op;
|
||||
|
||||
post_desc->desc.base.first_operand = operand;
|
||||
post_desc->desc.first_operand = operand;
|
||||
|
||||
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
|
||||
rc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base);
|
||||
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
|
||||
if (GNI_RC_ILLEGAL_OP == rc) {
|
||||
return OPAL_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
rc = mca_btl_ugni_endpoint_post_fma (endpoint, post_desc);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
mca_btl_ugni_return_post_descriptor (post_desc);
|
||||
}
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
return rc;
|
||||
}
|
||||
|
||||
int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
|
||||
@ -147,35 +136,24 @@ int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_en
|
||||
return OPAL_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata, &post_desc);
|
||||
post_desc = mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata);
|
||||
if (OPAL_UNLIKELY(NULL == post_desc)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
|
||||
init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle,
|
||||
init_gni_post_desc (post_desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle,
|
||||
remote_address, remote_handle->gni_handle, size, 0);
|
||||
post_desc->desc.base.amo_cmd = gni_op;
|
||||
post_desc->desc.amo_cmd = gni_op;
|
||||
|
||||
post_desc->desc.base.first_operand = operand;
|
||||
post_desc->desc.first_operand = operand;
|
||||
|
||||
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
|
||||
rc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base);
|
||||
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
|
||||
if (GNI_RC_ILLEGAL_OP == rc) {
|
||||
return OPAL_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
rc = mca_btl_ugni_endpoint_post_fma (endpoint, post_desc);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
mca_btl_ugni_return_post_descriptor (post_desc);
|
||||
}
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
return rc;
|
||||
}
|
||||
|
||||
int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
|
||||
@ -190,31 +168,23 @@ int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_
|
||||
gni_op = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? GNI_FMA_ATOMIC2_CSWAP_S : GNI_FMA_ATOMIC_CSWAP;
|
||||
size = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 4 : 8;
|
||||
|
||||
rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata, &post_desc);
|
||||
post_desc = mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata);
|
||||
if (OPAL_UNLIKELY(NULL == post_desc)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
|
||||
init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle,
|
||||
init_gni_post_desc (post_desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle,
|
||||
remote_address, remote_handle->gni_handle, size, 0);
|
||||
post_desc->desc.base.amo_cmd = gni_op;
|
||||
post_desc->desc.amo_cmd = gni_op;
|
||||
|
||||
post_desc->desc.base.first_operand = compare;
|
||||
post_desc->desc.base.second_operand = value;
|
||||
post_desc->desc.first_operand = compare;
|
||||
post_desc->desc.second_operand = value;
|
||||
|
||||
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
|
||||
rc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base);
|
||||
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
rc = mca_btl_ugni_endpoint_post_fma (endpoint, post_desc);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
mca_btl_ugni_return_post_descriptor (post_desc);
|
||||
}
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
return rc;
|
||||
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -19,6 +19,7 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <fcntl.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "opal/memoryhooks/memory.h"
|
||||
#include "opal/runtime/opal_params.h"
|
||||
@ -56,8 +57,49 @@ mca_base_var_enum_value_t rcache_values[] = {
|
||||
{-1, NULL} /* sentinel */
|
||||
};
|
||||
|
||||
static int
|
||||
btl_ugni_component_register(void)
|
||||
mca_base_var_enum_value_flag_t cdm_flags[] = {
|
||||
{.flag = GNI_CDM_MODE_FORK_NOCOPY, .string = "fork-no-copy", .conflicting_flag = GNI_CDM_MODE_FORK_FULLCOPY | GNI_CDM_MODE_FORK_PARTCOPY},
|
||||
{.flag = GNI_CDM_MODE_FORK_FULLCOPY, .string = "fork-full-copy", .conflicting_flag = GNI_CDM_MODE_FORK_NOCOPY | GNI_CDM_MODE_FORK_PARTCOPY},
|
||||
{.flag = GNI_CDM_MODE_FORK_PARTCOPY, .string = "fork-part-copy", .conflicting_flag = GNI_CDM_MODE_FORK_NOCOPY | GNI_CDM_MODE_FORK_FULLCOPY},
|
||||
{.flag = GNI_CDM_MODE_ERR_NO_KILL, .string = "err-no-kill", .conflicting_flag = GNI_CDM_MODE_ERR_ALL_KILL},
|
||||
{.flag = GNI_CDM_MODE_ERR_ALL_KILL, .string = "err-all-kill", .conflicting_flag = GNI_CDM_MODE_ERR_NO_KILL},
|
||||
{.flag = GNI_CDM_MODE_FAST_DATAGRAM_POLL, .string = "fast-datagram-poll", .conflicting_flag = 0},
|
||||
{.flag = GNI_CDM_MODE_BTE_SINGLE_CHANNEL, .string = "bte-single-channel", .conflicting_flag = 0},
|
||||
{.flag = GNI_CDM_MODE_USE_PCI_IOMMU, .string = "use-pci-iommu", .conflicting_flag = 0},
|
||||
{.flag = GNI_CDM_MODE_MDD_DEDICATED, .string = "mdd-dedicated", .conflicting_flag = GNI_CDM_MODE_MDD_SHARED},
|
||||
{.flag = GNI_CDM_MODE_MDD_SHARED, .string = "mdd-shared", .conflicting_flag = GNI_CDM_MODE_MDD_DEDICATED},
|
||||
{.flag = GNI_CDM_MODE_FMA_DEDICATED, .string = "fma-dedicated", .conflicting_flag = GNI_CDM_MODE_FMA_SHARED},
|
||||
{.flag = GNI_CDM_MODE_FMA_SHARED, .string = "fma-shared", .conflicting_flag = GNI_CDM_MODE_FMA_DEDICATED},
|
||||
{.flag = GNI_CDM_MODE_CACHED_AMO_ENABLED, .string = "cached-amo-enabled", .conflicting_flag = 0},
|
||||
{.flag = GNI_CDM_MODE_CQ_NIC_LOCAL_PLACEMENT, .string = "cq-nic-placement", .conflicting_flag = 0},
|
||||
{.flag = GNI_CDM_MODE_FMA_SMALL_WINDOW, .string = "fma-small-window", .conflicting_flag = 0},
|
||||
{.string = NULL}
|
||||
};
|
||||
|
||||
static inline int mca_btl_ugni_get_stat (const mca_base_pvar_t *pvar, void *value, void *obj)
|
||||
{
|
||||
gni_statistic_t statistic = (gni_statistic_t) (intptr_t) pvar->ctx;
|
||||
gni_return_t rc = GNI_RC_SUCCESS;
|
||||
|
||||
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
|
||||
rc = GNI_GetNicStat (mca_btl_ugni_component.modules[0].devices[i].dev_handle, statistic,
|
||||
((unsigned int *) value) + i);
|
||||
}
|
||||
|
||||
return mca_btl_rc_ugni_to_opal (rc);
|
||||
}
|
||||
|
||||
/**
 * Performance-variable notification callback for the NIC statistics.
 *
 * On a handle-bind event the number of values exposed by the variable is stored in
 * *count (one per virtual device). Every other event leaves *count untouched.
 *
 * @returns OPAL_SUCCESS in all cases
 */
static inline int mca_btl_ugni_notify_stat (mca_base_pvar_t *pvar, mca_base_pvar_event_t event, void *obj, int *count)
{
    if (MCA_BASE_PVAR_HANDLE_BIND != event) {
        /* nothing to do for any other event */
        return OPAL_SUCCESS;
    }

    /* one value for each virtual device handle */
    *count = mca_btl_ugni_component.virtual_device_count;

    return OPAL_SUCCESS;
}
|
||||
|
||||
static int btl_ugni_component_register(void)
|
||||
{
|
||||
mca_base_var_enum_t *new_enum;
|
||||
gni_nic_device_t device_type;
|
||||
@ -181,6 +223,31 @@ btl_ugni_component_register(void)
|
||||
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mbox_increment);
|
||||
|
||||
/* communication domain flags */
|
||||
rc = mca_base_var_enum_create_flag ("btl_ugni_cdm_flags", cdm_flags, (mca_base_var_enum_flag_t **) &new_enum);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
mca_btl_ugni_component.cdm_flags = GNI_CDM_MODE_FORK_PARTCOPY | GNI_CDM_MODE_ERR_NO_KILL | GNI_CDM_MODE_FAST_DATAGRAM_POLL |
|
||||
GNI_CDM_MODE_MDD_SHARED | GNI_CDM_MODE_FMA_SHARED | GNI_CDM_MODE_FMA_SMALL_WINDOW;
|
||||
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
||||
"cdm_flags", "Flags to set when creating a communication domain "
|
||||
"(default: fork-part-copy,err-no-kill,fast-datagram-poll,mdd-shared,"
|
||||
"fma-shared,fma-small-window)",
|
||||
MCA_BASE_VAR_TYPE_UNSIGNED_INT, new_enum, 0,
|
||||
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.cdm_flags);
|
||||
OBJ_RELEASE(new_enum);
|
||||
|
||||
mca_btl_ugni_component.virtual_device_count = 0;
|
||||
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
||||
"virtual_device_count", "Number of virtual devices to create. Higher numbers may "
|
||||
"result in better performance when using threads. (default: auto, max: 8)",
|
||||
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0,
|
||||
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.virtual_device_count);
|
||||
|
||||
/* determine if there are get alignment restrictions */
|
||||
GNI_GetDeviceType (&device_type);
|
||||
|
||||
@ -202,12 +269,9 @@ btl_ugni_component_register(void)
|
||||
}
|
||||
|
||||
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
||||
"smsg_page_size", "Page size to use for SMSG "
|
||||
"mailbox allocation (default: detect)",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0,
|
||||
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL,
|
||||
&mca_btl_ugni_component.smsg_page_size);
|
||||
"smsg_page_size", "Page size to use for SMSG mailbox allocation (default: detect)",
|
||||
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.smsg_page_size);
|
||||
|
||||
mca_btl_ugni_component.progress_thread_requested = 0;
|
||||
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
||||
@ -228,6 +292,31 @@ btl_ugni_component_register(void)
|
||||
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL,
|
||||
NULL, NULL, &mca_btl_ugni_progress_thread_wakeups);
|
||||
|
||||
/* register network statistics as performance variables */
|
||||
for (int i = 0 ; i < GNI_NUM_STATS ; ++i) {
|
||||
char name[128], desc[128];
|
||||
size_t str_len = strlen (gni_statistic_str[i]);
|
||||
|
||||
assert (str_len < sizeof (name));
|
||||
|
||||
/* we can get an all-caps string for the variable from gni_statistic_str. need to make it lowercase
|
||||
* to match ompi standards */
|
||||
for (size_t j = 0 ; j < str_len ; ++j) {
|
||||
name[j] = tolower (gni_statistic_str[i][j]);
|
||||
desc[j] = ('_' == name[j]) ? ' ' : name[j];
|
||||
}
|
||||
|
||||
name[str_len] = '\0';
|
||||
desc[str_len] = '\0';
|
||||
|
||||
(void) mca_base_component_pvar_register (&mca_btl_ugni_component.super.btl_version, name, desc,
|
||||
OPAL_INFO_LVL_4, MCA_BASE_PVAR_CLASS_COUNTER,
|
||||
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
|
||||
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
|
||||
mca_btl_ugni_get_stat, NULL, mca_btl_ugni_notify_stat,
|
||||
(void *) (intptr_t) i);
|
||||
}
|
||||
|
||||
/* btl/ugni can support only a fixed set of rcache components (these rcache components have compatible resource
|
||||
* structures) */
|
||||
rc = mca_base_var_enum_create ("btl_ugni_rcache", rcache_values, &new_enum);
|
||||
@ -235,9 +324,10 @@ btl_ugni_component_register(void)
|
||||
return rc;
|
||||
}
|
||||
|
||||
mca_btl_ugni_component.rcache_type = MCA_BTL_UGNI_RCACHE_UDREG;
|
||||
/* NTH: there are known *serious* performance issues with udreg. if they are ever resolved it is the preferred rcache */
|
||||
mca_btl_ugni_component.rcache_type = MCA_BTL_UGNI_RCACHE_GRDMA;
|
||||
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
|
||||
"rcache", "registration cache to use", MCA_BASE_VAR_TYPE_INT, new_enum,
|
||||
"rcache", "registration cache to use (default: grdma)", MCA_BASE_VAR_TYPE_INT, new_enum,
|
||||
0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
|
||||
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.rcache_type);
|
||||
OBJ_RELEASE(new_enum);
|
||||
@ -325,7 +415,7 @@ btl_ugni_component_open(void)
|
||||
static int
|
||||
btl_ugni_component_close(void)
|
||||
{
|
||||
opal_common_ugni_fini ();
|
||||
mca_btl_ugni_fini ();
|
||||
|
||||
if (mca_btl_ugni_component.modules) {
|
||||
free (mca_btl_ugni_component.modules);
|
||||
@ -342,7 +432,6 @@ mca_btl_ugni_component_init (int *num_btl_modules,
|
||||
{
|
||||
struct mca_btl_base_module_t **base_modules;
|
||||
mca_btl_ugni_module_t *ugni_modules;
|
||||
unsigned int i;
|
||||
int rc;
|
||||
|
||||
if (16384 < mca_btl_ugni_component.ugni_smsg_limit) {
|
||||
@ -360,19 +449,18 @@ mca_btl_ugni_component_init (int *num_btl_modules,
|
||||
}
|
||||
|
||||
/* Initialize ugni library and create communication domain */
|
||||
rc = opal_common_ugni_init();
|
||||
rc = mca_btl_ugni_init();
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Create and initialize one module per uGNI device */
|
||||
mca_btl_ugni_component.ugni_num_btls = opal_common_ugni_module.device_count;
|
||||
/* For now only create a single BTL module */
|
||||
mca_btl_ugni_component.ugni_num_btls = 1;
|
||||
|
||||
BTL_VERBOSE(("btl/ugni initializing"));
|
||||
|
||||
ugni_modules = mca_btl_ugni_component.modules = (mca_btl_ugni_module_t *)
|
||||
calloc (mca_btl_ugni_component.ugni_num_btls,
|
||||
sizeof (mca_btl_ugni_module_t));
|
||||
calloc (mca_btl_ugni_component.ugni_num_btls, sizeof (mca_btl_ugni_module_t));
|
||||
|
||||
if (OPAL_UNLIKELY(NULL == mca_btl_ugni_component.modules)) {
|
||||
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
|
||||
@ -395,20 +483,15 @@ mca_btl_ugni_component_init (int *num_btl_modules,
|
||||
|
||||
mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = mca_btl_ugni_module.super.btl_eager_limit;
|
||||
|
||||
for (i = 0 ; i < mca_btl_ugni_component.ugni_num_btls ; ++i) {
|
||||
mca_btl_ugni_module_t *ugni_module = ugni_modules + i;
|
||||
|
||||
rc = mca_btl_ugni_module_init (ugni_module,
|
||||
opal_common_ugni_module.devices + i);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
BTL_ERROR(("Failed to initialize uGNI module @ %s:%d", __FILE__,
|
||||
__LINE__));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
base_modules[i] = (mca_btl_base_module_t *) ugni_module;
|
||||
rc = mca_btl_ugni_module_init (ugni_modules);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
BTL_ERROR(("Failed to initialize uGNI module @ %s:%d", __FILE__,
|
||||
__LINE__));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
*base_modules = (mca_btl_base_module_t *) ugni_modules;
|
||||
|
||||
*num_btl_modules = mca_btl_ugni_component.ugni_num_btls;
|
||||
|
||||
BTL_VERBOSE(("btl/ugni done initializing %d module(s)", *num_btl_modules));
|
||||
@ -417,80 +500,47 @@ mca_btl_ugni_component_init (int *num_btl_modules,
|
||||
}
|
||||
|
||||
static inline int
|
||||
mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
|
||||
mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device)
|
||||
{
|
||||
uint64_t datagram_id, data, proc_id;
|
||||
uint32_t remote_addr, remote_id;
|
||||
mca_btl_base_endpoint_t *ep;
|
||||
gni_post_state_t post_state;
|
||||
gni_ep_handle_t handle;
|
||||
gni_return_t grc;
|
||||
int count = 0, rc;
|
||||
|
||||
/* check for datagram completion */
|
||||
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock); /* TODO: may not need lock for this function */
|
||||
grc = GNI_PostDataProbeById (ugni_module->device->dev_handle, &datagram_id);
|
||||
if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) {
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
return 0;
|
||||
rc = mca_btl_ugni_get_datagram (ugni_module, device, &handle, &ep);
|
||||
if (1 != rc) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
data = datagram_id & ~(MCA_BTL_UGNI_DATAGRAM_MASK);
|
||||
|
||||
BTL_VERBOSE(("datgram_id: %" PRIx64 ", mask: %" PRIx64, datagram_id, (uint64_t) (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK)));
|
||||
|
||||
if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID) {
|
||||
ep = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&ugni_module->endpoints, data);
|
||||
handle = ep->smsg_ep_handle;
|
||||
} else {
|
||||
handle = ugni_module->wildcard_ep;
|
||||
}
|
||||
|
||||
/* wait for the incoming datagram to complete (in case it isn't) */
|
||||
grc = GNI_EpPostDataWaitById (handle, datagram_id, -1, &post_state,
|
||||
&remote_addr, &remote_id);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
if (GNI_RC_SUCCESS != grc) {
|
||||
BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", grc));
|
||||
return opal_common_rc_ugni_to_opal (grc);
|
||||
}
|
||||
BTL_VERBOSE(("remote datagram completion on handle %p", handle));
|
||||
|
||||
/* if this is a wildcard endpoint lookup the remote peer by the proc id we received */
|
||||
if (handle == ugni_module->wildcard_ep) {
|
||||
proc_id = mca_btl_ugni_proc_name_to_id (ugni_module->wc_remote_attr.proc_name);
|
||||
struct opal_proc_t *remote_proc = opal_proc_for_name (ugni_module->wc_remote_attr.proc_name);
|
||||
|
||||
BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc id: %" PRIx64,
|
||||
proc_id));
|
||||
BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc: %s",
|
||||
OPAL_NAME_PRINT(ugni_module->wc_remote_attr.proc_name)));
|
||||
|
||||
OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
|
||||
rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) &ep);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
|
||||
|
||||
/* check if the endpoint is known */
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == ep)) {
|
||||
struct opal_proc_t *remote_proc = opal_proc_for_name (ugni_module->wc_remote_attr.proc_name);
|
||||
BTL_VERBOSE(("Got connection request from an unknown peer {jobid = 0x%x, vid = 0x%x}",
|
||||
ugni_module->wc_remote_attr.proc_name.jobid, ugni_module->wc_remote_attr.proc_name.vpid));
|
||||
ep = mca_btl_ugni_get_ep (&ugni_module->super, remote_proc);
|
||||
if (OPAL_UNLIKELY(NULL == ep)) {
|
||||
return rc;
|
||||
}
|
||||
ep = mca_btl_ugni_get_ep (&ugni_module->super, remote_proc);
|
||||
if (OPAL_UNLIKELY(NULL == ep)) {
|
||||
/* there is no way to recover from this error so just abort() */
|
||||
BTL_ERROR(("could not find/allocate a btl endpoint for peer %s",
|
||||
OPAL_NAME_PRINT(ugni_module->wc_remote_attr.proc_name)));
|
||||
abort ();
|
||||
return OPAL_ERR_NOT_FOUND;
|
||||
}
|
||||
} else {
|
||||
BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));
|
||||
}
|
||||
|
||||
/* should not have gotten a NULL endpoint */
|
||||
assert (NULL != ep);
|
||||
|
||||
BTL_VERBOSE(("got a datagram completion: id = %" PRIx64 ", state = %d, "
|
||||
"data = 0x%" PRIx64 ", ep = %p, remote id: %d", datagram_id, post_state,
|
||||
data, (void *) ep, remote_id));
|
||||
BTL_VERBOSE(("got a datagram completion: ep = %p. wc = %d", (void *) ep, handle == ugni_module->wildcard_ep));
|
||||
|
||||
/* NTH: TODO -- error handling */
|
||||
opal_mutex_lock (&ep->lock);
|
||||
if (handle != ugni_module->wildcard_ep) {
|
||||
/* directed post complete */
|
||||
BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));
|
||||
|
||||
ep->dg_posted = false;
|
||||
}
|
||||
|
||||
@ -514,106 +564,106 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
|
||||
static inline void btl_ugni_dump_post_desc (mca_btl_ugni_post_descriptor_t *desc)
|
||||
{
|
||||
|
||||
fprintf (stderr, "desc->desc.base.post_id = %" PRIx64 "\n", desc->desc.base.post_id);
|
||||
fprintf (stderr, "desc->desc.base.status = %" PRIx64 "\n", desc->desc.base.status);
|
||||
fprintf (stderr, "desc->desc.base.cq_mode_complete = %hu\n", desc->desc.base.cq_mode_complete);
|
||||
fprintf (stderr, "desc->desc.base.type = %d\n", desc->desc.base.type);
|
||||
fprintf (stderr, "desc->desc.base.cq_mode = %hu\n", desc->desc.base.cq_mode);
|
||||
fprintf (stderr, "desc->desc.base.dlvr_mode = %hu\n", desc->desc.base.dlvr_mode);
|
||||
fprintf (stderr, "desc->desc.base.local_addr = %" PRIx64 "\n", desc->desc.base.local_addr);
|
||||
fprintf (stderr, "desc->desc.base.local_mem_hndl = {%" PRIx64 ", %" PRIx64 "}\n", desc->desc.base.local_mem_hndl.qword1,
|
||||
desc->desc.base.local_mem_hndl.qword2);
|
||||
fprintf (stderr, "desc->desc.base.remote_addr = %" PRIx64 "\n", desc->desc.base.remote_addr);
|
||||
fprintf (stderr, "desc->desc.base.remote_mem_hndl = {%" PRIx64 ", %" PRIx64 "}\n", desc->desc.base.remote_mem_hndl.qword1,
|
||||
desc->desc.base.remote_mem_hndl.qword2);
|
||||
fprintf (stderr, "desc->desc.base.length = %" PRIu64 "\n", desc->desc.base.length);
|
||||
fprintf (stderr, "desc->desc.base.rdma_mode = %hu\n", desc->desc.base.rdma_mode);
|
||||
fprintf (stderr, "desc->desc.base.amo_cmd = %d\n", desc->desc.base.amo_cmd);
|
||||
fprintf (stderr, "desc->desc.post_id = %" PRIx64 "\n", desc->desc.post_id);
|
||||
fprintf (stderr, "desc->desc.status = %" PRIx64 "\n", desc->desc.status);
|
||||
fprintf (stderr, "desc->desc.cq_mode_complete = %hu\n", desc->desc.cq_mode_complete);
|
||||
fprintf (stderr, "desc->desc.type = %d\n", desc->desc.type);
|
||||
fprintf (stderr, "desc->desc.cq_mode = %hu\n", desc->desc.cq_mode);
|
||||
fprintf (stderr, "desc->desc.dlvr_mode = %hu\n", desc->desc.dlvr_mode);
|
||||
fprintf (stderr, "desc->desc.local_addr = %" PRIx64 "\n", desc->desc.local_addr);
|
||||
fprintf (stderr, "desc->desc.local_mem_hndl = {%" PRIx64 ", %" PRIx64 "}\n", desc->desc.local_mem_hndl.qword1,
|
||||
desc->desc.local_mem_hndl.qword2);
|
||||
fprintf (stderr, "desc->desc.remote_addr = %" PRIx64 "\n", desc->desc.remote_addr);
|
||||
fprintf (stderr, "desc->desc.remote_mem_hndl = {%" PRIx64 ", %" PRIx64 "}\n", desc->desc.remote_mem_hndl.qword1,
|
||||
desc->desc.remote_mem_hndl.qword2);
|
||||
fprintf (stderr, "desc->desc.length = %" PRIu64 "\n", desc->desc.length);
|
||||
fprintf (stderr, "desc->desc.rdma_mode = %hu\n", desc->desc.rdma_mode);
|
||||
fprintf (stderr, "desc->desc.amo_cmd = %d\n", desc->desc.amo_cmd);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline int mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module, int which_cq)
|
||||
{
|
||||
mca_btl_ugni_post_descriptor_t *post_desc = NULL;
|
||||
gni_cq_entry_t event_data = 0;
|
||||
gni_post_descriptor_t *desc;
|
||||
uint32_t recoverable = 1;
|
||||
gni_return_t grc;
|
||||
gni_cq_handle_t the_cq;
|
||||
|
||||
the_cq = (which_cq == 0) ? ugni_module->rdma_local_cq : ugni_module->rdma_local_irq_cq;
|
||||
|
||||
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
||||
grc = GNI_CqGetEvent (the_cq, &event_data);
|
||||
if (GNI_RC_NOT_DONE == grc) {
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY((GNI_RC_SUCCESS != grc && !event_data) || GNI_CQ_OVERRUN(event_data))) {
|
||||
/* TODO -- need to handle overrun -- how do we do this without an event?
|
||||
will the event eventually come back? Ask Cray */
|
||||
BTL_ERROR(("unhandled post error! ugni rc = %d %s", grc, gni_err_str[grc]));
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
|
||||
return opal_common_rc_ugni_to_opal (grc);
|
||||
}
|
||||
|
||||
grc = GNI_GetCompleted (the_cq, event_data, &desc);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc && GNI_RC_TRANSACTION_ERROR != grc)) {
|
||||
BTL_ERROR(("Error in GNI_GetComplete %s", gni_err_str[grc]));
|
||||
return opal_common_rc_ugni_to_opal (grc);
|
||||
}
|
||||
|
||||
post_desc = MCA_BTL_UGNI_DESC_TO_PDESC(desc);
|
||||
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc || !GNI_CQ_STATUS_OK(event_data))) {
|
||||
(void) GNI_CqErrorRecoverable (event_data, &recoverable);
|
||||
|
||||
if (OPAL_UNLIKELY(++post_desc->desc.tries >= mca_btl_ugni_component.rdma_max_retries ||
|
||||
!recoverable)) {
|
||||
char char_buffer[1024];
|
||||
GNI_CqErrorStr (event_data, char_buffer, 1024);
|
||||
/* give up */
|
||||
BTL_ERROR(("giving up on desciptor %p, recoverable %d: %s", (void *) post_desc,
|
||||
recoverable, char_buffer));
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
btl_ugni_dump_post_desc (post_desc);
|
||||
#endif
|
||||
mca_btl_ugni_post_desc_complete (ugni_module, post_desc, OPAL_ERROR);
|
||||
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
|
||||
mca_btl_ugni_repost (ugni_module, post_desc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
mca_btl_ugni_post_desc_complete (ugni_module, post_desc, opal_common_rc_ugni_to_opal (grc));
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline int
|
||||
mca_btl_ugni_post_pending (mca_btl_ugni_module_t *ugni_module)
|
||||
mca_btl_ugni_post_pending (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device)
|
||||
{
|
||||
int count = opal_list_get_size (&ugni_module->pending_descriptors);
|
||||
int i;
|
||||
int pending_post_count = opal_list_get_size (&device->pending_post);
|
||||
mca_btl_ugni_post_descriptor_t *post_desc;
|
||||
int rc;
|
||||
|
||||
for (i = 0 ; i < count ; ++i) {
|
||||
OPAL_THREAD_LOCK(&ugni_module->pending_descriptors_lock);
|
||||
mca_btl_ugni_post_descriptor_t *post_desc =
|
||||
(mca_btl_ugni_post_descriptor_t *) opal_list_remove_first (&ugni_module->pending_descriptors);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->pending_descriptors_lock);
|
||||
/* check if there are any posts pending resources */
|
||||
if (OPAL_LIKELY(0 == pending_post_count)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (OPAL_SUCCESS != mca_btl_ugni_repost (ugni_module, post_desc)) {
|
||||
BTL_VERBOSE(("progressing %d pending FMA/RDMA operations", pending_post_count));
|
||||
for (int i = 0 ; i < pending_post_count ; ++i) {
|
||||
mca_btl_ugni_device_lock (device);
|
||||
post_desc = (mca_btl_ugni_post_descriptor_t *) opal_list_remove_first (&device->pending_post);
|
||||
mca_btl_ugni_device_unlock (device);
|
||||
if (NULL == post_desc) {
|
||||
break;
|
||||
}
|
||||
rc = mca_btl_ugni_repost (ugni_module, post_desc);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
mca_btl_ugni_device_lock (device);
|
||||
opal_list_prepend (&device->pending_post, (opal_list_item_t *) post_desc);
|
||||
mca_btl_ugni_device_unlock (device);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static inline int mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device,
|
||||
mca_btl_ugni_cq_t *cq)
|
||||
{
|
||||
mca_btl_ugni_post_descriptor_t *post_desc[MCA_BTL_UGNI_COMPLETIONS_PER_LOOP];
|
||||
gni_cq_entry_t event_data[MCA_BTL_UGNI_COMPLETIONS_PER_LOOP];
|
||||
int rc;
|
||||
|
||||
rc = mca_btl_ugni_cq_get_completed_desc (device, cq, event_data, post_desc, MCA_BTL_UGNI_COMPLETIONS_PER_LOOP);
|
||||
if (0 >= rc) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("got %d completed rdma descriptors", rc));
|
||||
|
||||
for (int i = 0 ; i < rc ; ++i) {
|
||||
BTL_VERBOSE(("post descriptor %p complete. GNI_CQ_STATUS_OK(): %d", post_desc[i],
|
||||
GNI_CQ_STATUS_OK(event_data[i])));
|
||||
|
||||
if (OPAL_UNLIKELY(!GNI_CQ_STATUS_OK(event_data[i]))) {
|
||||
uint32_t recoverable = 1;
|
||||
|
||||
(void) GNI_CqErrorRecoverable (event_data[i], &recoverable);
|
||||
|
||||
if (OPAL_UNLIKELY(++post_desc[i]->tries >= mca_btl_ugni_component.rdma_max_retries ||
|
||||
!recoverable)) {
|
||||
char char_buffer[1024];
|
||||
GNI_CqErrorStr (event_data[i], char_buffer, 1024);
|
||||
/* give up */
|
||||
BTL_ERROR(("giving up on desciptor %p, recoverable %d: %s", (void *) post_desc[i],
|
||||
recoverable, char_buffer));
|
||||
#if OPAL_ENABLE_DEBUG
|
||||
btl_ugni_dump_post_desc (post_desc[i]);
|
||||
#endif
|
||||
mca_btl_ugni_post_desc_complete (ugni_module, post_desc[i], OPAL_ERROR);
|
||||
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
|
||||
mca_btl_ugni_repost (ugni_module, post_desc[i]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
mca_btl_ugni_post_desc_complete (ugni_module, post_desc[i], OPAL_SUCCESS);
|
||||
}
|
||||
|
||||
/* should be resources to progress the pending post list */
|
||||
(void) mca_btl_ugni_post_pending (ugni_module, device);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
static inline int
|
||||
@ -627,9 +677,14 @@ mca_btl_ugni_progress_wait_list (mca_btl_ugni_module_t *ugni_module)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* check the count before taking the lock to avoid unnecessary locking */
|
||||
count = opal_list_get_size(&ugni_module->ep_wait_list);
|
||||
if (0 == count) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
|
||||
count = opal_list_get_size(&ugni_module->ep_wait_list);
|
||||
|
||||
do {
|
||||
endpoint = (mca_btl_base_endpoint_t *) opal_list_remove_first (&ugni_module->ep_wait_list);
|
||||
if (endpoint != NULL) {
|
||||
@ -649,35 +704,34 @@ mca_btl_ugni_progress_wait_list (mca_btl_ugni_module_t *ugni_module)
|
||||
|
||||
static int mca_btl_ugni_component_progress (void)
|
||||
{
|
||||
mca_btl_ugni_module_t *ugni_module;
|
||||
static int64_t call_count = 0;
|
||||
int64_t cur_call_count = OPAL_THREAD_ADD64(&call_count, 1);
|
||||
unsigned int i;
|
||||
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_component.modules;
|
||||
static volatile int32_t call_count = 0;
|
||||
int32_t current_call;
|
||||
int count = 0;
|
||||
|
||||
for (i = 0 ; i < mca_btl_ugni_component.ugni_num_btls ; ++i) {
|
||||
ugni_module = mca_btl_ugni_component.modules + i;
|
||||
current_call = OPAL_THREAD_ADD32(&call_count, 1);
|
||||
|
||||
if ((cur_call_count & 0x7) == 0) {
|
||||
count += mca_btl_ugni_progress_datagram (ugni_module);
|
||||
}
|
||||
count += mca_btl_ugni_progress_remote_smsg (ugni_module);
|
||||
|
||||
if (ugni_module->connected_peer_count) {
|
||||
if ((current_call & 0x7) == 0) {
|
||||
count += mca_btl_ugni_progress_datagram (ugni_module, ugni_module->devices);
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
|
||||
mca_btl_ugni_device_t *device = ugni_module->devices + i;
|
||||
|
||||
if (device->smsg_connections) {
|
||||
count += mca_btl_ugni_progress_local_smsg (ugni_module, device);
|
||||
mca_btl_ugni_progress_wait_list (ugni_module);
|
||||
count += mca_btl_ugni_progress_local_smsg (ugni_module);
|
||||
count += mca_btl_ugni_progress_remote_smsg (ugni_module);
|
||||
}
|
||||
|
||||
if (ugni_module->active_rdma_count) {
|
||||
count += mca_btl_ugni_progress_rdma (ugni_module, 0);
|
||||
if (device->dev_rdma_local_cq.active_operations) {
|
||||
count += mca_btl_ugni_progress_rdma (ugni_module, device, &device->dev_rdma_local_cq);
|
||||
}
|
||||
|
||||
if (mca_btl_ugni_component.progress_thread_enabled) {
|
||||
count += mca_btl_ugni_progress_rdma (ugni_module, 1);
|
||||
if (mca_btl_ugni_component.progress_thread_enabled && device->dev_rdma_local_irq_cq.active_operations) {
|
||||
count += mca_btl_ugni_progress_rdma (ugni_module, device, &device->dev_rdma_local_irq_cq);
|
||||
}
|
||||
|
||||
/* post pending after progressing rdma */
|
||||
mca_btl_ugni_post_pending (ugni_module);
|
||||
}
|
||||
|
||||
return count;
|
||||
|
430
opal/mca/btl/ugni/btl_ugni_device.h
Обычный файл
430
opal/mca/btl/ugni/btl_ugni_device.h
Обычный файл
@ -0,0 +1,430 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
/**
|
||||
* @file This file contains wrappers for uGNI functionality. These wrappers are thread-safe
|
||||
* and intended to provide a way to measure various different ways to handle mutual exclusion
|
||||
* into the uGNI library (which is not thread safe). These functions are all defined to be
|
||||
* inline to limit the cost to non-threaded users.
|
||||
*/
|
||||
|
||||
#if !defined(BTL_UGNI_DEVICE_H)
|
||||
#define BTL_UGNI_DEVICE_H
|
||||
|
||||
#include "btl_ugni_endpoint.h"
|
||||
#include "btl_ugni_frag.h"
|
||||
|
||||
/* helper functions */
|
||||
|
||||
typedef struct mca_btl_ugni_smsg_send_wtag_arg_t {
|
||||
gni_ep_handle_t ep_handle;
|
||||
void *hdr;
|
||||
size_t hdr_len;
|
||||
void *payload;
|
||||
size_t payload_len;
|
||||
uint32_t msg_id;
|
||||
int tag;
|
||||
} mca_btl_ugni_smsg_send_wtag_arg_t;
|
||||
|
||||
static inline int mca_btl_ugni_smsg_send_wtag_device (mca_btl_ugni_device_t *device, void *arg)
|
||||
{
|
||||
mca_btl_ugni_smsg_send_wtag_arg_t *args = (mca_btl_ugni_smsg_send_wtag_arg_t *) arg;
|
||||
gni_return_t grc;
|
||||
|
||||
grc = GNI_SmsgSendWTag (args->ep_handle, args->hdr, args->hdr_len, args->payload,
|
||||
args->payload_len, args->msg_id, args->tag);
|
||||
device->dev_smsg_local_cq.active_operations += (GNI_RC_SUCCESS == grc);
|
||||
return grc;
|
||||
}
|
||||
|
||||
typedef struct mca_btl_ugni_smsg_get_next_wtag_arg_t {
|
||||
gni_ep_handle_t ep_handle;
|
||||
uintptr_t *data_ptr;
|
||||
uint8_t *tag;
|
||||
} mca_btl_ugni_smsg_get_next_wtag_arg_t;
|
||||
|
||||
static inline intptr_t mca_btl_ugni_smsg_get_next_wtag_device (mca_btl_ugni_device_t *device, void *arg)
|
||||
{
|
||||
mca_btl_ugni_smsg_get_next_wtag_arg_t *args = (mca_btl_ugni_smsg_get_next_wtag_arg_t *) arg;
|
||||
return GNI_SmsgGetNextWTag(args->ep_handle, (void **) args->data_ptr, args->tag);
|
||||
}
|
||||
|
||||
/**
 * Device callback: release the current SMSG message on an endpoint.
 *
 * @param device  unused
 * @param arg     pointer to the mca_btl_ugni_endpoint_handle_t to release on
 *
 * @returns the result of GNI_SmsgRelease()
 */
static inline intptr_t mca_btl_ugni_smsg_release_device (mca_btl_ugni_device_t *device, void *arg)
{
    mca_btl_ugni_endpoint_handle_t *handle = (mca_btl_ugni_endpoint_handle_t *) arg;
    gni_ep_handle_t gni_ep = handle->gni_handle;

    return GNI_SmsgRelease (gni_ep);
}
|
||||
|
||||
/**
 * Device callback: drain every pending event from a completion queue.
 *
 * @param device  unused
 * @param arg     the gni_cq_handle_t to drain, passed by value inside the pointer
 *
 * @returns OPAL_SUCCESS always; the drained events are discarded
 */
static inline intptr_t mca_btl_ugni_cq_clear_device (mca_btl_ugni_device_t *device, void *arg)
{
    gni_cq_handle_t cq = (gni_cq_handle_t) (intptr_t) arg;
    gni_cq_entry_t event_data;
    gni_return_t rc;

    do {
        rc = GNI_CqGetEvent (cq, &event_data);
        /* GNI_RC_NOT_DONE means the queue is empty. GNI_RC_INVALID_PARAM means the handle
         * itself is unusable, so retrying can never make progress; stop instead of
         * spinning forever. */
    } while (GNI_RC_NOT_DONE != rc && GNI_RC_INVALID_PARAM != rc);

    return OPAL_SUCCESS;
}
|
||||
|
||||
typedef struct mca_btl_ugni_cq_get_event_args_t {
|
||||
mca_btl_ugni_cq_t *cq;
|
||||
gni_cq_entry_t *event_data;
|
||||
} mca_btl_ugni_cq_get_event_args_t;
|
||||
|
||||
static inline intptr_t mca_btl_ugni_cq_get_event_device (mca_btl_ugni_device_t *device, void *arg)
|
||||
{
|
||||
mca_btl_ugni_cq_get_event_args_t *args = (mca_btl_ugni_cq_get_event_args_t *) arg;
|
||||
gni_return_t rc;
|
||||
|
||||
rc = GNI_CqGetEvent (args->cq->gni_handle, args->event_data);
|
||||
args->cq->active_operations -= GNI_RC_NOT_DONE != rc;
|
||||
return rc;
|
||||
}
|
||||
|
||||
typedef struct mca_btl_ugni_gni_cq_get_event_args_t {
|
||||
gni_cq_handle_t cq;
|
||||
gni_cq_entry_t *event_data;
|
||||
} mca_btl_ugni_gni_cq_get_event_args_t;
|
||||
|
||||
static inline intptr_t mca_btl_ugni_gni_cq_get_event_device (mca_btl_ugni_device_t *device, void *arg)
|
||||
{
|
||||
mca_btl_ugni_gni_cq_get_event_args_t *args = (mca_btl_ugni_gni_cq_get_event_args_t *) arg;
|
||||
|
||||
return GNI_CqGetEvent (args->cq, args->event_data);
|
||||
}
|
||||
|
||||
/**
 * Device callback: post an FMA descriptor.
 *
 * If the descriptor has no endpoint handle yet, one is obtained with
 * mca_btl_ugni_ep_get_rdma() and handed back again should the post fail.
 *
 * @param device  device to post on; its RDMA local CQ counter is bumped on success
 * @param arg     pointer to the mca_btl_ugni_post_descriptor_t to post
 *
 * @returns OPAL_ERR_TEMP_OUT_OF_RESOURCE if no endpoint handle is available,
 *          otherwise the GNI_PostFma() result mapped by mca_btl_rc_ugni_to_opal()
 */
static inline intptr_t mca_btl_ugni_post_fma_device (mca_btl_ugni_device_t *device, void *arg)
{
    mca_btl_ugni_post_descriptor_t *desc = (mca_btl_ugni_post_descriptor_t *) arg;
    bool handle_is_ours = (NULL == desc->ep_handle);
    int rc;

    if (handle_is_ours) {
        desc->ep_handle = mca_btl_ugni_ep_get_rdma (desc->endpoint, device);
        if (OPAL_UNLIKELY(NULL == desc->ep_handle)) {
            return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
        }
    }

    BTL_VERBOSE(("Posting FMA descriptor %p with op_type %d, amo %d, ep_handle %p, remote_addr 0x%lx, "
                 "length %lu", desc, desc->desc.type, desc->desc.amo_cmd, desc->ep_handle,
                 desc->desc.remote_addr, desc->desc.length));

    rc = GNI_PostFma (desc->ep_handle->gni_handle, &desc->desc);
    if (OPAL_LIKELY(GNI_RC_SUCCESS == rc)) {
        ++device->dev_rdma_local_cq.active_operations;
    } else if (handle_is_ours) {
        /* give the handle back only when it was acquired above. a handle that was already
         * attached most likely means this call came from repost() */
        mca_btl_ugni_ep_return_rdma (desc->ep_handle);
        desc->ep_handle = NULL;
    }

    return mca_btl_rc_ugni_to_opal (rc);
}
|
||||
|
||||
/**
 * Device callback: post an RDMA descriptor.
 *
 * Completions are directed to the device's IRQ RDMA CQ when the progress
 * thread is enabled and to the regular RDMA local CQ otherwise. The chosen CQ
 * is remembered in desc->cq and its operation counter is bumped on success.
 *
 * @param device  device to post on
 * @param arg     pointer to the mca_btl_ugni_post_descriptor_t to post
 *
 * @returns OPAL_ERR_TEMP_OUT_OF_RESOURCE if no endpoint handle is available,
 *          otherwise the GNI_PostRdma() result mapped by mca_btl_rc_ugni_to_opal()
 */
static inline intptr_t mca_btl_ugni_post_rdma_device (mca_btl_ugni_device_t *device, void *arg)
{
    mca_btl_ugni_post_descriptor_t *desc = (mca_btl_ugni_post_descriptor_t *) arg;
    bool handle_is_ours = (NULL == desc->ep_handle);
    int rc;

    if (handle_is_ours) {
        desc->ep_handle = mca_btl_ugni_ep_get_rdma (desc->endpoint, device);
        if (OPAL_UNLIKELY(NULL == desc->ep_handle)) {
            return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
        }
    }

    /* select the completion queue that will report this operation */
    if (mca_btl_ugni_component.progress_thread_enabled) {
        desc->cq = &device->dev_rdma_local_irq_cq;
    } else {
        desc->cq = &device->dev_rdma_local_cq;
    }

    desc->desc.src_cq_hndl = desc->cq->gni_handle;

    BTL_VERBOSE(("Posting RDMA descriptor %p with op_type %d, ep_handle %p, remote_addr 0x%lx, "
                 "length %lu", desc, desc->desc.type, desc->ep_handle, desc->desc.remote_addr,
                 desc->desc.length));

    rc = GNI_PostRdma (desc->ep_handle->gni_handle, &desc->desc);
    if (OPAL_LIKELY(GNI_RC_SUCCESS == rc)) {
        ++desc->cq->active_operations;
    } else if (handle_is_ours) {
        /* give the handle back only when it was acquired above. a handle that was already
         * attached most likely means this call came from repost() */
        mca_btl_ugni_ep_return_rdma (desc->ep_handle);
        desc->ep_handle = NULL;
    }

    return mca_btl_rc_ugni_to_opal (rc);
}
|
||||
|
||||
/**
 * Device callback: post a CQ-write descriptor.
 *
 * An endpoint handle is always acquired here (any handle already stored in the
 * descriptor is overwritten) and is returned again if the post fails.
 *
 * @param device  device to post on
 * @param arg     pointer to the mca_btl_ugni_post_descriptor_t to post
 *
 * @returns OPAL_ERR_OUT_OF_RESOURCE if no endpoint handle is available,
 *          otherwise the GNI_PostCqWrite() result mapped by mca_btl_rc_ugni_to_opal()
 *
 * NOTE(review): unlike the FMA/RDMA post helpers this one reports
 * OPAL_ERR_OUT_OF_RESOURCE (not the TEMP variant) and does not bump
 * dev_rdma_local_cq.active_operations on success -- confirm both are intended.
 */
static inline intptr_t mca_btl_ugni_post_cqwrite_device (mca_btl_ugni_device_t *device, void *arg)
{
    mca_btl_ugni_post_descriptor_t *post = (mca_btl_ugni_post_descriptor_t *) arg;
    int grc;

    post->ep_handle = mca_btl_ugni_ep_get_rdma (post->endpoint, device);
    if (OPAL_UNLIKELY(NULL == post->ep_handle)) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* the local completion is reported on the device's RDMA local CQ */
    post->desc.src_cq_hndl = device->dev_rdma_local_cq.gni_handle;

    grc = GNI_PostCqWrite (post->ep_handle->gni_handle, &post->desc);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
        mca_btl_ugni_ep_return_rdma (post->ep_handle);
        post->ep_handle = NULL;
    }

    return mca_btl_rc_ugni_to_opal (grc);
}
|
||||
|
||||
typedef struct mca_btl_ugni_cq_get_completed_desc_arg_t {
|
||||
mca_btl_ugni_cq_t *cq;
|
||||
gni_cq_entry_t *event_data;
|
||||
mca_btl_ugni_post_descriptor_t **post_desc;
|
||||
int count;
|
||||
} mca_btl_ugni_cq_get_completed_desc_arg_t;
|
||||
|
||||
static inline intptr_t mca_btl_ugni_cq_get_completed_desc_device (mca_btl_ugni_device_t *device, void *arg0)
|
||||
{
|
||||
mca_btl_ugni_cq_get_completed_desc_arg_t *args = (mca_btl_ugni_cq_get_completed_desc_arg_t *) arg0;
|
||||
mca_btl_ugni_cq_t *cq = args->cq;
|
||||
gni_post_descriptor_t *desc;
|
||||
int rc;
|
||||
|
||||
for (int i = 0 ; i < args->count ; ++i) {
|
||||
rc = GNI_CqGetEvent (cq->gni_handle, args->event_data + i);
|
||||
if (GNI_RC_NOT_DONE == rc) {
|
||||
return i;
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY((GNI_RC_SUCCESS != rc && !args->event_data[i]) || GNI_CQ_OVERRUN(args->event_data[i]))) {
|
||||
/* TODO -- need to handle overrun -- how do we do this without an event?
|
||||
will the event eventually come back? Ask Cray */
|
||||
BTL_ERROR(("unhandled post error! ugni rc = %d %s", rc, gni_err_str[rc]));
|
||||
|
||||
return mca_btl_rc_ugni_to_opal (rc);
|
||||
}
|
||||
|
||||
rc = GNI_GetCompleted (cq->gni_handle, args->event_data[i], &desc);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc && GNI_RC_TRANSACTION_ERROR != rc)) {
|
||||
BTL_ERROR(("Error in GNI_GetComplete %s", gni_err_str[rc]));
|
||||
return mca_btl_rc_ugni_to_opal (rc);
|
||||
}
|
||||
|
||||
args->post_desc[i] = MCA_BTL_UGNI_DESC_TO_PDESC(desc);
|
||||
/* return the endpoint handle while we have the lock. see the explanation in
|
||||
* the documentation for mca_btl_ugni_ep_return_rdma() */
|
||||
if (OPAL_LIKELY(GNI_CQ_STATUS_OK(args->event_data[i]))) {
|
||||
/* the operation completed successfully. return the endpoint handle now. otherwise
|
||||
* we may still need the endpoint handle to start the repost(). */
|
||||
mca_btl_ugni_ep_return_rdma (args->post_desc[i]->ep_handle);
|
||||
args->post_desc[i]->ep_handle = NULL;
|
||||
}
|
||||
--cq->active_operations;
|
||||
}
|
||||
|
||||
return args->count;
|
||||
}
|
||||
|
||||
typedef struct mca_btl_ugni_get_datagram_args_t {
|
||||
mca_btl_ugni_module_t *ugni_module;
|
||||
gni_ep_handle_t *handle;
|
||||
mca_btl_base_endpoint_t **ep;
|
||||
} mca_btl_ugni_get_datagram_args_t;
|
||||
|
||||
static inline intptr_t mca_btl_ugni_get_datagram_device (mca_btl_ugni_device_t *device, void *arg0)
|
||||
{
|
||||
mca_btl_ugni_get_datagram_args_t *args = (mca_btl_ugni_get_datagram_args_t *) arg0;
|
||||
uint32_t remote_addr, remote_id;
|
||||
uint64_t datagram_id;
|
||||
gni_post_state_t post_state;
|
||||
gni_return_t grc;
|
||||
uint64_t data;
|
||||
|
||||
grc = GNI_PostDataProbeById (device->dev_handle, &datagram_id);
|
||||
if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
data = datagram_id & ~(MCA_BTL_UGNI_DATAGRAM_MASK);
|
||||
|
||||
BTL_VERBOSE(("rc: %d, datgram_id: %" PRIx64 ", mask: %" PRIx64, grc, datagram_id, (uint64_t) (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK)));
|
||||
|
||||
if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID) {
|
||||
*(args->ep) = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&args->ugni_module->endpoints, data);
|
||||
*(args->handle) = (*args->ep)->smsg_ep_handle->gni_handle;
|
||||
} else {
|
||||
*(args->handle) = args->ugni_module->wildcard_ep;
|
||||
}
|
||||
|
||||
/* wait for the incoming datagram to complete (in case it isn't) */
|
||||
grc = GNI_EpPostDataWaitById (*args->handle, datagram_id, -1, &post_state,
|
||||
&remote_addr, &remote_id);
|
||||
if (GNI_RC_SUCCESS != grc) {
|
||||
BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", grc));
|
||||
return mca_btl_rc_ugni_to_opal (grc);
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("handled datagram completion. post_state: %d, remote_addr: %u, remote_id: %u, directed?: %d",
|
||||
post_state, remote_addr, remote_id, (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID));
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
typedef struct mca_btl_ugni_reg_mem_args_t {
|
||||
mca_btl_ugni_module_t *ugni_module;
|
||||
void *base;
|
||||
size_t size;
|
||||
mca_btl_ugni_reg_t *ugni_reg;
|
||||
gni_cq_handle_t cq;
|
||||
int flags;
|
||||
} mca_btl_ugni_reg_mem_args_t;
|
||||
|
||||
static intptr_t mca_btl_ugni_reg_mem_device (mca_btl_ugni_device_t *device, void *arg)
|
||||
{
|
||||
mca_btl_ugni_reg_mem_args_t *args = (mca_btl_ugni_reg_mem_args_t *) arg;
|
||||
gni_return_t rc;
|
||||
|
||||
rc = GNI_MemRegister (device->dev_handle, (uint64_t) args->base, args->size, args->cq,
|
||||
args->flags, -1, &args->ugni_reg->handle.gni_handle);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
typedef struct mca_btl_ugni_dereg_mem_arg_t {
|
||||
mca_btl_ugni_module_t *ugni_module;
|
||||
mca_btl_ugni_reg_t *ugni_reg;
|
||||
} mca_btl_ugni_dereg_mem_arg_t;
|
||||
|
||||
static intptr_t mca_btl_ugni_dereg_mem_device (mca_btl_ugni_device_t *device, void *arg)
|
||||
{
|
||||
mca_btl_ugni_dereg_mem_arg_t *args = (mca_btl_ugni_dereg_mem_arg_t *) arg;
|
||||
gni_return_t rc;
|
||||
|
||||
rc = GNI_MemDeregister (device->dev_handle, &args->ugni_reg->handle.gni_handle);
|
||||
return mca_btl_rc_ugni_to_opal (rc);
|
||||
}
|
||||
|
||||
/* multi-thread safe interface to uGNI */
|
||||
|
||||
/**
 * Send a tagged SMSG message on an endpoint, going through the serialize
 * call of the device that owns the endpoint's SMSG handle.
 *
 * @returns the uGNI return code of the send, narrowed to int
 */
static inline int mca_btl_ugni_endpoint_smsg_send_wtag (mca_btl_base_endpoint_t *endpoint, void *hdr, size_t hdr_len,
                                                        void *payload, size_t payload_len, uint32_t msg_id, int tag)
{
    mca_btl_ugni_device_t *device = endpoint->smsg_ep_handle->device;
    mca_btl_ugni_smsg_send_wtag_arg_t send_args = {
        .ep_handle = endpoint->smsg_ep_handle->gni_handle,
        .hdr = hdr,
        .hdr_len = hdr_len,
        .payload = payload,
        .payload_len = payload_len,
        .msg_id = msg_id,
        .tag = tag,
    };

    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_smsg_send_wtag_device, &send_args);
}
|
||||
|
||||
/**
 * Fetch the next tagged SMSG message on an endpoint handle, going through the
 * serialize call of the handle's device.
 *
 * @returns the uGNI return code of the lookup, narrowed to int
 */
static inline int mca_btl_ugni_smsg_get_next_wtag (mca_btl_ugni_endpoint_handle_t *ep_handle, uintptr_t *data_ptr, uint8_t *tag)
{
    mca_btl_ugni_smsg_get_next_wtag_arg_t wtag_args = {
        .ep_handle = ep_handle->gni_handle,
        .data_ptr = data_ptr,
        .tag = tag,
    };

    return (int) mca_btl_ugni_device_serialize (ep_handle->device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_smsg_get_next_wtag_device, &wtag_args);
}
|
||||
|
||||
/**
 * Release the current SMSG message on an endpoint handle, going through the
 * serialize call of the handle's device.
 *
 * @returns the uGNI return code of the release, narrowed to int
 */
static inline int mca_btl_ugni_smsg_release (mca_btl_ugni_endpoint_handle_t *ep_handle)
{
    return (int) mca_btl_ugni_device_serialize (ep_handle->device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_smsg_release_device, ep_handle);
}
|
||||
|
||||
/**
 * Drain all pending events from a completion queue, going through the
 * device's serialize call. The CQ handle is smuggled through the callback's
 * void * argument by value.
 */
static inline void mca_btl_ugni_cq_clear (mca_btl_ugni_device_t *device, gni_cq_handle_t cq)
{
    void *cq_as_arg = (void *) (intptr_t) cq;

    (void) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_cq_clear_device, cq_as_arg);
}
|
||||
|
||||
/**
 * Poll one event from a tracked completion queue, going through the device's
 * serialize call.
 *
 * @returns the uGNI return code of the poll, narrowed to int
 */
static inline int mca_btl_ugni_cq_get_event (mca_btl_ugni_device_t *device, mca_btl_ugni_cq_t *cq, gni_cq_entry_t *event_data)
{
    mca_btl_ugni_cq_get_event_args_t event_args = {
        .cq = cq,
        .event_data = event_data,
    };

    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_cq_get_event_device, &event_args);
}
|
||||
|
||||
/**
 * Poll one event from a raw uGNI completion queue handle, going through the
 * device's serialize call.
 *
 * @returns the uGNI return code of the poll, narrowed to int
 */
static inline int mca_btl_ugni_gni_cq_get_event (mca_btl_ugni_device_t *device, gni_cq_handle_t cq, gni_cq_entry_t *event_data)
{
    mca_btl_ugni_gni_cq_get_event_args_t event_args = {
        .cq = cq,
        .event_data = event_data,
    };

    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_gni_cq_get_event_device, &event_args);
}
|
||||
|
||||
/**
 * Post an FMA descriptor for an endpoint.
 *
 * The device already bound to the descriptor's endpoint handle is used when
 * there is one; otherwise a device is obtained from
 * mca_btl_ugni_ep_get_device().
 *
 * @returns the OPAL status produced by mca_btl_ugni_post_fma_device()
 */
static inline int mca_btl_ugni_endpoint_post_fma (mca_btl_ugni_endpoint_t *endpoint, mca_btl_ugni_post_descriptor_t *desc)
{
    mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (endpoint);
    mca_btl_ugni_device_t *device;

    if (desc->ep_handle) {
        device = desc->ep_handle->device;
    } else {
        device = mca_btl_ugni_ep_get_device (ugni_module);
    }

    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_post_fma_device, desc);
}
|
||||
|
||||
/**
 * Post an RDMA descriptor for an endpoint.
 *
 * The device already bound to the descriptor's endpoint handle is used when
 * there is one; otherwise a device is obtained from
 * mca_btl_ugni_ep_get_device().
 *
 * @returns the OPAL status produced by mca_btl_ugni_post_rdma_device()
 */
static inline int mca_btl_ugni_endpoint_post_rdma (mca_btl_ugni_endpoint_t *endpoint, mca_btl_ugni_post_descriptor_t *desc)
{
    mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (endpoint);
    mca_btl_ugni_device_t *device;

    if (desc->ep_handle) {
        device = desc->ep_handle->device;
    } else {
        device = mca_btl_ugni_ep_get_device (ugni_module);
    }

    return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_post_rdma_device, desc);
}
|
||||
|
||||
/**
 * Post a CQ-write descriptor for an endpoint. The post always goes through
 * the first device of the endpoint's module.
 *
 * @returns the OPAL status produced by mca_btl_ugni_post_cqwrite_device()
 */
static inline int mca_btl_ugni_endpoint_post_cqwrite (mca_btl_ugni_endpoint_t *endpoint, mca_btl_ugni_post_descriptor_t *desc)
{
    mca_btl_ugni_device_t *first_device = mca_btl_ugni_ep_btl (endpoint)->devices;

    return (int) mca_btl_ugni_device_serialize (first_device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_post_cqwrite_device, desc);
}
|
||||
|
||||
static inline int mca_btl_ugni_cq_get_completed_desc (mca_btl_ugni_device_t *device, mca_btl_ugni_cq_t *cq,
|
||||
gni_cq_entry_t *event_data, mca_btl_ugni_post_descriptor_t **post_desc,
|
||||
int count)
|
||||
{
|
||||
mca_btl_ugni_cq_get_completed_desc_arg_t args = {.cq = cq, .event_data = event_data, .post_desc = post_desc, .count = count};
|
||||
return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_cq_get_completed_desc_device, &args);
|
||||
}
|
||||
|
||||
static inline int mca_btl_ugni_get_datagram (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device, gni_ep_handle_t *gni_handle,
|
||||
mca_btl_base_endpoint_t **ep)
|
||||
{
|
||||
mca_btl_ugni_get_datagram_args_t args = {.ugni_module = ugni_module, .ep = ep, .handle = gni_handle};
|
||||
return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_get_datagram_device, &args);
|
||||
}
|
||||
|
||||
static inline int mca_btl_ugni_reg_mem (mca_btl_ugni_module_t *ugni_module, void *base, size_t size, mca_btl_ugni_reg_t *ugni_reg,
|
||||
gni_cq_handle_t cq, int flags)
|
||||
{
|
||||
mca_btl_ugni_reg_mem_args_t args = {.ugni_module = ugni_module, .base = base, .size = size,
|
||||
.ugni_reg = ugni_reg, .cq = cq, .flags = flags};
|
||||
mca_btl_ugni_device_t *device = ugni_module->devices;
|
||||
return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_reg_mem_device, &args);
|
||||
}
|
||||
|
||||
/**
 * Deregister a memory region from uGNI. The deregistration always goes
 * through the first device of the module.
 *
 * @returns the OPAL status produced by mca_btl_ugni_dereg_mem_device()
 */
static inline int mca_btl_ugni_dereg_mem (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_reg_t *ugni_reg)
{
    mca_btl_ugni_device_t *first_device = ugni_module->devices;
    mca_btl_ugni_dereg_mem_arg_t dereg_args = {
        .ugni_module = ugni_module,
        .ugni_reg = ugni_reg,
    };

    return (int) mca_btl_ugni_device_serialize (first_device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_dereg_mem_device, &dereg_args);
}
|
||||
|
||||
#endif /* BTL_UGNI_DEVICE_H */
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011-2013 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -12,6 +12,7 @@
|
||||
|
||||
#include "btl_ugni_endpoint.h"
|
||||
#include "btl_ugni_smsg.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
|
||||
static void mca_btl_ugni_ep_construct (mca_btl_base_endpoint_t *ep)
|
||||
{
|
||||
@ -24,15 +25,94 @@ static void mca_btl_ugni_ep_destruct (mca_btl_base_endpoint_t *ep)
|
||||
{
|
||||
OBJ_DESTRUCT(&ep->frag_wait_list);
|
||||
OBJ_DESTRUCT(&ep->lock);
|
||||
free (ep->remote_attr);
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_btl_ugni_endpoint_t, opal_list_item_t,
|
||||
mca_btl_ugni_ep_construct, mca_btl_ugni_ep_destruct);
|
||||
|
||||
/**
 * Look up the peer's modex entry and copy its address and id into the
 * endpoint.
 *
 * @param ep  endpoint with peer_proc already set
 *
 * @returns OPAL_SUCCESS, or the error produced by OPAL_MODEX_RECV
 */
static int mca_btl_ugni_endpoint_get_modex (mca_btl_base_endpoint_t *ep)
{
    mca_btl_ugni_modex_t *peer_modex;
    size_t modex_len;
    int rc;

    assert (NULL != ep && NULL != ep->peer_proc);

    /* fetch the modex blob published by the peer */
    OPAL_MODEX_RECV(rc, &mca_btl_ugni_component.super.btl_version,
                    &ep->peer_proc->proc_name, (void **)&peer_modex, &modex_len);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        BTL_ERROR(("error receiving modex"));
        return rc;
    }

    ep->ep_rem_addr = peer_modex->addr;
    ep->ep_rem_id = peer_modex->id;

    BTL_VERBOSE(("received modex for ep %p. addr: %d, id: %d", ep, ep->ep_rem_addr, ep->ep_rem_id));

    /* the received blob is released here once its fields have been copied */
    free (peer_modex);

    return OPAL_SUCCESS;
}
|
||||
|
||||
/**
 * Allocate and initialise an endpoint for a peer process.
 *
 * @param ugni_module  module whose endpoint array receives the new endpoint
 * @param ep           out: the new endpoint; written only on success
 * @param btl          not used by this function
 * @param peer_proc    peer process the endpoint refers to
 *
 * @returns OPAL_SUCCESS, OPAL_ERR_OUT_OF_RESOURCE if the endpoint object
 *          could not be allocated, or the error from the modex lookup
 */
int mca_btl_ugni_init_ep (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_endpoint_t **ep,
                          mca_btl_ugni_module_t *btl, opal_proc_t *peer_proc)
{
    mca_btl_ugni_endpoint_t *endpoint;
    int rc;

    endpoint = OBJ_NEW(mca_btl_ugni_endpoint_t);
    if (OPAL_UNLIKELY(NULL == endpoint)) {
        /* checked at run time: an assert() disappears in NDEBUG builds */
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    endpoint->smsg_progressing = 0;
    endpoint->state = MCA_BTL_UGNI_EP_STATE_INIT;
    endpoint->peer_proc = peer_proc;

    /* get the modex info for this endpoint and setup a ugni endpoint. this call may lead
     * to re-entry through opal_progress(). */
    rc = mca_btl_ugni_endpoint_get_modex (endpoint);
    if (OPAL_SUCCESS != rc) {
        /* still trap in debug builds, as before */
        assert (0);
        /* the endpoint was never published anywhere, so drop it here instead of leaking it */
        OBJ_RELEASE(endpoint);
        return rc;
    }

    /* add this endpoint to the pointer array */
    endpoint->index = opal_pointer_array_add (&ugni_module->endpoints, endpoint);

    *ep = endpoint;

    return OPAL_SUCCESS;
}
|
||||
|
||||
/**
 * Disconnect an endpoint, remove it from its module's endpoint array and drop
 * a reference to it.
 *
 * The disconnect and the array update both happen with the endpoint lock
 * held. A failed disconnect is only logged; teardown continues regardless.
 */
void mca_btl_ugni_release_ep (mca_btl_ugni_endpoint_t *ep)
{
    mca_btl_ugni_module_t *owner = mca_btl_ugni_ep_btl (ep);
    int disconnect_rc;

    opal_mutex_lock (&ep->lock);

    /* tear down without sending a disconnect message to the peer */
    disconnect_rc = mca_btl_ugni_ep_disconnect (ep, false);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != disconnect_rc)) {
        BTL_VERBOSE(("btl/ugni error disconnecting endpoint"));
    }

    /* TODO -- Clear space at the end of the endpoint array */
    opal_pointer_array_set_item (&owner->endpoints, ep->index, NULL);

    opal_mutex_unlock (&ep->lock);

    OBJ_RELEASE(ep);
}
|
||||
|
||||
static inline int mca_btl_ugni_ep_smsg_get_mbox (mca_btl_base_endpoint_t *ep) {
|
||||
mca_btl_ugni_module_t *ugni_module = ep->btl;
|
||||
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
|
||||
opal_free_list_item_t *mbox;
|
||||
|
||||
assert (NULL == ep->mailbox);
|
||||
|
||||
mbox = opal_free_list_get (&ugni_module->smsg_mboxes);
|
||||
if (OPAL_UNLIKELY(NULL == mbox)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
@ -47,61 +127,103 @@ static inline int mca_btl_ugni_ep_smsg_get_mbox (mca_btl_base_endpoint_t *ep) {
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect) {
|
||||
gni_return_t rc;
|
||||
/**
 * Send an empty SMSG message tagged MCA_BTL_UGNI_TAG_DISCONNECT to the peer.
 *
 * The send is retried for as long as it reports GNI_RC_NOT_DONE; between
 * attempts the remote SMSG CQ is progressed.
 *
 * @returns the final uGNI return code mapped by mca_btl_rc_ugni_to_opal()
 */
static int mca_btl_ugni_ep_send_disconnect (mca_btl_base_endpoint_t *ep)
{
    int grc;

    for (;;) {
        grc = mca_btl_ugni_endpoint_smsg_send_wtag (ep, NULL, 0, NULL, 0, -1, MCA_BTL_UGNI_TAG_DISCONNECT);
        if (OPAL_LIKELY(GNI_RC_NOT_DONE != grc)) {
            break;
        }

        /* most likely out of credits: poll the remote CQ so credit returns are seen */
        (void) mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_ep_btl (ep));
    }

    return mca_btl_rc_ugni_to_opal (grc);
}
|
||||
|
||||
int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect)
|
||||
{
|
||||
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
|
||||
mca_btl_ugni_device_t *device;
|
||||
int rc;
|
||||
|
||||
if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
|
||||
/* nothing to do */
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state && send_disconnect) {
|
||||
OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
|
||||
rc = GNI_SmsgSendWTag (ep->smsg_ep_handle, NULL, 0, NULL, 0, -1,
|
||||
MCA_BTL_UGNI_TAG_DISCONNECT);
|
||||
OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_VERBOSE(("btl/ugni could not send close message"));
|
||||
device = ep->smsg_ep_handle->device;
|
||||
|
||||
while (device->dev_smsg_local_cq.active_operations) {
|
||||
/* ensure all sends are complete before removing and procs */
|
||||
rc = mca_btl_ugni_progress_local_smsg (ugni_module, device);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* we might want to wait for local completion here (do we even care), yes we do */
|
||||
/* TODO: FIX FIX FIX */
|
||||
|
||||
}
|
||||
|
||||
/* TODO: FIX GROSS */
|
||||
OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
|
||||
(void) opal_common_ugni_ep_destroy (&ep->smsg_ep_handle);
|
||||
(void) opal_common_ugni_ep_destroy (&ep->rdma_ep_handle);
|
||||
OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);
|
||||
if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state && send_disconnect) {
|
||||
rc = mca_btl_ugni_ep_send_disconnect (ep);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
BTL_VERBOSE(("could not send disconnect message to peer"));
|
||||
}
|
||||
|
||||
/* wait for the disconnect message to go */
|
||||
do {
|
||||
/* ensure all sends are complete before removing and procs */
|
||||
rc = mca_btl_ugni_progress_local_smsg (ugni_module, device);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
break;
|
||||
}
|
||||
} while (device->dev_smsg_local_cq.active_operations);
|
||||
|
||||
(void) opal_atomic_add_32 (&ep->smsg_ep_handle->device->smsg_connections, -1);
|
||||
}
|
||||
|
||||
mca_btl_ugni_device_lock (device);
|
||||
|
||||
/* NTH: this call may not need the device lock. seems to work without it but
|
||||
* the lock is here to be safe. */
|
||||
(void) mca_btl_ugni_ep_handle_destroy (ep->smsg_ep_handle);
|
||||
ep->smsg_ep_handle = NULL;
|
||||
|
||||
mca_btl_ugni_device_unlock (device);
|
||||
|
||||
if (ep->mailbox) {
|
||||
opal_free_list_return (&ep->btl->smsg_mboxes, ((opal_free_list_item_t *) ep->mailbox));
|
||||
opal_free_list_return (&ugni_module->smsg_mboxes, ((opal_free_list_item_t *) ep->mailbox));
|
||||
ep->mailbox = NULL;
|
||||
}
|
||||
|
||||
ep->state = MCA_BTL_UGNI_EP_STATE_INIT;
|
||||
(void) opal_atomic_add_64 (&ep->btl->connected_peer_count, -11);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static inline int mca_btl_ugni_ep_connect_start (mca_btl_base_endpoint_t *ep) {
|
||||
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
|
||||
mca_btl_ugni_device_t *device = ugni_module->devices;
|
||||
int rc;
|
||||
|
||||
rc = mca_btl_ugni_ep_connect_rdma (ep);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
return rc;
|
||||
/* protect against re-entry from opal_progress */
|
||||
if (OPAL_UNLIKELY(MCA_BTL_UGNI_EP_STATE_CONNECTING == ep->state)) {
|
||||
return OPAL_ERR_RESOURCE_BUSY;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("initiaiting connection to remote peer with address: %u id: %u proc: %p",
|
||||
ep->common->ep_rem_addr, ep->common->ep_rem_id, (void *)ep->peer_proc));
|
||||
ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTING;
|
||||
|
||||
BTL_VERBOSE(("initiating connection to remote peer with address: %u id: %u proc: %p",
|
||||
ep->ep_rem_addr, ep->ep_rem_id, (void *)ep->peer_proc));
|
||||
|
||||
/* bind endpoint to remote address */
|
||||
/* we bind two endpoints to separate out local smsg completion and local fma completion */
|
||||
rc = opal_common_ugni_ep_create (ep->common, ep->btl->smsg_local_cq, &ep->smsg_ep_handle);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
return rc;
|
||||
mca_btl_ugni_device_lock (device);
|
||||
ep->smsg_ep_handle = mca_btl_ugni_ep_handle_create (ep, device->dev_smsg_local_cq.gni_handle, device);
|
||||
mca_btl_ugni_device_unlock (device);
|
||||
if (OPAL_UNLIKELY(NULL == ep->smsg_ep_handle)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* build connection data */
|
||||
@ -110,9 +232,10 @@ static inline int mca_btl_ugni_ep_connect_start (mca_btl_base_endpoint_t *ep) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTING;
|
||||
|
||||
memset (&ep->remote_attr, 0, sizeof (ep->remote_attr));
|
||||
ep->remote_attr = calloc (1, sizeof (*ep->remote_attr));
|
||||
if (OPAL_UNLIKELY(NULL == ep->remote_attr)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("btl/ugni connection to remote peer initiated"));
|
||||
|
||||
@ -120,15 +243,16 @@ static inline int mca_btl_ugni_ep_connect_start (mca_btl_base_endpoint_t *ep) {
|
||||
}
|
||||
|
||||
static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) {
|
||||
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
|
||||
gni_return_t grc;
|
||||
int rc;
|
||||
|
||||
BTL_VERBOSE(("finishing connection. remote attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
|
||||
"mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
|
||||
"msg_maxsize = %d", ep->remote_attr.smsg_attr.msg_type, ep->remote_attr.smsg_attr.msg_buffer,
|
||||
ep->remote_attr.smsg_attr.buff_size, ep->remote_attr.smsg_attr.mem_hndl.qword1,
|
||||
ep->remote_attr.smsg_attr.mem_hndl.qword2, ep->remote_attr.smsg_attr.mbox_offset,
|
||||
ep->remote_attr.smsg_attr.mbox_maxcredit, ep->remote_attr.smsg_attr.msg_maxsize));
|
||||
"msg_maxsize = %d", ep->remote_attr->smsg_attr.msg_type, ep->remote_attr->smsg_attr.msg_buffer,
|
||||
ep->remote_attr->smsg_attr.buff_size, ep->remote_attr->smsg_attr.mem_hndl.qword1,
|
||||
ep->remote_attr->smsg_attr.mem_hndl.qword2, ep->remote_attr->smsg_attr.mbox_offset,
|
||||
ep->remote_attr->smsg_attr.mbox_maxcredit, ep->remote_attr->smsg_attr.msg_maxsize));
|
||||
|
||||
BTL_VERBOSE(("finishing connection. local attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
|
||||
"mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
|
||||
@ -137,54 +261,78 @@ static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) {
|
||||
ep->mailbox->attr.smsg_attr.mem_hndl.qword2, ep->mailbox->attr.smsg_attr.mbox_offset,
|
||||
ep->mailbox->attr.smsg_attr.mbox_maxcredit, ep->mailbox->attr.smsg_attr.msg_maxsize));
|
||||
|
||||
grc = GNI_SmsgInit (ep->smsg_ep_handle, &ep->mailbox->attr.smsg_attr, &ep->remote_attr.smsg_attr);
|
||||
grc = GNI_SmsgInit (ep->smsg_ep_handle->gni_handle, &ep->mailbox->attr.smsg_attr,
|
||||
&ep->remote_attr->smsg_attr);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
|
||||
BTL_ERROR(("error initializing SMSG protocol. rc = %d", grc));
|
||||
|
||||
return opal_common_rc_ugni_to_opal (grc);
|
||||
return mca_btl_rc_ugni_to_opal (grc);
|
||||
}
|
||||
|
||||
/* set the local event data to the local index and the remote event data to my
|
||||
* index on the remote peer. This makes lookup of endpoints on completion take
|
||||
* a single lookup in the endpoints array. we will not be able to change the
|
||||
* remote peer's index in the endpoint's array after this point. */
|
||||
GNI_EpSetEventData (ep->rdma_ep_handle, ep->index, ep->remote_attr.index);
|
||||
GNI_EpSetEventData (ep->smsg_ep_handle, ep->index, ep->remote_attr.index);
|
||||
GNI_EpSetEventData (ep->smsg_ep_handle->gni_handle, ep->index, ep->remote_attr->index);
|
||||
|
||||
ep->rmt_irq_mem_hndl = ep->remote_attr.rmt_irq_mem_hndl;
|
||||
ep->rmt_irq_mem_hndl = ep->remote_attr->rmt_irq_mem_hndl;
|
||||
ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTED;
|
||||
(void) opal_atomic_add_64 (&ep->btl->connected_peer_count, 1);
|
||||
(void) opal_atomic_add_32 (&ep->smsg_ep_handle->device->smsg_connections, 1);
|
||||
|
||||
/* send all pending messages */
|
||||
BTL_VERBOSE(("endpoint connected. posting %u sends", (unsigned int) opal_list_get_size (&ep->frag_wait_list)));
|
||||
|
||||
rc = mca_btl_ugni_progress_send_wait_list (ep);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
OPAL_THREAD_LOCK(&ep->btl->ep_wait_list_lock);
|
||||
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
|
||||
if (false == ep->wait_listed) {
|
||||
opal_list_append (&ep->btl->ep_wait_list, &ep->super);
|
||||
opal_list_append (&ugni_module->ep_wait_list, &ep->super);
|
||||
ep->wait_listed = true;
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&ep->btl->ep_wait_list_lock);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
|
||||
}
|
||||
|
||||
free (ep->remote_attr);
|
||||
ep->remote_attr = NULL;
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static inline int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep) {
|
||||
/**
 * Post a directed datagram carrying this endpoint's SMSG mailbox attributes
 * to the remote peer.
 *
 * @param[in] ep  endpoint to post the datagram for
 *
 * The remote peer's attributes are received into ep->remote_attr. The
 * datagram id is MCA_BTL_UGNI_CONNECT_DIRECTED_ID combined with the
 * endpoint's local index.
 *
 * @returns the uGNI return code translated by mca_btl_rc_ugni_to_opal()
 */
static int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep)
{
    mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
    gni_return_t rc;

    /* ep_rem_id is an unsigned 32-bit value; print it as such */
    BTL_VERBOSE(("posting directed datagram to remote id: %u for endpoint %p",
                 (unsigned int) ep->ep_rem_id, (void *)ep));

    /* the irq cq is associated with only the first device */
    ep->mailbox->attr.rmt_irq_mem_hndl = ugni_module->devices->smsg_irq_mhndl;

    rc = GNI_EpPostDataWId (ep->smsg_ep_handle->gni_handle, &ep->mailbox->attr, sizeof (ep->mailbox->attr),
                            ep->remote_attr, sizeof (*ep->remote_attr),
                            MCA_BTL_UGNI_CONNECT_DIRECTED_ID | ep->index);

    return mca_btl_rc_ugni_to_opal (rc);
}
|
||||
|
||||
int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep) {
|
||||
/**
 * Post the module's wildcard datagram.
 *
 * @param[in] ugni_module  ugni btl module owning the wildcard endpoint
 *
 * Both wildcard attribute buffers are zeroed before the datagram is posted
 * with the MCA_BTL_UGNI_CONNECT_WILDCARD_ID identifier.
 *
 * @returns the uGNI return code translated by mca_btl_rc_ugni_to_opal()
 */
int mca_btl_ugni_wildcard_ep_post (mca_btl_ugni_module_t *ugni_module)
{
    gni_return_t grc;

    BTL_VERBOSE(("posting wildcard datagram"));

    /* start from clean local and remote attribute buffers */
    memset (&ugni_module->wc_local_attr, 0, sizeof (ugni_module->wc_local_attr));
    memset (&ugni_module->wc_remote_attr, 0, sizeof (ugni_module->wc_remote_attr));

    grc = GNI_EpPostDataWId (ugni_module->wildcard_ep,
                             &ugni_module->wc_local_attr, sizeof (ugni_module->wc_local_attr),
                             &ugni_module->wc_remote_attr, sizeof (ugni_module->wc_remote_attr),
                             MCA_BTL_UGNI_CONNECT_WILDCARD_ID);

    return mca_btl_rc_ugni_to_opal (grc);
}
|
||||
|
||||
|
||||
int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep)
|
||||
{
|
||||
int rc;
|
||||
|
||||
BTL_VERBOSE(("progressing connection for endpoint %p with state %d", (void *)ep, ep->state));
|
||||
@ -193,14 +341,17 @@ int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep) {
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
if (MCA_BTL_UGNI_EP_STATE_RDMA >= ep->state) {
|
||||
if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
|
||||
rc = mca_btl_ugni_ep_connect_start (ep);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
if (GNI_SMSG_TYPE_INVALID == ep->remote_attr.smsg_attr.msg_type) {
|
||||
BTL_VERBOSE(("ep->remote_attr->smsg_attr = {.msg_type = %d, .msg_buffer = 0x%lx}", ep->remote_attr->smsg_attr.msg_type,
|
||||
ep->remote_attr->smsg_attr.msg_buffer));
|
||||
|
||||
if (GNI_SMSG_TYPE_INVALID == ep->remote_attr->smsg_attr.msg_type) {
|
||||
/* use datagram to exchange connection information with the remote peer */
|
||||
if (!ep->dg_posted) {
|
||||
rc = mca_btl_ugni_directed_ep_post (ep);
|
||||
@ -217,3 +368,77 @@ int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep) {
|
||||
|
||||
return mca_btl_ugni_ep_connect_finish (ep);
|
||||
}
|
||||
|
||||
/**
 * Free list item initializer for endpoint handles.
 *
 * @param[in] item  free list item to initialize (an endpoint handle)
 * @param[in] ctx   free list context (the owning device)
 *
 * Records the device on the handle and creates a uGNI endpoint attached to
 * the device's local RDMA completion queue.
 *
 * @returns the GNI_EpCreate() result translated by mca_btl_rc_ugni_to_opal()
 */
int mca_btl_ugni_endpoint_handle_init_rdma (opal_free_list_item_t *item, void *ctx)
{
    mca_btl_ugni_device_t *dev = (mca_btl_ugni_device_t *) ctx;
    mca_btl_ugni_endpoint_handle_t *ep_handle = (mca_btl_ugni_endpoint_handle_t *) item;
    gni_return_t status;

    ep_handle->device = dev;
    status = GNI_EpCreate (dev->dev_handle, dev->dev_rdma_local_cq.gni_handle, &ep_handle->gni_handle);

    return mca_btl_rc_ugni_to_opal (status);
}
|
||||
|
||||
/**
 * Endpoint handle constructor: mark the handle as not owning a uGNI endpoint.
 */
static void mca_btl_ugni_endpoint_handle_construct (mca_btl_ugni_endpoint_handle_t *handle)
{
    handle->gni_handle = 0;
}

/**
 * Endpoint handle destructor: destroy the uGNI endpoint if one was created.
 */
static void mca_btl_ugni_endpoint_handle_destruct (mca_btl_ugni_endpoint_handle_t *handle)
{
    if (handle->gni_handle) {
        GNI_EpDestroy (handle->gni_handle);
        handle->gni_handle = 0;
    }
}

/* The first member of mca_btl_ugni_endpoint_handle_t is an opal_free_list_item_t
 * and handles are stored on a device's endpoint free list, so the parent class
 * must be opal_free_list_item_t for the list-item constructors/destructors to
 * run on these objects. */
OBJ_CLASS_INSTANCE(mca_btl_ugni_endpoint_handle_t, opal_free_list_item_t,
                   mca_btl_ugni_endpoint_handle_construct,
                   mca_btl_ugni_endpoint_handle_destruct);
|
||||
|
||||
mca_btl_ugni_endpoint_handle_t *mca_btl_ugni_ep_handle_create (mca_btl_ugni_endpoint_t *ep, gni_cq_handle_t cq,
|
||||
mca_btl_ugni_device_t *device)
|
||||
{
|
||||
mca_btl_ugni_endpoint_handle_t *ep_handle;
|
||||
gni_return_t grc;
|
||||
|
||||
ep_handle = OBJ_NEW(mca_btl_ugni_endpoint_handle_t);
|
||||
if (OPAL_UNLIKELY(NULL == ep_handle)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ep_handle->device = device;
|
||||
|
||||
/* create a uGNI endpoint handle and bind it to the remote peer */
|
||||
grc = GNI_EpCreate (device->dev_handle, cq, &ep_handle->gni_handle);
|
||||
if (OPAL_LIKELY(GNI_RC_SUCCESS == grc)) {
|
||||
grc = GNI_EpBind (ep_handle->gni_handle, ep->ep_rem_addr, ep->ep_rem_id);
|
||||
}
|
||||
|
||||
if (GNI_RC_SUCCESS != grc) {
|
||||
OBJ_RELEASE(ep_handle);
|
||||
ep_handle = NULL;
|
||||
}
|
||||
|
||||
return ep_handle;
|
||||
}
|
||||
|
||||
/**
 * Unbind and release a uGNI endpoint handle.
 *
 * @param[in] ep_handle  handle to unbind and release (may be NULL)
 *
 * A NULL handle, or a handle without a uGNI endpoint, is left untouched.
 * An unbind failure is reported through BTL_VERBOSE but does not prevent
 * the handle from being released.
 *
 * @returns OPAL_SUCCESS always
 */
int mca_btl_ugni_ep_handle_destroy (mca_btl_ugni_endpoint_handle_t *ep_handle)
{
    gni_return_t grc;

    if (NULL == ep_handle || 0 == ep_handle->gni_handle) {
        return OPAL_SUCCESS;
    }

    /* TODO: need to fix, may be outstanding tx's, etc. */
    grc = GNI_EpUnbind (ep_handle->gni_handle);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
        /* keep going: the handle is released regardless of the unbind result */
        BTL_VERBOSE(("btl/ugni error unbinding endpoint handle. rc = %d", (int) grc));
    }

    OBJ_RELEASE(ep_handle);

    return OPAL_SUCCESS;
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -17,15 +17,22 @@
|
||||
|
||||
enum mca_btl_ugni_endpoint_state_t {
|
||||
MCA_BTL_UGNI_EP_STATE_INIT = 0,
|
||||
MCA_BTL_UGNI_EP_STATE_START,
|
||||
MCA_BTL_UGNI_EP_STATE_RDMA,
|
||||
MCA_BTL_UGNI_EP_STATE_CONNECTING,
|
||||
MCA_BTL_UGNI_EP_STATE_CONNECTED
|
||||
MCA_BTL_UGNI_EP_STATE_CONNECTED,
|
||||
};
|
||||
typedef enum mca_btl_ugni_endpoint_state_t mca_btl_ugni_endpoint_state_t;
|
||||
|
||||
struct mca_btl_ugni_smsg_mbox_t;
|
||||
|
||||
/** uGNI endpoint handle together with the device it was created on */
struct mca_btl_ugni_endpoint_handle_t {
|
||||
/** free list item base (handles are kept on a device's endpoint free list) */
opal_free_list_item_t super;
|
||||
/** device this handle was created on */
mca_btl_ugni_device_t *device;
|
||||
/** underlying uGNI endpoint handle (0 when no endpoint has been created) */
gni_ep_handle_t gni_handle;
|
||||
};
|
||||
|
||||
typedef struct mca_btl_ugni_endpoint_handle_t mca_btl_ugni_endpoint_handle_t;
|
||||
OBJ_CLASS_DECLARATION(mca_btl_ugni_endpoint_handle_t);
|
||||
|
||||
typedef struct mca_btl_base_endpoint_t {
|
||||
opal_list_item_t super;
|
||||
|
||||
@ -37,24 +44,34 @@ typedef struct mca_btl_base_endpoint_t {
|
||||
opal_recursive_mutex_t lock;
|
||||
mca_btl_ugni_endpoint_state_t state;
|
||||
|
||||
opal_common_ugni_endpoint_t *common;
|
||||
/** Remote NIC address */
|
||||
uint32_t ep_rem_addr;
|
||||
|
||||
mca_btl_ugni_module_t *btl;
|
||||
/** Remote CDM identifier (base) */
|
||||
uint32_t ep_rem_id;
|
||||
|
||||
gni_ep_handle_t smsg_ep_handle;
|
||||
gni_ep_handle_t rdma_ep_handle;
|
||||
/** endpoint to use for SMSG messages */
|
||||
mca_btl_ugni_endpoint_handle_t *smsg_ep_handle;
|
||||
|
||||
mca_btl_ugni_endpoint_attr_t remote_attr; /* TODO: UGH, remove this */
|
||||
/** temporary space to store the remote SMSG attributes */
|
||||
mca_btl_ugni_endpoint_attr_t *remote_attr;
|
||||
|
||||
/** SMSG mailbox assigned to this endpoint */
|
||||
struct mca_btl_ugni_smsg_mbox_t *mailbox;
|
||||
gni_mem_handle_t rmt_irq_mem_hndl;
|
||||
|
||||
/** Remote IRQ handle (for async completion) */
|
||||
gni_mem_handle_t rmt_irq_mem_hndl;
|
||||
|
||||
/** frags waiting for SMSG credits */
|
||||
opal_list_t frag_wait_list;
|
||||
|
||||
/** endpoint is currently wait-listed for SMSG progress */
|
||||
bool wait_listed;
|
||||
|
||||
/** protect against race on connection */
|
||||
bool dg_posted;
|
||||
|
||||
/** protect against re-entry to SMSG */
|
||||
int32_t smsg_progressing;
|
||||
|
||||
int index;
|
||||
@ -65,49 +82,10 @@ OBJ_CLASS_DECLARATION(mca_btl_ugni_endpoint_t);
|
||||
|
||||
int mca_btl_ugni_ep_connect_progress (mca_btl_ugni_endpoint_t *ep);
|
||||
int mca_btl_ugni_ep_disconnect (mca_btl_ugni_endpoint_t *ep, bool send_disconnect);
|
||||
|
||||
static inline int mca_btl_ugni_init_ep (mca_btl_ugni_module_t *ugni_module,
|
||||
mca_btl_ugni_endpoint_t **ep,
|
||||
mca_btl_ugni_module_t *btl,
|
||||
opal_proc_t *peer_proc) {
|
||||
mca_btl_ugni_endpoint_t *endpoint;
|
||||
|
||||
endpoint = OBJ_NEW(mca_btl_ugni_endpoint_t);
|
||||
assert (endpoint != NULL);
|
||||
|
||||
endpoint->smsg_progressing = 0;
|
||||
endpoint->state = MCA_BTL_UGNI_EP_STATE_INIT;
|
||||
|
||||
endpoint->btl = btl;
|
||||
endpoint->peer_proc = peer_proc;
|
||||
endpoint->index = opal_pointer_array_add (&ugni_module->endpoints, endpoint);
|
||||
|
||||
*ep = endpoint;
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static inline void mca_btl_ugni_release_ep (mca_btl_ugni_endpoint_t *ep) {
|
||||
int rc;
|
||||
|
||||
if (ep->common) {
|
||||
opal_mutex_lock (&ep->lock);
|
||||
|
||||
rc = mca_btl_ugni_ep_disconnect (ep, false);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
BTL_VERBOSE(("btl/ugni error disconnecting endpoint"));
|
||||
}
|
||||
|
||||
/* TODO -- Clear space at the end of the endpoint array */
|
||||
opal_pointer_array_set_item (&ep->btl->endpoints, ep->index, NULL);
|
||||
|
||||
opal_mutex_unlock (&ep->lock);
|
||||
|
||||
opal_common_ugni_endpoint_return (ep->common);
|
||||
}
|
||||
|
||||
OBJ_RELEASE(ep);
|
||||
}
|
||||
int mca_btl_ugni_wildcard_ep_post (mca_btl_ugni_module_t *ugni_module);
|
||||
void mca_btl_ugni_release_ep (mca_btl_ugni_endpoint_t *ep);
|
||||
int mca_btl_ugni_init_ep (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_endpoint_t **ep,
|
||||
mca_btl_ugni_module_t *btl, opal_proc_t *peer_proc);
|
||||
|
||||
static inline int mca_btl_ugni_check_endpoint_state (mca_btl_ugni_endpoint_t *ep) {
|
||||
int rc;
|
||||
@ -120,8 +98,6 @@ static inline int mca_btl_ugni_check_endpoint_state (mca_btl_ugni_endpoint_t *ep
|
||||
|
||||
switch (ep->state) {
|
||||
case MCA_BTL_UGNI_EP_STATE_INIT:
|
||||
case MCA_BTL_UGNI_EP_STATE_RDMA:
|
||||
case MCA_BTL_UGNI_EP_STATE_START:
|
||||
rc = mca_btl_ugni_ep_connect_progress (ep);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
break;
|
||||
@ -138,63 +114,91 @@ static inline int mca_btl_ugni_check_endpoint_state (mca_btl_ugni_endpoint_t *ep
|
||||
return rc;
|
||||
}
|
||||
|
||||
static inline int mca_btl_ugni_ep_connect_rdma (mca_btl_base_endpoint_t *ep) {
|
||||
int rc;
|
||||
|
||||
if (ep->state >= MCA_BTL_UGNI_EP_STATE_RDMA) {
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
/* protect against re-entry from opal_progress */
|
||||
if (OPAL_UNLIKELY(MCA_BTL_UGNI_EP_STATE_START == ep->state)) {
|
||||
return OPAL_ERR_RESOURCE_BUSY;
|
||||
}
|
||||
|
||||
ep->state = MCA_BTL_UGNI_EP_STATE_START;
|
||||
|
||||
/* get the modex info for this endpoint and setup a ugni endpoint. this call may lead
|
||||
* to re-entry through opal_progress(). */
|
||||
rc = opal_common_ugni_endpoint_for_proc (ep->btl->device, ep->peer_proc, &ep->common);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
assert (0);
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* bind endpoint to remote address */
|
||||
rc = opal_common_ugni_ep_create (ep->common, ep->btl->rdma_local_cq, &ep->rdma_ep_handle);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
ep->state = MCA_BTL_UGNI_EP_STATE_RDMA;
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
/**
|
||||
* Accessor function for endpoint btl
|
||||
*
|
||||
* @param[in] ep endpoint to query
|
||||
*
|
||||
* This helper function exists to make it easy to switch between using a single
|
||||
* and multiple ugni modules. Currently there is only one so we just use the
|
||||
* pointer in the component structure. This saves 4-8 bytes in the endpoint
|
||||
* structure.
|
||||
*/
|
||||
static inline mca_btl_ugni_module_t *mca_btl_ugni_ep_btl (mca_btl_ugni_endpoint_t *ep)
{
    /* a single ugni module exists at this time, so the endpoint argument is
     * not consulted. if that changes add a btl pointer back to the endpoint
     * structure and return it here. */
    mca_btl_ugni_module_t *only_module = mca_btl_ugni_component.modules;

    return only_module;
}
|
||||
|
||||
static inline int mca_btl_ugni_check_endpoint_state_rdma (mca_btl_base_endpoint_t *ep) {
|
||||
int rc;
|
||||
if (OPAL_LIKELY(MCA_BTL_UGNI_EP_STATE_INIT < ep->state)) {
|
||||
return OPAL_SUCCESS;
|
||||
/**
|
||||
* Allocate and bind a uGNI endpoint handle to the remote peer.
|
||||
*
|
||||
* @param[in] ep BTL endpoint
|
||||
* @param[in] cq completion queue
|
||||
* @param[in] device device the uGNI endpoint handle is created on
*
* @returns the new uGNI endpoint handle, or NULL on failure
|
||||
*/
|
||||
mca_btl_ugni_endpoint_handle_t *mca_btl_ugni_ep_handle_create (mca_btl_ugni_endpoint_t *ep, gni_cq_handle_t cq,
|
||||
mca_btl_ugni_device_t *device);
|
||||
|
||||
/**
|
||||
* Unbind and free the uGNI endpoint handle.
|
||||
*
|
||||
* @param[in] ep_handle uGNI endpoint handle to unbind and release
|
||||
*/
|
||||
int mca_btl_ugni_ep_handle_destroy (mca_btl_ugni_endpoint_handle_t *ep_handle);
|
||||
|
||||
/**
|
||||
* Free list initialization function for endpoint handles (DO NOT CALL outside free list)
|
||||
*
|
||||
* @param[in] item Free list item to initialize
|
||||
* @param[in] ctx Free list context
|
||||
*
|
||||
* @returns OPAL_SUCCESS on success
|
||||
* @returns OPAL error code on error
|
||||
*/
|
||||
int mca_btl_ugni_endpoint_handle_init_rdma (opal_free_list_item_t *item, void *ctx);
|
||||
|
||||
/**
|
||||
* @brief get an endpoint handle from a device's free list
|
||||
*
|
||||
* @param[in] ep btl endpoint
|
||||
* @param[in] device btl device to use
|
||||
*
|
||||
* This function MUST be called with the device lock held. This was done over using
|
||||
* the atomic free list to avoid unnecessary atomics in the critical path.
|
||||
*/
|
||||
static inline mca_btl_ugni_endpoint_handle_t *
|
||||
mca_btl_ugni_ep_get_rdma (mca_btl_ugni_endpoint_t *ep, mca_btl_ugni_device_t *device)
|
||||
{
|
||||
mca_btl_ugni_endpoint_handle_t *ep_handle;
|
||||
gni_return_t grc;
|
||||
|
||||
ep_handle = (mca_btl_ugni_endpoint_handle_t *) opal_free_list_get_st (&device->endpoints);
|
||||
if (OPAL_UNLIKELY(NULL == ep_handle)) {
|
||||
return NULL;
|
||||
}
|
||||
grc = GNI_EpBind (ep_handle->gni_handle, ep->ep_rem_addr, ep->ep_rem_id | device->dev_index);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
|
||||
opal_free_list_return_st (&device->endpoints, &ep_handle->super);
|
||||
ep_handle = NULL;
|
||||
}
|
||||
|
||||
opal_mutex_lock (&ep->lock);
|
||||
rc = mca_btl_ugni_ep_connect_rdma (ep);
|
||||
opal_mutex_unlock (&ep->lock);
|
||||
return rc;
|
||||
return ep_handle;
|
||||
}
|
||||
|
||||
static inline int mca_btl_ugni_wildcard_ep_post (mca_btl_ugni_module_t *ugni_module) {
|
||||
gni_return_t rc;
|
||||
|
||||
BTL_VERBOSE(("posting wildcard datagram"));
|
||||
|
||||
memset (&ugni_module->wc_local_attr, 0, sizeof (ugni_module->wc_local_attr));
|
||||
memset (&ugni_module->wc_remote_attr, 0, sizeof (ugni_module->wc_remote_attr));
|
||||
rc = GNI_EpPostDataWId (ugni_module->wildcard_ep, &ugni_module->wc_local_attr,
|
||||
sizeof (ugni_module->wc_local_attr), &ugni_module->wc_remote_attr,
|
||||
sizeof (ugni_module->wc_remote_attr), MCA_BTL_UGNI_CONNECT_WILDCARD_ID);
|
||||
|
||||
return opal_common_rc_ugni_to_opal (rc);
|
||||
/**
|
||||
* @brief return an endpoint handle to a device's free list
|
||||
*
|
||||
* @param[in] ep_handle endpoint handle to return
|
||||
*
|
||||
* This function MUST be called with the device lock held. This was done over using
|
||||
* the atomic free list to avoid unnecessary atomics in the critical path.
|
||||
*/
|
||||
static inline void mca_btl_ugni_ep_return_rdma (mca_btl_ugni_endpoint_handle_t *ep_handle)
{
    /* the handle goes back to the free list of the device recorded on it */
    opal_free_list_t *owner_list = &ep_handle->device->endpoints;

    /* the unbind result is deliberately discarded */
    (void) GNI_EpUnbind (ep_handle->gni_handle);

    opal_free_list_return_st (owner_list, &ep_handle->super);
}
|
||||
|
||||
#endif /* MCA_BTL_UGNI_ENDPOINT_H */
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -38,11 +38,25 @@ OBJ_CLASS_INSTANCE(mca_btl_ugni_rdma_frag_t, mca_btl_base_descriptor_t,
|
||||
OBJ_CLASS_INSTANCE(mca_btl_ugni_eager_frag_t, mca_btl_base_descriptor_t,
|
||||
mca_btl_ugni_eager_frag_constructor, NULL);
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_btl_ugni_post_descriptor_t, opal_free_list_item_t,
|
||||
NULL, NULL);
|
||||
|
||||
int mca_btl_ugni_frag_init (mca_btl_ugni_base_frag_t *frag, mca_btl_ugni_module_t *ugni_module)
|
||||
static void mca_btl_ugni_post_descriptor_constructor (mca_btl_ugni_post_descriptor_t *desc)
|
||||
{
|
||||
desc->cq = NULL;
|
||||
desc->ep_handle = NULL;
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_btl_ugni_post_descriptor_t, opal_free_list_item_t,
|
||||
mca_btl_ugni_post_descriptor_constructor, NULL);
|
||||
|
||||
/**
 * Free list initializer for fragments.
 *
 * @param[in] frag  fragment being initialized
 * @param[in] id    identifier whose low 8 bits select the fragment free list
 *
 * Registers the fragment in the module's pending_smsg_frags_bb pointer array
 * (the returned index becomes the fragment's msg_id) and records which
 * free list the fragment belongs to.
 *
 * @returns OPAL_SUCCESS always
 */
int mca_btl_ugni_frag_init (mca_btl_ugni_base_frag_t *frag, void *id)
{
    mca_btl_ugni_module_t *module = mca_btl_ugni_component.modules;
    /* NTH: the id is a combination of the module id and the free list id. for now there
     * is only ever one module so the module id is ignored. if this changes the code
     * here and btl_ugni_add_procs.c (opal_free_list_init calls) needs to be updated */
    const intptr_t list_index = (intptr_t) id & 0xff;

    frag->my_list = &module->frags_lists[list_index];
    frag->msg_id = opal_pointer_array_add (&module->pending_smsg_frags_bb, (void *) frag);

    return OPAL_SUCCESS;
}
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2013 The University of Tennessee and The University
|
||||
@ -72,7 +72,7 @@ typedef struct mca_btl_ugni_base_frag_t {
|
||||
uint16_t flags;
|
||||
mca_btl_ugni_frag_hdr_t hdr;
|
||||
mca_btl_base_segment_t segments[2];
|
||||
opal_common_ugni_post_desc_t post_desc;
|
||||
gni_post_descriptor_t post_desc;
|
||||
mca_btl_base_endpoint_t *endpoint;
|
||||
mca_btl_ugni_reg_t *registration;
|
||||
opal_free_list_t *my_list;
|
||||
@ -88,12 +88,15 @@ typedef struct mca_btl_ugni_base_frag_t mca_btl_ugni_eager_frag_t;
|
||||
|
||||
typedef struct mca_btl_ugni_post_descriptor_t {
|
||||
opal_free_list_item_t super;
|
||||
opal_common_ugni_post_desc_t desc;
|
||||
gni_post_descriptor_t desc;
|
||||
mca_btl_ugni_endpoint_handle_t *ep_handle;
|
||||
mca_btl_base_endpoint_t *endpoint;
|
||||
mca_btl_base_registration_handle_t *local_handle;
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc;
|
||||
mca_btl_ugni_cq_t *cq;
|
||||
void *cbdata;
|
||||
void *ctx;
|
||||
int tries;
|
||||
} mca_btl_ugni_post_descriptor_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_btl_ugni_post_descriptor_t);
|
||||
@ -101,26 +104,38 @@ OBJ_CLASS_DECLARATION(mca_btl_ugni_post_descriptor_t);
|
||||
#define MCA_BTL_UGNI_DESC_TO_PDESC(desc) \
|
||||
((mca_btl_ugni_post_descriptor_t *)((uintptr_t) (desc) - offsetof (mca_btl_ugni_post_descriptor_t, desc)))
|
||||
|
||||
static inline void mca_btl_ugni_alloc_post_descriptor (mca_btl_base_endpoint_t *endpoint, mca_btl_base_registration_handle_t *local_handle,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata,
|
||||
mca_btl_ugni_post_descriptor_t **desc)
|
||||
static inline mca_btl_ugni_post_descriptor_t *
|
||||
mca_btl_ugni_alloc_post_descriptor (mca_btl_base_endpoint_t *endpoint, mca_btl_base_registration_handle_t *local_handle,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
|
||||
{
|
||||
*desc = (mca_btl_ugni_post_descriptor_t *) opal_free_list_get (&endpoint->btl->post_descriptors);
|
||||
if (NULL != *desc) {
|
||||
(*desc)->cbfunc = cbfunc;
|
||||
(*desc)->ctx = cbcontext;
|
||||
(*desc)->cbdata = cbdata;
|
||||
(*desc)->local_handle = local_handle;
|
||||
(*desc)->endpoint = endpoint;
|
||||
(void) OPAL_THREAD_ADD64(&endpoint->btl->active_rdma_count, 1);
|
||||
/* mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (endpoint); */
|
||||
mca_btl_ugni_post_descriptor_t *desc;
|
||||
|
||||
desc = OBJ_NEW(mca_btl_ugni_post_descriptor_t);
|
||||
/* (mca_btl_ugni_post_descriptor_t *) opal_free_list_get (&ugni_module->post_descriptors); */
|
||||
if (OPAL_UNLIKELY(NULL != desc)) {
|
||||
desc->cbfunc = cbfunc;
|
||||
desc->ctx = cbcontext;
|
||||
desc->cbdata = cbdata;
|
||||
desc->local_handle = local_handle;
|
||||
desc->endpoint = endpoint;
|
||||
}
|
||||
|
||||
return desc;
|
||||
}
|
||||
|
||||
static inline void mca_btl_ugni_return_post_descriptor (mca_btl_ugni_module_t *module,
|
||||
mca_btl_ugni_post_descriptor_t *desc)
|
||||
/**
 * Release a post descriptor obtained from mca_btl_ugni_alloc_post_descriptor().
 *
 * @param[in] desc  descriptor to release
 *
 * If an endpoint handle is attached it is returned through
 * mca_btl_ugni_ep_return_rdma() before the descriptor itself is released.
 */
static inline void mca_btl_ugni_return_post_descriptor (mca_btl_ugni_post_descriptor_t *desc)
{
    if (NULL != desc->ep_handle) {
        mca_btl_ugni_ep_return_rdma (desc->ep_handle);
        desc->ep_handle = NULL;
    }

    /* descriptors are created with OBJ_NEW, so release them through the object
     * system (runs the class destructors) rather than calling free() directly */
    OBJ_RELEASE(desc);
}
|
||||
|
||||
static inline void mca_btl_ugni_post_desc_complete (mca_btl_ugni_module_t *module, mca_btl_ugni_post_descriptor_t *desc, int rc)
|
||||
@ -129,40 +144,38 @@ static inline void mca_btl_ugni_post_desc_complete (mca_btl_ugni_module_t *modul
|
||||
|
||||
if (NULL != desc->cbfunc) {
|
||||
/* call the user's callback function */
|
||||
desc->cbfunc (&module->super, desc->endpoint, (void *)(intptr_t) desc->desc.base.local_addr,
|
||||
desc->cbfunc (&module->super, desc->endpoint, (void *)(intptr_t) desc->desc.local_addr,
|
||||
desc->local_handle, desc->ctx, desc->cbdata, rc);
|
||||
}
|
||||
|
||||
/* the descriptor is no longer needed */
|
||||
mca_btl_ugni_return_post_descriptor (module, desc);
|
||||
mca_btl_ugni_return_post_descriptor (desc);
|
||||
}
|
||||
|
||||
OBJ_CLASS_DECLARATION(mca_btl_ugni_smsg_frag_t);
|
||||
OBJ_CLASS_DECLARATION(mca_btl_ugni_rdma_frag_t);
|
||||
OBJ_CLASS_DECLARATION(mca_btl_ugni_eager_frag_t);
|
||||
|
||||
int mca_btl_ugni_frag_init (mca_btl_ugni_base_frag_t *frag, mca_btl_ugni_module_t *ugni_module);
|
||||
int mca_btl_ugni_frag_init (mca_btl_ugni_base_frag_t *frag, void *id);
|
||||
|
||||
static inline int mca_btl_ugni_frag_alloc (mca_btl_base_endpoint_t *ep,
|
||||
opal_free_list_t *list,
|
||||
mca_btl_ugni_base_frag_t **frag)
|
||||
static inline mca_btl_ugni_base_frag_t *mca_btl_ugni_frag_alloc (mca_btl_base_endpoint_t *ep,
|
||||
opal_free_list_t *list)
|
||||
{
|
||||
*frag = (mca_btl_ugni_base_frag_t *) opal_free_list_get (list);
|
||||
if (OPAL_LIKELY(NULL != *frag)) {
|
||||
(*frag)->my_list = list;
|
||||
(*frag)->endpoint = ep;
|
||||
(*frag)->ref_cnt = 1;
|
||||
return OPAL_SUCCESS;
|
||||
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) opal_free_list_get (list);
|
||||
if (OPAL_LIKELY(NULL != frag)) {
|
||||
frag->endpoint = ep;
|
||||
frag->ref_cnt = 1;
|
||||
}
|
||||
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
return frag;
|
||||
}
|
||||
|
||||
static inline int mca_btl_ugni_frag_return (mca_btl_ugni_base_frag_t *frag)
|
||||
{
|
||||
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (frag->endpoint);
|
||||
if (frag->registration) {
|
||||
frag->endpoint->btl->rcache->rcache_deregister (frag->endpoint->btl->rcache,
|
||||
(mca_rcache_base_registration_t *) frag->registration);
|
||||
ugni_module->rcache->rcache_deregister (ugni_module->rcache,
|
||||
(mca_rcache_base_registration_t *) frag->registration);
|
||||
frag->registration = NULL;
|
||||
}
|
||||
|
||||
@ -174,6 +187,7 @@ static inline int mca_btl_ugni_frag_return (mca_btl_ugni_base_frag_t *frag)
|
||||
}
|
||||
|
||||
static inline bool mca_btl_ugni_frag_del_ref (mca_btl_ugni_base_frag_t *frag, int rc) {
|
||||
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (frag->endpoint);
|
||||
int32_t ref_cnt;
|
||||
|
||||
opal_atomic_mb ();
|
||||
@ -186,7 +200,7 @@ static inline bool mca_btl_ugni_frag_del_ref (mca_btl_ugni_base_frag_t *frag, in
|
||||
|
||||
/* call callback if specified */
|
||||
if (frag->base.des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
|
||||
frag->base.des_cbfunc(&frag->endpoint->btl->super, frag->endpoint, &frag->base, rc);
|
||||
frag->base.des_cbfunc(&ugni_module->super, frag->endpoint, &frag->base, rc);
|
||||
}
|
||||
|
||||
if (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP) {
|
||||
@ -208,15 +222,38 @@ static inline bool mca_btl_ugni_frag_check_complete (mca_btl_ugni_base_frag_t *f
|
||||
return !!(MCA_BTL_UGNI_FRAG_COMPLETE & frag->flags);
|
||||
}
|
||||
|
||||
#define MCA_BTL_UGNI_FRAG_ALLOC_SMSG(ep, frag) \
|
||||
mca_btl_ugni_frag_alloc((ep), &(ep)->btl->smsg_frags, &(frag))
|
||||
#define MCA_BTL_UGNI_FRAG_ALLOC_RDMA(ep, frag) \
|
||||
mca_btl_ugni_frag_alloc((ep), &(ep)->btl->rdma_frags, &(frag))
|
||||
#define MCA_BTL_UGNI_FRAG_ALLOC_RDMA_INT(ep, frag) \
|
||||
mca_btl_ugni_frag_alloc((ep), &(ep)->btl->rdma_int_frags, &(frag))
|
||||
#define MCA_BTL_UGNI_FRAG_ALLOC_EAGER_SEND(ep, frag) \
|
||||
mca_btl_ugni_frag_alloc((ep), &(ep)->btl->eager_frags_send, &(frag))
|
||||
#define MCA_BTL_UGNI_FRAG_ALLOC_EAGER_RECV(ep, frag) \
|
||||
mca_btl_ugni_frag_alloc((ep), &(ep)->btl->eager_frags_recv, &(frag))
|
||||
|
||||
void mca_btl_ugni_wait_list_append (mca_btl_ugni_module_t *ugni_module, mca_btl_base_endpoint_t *endpoint,
|
||||
mca_btl_ugni_base_frag_t *frag);
|
||||
|
||||
/** Allocate a fragment from the module's MCA_BTL_UGNI_LIST_SMSG free list. */
static inline mca_btl_ugni_base_frag_t *mca_btl_ugni_frag_alloc_smsg (mca_btl_base_endpoint_t *ep)
{
    opal_free_list_t *smsg_list = &mca_btl_ugni_ep_btl (ep)->frags_lists[MCA_BTL_UGNI_LIST_SMSG];

    return mca_btl_ugni_frag_alloc (ep, smsg_list);
}
|
||||
|
||||
/** Allocate a fragment from the module's MCA_BTL_UGNI_LIST_RDMA free list. */
static inline mca_btl_ugni_base_frag_t *mca_btl_ugni_frag_alloc_rdma (mca_btl_base_endpoint_t *ep)
{
    opal_free_list_t *rdma_list = &mca_btl_ugni_ep_btl (ep)->frags_lists[MCA_BTL_UGNI_LIST_RDMA];

    return mca_btl_ugni_frag_alloc (ep, rdma_list);
}
|
||||
|
||||
/** Allocate a fragment from the module's MCA_BTL_UGNI_LIST_RDMA_INT free list. */
static inline mca_btl_ugni_base_frag_t *mca_btl_ugni_frag_alloc_rdma_int (mca_btl_base_endpoint_t *ep)
{
    opal_free_list_t *rdma_int_list = &mca_btl_ugni_ep_btl (ep)->frags_lists[MCA_BTL_UGNI_LIST_RDMA_INT];

    return mca_btl_ugni_frag_alloc (ep, rdma_int_list);
}
|
||||
|
||||
/** Allocate a fragment from the module's MCA_BTL_UGNI_LIST_EAGER_SEND free list. */
static inline mca_btl_ugni_base_frag_t *mca_btl_ugni_frag_alloc_eager_send (mca_btl_base_endpoint_t *ep)
{
    opal_free_list_t *eager_send_list = &mca_btl_ugni_ep_btl (ep)->frags_lists[MCA_BTL_UGNI_LIST_EAGER_SEND];

    return mca_btl_ugni_frag_alloc (ep, eager_send_list);
}
|
||||
|
||||
/** Allocate a fragment from the module's MCA_BTL_UGNI_LIST_EAGER_RECV free list. */
static inline mca_btl_ugni_base_frag_t *mca_btl_ugni_frag_alloc_eager_recv (mca_btl_base_endpoint_t *ep)
{
    opal_free_list_t *eager_recv_list = &mca_btl_ugni_ep_btl (ep)->frags_lists[MCA_BTL_UGNI_LIST_EAGER_RECV];

    return mca_btl_ugni_frag_alloc (ep, eager_recv_list);
}
|
||||
|
||||
#endif /* MCA_BTL_UGNI_FRAG_H */
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -37,11 +37,8 @@ int mca_btl_ugni_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t
|
||||
return OPAL_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("Using RDMA/FMA Get from local address %p to remote address %" PRIx64,
|
||||
local_address, remote_address));
|
||||
|
||||
/* cause endpoint to bind if it isn't already (bind is sufficient for rdma) */
|
||||
(void) mca_btl_ugni_check_endpoint_state_rdma (endpoint);
|
||||
BTL_VERBOSE(("Using RDMA/FMA Get %lu bytes to local address %p to remote address %" PRIx64,
|
||||
(unsigned long) size, local_address, remote_address));
|
||||
|
||||
return mca_btl_ugni_post (endpoint, true, size, local_address, remote_address, local_handle,
|
||||
remote_handle, order, cbfunc, cbcontext, cbdata);
|
||||
@ -110,13 +107,15 @@ static void mca_btl_ugni_callback_eager_get (struct mca_btl_base_module_t *btl,
|
||||
}
|
||||
|
||||
reg = mca_btl_base_active_message_trigger + tag;
|
||||
reg->cbfunc(&frag->endpoint->btl->super, tag, &(tmp.base), reg->cbdata);
|
||||
reg->cbfunc(&ugni_module->super, tag, &(tmp.base), reg->cbdata);
|
||||
|
||||
/* fill in the response header */
|
||||
frag->hdr.rdma.ctx = frag->hdr.eager.ctx;
|
||||
frag->flags = MCA_BTL_UGNI_FRAG_RESPONSE;
|
||||
frag->ref_cnt = 1;
|
||||
|
||||
frag->ref_cnt = 1;
|
||||
|
||||
/* once complete use this fragment for a pending eager get if any exist */
|
||||
frag->base.des_cbfunc = mca_btl_ugni_callback_eager_get_progress_pending;
|
||||
|
||||
@ -125,16 +124,7 @@ static void mca_btl_ugni_callback_eager_get (struct mca_btl_base_module_t *btl,
|
||||
NULL, 0, MCA_BTL_UGNI_TAG_RDMA_COMPLETE);
|
||||
if (OPAL_UNLIKELY(0 > rc)) {
|
||||
/* queue fragment */
|
||||
OPAL_THREAD_LOCK(&endpoint->lock);
|
||||
if (false == endpoint->wait_listed) {
|
||||
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
|
||||
opal_list_append (&ugni_module->ep_wait_list, &endpoint->super);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
|
||||
endpoint->wait_listed = true;
|
||||
}
|
||||
|
||||
opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) frag);
|
||||
OPAL_THREAD_UNLOCK(&endpoint->lock);
|
||||
mca_btl_ugni_wait_list_append (ugni_module, endpoint, frag);
|
||||
}
|
||||
}
|
||||
|
||||
@ -142,7 +132,7 @@ int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *endpoint,
|
||||
mca_btl_ugni_eager_ex_frag_hdr_t hdr,
|
||||
mca_btl_ugni_base_frag_t *frag)
|
||||
{
|
||||
mca_btl_ugni_module_t *ugni_module = endpoint->btl;
|
||||
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (endpoint);
|
||||
size_t size;
|
||||
int rc;
|
||||
|
||||
@ -151,10 +141,10 @@ int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *endpoint,
|
||||
do {
|
||||
if (NULL == frag) {
|
||||
/* try to allocate a registered buffer */
|
||||
rc = MCA_BTL_UGNI_FRAG_ALLOC_EAGER_RECV(endpoint, frag);
|
||||
frag = mca_btl_ugni_frag_alloc_eager_recv (endpoint);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
/* no registered buffers available. try again later */
|
||||
(void) MCA_BTL_UGNI_FRAG_ALLOC_RDMA_INT(endpoint, frag);
|
||||
frag = mca_btl_ugni_frag_alloc_rdma_int (endpoint);
|
||||
|
||||
/* not much can be done if a small fragment can not be allocated. abort! */
|
||||
assert (NULL != frag);
|
||||
|
306
opal/mca/btl/ugni/btl_ugni_init.c
Обычный файл
306
opal/mca/btl/ugni/btl_ugni_init.c
Обычный файл
@ -0,0 +1,306 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
|
||||
#include "btl_ugni.h"
|
||||
#include "btl_ugni_endpoint.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
#include "opal/util/bit_ops.h"
|
||||
|
||||
static inline int get_ptag(uint8_t *out_ptag)
|
||||
{
|
||||
/* TODO no need for tmp */
|
||||
char *ptr;
|
||||
uint8_t tmp_ptag;
|
||||
|
||||
if (NULL == (ptr = getenv("PMI_GNI_PTAG"))) {
|
||||
/* TODO add err msg - better rc? */
|
||||
return OPAL_ERR_NOT_FOUND;
|
||||
}
|
||||
errno = 0;
|
||||
tmp_ptag = (uint8_t)strtoul (ptr, (char **)NULL, 10);
|
||||
if (0 != errno) {
|
||||
/* TODO add err msg - better rc? */
|
||||
return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
|
||||
}
|
||||
*out_ptag = tmp_ptag;
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static inline int get_cookie (uint32_t *out_cookie)
|
||||
{
|
||||
/* TODO no need for tmp */
|
||||
char *ptr;
|
||||
uint32_t tmp_cookie;
|
||||
|
||||
if (NULL == (ptr = getenv("PMI_GNI_COOKIE"))) {
|
||||
/* TODO add err msg - better rc? */
|
||||
return OPAL_ERR_NOT_FOUND;
|
||||
}
|
||||
errno = 0;
|
||||
tmp_cookie = (uint32_t) strtoul (ptr, NULL, 10);
|
||||
if (0 != errno) {
|
||||
/* TODO add err msg - better rc? */
|
||||
return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
|
||||
}
|
||||
|
||||
*out_cookie = tmp_cookie;
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static unsigned int mca_btl_ugni_get_nic_address(int device_id)
|
||||
{
|
||||
unsigned int address, cpu_id;
|
||||
gni_return_t status;
|
||||
int i, alps_dev_id = -1;
|
||||
char *token,*p_ptr;
|
||||
|
||||
p_ptr = getenv("PMI_GNI_DEV_ID");
|
||||
if (!p_ptr) {
|
||||
status = GNI_CdmGetNicAddress(device_id, &address, &cpu_id);
|
||||
if(status != GNI_RC_SUCCESS) {
|
||||
opal_output (0, "FAILED:GNI_CdmGetNicAddress returned error %d", status);
|
||||
return (unsigned int)-1;
|
||||
}
|
||||
return address;
|
||||
}
|
||||
|
||||
while (NULL != (token = strtok(p_ptr, ":"))) {
|
||||
alps_dev_id = atoi(token);
|
||||
if (alps_dev_id == device_id) {
|
||||
break;
|
||||
}
|
||||
p_ptr = NULL;
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(-1 == alps_dev_id)) {
|
||||
return (unsigned int)-1;
|
||||
}
|
||||
|
||||
p_ptr = getenv("PMI_GNI_LOC_ADDR");
|
||||
if (OPAL_UNLIKELY(NULL == p_ptr)) {
|
||||
return (unsigned int)-1;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
while (NULL != (token = strtok(p_ptr, ":"))) {
|
||||
if (i == alps_dev_id) {
|
||||
return strtoul (token, NULL, 10);
|
||||
}
|
||||
p_ptr = NULL;
|
||||
++i;
|
||||
}
|
||||
|
||||
return (unsigned int)-1;
|
||||
}
|
||||
|
||||
/**
 * Initialize one virtual device.
 *
 * Constructs the device's endpoint free list and pending-post list, creates
 * a communication domain whose id is cdm_id_base | virtual_device_id,
 * attaches to that domain and clears the state of the device's local
 * completion queues.
 *
 * NOTE(review): if creating or attaching to the communication domain fails
 * the lists constructed here are not destructed by this function.
 *
 * @param[in,out] device             device structure to set up
 * @param[in]     virtual_device_id  index of this virtual device
 *
 * @returns OPAL_SUCCESS on success
 * @returns the error from opal_free_list_init(), or the uGNI error converted
 *          by mca_btl_rc_ugni_to_opal(), on failure
 */
int mca_btl_ugni_device_init (mca_btl_ugni_device_t *device, int virtual_device_id)
{
    uint32_t pe_address;
    gni_return_t grc;
    int rc;

    OBJ_CONSTRUCT(&device->endpoints, opal_free_list_t);
    OBJ_CONSTRUCT(&device->pending_post, opal_list_t);

    rc = opal_free_list_init (&device->endpoints, sizeof (mca_btl_ugni_endpoint_handle_t),
                              8, OBJ_CLASS(mca_btl_ugni_endpoint_handle_t), 0, 8, 0,
                              mca_btl_ugni_component.local_cq_size, 16,
                              NULL, 0, NULL, mca_btl_ugni_endpoint_handle_init_rdma,
                              (void *) device);
    if (OPAL_SUCCESS != rc) {
        OBJ_DESTRUCT(&device->endpoints);
        return rc;
    }

    /* each virtual device gets its own communication domain */
    grc = GNI_CdmCreate (mca_btl_ugni_component.cdm_id_base | virtual_device_id,
                         mca_btl_ugni_component.ptag, mca_btl_ugni_component.cookie,
                         mca_btl_ugni_component.cdm_flags, &device->dev_cd_handle);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
        /* this REALLY is an error but under alps + mapn we may not get any credentials */
        BTL_VERBOSE(("Error: Creating communication domain %d for virtual device %d", (int) grc, virtual_device_id));
        return mca_btl_rc_ugni_to_opal (grc);
    }

    device->dev_index = virtual_device_id;

    /* report the NIC address */
    OPAL_OUTPUT((-1, "Got NIC Addr: 0x%08x, CPU ID: %d", mca_btl_ugni_component.dev_addr, 0));

    /* attach to the communication domain that was just created */
    grc = GNI_CdmAttach (device->dev_cd_handle, 0, &pe_address, &device->dev_handle);
    if (GNI_RC_SUCCESS != grc) {
        BTL_VERBOSE(("Error: Attaching to communication domain. rc = %d, virtual device = %d", (int) grc, virtual_device_id));
        return mca_btl_rc_ugni_to_opal (grc);
    }

    /* no completion queues exist yet and nothing is in flight */
    device->lock = 0;

    device->dev_rdma_local_cq.gni_handle = 0;
    device->dev_rdma_local_cq.active_operations = 0;

    device->dev_rdma_local_irq_cq.gni_handle = 0;
    device->dev_rdma_local_irq_cq.active_operations = 0;

    device->dev_smsg_local_cq.gni_handle = 0;
    device->dev_smsg_local_cq.active_operations = 0;

    return OPAL_SUCCESS;
}
|
||||
|
||||
/**
 * Release the resources held by a virtual device.
 *
 * Destructs the endpoint free list and the pending-post list, destroys
 * whichever local completion queues have a non-zero handle and finally
 * destroys the communication domain. A failure to destroy the communication
 * domain is only logged.
 *
 * @param[in,out] dev  device to tear down
 *
 * @returns OPAL_SUCCESS always
 */
int mca_btl_ugni_device_fini (mca_btl_ugni_device_t *dev)
{
    OBJ_DESTRUCT(&dev->endpoints);
    OBJ_DESTRUCT(&dev->pending_post);

    /* a zero handle means the completion queue was never created */
    if (dev->dev_rdma_local_cq.gni_handle) {
        GNI_CqDestroy (dev->dev_rdma_local_cq.gni_handle);
        dev->dev_rdma_local_cq.gni_handle = 0;
    }

    if (dev->dev_rdma_local_irq_cq.gni_handle) {
        GNI_CqDestroy (dev->dev_rdma_local_irq_cq.gni_handle);
        dev->dev_rdma_local_irq_cq.gni_handle = 0;
    }

    if (dev->dev_smsg_local_cq.gni_handle) {
        GNI_CqDestroy (dev->dev_smsg_local_cq.gni_handle);
        dev->dev_smsg_local_cq.gni_handle = 0;
    }

    if (GNI_RC_SUCCESS != GNI_CdmDestroy (dev->dev_cd_handle)) {
        BTL_VERBOSE(("error destroying cdm handle"));
    }

    return OPAL_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* Send local device information and other information
|
||||
* required for setup
|
||||
*/
|
||||
static int mca_btl_ugni_send_modex (void)
|
||||
{
|
||||
struct mca_btl_ugni_modex_t modex;
|
||||
uint32_t modex_size;
|
||||
char *modex_msg;
|
||||
int rc;
|
||||
|
||||
modex_size = sizeof (struct mca_btl_ugni_modex_t);
|
||||
|
||||
modex_msg = (char *) malloc (modex_size);
|
||||
if (NULL == modex_msg) {
|
||||
OPAL_OUTPUT((-1, "Error allocating memory for modex @ %s:%d",
|
||||
__FILE__, __LINE__));
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
modex.addr = mca_btl_ugni_component.dev_addr;
|
||||
modex.id = mca_btl_ugni_component.cdm_id_base;
|
||||
|
||||
BTL_VERBOSE(("sending modex. addr: %d, id: %d", modex.addr, modex.id));
|
||||
|
||||
memcpy ((void *) modex_msg, (void *) &modex, modex_size);
|
||||
|
||||
/*
|
||||
* need global for edge cases like MPI_Comm_spawn support with
|
||||
* new ranks started on the same nodes as the spawnee ranks, etc.
|
||||
*/
|
||||
|
||||
OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
|
||||
&mca_btl_ugni_component.super.btl_version,
|
||||
modex_msg, modex_size);
|
||||
|
||||
free (modex_msg);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
int mca_btl_ugni_fini (void)
|
||||
{
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
int mca_btl_ugni_init (void)
|
||||
{
|
||||
int32_t pid_max = 32768;
|
||||
int rc, bit;
|
||||
FILE *fh;
|
||||
|
||||
if (0 == mca_btl_ugni_component.virtual_device_count) {
|
||||
/* XXX -- TODO -- might want to improve this logic. One option would be to
|
||||
* compare the number of local peers vs the number of cores or hyperthreads
|
||||
* on the node. */
|
||||
|
||||
if (!opal_using_threads() || opal_process_info.num_local_peers >= 255) {
|
||||
/* there is probably no benefit to using multiple device contexts when not
|
||||
* using threads. */
|
||||
mca_btl_ugni_component.virtual_device_count = 1;
|
||||
} else if (opal_process_info.num_local_peers >= 127) {
|
||||
mca_btl_ugni_component.virtual_device_count = 2;
|
||||
} else if (opal_process_info.num_local_peers >= 63) {
|
||||
mca_btl_ugni_component.virtual_device_count = 4;
|
||||
} else if (opal_process_info.num_local_peers >= 31) {
|
||||
mca_btl_ugni_component.virtual_device_count = 8;
|
||||
} else {
|
||||
mca_btl_ugni_component.virtual_device_count = 16;
|
||||
}
|
||||
} else if (MCA_BTL_UGNI_MAX_DEV_HANDLES < mca_btl_ugni_component.virtual_device_count) {
|
||||
mca_btl_ugni_component.virtual_device_count = MCA_BTL_UGNI_MAX_DEV_HANDLES;
|
||||
}
|
||||
|
||||
fh = fopen ("/proc/sys/kernel/pid_max", "r");
|
||||
if (NULL != fh) {
|
||||
fscanf (fh, "%d", &pid_max);
|
||||
fclose (fh);
|
||||
}
|
||||
|
||||
/* Use pid to generate the cdm_id. Although its not stated in the uGNI
|
||||
* documentation, the cdm_id only needs to be unique within a node for a
|
||||
* given ptag/cookie tuple */
|
||||
bit = opal_hibit (pid_max, 31);
|
||||
if (bit >= 31) {
|
||||
mca_btl_ugni_component.virtual_device_count = 1;
|
||||
mca_btl_ugni_component.cdm_id_base = getpid();
|
||||
} else if (bit >= 30 && mca_btl_ugni_component.virtual_device_count > 2) {
|
||||
mca_btl_ugni_component.virtual_device_count = 2;
|
||||
mca_btl_ugni_component.cdm_id_base = getpid() << 1;
|
||||
} else {
|
||||
mca_btl_ugni_component.cdm_id_base = getpid() << 8;
|
||||
}
|
||||
|
||||
/* Create a communication domain */
|
||||
/* collect uGNI information */
|
||||
rc = get_ptag(&mca_btl_ugni_component.ptag);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = get_cookie(&mca_btl_ugni_component.cookie);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* get the device address of the NIC */
|
||||
mca_btl_ugni_component.dev_addr = mca_btl_ugni_get_nic_address (0);
|
||||
|
||||
/* send ugni modex */
|
||||
mca_btl_ugni_send_modex ();
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2014-2016 Research Organization for Information Science
|
||||
@ -62,22 +62,18 @@ mca_btl_ugni_module_t mca_btl_ugni_module = {
|
||||
};
|
||||
|
||||
int
|
||||
mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
|
||||
opal_common_ugni_device_t *dev)
|
||||
mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module)
|
||||
{
|
||||
int rc;
|
||||
|
||||
BTL_VERBOSE(("binding module %p to device %p", (void *) ugni_module,
|
||||
(void *) dev));
|
||||
BTL_VERBOSE(("binding module %p to device 0", (void *) ugni_module));
|
||||
|
||||
/* copy module defaults (and function pointers) */
|
||||
memmove (ugni_module, &mca_btl_ugni_module, sizeof (mca_btl_ugni_module));
|
||||
|
||||
ugni_module->initialized = false;
|
||||
ugni_module->nlocal_procs = 0;
|
||||
ugni_module->active_send_count = 0;
|
||||
ugni_module->connected_peer_count = 0;
|
||||
ugni_module->active_rdma_count = 0;
|
||||
|
||||
OBJ_CONSTRUCT(&ugni_module->failed_frags, opal_list_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->failed_frags_lock, opal_mutex_t);
|
||||
@ -85,11 +81,10 @@ mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
|
||||
OBJ_CONSTRUCT(&ugni_module->eager_get_pending, opal_list_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->eager_get_pending_lock,opal_mutex_t);
|
||||
|
||||
OBJ_CONSTRUCT(&ugni_module->eager_frags_send, opal_free_list_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->eager_frags_recv, opal_free_list_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->smsg_frags, opal_free_list_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->rdma_frags, opal_free_list_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->rdma_int_frags, opal_free_list_t);
|
||||
for (int i = 0 ; i < MCA_BTL_UGNI_LIST_MAX ; ++i) {
|
||||
OBJ_CONSTRUCT(ugni_module->frags_lists + i, opal_free_list_t);
|
||||
}
|
||||
|
||||
OBJ_CONSTRUCT(&ugni_module->pending_smsg_frags_bb, opal_pointer_array_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->ep_wait_list_lock,opal_mutex_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->ep_wait_list, opal_list_t);
|
||||
@ -97,22 +92,26 @@ mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
|
||||
OBJ_CONSTRUCT(&ugni_module->endpoints, opal_pointer_array_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->id_to_endpoint, opal_hash_table_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->smsg_mboxes, opal_free_list_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->pending_descriptors, opal_list_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->eager_get_pending, opal_list_t);
|
||||
OBJ_CONSTRUCT(&ugni_module->post_descriptors, opal_free_list_t);
|
||||
|
||||
ugni_module->device = dev;
|
||||
dev->btl_ctx = (void *) ugni_module;
|
||||
/* set up virtual device handles */
|
||||
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
|
||||
rc = mca_btl_ugni_device_init (ugni_module->devices + i, i);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
BTL_VERBOSE(("error initializing uGNI device handle"));
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* create wildcard endpoint to listen for connections.
|
||||
* there is no need to bind this endpoint. */
|
||||
OPAL_THREAD_LOCK(&dev->dev_lock);
|
||||
rc = GNI_EpCreate (ugni_module->device->dev_handle, NULL,
|
||||
/* create wildcard endpoint on first device to listen for connections.
|
||||
* there is no need to bind this endpoint. We are single threaded
|
||||
* here so there is no need for a device lock. */
|
||||
rc = GNI_EpCreate (ugni_module->devices[0].dev_handle, NULL,
|
||||
&ugni_module->wildcard_ep);
|
||||
OPAL_THREAD_UNLOCK(&dev->dev_lock);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
BTL_ERROR(("error creating wildcard ugni endpoint"));
|
||||
return opal_common_rc_ugni_to_opal (rc);
|
||||
return mca_btl_rc_ugni_to_opal (rc);
|
||||
}
|
||||
|
||||
/* post wildcard datagram */
|
||||
@ -133,16 +132,8 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
|
||||
uint64_t key;
|
||||
int rc;
|
||||
|
||||
while (ugni_module->active_send_count) {
|
||||
/* ensure all sends are complete before closing the module */
|
||||
rc = mca_btl_ugni_progress_local_smsg (ugni_module);
|
||||
if (OPAL_SUCCESS != rc) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* close all open connections and release endpoints */
|
||||
if (ugni_module->initialized) {
|
||||
/* close all open connections and release endpoints */
|
||||
OPAL_HASH_TABLE_FOREACH(key, uint64, ep, &ugni_module->id_to_endpoint) {
|
||||
if (NULL != ep) {
|
||||
mca_btl_ugni_release_ep (ep);
|
||||
@ -154,28 +145,12 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
|
||||
}
|
||||
|
||||
/* destroy all cqs */
|
||||
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
||||
rc = GNI_CqDestroy (ugni_module->rdma_local_cq);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_ERROR(("error tearing down local BTE/FMA CQ - %s",gni_err_str[rc]));
|
||||
}
|
||||
|
||||
rc = GNI_CqDestroy (ugni_module->smsg_local_cq);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_ERROR(("error tearing down TX SMSG CQ - %s",gni_err_str[rc]));
|
||||
}
|
||||
|
||||
rc = GNI_CqDestroy (ugni_module->smsg_remote_cq);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_ERROR(("error tearing down RX SMSG CQ - %s",gni_err_str[rc]));
|
||||
}
|
||||
|
||||
if (mca_btl_ugni_component.progress_thread_enabled) {
|
||||
rc = GNI_CqDestroy (ugni_module->rdma_local_irq_cq);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_ERROR(("error tearing down local BTE/FMA CQ - %s",gni_err_str[rc]));
|
||||
}
|
||||
|
||||
rc = GNI_CqDestroy (ugni_module->smsg_remote_irq_cq);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_ERROR(("error tearing down remote SMSG CQ - %s",gni_err_str[rc]));
|
||||
@ -195,14 +170,12 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_VERBOSE(("btl/ugni error destroying endpoint - %s",gni_err_str[rc]));
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
}
|
||||
|
||||
OBJ_DESTRUCT(&ugni_module->eager_frags_send);
|
||||
OBJ_DESTRUCT(&ugni_module->eager_frags_recv);
|
||||
OBJ_DESTRUCT(&ugni_module->smsg_frags);
|
||||
OBJ_DESTRUCT(&ugni_module->rdma_frags);
|
||||
OBJ_DESTRUCT(&ugni_module->rdma_int_frags);
|
||||
for (int i = 0 ; i < MCA_BTL_UGNI_LIST_MAX ; ++i) {
|
||||
OBJ_DESTRUCT(ugni_module->frags_lists + i);
|
||||
}
|
||||
|
||||
OBJ_DESTRUCT(&ugni_module->ep_wait_list);
|
||||
OBJ_DESTRUCT(&ugni_module->smsg_mboxes);
|
||||
OBJ_DESTRUCT(&ugni_module->pending_smsg_frags_bb);
|
||||
@ -217,6 +190,10 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
|
||||
mca_rcache_base_module_destroy (ugni_module->rcache);
|
||||
}
|
||||
|
||||
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
|
||||
mca_btl_ugni_device_fini (ugni_module->devices + i);
|
||||
}
|
||||
|
||||
ugni_module->initialized = false;
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
@ -230,10 +207,17 @@ mca_btl_ugni_alloc(struct mca_btl_base_module_t *btl,
|
||||
{
|
||||
mca_btl_ugni_base_frag_t *frag = NULL;
|
||||
|
||||
if (size <= mca_btl_ugni_component.smsg_max_data) {
|
||||
(void) MCA_BTL_UGNI_FRAG_ALLOC_SMSG(endpoint, frag);
|
||||
/* do not allocate a fragment unless the wait list is relatively small. this
|
||||
* reduces the potential for resource exhaustion. note the wait list only exists
|
||||
* because we have no way to notify the sender that credits are available. */
|
||||
if (OPAL_UNLIKELY(opal_list_get_size (&endpoint->frag_wait_list) > 32)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (size <= mca_btl_ugni_component.smsg_max_data) {
|
||||
frag = mca_btl_ugni_frag_alloc_smsg (endpoint);
|
||||
} else if (size <= btl->btl_eager_limit) {
|
||||
(void) MCA_BTL_UGNI_FRAG_ALLOC_EAGER_SEND(endpoint, frag);
|
||||
frag = mca_btl_ugni_frag_alloc_eager_send (endpoint);
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
@ -284,6 +268,13 @@ mca_btl_ugni_prepare_src (struct mca_btl_base_module_t *btl,
|
||||
uint8_t order, size_t reserve, size_t *size,
|
||||
uint32_t flags)
|
||||
{
|
||||
/* do not allocate a fragment unless the wait list is relatively small. this
|
||||
* reduces the potential for resource exhaustion. note the wait list only exists
|
||||
* because we have no way to notify the sender that credits are available. */
|
||||
if (OPAL_UNLIKELY(opal_list_get_size (&endpoint->frag_wait_list) > 32)) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return mca_btl_ugni_prepare_src_send (btl, endpoint, convertor,
|
||||
order, reserve, size, flags);
|
||||
}
|
||||
|
@ -26,7 +26,7 @@ mca_btl_ugni_prepare_src_send_nodata (struct mca_btl_base_module_t *btl,
|
||||
{
|
||||
mca_btl_ugni_base_frag_t *frag = NULL;
|
||||
|
||||
(void) MCA_BTL_UGNI_FRAG_ALLOC_RDMA(endpoint, frag);
|
||||
frag = mca_btl_ugni_frag_alloc_rdma (endpoint);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return NULL;
|
||||
}
|
||||
@ -65,8 +65,7 @@ mca_btl_ugni_prepare_src_send_inplace (struct mca_btl_base_module_t *btl,
|
||||
|
||||
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
||||
|
||||
(void) MCA_BTL_UGNI_FRAG_ALLOC_RDMA(endpoint, frag);
|
||||
|
||||
frag = mca_btl_ugni_frag_alloc_rdma (endpoint);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return NULL;
|
||||
}
|
||||
@ -123,7 +122,7 @@ mca_btl_ugni_prepare_src_send_buffered (struct mca_btl_base_module_t *btl,
|
||||
int rc;
|
||||
|
||||
if (OPAL_UNLIKELY(true == use_eager_get)) {
|
||||
(void) MCA_BTL_UGNI_FRAG_ALLOC_EAGER_SEND(endpoint, frag);
|
||||
frag = mca_btl_ugni_frag_alloc_eager_send (endpoint);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return NULL;
|
||||
}
|
||||
@ -136,7 +135,7 @@ mca_btl_ugni_prepare_src_send_buffered (struct mca_btl_base_module_t *btl,
|
||||
frag->hdr_size = reserve + sizeof (frag->hdr.eager);
|
||||
frag->segments[0].seg_addr.pval = frag->hdr.eager_ex.pml_header;
|
||||
} else {
|
||||
(void) MCA_BTL_UGNI_FRAG_ALLOC_SMSG(endpoint, frag);
|
||||
frag = mca_btl_ugni_frag_alloc_smsg (endpoint);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
return NULL;
|
||||
}
|
||||
@ -186,8 +185,8 @@ mca_btl_ugni_prepare_src_send (struct mca_btl_base_module_t *btl,
|
||||
|
||||
opal_convertor_get_current_pointer (convertor, &data_ptr);
|
||||
|
||||
send_in_place = !(opal_convertor_need_buffers(convertor) ||
|
||||
(use_eager_get && ((uintptr_t)data_ptr & 3)));
|
||||
send_in_place = (btl->btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) && !(opal_convertor_need_buffers(convertor) ||
|
||||
(use_eager_get && ((uintptr_t)data_ptr & 3)));
|
||||
|
||||
if (send_in_place) {
|
||||
return mca_btl_ugni_prepare_src_send_inplace (btl, endpoint, convertor, order,
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -29,17 +29,19 @@ static void *mca_btl_ugni_prog_thread_fn(void * data)
|
||||
{
|
||||
uint32_t which;
|
||||
gni_return_t status;
|
||||
gni_cq_handle_t cq_vec[2];
|
||||
gni_cq_handle_t cq_vec[1 + MCA_BTL_UGNI_MAX_DEV_HANDLES];
|
||||
|
||||
struct mca_btl_ugni_module_t *btl = (mca_btl_ugni_module_t *)data;
|
||||
int cq_count = 1 + mca_btl_ugni_component.virtual_device_count;
|
||||
|
||||
/*
|
||||
* need to block signals
|
||||
*/
|
||||
|
||||
cq_vec[0] = btl->smsg_remote_irq_cq;
|
||||
cq_vec[1] = btl->rdma_local_irq_cq;
|
||||
|
||||
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
|
||||
cq_vec[i + 1] = btl->devices[i].dev_rdma_local_irq_cq.gni_handle;
|
||||
}
|
||||
|
||||
while (stop_progress_thread == 0) {
|
||||
|
||||
@ -48,7 +50,7 @@ static void *mca_btl_ugni_prog_thread_fn(void * data)
|
||||
*/
|
||||
|
||||
status = GNI_CqVectorMonitor(cq_vec,
|
||||
2,
|
||||
cq_count,
|
||||
-1,
|
||||
&which);
|
||||
|
||||
@ -106,8 +108,8 @@ int mca_btl_ugni_kill_progress_thread(void)
|
||||
*/
|
||||
|
||||
ret = mca_btl_ugni_post_cqwrite (mca_btl_ugni_component.modules[0].local_ep,
|
||||
mca_btl_ugni_component.modules[0].rdma_local_cq,
|
||||
mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl,
|
||||
&mca_btl_ugni_component.modules[0].devices[0].dev_rdma_local_cq,
|
||||
mca_btl_ugni_component.modules[0].devices[0].smsg_irq_mhndl,
|
||||
0xdead, NULL, NULL, NULL);
|
||||
/*
|
||||
* TODO: if error returned, need to kill off thread manually
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -19,11 +19,8 @@ int mca_btl_ugni_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t
|
||||
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
|
||||
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
|
||||
{
|
||||
BTL_VERBOSE(("Using RDMA/FMA Put from local address %p to remote address %" PRIx64,
|
||||
local_address, remote_address));
|
||||
|
||||
/* cause endpoint to bind if it isn't already (bind is sufficient for rdma) */
|
||||
(void) mca_btl_ugni_check_endpoint_state_rdma (endpoint);
|
||||
BTL_VERBOSE(("Using RDMA/FMA Put %lu bytes from local address %p to remote address %" PRIx64,
|
||||
(unsigned long) size, local_address, remote_address));
|
||||
|
||||
return mca_btl_ugni_post (endpoint, false, size, local_address, remote_address, local_handle,
|
||||
remote_handle, order, cbfunc, cbcontext, cbdata);
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -15,12 +15,13 @@
|
||||
|
||||
#include "btl_ugni.h"
|
||||
#include "btl_ugni_frag.h"
|
||||
#include "btl_ugni_device.h"
|
||||
|
||||
int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *ep,
|
||||
mca_btl_ugni_eager_ex_frag_hdr_t hdr,
|
||||
mca_btl_ugni_base_frag_t *frag);
|
||||
|
||||
static inline void init_gni_post_desc (opal_common_ugni_post_desc_t *post_desc,
|
||||
static inline void init_gni_post_desc (mca_btl_ugni_post_descriptor_t *post_desc,
|
||||
int order, gni_post_type_t op_type,
|
||||
uint64_t lcl_addr,
|
||||
gni_mem_handle_t lcl_mdh,
|
||||
@ -28,20 +29,20 @@ static inline void init_gni_post_desc (opal_common_ugni_post_desc_t *post_desc,
|
||||
gni_mem_handle_t rem_mdh,
|
||||
uint64_t bufsize,
|
||||
gni_cq_handle_t cq_hndl) {
|
||||
post_desc->base.type = op_type;
|
||||
post_desc->base.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
|
||||
post_desc->desc.type = op_type;
|
||||
post_desc->desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
|
||||
if (MCA_BTL_NO_ORDER == order) {
|
||||
post_desc->base.dlvr_mode = GNI_DLVMODE_PERFORMANCE;
|
||||
post_desc->desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE;
|
||||
} else {
|
||||
post_desc->base.dlvr_mode = GNI_DLVMODE_NO_ADAPT;
|
||||
post_desc->desc.dlvr_mode = GNI_DLVMODE_NO_ADAPT;
|
||||
}
|
||||
post_desc->base.local_addr = (uint64_t) lcl_addr;
|
||||
post_desc->base.local_mem_hndl = lcl_mdh;
|
||||
post_desc->base.remote_addr = (uint64_t) rem_addr;
|
||||
post_desc->base.remote_mem_hndl = rem_mdh;
|
||||
post_desc->base.length = bufsize;
|
||||
post_desc->base.rdma_mode = 0;
|
||||
post_desc->base.src_cq_hndl = cq_hndl;
|
||||
post_desc->desc.local_addr = (uint64_t) lcl_addr;
|
||||
post_desc->desc.local_mem_hndl = lcl_mdh;
|
||||
post_desc->desc.remote_addr = (uint64_t) rem_addr;
|
||||
post_desc->desc.remote_mem_hndl = rem_mdh;
|
||||
post_desc->desc.length = bufsize;
|
||||
post_desc->desc.rdma_mode = 0;
|
||||
post_desc->desc.src_cq_hndl = cq_hndl;
|
||||
post_desc->tries = 0;
|
||||
}
|
||||
|
||||
@ -54,38 +55,28 @@ static inline int mca_btl_ugni_post_fma (struct mca_btl_base_endpoint_t *endpoin
|
||||
{
|
||||
mca_btl_ugni_post_descriptor_t *post_desc;
|
||||
gni_mem_handle_t local_gni_handle = {0, 0};
|
||||
gni_return_t grc;
|
||||
int rc;
|
||||
|
||||
if (local_handle) {
|
||||
local_gni_handle = local_handle->gni_handle;
|
||||
}
|
||||
|
||||
mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata, &post_desc);
|
||||
post_desc = mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata);
|
||||
if (OPAL_UNLIKELY(NULL == post_desc)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* Post descriptor (CQ is ignored for FMA transactions) -- The CQ associated with the endpoint
|
||||
* is used. */
|
||||
init_gni_post_desc (&post_desc->desc, order, op_type, (intptr_t) local_address, local_gni_handle,
|
||||
init_gni_post_desc (post_desc, order, op_type, (intptr_t) local_address, local_gni_handle,
|
||||
remote_address, remote_handle->gni_handle, size, 0);
|
||||
|
||||
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
|
||||
grc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base);
|
||||
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
|
||||
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
|
||||
|
||||
if (GNI_RC_ALIGNMENT_ERROR == grc) {
|
||||
BTL_VERBOSE(("GNI_PostFma failed with an alignment error"));
|
||||
return OPAL_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("GNI_PostFma failed with gni rc: %d", grc));
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
rc = mca_btl_ugni_endpoint_post_fma (endpoint, post_desc);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
mca_btl_ugni_return_post_descriptor (post_desc);
|
||||
}
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
return rc;
|
||||
}
|
||||
|
||||
static inline int mca_btl_ugni_post_bte (mca_btl_base_endpoint_t *endpoint, gni_post_type_t op_type,
|
||||
@ -96,70 +87,53 @@ static inline int mca_btl_ugni_post_bte (mca_btl_base_endpoint_t *endpoint, gni_
|
||||
void *cbcontext, void *cbdata)
|
||||
{
|
||||
mca_btl_ugni_post_descriptor_t *post_desc;
|
||||
gni_cq_handle_t cq_handle = endpoint->btl->rdma_local_cq;
|
||||
gni_return_t status;
|
||||
int rc;
|
||||
|
||||
mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata, &post_desc);
|
||||
post_desc = mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata);
|
||||
if (OPAL_UNLIKELY(NULL == post_desc)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
if (mca_btl_ugni_component.progress_thread_enabled) {
|
||||
cq_handle = endpoint->btl->rdma_local_irq_cq;
|
||||
}
|
||||
|
||||
/* Post descriptor */
|
||||
init_gni_post_desc (&post_desc->desc, order, op_type, (intptr_t) local_address, local_handle->gni_handle,
|
||||
remote_address, remote_handle->gni_handle, size, cq_handle);
|
||||
init_gni_post_desc (post_desc, order, op_type, (intptr_t) local_address, local_handle->gni_handle,
|
||||
remote_address, remote_handle->gni_handle, size, 0);
|
||||
|
||||
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
|
||||
status = GNI_PostRdma (endpoint->rdma_ep_handle, &post_desc->desc.base);
|
||||
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != status)) {
|
||||
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
|
||||
|
||||
if (GNI_RC_ALIGNMENT_ERROR == status) {
|
||||
BTL_VERBOSE(("GNI_PostRdma failed with an alignment error"));
|
||||
return OPAL_ERR_NOT_AVAILABLE;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("GNI_PostRdma failed with gni rc: %d", status));
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
rc = mca_btl_ugni_endpoint_post_rdma (endpoint, post_desc);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
mca_btl_ugni_return_post_descriptor (post_desc);
|
||||
}
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
return rc;
|
||||
}
|
||||
|
||||
static inline int mca_btl_ugni_post_cqwrite (mca_btl_base_endpoint_t *endpoint, gni_cq_handle_t cq_handle,
|
||||
static inline int mca_btl_ugni_post_cqwrite (mca_btl_base_endpoint_t *endpoint, mca_btl_ugni_cq_t *cq,
|
||||
gni_mem_handle_t irq_mhndl, uint64_t value,
|
||||
mca_btl_base_rdma_completion_fn_t cbfunc,
|
||||
void *cbcontext, void *cbdata)
|
||||
{
|
||||
mca_btl_ugni_post_descriptor_t *post_desc;
|
||||
gni_return_t grc;
|
||||
int rc;
|
||||
|
||||
mca_btl_ugni_alloc_post_descriptor (endpoint, NULL, cbfunc, cbcontext, cbdata, &post_desc);
|
||||
post_desc = mca_btl_ugni_alloc_post_descriptor (endpoint, NULL, cbfunc, cbcontext, cbdata);
|
||||
if (OPAL_UNLIKELY(NULL == post_desc)) {
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
post_desc->desc.base.type = GNI_POST_CQWRITE;
|
||||
post_desc->desc.base.cqwrite_value = value; /* up to 48 bytes here, not used for now */
|
||||
post_desc->desc.base.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
|
||||
post_desc->desc.base.dlvr_mode = GNI_DLVMODE_IN_ORDER;
|
||||
post_desc->desc.base.src_cq_hndl = cq_handle;
|
||||
post_desc->desc.base.remote_mem_hndl = irq_mhndl;
|
||||
post_desc->desc.tries = 0;
|
||||
post_desc->desc.type = GNI_POST_CQWRITE;
|
||||
post_desc->desc.cqwrite_value = value; /* up to 48 bytes here, not used for now */
|
||||
post_desc->desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
|
||||
post_desc->desc.dlvr_mode = GNI_DLVMODE_IN_ORDER;
|
||||
post_desc->desc.src_cq_hndl = cq->gni_handle;
|
||||
post_desc->desc.remote_mem_hndl = irq_mhndl;
|
||||
post_desc->tries = 0;
|
||||
post_desc->cq = cq;
|
||||
|
||||
OPAL_THREAD_LOCK(&endpoint->common->dev->dev_lock);
|
||||
grc = GNI_PostCqWrite(endpoint->rdma_ep_handle, &post_desc->desc.base);
|
||||
OPAL_THREAD_UNLOCK(&endpoint->common->dev->dev_lock);
|
||||
if (GNI_RC_SUCCESS != grc) { /* errors for PostCqWrite treated as non-fatal */
|
||||
BTL_VERBOSE(("GNI_PostCqWrite returned error - %s", gni_err_str[grc]));
|
||||
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
|
||||
rc = mca_btl_ugni_endpoint_post_cqwrite (endpoint, post_desc);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { /* errors for PostCqWrite treated as non-fatal */
|
||||
mca_btl_ugni_return_post_descriptor (post_desc);
|
||||
}
|
||||
|
||||
return opal_common_rc_ugni_to_opal (grc);
|
||||
return rc;
|
||||
}
|
||||
|
||||
static inline int mca_btl_ugni_post (mca_btl_base_endpoint_t *endpoint, int get, size_t size,
|
||||
@ -183,27 +157,11 @@ static inline int mca_btl_ugni_post (mca_btl_base_endpoint_t *endpoint, int get,
|
||||
|
||||
static inline int mca_btl_ugni_repost (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_post_descriptor_t *post_desc)
|
||||
{
|
||||
gni_return_t grc;
|
||||
|
||||
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
||||
if (GNI_POST_RDMA_PUT == post_desc->desc.base.type ||
|
||||
GNI_POST_RDMA_GET == post_desc->desc.base.type) {
|
||||
grc = GNI_PostRdma (post_desc->endpoint->rdma_ep_handle, &post_desc->desc.base);
|
||||
} else {
|
||||
grc = GNI_PostFma (post_desc->endpoint->rdma_ep_handle, &post_desc->desc.base);
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
|
||||
/* NTH: Should we even retry these? When this code was written there was no indication
|
||||
* whether an error in post is recoverable. Clobber this code and the associated data
|
||||
* structures if post errors are not recoverable. */
|
||||
OPAL_THREAD_LOCK(&ugni_module->pending_descriptors_lock);
|
||||
opal_list_append (&ugni_module->pending_descriptors, (opal_list_item_t *) post_desc);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->pending_descriptors_lock);
|
||||
if (GNI_POST_RDMA_PUT == post_desc->desc.type || GNI_POST_RDMA_GET == post_desc->desc.type) {
|
||||
return mca_btl_ugni_endpoint_post_rdma (post_desc->endpoint, post_desc);
|
||||
}
|
||||
|
||||
return opal_common_rc_ugni_to_opal (grc);
|
||||
return mca_btl_ugni_endpoint_post_fma (post_desc->endpoint, post_desc);
|
||||
}
|
||||
|
||||
#endif /* MCA_BTL_UGNI_RDMA_H */
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
@ -17,6 +17,30 @@
|
||||
#include "btl_ugni_smsg.h"
|
||||
#include "btl_ugni_prepare.h"
|
||||
|
||||
void mca_btl_ugni_wait_list_append (mca_btl_ugni_module_t *ugni_module, mca_btl_base_endpoint_t *endpoint,
|
||||
mca_btl_ugni_base_frag_t *frag)
|
||||
{
|
||||
BTL_VERBOSE(("wait-listing fragment %p to %s. endpoint state %d\n", frag, OPAL_NAME_PRINT(endpoint->peer_proc->proc_name), endpoint->state));
|
||||
|
||||
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
|
||||
|
||||
/* queue up request */
|
||||
OPAL_THREAD_LOCK(&endpoint->lock);
|
||||
|
||||
opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) frag);
|
||||
|
||||
OPAL_THREAD_UNLOCK(&endpoint->lock);
|
||||
|
||||
if (false == endpoint->wait_listed && MCA_BTL_UGNI_EP_STATE_CONNECTED == endpoint->state) {
|
||||
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
|
||||
if (false == endpoint->wait_listed) {
|
||||
opal_list_append (&ugni_module->ep_wait_list, &endpoint->super);
|
||||
endpoint->wait_listed = true;
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
|
||||
}
|
||||
}
|
||||
|
||||
int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
|
||||
struct mca_btl_base_endpoint_t *endpoint,
|
||||
struct mca_btl_base_descriptor_t *descriptor,
|
||||
@ -30,18 +54,15 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
|
||||
/* tag and len are at the same location in eager and smsg frag hdrs */
|
||||
frag->hdr.send.lag = (tag << 24) | size;
|
||||
|
||||
BTL_VERBOSE(("btl/ugni sending descriptor %p from %d -> %d. length = %" PRIu64, (void *)descriptor,
|
||||
OPAL_PROC_MY_NAME.vpid, endpoint->peer_proc->proc_name.vpid, size));
|
||||
|
||||
rc = mca_btl_ugni_check_endpoint_state (endpoint);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
|
||||
OPAL_THREAD_LOCK(&endpoint->lock);
|
||||
opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) frag);
|
||||
OPAL_THREAD_UNLOCK(&endpoint->lock);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || opal_list_get_size (&endpoint->frag_wait_list))) {
|
||||
mca_btl_ugni_wait_list_append (ugni_module, endpoint, frag);
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("btl/ugni sending descriptor %p from %d -> %d. length = %" PRIu64, (void *)descriptor,
|
||||
OPAL_PROC_MY_NAME.vpid, endpoint->common->ep_rem_id, size));
|
||||
|
||||
/* add a reference to prevent the fragment from being returned until after the
|
||||
* completion flag is checked. */
|
||||
++frag->ref_cnt;
|
||||
@ -61,7 +82,7 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
|
||||
frag->flags &= ~MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
|
||||
|
||||
if (call_callback) {
|
||||
frag->base.des_cbfunc(&frag->endpoint->btl->super, frag->endpoint, &frag->base, rc);
|
||||
frag->base.des_cbfunc(&ugni_module->super, frag->endpoint, &frag->base, rc);
|
||||
}
|
||||
|
||||
(void) mca_btl_ugni_frag_del_ref (frag, OPAL_SUCCESS);
|
||||
@ -77,18 +98,7 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
|
||||
|
||||
if (OPAL_UNLIKELY(OPAL_ERR_OUT_OF_RESOURCE == rc)) {
|
||||
/* queue up request */
|
||||
if (false == endpoint->wait_listed) {
|
||||
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
|
||||
if (false == endpoint->wait_listed) {
|
||||
opal_list_append (&ugni_module->ep_wait_list, &endpoint->super);
|
||||
endpoint->wait_listed = true;
|
||||
}
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&endpoint->lock);
|
||||
opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) frag);
|
||||
OPAL_THREAD_UNLOCK(&endpoint->lock);
|
||||
mca_btl_ugni_wait_list_append (ugni_module, endpoint, frag);
|
||||
rc = OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
@ -109,9 +119,9 @@ int mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
|
||||
int rc;
|
||||
|
||||
do {
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != mca_btl_ugni_check_endpoint_state (endpoint))) {
|
||||
break;
|
||||
}
|
||||
BTL_VERBOSE(("btl/ugni isend sending fragment from %d -> %d. length = %" PRIu64
|
||||
" endoint state %d", OPAL_PROC_MY_NAME.vpid, endpoint->peer_proc->proc_name.vpid,
|
||||
payload_size + header_size, endpoint->state));
|
||||
|
||||
flags |= MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;
|
||||
|
||||
@ -124,7 +134,8 @@ int mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
|
||||
}
|
||||
|
||||
assert (packed_size == payload_size);
|
||||
if (OPAL_UNLIKELY(NULL == frag)) {
|
||||
if (OPAL_UNLIKELY(NULL == frag || OPAL_SUCCESS != mca_btl_ugni_check_endpoint_state (endpoint) ||
|
||||
opal_list_get_size (&endpoint->frag_wait_list))) {
|
||||
break;
|
||||
}
|
||||
|
||||
@ -141,8 +152,9 @@ int mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
|
||||
} while (0);
|
||||
|
||||
if (NULL != descriptor) {
|
||||
*descriptor = NULL;
|
||||
*descriptor = &frag->base;
|
||||
}
|
||||
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -28,7 +28,7 @@ static void mca_btl_ugni_smsg_mbox_construct (mca_btl_ugni_smsg_mbox_t *mbox) {
|
||||
mbox->attr.smsg_attr.buff_size = mca_btl_ugni_component.smsg_mbox_size;
|
||||
mbox->attr.smsg_attr.mem_hndl = ugni_reg->handle.gni_handle;
|
||||
mbox->attr.proc_name = OPAL_PROC_MY_NAME;
|
||||
mbox->attr.rmt_irq_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl;
|
||||
mbox->attr.rmt_irq_mem_hndl = mca_btl_ugni_component.modules[0].devices[0].smsg_irq_mhndl;
|
||||
}
|
||||
|
||||
OBJ_CLASS_INSTANCE(mca_btl_ugni_smsg_mbox_t, opal_free_list_item_t,
|
||||
@ -39,11 +39,13 @@ int mca_btl_ugni_smsg_init (mca_btl_ugni_module_t *ugni_module)
|
||||
{
|
||||
gni_return_t rc;
|
||||
|
||||
rc = GNI_SmsgSetMaxRetrans (ugni_module->device->dev_handle,
|
||||
mca_btl_ugni_component.smsg_max_retries);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_ERROR(("error setting maximum SMSG retries %s",gni_err_str[rc]));
|
||||
return opal_common_rc_ugni_to_opal (rc);
|
||||
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
|
||||
rc = GNI_SmsgSetMaxRetrans (ugni_module->devices[i].dev_handle,
|
||||
mca_btl_ugni_component.smsg_max_retries);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
BTL_ERROR(("error setting maximum SMSG retries %s",gni_err_str[rc]));
|
||||
return mca_btl_rc_ugni_to_opal (rc);
|
||||
}
|
||||
}
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
@ -52,6 +54,7 @@ int mca_btl_ugni_smsg_init (mca_btl_ugni_module_t *ugni_module)
|
||||
/* progress */
|
||||
int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
|
||||
{
|
||||
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
|
||||
mca_btl_active_message_callback_t *reg;
|
||||
mca_btl_ugni_base_frag_t frag;
|
||||
mca_btl_base_segment_t seg;
|
||||
@ -70,27 +73,20 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
|
||||
do {
|
||||
uint8_t tag = GNI_SMSG_ANY_TAG;
|
||||
|
||||
OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
|
||||
rc = GNI_SmsgGetNextWTag (ep->smsg_ep_handle, (void **) &data_ptr, &tag);
|
||||
OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);
|
||||
if (GNI_RC_NOT_DONE == rc) {
|
||||
BTL_VERBOSE(("no smsg message waiting. rc = %s", gni_err_str[rc]));
|
||||
rc = mca_btl_ugni_smsg_get_next_wtag (ep->smsg_ep_handle, &data_ptr, &tag);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
if (OPAL_LIKELY(GNI_RC_NOT_DONE == rc)) {
|
||||
BTL_VERBOSE(("no smsg message waiting. rc = %s", gni_err_str[rc]));
|
||||
|
||||
ep->smsg_progressing = 0;
|
||||
ep->smsg_progressing = 0;
|
||||
return count;
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
BTL_ERROR(("GNI_SmsgGetNextWTag returned error %s", gni_err_str[rc]));
|
||||
BTL_ERROR(("unhandled GNI_SmsgGetNextWTag error"));
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(0 == data_ptr)) {
|
||||
BTL_ERROR(("null data ptr!"));
|
||||
assert (0);
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
assert (0 != data_ptr);
|
||||
|
||||
count++;
|
||||
|
||||
@ -114,7 +110,7 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
|
||||
|
||||
assert (NULL != reg->cbfunc);
|
||||
|
||||
reg->cbfunc(&ep->btl->super, tag, &(frag.base), reg->cbdata);
|
||||
reg->cbfunc(&ugni_module->super, tag, &(frag.base), reg->cbdata);
|
||||
|
||||
break;
|
||||
case MCA_BTL_UGNI_TAG_GET_INIT:
|
||||
@ -141,16 +137,14 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
|
||||
break;
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
|
||||
rc = GNI_SmsgRelease (ep->smsg_ep_handle);
|
||||
OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);
|
||||
rc = mca_btl_ugni_smsg_release (ep->smsg_ep_handle);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
BTL_ERROR(("Smsg release failed! rc = %d", rc));
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
} while (!disconnect);
|
||||
|
||||
ep->smsg_progressing = false;
|
||||
ep->smsg_progressing = 0;
|
||||
|
||||
/* disconnect if we get here */
|
||||
opal_mutex_lock (&ep->lock);
|
||||
@ -165,7 +159,6 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
|
||||
static inline int
|
||||
mca_btl_ugni_handle_remote_smsg_overrun (mca_btl_ugni_module_t *btl)
|
||||
{
|
||||
gni_cq_entry_t event_data;
|
||||
size_t endpoint_count;
|
||||
unsigned int ep_index;
|
||||
int count, rc;
|
||||
@ -177,11 +170,7 @@ mca_btl_ugni_handle_remote_smsg_overrun (mca_btl_ugni_module_t *btl)
|
||||
smsg remote cq and check all mailboxes */
|
||||
|
||||
/* clear out remote cq */
|
||||
do {
|
||||
OPAL_THREAD_LOCK(&btl->device->dev_lock);
|
||||
rc = GNI_CqGetEvent (btl->smsg_remote_cq, &event_data);
|
||||
OPAL_THREAD_UNLOCK(&btl->device->dev_lock);
|
||||
} while (GNI_RC_NOT_DONE != rc);
|
||||
mca_btl_ugni_cq_clear (btl->devices, btl->smsg_remote_cq);
|
||||
|
||||
endpoint_count = opal_pointer_array_get_size (&btl->endpoints);
|
||||
|
||||
@ -212,9 +201,7 @@ int mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_module_t *btl)
|
||||
gni_return_t grc;
|
||||
uint64_t inst_id;
|
||||
|
||||
OPAL_THREAD_LOCK(&btl->device->dev_lock);
|
||||
grc = GNI_CqGetEvent (btl->smsg_remote_cq, &event_data);
|
||||
OPAL_THREAD_UNLOCK(&btl->device->dev_lock);
|
||||
grc = mca_btl_ugni_gni_cq_get_event (btl->devices, btl->smsg_remote_cq, &event_data);
|
||||
if (GNI_RC_NOT_DONE == grc) {
|
||||
return 0;
|
||||
}
|
||||
@ -231,12 +218,12 @@ int mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_module_t *btl)
|
||||
|
||||
/* unhandled error: crash */
|
||||
assert (0);
|
||||
return opal_common_rc_ugni_to_opal (grc);
|
||||
return mca_btl_rc_ugni_to_opal (grc);
|
||||
}
|
||||
|
||||
BTL_VERBOSE(("REMOTE CQ: Got event 0x%" PRIx64 ". msg id = %" PRIu64
|
||||
". ok = %d, type = %" PRIu64 "\n", (uint64_t) event_data,
|
||||
GNI_CQ_GET_MSG_ID(event_data), GNI_CQ_STATUS_OK(event_data),
|
||||
". ok = %d, type = %" PRIu64, (uint64_t) event_data,
|
||||
GNI_CQ_GET_INST_ID(event_data), GNI_CQ_STATUS_OK(event_data),
|
||||
GNI_CQ_GET_TYPE(event_data)));
|
||||
|
||||
inst_id = GNI_CQ_GET_INST_ID(event_data);
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
|
||||
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
@ -36,20 +36,13 @@ int mca_btl_ugni_smsg_init (mca_btl_ugni_module_t *ugni_module);
|
||||
int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep);
|
||||
int mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_module_t *btl);
|
||||
|
||||
static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_module)
|
||||
static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device)
|
||||
{
|
||||
mca_btl_ugni_base_frag_t *frag;
|
||||
gni_cq_entry_t event_data;
|
||||
gni_return_t grc;
|
||||
|
||||
/* nothing to do */
|
||||
if (0 == ugni_module->active_send_count) {
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
|
||||
grc = GNI_CqGetEvent (ugni_module->smsg_local_cq, &event_data);
|
||||
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
|
||||
grc = mca_btl_ugni_cq_get_event (device, &device->dev_smsg_local_cq, &event_data);
|
||||
if (GNI_RC_NOT_DONE == grc) {
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
@ -59,7 +52,7 @@ static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_
|
||||
will the event eventually come back? Ask Cray */
|
||||
BTL_ERROR(("post error! cq overrun = %d", (int)GNI_CQ_OVERRUN(event_data)));
|
||||
assert (0);
|
||||
return opal_common_rc_ugni_to_opal (grc);
|
||||
return mca_btl_rc_ugni_to_opal (grc);
|
||||
}
|
||||
|
||||
assert (GNI_CQ_GET_TYPE(event_data) == GNI_CQ_EVENT_TYPE_SMSG);
|
||||
@ -71,8 +64,6 @@ static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_
|
||||
return OPAL_ERROR;
|
||||
}
|
||||
|
||||
opal_atomic_add_32(&ugni_module->active_send_count,-1);
|
||||
|
||||
frag->flags |= MCA_BTL_UGNI_FRAG_SMSG_COMPLETE;
|
||||
|
||||
if (!(frag->flags & MCA_BTL_UGNI_FRAG_IGNORE)) {
|
||||
@ -87,26 +78,22 @@ static inline int opal_mca_btl_ugni_smsg_send (mca_btl_ugni_base_frag_t *frag,
|
||||
void *payload, size_t payload_len,
|
||||
mca_btl_ugni_smsg_tag_t tag)
|
||||
{
|
||||
mca_btl_base_endpoint_t *endpoint = frag->endpoint;
|
||||
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (endpoint);
|
||||
gni_return_t grc;
|
||||
|
||||
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
|
||||
grc = GNI_SmsgSendWTag (frag->endpoint->smsg_ep_handle, hdr, hdr_len,
|
||||
payload, payload_len, frag->msg_id, tag);
|
||||
OPAL_THREAD_UNLOCK(&frag->endpoint->common->dev->dev_lock);
|
||||
|
||||
grc = mca_btl_ugni_endpoint_smsg_send_wtag (endpoint, hdr, hdr_len, payload, payload_len,
|
||||
frag->msg_id, tag);
|
||||
if (OPAL_LIKELY(GNI_RC_SUCCESS == grc)) {
|
||||
/* increment the active send counter */
|
||||
opal_atomic_add_32(&frag->endpoint->btl->active_send_count,1);
|
||||
|
||||
if (mca_btl_ugni_component.progress_thread_enabled) {
|
||||
if (frag->base.des_flags & MCA_BTL_DES_FLAGS_SIGNAL) {
|
||||
/* errors for PostCqWrite treated as non-fatal */
|
||||
(void) mca_btl_ugni_post_cqwrite (frag->endpoint, frag->endpoint->btl->rdma_local_cq,
|
||||
frag->endpoint->rmt_irq_mem_hndl, 0xdead, NULL, NULL, NULL);
|
||||
(void) mca_btl_ugni_post_cqwrite (endpoint, &ugni_module->devices[0].dev_rdma_local_cq,
|
||||
endpoint->rmt_irq_mem_hndl, 0xdead, NULL, NULL, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
(void) mca_btl_ugni_progress_local_smsg ((mca_btl_ugni_module_t *) frag->endpoint->btl);
|
||||
(void) mca_btl_ugni_progress_local_smsg (ugni_module, endpoint->smsg_ep_handle->device);
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
|
@ -1,67 +0,0 @@
|
||||
# -*- indent-tabs-mode:nil -*-
|
||||
#
|
||||
# Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# Make the output library in this directory, and name it either
|
||||
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
|
||||
# (for static builds).
|
||||
|
||||
AM_CPPFLAGS = $(common_ugni_CPPFLAGS)
|
||||
|
||||
component_noinst = lib@OPAL_LIB_PREFIX@mca_common_ugni_noinst.la
|
||||
component_install = lib@OPAL_LIB_PREFIX@mca_common_ugni.la
|
||||
|
||||
if MCA_BUILD_opal_common_ugni_DSO
|
||||
lib_LTLIBRARIES = $(component_install)
|
||||
else
|
||||
noinst_LTLIBRARIES = $(component_noinst)
|
||||
endif
|
||||
|
||||
headers = common_ugni.h \
|
||||
common_ugni_ep.h
|
||||
|
||||
ugni_SOURCES = common_ugni.c \
|
||||
common_ugni_ep.c
|
||||
|
||||
#mcacomponentdir = $(opallibdir)
|
||||
lib@OPAL_LIB_PREFIX@mca_common_ugni_la_SOURCES = $(headers) $(ugni_SOURCES)
|
||||
nodist_lib@OPAL_LIB_PREFIX@mca_common_ugni_la_SOURCES = $(ugni_nodist_SOURCES)
|
||||
lib@OPAL_LIB_PREFIX@mca_common_ugni_la_LIBADD = $(common_ugni_LIBS)
|
||||
lib@OPAL_LIB_PREFIX@mca_common_ugni_la_LDFLAGS = \
|
||||
-version-info $(libmca_opal_common_ugni_so_version) \
|
||||
$(common_ugni_LDFLAGS)
|
||||
|
||||
lib@OPAL_LIB_PREFIX@mca_common_ugni_noinst_la_SOURCES = \
|
||||
$(headers) $(ugni_SOURCES)
|
||||
nodist_lib@OPAL_LIB_PREFIX@mca_common_ugni_noinst_la_SOURCES = \
|
||||
$(ugni_nodist_SOURCES)
|
||||
lib@OPAL_LIB_PREFIX@mca_common_ugni_noinst_la_LIBADD = $(common_ugni_LIBS)
|
||||
lib@OPAL_LIB_PREFIX@mca_common_ugni_noinst_la_LDFLAGS = \
|
||||
-module -avoid-version $(common_ugni_LDFLAGS)
|
||||
|
||||
# These two rules will sym link the "noinst" libtool library filename
|
||||
# to the installable libtool library filename in the case where we are
|
||||
# compiling this component statically (case 2, described above).
|
||||
|
||||
V=0
|
||||
OMPI_V_LN_SCOMP = $(ompi__v_LN_SCOMP_$V)
|
||||
ompi__v_LN_SCOMP_ = $(ompi__v_LN_SCOMP_$AM_DEFAULT_VERBOSITY)
|
||||
ompi__v_LN_SCOMP_0 = @echo " LN_S " `basename $(component_install)`;
|
||||
|
||||
all-local:
|
||||
$(OMPI_V_LN_SCOMP) if test -z "$(lib_LTLIBRARIES)"; then \
|
||||
rm -f "$(component_install)"; \
|
||||
$(LN_S) "$(component_noinst)" "$(component_install)"; \
|
||||
fi
|
||||
|
||||
clean-local:
|
||||
if test -z "$(mcacomponent_LTLIBRARIES)"; then \
|
||||
rm -f "$(component_install)"; \
|
||||
fi
|
@ -1,301 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* Copyright (c) 2014 Research Organization for Information Science
|
||||
* and Technology (RIST). All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
|
||||
#include "common_ugni.h"
|
||||
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/dss/dss.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
|
||||
/* NTH: we need some options from the btl */
|
||||
#include "opal/mca/btl/ugni/btl_ugni.h"
|
||||
|
||||
/* Number of outstanding opal_common_ugni_init() calls; incremented by
 * opal_common_ugni_init() and decremented by opal_common_ugni_fini(). */
static int opal_common_ugni_module_ref_count = 0;
|
||||
/* Shared uGNI state (ptag, cookie, communication domain handle, devices);
 * zero-initialized here and filled in by opal_common_ugni_init(). */
opal_common_ugni_module_t opal_common_ugni_module = {0};
|
||||
|
||||
/* MCA component descriptor for the "ugni" common component, version 1.0.0.
 * opal_common_ugni_send_modex() passes its address to OPAL_MODEX_SEND. */
mca_base_component_t opal_common_ugni_component = {
|
||||
OPAL_MCA_BASE_VERSION_2_1_0("common", 1, 0, 0),
|
||||
.mca_component_name = "ugni",
|
||||
.mca_component_major_version = 1,
|
||||
.mca_component_minor_version = 0,
|
||||
.mca_component_release_version = 0,
|
||||
};
|
||||
|
||||
static inline int
|
||||
get_ptag(uint8_t *out_ptag)
|
||||
{
|
||||
/* TODO no need for tmp */
|
||||
char *ptr;
|
||||
uint8_t tmp_ptag;
|
||||
|
||||
if (NULL == (ptr = getenv("PMI_GNI_PTAG"))) {
|
||||
/* TODO add err msg - better rc? */
|
||||
return OPAL_ERR_NOT_FOUND;
|
||||
}
|
||||
errno = 0;
|
||||
tmp_ptag = (uint8_t)strtoul (ptr, (char **)NULL, 10);
|
||||
if (0 != errno) {
|
||||
/* TODO add err msg - better rc? */
|
||||
return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
|
||||
}
|
||||
*out_ptag = tmp_ptag;
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static inline int get_cookie (uint32_t *out_cookie)
|
||||
{
|
||||
/* TODO no need for tmp */
|
||||
char *ptr;
|
||||
uint32_t tmp_cookie;
|
||||
|
||||
if (NULL == (ptr = getenv("PMI_GNI_COOKIE"))) {
|
||||
/* TODO add err msg - better rc? */
|
||||
return OPAL_ERR_NOT_FOUND;
|
||||
}
|
||||
errno = 0;
|
||||
tmp_cookie = (uint32_t) strtoul (ptr, NULL, 10);
|
||||
if (0 != errno) {
|
||||
/* TODO add err msg - better rc? */
|
||||
return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
|
||||
}
|
||||
|
||||
*out_cookie = tmp_cookie;
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
opal_common_ugni_get_nic_address(int device_id)
|
||||
{
|
||||
unsigned int address, cpu_id;
|
||||
gni_return_t status;
|
||||
int i, alps_dev_id = -1;
|
||||
char *token,*p_ptr;
|
||||
|
||||
p_ptr = getenv("PMI_GNI_DEV_ID");
|
||||
if (!p_ptr) {
|
||||
status = GNI_CdmGetNicAddress(device_id, &address, &cpu_id);
|
||||
if(status != GNI_RC_SUCCESS) {
|
||||
opal_output (0, "FAILED:GNI_CdmGetNicAddress returned error %d", status);
|
||||
return (unsigned int)-1;
|
||||
}
|
||||
return address;
|
||||
}
|
||||
|
||||
while (NULL != (token = strtok(p_ptr, ":"))) {
|
||||
alps_dev_id = atoi(token);
|
||||
if (alps_dev_id == device_id) {
|
||||
break;
|
||||
}
|
||||
p_ptr = NULL;
|
||||
}
|
||||
|
||||
if (OPAL_UNLIKELY(-1 == alps_dev_id)) {
|
||||
return (unsigned int)-1;
|
||||
}
|
||||
|
||||
p_ptr = getenv("PMI_GNI_LOC_ADDR");
|
||||
if (OPAL_UNLIKELY(NULL == p_ptr)) {
|
||||
return (unsigned int)-1;
|
||||
}
|
||||
|
||||
i = 0;
|
||||
while (NULL != (token = strtok(p_ptr, ":"))) {
|
||||
if (i == alps_dev_id) {
|
||||
return strtoul (token, NULL, 10);
|
||||
}
|
||||
p_ptr = NULL;
|
||||
++i;
|
||||
}
|
||||
|
||||
return (unsigned int)-1;
|
||||
}
|
||||
|
||||
/**
 * Initialize one uGNI device and attach it to the communication domain.
 *
 * Records the device id, looks up the NIC address, constructs the device
 * lock and calls GNI_CdmAttach() on opal_common_ugni_module.cd_handle.
 *
 * @param device     device structure to fill in
 * @param device_id  minor number of the Gemini NIC
 *
 * @return OPAL_SUCCESS, or the OPAL translation of the GNI_CdmAttach() error.
 *         On failure the device lock has been destructed again.
 */
static int opal_common_ugni_device_init (opal_common_ugni_device_t *device,
                                         int device_id)
{
    gni_return_t grc;

    /* Look up the NIC address */
    device->dev_id = device_id; /* Minor number of the Gemini NIC */

    device->dev_addr = opal_common_ugni_get_nic_address (device->dev_id);

    OPAL_OUTPUT((-1, "Got NIC Addr: 0x%08x, CPU ID: %d", device->dev_addr, device->dev_id));

    OBJ_CONSTRUCT(&device->dev_lock,opal_mutex_t);

    /* Attach device to the communication domain */
    grc = GNI_CdmAttach (opal_common_ugni_module.cd_handle, device->dev_id,
                         &device->dev_pe_addr, &device->dev_handle);
    if (GNI_RC_SUCCESS != grc) {
        OPAL_OUTPUT((0, "Error: Attaching to communication domain %d\n", grc));
        /* do not leak the lock constructed above */
        OBJ_DESTRUCT(&device->dev_lock);
        return opal_common_rc_ugni_to_opal (grc);
    }

    return OPAL_SUCCESS;
}
|
||||
|
||||
/**
 * Finalize a uGNI device. Currently nothing is released here.
 *
 * @param dev  device to finalize (unused)
 *
 * @return always OPAL_SUCCESS.
 */
static int opal_common_ugni_device_fini (opal_common_ugni_device_t *dev)
{
    (void) dev;

    return OPAL_SUCCESS;
}
|
||||
|
||||
/*
|
||||
* Send local device information and other information
|
||||
* required for setup
|
||||
*/
|
||||
static int opal_common_ugni_send_modex (int my_cdm_id)
|
||||
{
|
||||
uint32_t modex_size, total_msg_size, msg_offset;
|
||||
struct opal_common_ugni_modex_t modex;
|
||||
char *modex_msg;
|
||||
int rc, i;
|
||||
|
||||
modex_size = sizeof (struct opal_common_ugni_modex_t);
|
||||
total_msg_size = opal_common_ugni_module.device_count * modex_size;
|
||||
|
||||
modex_msg = (char *) malloc (total_msg_size);
|
||||
if (NULL == modex_msg) {
|
||||
OPAL_OUTPUT((-1, "Error allocating memory for modex @ %s:%d",
|
||||
__FILE__, __LINE__));
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* pack modex for all available devices */
|
||||
for (i = 0, msg_offset = 0; i < opal_common_ugni_module.device_count ; ++i) {
|
||||
opal_common_ugni_device_t *dev = opal_common_ugni_module.devices + i;
|
||||
|
||||
modex.addr = dev->dev_addr;
|
||||
modex.id = my_cdm_id;
|
||||
|
||||
memcpy ((void *)((uintptr_t) modex_msg + msg_offset),
|
||||
(void *)&modex, modex_size);
|
||||
|
||||
msg_offset += modex_size;
|
||||
}
|
||||
|
||||
/*
|
||||
* need global for edge cases like MPI_Comm_spawn support with
|
||||
* new ranks started on the same nodes as the spawnee ranks, etc.
|
||||
*/
|
||||
|
||||
OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
|
||||
&opal_common_ugni_component,
|
||||
modex_msg, total_msg_size);
|
||||
|
||||
free(modex_msg);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
int opal_common_ugni_fini (void)
|
||||
{
|
||||
int i, rc;
|
||||
|
||||
if (0 == opal_common_ugni_module_ref_count) {
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
if (1 == opal_common_ugni_module_ref_count) {
|
||||
/* tear down component */
|
||||
if (opal_common_ugni_module.devices) {
|
||||
/* finalize devices */
|
||||
for (i = 0 ; i < opal_common_ugni_module.device_count ; ++i) {
|
||||
opal_common_ugni_device_fini (opal_common_ugni_module.devices + i);
|
||||
}
|
||||
|
||||
free (opal_common_ugni_module.devices);
|
||||
opal_common_ugni_module.devices = NULL;
|
||||
}
|
||||
|
||||
/* finally, tear down the communication domain */
|
||||
rc = GNI_CdmDestroy (opal_common_ugni_module.cd_handle);
|
||||
if (GNI_RC_SUCCESS != rc) {
|
||||
OPAL_OUTPUT((-1, "error destroying cdm"));
|
||||
}
|
||||
}
|
||||
|
||||
opal_common_ugni_module_ref_count--;
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
int opal_common_ugni_init (void)
|
||||
{
|
||||
int modes, rc, i;
|
||||
uint32_t my_cdm_id;
|
||||
|
||||
opal_common_ugni_module_ref_count ++;
|
||||
|
||||
if (opal_common_ugni_module_ref_count > 1) {
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
/* use pid for my_cdm_id. Although its not stated in the uGNI
|
||||
documentation, the cdm_id only needs to be unique
|
||||
within a node for a given ptag/cookie tuple */
|
||||
|
||||
my_cdm_id = getpid(); /*TODO: eventually need something else for thread-hot support */
|
||||
|
||||
/* pull settings from ugni btl */
|
||||
opal_common_ugni_module.rdma_max_retries =
|
||||
mca_btl_ugni_component.rdma_max_retries;
|
||||
|
||||
/* Create a communication domain */
|
||||
|
||||
modes = GNI_CDM_MODE_FORK_FULLCOPY | GNI_CDM_MODE_CACHED_AMO_ENABLED |
|
||||
GNI_CDM_MODE_ERR_NO_KILL | GNI_CDM_MODE_FAST_DATAGRAM_POLL |
|
||||
GNI_CDM_MODE_FMA_SHARED;
|
||||
|
||||
/* collect uGNI information */
|
||||
rc = get_ptag(&opal_common_ugni_module.ptag);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
rc = get_cookie(&opal_common_ugni_module.cookie);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* create a communication domain */
|
||||
rc = GNI_CdmCreate (my_cdm_id, opal_common_ugni_module.ptag,
|
||||
opal_common_ugni_module.cookie, modes,
|
||||
&opal_common_ugni_module.cd_handle);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
|
||||
OPAL_OUTPUT((0, "Error: Creating communication domain %d\n",rc));
|
||||
return opal_common_rc_ugni_to_opal (rc);
|
||||
}
|
||||
|
||||
/* setup uGNI devices. we only support one device atm */
|
||||
opal_common_ugni_module.device_count = 1;
|
||||
opal_common_ugni_module.devices = calloc (opal_common_ugni_module.device_count,
|
||||
sizeof (opal_common_ugni_device_t));
|
||||
|
||||
for (i = 0 ; i < opal_common_ugni_module.device_count ; ++i) {
|
||||
rc = opal_common_ugni_device_init (opal_common_ugni_module.devices + i, i);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
OPAL_OUTPUT((-1, "error initializing uGNI device"));
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
|
||||
/* send ugni modex */
|
||||
opal_common_ugni_send_modex (my_cdm_id);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
@ -1,117 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "opal_config.h"
|
||||
|
||||
#include "opal/util/output.h"
|
||||
#include "opal/util/proc.h"
|
||||
#include "opal/class/opal_list.h"
|
||||
#include "opal/include/opal/prefetch.h"
|
||||
#include "opal_stdint.h"
|
||||
|
||||
#include <errno.h>
|
||||
#include <stdint.h>
|
||||
#include <sys/types.h>
|
||||
#include <assert.h>
|
||||
#include <sys/time.h>
|
||||
#include <gni_pub.h>
|
||||
|
||||
#include "common_ugni_ep.h"
|
||||
|
||||
#if !defined(MPI_COMMON_UGNI_H)
|
||||
#define MPI_COMMON_UGNI_H
|
||||
|
||||
struct opal_common_ugni_modex_t {
|
||||
uint32_t addr;
|
||||
int id;
|
||||
gni_mem_handle_t irq_memhndl;
|
||||
};
|
||||
typedef struct opal_common_ugni_modex_t opal_common_ugni_modex_t;
|
||||
|
||||
struct opal_common_ugni_device_t {
|
||||
opal_object_t super;
|
||||
|
||||
gni_nic_handle_t dev_handle;
|
||||
|
||||
/* Minor number of the Gemini NIC */
|
||||
int32_t dev_id;
|
||||
uint32_t dev_pe_addr;
|
||||
uint32_t dev_addr;
|
||||
uint32_t dev_cpu_id;
|
||||
|
||||
size_t dev_ep_count;
|
||||
opal_mutex_t dev_lock;
|
||||
gni_mem_handle_t smsg_irq_mhndl;
|
||||
void *btl_ctx;
|
||||
};
|
||||
typedef struct opal_common_ugni_device_t opal_common_ugni_device_t;
|
||||
|
||||
struct opal_common_ugni_module_t {
|
||||
/* protection tag */
|
||||
uint8_t ptag;
|
||||
|
||||
/* unique id for this process assigned by the system */
|
||||
uint32_t cookie;
|
||||
|
||||
/* communication domain handle */
|
||||
gni_cdm_handle_t cd_handle;
|
||||
|
||||
/* device count. to be used if we have more than 1 common per ugni device */
|
||||
int device_count;
|
||||
opal_common_ugni_device_t *devices;
|
||||
|
||||
int rdma_max_retries;
|
||||
};
|
||||
typedef struct opal_common_ugni_module_t opal_common_ugni_module_t;
|
||||
|
||||
struct opal_common_ugni_post_desc_t {
|
||||
gni_post_descriptor_t base;
|
||||
|
||||
opal_common_ugni_endpoint_t *endpoint;
|
||||
int tries;
|
||||
};
|
||||
typedef struct opal_common_ugni_post_desc_t opal_common_ugni_post_desc_t;
|
||||
|
||||
extern opal_common_ugni_module_t opal_common_ugni_module;
|
||||
extern mca_base_component_t opal_common_ugni_component;
|
||||
|
||||
static inline int
|
||||
opal_common_rc_ugni_to_opal (gni_return_t rc)
|
||||
{
|
||||
int codes[] = {OPAL_SUCCESS,
|
||||
OPAL_ERR_RESOURCE_BUSY,
|
||||
OPAL_ERR_BAD_PARAM,
|
||||
OPAL_ERR_OUT_OF_RESOURCE,
|
||||
OPAL_ERR_TIMEOUT,
|
||||
OPAL_ERR_PERM,
|
||||
OPAL_ERROR,
|
||||
OPAL_ERR_BAD_PARAM,
|
||||
OPAL_ERR_BAD_PARAM,
|
||||
OPAL_ERR_NOT_FOUND,
|
||||
OPAL_ERR_VALUE_OUT_OF_BOUNDS,
|
||||
OPAL_ERROR,
|
||||
OPAL_ERR_NOT_SUPPORTED,
|
||||
OPAL_ERR_OUT_OF_RESOURCE};
|
||||
return codes[rc];
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize uGNI communication domain and device(s).
|
||||
*/
|
||||
int opal_common_ugni_init (void);
|
||||
|
||||
/*
|
||||
* Finalize uGNI communication domain and device(s).
|
||||
*/
|
||||
int opal_common_ugni_fini (void);
|
||||
|
||||
#endif /* MPI_COMMON_UGNI_H */
|
@ -1,118 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#include "common_ugni.h"
|
||||
#include "opal/mca/pmix/pmix.h"
|
||||
|
||||
OBJ_CLASS_INSTANCE(opal_common_ugni_endpoint_t, opal_object_t, NULL, NULL);
|
||||
|
||||
int opal_common_ugni_endpoint_for_proc (opal_common_ugni_device_t *dev, opal_proc_t *peer_proc,
|
||||
opal_common_ugni_endpoint_t **ep)
|
||||
{
|
||||
opal_common_ugni_endpoint_t *endpoint;
|
||||
opal_common_ugni_modex_t *modex;
|
||||
size_t msg_size;
|
||||
int rc;
|
||||
|
||||
assert (NULL != dev && NULL != ep && peer_proc);
|
||||
|
||||
endpoint = OBJ_NEW(opal_common_ugni_endpoint_t);
|
||||
if (OPAL_UNLIKELY(NULL == endpoint)) {
|
||||
assert (0);
|
||||
return OPAL_ERR_OUT_OF_RESOURCE;
|
||||
}
|
||||
|
||||
/* Receive the modex */
|
||||
OPAL_MODEX_RECV(rc, &opal_common_ugni_component,
|
||||
&peer_proc->proc_name, (void **)&modex, &msg_size);
|
||||
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
|
||||
OPAL_OUTPUT((-1, "btl/ugni error receiving modex"));
|
||||
return rc;
|
||||
}
|
||||
|
||||
endpoint->ep_rem_addr = modex->addr;
|
||||
endpoint->ep_rem_id = modex->id;
|
||||
endpoint->ep_rem_irq_memhndl = modex->irq_memhndl;
|
||||
|
||||
endpoint->dev = dev;
|
||||
|
||||
*ep = endpoint;
|
||||
|
||||
free (modex);
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * Give back a common endpoint by dropping the caller's reference to it.
 *
 * @param[IN] ep  endpoint to release; must not be NULL
 */
void opal_common_ugni_endpoint_return (opal_common_ugni_endpoint_t *ep)
{
    /* passing NULL is a caller bug; only caught in builds with asserts on */
    assert (ep != NULL);

    OBJ_RELEASE(ep);
}
|
||||
|
||||
int opal_common_ugni_ep_create (opal_common_ugni_endpoint_t *cep, gni_cq_handle_t cq,
|
||||
gni_ep_handle_t *ep_handle)
|
||||
{
|
||||
gni_return_t grc;
|
||||
|
||||
if (OPAL_UNLIKELY(NULL == cep)) {
|
||||
assert (0);
|
||||
return OPAL_ERR_BAD_PARAM;
|
||||
}
|
||||
|
||||
/* create a uGNI endpoint handle and bind it to the remote peer */
|
||||
OPAL_THREAD_LOCK(&cep->dev->dev_lock);
|
||||
grc = GNI_EpCreate (cep->dev->dev_handle, cq, ep_handle);
|
||||
OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);
|
||||
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
|
||||
return opal_common_rc_ugni_to_opal (grc);
|
||||
}
|
||||
|
||||
OPAL_THREAD_LOCK(&cep->dev->dev_lock);
|
||||
grc = GNI_EpBind (*ep_handle, cep->ep_rem_addr, cep->ep_rem_id);
|
||||
OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);
|
||||
|
||||
if (GNI_RC_SUCCESS != grc) {
|
||||
OPAL_THREAD_LOCK(&cep->dev->dev_lock);
|
||||
GNI_EpDestroy (*ep_handle);
|
||||
OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);
|
||||
return opal_common_rc_ugni_to_opal (grc);
|
||||
}
|
||||
|
||||
return OPAL_SUCCESS;
|
||||
}
|
||||
|
||||
/**
 * Unbind and destroy a uGNI endpoint handle.
 *
 * Teardown is best effort: failures of GNI_EpUnbind or GNI_EpDestroy are
 * currently not reported and the handle is cleared regardless.
 *
 * @param[IN,OUT] ep  endpoint handle to tear down; set to 0 on return. A NULL
 *                    pointer or a handle that is already 0 is a no-op.
 *
 * @returns OPAL_SUCCESS always
 */
int opal_common_ugni_ep_destroy (gni_ep_handle_t *ep)
{
    int rc;

    if (NULL == ep || 0 == *ep) {
        return OPAL_SUCCESS;
    }

    /* TODO: need to fix, may be outstanding tx's, etc. */
    rc = GNI_EpUnbind (*ep);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        /* should warn */
    }

    /* capture the destroy result; previously the check below re-tested the
     * stale return code of GNI_EpUnbind */
    rc = GNI_EpDestroy (*ep);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
        /* should warn */
    }

    /* mark the handle as released so a repeated call is a no-op */
    *ep = 0;

    return OPAL_SUCCESS;
}
|
||||
|
@ -1,63 +0,0 @@
|
||||
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
|
||||
/*
|
||||
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
|
||||
* reserved.
|
||||
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#if !defined(MPI_COMMON_UGNI_EP_H)
|
||||
#define MPI_COMMON_UGNI_EP_H
|
||||
|
||||
struct opal_common_ugni_device_t;
|
||||
|
||||
struct opal_common_ugni_endpoint_t {
|
||||
opal_object_t super;
|
||||
uint32_t ep_rem_addr, ep_rem_id; /**< remote information */
|
||||
gni_mem_handle_t ep_rem_irq_memhndl;
|
||||
struct opal_common_ugni_device_t *dev; /**< device this endpoint is using */
|
||||
};
|
||||
typedef struct opal_common_ugni_endpoint_t opal_common_ugni_endpoint_t;
|
||||
|
||||
OBJ_CLASS_DECLARATION(opal_common_ugni_endpoint_t);
|
||||
|
||||
/*
|
||||
* Get (and retain) a reference to an endpoint to peer_proc. This endpoint
|
||||
* needs to be returned with opal_common_ugni_endpoint_return.
|
||||
*
|
||||
* @param[IN] dev uGNI device this endpoint should be bound to.
|
||||
* @param[IN] peer_proc remote peer the endpoint will be connected to.
|
||||
* @param[OUT] ep uGNI endpoint for the peer
|
||||
*/
|
||||
int opal_common_ugni_endpoint_for_proc (struct opal_common_ugni_device_t *dev, opal_proc_t *peer_proc,
|
||||
opal_common_ugni_endpoint_t **ep);
|
||||
|
||||
/*
|
||||
* Allocate and bind a uGNI endpoint handle to the remote peer.
|
||||
*
|
||||
* @param[IN] cep common endpoint
|
||||
* @param[IN] cq completion queue
|
||||
* @param[OUT] ep_handle uGNI endpoint handle
|
||||
*/
|
||||
int opal_common_ugni_ep_create (opal_common_ugni_endpoint_t *cep, gni_cq_handle_t cq, gni_ep_handle_t *ep_handle);
|
||||
|
||||
/*
|
||||
* Unbind and free the uGNI endpoint handle.
|
||||
*
|
||||
* @param[IN] ep_handle uGNI endpoint handle to unbind and release
|
||||
*/
|
||||
int opal_common_ugni_ep_destroy (gni_ep_handle_t *ep_handle);
|
||||
|
||||
/*
|
||||
* Return (and possibly free) a common endpoint. The endpoint may not be used
|
||||
* once it is returned.
|
||||
*
|
||||
* @param[IN] ep uGNI endpoint to return
|
||||
*/
|
||||
void opal_common_ugni_endpoint_return (opal_common_ugni_endpoint_t *ep);
|
||||
|
||||
#endif /* MPI_COMMON_UGNI_EP_H */
|
@ -1,54 +0,0 @@
|
||||
# -*- shell-script -*-
|
||||
#
|
||||
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
|
||||
# University Research and Technology
|
||||
# Corporation. All rights reserved.
|
||||
# Copyright (c) 2004-2005 The University of Tennessee and The University
|
||||
# of Tennessee Research Foundation. All rights
|
||||
# reserved.
|
||||
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
|
||||
# University of Stuttgart. All rights reserved.
|
||||
# Copyright (c) 2004-2006 The Regents of the University of California.
|
||||
# All rights reserved.
|
||||
# Copyright (c) 2006 QLogic Corp. All rights reserved.
|
||||
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2011 Los Alamos National Security, LLC.
|
||||
# All rights reserved.
|
||||
# $COPYRIGHT$
|
||||
#
|
||||
# Additional copyrights may follow
|
||||
#
|
||||
# $HEADER$
|
||||
#
|
||||
|
||||
# OPAL_CHECK_UGNI(prefix, [action-if-found], [action-if-not-found])
|
||||
# --------------------------------------------------------
|
||||
# check if GNI support can be found. sets prefix_{CPPFLAGS,
|
||||
# LDFLAGS, LIBS} as needed and runs action-if-found if there is
|
||||
# support, otherwise executes action-if-not-found
|
||||
#
|
||||
# NOTES
|
||||
# on Cray XE6 systems, the GNI development header (gni_pub.h) is in a
|
||||
# completely different place than the ugni library (libugni).
|
||||
#
|
||||
# EXAMPLE CONFIGURE USAGE:
|
||||
# --with-ugni=/base/path/to/libugni --with-ugni-includedir=/path/to/gni_pub.h
|
||||
#
|
||||
# --with-ugni=/opt/cray/ugni/default --with-ugni-includedir=/opt/cray/gni-headers/default/include
|
||||
|
||||
# MCA_opal_common_ugni_CONFIG([action-if-can-compile],
#                             [action-if-cant-compile])
# ------------------------------------------------
# Configure the common/ugni component: runs the first action when
# OPAL_CHECK_UGNI reports uGNI support, the second one otherwise.
AC_DEFUN([MCA_opal_common_ugni_CONFIG],[
    AC_CONFIG_FILES([opal/mca/common/ugni/Makefile])

    # probe for uGNI using the common_ugni prefix for the result variables
    OPAL_CHECK_UGNI([common_ugni],
                    [common_ugni_happy="yes"],
                    [common_ugni_happy="no"])

    AS_IF([test "$common_ugni_happy" = "yes"], [$1], [$2])

    # substitute in the things needed to build ugni
    AC_SUBST([common_ugni_CPPFLAGS])
    AC_SUBST([common_ugni_LDFLAGS])
    AC_SUBST([common_ugni_LIBS])
])dnl
|
@ -1,7 +0,0 @@
|
||||
#
|
||||
# owner/status file
|
||||
# owner: institution that is responsible for this package
|
||||
# status: e.g. active, maintenance, unmaintained
|
||||
#
|
||||
owner: LANL
|
||||
status: active
|
Загрузка…
x
Ссылка в новой задаче
Block a user