
Merge pull request #3149 from hjelmn/btl_ugni_2_0

Improve multi-threaded RMA performance of the ugni btl
This commit is contained in:
Nathan Hjelm 2017-03-13 16:28:41 -06:00 committed by GitHub
parent e4a35f2dbf d5aaeb74b6
commit 9410574253
27 changed files with 2006 additions and 1611 deletions

View File

@ -1,6 +1,6 @@
# -*- indent-tabs-mode:nil -*-
#
# Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
# Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
#
@ -40,14 +40,15 @@ ugni_SOURCES = \
btl_ugni_smsg.c \
btl_ugni_progress_thread.c \
btl_ugni_prepare.h \
btl_ugni_atomic.c
btl_ugni_atomic.c \
btl_ugni_init.c \
btl_ugni_device.h
mcacomponentdir = $(opallibdir)
mcacomponent_LTLIBRARIES = $(component_install)
mca_btl_ugni_la_SOURCES = $(ugni_SOURCES)
nodist_mca_btl_ugni_la_SOURCES = $(ugni_nodist_SOURCES)
mca_btl_ugni_la_LIBADD = $(btl_ugni_LIBS) \
$(OPAL_TOP_BUILDDIR)/opal/mca/common/ugni/lib@OPAL_LIB_PREFIX@mca_common_ugni.la
mca_btl_ugni_la_LIBADD = $(btl_ugni_LIBS)
mca_btl_ugni_la_LDFLAGS = -module -avoid-version $(btl_ugni_LDFLAGS)
noinst_LTLIBRARIES = $(component_noinst)

View File

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
@ -34,7 +34,6 @@
#include "opal/mca/btl/base/btl_base_error.h"
#include "opal/class/opal_hash_table.h"
#include "opal/class/opal_free_list.h"
#include "opal/mca/common/ugni/common_ugni.h"
#include <errno.h>
#include <stdint.h>
@ -48,6 +47,23 @@
#define MCA_BTL_UGNI_CONNECT_DIRECTED_ID 0x8000000000000000ull
#define MCA_BTL_UGNI_DATAGRAM_MASK 0x8000000000000000ull
/** maximum number of supported virtual devices */
#define MCA_BTL_UGNI_MAX_DEV_HANDLES 128
/** number of rdma completion queue items to remove per progress loop */
#define MCA_BTL_UGNI_COMPLETIONS_PER_LOOP 16
/**
* Modex data
*/
struct mca_btl_ugni_modex_t {
/** GNI NIC address */
uint32_t addr;
/** CDM identifier (base) */
int id;
};
typedef struct mca_btl_ugni_modex_t mca_btl_ugni_modex_t;
/* ompi and smsg endpoint attributes */
typedef struct mca_btl_ugni_endpoint_attr_t {
opal_process_name_t proc_name;
@ -61,12 +77,73 @@ enum {
MCA_BTL_UGNI_RCACHE_GRDMA
};
enum mca_btl_ugni_free_list_id_t {
/* eager fragment list (registered) */
MCA_BTL_UGNI_LIST_EAGER_SEND,
MCA_BTL_UGNI_LIST_EAGER_RECV,
/* SMSG fragment list (unregistered) */
MCA_BTL_UGNI_LIST_SMSG,
/* RDMA fragment list */
MCA_BTL_UGNI_LIST_RDMA,
MCA_BTL_UGNI_LIST_RDMA_INT,
MCA_BTL_UGNI_LIST_MAX,
};
struct mca_btl_ugni_cq_t {
/** ugni CQ handle */
gni_cq_handle_t gni_handle;
/** number of completions expected on the CQ */
int32_t active_operations;
};
typedef struct mca_btl_ugni_cq_t mca_btl_ugni_cq_t;
/**
* GNI virtual device
*/
struct mca_btl_ugni_device_t {
/** Communication domain handle */
gni_cdm_handle_t dev_cd_handle;
/** protection for ugni access */
volatile int32_t lock;
/** Index of device in module devices array */
int dev_index;
/** number of SMSG connections */
volatile int32_t smsg_connections;
/** uGNI device handle */
gni_nic_handle_t dev_handle;
/** uGNI rdma completion queue */
mca_btl_ugni_cq_t dev_rdma_local_cq;
/** local rdma completion queue (async) */
mca_btl_ugni_cq_t dev_rdma_local_irq_cq;
/** local SMSG completion queue */
mca_btl_ugni_cq_t dev_smsg_local_cq;
/** IRQ memory handle for this device */
gni_mem_handle_t smsg_irq_mhndl;
/** RDMA endpoint free list */
opal_free_list_t endpoints;
/** post descriptors pending resources */
opal_list_t pending_post;
};
typedef struct mca_btl_ugni_device_t mca_btl_ugni_device_t;
typedef intptr_t (*mca_btl_ugni_device_serialize_fn_t) (mca_btl_ugni_device_t *device, void *arg);
typedef struct mca_btl_ugni_module_t {
mca_btl_base_module_t super;
bool initialized;
opal_common_ugni_device_t *device;
mca_btl_ugni_device_t devices[MCA_BTL_UGNI_MAX_DEV_HANDLES];
opal_mutex_t endpoint_lock;
size_t endpoint_count;
@ -82,9 +159,6 @@ typedef struct mca_btl_ugni_module_t {
opal_mutex_t eager_get_pending_lock;
opal_list_t eager_get_pending;
opal_mutex_t pending_descriptors_lock;
opal_list_t pending_descriptors;
opal_free_list_t post_descriptors;
mca_mpool_base_module_t *mpool;
@ -95,23 +169,11 @@ typedef struct mca_btl_ugni_module_t {
struct mca_btl_ugni_endpoint_attr_t wc_remote_attr, wc_local_attr;
gni_cq_handle_t rdma_local_cq;
gni_cq_handle_t smsg_remote_cq;
gni_cq_handle_t smsg_local_cq;
gni_cq_handle_t smsg_remote_irq_cq;
gni_cq_handle_t rdma_local_irq_cq;
/* eager fragment list (registered) */
opal_free_list_t eager_frags_send;
opal_free_list_t eager_frags_recv;
/* SMSG fragment list (unregistered) */
opal_free_list_t smsg_frags;
/* RDMA fragment list */
opal_free_list_t rdma_frags;
opal_free_list_t rdma_int_frags;
/** fragment free lists (see enum mca_btl_ugni_free_list_id_t) */
opal_free_list_t frags_lists[MCA_BTL_UGNI_LIST_MAX];
/* lock for this list */
opal_mutex_t ep_wait_list_lock;
@ -197,10 +259,62 @@ typedef struct mca_btl_ugni_component_t {
/* Indicate whether progress thread allowed */
bool progress_thread_enabled;
/** Number of ugni device contexts to create per GNI device */
int virtual_device_count;
/** Protection tag */
uint8_t ptag;
/** Unique id for this process assigned by the system */
uint32_t cookie;
/** Starting value of communication identifier */
uint32_t cdm_id_base;
/** GNI CDM flags */
uint32_t cdm_flags;
/** NIC address */
uint32_t dev_addr;
} mca_btl_ugni_component_t;
int mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
opal_common_ugni_device_t *device);
/* Global structures */
OPAL_MODULE_DECLSPEC extern mca_btl_ugni_component_t mca_btl_ugni_component;
OPAL_MODULE_DECLSPEC extern mca_btl_ugni_module_t mca_btl_ugni_module;
/**
* Get a virtual device for communication
*/
static inline mca_btl_ugni_device_t *mca_btl_ugni_ep_get_device (mca_btl_ugni_module_t *ugni_module)
{
static volatile uint32_t device_index = (uint32_t) 0;
uint32_t dev_index;
/* don't really care if the device index is atomically updated */
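/* note: masking with (virtual_device_count - 1) acts as a modulo only when the count is a power of two */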
dev_index = (device_index++) & (mca_btl_ugni_component.virtual_device_count - 1);
return ugni_module->devices + dev_index;
}
static inline int mca_btl_rc_ugni_to_opal (gni_return_t rc)
{
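/* lookup table indexed by the gni_return_t value (GNI_RC_SUCCESS == 0, GNI_RC_NOT_DONE == 1, ...) */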
static int codes[] = {OPAL_SUCCESS,
OPAL_ERR_RESOURCE_BUSY,
OPAL_ERR_BAD_PARAM,
OPAL_ERR_OUT_OF_RESOURCE,
OPAL_ERR_TIMEOUT,
OPAL_ERR_PERM,
OPAL_ERROR,
OPAL_ERR_BAD_PARAM,
OPAL_ERR_BAD_PARAM,
OPAL_ERR_NOT_FOUND,
OPAL_ERR_VALUE_OUT_OF_BOUNDS,
OPAL_ERROR,
OPAL_ERR_NOT_SUPPORTED,
OPAL_ERR_OUT_OF_RESOURCE};
return codes[rc];
}
/**
* BML->BTL notification of change in the process list.
@ -324,10 +438,32 @@ typedef struct mca_btl_ugni_reg_t {
mca_btl_base_registration_handle_t handle;
} mca_btl_ugni_reg_t;
/* Global structures */
/**
* Initialize uGNI support.
*/
int mca_btl_ugni_init (void);
OPAL_MODULE_DECLSPEC extern mca_btl_ugni_component_t mca_btl_ugni_component;
OPAL_MODULE_DECLSPEC extern mca_btl_ugni_module_t mca_btl_ugni_module;
/**
* Finalize uGNI support.
*/
int mca_btl_ugni_fini (void);
int mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module);
/**
* Initialize a virtual device for device index 0.
*
* @param[inout] device Device to initialize
* @param[in] virtual_device_id Virtual device identifier (up to max handles)
*/
int mca_btl_ugni_device_init (mca_btl_ugni_device_t *device, int virtual_device_id);
/**
* Finalize a virtual device.
*
* @param[in] device Device to finalize
*/
int mca_btl_ugni_device_fini (mca_btl_ugni_device_t *dev);
/* Get a unique 64-bit id for the process name */
static inline uint64_t mca_btl_ugni_proc_name_to_id (opal_process_name_t name) {
@ -338,6 +474,57 @@ static inline uint64_t mca_btl_ugni_proc_name_to_id (opal_process_name_t name) {
int mca_btl_ugni_spawn_progress_thread(struct mca_btl_base_module_t* btl);
int mca_btl_ugni_kill_progress_thread(void);
/**
* Try to lock a uGNI device for exclusive access
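*
* @returns 0 if the lock was acquired, non-zero if it is already held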
*/
static inline int mca_btl_ugni_device_trylock (mca_btl_ugni_device_t *device)
{
/* checking the lock non-atomically first can reduce the number of
* unnecessary atomic operations. */
return (device->lock || opal_atomic_swap_32 (&device->lock, 1));
}
/**
* Lock a uGNI device for exclusive access
*/
static inline void mca_btl_ugni_device_lock (mca_btl_ugni_device_t *device)
{
while (mca_btl_ugni_device_trylock (device));
}
/**
* Release exclusive access to the device
*/
static inline void mca_btl_ugni_device_unlock (mca_btl_ugni_device_t *device)
{
opal_atomic_wmb ();
device->lock = 0;
}
/**
* Serialize an operation on a uGNI device
*
* @param[in] device uGNI device
* @param[in] fn function to serialize
* @param[in] arg function argument
*/
static inline intptr_t mca_btl_ugni_device_serialize (mca_btl_ugni_device_t *device,
mca_btl_ugni_device_serialize_fn_t fn, void *arg)
{
intptr_t rc;
if (!opal_using_threads ()) {
return fn (device, arg);
}
/* NTH: for now the device is just protected by a spin lock but this will change in the future */
mca_btl_ugni_device_lock (device);
rc = fn (device, arg);
mca_btl_ugni_device_unlock (device);
return rc;
}
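/* Illustrative sketch of how a uGNI call is wrapped for use with
 * mca_btl_ugni_device_serialize(): the arguments are packed into a small struct and the
 * raw GNI_* call lives in a device callback. The example_cq_probe names below are
 * hypothetical and are only meant to show the pattern. */
typedef struct example_cq_probe_args_t {
    gni_cq_handle_t cq;
    gni_cq_entry_t *event;
} example_cq_probe_args_t;

static inline intptr_t example_cq_probe_device (mca_btl_ugni_device_t *device, void *arg)
{
    example_cq_probe_args_t *args = (example_cq_probe_args_t *) arg;
    /* uGNI is entered only while the device is locked (or the process is single-threaded) */
    return GNI_CqGetEvent (args->cq, args->event);
}

static inline int example_cq_probe (mca_btl_ugni_device_t *device, gni_cq_handle_t cq,
                                    gni_cq_entry_t *event)
{
    example_cq_probe_args_t args = {.cq = cq, .event = event};
    return (int) mca_btl_ugni_device_serialize (device, example_cq_probe_device, &args);
}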
/** Number of times the progress thread has woken up */
extern unsigned int mca_btl_ugni_progress_thread_wakeups;

View File

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
@ -20,7 +20,7 @@
#include "opal/include/opal/align.h"
#include "opal/mca/pmix/pmix.h"
#define INITIAL_GNI_EPS 10000
#define INITIAL_GNI_EPS 1024
static int
mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module);
@ -50,7 +50,7 @@ int mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl, size_t nprocs,
/* NTH: might want to vary this size based off the universe size (if
* one exists). the table is only used for connection lookup and
* endpoint removal. */
rc = opal_hash_table_init (&ugni_module->id_to_endpoint, 512);
rc = opal_hash_table_init (&ugni_module->id_to_endpoint, INITIAL_GNI_EPS);
if (OPAL_SUCCESS != rc) {
BTL_ERROR(("error initializing the endpoint hash. rc = %d", rc));
return rc;
@ -58,93 +58,63 @@ int mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl, size_t nprocs,
}
for (size_t i = 0 ; i < nprocs ; ++i) {
struct opal_proc_t *opal_proc = procs[i];
uint64_t proc_id = mca_btl_ugni_proc_name_to_id(opal_proc->proc_name);
/* check for an existing endpoint */
OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
if (OPAL_SUCCESS != opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) (peers + i))) {
if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) {
ugni_module->nlocal_procs++;
/* ugni is allowed on local processes to provide support for network
* atomic operations */
}
/* Create and Init endpoints */
rc = mca_btl_ugni_init_ep (ugni_module, peers + i, (mca_btl_ugni_module_t *) btl, opal_proc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
BTL_ERROR(("btl/ugni error initializing endpoint"));
return rc;
}
/* go ahead and connect the local endpoint for RDMA/CQ write */
if (opal_proc == opal_proc_local_get ()) {
ugni_module->local_ep = peers[i];
}
/* Add this endpoint to the pointer array. */
BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) peers[i]));
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, peers[i]);
++ugni_module->endpoint_count;
peers[i] = mca_btl_ugni_get_ep (btl, procs[i]);
if (NULL == peers[i]) {
continue;
}
if (procs[i] == opal_proc_local_get ()) {
ugni_module->local_ep = peers[i];
}
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
/* Set the reachable bit if necessary */
if (reachable) {
rc = opal_bitmap_set_bit (reachable, i);
(void) opal_bitmap_set_bit (reachable, i);
}
}
mca_btl_ugni_module_set_max_reg (ugni_module, ugni_module->nlocal_procs);
if (false == ugni_module->initialized) {
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.local_cq_size,
0, GNI_CQ_NOBLOCK, NULL, NULL, &ugni_module->rdma_local_cq);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating local BTE/FMA CQ"));
return opal_common_rc_ugni_to_opal (rc);
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
mca_btl_ugni_device_t *device = ugni_module->devices + i;
rc = GNI_CqCreate (device->dev_handle, mca_btl_ugni_component.local_cq_size, 0,
GNI_CQ_NOBLOCK, NULL, NULL, &device->dev_rdma_local_cq.gni_handle);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating local BTE/FMA CQ"));
return mca_btl_rc_ugni_to_opal (rc);
}
rc = GNI_CqCreate (device->dev_handle, mca_btl_ugni_component.local_cq_size,
0, GNI_CQ_NOBLOCK, NULL, NULL, &device->dev_smsg_local_cq.gni_handle);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating local SMSG CQ"));
return mca_btl_rc_ugni_to_opal (rc);
}
if (mca_btl_ugni_component.progress_thread_enabled) {
rc = GNI_CqCreate (device->dev_handle, mca_btl_ugni_component.local_cq_size,
0, GNI_CQ_BLOCKING, NULL, NULL, &device->dev_rdma_local_irq_cq.gni_handle);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating local BTE/FMA CQ"));
return mca_btl_rc_ugni_to_opal (rc);
}
}
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.local_cq_size,
0, GNI_CQ_NOBLOCK, NULL, NULL, &ugni_module->smsg_local_cq);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating local SMSG CQ"));
return opal_common_rc_ugni_to_opal (rc);
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.remote_cq_size,
rc = GNI_CqCreate (ugni_module->devices[0].dev_handle, mca_btl_ugni_component.remote_cq_size,
0, GNI_CQ_NOBLOCK, NULL, NULL, &ugni_module->smsg_remote_cq);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating remote SMSG CQ"));
return opal_common_rc_ugni_to_opal (rc);
return mca_btl_rc_ugni_to_opal (rc);
}
if (mca_btl_ugni_component.progress_thread_enabled) {
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.local_cq_size,
0, GNI_CQ_BLOCKING, NULL, NULL, &ugni_module->rdma_local_irq_cq);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating local BTE/FMA CQ"));
return opal_common_rc_ugni_to_opal (rc);
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqCreate (ugni_module->device->dev_handle, mca_btl_ugni_component.remote_cq_size,
rc = GNI_CqCreate (ugni_module->devices[0].dev_handle, mca_btl_ugni_component.remote_cq_size,
0, GNI_CQ_BLOCKING, NULL, NULL, &ugni_module->smsg_remote_irq_cq);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error creating remote SMSG CQ"));
return opal_common_rc_ugni_to_opal (rc);
return mca_btl_rc_ugni_to_opal (rc);
}
}
@ -175,15 +145,13 @@ int mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl, size_t nprocs,
return OPAL_ERR_OUT_OF_RESOURCE;
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_MemRegister(ugni_module->device->dev_handle,
rc = GNI_MemRegister(ugni_module->devices[0].dev_handle,
(unsigned long)mmap_start_addr,
4096,
ugni_module->smsg_remote_irq_cq,
GNI_MEM_READWRITE,
-1,
&ugni_module->device->smsg_irq_mhndl);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
&ugni_module->devices[0].smsg_irq_mhndl);
mca_btl_ugni_spawn_progress_thread(btl);
}
@ -198,18 +166,10 @@ int mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl,
size_t nprocs, struct opal_proc_t **procs,
struct mca_btl_base_endpoint_t **peers) {
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
size_t i;
int rc;
while (ugni_module->active_send_count) {
/* ensure all sends are complete before removing any procs */
rc = mca_btl_ugni_progress_local_smsg (ugni_module);
if (OPAL_SUCCESS != rc) {
break;
}
}
OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
for (i = 0 ; i < nprocs ; ++i) {
for (size_t i = 0 ; i < nprocs ; ++i) {
struct opal_proc_t *opal_proc = procs[i];
uint64_t proc_id = mca_btl_ugni_proc_name_to_id(opal_proc->proc_name);
mca_btl_base_endpoint_t *ep = NULL;
@ -224,10 +184,18 @@ int mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl,
--ugni_module->endpoint_count;
}
if (OPAL_PROC_ON_LOCAL_NODE(opal_proc->proc_flags)) {
--ugni_module->nlocal_procs;
}
/* remove the endpoint from the hash table */
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, NULL);
}
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
mca_btl_ugni_module_set_max_reg (ugni_module, ugni_module->nlocal_procs);
return OPAL_SUCCESS;
}
@ -244,9 +212,12 @@ struct mca_btl_base_endpoint_t *mca_btl_ugni_get_ep (struct mca_btl_base_module_
do {
rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) &ep);
if (OPAL_SUCCESS == rc) {
BTL_VERBOSE(("returning existing endpoint for proc %s", OPAL_NAME_PRINT(proc->proc_name)));
break;
}
BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) proc));
/* Create and Init endpoints */
rc = mca_btl_ugni_init_ep (ugni_module, &ep, ugni_module, proc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
@ -254,8 +225,13 @@ struct mca_btl_base_endpoint_t *mca_btl_ugni_get_ep (struct mca_btl_base_module_
break;
}
/* Add this endpoint to the pointer array. */
BTL_VERBOSE(("initialized uGNI endpoint for proc id: 0x%" PRIx64 " ptr: %p", proc_id, (void *) ep));
/* ugni is allowed on local processes to provide support for network atomic operations */
if (OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) {
++ugni_module->nlocal_procs;
}
++ugni_module->endpoint_count;
/* add this endpoint to the connection lookup table */
opal_hash_table_set_value_uint64 (&ugni_module->id_to_endpoint, proc_id, ep);
} while (0);
@ -269,10 +245,8 @@ static int ugni_reg_mem (void *reg_data, void *base, size_t size,
mca_rcache_base_registration_t *reg)
{
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) reg_data;
mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *) reg;
gni_cq_handle_t cq = NULL;
gni_return_t rc;
int flags;
gni_cq_handle_t cq = 0;
int flags, rc;
if (ugni_module->reg_count >= ugni_module->reg_max) {
return OPAL_ERR_OUT_OF_RESOURCE;
@ -293,37 +267,26 @@ static int ugni_reg_mem (void *reg_data, void *base, size_t size,
cq = ugni_module->smsg_remote_cq;
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_MemRegister (ugni_module->device->dev_handle, (uint64_t) base,
size, cq, flags, -1, &(ugni_reg->handle.gni_handle));
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
rc = mca_btl_ugni_reg_mem (ugni_module, base, size, (mca_btl_ugni_reg_t *) reg, cq, flags);
if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
opal_atomic_add_32(&ugni_module->reg_count,1);
}
opal_atomic_add_32(&ugni_module->reg_count,1);
return OPAL_SUCCESS;
return rc;
}
static int
ugni_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg)
{
mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) reg_data;
mca_btl_ugni_reg_t *ugni_reg = (mca_btl_ugni_reg_t *)reg;
gni_return_t rc;
int rc;
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_MemDeregister (ugni_module->device->dev_handle, &ugni_reg->handle.gni_handle);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
return OPAL_ERROR;
rc = mca_btl_ugni_dereg_mem (ugni_module, (mca_btl_ugni_reg_t *) reg);
if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
opal_atomic_add_32(&ugni_module->reg_count,-1);
}
opal_atomic_add_32(&ugni_module->reg_count,-1);
return OPAL_SUCCESS;
return rc;
}
static int
@ -356,7 +319,7 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
return rc;
}
rc = opal_free_list_init (&ugni_module->smsg_frags,
rc = opal_free_list_init (ugni_module->frags_lists + MCA_BTL_UGNI_LIST_SMSG,
sizeof (mca_btl_ugni_smsg_frag_t),
opal_cache_line_size, OBJ_CLASS(mca_btl_ugni_smsg_frag_t),
mca_btl_ugni_component.ugni_smsg_limit,
@ -365,13 +328,13 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
mca_btl_ugni_component.ugni_free_list_max,
mca_btl_ugni_component.ugni_free_list_inc,
NULL, 0, NULL, (opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
(void *) ugni_module);
(void *) (intptr_t) MCA_BTL_UGNI_LIST_SMSG);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("error creating smsg fragment free list"));
return rc;
}
rc = opal_free_list_init (&ugni_module->rdma_frags,
rc = opal_free_list_init (ugni_module->frags_lists + MCA_BTL_UGNI_LIST_RDMA,
sizeof (mca_btl_ugni_rdma_frag_t), 64,
OBJ_CLASS(mca_btl_ugni_rdma_frag_t),
0, opal_cache_line_size,
@ -379,17 +342,17 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
mca_btl_ugni_component.ugni_free_list_max,
mca_btl_ugni_component.ugni_free_list_inc,
NULL, 0, NULL, (opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
(void *) ugni_module);
(void *) (intptr_t) MCA_BTL_UGNI_LIST_RDMA);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
}
rc = opal_free_list_init (&ugni_module->rdma_int_frags,
rc = opal_free_list_init (ugni_module->frags_lists + MCA_BTL_UGNI_LIST_RDMA_INT,
sizeof (mca_btl_ugni_rdma_frag_t), 8,
OBJ_CLASS(mca_btl_ugni_rdma_frag_t),
0, opal_cache_line_size, 0, -1, 64,
NULL, 0, NULL, (opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
(void *) ugni_module);
(void *) (intptr_t) MCA_BTL_UGNI_LIST_RDMA_INT);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
}
@ -419,14 +382,14 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
}
ugni_module->rcache =
mca_rcache_base_module_create (rcache_name, ugni_module->device, &rcache_resources.base);
mca_rcache_base_module_create (rcache_name, ugni_module->devices, &rcache_resources.base);
if (NULL == ugni_module->rcache) {
BTL_ERROR(("error creating registration cache"));
return OPAL_ERROR;
}
rc = opal_free_list_init (&ugni_module->eager_frags_send,
rc = opal_free_list_init (ugni_module->frags_lists + MCA_BTL_UGNI_LIST_EAGER_SEND,
sizeof (mca_btl_ugni_eager_frag_t), 8,
OBJ_CLASS(mca_btl_ugni_eager_frag_t),
ugni_module->super.btl_eager_limit, 64,
@ -435,13 +398,13 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
mca_btl_ugni_component.ugni_eager_inc,
ugni_module->super.btl_mpool, 0, ugni_module->rcache,
(opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
(void *) ugni_module);
(void *) (intptr_t) MCA_BTL_UGNI_LIST_EAGER_SEND);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("error creating eager send fragment free list"));
return rc;
}
rc = opal_free_list_init (&ugni_module->eager_frags_recv,
rc = opal_free_list_init (ugni_module->frags_lists + MCA_BTL_UGNI_LIST_EAGER_RECV,
sizeof (mca_btl_ugni_eager_frag_t), 8,
OBJ_CLASS(mca_btl_ugni_eager_frag_t),
ugni_module->super.btl_eager_limit, 64,
@ -450,7 +413,7 @@ mca_btl_ugni_setup_mpools (mca_btl_ugni_module_t *ugni_module)
mca_btl_ugni_component.ugni_eager_inc,
ugni_module->super.btl_mpool, 0, ugni_module->rcache,
(opal_free_list_item_init_fn_t) mca_btl_ugni_frag_init,
(void *) ugni_module);
(void *) (intptr_t) MCA_BTL_UGNI_LIST_EAGER_RECV);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("error creating eager receive fragment free list"));
return rc;
@ -503,14 +466,22 @@ mca_btl_ugni_module_set_max_reg (mca_btl_ugni_module_t *ugni_module, int nlocal_
gni_return_t grc;
int fuzz = 20;
grc = GNI_GetJobResInfo (ugni_module->device->dev_id, opal_common_ugni_module.ptag,
grc = GNI_GetJobResInfo (0, mca_btl_ugni_component.ptag,
GNI_JOB_RES_MDD, &res_des);
if (GNI_RC_SUCCESS == grc) {
ugni_module->reg_max = (res_des.limit - fuzz) / nlocal_procs;
if (nlocal_procs) {
ugni_module->reg_max = (res_des.limit - fuzz) / nlocal_procs;
} else {
ugni_module->reg_max = 0;
}
}
#else
/* no way to determine the maximum registration count */
ugni_module->reg_max = 1200 / nlocal_procs;
if (nlocal_procs) {
ugni_module->reg_max = 1200 / nlocal_procs;
} else {
ugni_module->reg_max = 0;
}
#endif
} else if (-1 == mca_btl_ugni_component.max_mem_reg) {
ugni_module->reg_max = INT_MAX;
@ -557,7 +528,7 @@ static int mca_btl_ugni_smsg_setup (int nprocs)
grc = GNI_SmsgBufferSizeNeeded (&tmp_smsg_attrib, &mbox_size);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
BTL_ERROR(("error in GNI_SmsgBufferSizeNeeded"));
return opal_common_rc_ugni_to_opal (grc);
return mca_btl_rc_ugni_to_opal (grc);
}
mca_btl_ugni_component.smsg_mbox_size = OPAL_ALIGN(mbox_size, 64, unsigned int);

View File

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights
* Copyright (c) 2014-2017 Los Alamos National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
*
@ -95,34 +95,23 @@ int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_end
return OPAL_ERR_NOT_SUPPORTED;
}
rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
}
mca_btl_ugni_alloc_post_descriptor (endpoint, NULL, cbfunc, cbcontext, cbdata, &post_desc);
post_desc = mca_btl_ugni_alloc_post_descriptor (endpoint, NULL, cbfunc, cbcontext, cbdata);
if (OPAL_UNLIKELY(NULL == post_desc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, 0, dummy, remote_address,
init_gni_post_desc (post_desc, order, GNI_POST_AMO, 0, dummy, remote_address,
remote_handle->gni_handle, size, 0);
post_desc->desc.base.amo_cmd = gni_op;
post_desc->desc.amo_cmd = gni_op;
post_desc->desc.base.first_operand = operand;
post_desc->desc.first_operand = operand;
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
rc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base);
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
if (GNI_RC_ILLEGAL_OP == rc) {
return OPAL_ERR_NOT_SUPPORTED;
}
return OPAL_ERR_OUT_OF_RESOURCE;
rc = mca_btl_ugni_endpoint_post_fma (endpoint, post_desc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
mca_btl_ugni_return_post_descriptor (post_desc);
}
return OPAL_SUCCESS;
return rc;
}
int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
@ -147,35 +136,24 @@ int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_en
return OPAL_ERR_NOT_SUPPORTED;
}
rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
}
mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata, &post_desc);
post_desc = mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata);
if (OPAL_UNLIKELY(NULL == post_desc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle,
init_gni_post_desc (post_desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle,
remote_address, remote_handle->gni_handle, size, 0);
post_desc->desc.base.amo_cmd = gni_op;
post_desc->desc.amo_cmd = gni_op;
post_desc->desc.base.first_operand = operand;
post_desc->desc.first_operand = operand;
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
rc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base);
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
if (GNI_RC_ILLEGAL_OP == rc) {
return OPAL_ERR_NOT_SUPPORTED;
}
return OPAL_ERR_OUT_OF_RESOURCE;
rc = mca_btl_ugni_endpoint_post_fma (endpoint, post_desc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
mca_btl_ugni_return_post_descriptor (post_desc);
}
return OPAL_SUCCESS;
return rc;
}
int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
@ -190,31 +168,23 @@ int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_
gni_op = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? GNI_FMA_ATOMIC2_CSWAP_S : GNI_FMA_ATOMIC_CSWAP;
size = (MCA_BTL_ATOMIC_FLAG_32BIT & flags) ? 4 : 8;
rc = mca_btl_ugni_check_endpoint_state_rdma (endpoint);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
}
mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata, &post_desc);
post_desc = mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata);
if (OPAL_UNLIKELY(NULL == post_desc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
init_gni_post_desc (&post_desc->desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle,
init_gni_post_desc (post_desc, order, GNI_POST_AMO, (intptr_t) local_address, local_handle->gni_handle,
remote_address, remote_handle->gni_handle, size, 0);
post_desc->desc.base.amo_cmd = gni_op;
post_desc->desc.amo_cmd = gni_op;
post_desc->desc.base.first_operand = compare;
post_desc->desc.base.second_operand = value;
post_desc->desc.first_operand = compare;
post_desc->desc.second_operand = value;
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
rc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base);
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
if (GNI_RC_SUCCESS != rc) {
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
return OPAL_ERR_OUT_OF_RESOURCE;
rc = mca_btl_ugni_endpoint_post_fma (endpoint, post_desc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
mca_btl_ugni_return_post_descriptor (post_desc);
}
return OPAL_SUCCESS;
return rc;
}

View File

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -19,6 +19,7 @@
#include <stdlib.h>
#include <fcntl.h>
#include <ctype.h>
#include "opal/memoryhooks/memory.h"
#include "opal/runtime/opal_params.h"
@ -56,8 +57,49 @@ mca_base_var_enum_value_t rcache_values[] = {
{-1, NULL} /* sentinel */
};
static int
btl_ugni_component_register(void)
mca_base_var_enum_value_flag_t cdm_flags[] = {
{.flag = GNI_CDM_MODE_FORK_NOCOPY, .string = "fork-no-copy", .conflicting_flag = GNI_CDM_MODE_FORK_FULLCOPY | GNI_CDM_MODE_FORK_PARTCOPY},
{.flag = GNI_CDM_MODE_FORK_FULLCOPY, .string = "fork-full-copy", .conflicting_flag = GNI_CDM_MODE_FORK_NOCOPY | GNI_CDM_MODE_FORK_PARTCOPY},
{.flag = GNI_CDM_MODE_FORK_PARTCOPY, .string = "fork-part-copy", .conflicting_flag = GNI_CDM_MODE_FORK_NOCOPY | GNI_CDM_MODE_FORK_FULLCOPY},
{.flag = GNI_CDM_MODE_ERR_NO_KILL, .string = "err-no-kill", .conflicting_flag = GNI_CDM_MODE_ERR_ALL_KILL},
{.flag = GNI_CDM_MODE_ERR_ALL_KILL, .string = "err-all-kill", .conflicting_flag = GNI_CDM_MODE_ERR_NO_KILL},
{.flag = GNI_CDM_MODE_FAST_DATAGRAM_POLL, .string = "fast-datagram-poll", .conflicting_flag = 0},
{.flag = GNI_CDM_MODE_BTE_SINGLE_CHANNEL, .string = "bte-single-channel", .conflicting_flag = 0},
{.flag = GNI_CDM_MODE_USE_PCI_IOMMU, .string = "use-pci-iommu", .conflicting_flag = 0},
{.flag = GNI_CDM_MODE_MDD_DEDICATED, .string = "mdd-dedicated", .conflicting_flag = GNI_CDM_MODE_MDD_SHARED},
{.flag = GNI_CDM_MODE_MDD_SHARED, .string = "mdd-shared", .conflicting_flag = GNI_CDM_MODE_MDD_DEDICATED},
{.flag = GNI_CDM_MODE_FMA_DEDICATED, .string = "fma-dedicated", .conflicting_flag = GNI_CDM_MODE_FMA_SHARED},
{.flag = GNI_CDM_MODE_FMA_SHARED, .string = "fma-shared", .conflicting_flag = GNI_CDM_MODE_FMA_DEDICATED},
{.flag = GNI_CDM_MODE_CACHED_AMO_ENABLED, .string = "cached-amo-enabled", .conflicting_flag = 0},
{.flag = GNI_CDM_MODE_CQ_NIC_LOCAL_PLACEMENT, .string = "cq-nic-placement", .conflicting_flag = 0},
{.flag = GNI_CDM_MODE_FMA_SMALL_WINDOW, .string = "fma-small-window", .conflicting_flag = 0},
{.string = NULL}
};
static inline int mca_btl_ugni_get_stat (const mca_base_pvar_t *pvar, void *value, void *obj)
{
gni_statistic_t statistic = (gni_statistic_t) (intptr_t) pvar->ctx;
gni_return_t rc = GNI_RC_SUCCESS;
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
rc = GNI_GetNicStat (mca_btl_ugni_component.modules[0].devices[i].dev_handle, statistic,
((unsigned int *) value) + i);
}
return mca_btl_rc_ugni_to_opal (rc);
}
static inline int mca_btl_ugni_notify_stat (mca_base_pvar_t *pvar, mca_base_pvar_event_t event, void *obj, int *count)
{
if (MCA_BASE_PVAR_HANDLE_BIND == event) {
/* one value for each virtual device handle */
*count = mca_btl_ugni_component.virtual_device_count;
}
return OPAL_SUCCESS;
}
static int btl_ugni_component_register(void)
{
mca_base_var_enum_t *new_enum;
gni_nic_device_t device_type;
@ -181,6 +223,31 @@ btl_ugni_component_register(void)
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.mbox_increment);
/* communication domain flags */
rc = mca_base_var_enum_create_flag ("btl_ugni_cdm_flags", cdm_flags, (mca_base_var_enum_flag_t **) &new_enum);
if (OPAL_SUCCESS != rc) {
return rc;
}
mca_btl_ugni_component.cdm_flags = GNI_CDM_MODE_FORK_PARTCOPY | GNI_CDM_MODE_ERR_NO_KILL | GNI_CDM_MODE_FAST_DATAGRAM_POLL |
GNI_CDM_MODE_MDD_SHARED | GNI_CDM_MODE_FMA_SHARED | GNI_CDM_MODE_FMA_SMALL_WINDOW;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"cdm_flags", "Flags to set when creating a communication domain "
" (default: fork-fullcopy,cached-amo-enabled,err-no-kill,fast-datagram-poll,"
"fma-shared,fma-small-window)",
MCA_BASE_VAR_TYPE_UNSIGNED_INT, new_enum, 0,
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.cdm_flags);
OBJ_RELEASE(new_enum);
mca_btl_ugni_component.virtual_device_count = 0;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"virtual_device_count", "Number of virtual devices to create. Higher numbers may "
"result in better performance when using threads. (default: auto, max: 8)",
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.virtual_device_count);
/* determine if there are get alignment restrictions */
GNI_GetDeviceType (&device_type);
@ -202,12 +269,9 @@ btl_ugni_component_register(void)
}
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"smsg_page_size", "Page size to use for SMSG "
"mailbox allocation (default: detect)",
MCA_BASE_VAR_TYPE_INT, NULL, 0,
MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL,
&mca_btl_ugni_component.smsg_page_size);
"smsg_page_size", "Page size to use for SMSG mailbox allocation (default: detect)",
MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.smsg_page_size);
mca_btl_ugni_component.progress_thread_requested = 0;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
@ -228,6 +292,31 @@ btl_ugni_component_register(void)
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS, NULL,
NULL, NULL, &mca_btl_ugni_progress_thread_wakeups);
/* register network statistics as performance variables */
for (int i = 0 ; i < GNI_NUM_STATS ; ++i) {
char name[128], desc[128];
size_t str_len = strlen (gni_statistic_str[i]);
assert (str_len < sizeof (name));
/* we can get an all-caps string for the variable from gni_statistic_str. need to make it lowercase
* to match ompi standards */
for (size_t j = 0 ; j < str_len ; ++j) {
name[j] = tolower (gni_statistic_str[i][j]);
desc[j] = ('_' == name[j]) ? ' ' : name[j];
}
name[str_len] = '\0';
desc[str_len] = '\0';
(void) mca_base_component_pvar_register (&mca_btl_ugni_component.super.btl_version, name, desc,
OPAL_INFO_LVL_4, MCA_BASE_PVAR_CLASS_COUNTER,
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
mca_btl_ugni_get_stat, NULL, mca_btl_ugni_notify_stat,
(void *) (intptr_t) i);
}
/* btl/ugni can only support only a fixed set of rcache components (these rcache components have compatible resource
* structures) */
rc = mca_base_var_enum_create ("btl_ugni_rcache", rcache_values, &new_enum);
@ -235,9 +324,10 @@ btl_ugni_component_register(void)
return rc;
}
mca_btl_ugni_component.rcache_type = MCA_BTL_UGNI_RCACHE_UDREG;
/* NTH: there are known *serious* performance issues with udreg. if they are ever resolved it is the preferred rcache */
mca_btl_ugni_component.rcache_type = MCA_BTL_UGNI_RCACHE_GRDMA;
(void) mca_base_component_var_register(&mca_btl_ugni_component.super.btl_version,
"rcache", "registration cache to use", MCA_BASE_VAR_TYPE_INT, new_enum,
"rcache", "registration cache to use (default: grdma)", MCA_BASE_VAR_TYPE_INT, new_enum,
0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_3,
MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_ugni_component.rcache_type);
OBJ_RELEASE(new_enum);
@ -325,7 +415,7 @@ btl_ugni_component_open(void)
static int
btl_ugni_component_close(void)
{
opal_common_ugni_fini ();
mca_btl_ugni_fini ();
if (mca_btl_ugni_component.modules) {
free (mca_btl_ugni_component.modules);
@ -342,7 +432,6 @@ mca_btl_ugni_component_init (int *num_btl_modules,
{
struct mca_btl_base_module_t **base_modules;
mca_btl_ugni_module_t *ugni_modules;
unsigned int i;
int rc;
if (16384 < mca_btl_ugni_component.ugni_smsg_limit) {
@ -360,19 +449,18 @@ mca_btl_ugni_component_init (int *num_btl_modules,
}
/* Initialize ugni library and create communication domain */
rc = opal_common_ugni_init();
rc = mca_btl_ugni_init();
if (OPAL_SUCCESS != rc) {
return NULL;
}
/* Create and initialize one module per uGNI device */
mca_btl_ugni_component.ugni_num_btls = opal_common_ugni_module.device_count;
/* For now only create a single BTL module */
mca_btl_ugni_component.ugni_num_btls = 1;
BTL_VERBOSE(("btl/ugni initializing"));
ugni_modules = mca_btl_ugni_component.modules = (mca_btl_ugni_module_t *)
calloc (mca_btl_ugni_component.ugni_num_btls,
sizeof (mca_btl_ugni_module_t));
calloc (mca_btl_ugni_component.ugni_num_btls, sizeof (mca_btl_ugni_module_t));
if (OPAL_UNLIKELY(NULL == mca_btl_ugni_component.modules)) {
BTL_ERROR(("Failed malloc: %s:%d", __FILE__, __LINE__));
@ -395,20 +483,15 @@ mca_btl_ugni_component_init (int *num_btl_modules,
mca_btl_ugni_module.super.btl_rdma_pipeline_send_length = mca_btl_ugni_module.super.btl_eager_limit;
for (i = 0 ; i < mca_btl_ugni_component.ugni_num_btls ; ++i) {
mca_btl_ugni_module_t *ugni_module = ugni_modules + i;
rc = mca_btl_ugni_module_init (ugni_module,
opal_common_ugni_module.devices + i);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("Failed to initialize uGNI module @ %s:%d", __FILE__,
__LINE__));
return NULL;
}
base_modules[i] = (mca_btl_base_module_t *) ugni_module;
rc = mca_btl_ugni_module_init (ugni_modules);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("Failed to initialize uGNI module @ %s:%d", __FILE__,
__LINE__));
return NULL;
}
*base_modules = (mca_btl_base_module_t *) ugni_modules;
*num_btl_modules = mca_btl_ugni_component.ugni_num_btls;
BTL_VERBOSE(("btl/ugni done initializing %d module(s)", *num_btl_modules));
@ -417,80 +500,47 @@ mca_btl_ugni_component_init (int *num_btl_modules,
}
static inline int
mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device)
{
uint64_t datagram_id, data, proc_id;
uint32_t remote_addr, remote_id;
mca_btl_base_endpoint_t *ep;
gni_post_state_t post_state;
gni_ep_handle_t handle;
gni_return_t grc;
int count = 0, rc;
/* check for datagram completion */
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock); /* TODO: may not need lock for this function */
grc = GNI_PostDataProbeById (ugni_module->device->dev_handle, &datagram_id);
if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) {
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
return 0;
rc = mca_btl_ugni_get_datagram (ugni_module, device, &handle, &ep);
if (1 != rc) {
return rc;
}
data = datagram_id & ~(MCA_BTL_UGNI_DATAGRAM_MASK);
BTL_VERBOSE(("datgram_id: %" PRIx64 ", mask: %" PRIx64, datagram_id, (uint64_t) (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK)));
if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID) {
ep = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&ugni_module->endpoints, data);
handle = ep->smsg_ep_handle;
} else {
handle = ugni_module->wildcard_ep;
}
/* wait for the incoming datagram to complete (in case it isn't) */
grc = GNI_EpPostDataWaitById (handle, datagram_id, -1, &post_state,
&remote_addr, &remote_id);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (GNI_RC_SUCCESS != grc) {
BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", grc));
return opal_common_rc_ugni_to_opal (grc);
}
BTL_VERBOSE(("remote datagram completion on handle %p", handle));
/* if this is a wildcard endpoint lookup the remote peer by the proc id we received */
if (handle == ugni_module->wildcard_ep) {
proc_id = mca_btl_ugni_proc_name_to_id (ugni_module->wc_remote_attr.proc_name);
struct opal_proc_t *remote_proc = opal_proc_for_name (ugni_module->wc_remote_attr.proc_name);
BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc id: %" PRIx64,
proc_id));
BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc: %s",
OPAL_NAME_PRINT(ugni_module->wc_remote_attr.proc_name)));
OPAL_THREAD_LOCK(&ugni_module->endpoint_lock);
rc = opal_hash_table_get_value_uint64 (&ugni_module->id_to_endpoint, proc_id, (void **) &ep);
OPAL_THREAD_UNLOCK(&ugni_module->endpoint_lock);
/* check if the endpoint is known */
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || NULL == ep)) {
struct opal_proc_t *remote_proc = opal_proc_for_name (ugni_module->wc_remote_attr.proc_name);
BTL_VERBOSE(("Got connection request from an unknown peer {jobid = 0x%x, vid = 0x%x}",
ugni_module->wc_remote_attr.proc_name.jobid, ugni_module->wc_remote_attr.proc_name.vpid));
ep = mca_btl_ugni_get_ep (&ugni_module->super, remote_proc);
if (OPAL_UNLIKELY(NULL == ep)) {
return rc;
}
ep = mca_btl_ugni_get_ep (&ugni_module->super, remote_proc);
if (OPAL_UNLIKELY(NULL == ep)) {
/* there is no way to recover from this error so just abort() */
BTL_ERROR(("could not find/allocate a btl endpoint for peer %s",
OPAL_NAME_PRINT(ugni_module->wc_remote_attr.proc_name)));
abort ();
return OPAL_ERR_NOT_FOUND;
}
} else {
BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));
}
/* should not have gotten a NULL endpoint */
assert (NULL != ep);
BTL_VERBOSE(("got a datagram completion: id = %" PRIx64 ", state = %d, "
"data = 0x%" PRIx64 ", ep = %p, remote id: %d", datagram_id, post_state,
data, (void *) ep, remote_id));
BTL_VERBOSE(("got a datagram completion: ep = %p. wc = %d", (void *) ep, handle == ugni_module->wildcard_ep));
/* NTH: TODO -- error handling */
opal_mutex_lock (&ep->lock);
if (handle != ugni_module->wildcard_ep) {
/* directed post complete */
BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));
ep->dg_posted = false;
}
@ -514,106 +564,106 @@ mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
static inline void btl_ugni_dump_post_desc (mca_btl_ugni_post_descriptor_t *desc)
{
fprintf (stderr, "desc->desc.base.post_id = %" PRIx64 "\n", desc->desc.base.post_id);
fprintf (stderr, "desc->desc.base.status = %" PRIx64 "\n", desc->desc.base.status);
fprintf (stderr, "desc->desc.base.cq_mode_complete = %hu\n", desc->desc.base.cq_mode_complete);
fprintf (stderr, "desc->desc.base.type = %d\n", desc->desc.base.type);
fprintf (stderr, "desc->desc.base.cq_mode = %hu\n", desc->desc.base.cq_mode);
fprintf (stderr, "desc->desc.base.dlvr_mode = %hu\n", desc->desc.base.dlvr_mode);
fprintf (stderr, "desc->desc.base.local_addr = %" PRIx64 "\n", desc->desc.base.local_addr);
fprintf (stderr, "desc->desc.base.local_mem_hndl = {%" PRIx64 ", %" PRIx64 "}\n", desc->desc.base.local_mem_hndl.qword1,
desc->desc.base.local_mem_hndl.qword2);
fprintf (stderr, "desc->desc.base.remote_addr = %" PRIx64 "\n", desc->desc.base.remote_addr);
fprintf (stderr, "desc->desc.base.remote_mem_hndl = {%" PRIx64 ", %" PRIx64 "}\n", desc->desc.base.remote_mem_hndl.qword1,
desc->desc.base.remote_mem_hndl.qword2);
fprintf (stderr, "desc->desc.base.length = %" PRIu64 "\n", desc->desc.base.length);
fprintf (stderr, "desc->desc.base.rdma_mode = %hu\n", desc->desc.base.rdma_mode);
fprintf (stderr, "desc->desc.base.amo_cmd = %d\n", desc->desc.base.amo_cmd);
fprintf (stderr, "desc->desc.post_id = %" PRIx64 "\n", desc->desc.post_id);
fprintf (stderr, "desc->desc.status = %" PRIx64 "\n", desc->desc.status);
fprintf (stderr, "desc->desc.cq_mode_complete = %hu\n", desc->desc.cq_mode_complete);
fprintf (stderr, "desc->desc.type = %d\n", desc->desc.type);
fprintf (stderr, "desc->desc.cq_mode = %hu\n", desc->desc.cq_mode);
fprintf (stderr, "desc->desc.dlvr_mode = %hu\n", desc->desc.dlvr_mode);
fprintf (stderr, "desc->desc.local_addr = %" PRIx64 "\n", desc->desc.local_addr);
fprintf (stderr, "desc->desc.local_mem_hndl = {%" PRIx64 ", %" PRIx64 "}\n", desc->desc.local_mem_hndl.qword1,
desc->desc.local_mem_hndl.qword2);
fprintf (stderr, "desc->desc.remote_addr = %" PRIx64 "\n", desc->desc.remote_addr);
fprintf (stderr, "desc->desc.remote_mem_hndl = {%" PRIx64 ", %" PRIx64 "}\n", desc->desc.remote_mem_hndl.qword1,
desc->desc.remote_mem_hndl.qword2);
fprintf (stderr, "desc->desc.length = %" PRIu64 "\n", desc->desc.length);
fprintf (stderr, "desc->desc.rdma_mode = %hu\n", desc->desc.rdma_mode);
fprintf (stderr, "desc->desc.amo_cmd = %d\n", desc->desc.amo_cmd);
}
#endif
static inline int mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module, int which_cq)
{
mca_btl_ugni_post_descriptor_t *post_desc = NULL;
gni_cq_entry_t event_data = 0;
gni_post_descriptor_t *desc;
uint32_t recoverable = 1;
gni_return_t grc;
gni_cq_handle_t the_cq;
the_cq = (which_cq == 0) ? ugni_module->rdma_local_cq : ugni_module->rdma_local_irq_cq;
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
grc = GNI_CqGetEvent (the_cq, &event_data);
if (GNI_RC_NOT_DONE == grc) {
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
return 0;
}
if (OPAL_UNLIKELY((GNI_RC_SUCCESS != grc && !event_data) || GNI_CQ_OVERRUN(event_data))) {
/* TODO -- need to handle overrun -- how do we do this without an event?
will the event eventually come back? Ask Cray */
BTL_ERROR(("unhandled post error! ugni rc = %d %s", grc, gni_err_str[grc]));
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
return opal_common_rc_ugni_to_opal (grc);
}
grc = GNI_GetCompleted (the_cq, event_data, &desc);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc && GNI_RC_TRANSACTION_ERROR != grc)) {
BTL_ERROR(("Error in GNI_GetComplete %s", gni_err_str[grc]));
return opal_common_rc_ugni_to_opal (grc);
}
post_desc = MCA_BTL_UGNI_DESC_TO_PDESC(desc);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc || !GNI_CQ_STATUS_OK(event_data))) {
(void) GNI_CqErrorRecoverable (event_data, &recoverable);
if (OPAL_UNLIKELY(++post_desc->desc.tries >= mca_btl_ugni_component.rdma_max_retries ||
!recoverable)) {
char char_buffer[1024];
GNI_CqErrorStr (event_data, char_buffer, 1024);
/* give up */
BTL_ERROR(("giving up on desciptor %p, recoverable %d: %s", (void *) post_desc,
recoverable, char_buffer));
#if OPAL_ENABLE_DEBUG
btl_ugni_dump_post_desc (post_desc);
#endif
mca_btl_ugni_post_desc_complete (ugni_module, post_desc, OPAL_ERROR);
return OPAL_ERROR;
}
mca_btl_ugni_repost (ugni_module, post_desc);
return 0;
}
mca_btl_ugni_post_desc_complete (ugni_module, post_desc, opal_common_rc_ugni_to_opal (grc));
return 1;
}
static inline int
mca_btl_ugni_post_pending (mca_btl_ugni_module_t *ugni_module)
mca_btl_ugni_post_pending (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device)
{
int count = opal_list_get_size (&ugni_module->pending_descriptors);
int i;
int pending_post_count = opal_list_get_size (&device->pending_post);
mca_btl_ugni_post_descriptor_t *post_desc;
int rc;
for (i = 0 ; i < count ; ++i) {
OPAL_THREAD_LOCK(&ugni_module->pending_descriptors_lock);
mca_btl_ugni_post_descriptor_t *post_desc =
(mca_btl_ugni_post_descriptor_t *) opal_list_remove_first (&ugni_module->pending_descriptors);
OPAL_THREAD_UNLOCK(&ugni_module->pending_descriptors_lock);
/* check if there are any posts pending resources */
if (OPAL_LIKELY(0 == pending_post_count)) {
return 0;
}
if (OPAL_SUCCESS != mca_btl_ugni_repost (ugni_module, post_desc)) {
BTL_VERBOSE(("progressing %d pending FMA/RDMA operations", pending_post_count));
for (int i = 0 ; i < pending_post_count ; ++i) {
mca_btl_ugni_device_lock (device);
post_desc = (mca_btl_ugni_post_descriptor_t *) opal_list_remove_first (&device->pending_post);
mca_btl_ugni_device_unlock (device);
if (NULL == post_desc) {
break;
}
rc = mca_btl_ugni_repost (ugni_module, post_desc);
if (OPAL_SUCCESS != rc) {
mca_btl_ugni_device_lock (device);
opal_list_prepend (&device->pending_post, (opal_list_item_t *) post_desc);
mca_btl_ugni_device_unlock (device);
break;
}
}
return i;
return 1;
}
static inline int mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device,
mca_btl_ugni_cq_t *cq)
{
mca_btl_ugni_post_descriptor_t *post_desc[MCA_BTL_UGNI_COMPLETIONS_PER_LOOP];
gni_cq_entry_t event_data[MCA_BTL_UGNI_COMPLETIONS_PER_LOOP];
int rc;
rc = mca_btl_ugni_cq_get_completed_desc (device, cq, event_data, post_desc, MCA_BTL_UGNI_COMPLETIONS_PER_LOOP);
if (0 >= rc) {
return rc;
}
BTL_VERBOSE(("got %d completed rdma descriptors", rc));
for (int i = 0 ; i < rc ; ++i) {
BTL_VERBOSE(("post descriptor %p complete. GNI_CQ_STATUS_OK(): %d", post_desc[i],
GNI_CQ_STATUS_OK(event_data[i])));
if (OPAL_UNLIKELY(!GNI_CQ_STATUS_OK(event_data[i]))) {
uint32_t recoverable = 1;
(void) GNI_CqErrorRecoverable (event_data[i], &recoverable);
if (OPAL_UNLIKELY(++post_desc[i]->tries >= mca_btl_ugni_component.rdma_max_retries ||
!recoverable)) {
char char_buffer[1024];
GNI_CqErrorStr (event_data[i], char_buffer, 1024);
/* give up */
BTL_ERROR(("giving up on desciptor %p, recoverable %d: %s", (void *) post_desc[i],
recoverable, char_buffer));
#if OPAL_ENABLE_DEBUG
btl_ugni_dump_post_desc (post_desc[i]);
#endif
mca_btl_ugni_post_desc_complete (ugni_module, post_desc[i], OPAL_ERROR);
return OPAL_ERROR;
}
mca_btl_ugni_repost (ugni_module, post_desc[i]);
return 0;
}
mca_btl_ugni_post_desc_complete (ugni_module, post_desc[i], OPAL_SUCCESS);
}
/* there should now be resources available to progress the pending post list */
(void) mca_btl_ugni_post_pending (ugni_module, device);
return rc;
}
static inline int
@ -627,9 +677,14 @@ mca_btl_ugni_progress_wait_list (mca_btl_ugni_module_t *ugni_module)
return 0;
}
/* check the count before taking the lock to avoid unnecessary locking */
count = opal_list_get_size(&ugni_module->ep_wait_list);
if (0 == count) {
return 0;
}
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
count = opal_list_get_size(&ugni_module->ep_wait_list);
do {
endpoint = (mca_btl_base_endpoint_t *) opal_list_remove_first (&ugni_module->ep_wait_list);
if (endpoint != NULL) {
@ -649,35 +704,34 @@ mca_btl_ugni_progress_wait_list (mca_btl_ugni_module_t *ugni_module)
static int mca_btl_ugni_component_progress (void)
{
mca_btl_ugni_module_t *ugni_module;
static int64_t call_count = 0;
int64_t cur_call_count = OPAL_THREAD_ADD64(&call_count, 1);
unsigned int i;
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_component.modules;
static volatile int32_t call_count = 0;
int32_t current_call;
int count = 0;
for (i = 0 ; i < mca_btl_ugni_component.ugni_num_btls ; ++i) {
ugni_module = mca_btl_ugni_component.modules + i;
current_call = OPAL_THREAD_ADD32(&call_count, 1);
if ((cur_call_count & 0x7) == 0) {
count += mca_btl_ugni_progress_datagram (ugni_module);
}
count += mca_btl_ugni_progress_remote_smsg (ugni_module);
if (ugni_module->connected_peer_count) {
if ((current_call & 0x7) == 0) {
count += mca_btl_ugni_progress_datagram (ugni_module, ugni_module->devices);
}
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
mca_btl_ugni_device_t *device = ugni_module->devices + i;
if (device->smsg_connections) {
count += mca_btl_ugni_progress_local_smsg (ugni_module, device);
mca_btl_ugni_progress_wait_list (ugni_module);
count += mca_btl_ugni_progress_local_smsg (ugni_module);
count += mca_btl_ugni_progress_remote_smsg (ugni_module);
}
if (ugni_module->active_rdma_count) {
count += mca_btl_ugni_progress_rdma (ugni_module, 0);
if (device->dev_rdma_local_cq.active_operations) {
count += mca_btl_ugni_progress_rdma (ugni_module, device, &device->dev_rdma_local_cq);
}
if (mca_btl_ugni_component.progress_thread_enabled) {
count += mca_btl_ugni_progress_rdma (ugni_module, 1);
if (mca_btl_ugni_component.progress_thread_enabled && device->dev_rdma_local_irq_cq.active_operations) {
count += mca_btl_ugni_progress_rdma (ugni_module, device, &device->dev_rdma_local_irq_cq);
}
/* post pending after progressing rdma */
mca_btl_ugni_post_pending (ugni_module);
}
return count;

opal/mca/btl/ugni/btl_ugni_device.h (new file, 430 lines)
View File

@ -0,0 +1,430 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
/**
* @file This file contains wrappers for uGNI functionality. The wrappers are thread-safe
* and are intended to provide a way to measure different approaches to handling mutual
* exclusion around the uGNI library (which is not thread safe). The functions are all
* defined inline to limit the cost to non-threaded users.
*/
#if !defined(BTL_UGNI_DEVICE_H)
#define BTL_UGNI_DEVICE_H
#include "btl_ugni_endpoint.h"
#include "btl_ugni_frag.h"
/* helper functions */
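/* Each helper follows the same pattern: the caller packs its arguments into a small
 * struct, the *_device() function makes the raw GNI_* call, and the call is intended to
 * be run through mca_btl_ugni_device_serialize() (or made with the device lock already
 * held) so that the non-thread-safe uGNI library is entered by at most one thread per
 * device at a time. */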
typedef struct mca_btl_ugni_smsg_send_wtag_arg_t {
gni_ep_handle_t ep_handle;
void *hdr;
size_t hdr_len;
void *payload;
size_t payload_len;
uint32_t msg_id;
int tag;
} mca_btl_ugni_smsg_send_wtag_arg_t;
static inline int mca_btl_ugni_smsg_send_wtag_device (mca_btl_ugni_device_t *device, void *arg)
{
mca_btl_ugni_smsg_send_wtag_arg_t *args = (mca_btl_ugni_smsg_send_wtag_arg_t *) arg;
gni_return_t grc;
grc = GNI_SmsgSendWTag (args->ep_handle, args->hdr, args->hdr_len, args->payload,
args->payload_len, args->msg_id, args->tag);
device->dev_smsg_local_cq.active_operations += (GNI_RC_SUCCESS == grc);
return grc;
}
typedef struct mca_btl_ugni_smsg_get_next_wtag_arg_t {
gni_ep_handle_t ep_handle;
uintptr_t *data_ptr;
uint8_t *tag;
} mca_btl_ugni_smsg_get_next_wtag_arg_t;
static inline intptr_t mca_btl_ugni_smsg_get_next_wtag_device (mca_btl_ugni_device_t *device, void *arg)
{
mca_btl_ugni_smsg_get_next_wtag_arg_t *args = (mca_btl_ugni_smsg_get_next_wtag_arg_t *) arg;
return GNI_SmsgGetNextWTag(args->ep_handle, (void **) args->data_ptr, args->tag);
}
static inline intptr_t mca_btl_ugni_smsg_release_device (mca_btl_ugni_device_t *device, void *arg)
{
mca_btl_ugni_endpoint_handle_t *ep_handle = (mca_btl_ugni_endpoint_handle_t *) arg;
return GNI_SmsgRelease (ep_handle->gni_handle);
}
static inline intptr_t mca_btl_ugni_cq_clear_device (mca_btl_ugni_device_t *device, void *arg)
{
gni_cq_handle_t cq = (gni_cq_handle_t) (intptr_t) arg;
gni_cq_entry_t event_data;
int rc;
do {
rc = GNI_CqGetEvent (cq, &event_data);
} while (GNI_RC_NOT_DONE != rc);
return OPAL_SUCCESS;
}
typedef struct mca_btl_ugni_cq_get_event_args_t {
mca_btl_ugni_cq_t *cq;
gni_cq_entry_t *event_data;
} mca_btl_ugni_cq_get_event_args_t;
static inline intptr_t mca_btl_ugni_cq_get_event_device (mca_btl_ugni_device_t *device, void *arg)
{
mca_btl_ugni_cq_get_event_args_t *args = (mca_btl_ugni_cq_get_event_args_t *) arg;
gni_return_t rc;
rc = GNI_CqGetEvent (args->cq->gni_handle, args->event_data);
args->cq->active_operations -= GNI_RC_NOT_DONE != rc;
return rc;
}
typedef struct mca_btl_ugni_gni_cq_get_event_args_t {
gni_cq_handle_t cq;
gni_cq_entry_t *event_data;
} mca_btl_ugni_gni_cq_get_event_args_t;
static inline intptr_t mca_btl_ugni_gni_cq_get_event_device (mca_btl_ugni_device_t *device, void *arg)
{
mca_btl_ugni_gni_cq_get_event_args_t *args = (mca_btl_ugni_gni_cq_get_event_args_t *) arg;
return GNI_CqGetEvent (args->cq, args->event_data);
}
static inline intptr_t mca_btl_ugni_post_fma_device (mca_btl_ugni_device_t *device, void *arg)
{
mca_btl_ugni_post_descriptor_t *desc = (mca_btl_ugni_post_descriptor_t *) arg;
bool ep_handle_allocated = false;
int rc;
if (NULL == desc->ep_handle) {
desc->ep_handle = mca_btl_ugni_ep_get_rdma (desc->endpoint, device);
if (OPAL_UNLIKELY(NULL == desc->ep_handle)) {
return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
}
ep_handle_allocated = true;
}
BTL_VERBOSE(("Posting FMA descriptor %p with op_type %d, amo %d, ep_handle %p, remote_addr 0x%lx, "
"length %lu", desc, desc->desc.type, desc->desc.amo_cmd, desc->ep_handle,
desc->desc.remote_addr, desc->desc.length));
rc = GNI_PostFma (desc->ep_handle->gni_handle, &desc->desc);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
if (ep_handle_allocated) {
/* only return the endpoint handle if we allocated it. if we didn't allocate the
* handle this call was likely made from repost() */
mca_btl_ugni_ep_return_rdma (desc->ep_handle);
desc->ep_handle = NULL;
}
} else {
++device->dev_rdma_local_cq.active_operations;
}
return mca_btl_rc_ugni_to_opal (rc);
}
static inline intptr_t mca_btl_ugni_post_rdma_device (mca_btl_ugni_device_t *device, void *arg)
{
mca_btl_ugni_post_descriptor_t *desc = (mca_btl_ugni_post_descriptor_t *) arg;
bool ep_handle_allocated = false;
int rc;
if (NULL == desc->ep_handle) {
desc->ep_handle = mca_btl_ugni_ep_get_rdma (desc->endpoint, device);
if (OPAL_UNLIKELY(NULL == desc->ep_handle)) {
return OPAL_ERR_TEMP_OUT_OF_RESOURCE;
}
ep_handle_allocated = true;
}
/* pick the appropriate CQ */
desc->cq = mca_btl_ugni_component.progress_thread_enabled ? &device->dev_rdma_local_irq_cq :
&device->dev_rdma_local_cq;
desc->desc.src_cq_hndl = desc->cq->gni_handle;
BTL_VERBOSE(("Posting RDMA descriptor %p with op_type %d, ep_handle %p, remote_addr 0x%lx, "
"length %lu", desc, desc->desc.type, desc->ep_handle, desc->desc.remote_addr,
desc->desc.length));
rc = GNI_PostRdma (desc->ep_handle->gni_handle, &desc->desc);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
if (ep_handle_allocated) {
/* only return the endpoint handle if we allocated it. if we didn't allocate the
* handle this call was likely made from repost() */
mca_btl_ugni_ep_return_rdma (desc->ep_handle);
desc->ep_handle = NULL;
}
} else {
++desc->cq->active_operations;
}
return mca_btl_rc_ugni_to_opal (rc);
}
static inline intptr_t mca_btl_ugni_post_cqwrite_device (mca_btl_ugni_device_t *device, void *arg)
{
mca_btl_ugni_post_descriptor_t *desc = (mca_btl_ugni_post_descriptor_t *) arg;
int rc;
desc->ep_handle = mca_btl_ugni_ep_get_rdma (desc->endpoint, device);
if (OPAL_UNLIKELY(NULL == desc->ep_handle)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
desc->desc.src_cq_hndl = device->dev_rdma_local_cq.gni_handle;
rc = GNI_PostCqWrite (desc->ep_handle->gni_handle, &desc->desc);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
mca_btl_ugni_ep_return_rdma (desc->ep_handle);
desc->ep_handle = NULL;
}
return mca_btl_rc_ugni_to_opal (rc);
}
typedef struct mca_btl_ugni_cq_get_completed_desc_arg_t {
mca_btl_ugni_cq_t *cq;
gni_cq_entry_t *event_data;
mca_btl_ugni_post_descriptor_t **post_desc;
int count;
} mca_btl_ugni_cq_get_completed_desc_arg_t;
static inline intptr_t mca_btl_ugni_cq_get_completed_desc_device (mca_btl_ugni_device_t *device, void *arg0)
{
mca_btl_ugni_cq_get_completed_desc_arg_t *args = (mca_btl_ugni_cq_get_completed_desc_arg_t *) arg0;
mca_btl_ugni_cq_t *cq = args->cq;
gni_post_descriptor_t *desc;
int rc;
for (int i = 0 ; i < args->count ; ++i) {
rc = GNI_CqGetEvent (cq->gni_handle, args->event_data + i);
if (GNI_RC_NOT_DONE == rc) {
return i;
}
if (OPAL_UNLIKELY((GNI_RC_SUCCESS != rc && !args->event_data[i]) || GNI_CQ_OVERRUN(args->event_data[i]))) {
/* TODO -- need to handle overrun -- how do we do this without an event?
will the event eventually come back? Ask Cray */
BTL_ERROR(("unhandled post error! ugni rc = %d %s", rc, gni_err_str[rc]));
return mca_btl_rc_ugni_to_opal (rc);
}
rc = GNI_GetCompleted (cq->gni_handle, args->event_data[i], &desc);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc && GNI_RC_TRANSACTION_ERROR != rc)) {
BTL_ERROR(("Error in GNI_GetCompleted %s", gni_err_str[rc]));
return mca_btl_rc_ugni_to_opal (rc);
}
args->post_desc[i] = MCA_BTL_UGNI_DESC_TO_PDESC(desc);
/* return the endpoint handle while we have the lock. see the explanation in
* the documentation for mca_btl_ugni_ep_return_rdma() */
if (OPAL_LIKELY(GNI_CQ_STATUS_OK(args->event_data[i]))) {
/* the operation completed successfully. return the endpoint handle now. otherwise
* we may still need the endpoint handle to start the repost(). */
mca_btl_ugni_ep_return_rdma (args->post_desc[i]->ep_handle);
args->post_desc[i]->ep_handle = NULL;
}
--cq->active_operations;
}
return args->count;
}
typedef struct mca_btl_ugni_get_datagram_args_t {
mca_btl_ugni_module_t *ugni_module;
gni_ep_handle_t *handle;
mca_btl_base_endpoint_t **ep;
} mca_btl_ugni_get_datagram_args_t;
static inline intptr_t mca_btl_ugni_get_datagram_device (mca_btl_ugni_device_t *device, void *arg0)
{
mca_btl_ugni_get_datagram_args_t *args = (mca_btl_ugni_get_datagram_args_t *) arg0;
uint32_t remote_addr, remote_id;
uint64_t datagram_id;
gni_post_state_t post_state;
gni_return_t grc;
uint64_t data;
grc = GNI_PostDataProbeById (device->dev_handle, &datagram_id);
if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) {
return 0;
}
data = datagram_id & ~(MCA_BTL_UGNI_DATAGRAM_MASK);
BTL_VERBOSE(("rc: %d, datagram_id: %" PRIx64 ", mask: %" PRIx64, grc, datagram_id, (uint64_t) (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK)));
if ((datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID) {
*(args->ep) = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&args->ugni_module->endpoints, data);
*(args->handle) = (*args->ep)->smsg_ep_handle->gni_handle;
} else {
*(args->handle) = args->ugni_module->wildcard_ep;
}
/* wait for the incoming datagram to complete (in case it isn't) */
grc = GNI_EpPostDataWaitById (*args->handle, datagram_id, -1, &post_state,
&remote_addr, &remote_id);
if (GNI_RC_SUCCESS != grc) {
BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", grc));
return mca_btl_rc_ugni_to_opal (grc);
}
BTL_VERBOSE(("handled datagram completion. post_state: %d, remote_addr: %u, remote_id: %u, directed?: %d",
post_state, remote_addr, remote_id, (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK) == MCA_BTL_UGNI_CONNECT_DIRECTED_ID));
return 1;
}
typedef struct mca_btl_ugni_reg_mem_args_t {
mca_btl_ugni_module_t *ugni_module;
void *base;
size_t size;
mca_btl_ugni_reg_t *ugni_reg;
gni_cq_handle_t cq;
int flags;
} mca_btl_ugni_reg_mem_args_t;
static intptr_t mca_btl_ugni_reg_mem_device (mca_btl_ugni_device_t *device, void *arg)
{
mca_btl_ugni_reg_mem_args_t *args = (mca_btl_ugni_reg_mem_args_t *) arg;
gni_return_t rc;
rc = GNI_MemRegister (device->dev_handle, (uint64_t) args->base, args->size, args->cq,
args->flags, -1, &args->ugni_reg->handle.gni_handle);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
return OPAL_SUCCESS;
}
typedef struct mca_btl_ugni_dereg_mem_arg_t {
mca_btl_ugni_module_t *ugni_module;
mca_btl_ugni_reg_t *ugni_reg;
} mca_btl_ugni_dereg_mem_arg_t;
static intptr_t mca_btl_ugni_dereg_mem_device (mca_btl_ugni_device_t *device, void *arg)
{
mca_btl_ugni_dereg_mem_arg_t *args = (mca_btl_ugni_dereg_mem_arg_t *) arg;
gni_return_t rc;
rc = GNI_MemDeregister (device->dev_handle, &args->ugni_reg->handle.gni_handle);
return mca_btl_rc_ugni_to_opal (rc);
}
/* multi-thread safe interface to uGNI */
static inline int mca_btl_ugni_endpoint_smsg_send_wtag (mca_btl_base_endpoint_t *endpoint, void *hdr, size_t hdr_len,
void *payload, size_t payload_len, uint32_t msg_id, int tag)
{
mca_btl_ugni_smsg_send_wtag_arg_t args = {.ep_handle = endpoint->smsg_ep_handle->gni_handle,
.hdr = hdr, .hdr_len = hdr_len, .payload = payload,
.payload_len = payload_len, .msg_id = msg_id,
.tag = tag};
mca_btl_ugni_device_t *device = endpoint->smsg_ep_handle->device;
return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_smsg_send_wtag_device, &args);
}
static inline int mca_btl_ugni_smsg_get_next_wtag (mca_btl_ugni_endpoint_handle_t *ep_handle, uintptr_t *data_ptr, uint8_t *tag)
{
mca_btl_ugni_device_t *device = ep_handle->device;
mca_btl_ugni_smsg_get_next_wtag_arg_t args = {.ep_handle = ep_handle->gni_handle, .data_ptr = data_ptr, .tag = tag};
return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_smsg_get_next_wtag_device, &args);
}
static inline int mca_btl_ugni_smsg_release (mca_btl_ugni_endpoint_handle_t *ep_handle)
{
mca_btl_ugni_device_t *device = ep_handle->device;
return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_smsg_release_device, ep_handle);
}
static inline void mca_btl_ugni_cq_clear (mca_btl_ugni_device_t *device, gni_cq_handle_t cq)
{
(void) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_cq_clear_device, (void *) (intptr_t) cq);
}
static inline int mca_btl_ugni_cq_get_event (mca_btl_ugni_device_t *device, mca_btl_ugni_cq_t *cq, gni_cq_entry_t *event_data)
{
mca_btl_ugni_cq_get_event_args_t args = {.cq = cq, .event_data = event_data};
return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_cq_get_event_device, &args);
}
static inline int mca_btl_ugni_gni_cq_get_event (mca_btl_ugni_device_t *device, gni_cq_handle_t cq, gni_cq_entry_t *event_data)
{
mca_btl_ugni_gni_cq_get_event_args_t args = {.cq = cq, .event_data = event_data};
return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_gni_cq_get_event_device, &args);
}
static inline int mca_btl_ugni_endpoint_post_fma (mca_btl_ugni_endpoint_t *endpoint, mca_btl_ugni_post_descriptor_t *desc)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (endpoint);
mca_btl_ugni_device_t *device = desc->ep_handle ? desc->ep_handle->device : mca_btl_ugni_ep_get_device (ugni_module);
return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_post_fma_device, desc);
}
static inline int mca_btl_ugni_endpoint_post_rdma (mca_btl_ugni_endpoint_t *endpoint, mca_btl_ugni_post_descriptor_t *desc)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (endpoint);
mca_btl_ugni_device_t *device = desc->ep_handle ? desc->ep_handle->device : mca_btl_ugni_ep_get_device (ugni_module);
return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_post_rdma_device, desc);
}
static inline int mca_btl_ugni_endpoint_post_cqwrite (mca_btl_ugni_endpoint_t *endpoint, mca_btl_ugni_post_descriptor_t *desc)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (endpoint);
mca_btl_ugni_device_t *device = ugni_module->devices;
return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_post_cqwrite_device, desc);
}
static inline int mca_btl_ugni_cq_get_completed_desc (mca_btl_ugni_device_t *device, mca_btl_ugni_cq_t *cq,
gni_cq_entry_t *event_data, mca_btl_ugni_post_descriptor_t **post_desc,
int count)
{
mca_btl_ugni_cq_get_completed_desc_arg_t args = {.cq = cq, .event_data = event_data, .post_desc = post_desc, .count = count};
return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_cq_get_completed_desc_device, &args);
}
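A minimal usage sketch for the batch wrapper above; the surrounding variables (device, ugni_module) and the error handling are assumptions, the real consumer being mca_btl_ugni_progress_rdma() shown being called near the top of this diff:

mca_btl_ugni_post_descriptor_t *post_desc[MCA_BTL_UGNI_COMPLETIONS_PER_LOOP];
gni_cq_entry_t event_data[MCA_BTL_UGNI_COMPLETIONS_PER_LOOP];
int count;

/* a negative return indicates an error from the device wrapper */
count = mca_btl_ugni_cq_get_completed_desc (device, &device->dev_rdma_local_cq, event_data,
                                            post_desc, MCA_BTL_UGNI_COMPLETIONS_PER_LOOP);
for (int i = 0 ; i < count ; ++i) {
    /* completions with a bad CQ status hand an error code to the user callback */
    int rc = GNI_CQ_STATUS_OK(event_data[i]) ? OPAL_SUCCESS : OPAL_ERROR;
    mca_btl_ugni_post_desc_complete (ugni_module, post_desc[i], rc);
}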
static inline int mca_btl_ugni_get_datagram (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device, gni_ep_handle_t *gni_handle,
mca_btl_base_endpoint_t **ep)
{
mca_btl_ugni_get_datagram_args_t args = {.ugni_module = ugni_module, .ep = ep, .handle = gni_handle};
return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_get_datagram_device, &args);
}
static inline int mca_btl_ugni_reg_mem (mca_btl_ugni_module_t *ugni_module, void *base, size_t size, mca_btl_ugni_reg_t *ugni_reg,
gni_cq_handle_t cq, int flags)
{
mca_btl_ugni_reg_mem_args_t args = {.ugni_module = ugni_module, .base = base, .size = size,
.ugni_reg = ugni_reg, .cq = cq, .flags = flags};
mca_btl_ugni_device_t *device = ugni_module->devices;
return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_reg_mem_device, &args);
}
static inline int mca_btl_ugni_dereg_mem (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_reg_t *ugni_reg)
{
mca_btl_ugni_dereg_mem_arg_t args = {.ugni_module = ugni_module, .ugni_reg = ugni_reg};
mca_btl_ugni_device_t *device = ugni_module->devices;
return (int) mca_btl_ugni_device_serialize (device, (mca_btl_ugni_device_serialize_fn_t) mca_btl_ugni_dereg_mem_device, &args);
}
#endif /* BTL_UGNI_DEVICE_H */

View File

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011-2013 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -12,6 +12,7 @@
#include "btl_ugni_endpoint.h"
#include "btl_ugni_smsg.h"
#include "opal/mca/pmix/pmix.h"
static void mca_btl_ugni_ep_construct (mca_btl_base_endpoint_t *ep)
{
@ -24,15 +25,94 @@ static void mca_btl_ugni_ep_destruct (mca_btl_base_endpoint_t *ep)
{
OBJ_DESTRUCT(&ep->frag_wait_list);
OBJ_DESTRUCT(&ep->lock);
free (ep->remote_attr);
}
OBJ_CLASS_INSTANCE(mca_btl_ugni_endpoint_t, opal_list_item_t,
mca_btl_ugni_ep_construct, mca_btl_ugni_ep_destruct);
static int mca_btl_ugni_endpoint_get_modex (mca_btl_base_endpoint_t *ep)
{
mca_btl_ugni_modex_t *modex;
size_t msg_size;
int rc;
assert (NULL != ep && NULL != ep->peer_proc);
/* Receive the modex */
OPAL_MODEX_RECV(rc, &mca_btl_ugni_component.super.btl_version,
&ep->peer_proc->proc_name, (void **)&modex, &msg_size);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("error receiving modex"));
return rc;
}
ep->ep_rem_addr = modex->addr;
ep->ep_rem_id = modex->id;
BTL_VERBOSE(("received modex for ep %p. addr: %d, id: %d", ep, ep->ep_rem_addr, ep->ep_rem_id));
free (modex);
return OPAL_SUCCESS;
}
int mca_btl_ugni_init_ep (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_endpoint_t **ep,
mca_btl_ugni_module_t *btl, opal_proc_t *peer_proc)
{
mca_btl_ugni_endpoint_t *endpoint;
int rc;
endpoint = OBJ_NEW(mca_btl_ugni_endpoint_t);
assert (endpoint != NULL);
endpoint->smsg_progressing = 0;
endpoint->state = MCA_BTL_UGNI_EP_STATE_INIT;
endpoint->peer_proc = peer_proc;
/* get the modex info for this endpoint and setup a ugni endpoint. this call may lead
* to re-entry through opal_progress(). */
rc = mca_btl_ugni_endpoint_get_modex (endpoint);
if (OPAL_SUCCESS != rc) {
assert (0);
return rc;
}
/* add this endpoint to the pointer array */
endpoint->index = opal_pointer_array_add (&ugni_module->endpoints, endpoint);
*ep = endpoint;
return OPAL_SUCCESS;
}
void mca_btl_ugni_release_ep (mca_btl_ugni_endpoint_t *ep)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
int rc;
opal_mutex_lock (&ep->lock);
rc = mca_btl_ugni_ep_disconnect (ep, false);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_VERBOSE(("btl/ugni error disconnecting endpoint"));
}
/* TODO -- Clear space at the end of the endpoint array */
opal_pointer_array_set_item (&ugni_module->endpoints, ep->index, NULL);
opal_mutex_unlock (&ep->lock);
OBJ_RELEASE(ep);
}
static inline int mca_btl_ugni_ep_smsg_get_mbox (mca_btl_base_endpoint_t *ep) {
mca_btl_ugni_module_t *ugni_module = ep->btl;
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
opal_free_list_item_t *mbox;
assert (NULL == ep->mailbox);
mbox = opal_free_list_get (&ugni_module->smsg_mboxes);
if (OPAL_UNLIKELY(NULL == mbox)) {
return OPAL_ERR_OUT_OF_RESOURCE;
@ -47,61 +127,103 @@ static inline int mca_btl_ugni_ep_smsg_get_mbox (mca_btl_base_endpoint_t *ep) {
return OPAL_SUCCESS;
}
int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect) {
gni_return_t rc;
static int mca_btl_ugni_ep_send_disconnect (mca_btl_base_endpoint_t *ep)
{
int rc;
do {
rc = mca_btl_ugni_endpoint_smsg_send_wtag (ep, NULL, 0, NULL, 0, -1, MCA_BTL_UGNI_TAG_DISCONNECT);
if (OPAL_LIKELY(GNI_RC_NOT_DONE != rc)) {
break;
}
/* most likely got here because we are out of credits. check the remote CQ to get credit return */
(void) mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_ep_btl (ep));
} while (1);
return mca_btl_rc_ugni_to_opal (rc);
}
int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
mca_btl_ugni_device_t *device;
int rc;
if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
/* nothing to do */
return OPAL_SUCCESS;
}
if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state && send_disconnect) {
OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
rc = GNI_SmsgSendWTag (ep->smsg_ep_handle, NULL, 0, NULL, 0, -1,
MCA_BTL_UGNI_TAG_DISCONNECT);
OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);
if (GNI_RC_SUCCESS != rc) {
BTL_VERBOSE(("btl/ugni could not send close message"));
device = ep->smsg_ep_handle->device;
while (device->dev_smsg_local_cq.active_operations) {
/* ensure all sends are complete before removing any procs */
rc = mca_btl_ugni_progress_local_smsg (ugni_module, device);
if (OPAL_SUCCESS != rc) {
break;
}
/* we might want to wait for local completion here (do we even care), yes we do */
/* TODO: FIX FIX FIX */
}
/* TODO: FIX GROSS */
OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
(void) opal_common_ugni_ep_destroy (&ep->smsg_ep_handle);
(void) opal_common_ugni_ep_destroy (&ep->rdma_ep_handle);
OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);
if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state && send_disconnect) {
rc = mca_btl_ugni_ep_send_disconnect (ep);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_VERBOSE(("could not send disconnect message to peer"));
}
/* wait for the disconnect message to go out */
do {
/* ensure all sends are complete before removing any procs */
rc = mca_btl_ugni_progress_local_smsg (ugni_module, device);
if (OPAL_SUCCESS != rc) {
break;
}
} while (device->dev_smsg_local_cq.active_operations);
(void) opal_atomic_add_32 (&ep->smsg_ep_handle->device->smsg_connections, -1);
}
mca_btl_ugni_device_lock (device);
/* NTH: this call may not need the device lock. seems to work without it but
* the lock is here to be safe. */
(void) mca_btl_ugni_ep_handle_destroy (ep->smsg_ep_handle);
ep->smsg_ep_handle = NULL;
mca_btl_ugni_device_unlock (device);
if (ep->mailbox) {
opal_free_list_return (&ep->btl->smsg_mboxes, ((opal_free_list_item_t *) ep->mailbox));
opal_free_list_return (&ugni_module->smsg_mboxes, ((opal_free_list_item_t *) ep->mailbox));
ep->mailbox = NULL;
}
ep->state = MCA_BTL_UGNI_EP_STATE_INIT;
(void) opal_atomic_add_64 (&ep->btl->connected_peer_count, -1);
return OPAL_SUCCESS;
}
static inline int mca_btl_ugni_ep_connect_start (mca_btl_base_endpoint_t *ep) {
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
mca_btl_ugni_device_t *device = ugni_module->devices;
int rc;
rc = mca_btl_ugni_ep_connect_rdma (ep);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
/* protect against re-entry from opal_progress */
if (OPAL_UNLIKELY(MCA_BTL_UGNI_EP_STATE_CONNECTING == ep->state)) {
return OPAL_ERR_RESOURCE_BUSY;
}
BTL_VERBOSE(("initiaiting connection to remote peer with address: %u id: %u proc: %p",
ep->common->ep_rem_addr, ep->common->ep_rem_id, (void *)ep->peer_proc));
ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTING;
BTL_VERBOSE(("initiating connection to remote peer with address: %u id: %u proc: %p",
ep->ep_rem_addr, ep->ep_rem_id, (void *)ep->peer_proc));
/* bind endpoint to remote address */
/* we bind two endpoints to seperate out local smsg completion and local fma completion */
rc = opal_common_ugni_ep_create (ep->common, ep->btl->smsg_local_cq, &ep->smsg_ep_handle);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
mca_btl_ugni_device_lock (device);
ep->smsg_ep_handle = mca_btl_ugni_ep_handle_create (ep, device->dev_smsg_local_cq.gni_handle, device);
mca_btl_ugni_device_unlock (device);
if (OPAL_UNLIKELY(NULL == ep->smsg_ep_handle)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
/* build connection data */
@ -110,9 +232,10 @@ static inline int mca_btl_ugni_ep_connect_start (mca_btl_base_endpoint_t *ep) {
return rc;
}
ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTING;
memset (&ep->remote_attr, 0, sizeof (ep->remote_attr));
ep->remote_attr = calloc (1, sizeof (*ep->remote_attr));
if (OPAL_UNLIKELY(NULL == ep->remote_attr)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
BTL_VERBOSE(("btl/ugni connection to remote peer initiated"));
@ -120,15 +243,16 @@ static inline int mca_btl_ugni_ep_connect_start (mca_btl_base_endpoint_t *ep) {
}
static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) {
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
gni_return_t grc;
int rc;
BTL_VERBOSE(("finishing connection. remote attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
"mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
"msg_maxsize = %d", ep->remote_attr.smsg_attr.msg_type, ep->remote_attr.smsg_attr.msg_buffer,
ep->remote_attr.smsg_attr.buff_size, ep->remote_attr.smsg_attr.mem_hndl.qword1,
ep->remote_attr.smsg_attr.mem_hndl.qword2, ep->remote_attr.smsg_attr.mbox_offset,
ep->remote_attr.smsg_attr.mbox_maxcredit, ep->remote_attr.smsg_attr.msg_maxsize));
"msg_maxsize = %d", ep->remote_attr->smsg_attr.msg_type, ep->remote_attr->smsg_attr.msg_buffer,
ep->remote_attr->smsg_attr.buff_size, ep->remote_attr->smsg_attr.mem_hndl.qword1,
ep->remote_attr->smsg_attr.mem_hndl.qword2, ep->remote_attr->smsg_attr.mbox_offset,
ep->remote_attr->smsg_attr.mbox_maxcredit, ep->remote_attr->smsg_attr.msg_maxsize));
BTL_VERBOSE(("finishing connection. local attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
"mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
@ -137,54 +261,78 @@ static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) {
ep->mailbox->attr.smsg_attr.mem_hndl.qword2, ep->mailbox->attr.smsg_attr.mbox_offset,
ep->mailbox->attr.smsg_attr.mbox_maxcredit, ep->mailbox->attr.smsg_attr.msg_maxsize));
grc = GNI_SmsgInit (ep->smsg_ep_handle, &ep->mailbox->attr.smsg_attr, &ep->remote_attr.smsg_attr);
grc = GNI_SmsgInit (ep->smsg_ep_handle->gni_handle, &ep->mailbox->attr.smsg_attr,
&ep->remote_attr->smsg_attr);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
BTL_ERROR(("error initializing SMSG protocol. rc = %d", grc));
return opal_common_rc_ugni_to_opal (grc);
return mca_btl_rc_ugni_to_opal (grc);
}
/* set the local event data to the local index and the remote event data to my
* index on the remote peer. This makes lookup of endpoints on completion take
* a single lookup in the endpoints array. we will not be able to change the
* remote peer's index in the endpoint's array after this point. */
GNI_EpSetEventData (ep->rdma_ep_handle, ep->index, ep->remote_attr.index);
GNI_EpSetEventData (ep->smsg_ep_handle, ep->index, ep->remote_attr.index);
GNI_EpSetEventData (ep->smsg_ep_handle->gni_handle, ep->index, ep->remote_attr->index);
ep->rmt_irq_mem_hndl = ep->remote_attr.rmt_irq_mem_hndl;
ep->rmt_irq_mem_hndl = ep->remote_attr->rmt_irq_mem_hndl;
ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTED;
(void) opal_atomic_add_64 (&ep->btl->connected_peer_count, 1);
(void) opal_atomic_add_32 (&ep->smsg_ep_handle->device->smsg_connections, 1);
/* send all pending messages */
BTL_VERBOSE(("endpoint connected. posting %u sends", (unsigned int) opal_list_get_size (&ep->frag_wait_list)));
rc = mca_btl_ugni_progress_send_wait_list (ep);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
OPAL_THREAD_LOCK(&ep->btl->ep_wait_list_lock);
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
if (false == ep->wait_listed) {
opal_list_append (&ep->btl->ep_wait_list, &ep->super);
opal_list_append (&ugni_module->ep_wait_list, &ep->super);
ep->wait_listed = true;
}
OPAL_THREAD_UNLOCK(&ep->btl->ep_wait_list_lock);
OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
}
free (ep->remote_attr);
ep->remote_attr = NULL;
return OPAL_SUCCESS;
}
static inline int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep) {
static int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
mca_btl_ugni_device_t *device = ep->smsg_ep_handle->device;
gni_return_t rc;
BTL_VERBOSE(("posting directed datagram to remote id: %d for endpoint %p", ep->common->ep_rem_id, (void *)ep));
ep->mailbox->attr.rmt_irq_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl;
BTL_VERBOSE(("posting directed datagram to remote id: %d for endpoint %p", ep->ep_rem_id, (void *)ep));
/* the irq cq is associated with only the first device */
ep->mailbox->attr.rmt_irq_mem_hndl = ugni_module->devices->smsg_irq_mhndl;
rc = GNI_EpPostDataWId (ep->smsg_ep_handle, &ep->mailbox->attr, sizeof (ep->mailbox->attr),
&ep->remote_attr, sizeof (ep->remote_attr),
rc = GNI_EpPostDataWId (ep->smsg_ep_handle->gni_handle, &ep->mailbox->attr, sizeof (ep->mailbox->attr),
ep->remote_attr, sizeof (*ep->remote_attr),
MCA_BTL_UGNI_CONNECT_DIRECTED_ID | ep->index);
return opal_common_rc_ugni_to_opal (rc);
return mca_btl_rc_ugni_to_opal (rc);
}
int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep) {
int mca_btl_ugni_wildcard_ep_post (mca_btl_ugni_module_t *ugni_module)
{
gni_return_t rc;
BTL_VERBOSE(("posting wildcard datagram"));
memset (&ugni_module->wc_local_attr, 0, sizeof (ugni_module->wc_local_attr));
memset (&ugni_module->wc_remote_attr, 0, sizeof (ugni_module->wc_remote_attr));
rc = GNI_EpPostDataWId (ugni_module->wildcard_ep, &ugni_module->wc_local_attr,
sizeof (ugni_module->wc_local_attr), &ugni_module->wc_remote_attr,
sizeof (ugni_module->wc_remote_attr), MCA_BTL_UGNI_CONNECT_WILDCARD_ID);
return mca_btl_rc_ugni_to_opal (rc);
}
int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep)
{
int rc;
BTL_VERBOSE(("progressing connection for endpoint %p with state %d", (void *)ep, ep->state));
@ -193,14 +341,17 @@ int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep) {
return OPAL_SUCCESS;
}
if (MCA_BTL_UGNI_EP_STATE_RDMA >= ep->state) {
if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
rc = mca_btl_ugni_ep_connect_start (ep);
if (OPAL_SUCCESS != rc) {
return rc;
}
}
if (GNI_SMSG_TYPE_INVALID == ep->remote_attr.smsg_attr.msg_type) {
BTL_VERBOSE(("ep->remote_attr->smsg_attr = {.msg_type = %d, .msg_buffer = 0x%lx}", ep->remote_attr->smsg_attr.msg_type,
ep->remote_attr->smsg_attr.msg_buffer));
if (GNI_SMSG_TYPE_INVALID == ep->remote_attr->smsg_attr.msg_type) {
/* use datagram to exchange connection information with the remote peer */
if (!ep->dg_posted) {
rc = mca_btl_ugni_directed_ep_post (ep);
@ -217,3 +368,77 @@ int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep) {
return mca_btl_ugni_ep_connect_finish (ep);
}
int mca_btl_ugni_endpoint_handle_init_rdma (opal_free_list_item_t *item, void *ctx)
{
mca_btl_ugni_endpoint_handle_t *handle = (mca_btl_ugni_endpoint_handle_t *) item;
mca_btl_ugni_device_t *device = (mca_btl_ugni_device_t *) ctx;
gni_return_t grc;
grc = GNI_EpCreate (device->dev_handle, device->dev_rdma_local_cq.gni_handle, &handle->gni_handle);
handle->device = device;
return mca_btl_rc_ugni_to_opal (grc);
}
static void mca_btl_ugni_endpoint_handle_construct (mca_btl_ugni_endpoint_handle_t *handle)
{
handle->gni_handle = 0;
}
static void mca_btl_ugni_endpoint_handle_destruct (mca_btl_ugni_endpoint_handle_t *handle)
{
if (handle->gni_handle) {
GNI_EpDestroy (handle->gni_handle);
handle->gni_handle = 0;
}
}
OBJ_CLASS_INSTANCE(mca_btl_ugni_endpoint_handle_t, opal_object_t,
mca_btl_ugni_endpoint_handle_construct,
mca_btl_ugni_endpoint_handle_destruct);
mca_btl_ugni_endpoint_handle_t *mca_btl_ugni_ep_handle_create (mca_btl_ugni_endpoint_t *ep, gni_cq_handle_t cq,
mca_btl_ugni_device_t *device)
{
mca_btl_ugni_endpoint_handle_t *ep_handle;
gni_return_t grc;
ep_handle = OBJ_NEW(mca_btl_ugni_endpoint_handle_t);
if (OPAL_UNLIKELY(NULL == ep_handle)) {
return NULL;
}
ep_handle->device = device;
/* create a uGNI endpoint handle and bind it to the remote peer */
grc = GNI_EpCreate (device->dev_handle, cq, &ep_handle->gni_handle);
if (OPAL_LIKELY(GNI_RC_SUCCESS == grc)) {
grc = GNI_EpBind (ep_handle->gni_handle, ep->ep_rem_addr, ep->ep_rem_id);
}
if (GNI_RC_SUCCESS != grc) {
OBJ_RELEASE(ep_handle);
ep_handle = NULL;
}
return ep_handle;
}
int mca_btl_ugni_ep_handle_destroy (mca_btl_ugni_endpoint_handle_t *ep_handle)
{
int rc;
if (NULL == ep_handle || 0 == ep_handle->gni_handle) {
return OPAL_SUCCESS;
}
/* TODO: need to fix, may be outstanding tx's, etc. */
rc = GNI_EpUnbind (ep_handle->gni_handle);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
/* should warn */
}
OBJ_RELEASE(ep_handle);
return OPAL_SUCCESS;
}

View File

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -17,15 +17,22 @@
enum mca_btl_ugni_endpoint_state_t {
MCA_BTL_UGNI_EP_STATE_INIT = 0,
MCA_BTL_UGNI_EP_STATE_START,
MCA_BTL_UGNI_EP_STATE_RDMA,
MCA_BTL_UGNI_EP_STATE_CONNECTING,
MCA_BTL_UGNI_EP_STATE_CONNECTED
MCA_BTL_UGNI_EP_STATE_CONNECTED,
};
typedef enum mca_btl_ugni_endpoint_state_t mca_btl_ugni_endpoint_state_t;
struct mca_btl_ugni_smsg_mbox_t;
struct mca_btl_ugni_endpoint_handle_t {
opal_free_list_item_t super;
mca_btl_ugni_device_t *device;
gni_ep_handle_t gni_handle;
};
typedef struct mca_btl_ugni_endpoint_handle_t mca_btl_ugni_endpoint_handle_t;
OBJ_CLASS_DECLARATION(mca_btl_ugni_endpoint_handle_t);
typedef struct mca_btl_base_endpoint_t {
opal_list_item_t super;
@ -37,24 +44,34 @@ typedef struct mca_btl_base_endpoint_t {
opal_recursive_mutex_t lock;
mca_btl_ugni_endpoint_state_t state;
opal_common_ugni_endpoint_t *common;
/** Remote NIC address */
uint32_t ep_rem_addr;
mca_btl_ugni_module_t *btl;
/** Remote CDM identifier (base) */
uint32_t ep_rem_id;
gni_ep_handle_t smsg_ep_handle;
gni_ep_handle_t rdma_ep_handle;
/** endpoint to use for SMSG messages */
mca_btl_ugni_endpoint_handle_t *smsg_ep_handle;
mca_btl_ugni_endpoint_attr_t remote_attr; /* TODO: UGH, remove this */
/** temporary space to store the remote SMSG attributes */
mca_btl_ugni_endpoint_attr_t *remote_attr;
/** SMSG mailbox assigned to this endpoint */
struct mca_btl_ugni_smsg_mbox_t *mailbox;
gni_mem_handle_t rmt_irq_mem_hndl;
/** Remote IRQ handle (for async completion) */
gni_mem_handle_t rmt_irq_mem_hndl;
/** frags waiting for SMSG credits */
opal_list_t frag_wait_list;
/** endpoint is currently wait-listed for SMSG progress */
bool wait_listed;
/** protect against race on connection */
bool dg_posted;
/** protect against re-entry to SMSG */
int32_t smsg_progressing;
int index;
@ -65,49 +82,10 @@ OBJ_CLASS_DECLARATION(mca_btl_ugni_endpoint_t);
int mca_btl_ugni_ep_connect_progress (mca_btl_ugni_endpoint_t *ep);
int mca_btl_ugni_ep_disconnect (mca_btl_ugni_endpoint_t *ep, bool send_disconnect);
static inline int mca_btl_ugni_init_ep (mca_btl_ugni_module_t *ugni_module,
mca_btl_ugni_endpoint_t **ep,
mca_btl_ugni_module_t *btl,
opal_proc_t *peer_proc) {
mca_btl_ugni_endpoint_t *endpoint;
endpoint = OBJ_NEW(mca_btl_ugni_endpoint_t);
assert (endpoint != NULL);
endpoint->smsg_progressing = 0;
endpoint->state = MCA_BTL_UGNI_EP_STATE_INIT;
endpoint->btl = btl;
endpoint->peer_proc = peer_proc;
endpoint->index = opal_pointer_array_add (&ugni_module->endpoints, endpoint);
*ep = endpoint;
return OPAL_SUCCESS;
}
static inline void mca_btl_ugni_release_ep (mca_btl_ugni_endpoint_t *ep) {
int rc;
if (ep->common) {
opal_mutex_lock (&ep->lock);
rc = mca_btl_ugni_ep_disconnect (ep, false);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_VERBOSE(("btl/ugni error disconnecting endpoint"));
}
/* TODO -- Clear space at the end of the endpoint array */
opal_pointer_array_set_item (&ep->btl->endpoints, ep->index, NULL);
opal_mutex_unlock (&ep->lock);
opal_common_ugni_endpoint_return (ep->common);
}
OBJ_RELEASE(ep);
}
int mca_btl_ugni_wildcard_ep_post (mca_btl_ugni_module_t *ugni_module);
void mca_btl_ugni_release_ep (mca_btl_ugni_endpoint_t *ep);
int mca_btl_ugni_init_ep (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_endpoint_t **ep,
mca_btl_ugni_module_t *btl, opal_proc_t *peer_proc);
static inline int mca_btl_ugni_check_endpoint_state (mca_btl_ugni_endpoint_t *ep) {
int rc;
@ -120,8 +98,6 @@ static inline int mca_btl_ugni_check_endpoint_state (mca_btl_ugni_endpoint_t *ep
switch (ep->state) {
case MCA_BTL_UGNI_EP_STATE_INIT:
case MCA_BTL_UGNI_EP_STATE_RDMA:
case MCA_BTL_UGNI_EP_STATE_START:
rc = mca_btl_ugni_ep_connect_progress (ep);
if (OPAL_SUCCESS != rc) {
break;
@ -138,63 +114,91 @@ static inline int mca_btl_ugni_check_endpoint_state (mca_btl_ugni_endpoint_t *ep
return rc;
}
static inline int mca_btl_ugni_ep_connect_rdma (mca_btl_base_endpoint_t *ep) {
int rc;
if (ep->state >= MCA_BTL_UGNI_EP_STATE_RDMA) {
return OPAL_SUCCESS;
}
/* protect against re-entry from opal_progress */
if (OPAL_UNLIKELY(MCA_BTL_UGNI_EP_STATE_START == ep->state)) {
return OPAL_ERR_RESOURCE_BUSY;
}
ep->state = MCA_BTL_UGNI_EP_STATE_START;
/* get the modex info for this endpoint and setup a ugni endpoint. this call may lead
* to re-entry through opal_progress(). */
rc = opal_common_ugni_endpoint_for_proc (ep->btl->device, ep->peer_proc, &ep->common);
if (OPAL_SUCCESS != rc) {
assert (0);
return rc;
}
/* bind endpoint to remote address */
rc = opal_common_ugni_ep_create (ep->common, ep->btl->rdma_local_cq, &ep->rdma_ep_handle);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
}
ep->state = MCA_BTL_UGNI_EP_STATE_RDMA;
return OPAL_SUCCESS;
/**
* Accessor function for endpoint btl
*
* @param[in] ep endpoint to query
*
* This helper function exists to make it easy to switch between using a single
* and multiple ugni modules. Currently there is only one so we just use the
* pointer in the component structure. This saves 4-8 bytes in the endpoint
* structure.
*/
static inline mca_btl_ugni_module_t *mca_btl_ugni_ep_btl (mca_btl_ugni_endpoint_t *ep)
{
/* there is only one ugni module at this time. if that changes add a btl pointer back
* to the endpoint structure. */
return mca_btl_ugni_component.modules;
}
static inline int mca_btl_ugni_check_endpoint_state_rdma (mca_btl_base_endpoint_t *ep) {
int rc;
if (OPAL_LIKELY(MCA_BTL_UGNI_EP_STATE_INIT < ep->state)) {
return OPAL_SUCCESS;
/**
* Allocate and bind a uGNI endpoint handle to the remote peer.
*
* @param[in] ep BTL endpoint
* @param[in] cq completion queue
* @param[out] ep_handle uGNI endpoint handle
*/
mca_btl_ugni_endpoint_handle_t *mca_btl_ugni_ep_handle_create (mca_btl_ugni_endpoint_t *ep, gni_cq_handle_t cq,
mca_btl_ugni_device_t *device);
/**
* Unbind and free the uGNI endpoint handle.
*
* @param[in] ep_handle uGNI endpoint handle to unbind and release
*/
int mca_btl_ugni_ep_handle_destroy (mca_btl_ugni_endpoint_handle_t *ep_handle);
/**
* Free list initialization function for endpoint handles (DO NOT CALL outside free list)
*
* @param[in] item Free list item to initialize
* @param[in] ctx Free list context
*
* @returns OPAL_SUCCESS on success
* @returns OPAL error code on error
*/
int mca_btl_ugni_endpoint_handle_init_rdma (opal_free_list_item_t *item, void *ctx);
/**
* @brief get an endpoint handle from a device's free list
*
* @param[in] ep btl endpoint
* @param[in] device btl device to use
*
* This function MUST be called with the device lock held. This was done over using
* the atomic free list to avoid unnecessary atomics in the critical path.
*/
static inline mca_btl_ugni_endpoint_handle_t *
mca_btl_ugni_ep_get_rdma (mca_btl_ugni_endpoint_t *ep, mca_btl_ugni_device_t *device)
{
mca_btl_ugni_endpoint_handle_t *ep_handle;
gni_return_t grc;
ep_handle = (mca_btl_ugni_endpoint_handle_t *) opal_free_list_get_st (&device->endpoints);
if (OPAL_UNLIKELY(NULL == ep_handle)) {
return NULL;
}
grc = GNI_EpBind (ep_handle->gni_handle, ep->ep_rem_addr, ep->ep_rem_id | device->dev_index);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
opal_free_list_return_st (&device->endpoints, &ep_handle->super);
ep_handle = NULL;
}
opal_mutex_lock (&ep->lock);
rc = mca_btl_ugni_ep_connect_rdma (ep);
opal_mutex_unlock (&ep->lock);
return rc;
return ep_handle;
}
static inline int mca_btl_ugni_wildcard_ep_post (mca_btl_ugni_module_t *ugni_module) {
gni_return_t rc;
BTL_VERBOSE(("posting wildcard datagram"));
memset (&ugni_module->wc_local_attr, 0, sizeof (ugni_module->wc_local_attr));
memset (&ugni_module->wc_remote_attr, 0, sizeof (ugni_module->wc_remote_attr));
rc = GNI_EpPostDataWId (ugni_module->wildcard_ep, &ugni_module->wc_local_attr,
sizeof (ugni_module->wc_local_attr), &ugni_module->wc_remote_attr,
sizeof (ugni_module->wc_remote_attr), MCA_BTL_UGNI_CONNECT_WILDCARD_ID);
return opal_common_rc_ugni_to_opal (rc);
/**
* @brief return an endpoint handle to a device's free list
*
* @param[in] ep_handle endpoint handle to return
*
* This function MUST be called with the device lock held. This was done over using
 * the atomic free list to avoid unnecessary atomics in the critical path.
*/
static inline void mca_btl_ugni_ep_return_rdma (mca_btl_ugni_endpoint_handle_t *ep_handle)
{
(void) GNI_EpUnbind (ep_handle->gni_handle);
opal_free_list_return_st (&ep_handle->device->endpoints, &ep_handle->super);
}
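A minimal sketch of the intended calling pattern for the two helpers above, assuming a caller that is not already inside one of the *_device routines (those already hold the lock via the serialization wrapper); the variables here are illustrative only:

mca_btl_ugni_endpoint_handle_t *handle;

mca_btl_ugni_device_lock (device);
handle = mca_btl_ugni_ep_get_rdma (ep, device);
if (NULL != handle) {
    /* ... post an operation on handle->gni_handle ... */
    mca_btl_ugni_ep_return_rdma (handle);
}
mca_btl_ugni_device_unlock (device);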
#endif /* MCA_BTL_UGNI_ENDPOINT_H */

View File

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -38,11 +38,25 @@ OBJ_CLASS_INSTANCE(mca_btl_ugni_rdma_frag_t, mca_btl_base_descriptor_t,
OBJ_CLASS_INSTANCE(mca_btl_ugni_eager_frag_t, mca_btl_base_descriptor_t,
mca_btl_ugni_eager_frag_constructor, NULL);
OBJ_CLASS_INSTANCE(mca_btl_ugni_post_descriptor_t, opal_free_list_item_t,
NULL, NULL);
int mca_btl_ugni_frag_init (mca_btl_ugni_base_frag_t *frag, mca_btl_ugni_module_t *ugni_module)
static void mca_btl_ugni_post_descriptor_constructor (mca_btl_ugni_post_descriptor_t *desc)
{
desc->cq = NULL;
desc->ep_handle = NULL;
}
OBJ_CLASS_INSTANCE(mca_btl_ugni_post_descriptor_t, opal_free_list_item_t,
mca_btl_ugni_post_descriptor_constructor, NULL);
int mca_btl_ugni_frag_init (mca_btl_ugni_base_frag_t *frag, void *id)
{
/* NTH: the id is a combination of the module id and the free list id. for now there
* is only ever one module so the module id is ignored. if this changes the code
* here and btl_ugni_add_procs.c (opal_free_list_init calls) needs to be updated */
intptr_t free_list_id = (intptr_t) id & 0xff;
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_component.modules;
frag->msg_id = opal_pointer_array_add (&ugni_module->pending_smsg_frags_bb, (void *) frag);
frag->my_list = ugni_module->frags_lists + free_list_id;
return OPAL_SUCCESS;
}
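For illustration only, and consistent with the 0xff mask above rather than taken from this change: the matching opal_free_list_init() calls in btl_ugni_add_procs.c would pass an id packed along these lines (module_index is a hypothetical name; today it is always 0):

/* free list id in the low 8 bits, module index above it */
void *ctx = (void *) (intptr_t) ((module_index << 8) | MCA_BTL_UGNI_LIST_SMSG);
/* ctx is handed to opal_free_list_init () as the item-init context, which in turn
 * passes it to mca_btl_ugni_frag_init () as the id argument */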

View File

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2013 The University of Tennessee and The University
@ -72,7 +72,7 @@ typedef struct mca_btl_ugni_base_frag_t {
uint16_t flags;
mca_btl_ugni_frag_hdr_t hdr;
mca_btl_base_segment_t segments[2];
opal_common_ugni_post_desc_t post_desc;
gni_post_descriptor_t post_desc;
mca_btl_base_endpoint_t *endpoint;
mca_btl_ugni_reg_t *registration;
opal_free_list_t *my_list;
@ -88,12 +88,15 @@ typedef struct mca_btl_ugni_base_frag_t mca_btl_ugni_eager_frag_t;
typedef struct mca_btl_ugni_post_descriptor_t {
opal_free_list_item_t super;
opal_common_ugni_post_desc_t desc;
gni_post_descriptor_t desc;
mca_btl_ugni_endpoint_handle_t *ep_handle;
mca_btl_base_endpoint_t *endpoint;
mca_btl_base_registration_handle_t *local_handle;
mca_btl_base_rdma_completion_fn_t cbfunc;
mca_btl_ugni_cq_t *cq;
void *cbdata;
void *ctx;
int tries;
} mca_btl_ugni_post_descriptor_t;
OBJ_CLASS_DECLARATION(mca_btl_ugni_post_descriptor_t);
@ -101,26 +104,38 @@ OBJ_CLASS_DECLARATION(mca_btl_ugni_post_descriptor_t);
#define MCA_BTL_UGNI_DESC_TO_PDESC(desc) \
((mca_btl_ugni_post_descriptor_t *)((uintptr_t) (desc) - offsetof (mca_btl_ugni_post_descriptor_t, desc)))
static inline void mca_btl_ugni_alloc_post_descriptor (mca_btl_base_endpoint_t *endpoint, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata,
mca_btl_ugni_post_descriptor_t **desc)
static inline mca_btl_ugni_post_descriptor_t *
mca_btl_ugni_alloc_post_descriptor (mca_btl_base_endpoint_t *endpoint, mca_btl_base_registration_handle_t *local_handle,
mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
*desc = (mca_btl_ugni_post_descriptor_t *) opal_free_list_get (&endpoint->btl->post_descriptors);
if (NULL != *desc) {
(*desc)->cbfunc = cbfunc;
(*desc)->ctx = cbcontext;
(*desc)->cbdata = cbdata;
(*desc)->local_handle = local_handle;
(*desc)->endpoint = endpoint;
(void) OPAL_THREAD_ADD64(&endpoint->btl->active_rdma_count, 1);
/* mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (endpoint); */
mca_btl_ugni_post_descriptor_t *desc;
desc = OBJ_NEW(mca_btl_ugni_post_descriptor_t);
/* (mca_btl_ugni_post_descriptor_t *) opal_free_list_get (&ugni_module->post_descriptors); */
if (OPAL_LIKELY(NULL != desc)) {
desc->cbfunc = cbfunc;
desc->ctx = cbcontext;
desc->cbdata = cbdata;
desc->local_handle = local_handle;
desc->endpoint = endpoint;
}
return desc;
}
static inline void mca_btl_ugni_return_post_descriptor (mca_btl_ugni_module_t *module,
mca_btl_ugni_post_descriptor_t *desc)
static inline void mca_btl_ugni_return_post_descriptor (mca_btl_ugni_post_descriptor_t *desc)
{
(void) OPAL_THREAD_ADD64(&module->active_rdma_count, -1);
opal_free_list_return (&module->post_descriptors, &desc->super);
/* mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (desc->endpoint); */
if (NULL != desc->ep_handle) {
mca_btl_ugni_ep_return_rdma (desc->ep_handle);
/* desc->ep_handle = NULL; */
}
/* desc->cq = NULL; */
/* opal_free_list_return (&ugni_module->post_descriptors, &desc->super); */
free (desc);
}
static inline void mca_btl_ugni_post_desc_complete (mca_btl_ugni_module_t *module, mca_btl_ugni_post_descriptor_t *desc, int rc)
@ -129,40 +144,38 @@ static inline void mca_btl_ugni_post_desc_complete (mca_btl_ugni_module_t *modul
if (NULL != desc->cbfunc) {
/* call the user's callback function */
desc->cbfunc (&module->super, desc->endpoint, (void *)(intptr_t) desc->desc.base.local_addr,
desc->cbfunc (&module->super, desc->endpoint, (void *)(intptr_t) desc->desc.local_addr,
desc->local_handle, desc->ctx, desc->cbdata, rc);
}
/* the descriptor is no longer needed */
mca_btl_ugni_return_post_descriptor (module, desc);
mca_btl_ugni_return_post_descriptor (desc);
}
OBJ_CLASS_DECLARATION(mca_btl_ugni_smsg_frag_t);
OBJ_CLASS_DECLARATION(mca_btl_ugni_rdma_frag_t);
OBJ_CLASS_DECLARATION(mca_btl_ugni_eager_frag_t);
int mca_btl_ugni_frag_init (mca_btl_ugni_base_frag_t *frag, mca_btl_ugni_module_t *ugni_module);
int mca_btl_ugni_frag_init (mca_btl_ugni_base_frag_t *frag, void *id);
static inline int mca_btl_ugni_frag_alloc (mca_btl_base_endpoint_t *ep,
opal_free_list_t *list,
mca_btl_ugni_base_frag_t **frag)
static inline mca_btl_ugni_base_frag_t *mca_btl_ugni_frag_alloc (mca_btl_base_endpoint_t *ep,
opal_free_list_t *list)
{
*frag = (mca_btl_ugni_base_frag_t *) opal_free_list_get (list);
if (OPAL_LIKELY(NULL != *frag)) {
(*frag)->my_list = list;
(*frag)->endpoint = ep;
(*frag)->ref_cnt = 1;
return OPAL_SUCCESS;
mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) opal_free_list_get (list);
if (OPAL_LIKELY(NULL != frag)) {
frag->endpoint = ep;
frag->ref_cnt = 1;
}
return OPAL_ERR_OUT_OF_RESOURCE;
return frag;
}
static inline int mca_btl_ugni_frag_return (mca_btl_ugni_base_frag_t *frag)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (frag->endpoint);
if (frag->registration) {
frag->endpoint->btl->rcache->rcache_deregister (frag->endpoint->btl->rcache,
(mca_rcache_base_registration_t *) frag->registration);
ugni_module->rcache->rcache_deregister (ugni_module->rcache,
(mca_rcache_base_registration_t *) frag->registration);
frag->registration = NULL;
}
@ -174,6 +187,7 @@ static inline int mca_btl_ugni_frag_return (mca_btl_ugni_base_frag_t *frag)
}
static inline bool mca_btl_ugni_frag_del_ref (mca_btl_ugni_base_frag_t *frag, int rc) {
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (frag->endpoint);
int32_t ref_cnt;
opal_atomic_mb ();
@ -186,7 +200,7 @@ static inline bool mca_btl_ugni_frag_del_ref (mca_btl_ugni_base_frag_t *frag, in
/* call callback if specified */
if (frag->base.des_flags & MCA_BTL_DES_SEND_ALWAYS_CALLBACK) {
frag->base.des_cbfunc(&frag->endpoint->btl->super, frag->endpoint, &frag->base, rc);
frag->base.des_cbfunc(&ugni_module->super, frag->endpoint, &frag->base, rc);
}
if (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP) {
@ -208,15 +222,38 @@ static inline bool mca_btl_ugni_frag_check_complete (mca_btl_ugni_base_frag_t *f
return !!(MCA_BTL_UGNI_FRAG_COMPLETE & frag->flags);
}
#define MCA_BTL_UGNI_FRAG_ALLOC_SMSG(ep, frag) \
mca_btl_ugni_frag_alloc((ep), &(ep)->btl->smsg_frags, &(frag))
#define MCA_BTL_UGNI_FRAG_ALLOC_RDMA(ep, frag) \
mca_btl_ugni_frag_alloc((ep), &(ep)->btl->rdma_frags, &(frag))
#define MCA_BTL_UGNI_FRAG_ALLOC_RDMA_INT(ep, frag) \
mca_btl_ugni_frag_alloc((ep), &(ep)->btl->rdma_int_frags, &(frag))
#define MCA_BTL_UGNI_FRAG_ALLOC_EAGER_SEND(ep, frag) \
mca_btl_ugni_frag_alloc((ep), &(ep)->btl->eager_frags_send, &(frag))
#define MCA_BTL_UGNI_FRAG_ALLOC_EAGER_RECV(ep, frag) \
mca_btl_ugni_frag_alloc((ep), &(ep)->btl->eager_frags_recv, &(frag))
void mca_btl_ugni_wait_list_append (mca_btl_ugni_module_t *ugni_module, mca_btl_base_endpoint_t *endpoint,
mca_btl_ugni_base_frag_t *frag);
static inline mca_btl_ugni_base_frag_t *mca_btl_ugni_frag_alloc_smsg (mca_btl_base_endpoint_t *ep)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
return mca_btl_ugni_frag_alloc (ep, ugni_module->frags_lists + MCA_BTL_UGNI_LIST_SMSG);
}
static inline mca_btl_ugni_base_frag_t *mca_btl_ugni_frag_alloc_rdma (mca_btl_base_endpoint_t *ep)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
return mca_btl_ugni_frag_alloc (ep, ugni_module->frags_lists + MCA_BTL_UGNI_LIST_RDMA);
}
static inline mca_btl_ugni_base_frag_t *mca_btl_ugni_frag_alloc_rdma_int (mca_btl_base_endpoint_t *ep)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
return mca_btl_ugni_frag_alloc (ep, ugni_module->frags_lists + MCA_BTL_UGNI_LIST_RDMA_INT);
}
static inline mca_btl_ugni_base_frag_t *mca_btl_ugni_frag_alloc_eager_send (mca_btl_base_endpoint_t *ep)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
return mca_btl_ugni_frag_alloc (ep, ugni_module->frags_lists + MCA_BTL_UGNI_LIST_EAGER_SEND);
}
static inline mca_btl_ugni_base_frag_t *mca_btl_ugni_frag_alloc_eager_recv (mca_btl_base_endpoint_t *ep)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
return mca_btl_ugni_frag_alloc (ep, ugni_module->frags_lists + MCA_BTL_UGNI_LIST_EAGER_RECV);
}
#endif /* MCA_BTL_UGNI_FRAG_H */

View File

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -37,11 +37,8 @@ int mca_btl_ugni_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t
return OPAL_ERR_NOT_AVAILABLE;
}
BTL_VERBOSE(("Using RDMA/FMA Get from local address %p to remote address %" PRIx64,
local_address, remote_address));
/* cause endpoint to bind if it isn't already (bind is sufficient for rdma) */
(void) mca_btl_ugni_check_endpoint_state_rdma (endpoint);
BTL_VERBOSE(("Using RDMA/FMA Get of %lu bytes to local address %p from remote address %" PRIx64,
(unsigned long) size, local_address, remote_address));
return mca_btl_ugni_post (endpoint, true, size, local_address, remote_address, local_handle,
remote_handle, order, cbfunc, cbcontext, cbdata);
@ -110,13 +107,15 @@ static void mca_btl_ugni_callback_eager_get (struct mca_btl_base_module_t *btl,
}
reg = mca_btl_base_active_message_trigger + tag;
reg->cbfunc(&frag->endpoint->btl->super, tag, &(tmp.base), reg->cbdata);
reg->cbfunc(&ugni_module->super, tag, &(tmp.base), reg->cbdata);
/* fill in the response header */
frag->hdr.rdma.ctx = frag->hdr.eager.ctx;
frag->flags = MCA_BTL_UGNI_FRAG_RESPONSE;
frag->ref_cnt = 1;
frag->ref_cnt = 1;
/* once complete use this fragment for a pending eager get if any exist */
frag->base.des_cbfunc = mca_btl_ugni_callback_eager_get_progress_pending;
@ -125,16 +124,7 @@ static void mca_btl_ugni_callback_eager_get (struct mca_btl_base_module_t *btl,
NULL, 0, MCA_BTL_UGNI_TAG_RDMA_COMPLETE);
if (OPAL_UNLIKELY(0 > rc)) {
/* queue fragment */
OPAL_THREAD_LOCK(&endpoint->lock);
if (false == endpoint->wait_listed) {
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
opal_list_append (&ugni_module->ep_wait_list, &endpoint->super);
OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
endpoint->wait_listed = true;
}
opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) frag);
OPAL_THREAD_UNLOCK(&endpoint->lock);
mca_btl_ugni_wait_list_append (ugni_module, endpoint, frag);
}
}
@ -142,7 +132,7 @@ int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *endpoint,
mca_btl_ugni_eager_ex_frag_hdr_t hdr,
mca_btl_ugni_base_frag_t *frag)
{
mca_btl_ugni_module_t *ugni_module = endpoint->btl;
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (endpoint);
size_t size;
int rc;
@ -151,10 +141,10 @@ int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *endpoint,
do {
if (NULL == frag) {
/* try to allocate a registered buffer */
rc = MCA_BTL_UGNI_FRAG_ALLOC_EAGER_RECV(endpoint, frag);
frag = mca_btl_ugni_frag_alloc_eager_recv (endpoint);
if (OPAL_UNLIKELY(NULL == frag)) {
/* no registered buffers available. try again later */
(void) MCA_BTL_UGNI_FRAG_ALLOC_RDMA_INT(endpoint, frag);
frag = mca_btl_ugni_frag_alloc_rdma_int (endpoint);
/* not much can be done if a small fragment can not be allocated. abort! */
assert (NULL != frag);

306
opal/mca/btl/ugni/btl_ugni_init.c Normal file
View File

@ -0,0 +1,306 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "btl_ugni.h"
#include "btl_ugni_endpoint.h"
#include "opal/class/opal_list.h"
#include "opal/dss/dss.h"
#include "opal/mca/pmix/pmix.h"
#include "opal/util/bit_ops.h"
static inline int get_ptag(uint8_t *out_ptag)
{
/* TODO no need for tmp */
char *ptr;
uint8_t tmp_ptag;
if (NULL == (ptr = getenv("PMI_GNI_PTAG"))) {
/* TODO add err msg - better rc? */
return OPAL_ERR_NOT_FOUND;
}
errno = 0;
tmp_ptag = (uint8_t)strtoul (ptr, (char **)NULL, 10);
if (0 != errno) {
/* TODO add err msg - better rc? */
return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
}
*out_ptag = tmp_ptag;
return OPAL_SUCCESS;
}
static inline int get_cookie (uint32_t *out_cookie)
{
/* TODO no need for tmp */
char *ptr;
uint32_t tmp_cookie;
if (NULL == (ptr = getenv("PMI_GNI_COOKIE"))) {
/* TODO add err msg - better rc? */
return OPAL_ERR_NOT_FOUND;
}
errno = 0;
tmp_cookie = (uint32_t) strtoul (ptr, NULL, 10);
if (0 != errno) {
/* TODO add err msg - better rc? */
return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
}
*out_cookie = tmp_cookie;
return OPAL_SUCCESS;
}
static unsigned int mca_btl_ugni_get_nic_address(int device_id)
{
unsigned int address, cpu_id;
gni_return_t status;
int i, alps_dev_id = -1;
char *token,*p_ptr;
p_ptr = getenv("PMI_GNI_DEV_ID");
if (!p_ptr) {
status = GNI_CdmGetNicAddress(device_id, &address, &cpu_id);
if(status != GNI_RC_SUCCESS) {
opal_output (0, "FAILED:GNI_CdmGetNicAddress returned error %d", status);
return (unsigned int)-1;
}
return address;
}
while (NULL != (token = strtok(p_ptr, ":"))) {
alps_dev_id = atoi(token);
if (alps_dev_id == device_id) {
break;
}
p_ptr = NULL;
}
if (OPAL_UNLIKELY(-1 == alps_dev_id)) {
return (unsigned int)-1;
}
p_ptr = getenv("PMI_GNI_LOC_ADDR");
if (OPAL_UNLIKELY(NULL == p_ptr)) {
return (unsigned int)-1;
}
i = 0;
while (NULL != (token = strtok(p_ptr, ":"))) {
if (i == alps_dev_id) {
return strtoul (token, NULL, 10);
}
p_ptr = NULL;
++i;
}
return (unsigned int)-1;
}
int mca_btl_ugni_device_init (mca_btl_ugni_device_t *device, int virtual_device_id)
{
uint32_t dev_pe_addr;
int rc;
OBJ_CONSTRUCT(&device->endpoints, opal_free_list_t);
OBJ_CONSTRUCT(&device->pending_post, opal_list_t);
rc = opal_free_list_init (&device->endpoints, sizeof (mca_btl_ugni_endpoint_handle_t),
8, OBJ_CLASS(mca_btl_ugni_endpoint_handle_t), 0, 8, 0,
mca_btl_ugni_component.local_cq_size, 16,
NULL, 0, NULL, mca_btl_ugni_endpoint_handle_init_rdma,
(void *) device);
if (OPAL_SUCCESS != rc) {
OBJ_DESTRUCT(&device->endpoints);
return rc;
}
/* create a communication domain */
rc = GNI_CdmCreate (mca_btl_ugni_component.cdm_id_base | virtual_device_id, mca_btl_ugni_component.ptag,
mca_btl_ugni_component.cookie, mca_btl_ugni_component.cdm_flags, &device->dev_cd_handle);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
/* this REALLY is an error but under alps + mapn we may not get any credentials */
BTL_VERBOSE(("Error: Creating communication domain %d for virtual device %d", rc, virtual_device_id));
return mca_btl_rc_ugni_to_opal (rc);
}
device->dev_index = virtual_device_id;
/* Report the NIC address */
OPAL_OUTPUT((-1, "Got NIC Addr: 0x%08x, CPU ID: %d", mca_btl_ugni_component.dev_addr, 0));
/* Attach device to the communication domain */
rc = GNI_CdmAttach (device->dev_cd_handle, 0, &dev_pe_addr, &device->dev_handle);
if (GNI_RC_SUCCESS != rc) {
BTL_VERBOSE(("Error: Attaching to communication domain. rc = %d, virtual device = %d", rc, virtual_device_id));
return mca_btl_rc_ugni_to_opal (rc);
}
device->lock = 0;
device->dev_rdma_local_cq.gni_handle = 0;
device->dev_rdma_local_cq.active_operations = 0;
device->dev_rdma_local_irq_cq.gni_handle = 0;
device->dev_rdma_local_irq_cq.active_operations = 0;
device->dev_smsg_local_cq.gni_handle = 0;
device->dev_smsg_local_cq.active_operations = 0;
return OPAL_SUCCESS;
}
int mca_btl_ugni_device_fini (mca_btl_ugni_device_t *dev)
{
int rc;
OBJ_DESTRUCT(&dev->endpoints);
OBJ_DESTRUCT(&dev->pending_post);
if (0 != dev->dev_rdma_local_cq.gni_handle) {
GNI_CqDestroy (dev->dev_rdma_local_cq.gni_handle);
dev->dev_rdma_local_cq.gni_handle = 0;
}
if (0 != dev->dev_rdma_local_irq_cq.gni_handle) {
GNI_CqDestroy (dev->dev_rdma_local_irq_cq.gni_handle);
dev->dev_rdma_local_irq_cq.gni_handle = 0;
}
if (0 != dev->dev_smsg_local_cq.gni_handle) {
GNI_CqDestroy (dev->dev_smsg_local_cq.gni_handle);
dev->dev_smsg_local_cq.gni_handle = 0;
}
rc = GNI_CdmDestroy (dev->dev_cd_handle);
if (GNI_RC_SUCCESS != rc) {
BTL_VERBOSE(("error destroying cdm handle"));
}
return OPAL_SUCCESS;
}
/*
* Send local device information and other information
* required for setup
*/
static int mca_btl_ugni_send_modex (void)
{
struct mca_btl_ugni_modex_t modex;
uint32_t modex_size;
char *modex_msg;
int rc;
modex_size = sizeof (struct mca_btl_ugni_modex_t);
modex_msg = (char *) malloc (modex_size);
if (NULL == modex_msg) {
OPAL_OUTPUT((-1, "Error allocating memory for modex @ %s:%d",
__FILE__, __LINE__));
return OPAL_ERR_OUT_OF_RESOURCE;
}
modex.addr = mca_btl_ugni_component.dev_addr;
modex.id = mca_btl_ugni_component.cdm_id_base;
BTL_VERBOSE(("sending modex. addr: %d, id: %d", modex.addr, modex.id));
memcpy ((void *) modex_msg, (void *) &modex, modex_size);
/*
* need global for edge cases like MPI_Comm_spawn support with
* new ranks started on the same nodes as the spawnee ranks, etc.
*/
OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
&mca_btl_ugni_component.super.btl_version,
modex_msg, modex_size);
free (modex_msg);
return rc;
}
int mca_btl_ugni_fini (void)
{
return OPAL_SUCCESS;
}
int mca_btl_ugni_init (void)
{
int32_t pid_max = 32768;
int rc, bit;
FILE *fh;
if (0 == mca_btl_ugni_component.virtual_device_count) {
/* XXX -- TODO -- might want to improve this logic. One option would be to
* compare the number of local peers vs the number of cores or hyperthreads
* on the node. */
if (!opal_using_threads() || opal_process_info.num_local_peers >= 255) {
/* there is probably no benefit to using multiple device contexts when not
* using threads. */
mca_btl_ugni_component.virtual_device_count = 1;
} else if (opal_process_info.num_local_peers >= 127) {
mca_btl_ugni_component.virtual_device_count = 2;
} else if (opal_process_info.num_local_peers >= 63) {
mca_btl_ugni_component.virtual_device_count = 4;
} else if (opal_process_info.num_local_peers >= 31) {
mca_btl_ugni_component.virtual_device_count = 8;
} else {
mca_btl_ugni_component.virtual_device_count = 16;
}
} else if (MCA_BTL_UGNI_MAX_DEV_HANDLES < mca_btl_ugni_component.virtual_device_count) {
mca_btl_ugni_component.virtual_device_count = MCA_BTL_UGNI_MAX_DEV_HANDLES;
}
fh = fopen ("/proc/sys/kernel/pid_max", "r");
if (NULL != fh) {
fscanf (fh, "%d", &pid_max);
fclose (fh);
}
/* Use the pid to generate the cdm_id. Although it's not stated in the uGNI
* documentation, the cdm_id only needs to be unique within a node for a
* given ptag/cookie tuple */
bit = opal_hibit (pid_max, 31);
if (bit >= 31) {
mca_btl_ugni_component.virtual_device_count = 1;
mca_btl_ugni_component.cdm_id_base = getpid();
} else if (bit >= 30 && mca_btl_ugni_component.virtual_device_count > 2) {
mca_btl_ugni_component.virtual_device_count = 2;
mca_btl_ugni_component.cdm_id_base = getpid() << 1;
} else {
mca_btl_ugni_component.cdm_id_base = getpid() << 8;
}
/* Create a communication domain */
/* collect uGNI information */
rc = get_ptag(&mca_btl_ugni_component.ptag);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
}
rc = get_cookie(&mca_btl_ugni_component.cookie);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
}
/* get the device address of the NIC */
mca_btl_ugni_component.dev_addr = mca_btl_ugni_get_nic_address (0);
/* send ugni modex */
mca_btl_ugni_send_modex ();
return OPAL_SUCCESS;
}
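The net effect of the pid_max probe above is a simple bit layout: with the default pid_max of 32768 a pid fits in 15 bits, the base is shifted left by 8, and the low byte is left free for the virtual device index that GNI_CdmCreate combines in as cdm_id_base | virtual_device_id. A tiny worked example with made-up values:
/* Worked example of the CDM id layout (illustrative values only). */
#include <assert.h>
#include <stdint.h>

static void cdm_id_layout_example (void)
{
    uint32_t pid = 12345;               /* example pid, fits in 15 bits */
    uint32_t cdm_id_base = pid << 8;    /* 12345 << 8 == 3160320 */
    uint32_t device_index = 3;          /* one of up to 128 virtual devices */
    uint32_t cdm_id = cdm_id_base | device_index;

    assert (3160323 == cdm_id);         /* unique per (pid, device) within the node */
}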

View file

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014-2016 Research Organization for Information Science
@ -62,22 +62,18 @@ mca_btl_ugni_module_t mca_btl_ugni_module = {
};
int
mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
opal_common_ugni_device_t *dev)
mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module)
{
int rc;
BTL_VERBOSE(("binding module %p to device %p", (void *) ugni_module,
(void *) dev));
BTL_VERBOSE(("binding module %p to device 0", (void *) ugni_module));
/* copy module defaults (and function pointers) */
memmove (ugni_module, &mca_btl_ugni_module, sizeof (mca_btl_ugni_module));
ugni_module->initialized = false;
ugni_module->nlocal_procs = 0;
ugni_module->active_send_count = 0;
ugni_module->connected_peer_count = 0;
ugni_module->active_rdma_count = 0;
OBJ_CONSTRUCT(&ugni_module->failed_frags, opal_list_t);
OBJ_CONSTRUCT(&ugni_module->failed_frags_lock, opal_mutex_t);
@ -85,11 +81,10 @@ mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
OBJ_CONSTRUCT(&ugni_module->eager_get_pending, opal_list_t);
OBJ_CONSTRUCT(&ugni_module->eager_get_pending_lock,opal_mutex_t);
OBJ_CONSTRUCT(&ugni_module->eager_frags_send, opal_free_list_t);
OBJ_CONSTRUCT(&ugni_module->eager_frags_recv, opal_free_list_t);
OBJ_CONSTRUCT(&ugni_module->smsg_frags, opal_free_list_t);
OBJ_CONSTRUCT(&ugni_module->rdma_frags, opal_free_list_t);
OBJ_CONSTRUCT(&ugni_module->rdma_int_frags, opal_free_list_t);
for (int i = 0 ; i < MCA_BTL_UGNI_LIST_MAX ; ++i) {
OBJ_CONSTRUCT(ugni_module->frags_lists + i, opal_free_list_t);
}
OBJ_CONSTRUCT(&ugni_module->pending_smsg_frags_bb, opal_pointer_array_t);
OBJ_CONSTRUCT(&ugni_module->ep_wait_list_lock,opal_mutex_t);
OBJ_CONSTRUCT(&ugni_module->ep_wait_list, opal_list_t);
@ -97,22 +92,26 @@ mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module,
OBJ_CONSTRUCT(&ugni_module->endpoints, opal_pointer_array_t);
OBJ_CONSTRUCT(&ugni_module->id_to_endpoint, opal_hash_table_t);
OBJ_CONSTRUCT(&ugni_module->smsg_mboxes, opal_free_list_t);
OBJ_CONSTRUCT(&ugni_module->pending_descriptors, opal_list_t);
OBJ_CONSTRUCT(&ugni_module->eager_get_pending, opal_list_t);
OBJ_CONSTRUCT(&ugni_module->post_descriptors, opal_free_list_t);
ugni_module->device = dev;
dev->btl_ctx = (void *) ugni_module;
/* set up virtual device handles */
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
rc = mca_btl_ugni_device_init (ugni_module->devices + i, i);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_VERBOSE(("error initializing uGNI device handle"));
return rc;
}
}
/* create wildcard endpoint to listen for connections.
* there is no need to bind this endpoint. */
OPAL_THREAD_LOCK(&dev->dev_lock);
rc = GNI_EpCreate (ugni_module->device->dev_handle, NULL,
/* create wildcard endpoint on first device to listen for connections.
* there is no need to bind this endpoint. We are single threaded
* here so there is no need for a device lock. */
rc = GNI_EpCreate (ugni_module->devices[0].dev_handle, NULL,
&ugni_module->wildcard_ep);
OPAL_THREAD_UNLOCK(&dev->dev_lock);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
BTL_ERROR(("error creating wildcard ugni endpoint"));
return opal_common_rc_ugni_to_opal (rc);
return mca_btl_rc_ugni_to_opal (rc);
}
/* post wildcard datagram */
@ -133,16 +132,8 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
uint64_t key;
int rc;
while (ugni_module->active_send_count) {
/* ensure all sends are complete before closing the module */
rc = mca_btl_ugni_progress_local_smsg (ugni_module);
if (OPAL_SUCCESS != rc) {
break;
}
}
/* close all open connections and release endpoints */
if (ugni_module->initialized) {
/* close all open connections and release endpoints */
OPAL_HASH_TABLE_FOREACH(key, uint64, ep, &ugni_module->id_to_endpoint) {
if (NULL != ep) {
mca_btl_ugni_release_ep (ep);
@ -154,28 +145,12 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
}
/* destroy all cqs */
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
rc = GNI_CqDestroy (ugni_module->rdma_local_cq);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error tearing down local BTE/FMA CQ - %s",gni_err_str[rc]));
}
rc = GNI_CqDestroy (ugni_module->smsg_local_cq);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error tearing down TX SMSG CQ - %s",gni_err_str[rc]));
}
rc = GNI_CqDestroy (ugni_module->smsg_remote_cq);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error tearing down RX SMSG CQ - %s",gni_err_str[rc]));
}
if (mca_btl_ugni_component.progress_thread_enabled) {
rc = GNI_CqDestroy (ugni_module->rdma_local_irq_cq);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error tearing down local BTE/FMA CQ - %s",gni_err_str[rc]));
}
rc = GNI_CqDestroy (ugni_module->smsg_remote_irq_cq);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error tearing down remote SMSG CQ - %s",gni_err_str[rc]));
@ -195,14 +170,12 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
if (GNI_RC_SUCCESS != rc) {
BTL_VERBOSE(("btl/ugni error destroying endpoint - %s",gni_err_str[rc]));
}
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
}
OBJ_DESTRUCT(&ugni_module->eager_frags_send);
OBJ_DESTRUCT(&ugni_module->eager_frags_recv);
OBJ_DESTRUCT(&ugni_module->smsg_frags);
OBJ_DESTRUCT(&ugni_module->rdma_frags);
OBJ_DESTRUCT(&ugni_module->rdma_int_frags);
for (int i = 0 ; i < MCA_BTL_UGNI_LIST_MAX ; ++i) {
OBJ_DESTRUCT(ugni_module->frags_lists + i);
}
OBJ_DESTRUCT(&ugni_module->ep_wait_list);
OBJ_DESTRUCT(&ugni_module->smsg_mboxes);
OBJ_DESTRUCT(&ugni_module->pending_smsg_frags_bb);
@ -217,6 +190,10 @@ mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
mca_rcache_base_module_destroy (ugni_module->rcache);
}
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
mca_btl_ugni_device_fini (ugni_module->devices + i);
}
ugni_module->initialized = false;
return OPAL_SUCCESS;
@ -230,10 +207,17 @@ mca_btl_ugni_alloc(struct mca_btl_base_module_t *btl,
{
mca_btl_ugni_base_frag_t *frag = NULL;
if (size <= mca_btl_ugni_component.smsg_max_data) {
(void) MCA_BTL_UGNI_FRAG_ALLOC_SMSG(endpoint, frag);
/* do not allocate a fragment unless the wait list is relatively small. this
* reduces the potential for resource exhaustion. note the wait list only exists
* because we have no way to notify the sender that credits are available. */
if (OPAL_UNLIKELY(opal_list_get_size (&endpoint->frag_wait_list) > 32)) {
return NULL;
}
if (size <= mca_btl_ugni_component.smsg_max_data) {
frag = mca_btl_ugni_frag_alloc_smsg (endpoint);
} else if (size <= btl->btl_eager_limit) {
(void) MCA_BTL_UGNI_FRAG_ALLOC_EAGER_SEND(endpoint, frag);
frag = mca_btl_ugni_frag_alloc_eager_send (endpoint);
}
if (OPAL_UNLIKELY(NULL == frag)) {
@ -284,6 +268,13 @@ mca_btl_ugni_prepare_src (struct mca_btl_base_module_t *btl,
uint8_t order, size_t reserve, size_t *size,
uint32_t flags)
{
/* do not allocate a fragment unless the wait list is relatively small. this
* reduces the potential for resource exhaustion. note the wait list only exists
* because we have no way to notify the sender that credits are available. */
if (OPAL_UNLIKELY(opal_list_get_size (&endpoint->frag_wait_list) > 32)) {
return NULL;
}
return mca_btl_ugni_prepare_src_send (btl, endpoint, convertor,
order, reserve, size, flags);
}
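Both mca_btl_ugni_alloc and mca_btl_ugni_prepare_src apply the same back-pressure rule before handing out a fragment; written as a standalone predicate it amounts to the sketch below (the helper is hypothetical, the threshold of 32 is the one used in the checks above).
/* Hypothetical predicate for the back-pressure check used above: refuse new
 * sends once more than 32 fragments are already queued waiting for SMSG
 * credits on this endpoint. */
static inline bool endpoint_backlogged_sketch (mca_btl_base_endpoint_t *endpoint)
{
    return opal_list_get_size (&endpoint->frag_wait_list) > 32;
}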

View file

@ -26,7 +26,7 @@ mca_btl_ugni_prepare_src_send_nodata (struct mca_btl_base_module_t *btl,
{
mca_btl_ugni_base_frag_t *frag = NULL;
(void) MCA_BTL_UGNI_FRAG_ALLOC_RDMA(endpoint, frag);
frag = mca_btl_ugni_frag_alloc_rdma (endpoint);
if (OPAL_UNLIKELY(NULL == frag)) {
return NULL;
}
@ -65,8 +65,7 @@ mca_btl_ugni_prepare_src_send_inplace (struct mca_btl_base_module_t *btl,
opal_convertor_get_current_pointer (convertor, &data_ptr);
(void) MCA_BTL_UGNI_FRAG_ALLOC_RDMA(endpoint, frag);
frag = mca_btl_ugni_frag_alloc_rdma (endpoint);
if (OPAL_UNLIKELY(NULL == frag)) {
return NULL;
}
@ -123,7 +122,7 @@ mca_btl_ugni_prepare_src_send_buffered (struct mca_btl_base_module_t *btl,
int rc;
if (OPAL_UNLIKELY(true == use_eager_get)) {
(void) MCA_BTL_UGNI_FRAG_ALLOC_EAGER_SEND(endpoint, frag);
frag = mca_btl_ugni_frag_alloc_eager_send (endpoint);
if (OPAL_UNLIKELY(NULL == frag)) {
return NULL;
}
@ -136,7 +135,7 @@ mca_btl_ugni_prepare_src_send_buffered (struct mca_btl_base_module_t *btl,
frag->hdr_size = reserve + sizeof (frag->hdr.eager);
frag->segments[0].seg_addr.pval = frag->hdr.eager_ex.pml_header;
} else {
(void) MCA_BTL_UGNI_FRAG_ALLOC_SMSG(endpoint, frag);
frag = mca_btl_ugni_frag_alloc_smsg (endpoint);
if (OPAL_UNLIKELY(NULL == frag)) {
return NULL;
}
@ -186,8 +185,8 @@ mca_btl_ugni_prepare_src_send (struct mca_btl_base_module_t *btl,
opal_convertor_get_current_pointer (convertor, &data_ptr);
send_in_place = !(opal_convertor_need_buffers(convertor) ||
(use_eager_get && ((uintptr_t)data_ptr & 3)));
send_in_place = (btl->btl_flags & MCA_BTL_FLAGS_SEND_INPLACE) && !(opal_convertor_need_buffers(convertor) ||
(use_eager_get && ((uintptr_t)data_ptr & 3)));
if (send_in_place) {
return mca_btl_ugni_prepare_src_send_inplace (btl, endpoint, convertor, order,

View file

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -29,17 +29,19 @@ static void *mca_btl_ugni_prog_thread_fn(void * data)
{
uint32_t which;
gni_return_t status;
gni_cq_handle_t cq_vec[2];
gni_cq_handle_t cq_vec[1 + MCA_BTL_UGNI_MAX_DEV_HANDLES];
struct mca_btl_ugni_module_t *btl = (mca_btl_ugni_module_t *)data;
int cq_count = 1 + mca_btl_ugni_component.virtual_device_count;
/*
* need to block signals
*/
cq_vec[0] = btl->smsg_remote_irq_cq;
cq_vec[1] = btl->rdma_local_irq_cq;
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
cq_vec[i + 1] = btl->devices[i].dev_rdma_local_irq_cq.gni_handle;
}
while (stop_progress_thread == 0) {
@ -48,7 +50,7 @@ static void *mca_btl_ugni_prog_thread_fn(void * data)
*/
status = GNI_CqVectorMonitor(cq_vec,
2,
cq_count,
-1,
&which);
@ -106,8 +108,8 @@ int mca_btl_ugni_kill_progress_thread(void)
*/
ret = mca_btl_ugni_post_cqwrite (mca_btl_ugni_component.modules[0].local_ep,
mca_btl_ugni_component.modules[0].rdma_local_cq,
mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl,
&mca_btl_ugni_component.modules[0].devices[0].dev_rdma_local_cq,
mca_btl_ugni_component.modules[0].devices[0].smsg_irq_mhndl,
0xdead, NULL, NULL, NULL);
/*
* TODO: if error returned, need to kill off thread manually

View file

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -19,11 +19,8 @@ int mca_btl_ugni_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t
mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
BTL_VERBOSE(("Using RDMA/FMA Put from local address %p to remote address %" PRIx64,
local_address, remote_address));
/* cause endpoint to bind if it isn't already (bind is sufficient for rdma) */
(void) mca_btl_ugni_check_endpoint_state_rdma (endpoint);
BTL_VERBOSE(("Using RDMA/FMA Put %lu bytes from local address %p to remote address %" PRIx64,
(unsigned long) size, local_address, remote_address));
return mca_btl_ugni_post (endpoint, false, size, local_address, remote_address, local_handle,
remote_handle, order, cbfunc, cbcontext, cbdata);
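Callers still reach this path through the generic BTL interface; an illustrative put is sketched below, where every my_ prefixed name is a placeholder supplied by the upper layer rather than a BTL symbol.
/* Illustrative caller-side sketch; my_* names are placeholders. */
static int issue_put_sketch (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *ep,
                             void *my_src, uint64_t my_remote_addr,
                             mca_btl_base_registration_handle_t *my_local_handle,
                             mca_btl_base_registration_handle_t *my_remote_handle,
                             size_t my_len, mca_btl_base_rdma_completion_fn_t my_cb,
                             void *my_ctx)
{
    /* MCA_BTL_NO_ORDER maps to GNI_DLVMODE_PERFORMANCE in init_gni_post_desc */
    return btl->btl_put (btl, ep, my_src, my_remote_addr, my_local_handle,
                         my_remote_handle, my_len, /* flags */ 0, MCA_BTL_NO_ORDER,
                         my_cb, my_ctx, NULL);
}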

View file

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -15,12 +15,13 @@
#include "btl_ugni.h"
#include "btl_ugni_frag.h"
#include "btl_ugni_device.h"
int mca_btl_ugni_start_eager_get (mca_btl_base_endpoint_t *ep,
mca_btl_ugni_eager_ex_frag_hdr_t hdr,
mca_btl_ugni_base_frag_t *frag);
static inline void init_gni_post_desc (opal_common_ugni_post_desc_t *post_desc,
static inline void init_gni_post_desc (mca_btl_ugni_post_descriptor_t *post_desc,
int order, gni_post_type_t op_type,
uint64_t lcl_addr,
gni_mem_handle_t lcl_mdh,
@ -28,20 +29,20 @@ static inline void init_gni_post_desc (opal_common_ugni_post_desc_t *post_desc,
gni_mem_handle_t rem_mdh,
uint64_t bufsize,
gni_cq_handle_t cq_hndl) {
post_desc->base.type = op_type;
post_desc->base.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
post_desc->desc.type = op_type;
post_desc->desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
if (MCA_BTL_NO_ORDER == order) {
post_desc->base.dlvr_mode = GNI_DLVMODE_PERFORMANCE;
post_desc->desc.dlvr_mode = GNI_DLVMODE_PERFORMANCE;
} else {
post_desc->base.dlvr_mode = GNI_DLVMODE_NO_ADAPT;
post_desc->desc.dlvr_mode = GNI_DLVMODE_NO_ADAPT;
}
post_desc->base.local_addr = (uint64_t) lcl_addr;
post_desc->base.local_mem_hndl = lcl_mdh;
post_desc->base.remote_addr = (uint64_t) rem_addr;
post_desc->base.remote_mem_hndl = rem_mdh;
post_desc->base.length = bufsize;
post_desc->base.rdma_mode = 0;
post_desc->base.src_cq_hndl = cq_hndl;
post_desc->desc.local_addr = (uint64_t) lcl_addr;
post_desc->desc.local_mem_hndl = lcl_mdh;
post_desc->desc.remote_addr = (uint64_t) rem_addr;
post_desc->desc.remote_mem_hndl = rem_mdh;
post_desc->desc.length = bufsize;
post_desc->desc.rdma_mode = 0;
post_desc->desc.src_cq_hndl = cq_hndl;
post_desc->tries = 0;
}
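Given a descriptor already obtained from mca_btl_ugni_alloc_post_descriptor, as in the functions below, filling it for an RDMA put is a single call mirroring the BTE path; a small wrapper sketch (the function name is illustrative):
static inline void fill_put_descriptor_example (mca_btl_ugni_post_descriptor_t *post_desc,
                                                void *local_address,
                                                mca_btl_base_registration_handle_t *local_handle,
                                                uint64_t remote_address,
                                                mca_btl_base_registration_handle_t *remote_handle,
                                                size_t size)
{
    /* mirrors mca_btl_ugni_post_bte below; src_cq_hndl is left 0 here and is
     * presumably attached by the endpoint post helpers at post time */
    init_gni_post_desc (post_desc, MCA_BTL_NO_ORDER, GNI_POST_RDMA_PUT,
                        (intptr_t) local_address, local_handle->gni_handle,
                        remote_address, remote_handle->gni_handle, size, 0);
}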
@ -54,38 +55,28 @@ static inline int mca_btl_ugni_post_fma (struct mca_btl_base_endpoint_t *endpoin
{
mca_btl_ugni_post_descriptor_t *post_desc;
gni_mem_handle_t local_gni_handle = {0, 0};
gni_return_t grc;
int rc;
if (local_handle) {
local_gni_handle = local_handle->gni_handle;
}
mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata, &post_desc);
post_desc = mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata);
if (OPAL_UNLIKELY(NULL == post_desc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
/* Post descriptor (CQ is ignored for FMA transactions) -- The CQ associated with the endpoint
* is used. */
init_gni_post_desc (&post_desc->desc, order, op_type, (intptr_t) local_address, local_gni_handle,
init_gni_post_desc (post_desc, order, op_type, (intptr_t) local_address, local_gni_handle,
remote_address, remote_handle->gni_handle, size, 0);
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
grc = GNI_PostFma (endpoint->rdma_ep_handle, &post_desc->desc.base);
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
if (GNI_RC_ALIGNMENT_ERROR == grc) {
BTL_VERBOSE(("GNI_PostFma failed with an alignment error"));
return OPAL_ERR_NOT_AVAILABLE;
}
BTL_VERBOSE(("GNI_PostFma failed with gni rc: %d", grc));
return OPAL_ERR_OUT_OF_RESOURCE;
rc = mca_btl_ugni_endpoint_post_fma (endpoint, post_desc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
mca_btl_ugni_return_post_descriptor (post_desc);
}
return OPAL_SUCCESS;
return rc;
}
static inline int mca_btl_ugni_post_bte (mca_btl_base_endpoint_t *endpoint, gni_post_type_t op_type,
@ -96,70 +87,53 @@ static inline int mca_btl_ugni_post_bte (mca_btl_base_endpoint_t *endpoint, gni_
void *cbcontext, void *cbdata)
{
mca_btl_ugni_post_descriptor_t *post_desc;
gni_cq_handle_t cq_handle = endpoint->btl->rdma_local_cq;
gni_return_t status;
int rc;
mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata, &post_desc);
post_desc = mca_btl_ugni_alloc_post_descriptor (endpoint, local_handle, cbfunc, cbcontext, cbdata);
if (OPAL_UNLIKELY(NULL == post_desc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
if (mca_btl_ugni_component.progress_thread_enabled) {
cq_handle = endpoint->btl->rdma_local_irq_cq;
}
/* Post descriptor */
init_gni_post_desc (&post_desc->desc, order, op_type, (intptr_t) local_address, local_handle->gni_handle,
remote_address, remote_handle->gni_handle, size, cq_handle);
init_gni_post_desc (post_desc, order, op_type, (intptr_t) local_address, local_handle->gni_handle,
remote_address, remote_handle->gni_handle, size, 0);
OPAL_THREAD_LOCK(&endpoint->btl->device->dev_lock);
status = GNI_PostRdma (endpoint->rdma_ep_handle, &post_desc->desc.base);
OPAL_THREAD_UNLOCK(&endpoint->btl->device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != status)) {
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
if (GNI_RC_ALIGNMENT_ERROR == status) {
BTL_VERBOSE(("GNI_PostRdma failed with an alignment error"));
return OPAL_ERR_NOT_AVAILABLE;
}
BTL_VERBOSE(("GNI_PostRdma failed with gni rc: %d", status));
return OPAL_ERR_OUT_OF_RESOURCE;
rc = mca_btl_ugni_endpoint_post_rdma (endpoint, post_desc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
mca_btl_ugni_return_post_descriptor (post_desc);
}
return OPAL_SUCCESS;
return rc;
}
static inline int mca_btl_ugni_post_cqwrite (mca_btl_base_endpoint_t *endpoint, gni_cq_handle_t cq_handle,
static inline int mca_btl_ugni_post_cqwrite (mca_btl_base_endpoint_t *endpoint, mca_btl_ugni_cq_t *cq,
gni_mem_handle_t irq_mhndl, uint64_t value,
mca_btl_base_rdma_completion_fn_t cbfunc,
void *cbcontext, void *cbdata)
{
mca_btl_ugni_post_descriptor_t *post_desc;
gni_return_t grc;
int rc;
mca_btl_ugni_alloc_post_descriptor (endpoint, NULL, cbfunc, cbcontext, cbdata, &post_desc);
post_desc = mca_btl_ugni_alloc_post_descriptor (endpoint, NULL, cbfunc, cbcontext, cbdata);
if (OPAL_UNLIKELY(NULL == post_desc)) {
return OPAL_ERR_OUT_OF_RESOURCE;
}
post_desc->desc.base.type = GNI_POST_CQWRITE;
post_desc->desc.base.cqwrite_value = value; /* up to 48 bytes here, not used for now */
post_desc->desc.base.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
post_desc->desc.base.dlvr_mode = GNI_DLVMODE_IN_ORDER;
post_desc->desc.base.src_cq_hndl = cq_handle;
post_desc->desc.base.remote_mem_hndl = irq_mhndl;
post_desc->desc.tries = 0;
post_desc->desc.type = GNI_POST_CQWRITE;
post_desc->desc.cqwrite_value = value; /* up to 48 bytes here, not used for now */
post_desc->desc.cq_mode = GNI_CQMODE_GLOBAL_EVENT;
post_desc->desc.dlvr_mode = GNI_DLVMODE_IN_ORDER;
post_desc->desc.src_cq_hndl = cq->gni_handle;
post_desc->desc.remote_mem_hndl = irq_mhndl;
post_desc->tries = 0;
post_desc->cq = cq;
OPAL_THREAD_LOCK(&endpoint->common->dev->dev_lock);
grc = GNI_PostCqWrite(endpoint->rdma_ep_handle, &post_desc->desc.base);
OPAL_THREAD_UNLOCK(&endpoint->common->dev->dev_lock);
if (GNI_RC_SUCCESS != grc) { /* errors for PostCqWrite treated as non-fatal */
BTL_VERBOSE(("GNI_PostCqWrite returned error - %s", gni_err_str[grc]));
mca_btl_ugni_return_post_descriptor (endpoint->btl, post_desc);
rc = mca_btl_ugni_endpoint_post_cqwrite (endpoint, post_desc);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) { /* errors for PostCqWrite treated as non-fatal */
mca_btl_ugni_return_post_descriptor (post_desc);
}
return opal_common_rc_ugni_to_opal (grc);
return rc;
}
static inline int mca_btl_ugni_post (mca_btl_base_endpoint_t *endpoint, int get, size_t size,
@ -183,27 +157,11 @@ static inline int mca_btl_ugni_post (mca_btl_base_endpoint_t *endpoint, int get,
static inline int mca_btl_ugni_repost (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_post_descriptor_t *post_desc)
{
gni_return_t grc;
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
if (GNI_POST_RDMA_PUT == post_desc->desc.base.type ||
GNI_POST_RDMA_GET == post_desc->desc.base.type) {
grc = GNI_PostRdma (post_desc->endpoint->rdma_ep_handle, &post_desc->desc.base);
} else {
grc = GNI_PostFma (post_desc->endpoint->rdma_ep_handle, &post_desc->desc.base);
}
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
/* NTH: Should we even retry these? When this code was written there was no indication
* whether an error in post is recoverable. Clobber this code and the associated data
* structures if post errors are not recoverable. */
OPAL_THREAD_LOCK(&ugni_module->pending_descriptors_lock);
opal_list_append (&ugni_module->pending_descriptors, (opal_list_item_t *) post_desc);
OPAL_THREAD_UNLOCK(&ugni_module->pending_descriptors_lock);
if (GNI_POST_RDMA_PUT == post_desc->desc.type || GNI_POST_RDMA_GET == post_desc->desc.type) {
return mca_btl_ugni_endpoint_post_rdma (post_desc->endpoint, post_desc);
}
return opal_common_rc_ugni_to_opal (grc);
return mca_btl_ugni_endpoint_post_fma (post_desc->endpoint, post_desc);
}
#endif /* MCA_BTL_UGNI_RDMA_H */

View file

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
@ -17,6 +17,30 @@
#include "btl_ugni_smsg.h"
#include "btl_ugni_prepare.h"
void mca_btl_ugni_wait_list_append (mca_btl_ugni_module_t *ugni_module, mca_btl_base_endpoint_t *endpoint,
mca_btl_ugni_base_frag_t *frag)
{
BTL_VERBOSE(("wait-listing fragment %p to %s. endpoint state %d\n", frag, OPAL_NAME_PRINT(endpoint->peer_proc->proc_name), endpoint->state));
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
/* queue up request */
OPAL_THREAD_LOCK(&endpoint->lock);
opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) frag);
OPAL_THREAD_UNLOCK(&endpoint->lock);
if (false == endpoint->wait_listed && MCA_BTL_UGNI_EP_STATE_CONNECTED == endpoint->state) {
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
if (false == endpoint->wait_listed) {
opal_list_append (&ugni_module->ep_wait_list, &endpoint->super);
endpoint->wait_listed = true;
}
OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
}
}
int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
struct mca_btl_base_endpoint_t *endpoint,
struct mca_btl_base_descriptor_t *descriptor,
@ -30,18 +54,15 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
/* tag and len are at the same location in eager and smsg frag hdrs */
frag->hdr.send.lag = (tag << 24) | size;
BTL_VERBOSE(("btl/ugni sending descriptor %p from %d -> %d. length = %" PRIu64, (void *)descriptor,
OPAL_PROC_MY_NAME.vpid, endpoint->peer_proc->proc_name.vpid, size));
rc = mca_btl_ugni_check_endpoint_state (endpoint);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
OPAL_THREAD_LOCK(&endpoint->lock);
opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) frag);
OPAL_THREAD_UNLOCK(&endpoint->lock);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || opal_list_get_size (&endpoint->frag_wait_list))) {
mca_btl_ugni_wait_list_append (ugni_module, endpoint, frag);
return OPAL_SUCCESS;
}
BTL_VERBOSE(("btl/ugni sending descriptor %p from %d -> %d. length = %" PRIu64, (void *)descriptor,
OPAL_PROC_MY_NAME.vpid, endpoint->common->ep_rem_id, size));
/* add a reference to prevent the fragment from being returned until after the
* completion flag is checked. */
++frag->ref_cnt;
@ -61,7 +82,7 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
frag->flags &= ~MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
if (call_callback) {
frag->base.des_cbfunc(&frag->endpoint->btl->super, frag->endpoint, &frag->base, rc);
frag->base.des_cbfunc(&ugni_module->super, frag->endpoint, &frag->base, rc);
}
(void) mca_btl_ugni_frag_del_ref (frag, OPAL_SUCCESS);
@ -77,18 +98,7 @@ int mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
if (OPAL_UNLIKELY(OPAL_ERR_OUT_OF_RESOURCE == rc)) {
/* queue up request */
if (false == endpoint->wait_listed) {
OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
if (false == endpoint->wait_listed) {
opal_list_append (&ugni_module->ep_wait_list, &endpoint->super);
endpoint->wait_listed = true;
}
OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
}
OPAL_THREAD_LOCK(&endpoint->lock);
opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) frag);
OPAL_THREAD_UNLOCK(&endpoint->lock);
mca_btl_ugni_wait_list_append (ugni_module, endpoint, frag);
rc = OPAL_SUCCESS;
}
@ -109,9 +119,9 @@ int mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
int rc;
do {
if (OPAL_UNLIKELY(OPAL_SUCCESS != mca_btl_ugni_check_endpoint_state (endpoint))) {
break;
}
BTL_VERBOSE(("btl/ugni isend sending fragment from %d -> %d. length = %" PRIu64
" endoint state %d", OPAL_PROC_MY_NAME.vpid, endpoint->peer_proc->proc_name.vpid,
payload_size + header_size, endpoint->state));
flags |= MCA_BTL_DES_FLAGS_BTL_OWNERSHIP;
@ -124,7 +134,8 @@ int mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
}
assert (packed_size == payload_size);
if (OPAL_UNLIKELY(NULL == frag)) {
if (OPAL_UNLIKELY(NULL == frag || OPAL_SUCCESS != mca_btl_ugni_check_endpoint_state (endpoint) ||
opal_list_get_size (&endpoint->frag_wait_list))) {
break;
}
@ -141,8 +152,9 @@ int mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
} while (0);
if (NULL != descriptor) {
*descriptor = NULL;
*descriptor = &frag->base;
}
return OPAL_ERR_OUT_OF_RESOURCE;
}

View file

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -28,7 +28,7 @@ static void mca_btl_ugni_smsg_mbox_construct (mca_btl_ugni_smsg_mbox_t *mbox) {
mbox->attr.smsg_attr.buff_size = mca_btl_ugni_component.smsg_mbox_size;
mbox->attr.smsg_attr.mem_hndl = ugni_reg->handle.gni_handle;
mbox->attr.proc_name = OPAL_PROC_MY_NAME;
mbox->attr.rmt_irq_mem_hndl = mca_btl_ugni_component.modules[0].device->smsg_irq_mhndl;
mbox->attr.rmt_irq_mem_hndl = mca_btl_ugni_component.modules[0].devices[0].smsg_irq_mhndl;
}
OBJ_CLASS_INSTANCE(mca_btl_ugni_smsg_mbox_t, opal_free_list_item_t,
@ -39,11 +39,13 @@ int mca_btl_ugni_smsg_init (mca_btl_ugni_module_t *ugni_module)
{
gni_return_t rc;
rc = GNI_SmsgSetMaxRetrans (ugni_module->device->dev_handle,
mca_btl_ugni_component.smsg_max_retries);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error setting maximum SMSG retries %s",gni_err_str[rc]));
return opal_common_rc_ugni_to_opal (rc);
for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
rc = GNI_SmsgSetMaxRetrans (ugni_module->devices[i].dev_handle,
mca_btl_ugni_component.smsg_max_retries);
if (GNI_RC_SUCCESS != rc) {
BTL_ERROR(("error setting maximum SMSG retries %s",gni_err_str[rc]));
return mca_btl_rc_ugni_to_opal (rc);
}
}
return OPAL_SUCCESS;
@ -52,6 +54,7 @@ int mca_btl_ugni_smsg_init (mca_btl_ugni_module_t *ugni_module)
/* progress */
int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
{
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
mca_btl_active_message_callback_t *reg;
mca_btl_ugni_base_frag_t frag;
mca_btl_base_segment_t seg;
@ -70,27 +73,20 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
do {
uint8_t tag = GNI_SMSG_ANY_TAG;
OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
rc = GNI_SmsgGetNextWTag (ep->smsg_ep_handle, (void **) &data_ptr, &tag);
OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);
if (GNI_RC_NOT_DONE == rc) {
BTL_VERBOSE(("no smsg message waiting. rc = %s", gni_err_str[rc]));
rc = mca_btl_ugni_smsg_get_next_wtag (ep->smsg_ep_handle, &data_ptr, &tag);
if (GNI_RC_SUCCESS != rc) {
if (OPAL_LIKELY(GNI_RC_NOT_DONE == rc)) {
BTL_VERBOSE(("no smsg message waiting. rc = %s", gni_err_str[rc]));
ep->smsg_progressing = 0;
ep->smsg_progressing = 0;
return count;
}
return count;
}
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
BTL_ERROR(("GNI_SmsgGetNextWTag returned error %s", gni_err_str[rc]));
BTL_ERROR(("unhandled GNI_SmsgGetNextWTag error"));
return OPAL_ERROR;
}
if (OPAL_UNLIKELY(0 == data_ptr)) {
BTL_ERROR(("null data ptr!"));
assert (0);
return OPAL_ERROR;
}
assert (0 != data_ptr);
count++;
@ -114,7 +110,7 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
assert (NULL != reg->cbfunc);
reg->cbfunc(&ep->btl->super, tag, &(frag.base), reg->cbdata);
reg->cbfunc(&ugni_module->super, tag, &(frag.base), reg->cbdata);
break;
case MCA_BTL_UGNI_TAG_GET_INIT:
@ -141,16 +137,14 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
break;
}
OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
rc = GNI_SmsgRelease (ep->smsg_ep_handle);
OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);
rc = mca_btl_ugni_smsg_release (ep->smsg_ep_handle);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
BTL_ERROR(("Smsg release failed! rc = %d", rc));
return OPAL_ERROR;
}
} while (!disconnect);
ep->smsg_progressing = false;
ep->smsg_progressing = 0;
/* disconnect if we get here */
opal_mutex_lock (&ep->lock);
@ -165,7 +159,6 @@ int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep)
static inline int
mca_btl_ugni_handle_remote_smsg_overrun (mca_btl_ugni_module_t *btl)
{
gni_cq_entry_t event_data;
size_t endpoint_count;
unsigned int ep_index;
int count, rc;
@ -177,11 +170,7 @@ mca_btl_ugni_handle_remote_smsg_overrun (mca_btl_ugni_module_t *btl)
smsg remote cq and check all mailboxes */
/* clear out remote cq */
do {
OPAL_THREAD_LOCK(&btl->device->dev_lock);
rc = GNI_CqGetEvent (btl->smsg_remote_cq, &event_data);
OPAL_THREAD_UNLOCK(&btl->device->dev_lock);
} while (GNI_RC_NOT_DONE != rc);
mca_btl_ugni_cq_clear (btl->devices, btl->smsg_remote_cq);
endpoint_count = opal_pointer_array_get_size (&btl->endpoints);
@ -212,9 +201,7 @@ int mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_module_t *btl)
gni_return_t grc;
uint64_t inst_id;
OPAL_THREAD_LOCK(&btl->device->dev_lock);
grc = GNI_CqGetEvent (btl->smsg_remote_cq, &event_data);
OPAL_THREAD_UNLOCK(&btl->device->dev_lock);
grc = mca_btl_ugni_gni_cq_get_event (btl->devices, btl->smsg_remote_cq, &event_data);
if (GNI_RC_NOT_DONE == grc) {
return 0;
}
@ -231,12 +218,12 @@ int mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_module_t *btl)
/* unhandled error: crash */
assert (0);
return opal_common_rc_ugni_to_opal (grc);
return mca_btl_rc_ugni_to_opal (grc);
}
BTL_VERBOSE(("REMOTE CQ: Got event 0x%" PRIx64 ". msg id = %" PRIu64
". ok = %d, type = %" PRIu64 "\n", (uint64_t) event_data,
GNI_CQ_GET_MSG_ID(event_data), GNI_CQ_STATUS_OK(event_data),
". ok = %d, type = %" PRIu64, (uint64_t) event_data,
GNI_CQ_GET_INST_ID(event_data), GNI_CQ_STATUS_OK(event_data),
GNI_CQ_GET_TYPE(event_data)));
inst_id = GNI_CQ_GET_INST_ID(event_data);

View file

@ -1,6 +1,6 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
* Copyright (c) 2011-2017 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
@ -36,20 +36,13 @@ int mca_btl_ugni_smsg_init (mca_btl_ugni_module_t *ugni_module);
int mca_btl_ugni_smsg_process (mca_btl_base_endpoint_t *ep);
int mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_module_t *btl);
static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_module)
static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device)
{
mca_btl_ugni_base_frag_t *frag;
gni_cq_entry_t event_data;
gni_return_t grc;
/* nothing to do */
if (0 == ugni_module->active_send_count) {
return OPAL_SUCCESS;
}
OPAL_THREAD_LOCK(&ugni_module->device->dev_lock);
grc = GNI_CqGetEvent (ugni_module->smsg_local_cq, &event_data);
OPAL_THREAD_UNLOCK(&ugni_module->device->dev_lock);
grc = mca_btl_ugni_cq_get_event (device, &device->dev_smsg_local_cq, &event_data);
if (GNI_RC_NOT_DONE == grc) {
return OPAL_SUCCESS;
}
@ -59,7 +52,7 @@ static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_
will the event eventually come back? Ask Cray */
BTL_ERROR(("post error! cq overrun = %d", (int)GNI_CQ_OVERRUN(event_data)));
assert (0);
return opal_common_rc_ugni_to_opal (grc);
return mca_btl_rc_ugni_to_opal (grc);
}
assert (GNI_CQ_GET_TYPE(event_data) == GNI_CQ_EVENT_TYPE_SMSG);
@ -71,8 +64,6 @@ static inline int mca_btl_ugni_progress_local_smsg (mca_btl_ugni_module_t *ugni_
return OPAL_ERROR;
}
opal_atomic_add_32(&ugni_module->active_send_count,-1);
frag->flags |= MCA_BTL_UGNI_FRAG_SMSG_COMPLETE;
if (!(frag->flags & MCA_BTL_UGNI_FRAG_IGNORE)) {
@ -87,26 +78,22 @@ static inline int opal_mca_btl_ugni_smsg_send (mca_btl_ugni_base_frag_t *frag,
void *payload, size_t payload_len,
mca_btl_ugni_smsg_tag_t tag)
{
mca_btl_base_endpoint_t *endpoint = frag->endpoint;
mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (endpoint);
gni_return_t grc;
OPAL_THREAD_LOCK(&frag->endpoint->common->dev->dev_lock);
grc = GNI_SmsgSendWTag (frag->endpoint->smsg_ep_handle, hdr, hdr_len,
payload, payload_len, frag->msg_id, tag);
OPAL_THREAD_UNLOCK(&frag->endpoint->common->dev->dev_lock);
grc = mca_btl_ugni_endpoint_smsg_send_wtag (endpoint, hdr, hdr_len, payload, payload_len,
frag->msg_id, tag);
if (OPAL_LIKELY(GNI_RC_SUCCESS == grc)) {
/* increment the active send counter */
opal_atomic_add_32(&frag->endpoint->btl->active_send_count,1);
if (mca_btl_ugni_component.progress_thread_enabled) {
if (frag->base.des_flags & MCA_BTL_DES_FLAGS_SIGNAL) {
/* errors for PostCqWrite treated as non-fatal */
(void) mca_btl_ugni_post_cqwrite (frag->endpoint, frag->endpoint->btl->rdma_local_cq,
frag->endpoint->rmt_irq_mem_hndl, 0xdead, NULL, NULL, NULL);
(void) mca_btl_ugni_post_cqwrite (endpoint, &ugni_module->devices[0].dev_rdma_local_cq,
endpoint->rmt_irq_mem_hndl, 0xdead, NULL, NULL, NULL);
}
}
(void) mca_btl_ugni_progress_local_smsg ((mca_btl_ugni_module_t *) frag->endpoint->btl);
(void) mca_btl_ugni_progress_local_smsg (ugni_module, endpoint->smsg_ep_handle->device);
return OPAL_SUCCESS;
}

View file

@ -1,67 +0,0 @@
# -*- indent-tabs-mode:nil -*-
#
# Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
# reserved.
# Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
#
# Additional copyrights may follow
#
# $HEADER$
#
# Make the output library in this directory, and name it either
# mca_<type>_<name>.la (for DSO builds) or libmca_<type>_<name>.la
# (for static builds).
AM_CPPFLAGS = $(common_ugni_CPPFLAGS)
component_noinst = lib@OPAL_LIB_PREFIX@mca_common_ugni_noinst.la
component_install = lib@OPAL_LIB_PREFIX@mca_common_ugni.la
if MCA_BUILD_opal_common_ugni_DSO
lib_LTLIBRARIES = $(component_install)
else
noinst_LTLIBRARIES = $(component_noinst)
endif
headers = common_ugni.h \
common_ugni_ep.h
ugni_SOURCES = common_ugni.c \
common_ugni_ep.c
#mcacomponentdir = $(opallibdir)
lib@OPAL_LIB_PREFIX@mca_common_ugni_la_SOURCES = $(headers) $(ugni_SOURCES)
nodist_lib@OPAL_LIB_PREFIX@mca_common_ugni_la_SOURCES = $(ugni_nodist_SOURCES)
lib@OPAL_LIB_PREFIX@mca_common_ugni_la_LIBADD = $(common_ugni_LIBS)
lib@OPAL_LIB_PREFIX@mca_common_ugni_la_LDFLAGS = \
-version-info $(libmca_opal_common_ugni_so_version) \
$(common_ugni_LDFLAGS)
lib@OPAL_LIB_PREFIX@mca_common_ugni_noinst_la_SOURCES = \
$(headers) $(ugni_SOURCES)
nodist_lib@OPAL_LIB_PREFIX@mca_common_ugni_noinst_la_SOURCES = \
$(ugni_nodist_SOURCES)
lib@OPAL_LIB_PREFIX@mca_common_ugni_noinst_la_LIBADD = $(common_ugni_LIBS)
lib@OPAL_LIB_PREFIX@mca_common_ugni_noinst_la_LDFLAGS = \
-module -avoid-version $(common_ugni_LDFLAGS)
# These two rules will sym link the "noinst" libtool library filename
# to the installable libtool library filename in the case where we are
# compiling this component statically (case 2), described above).
V=0
OMPI_V_LN_SCOMP = $(ompi__v_LN_SCOMP_$V)
ompi__v_LN_SCOMP_ = $(ompi__v_LN_SCOMP_$AM_DEFAULT_VERBOSITY)
ompi__v_LN_SCOMP_0 = @echo " LN_S " `basename $(component_install)`;
all-local:
$(OMPI_V_LN_SCOMP) if test -z "$(lib_LTLIBRARIES)"; then \
rm -f "$(component_install)"; \
$(LN_S) "$(component_noinst)" "$(component_install)"; \
fi
clean-local:
if test -z "$(mcacomponent_LTLIBRARIES)"; then \
rm -f "$(component_install)"; \
fi

View file

@ -1,301 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* Copyright (c) 2014 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "common_ugni.h"
#include "opal/class/opal_list.h"
#include "opal/dss/dss.h"
#include "opal/mca/pmix/pmix.h"
/* NTH: we need some options from the btl */
#include "opal/mca/btl/ugni/btl_ugni.h"
static int opal_common_ugni_module_ref_count = 0;
opal_common_ugni_module_t opal_common_ugni_module = {0};
mca_base_component_t opal_common_ugni_component = {
OPAL_MCA_BASE_VERSION_2_1_0("common", 1, 0, 0),
.mca_component_name = "ugni",
.mca_component_major_version = 1,
.mca_component_minor_version = 0,
.mca_component_release_version = 0,
};
static inline int
get_ptag(uint8_t *out_ptag)
{
/* TODO no need for tmp */
char *ptr;
uint8_t tmp_ptag;
if (NULL == (ptr = getenv("PMI_GNI_PTAG"))) {
/* TODO add err msg - better rc? */
return OPAL_ERR_NOT_FOUND;
}
errno = 0;
tmp_ptag = (uint8_t)strtoul (ptr, (char **)NULL, 10);
if (0 != errno) {
/* TODO add err msg - better rc? */
return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
}
*out_ptag = tmp_ptag;
return OPAL_SUCCESS;
}
static inline int get_cookie (uint32_t *out_cookie)
{
/* TODO no need for tmp */
char *ptr;
uint32_t tmp_cookie;
if (NULL == (ptr = getenv("PMI_GNI_COOKIE"))) {
/* TODO add err msg - better rc? */
return OPAL_ERR_NOT_FOUND;
}
errno = 0;
tmp_cookie = (uint32_t) strtoul (ptr, NULL, 10);
if (0 != errno) {
/* TODO add err msg - better rc? */
return OPAL_ERR_VALUE_OUT_OF_BOUNDS;
}
*out_cookie = tmp_cookie;
return OPAL_SUCCESS;
}
static unsigned int
opal_common_ugni_get_nic_address(int device_id)
{
unsigned int address, cpu_id;
gni_return_t status;
int i, alps_dev_id = -1;
char *token,*p_ptr;
p_ptr = getenv("PMI_GNI_DEV_ID");
if (!p_ptr) {
status = GNI_CdmGetNicAddress(device_id, &address, &cpu_id);
if(status != GNI_RC_SUCCESS) {
opal_output (0, "FAILED:GNI_CdmGetNicAddress returned error %d", status);
return (unsigned int)-1;
}
return address;
}
while (NULL != (token = strtok(p_ptr, ":"))) {
alps_dev_id = atoi(token);
if (alps_dev_id == device_id) {
break;
}
p_ptr = NULL;
}
if (OPAL_UNLIKELY(-1 == alps_dev_id)) {
return (unsigned int)-1;
}
p_ptr = getenv("PMI_GNI_LOC_ADDR");
if (OPAL_UNLIKELY(NULL == p_ptr)) {
return (unsigned int)-1;
}
i = 0;
while (NULL != (token = strtok(p_ptr, ":"))) {
if (i == alps_dev_id) {
return strtoul (token, NULL, 10);
}
p_ptr = NULL;
++i;
}
return (unsigned int)-1;
}
static int opal_common_ugni_device_init (opal_common_ugni_device_t *device,
int device_id)
{
int rc;
/* Create a NIC Adress */
device->dev_id = device_id; /* Minor number of the Gemini NIC */
device->dev_addr = opal_common_ugni_get_nic_address (device->dev_id);
OPAL_OUTPUT((-1, "Got NIC Addr: 0x%08x, CPU ID: %d", device->dev_addr, device->dev_id));
OBJ_CONSTRUCT(&device->dev_lock,opal_mutex_t);
/* Attach device to the communication domain */
rc = GNI_CdmAttach (opal_common_ugni_module.cd_handle, device->dev_id,
&device->dev_pe_addr, &device->dev_handle);
if (GNI_RC_SUCCESS != rc) {
OPAL_OUTPUT((0, "Error: Creating communication domain %d\n", rc));
return opal_common_rc_ugni_to_opal (rc);
}
return OPAL_SUCCESS;
}
static int opal_common_ugni_device_fini (opal_common_ugni_device_t *dev)
{
return OPAL_SUCCESS;
}
/*
* Send local device information and other information
* required for setup
*/
static int opal_common_ugni_send_modex (int my_cdm_id)
{
uint32_t modex_size, total_msg_size, msg_offset;
struct opal_common_ugni_modex_t modex;
char *modex_msg;
int rc, i;
modex_size = sizeof (struct opal_common_ugni_modex_t);
total_msg_size = opal_common_ugni_module.device_count * modex_size;
modex_msg = (char *) malloc (total_msg_size);
if (NULL == modex_msg) {
OPAL_OUTPUT((-1, "Error allocating memory for modex @ %s:%d",
__FILE__, __LINE__));
return OPAL_ERR_OUT_OF_RESOURCE;
}
/* pack modex for all available devices */
for (i = 0, msg_offset = 0; i < opal_common_ugni_module.device_count ; ++i) {
opal_common_ugni_device_t *dev = opal_common_ugni_module.devices + i;
modex.addr = dev->dev_addr;
modex.id = my_cdm_id;
memcpy ((void *)((uintptr_t) modex_msg + msg_offset),
(void *)&modex, modex_size);
msg_offset += modex_size;
}
/*
* need global for edge cases like MPI_Comm_spawn support with
* new ranks started on the same nodes as the spawnee ranks, etc.
*/
OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
&opal_common_ugni_component,
modex_msg, total_msg_size);
free(modex_msg);
return rc;
}
int opal_common_ugni_fini (void)
{
int i, rc;
if (0 == opal_common_ugni_module_ref_count) {
return OPAL_SUCCESS;
}
if (1 == opal_common_ugni_module_ref_count) {
/* tear down component */
if (opal_common_ugni_module.devices) {
/* finalize devices */
for (i = 0 ; i < opal_common_ugni_module.device_count ; ++i) {
opal_common_ugni_device_fini (opal_common_ugni_module.devices + i);
}
free (opal_common_ugni_module.devices);
opal_common_ugni_module.devices = NULL;
}
/* finally, tear down the communication domain */
rc = GNI_CdmDestroy (opal_common_ugni_module.cd_handle);
if (GNI_RC_SUCCESS != rc) {
OPAL_OUTPUT((-1, "error destroying cdm"));
}
}
opal_common_ugni_module_ref_count--;
return OPAL_SUCCESS;
}
int opal_common_ugni_init (void)
{
int modes, rc, i;
uint32_t my_cdm_id;
opal_common_ugni_module_ref_count ++;
if (opal_common_ugni_module_ref_count > 1) {
return OPAL_SUCCESS;
}
/* use pid for my_cdm_id. Although its not stated in the uGNI
documentation, the cdm_id only needs to be unique
within a node for a given ptag/cookie tuple */
my_cdm_id = getpid(); /*TODO: eventually need something else for thread-hot support */
/* pull settings from ugni btl */
opal_common_ugni_module.rdma_max_retries =
mca_btl_ugni_component.rdma_max_retries;
/* Create a communication domain */
modes = GNI_CDM_MODE_FORK_FULLCOPY | GNI_CDM_MODE_CACHED_AMO_ENABLED |
GNI_CDM_MODE_ERR_NO_KILL | GNI_CDM_MODE_FAST_DATAGRAM_POLL |
GNI_CDM_MODE_FMA_SHARED;
/* collect uGNI information */
rc = get_ptag(&opal_common_ugni_module.ptag);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
}
rc = get_cookie(&opal_common_ugni_module.cookie);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
return rc;
}
/* create a communication domain */
rc = GNI_CdmCreate (my_cdm_id, opal_common_ugni_module.ptag,
opal_common_ugni_module.cookie, modes,
&opal_common_ugni_module.cd_handle);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
OPAL_OUTPUT((0, "Error: Creating communication domain %d\n",rc));
return opal_common_rc_ugni_to_opal (rc);
}
/* setup uGNI devices. we only support one device atm */
opal_common_ugni_module.device_count = 1;
opal_common_ugni_module.devices = calloc (opal_common_ugni_module.device_count,
sizeof (opal_common_ugni_device_t));
for (i = 0 ; i < opal_common_ugni_module.device_count ; ++i) {
rc = opal_common_ugni_device_init (opal_common_ugni_module.devices + i, i);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
OPAL_OUTPUT((-1, "error initializing uGNI device"));
return rc;
}
}
/* send ugni modex */
opal_common_ugni_send_modex (my_cdm_id);
return OPAL_SUCCESS;
}

View file

@ -1,117 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "opal_config.h"
#include "opal/util/output.h"
#include "opal/util/proc.h"
#include "opal/class/opal_list.h"
#include "opal/include/opal/prefetch.h"
#include "opal_stdint.h"
#include <errno.h>
#include <stdint.h>
#include <sys/types.h>
#include <assert.h>
#include <sys/time.h>
#include <gni_pub.h>
#include "common_ugni_ep.h"
#if !defined(MPI_COMMON_UGNI_H)
#define MPI_COMMON_UGNI_H
struct opal_common_ugni_modex_t {
uint32_t addr;
int id;
gni_mem_handle_t irq_memhndl;
};
typedef struct opal_common_ugni_modex_t opal_common_ugni_modex_t;
struct opal_common_ugni_device_t {
opal_object_t super;
gni_nic_handle_t dev_handle;
/* Minor number of the Gemini NIC */
int32_t dev_id;
uint32_t dev_pe_addr;
uint32_t dev_addr;
uint32_t dev_cpu_id;
size_t dev_ep_count;
opal_mutex_t dev_lock;
gni_mem_handle_t smsg_irq_mhndl;
void *btl_ctx;
};
typedef struct opal_common_ugni_device_t opal_common_ugni_device_t;
struct opal_common_ugni_module_t {
/* protection tag */
uint8_t ptag;
/* unique id for this process assigned by the system */
uint32_t cookie;
/* communication domain handle */
gni_cdm_handle_t cd_handle;
/* device count. to be used if we have more than 1 common per ugni device */
int device_count;
opal_common_ugni_device_t *devices;
int rdma_max_retries;
};
typedef struct opal_common_ugni_module_t opal_common_ugni_module_t;
struct opal_common_ugni_post_desc_t {
gni_post_descriptor_t base;
opal_common_ugni_endpoint_t *endpoint;
int tries;
};
typedef struct opal_common_ugni_post_desc_t opal_common_ugni_post_desc_t;
extern opal_common_ugni_module_t opal_common_ugni_module;
extern mca_base_component_t opal_common_ugni_component;
static inline int
opal_common_rc_ugni_to_opal (gni_return_t rc)
{
int codes[] = {OPAL_SUCCESS,
OPAL_ERR_RESOURCE_BUSY,
OPAL_ERR_BAD_PARAM,
OPAL_ERR_OUT_OF_RESOURCE,
OPAL_ERR_TIMEOUT,
OPAL_ERR_PERM,
OPAL_ERROR,
OPAL_ERR_BAD_PARAM,
OPAL_ERR_BAD_PARAM,
OPAL_ERR_NOT_FOUND,
OPAL_ERR_VALUE_OUT_OF_BOUNDS,
OPAL_ERROR,
OPAL_ERR_NOT_SUPPORTED,
OPAL_ERR_OUT_OF_RESOURCE};
return codes[rc];
}
/*
* Initialize uGNI communication domain and device(s).
*/
int opal_common_ugni_init (void);
/*
* Finalize uGNI communication domain and device(s).
*/
int opal_common_ugni_fini (void);
#endif /* MPI_COMMON_UGNI_H */

View file

@ -1,118 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#include "common_ugni.h"
#include "opal/mca/pmix/pmix.h"
OBJ_CLASS_INSTANCE(opal_common_ugni_endpoint_t, opal_object_t, NULL, NULL);
int opal_common_ugni_endpoint_for_proc (opal_common_ugni_device_t *dev, opal_proc_t *peer_proc,
opal_common_ugni_endpoint_t **ep)
{
opal_common_ugni_endpoint_t *endpoint;
opal_common_ugni_modex_t *modex;
size_t msg_size;
int rc;
assert (NULL != dev && NULL != ep && peer_proc);
endpoint = OBJ_NEW(opal_common_ugni_endpoint_t);
if (OPAL_UNLIKELY(NULL == endpoint)) {
assert (0);
return OPAL_ERR_OUT_OF_RESOURCE;
}
/* Receive the modex */
OPAL_MODEX_RECV(rc, &opal_common_ugni_component,
&peer_proc->proc_name, (void **)&modex, &msg_size);
if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
OPAL_OUTPUT((-1, "btl/ugni error receiving modex"));
return rc;
}
endpoint->ep_rem_addr = modex->addr;
endpoint->ep_rem_id = modex->id;
endpoint->ep_rem_irq_memhndl = modex->irq_memhndl;
endpoint->dev = dev;
*ep = endpoint;
free (modex);
return OPAL_SUCCESS;
}
void opal_common_ugni_endpoint_return (opal_common_ugni_endpoint_t *ep)
{
assert(NULL != ep);
OBJ_RELEASE(ep);
}
int opal_common_ugni_ep_create (opal_common_ugni_endpoint_t *cep, gni_cq_handle_t cq,
gni_ep_handle_t *ep_handle)
{
gni_return_t grc;
if (OPAL_UNLIKELY(NULL == cep)) {
assert (0);
return OPAL_ERR_BAD_PARAM;
}
/* create a uGNI endpoint handle and bind it to the remote peer */
OPAL_THREAD_LOCK(&cep->dev->dev_lock);
grc = GNI_EpCreate (cep->dev->dev_handle, cq, ep_handle);
OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
return opal_common_rc_ugni_to_opal (grc);
}
OPAL_THREAD_LOCK(&cep->dev->dev_lock);
grc = GNI_EpBind (*ep_handle, cep->ep_rem_addr, cep->ep_rem_id);
OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);
if (GNI_RC_SUCCESS != grc) {
OPAL_THREAD_LOCK(&cep->dev->dev_lock);
GNI_EpDestroy (*ep_handle);
OPAL_THREAD_UNLOCK(&cep->dev->dev_lock);
return opal_common_rc_ugni_to_opal (grc);
}
return OPAL_SUCCESS;
}
int opal_common_ugni_ep_destroy (gni_ep_handle_t *ep)
{
int rc;
if (NULL == ep || 0 == *ep) {
return OPAL_SUCCESS;
}
/* TODO: need to fix, may be outstanding tx's, etc. */
rc = GNI_EpUnbind (*ep);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
/* should warn */
}
GNI_EpDestroy (*ep);
if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
/* should warn */
}
*ep = 0;
return OPAL_SUCCESS;
}

View file

@ -1,63 +0,0 @@
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2011 UT-Battelle, LLC. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
*
* $HEADER$
*/
#if !defined(MPI_COMMON_UGNI_EP_H)
#define MPI_COMMON_UGNI_EP_H
struct opal_common_ugni_device_t;
struct opal_common_ugni_endpoint_t {
opal_object_t super;
uint32_t ep_rem_addr, ep_rem_id; /**< remote information */
gni_mem_handle_t ep_rem_irq_memhndl;
struct opal_common_ugni_device_t *dev; /**< device this endpoint is using */
};
typedef struct opal_common_ugni_endpoint_t opal_common_ugni_endpoint_t;
OBJ_CLASS_DECLARATION(opal_common_ugni_endpoint_t);
/*
* Get (and retain) a reference to an endpoint to peer_proc. This endpoint
* needs to be returned with opal_common_ugni_endpoint_return.
*
* @param[IN] dev uGNI device this endpoint should be bound to.
* @param[IN] peer_proc remote peer the endpoint will be connected to.
* @param[OUT] ep uGNI endpoint for the peer
*/
int opal_common_ugni_endpoint_for_proc (struct opal_common_ugni_device_t *dev, opal_proc_t *peer_proc,
opal_common_ugni_endpoint_t **ep);
/*
* Allocate and bind a uGNI endpoint handle to the remote peer.
*
* @param[IN] cep common endpoint
* @param[IN] cq completion queue
* @param[OUT] ep_handle uGNI endpoint handle
*/
int opal_common_ugni_ep_create (opal_common_ugni_endpoint_t *cep, gni_cq_handle_t cq, gni_ep_handle_t *ep_handle);
/*
* Unbind and free the uGNI endpoint handle.
*
* @param[IN] ep_handle uGNI endpoint handle to unbind and release
*/
int opal_common_ugni_ep_destroy (gni_ep_handle_t *ep_handle);
/*
* Return (and possibly free) a common endpoint. The endpoint may not be used
* once it is returned.
*
* @param[IN] ep uGNI endpoint to return
*/
void opal_common_ugni_endpoint_return (opal_common_ugni_endpoint_t *ep);
#endif /* MPI_COMMON_UGNI_EP_H */

View file

@ -1,54 +0,0 @@
# -*- shell-script -*-
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2006 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2006 QLogic Corp. All rights reserved.
# Copyright (c) 2009 Cisco Systems, Inc. All rights reserved.
# Copyright (c) 2011 Los Alamos National Security, LLC.
# All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#
# OPAL_CHECK_UGNI(prefix, [action-if-found], [action-if-not-found])
# --------------------------------------------------------
# check if GNI support can be found. sets prefix_{CPPFLAGS,
# LDFLAGS, LIBS} as needed and runs action-if-found if there is
# support, otherwise executes action-if-not-found
#
# NOTES
# on Cray XE6 systems, the GNI development header (gni_pub.h) is in a
# completely different place than the ugni library (libugni).
#
# EXAMPLE CONFIGURE USAGE:
# --with-ugni=/base/path/to/libugni --with-ugni-includedir=/path/to/gni_pub.h
#
# --with-ugni=/opt/cray/ugni/default --with-ugni-includedir=/opt/cray/gni-headers/default/include
AC_DEFUN([MCA_opal_common_ugni_CONFIG],[
AC_CONFIG_FILES([opal/mca/common/ugni/Makefile])
OPAL_CHECK_UGNI([common_ugni],
[common_ugni_happy="yes"],
[common_ugni_happy="no"])
AS_IF([test "$common_ugni_happy" = "yes"],
[$1],
[$2])
# substitute in the things needed to build ugni
AC_SUBST([common_ugni_CPPFLAGS])
AC_SUBST([common_ugni_LDFLAGS])
AC_SUBST([common_ugni_LIBS])
])dnl

View file

@ -1,7 +0,0 @@
#
# owner/status file
# owner: institution that is responsible for this package
# status: e.g. active, maintenance, unmaintained
#
owner: LANL
status: active